1*c0909341SAndroid Build Coastguard Worker; Copyright © 2022-2023, VideoLAN and dav1d authors 2*c0909341SAndroid Build Coastguard Worker; Copyright © 2022-2023, Two Orioles, LLC 3*c0909341SAndroid Build Coastguard Worker; All rights reserved. 4*c0909341SAndroid Build Coastguard Worker; 5*c0909341SAndroid Build Coastguard Worker; Redistribution and use in source and binary forms, with or without 6*c0909341SAndroid Build Coastguard Worker; modification, are permitted provided that the following conditions are met: 7*c0909341SAndroid Build Coastguard Worker; 8*c0909341SAndroid Build Coastguard Worker; 1. Redistributions of source code must retain the above copyright notice, this 9*c0909341SAndroid Build Coastguard Worker; list of conditions and the following disclaimer. 10*c0909341SAndroid Build Coastguard Worker; 11*c0909341SAndroid Build Coastguard Worker; 2. Redistributions in binary form must reproduce the above copyright notice, 12*c0909341SAndroid Build Coastguard Worker; this list of conditions and the following disclaimer in the documentation 13*c0909341SAndroid Build Coastguard Worker; and/or other materials provided with the distribution. 14*c0909341SAndroid Build Coastguard Worker; 15*c0909341SAndroid Build Coastguard Worker; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16*c0909341SAndroid Build Coastguard Worker; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17*c0909341SAndroid Build Coastguard Worker; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18*c0909341SAndroid Build Coastguard Worker; DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 19*c0909341SAndroid Build Coastguard Worker; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20*c0909341SAndroid Build Coastguard Worker; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21*c0909341SAndroid Build Coastguard Worker; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22*c0909341SAndroid Build Coastguard Worker; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23*c0909341SAndroid Build Coastguard Worker; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24*c0909341SAndroid Build Coastguard Worker; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25*c0909341SAndroid Build Coastguard Worker 26*c0909341SAndroid Build Coastguard Worker%include "config.asm" 27*c0909341SAndroid Build Coastguard Worker%include "ext/x86/x86inc.asm" 28*c0909341SAndroid Build Coastguard Worker 29*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 30*c0909341SAndroid Build Coastguard Worker 31*c0909341SAndroid Build Coastguard WorkerSECTION_RODATA 64 32*c0909341SAndroid Build Coastguard Worker 33*c0909341SAndroid Build Coastguard Workeridct8x8p: db 0, 1, 4, 5, 2, 3, 6, 7, 16, 17, 20, 21, 18, 19, 22, 23 34*c0909341SAndroid Build Coastguard Worker db 8, 9, 12, 13, 10, 11, 14, 15, 24, 25, 28, 29, 26, 27, 30, 31 35*c0909341SAndroid Build Coastguard Worker db 32, 33, 36, 37, 34, 35, 38, 39, 48, 49, 52, 53, 50, 51, 54, 55 36*c0909341SAndroid Build Coastguard Worker db 40, 41, 44, 45, 42, 43, 46, 47, 56, 57, 60, 61, 58, 59, 62, 63 37*c0909341SAndroid Build Coastguard Workeridtx8x8p: db 0, 1, 32, 33, 2, 3, 34, 35, 4, 5, 36, 37, 6, 7, 38, 39 38*c0909341SAndroid Build Coastguard Worker db 8, 9, 40, 41, 10, 11, 42, 43, 12, 13, 44, 45, 14, 15, 46, 47 39*c0909341SAndroid Build Coastguard Worker db 16, 17, 48, 49, 18, 19, 50, 51, 20, 21, 52, 53, 22, 23, 54, 55 
40*c0909341SAndroid Build Coastguard Worker db 24, 25, 56, 57, 26, 27, 58, 59, 28, 29, 60, 61, 30, 31, 62, 63 41*c0909341SAndroid Build Coastguard Workeridct8x16p: db 54, 55, 2, 3, 22, 23, 34, 35, 38, 39, 18, 19, 6, 7, 50, 51 42*c0909341SAndroid Build Coastguard Worker db 62, 63, 10, 11, 30, 31, 42, 43, 46, 47, 26, 27, 14, 15, 58, 59 43*c0909341SAndroid Build Coastguard Worker db 52, 53, 4, 5, 20, 21, 36, 37, 32, 33, 0, 1, 48, 49, 16, 17 44*c0909341SAndroid Build Coastguard Worker db 60, 61, 12, 13, 28, 29, 44, 45, 40, 41, 8, 9, 56, 57, 24, 25 45*c0909341SAndroid Build Coastguard Workeriadst8x16p: db 0, 1, 54, 55, 48, 49, 6, 7, 16, 17, 38, 39, 32, 33, 22, 23 46*c0909341SAndroid Build Coastguard Worker db 8, 9, 62, 63, 56, 57, 14, 15, 24, 25, 46, 47, 40, 41, 30, 31 47*c0909341SAndroid Build Coastguard Worker db 4, 5, 50, 51, 52, 53, 2, 3, 20, 21, 34, 35, 36, 37, 18, 19 48*c0909341SAndroid Build Coastguard Worker db 12, 13, 58, 59, 60, 61, 10, 11, 28, 29, 42, 43, 44, 45, 26, 27 49*c0909341SAndroid Build Coastguard WorkerpermA: db 0, 1, 0, 8, 4, 5, 1, 9, 8, 9, 4, 12, 12, 13, 5, 13 50*c0909341SAndroid Build Coastguard Worker db 16, 17, 16, 24, 20, 21, 17, 25, 24, 25, 20, 28, 28, 29, 21, 29 51*c0909341SAndroid Build Coastguard Worker db 2, 3, 2, 10, 6, 7, 3, 11, 10, 11, 6, 14, 14, 15, 7, 15 52*c0909341SAndroid Build Coastguard Worker db 18, 19, 18, 26, 22, 23, 19, 27, 26, 27, 22, 30, 30, 31, 23, 31 53*c0909341SAndroid Build Coastguard WorkerpermB: db 4, 2, 1, 8, 0, 0, 1, 0, 12, 3, 3, 10, 8, 1, 3, 2 54*c0909341SAndroid Build Coastguard Worker db 5, 10, 5, 12, 1, 8, 5, 4, 13, 11, 7, 14, 9, 9, 7, 6 55*c0909341SAndroid Build Coastguard Worker db 6, 6, 13, 4, 2, 4, 4, 5, 14, 7, 15, 6, 10, 5, 6, 7 56*c0909341SAndroid Build Coastguard Worker db 7, 14, 9, 0, 3, 12, 0, 1, 15, 15, 11, 2, 11, 13, 2, 3 57*c0909341SAndroid Build Coastguard WorkerpermC: db 0, 9, 0, 0, 0, 1, 4, 4, 2, 11, 2, 2, 2, 3, 6, 6 58*c0909341SAndroid Build Coastguard Worker db 1, 8, 1, 8, 4, 5, 5, 12, 3, 10, 3, 
10, 6, 7, 7, 14 59*c0909341SAndroid Build Coastguard Worker db 9, 1, 8, 1, 1, 0, 12, 5, 11, 3, 10, 3, 3, 2, 14, 7 60*c0909341SAndroid Build Coastguard Worker db 8, 0, 9, 9, 5, 4, 13, 13, 10, 2, 11, 11, 7, 6, 15, 15 61*c0909341SAndroid Build Coastguard Workeridct8x32p: db 0, 1, 4, 5, 16, 17, 20, 21, 32, 33, 36, 37, 48, 49, 52, 53 62*c0909341SAndroid Build Coastguard Worker db 8, 9, 12, 13, 24, 25, 28, 29, 40, 41, 44, 45, 56, 57, 60, 61 63*c0909341SAndroid Build Coastguard Worker db 2, 3, 6, 7, 18, 19, 22, 23, 34, 35, 38, 39, 50, 51, 54, 55 64*c0909341SAndroid Build Coastguard Worker db 10, 11, 14, 15, 26, 27, 30, 31, 42, 43, 46, 47, 58, 59, 62, 63 65*c0909341SAndroid Build Coastguard Workeridct32x8p: db 2, 18, 0, 16, 3, 19, 1, 17, 10, 26, 8, 24, 11, 27, 9, 25 66*c0909341SAndroid Build Coastguard Worker db 34, 50, 32, 48, 35, 51, 33, 49, 42, 58, 40, 56, 43, 59, 41, 57 67*c0909341SAndroid Build Coastguard Worker db 6, 22, 4, 20, 7, 23, 5, 21, 14, 30, 12, 28, 15, 31, 13, 29 68*c0909341SAndroid Build Coastguard Worker db 38, 54, 36, 52, 39, 55, 37, 53, 46, 62, 44, 60, 47, 63, 45, 61 69*c0909341SAndroid Build Coastguard Workeridtx32x8p: db 0, 8, 16, 24, 4, 12, 20, 28, 2, 10, 18, 26, 6, 14, 22, 30 70*c0909341SAndroid Build Coastguard Worker db 32, 40, 48, 56, 36, 44, 52, 60, 34, 42, 50, 58, 38, 46, 54, 62 71*c0909341SAndroid Build Coastguard Worker db 1, 9, 17, 25, 5, 13, 21, 29, 3, 11, 19, 27, 7, 15, 23, 31 72*c0909341SAndroid Build Coastguard Worker db 33, 41, 49, 57, 37, 45, 53, 61, 35, 43, 51, 59, 39, 47, 55, 63 73*c0909341SAndroid Build Coastguard Worker 74*c0909341SAndroid Build Coastguard Workerpw_2048_m2048: times 16 dw 2048 75*c0909341SAndroid Build Coastguard Workerpw_m2048_2048: times 16 dw -2048 76*c0909341SAndroid Build Coastguard Workerpw_2048: times 16 dw 2048 77*c0909341SAndroid Build Coastguard Worker 78*c0909341SAndroid Build Coastguard Worker; flags: 0 = ++, 1 = +-, 2 = -+, 3 = ++-, 4=-- 79*c0909341SAndroid Build Coastguard Worker%macro COEF_PAIR 2-3 0 
; a, b, flags 80*c0909341SAndroid Build Coastguard Worker%if %3 == 1 81*c0909341SAndroid Build Coastguard Workerpd_%1_m%2: dd %1, %1, -%2, -%2 82*c0909341SAndroid Build Coastguard Worker%define pd_%1 (pd_%1_m%2 + 4*0) 83*c0909341SAndroid Build Coastguard Worker%define pd_m%2 (pd_%1_m%2 + 4*2) 84*c0909341SAndroid Build Coastguard Worker%elif %3 == 2 85*c0909341SAndroid Build Coastguard Workerpd_m%1_%2: dd -%1, -%1, %2, %2 86*c0909341SAndroid Build Coastguard Worker%define pd_m%1 (pd_m%1_%2 + 4*0) 87*c0909341SAndroid Build Coastguard Worker%define pd_%2 (pd_m%1_%2 + 4*2) 88*c0909341SAndroid Build Coastguard Worker%elif %3 == 4 89*c0909341SAndroid Build Coastguard Workerpd_m%1_m%2: dd -%1, -%1, -%2, -%2 90*c0909341SAndroid Build Coastguard Worker%define pd_m%1 (pd_m%1_m%2 + 4*0) 91*c0909341SAndroid Build Coastguard Worker%define pd_m%2 (pd_m%1_m%2 + 4*2) 92*c0909341SAndroid Build Coastguard Worker%else 93*c0909341SAndroid Build Coastguard Workerpd_%1_%2: dd %1, %1, %2, %2 94*c0909341SAndroid Build Coastguard Worker%define pd_%1 (pd_%1_%2 + 4*0) 95*c0909341SAndroid Build Coastguard Worker%define pd_%2 (pd_%1_%2 + 4*2) 96*c0909341SAndroid Build Coastguard Worker%if %3 == 3 97*c0909341SAndroid Build Coastguard Worker%define pd_%2_m%2 pd_%2 98*c0909341SAndroid Build Coastguard Workerdd -%2, -%2 99*c0909341SAndroid Build Coastguard Worker%endif 100*c0909341SAndroid Build Coastguard Worker%endif 101*c0909341SAndroid Build Coastguard Worker%endmacro 102*c0909341SAndroid Build Coastguard Worker 103*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 101, 501 104*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 201, 601, 1 105*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 201, 995 106*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 401, 1189, 1 107*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 401, 1931 108*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 401, 3920 109*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 401, 4076 110*c0909341SAndroid Build Coastguard 
WorkerCOEF_PAIR 700, 301, 4 111*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 799, 2276, 1 112*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 799, 3406 113*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 799, 4017 114*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 1380, 601 115*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 1751, 2440 116*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2598, 1189 117*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2598, 1931, 2 118*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2598, 3612 119*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2751, 2106 120*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2896, 1567, 3 121*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2896, 3784, 3 122*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3035, 3513 123*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3166, 1931 124*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3166, 3612 125*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3166, 3920 126*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3703, 3290 127*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3857, 4052 128*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4017, 2276 129*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4017, 3406 130*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4036, 4085 131*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4076, 1189 132*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4076, 3612 133*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4076, 3920 134*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4091, 3973 135*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4091, 4052 136*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4095, 4065 137*c0909341SAndroid Build Coastguard Worker 138*c0909341SAndroid Build Coastguard Workerpb_32: times 4 db 32 139*c0909341SAndroid Build Coastguard Workerpw_5: times 2 dw 5 140*c0909341SAndroid Build Coastguard Workerpw_4096: times 2 dw 4096 141*c0909341SAndroid Build 
Coastguard Workerpw_8192: times 2 dw 8192 142*c0909341SAndroid Build Coastguard Workerpw_1697x16: times 2 dw 1697*16 143*c0909341SAndroid Build Coastguard Workerpw_2896x8: times 2 dw 2896*8 144*c0909341SAndroid Build Coastguard Workerpixel_10bpc_max: times 2 dw 0x03ff 145*c0909341SAndroid Build Coastguard Workerdconly_10bpc: times 2 dw 0x7c00 146*c0909341SAndroid Build Coastguard Workerclip_18b_min: dd -0x20000 147*c0909341SAndroid Build Coastguard Workerclip_18b_max: dd 0x1ffff 148*c0909341SAndroid Build Coastguard Workerpd_1: dd 1 149*c0909341SAndroid Build Coastguard Workerpd_2: dd 2 150*c0909341SAndroid Build Coastguard Workerpd_1448: dd 1448 151*c0909341SAndroid Build Coastguard Workerpd_2048: dd 2048 152*c0909341SAndroid Build Coastguard Workerpd_3071: dd 3071 ; 1024 + 2048 - 1 153*c0909341SAndroid Build Coastguard Workerpd_3072: dd 3072 ; 1024 + 2048 154*c0909341SAndroid Build Coastguard Workerpd_5119: dd 5119 ; 1024 + 4096 - 1 155*c0909341SAndroid Build Coastguard Workerpd_5120: dd 5120 ; 1024 + 4096 156*c0909341SAndroid Build Coastguard Workerpd_5793: dd 5793 157*c0909341SAndroid Build Coastguard Worker 158*c0909341SAndroid Build Coastguard Workercextern dup16_perm 159*c0909341SAndroid Build Coastguard Workercextern int8_permA 160*c0909341SAndroid Build Coastguard Workercextern idct64_mul_16bpc 161*c0909341SAndroid Build Coastguard Workercextern idct_8x8_internal_8bpc_avx512icl.main 162*c0909341SAndroid Build Coastguard Workercextern iadst_8x8_internal_8bpc_avx512icl.main_pass2 163*c0909341SAndroid Build Coastguard Workercextern idct_8x16_internal_8bpc_avx512icl.main 164*c0909341SAndroid Build Coastguard Workercextern idct_8x16_internal_8bpc_avx512icl.main2 165*c0909341SAndroid Build Coastguard Workercextern idct_8x16_internal_8bpc_avx512icl.main_fast 166*c0909341SAndroid Build Coastguard Workercextern idct_8x16_internal_8bpc_avx512icl.main_fast2 167*c0909341SAndroid Build Coastguard Workercextern iadst_8x16_internal_8bpc_avx512icl.main2 
168*c0909341SAndroid Build Coastguard Workercextern idct_16x8_internal_8bpc_avx512icl.main 169*c0909341SAndroid Build Coastguard Workercextern iadst_16x8_internal_8bpc_avx512icl.main_pass2 170*c0909341SAndroid Build Coastguard Workercextern idct_16x16_internal_8bpc_avx512icl.main 171*c0909341SAndroid Build Coastguard Workercextern idct_16x16_internal_8bpc_avx512icl.main2 172*c0909341SAndroid Build Coastguard Workercextern idct_16x16_internal_8bpc_avx512icl.main_fast 173*c0909341SAndroid Build Coastguard Workercextern idct_16x16_internal_8bpc_avx512icl.main_fast2 174*c0909341SAndroid Build Coastguard Workercextern iadst_16x16_internal_8bpc_avx512icl.main_pass2b 175*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_8x32_8bpc_avx512icl.main 176*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_8x32_8bpc_avx512icl.main_fast 177*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_8x32_8bpc_avx512icl.main_fast2 178*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_8x32_8bpc_avx512icl.main_end 179*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x32_8bpc_avx512icl.main_oddhalf 180*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x32_8bpc_avx512icl.main_oddhalf_fast 181*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x32_8bpc_avx512icl.main_oddhalf_fast2 182*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x8_8bpc_avx512icl.main 183*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x16_8bpc_avx512icl.main_oddhalf 184*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x16_8bpc_avx512icl.main_oddhalf_fast 185*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x16_8bpc_avx512icl.main_oddhalf_fast2 186*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x16_8bpc_avx512icl.main_oddhalf_fast3 187*c0909341SAndroid Build Coastguard 
Workercextern inv_txfm_add_dct_dct_32x32_8bpc_avx512icl.main_oddhalf 188*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x32_8bpc_avx512icl.main_oddhalf_fast 189*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x32_8bpc_avx512icl.main_oddhalf_fast2 190*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x32_8bpc_avx512icl.main_oddhalf_fast3 191*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x64_8bpc_avx512icl.main_oddhalf 192*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x64_8bpc_avx512icl.main_oddhalf_fast 193*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x64_8bpc_avx512icl.main_part1 194*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x64_8bpc_avx512icl.main_part1_fast 195*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x64_8bpc_avx512icl.main_part1_fast2 196*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_32x64_8bpc_avx512icl.main_part2 197*c0909341SAndroid Build Coastguard Worker 198*c0909341SAndroid Build Coastguard WorkerSECTION .text 199*c0909341SAndroid Build Coastguard Worker 200*c0909341SAndroid Build Coastguard Worker%define o_base (pw_2048+4*128) 201*c0909341SAndroid Build Coastguard Worker%define o_base_8bpc (int8_permA+64*18) 202*c0909341SAndroid Build Coastguard Worker%define o(x) (r5 - o_base + (x)) 203*c0909341SAndroid Build Coastguard Worker%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX) 204*c0909341SAndroid Build Coastguard Worker 205*c0909341SAndroid Build Coastguard WorkerINIT_ZMM avx512icl 206*c0909341SAndroid Build Coastguard Worker 207*c0909341SAndroid Build Coastguard Worker; dst1 = (src1 * coef1 - src2 * coef2 + rnd) >> 12 208*c0909341SAndroid Build Coastguard Worker; dst2 = (src1 * coef2 + src2 * coef1 + rnd) >> 12 209*c0909341SAndroid Build Coastguard Worker; flags: 1 = inv_dst1, 2 = inv_dst2 210*c0909341SAndroid Build 
Coastguard Worker; skip round/shift if rnd is not a number 211*c0909341SAndroid Build Coastguard Worker%macro ITX_MULSUB_2D 8-9 0 ; dst/src[1-2], tmp[1-3], rnd, coef[1-2], flags 212*c0909341SAndroid Build Coastguard Worker%if %8 < 32 213*c0909341SAndroid Build Coastguard Worker pmulld m%4, m%1, m%8 214*c0909341SAndroid Build Coastguard Worker pmulld m%3, m%2, m%8 215*c0909341SAndroid Build Coastguard Worker%else 216*c0909341SAndroid Build Coastguard Worker%if %8 < 4096 217*c0909341SAndroid Build Coastguard Worker vpbroadcastd m%3, [o(pd_%8)] 218*c0909341SAndroid Build Coastguard Worker%else 219*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m%3, [o(pd_%8)] 220*c0909341SAndroid Build Coastguard Worker%endif 221*c0909341SAndroid Build Coastguard Worker pmulld m%4, m%1, m%3 222*c0909341SAndroid Build Coastguard Worker pmulld m%3, m%2 223*c0909341SAndroid Build Coastguard Worker%endif 224*c0909341SAndroid Build Coastguard Worker%if %7 < 32 225*c0909341SAndroid Build Coastguard Worker pmulld m%1, m%7 226*c0909341SAndroid Build Coastguard Worker pmulld m%2, m%7 227*c0909341SAndroid Build Coastguard Worker%else 228*c0909341SAndroid Build Coastguard Worker%if %7 < 4096 229*c0909341SAndroid Build Coastguard Worker vpbroadcastd m%5, [o(pd_%7)] 230*c0909341SAndroid Build Coastguard Worker%else 231*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m%5, [o(pd_%7)] 232*c0909341SAndroid Build Coastguard Worker%endif 233*c0909341SAndroid Build Coastguard Worker pmulld m%1, m%5 234*c0909341SAndroid Build Coastguard Worker pmulld m%2, m%5 235*c0909341SAndroid Build Coastguard Worker%endif 236*c0909341SAndroid Build Coastguard Worker%if %9 & 2 237*c0909341SAndroid Build Coastguard Worker psubd m%4, m%6, m%4 238*c0909341SAndroid Build Coastguard Worker psubd m%2, m%4, m%2 239*c0909341SAndroid Build Coastguard Worker%else 240*c0909341SAndroid Build Coastguard Worker%ifnum %6 241*c0909341SAndroid Build Coastguard Worker paddd m%4, m%6 242*c0909341SAndroid Build 
Coastguard Worker%endif 243*c0909341SAndroid Build Coastguard Worker paddd m%2, m%4 244*c0909341SAndroid Build Coastguard Worker%endif 245*c0909341SAndroid Build Coastguard Worker%ifnum %6 246*c0909341SAndroid Build Coastguard Worker paddd m%1, m%6 247*c0909341SAndroid Build Coastguard Worker%endif 248*c0909341SAndroid Build Coastguard Worker%if %9 & 1 249*c0909341SAndroid Build Coastguard Worker psubd m%1, m%3, m%1 250*c0909341SAndroid Build Coastguard Worker%else 251*c0909341SAndroid Build Coastguard Worker psubd m%1, m%3 252*c0909341SAndroid Build Coastguard Worker%endif 253*c0909341SAndroid Build Coastguard Worker%ifnum %6 254*c0909341SAndroid Build Coastguard Worker psrad m%2, 12 255*c0909341SAndroid Build Coastguard Worker psrad m%1, 12 256*c0909341SAndroid Build Coastguard Worker%endif 257*c0909341SAndroid Build Coastguard Worker%endmacro 258*c0909341SAndroid Build Coastguard Worker 259*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_FN 4 ; type1, type2, eob_offset, size 260*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_%1_%2_%4_10bpc, 4, 7, 0, dst, stride, c, eob, tx2 261*c0909341SAndroid Build Coastguard Worker %define %%p1 m(i%1_%4_internal_10bpc) 262*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 263*c0909341SAndroid Build Coastguard Worker ; Jump to the 1st txfm function if we're not taking the fast path, which 264*c0909341SAndroid Build Coastguard Worker ; in turn performs an indirect jump to the 2nd txfm function. 
265*c0909341SAndroid Build Coastguard Worker lea tx2q, [m(i%2_%4_internal_10bpc).pass2] 266*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct 267*c0909341SAndroid Build Coastguard Worker test eobd, eobd 268*c0909341SAndroid Build Coastguard Worker jnz %%p1 269*c0909341SAndroid Build Coastguard Worker%else 270*c0909341SAndroid Build Coastguard Worker%if %3 271*c0909341SAndroid Build Coastguard Worker add eobd, %3 272*c0909341SAndroid Build Coastguard Worker%endif 273*c0909341SAndroid Build Coastguard Worker ; jump to the 1st txfm function unless it's located directly after this 274*c0909341SAndroid Build Coastguard Worker times ((%%end - %%p1) >> 31) & 1 jmp %%p1 275*c0909341SAndroid Build Coastguard WorkerALIGN function_align 276*c0909341SAndroid Build Coastguard Worker%%end: 277*c0909341SAndroid Build Coastguard Worker%endif 278*c0909341SAndroid Build Coastguard Worker%endmacro 279*c0909341SAndroid Build Coastguard Worker 280*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X8_FN 2-3 0 ; type1, type2, eob_offset 281*c0909341SAndroid Build Coastguard Worker INV_TXFM_FN %1, %2, %3, 8x8 282*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct 283*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 284*c0909341SAndroid Build Coastguard Worker mov [cq], eobd ; 0 285*c0909341SAndroid Build Coastguard Worker or r3d, 8 286*c0909341SAndroid Build Coastguard Worker.dconly: 287*c0909341SAndroid Build Coastguard Worker add r6d, 384 288*c0909341SAndroid Build Coastguard Worker sar r6d, 9 289*c0909341SAndroid Build Coastguard Worker.dconly2: 290*c0909341SAndroid Build Coastguard Worker vpbroadcastd ym2, [o(dconly_10bpc)] 291*c0909341SAndroid Build Coastguard Worker imul r6d, 181 292*c0909341SAndroid Build Coastguard Worker add r6d, 2176 293*c0909341SAndroid Build Coastguard Worker sar r6d, 12 294*c0909341SAndroid Build Coastguard Worker vpbroadcastw ym1, r6d 295*c0909341SAndroid Build Coastguard Worker paddsw ym1, ym2 296*c0909341SAndroid 
Build Coastguard Worker.dconly_loop: 297*c0909341SAndroid Build Coastguard Worker mova xm0, [dstq+strideq*0] 298*c0909341SAndroid Build Coastguard Worker vinserti32x4 ym0, [dstq+strideq*1], 1 299*c0909341SAndroid Build Coastguard Worker paddsw ym0, ym1 300*c0909341SAndroid Build Coastguard Worker psubusw ym0, ym2 301*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], xm0 302*c0909341SAndroid Build Coastguard Worker vextracti32x4 [dstq+strideq*1], ym0, 1 303*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 304*c0909341SAndroid Build Coastguard Worker sub r3d, 2 305*c0909341SAndroid Build Coastguard Worker jg .dconly_loop 306*c0909341SAndroid Build Coastguard Worker RET 307*c0909341SAndroid Build Coastguard Worker%endif 308*c0909341SAndroid Build Coastguard Worker%endmacro 309*c0909341SAndroid Build Coastguard Worker 310*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, dct 311*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, adst 312*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, flipadst 313*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, identity 314*c0909341SAndroid Build Coastguard Worker 315*c0909341SAndroid Build Coastguard Workercglobal idct_8x8_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2 316*c0909341SAndroid Build Coastguard Worker call .load 317*c0909341SAndroid Build Coastguard Worker vpermi2q m1, m0, m2 ; 1 5 318*c0909341SAndroid Build Coastguard Worker vpermi2q m3, m6, m4 ; 7 3 319*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m5, m4 ; 0 2 320*c0909341SAndroid Build Coastguard Worker vpermt2q m2, m5, m6 ; 4 6 321*c0909341SAndroid Build Coastguard Worker call .main 322*c0909341SAndroid Build Coastguard Worker call .main_end 323*c0909341SAndroid Build Coastguard Worker mova m4, [o(idct8x8p)] 324*c0909341SAndroid Build Coastguard Worker packssdw m0, m2 ; 0 1 4 5 325*c0909341SAndroid Build Coastguard Worker packssdw m1, m3 ; 3 2 7 6 326*c0909341SAndroid Build 
Coastguard Worker vpermb m0, m4, m0 327*c0909341SAndroid Build Coastguard Worker vprolq m1, 32 328*c0909341SAndroid Build Coastguard Worker vpermb m2, m4, m1 329*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m0, m2 330*c0909341SAndroid Build Coastguard Worker punpckldq m0, m2 331*c0909341SAndroid Build Coastguard Worker jmp tx2q 332*c0909341SAndroid Build Coastguard Worker.pass2: 333*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 334*c0909341SAndroid Build Coastguard Worker vextracti32x8 ym2, m0, 1 335*c0909341SAndroid Build Coastguard Worker vextracti32x8 ym3, m1, 1 336*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_8bpc).main 337*c0909341SAndroid Build Coastguard Worker mova m10, [permC] 338*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [pw_2048] 339*c0909341SAndroid Build Coastguard Worker.end: 340*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m10, m1 341*c0909341SAndroid Build Coastguard Worker vpermt2q m2, m10, m3 342*c0909341SAndroid Build Coastguard Worker.end2: 343*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pixel_10bpc_max] 344*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*3] 345*c0909341SAndroid Build Coastguard Worker pxor m10, m10 346*c0909341SAndroid Build Coastguard Worker pmulhrsw m8, m12, m0 347*c0909341SAndroid Build Coastguard Worker call .write_8x4_start 348*c0909341SAndroid Build Coastguard Worker pmulhrsw m8, m12, m2 349*c0909341SAndroid Build Coastguard Worker.write_8x4: 350*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 351*c0909341SAndroid Build Coastguard Worker add cq, 64*2 352*c0909341SAndroid Build Coastguard Worker.write_8x4_start: 353*c0909341SAndroid Build Coastguard Worker mova xm9, [dstq+strideq*0] 354*c0909341SAndroid Build Coastguard Worker vinserti32x4 ym9, [dstq+strideq*1], 1 355*c0909341SAndroid Build Coastguard Worker vinserti32x4 m9, [dstq+strideq*2], 2 356*c0909341SAndroid Build Coastguard Worker vinserti32x4 m9, 
[dstq+r6 ], 3 357*c0909341SAndroid Build Coastguard Worker mova [cq+64*0], m10 358*c0909341SAndroid Build Coastguard Worker mova [cq+64*1], m10 359*c0909341SAndroid Build Coastguard Worker paddw m9, m8 360*c0909341SAndroid Build Coastguard Worker pmaxsw m9, m10 361*c0909341SAndroid Build Coastguard Worker pminsw m9, m11 362*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], xm9 363*c0909341SAndroid Build Coastguard Worker vextracti32x4 [dstq+strideq*1], ym9, 1 364*c0909341SAndroid Build Coastguard Worker vextracti32x4 [dstq+strideq*2], m9, 2 365*c0909341SAndroid Build Coastguard Worker vextracti32x4 [dstq+r6 ], m9, 3 366*c0909341SAndroid Build Coastguard Worker ret 367*c0909341SAndroid Build Coastguard WorkerALIGN function_align 368*c0909341SAndroid Build Coastguard Worker.load: 369*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64*0] ; 0 1 370*c0909341SAndroid Build Coastguard Worker mova m4, [cq+64*1] ; 2 3 371*c0909341SAndroid Build Coastguard Worker mova m1, [o(permB)] 372*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*2] ; 4 5 373*c0909341SAndroid Build Coastguard Worker mova m6, [cq+64*3] ; 6 7 374*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 375*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 376*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 377*c0909341SAndroid Build Coastguard Worker psrlq m5, m1, 32 378*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 379*c0909341SAndroid Build Coastguard Worker mova m3, m1 380*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_1)] 381*c0909341SAndroid Build Coastguard Worker ret 382*c0909341SAndroid Build Coastguard WorkerALIGN function_align 383*c0909341SAndroid Build Coastguard Worker.main_fast: ; bottom half is zero 384*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m3, [o(pd_4017_3406)] 385*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m8, 
[o(pd_799_m2276)] 386*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m2, [o(pd_2896_3784)] 387*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m9, [o(pd_2896_1567)] 388*c0909341SAndroid Build Coastguard Worker pmulld m3, m1 ; t4a t5a 389*c0909341SAndroid Build Coastguard Worker pmulld m1, m8 ; t7a t6a 390*c0909341SAndroid Build Coastguard Worker pmulld m2, m0 ; t0 t3 391*c0909341SAndroid Build Coastguard Worker pmulld m0, m9 ; t1 t2 392*c0909341SAndroid Build Coastguard Worker jmp .main2 393*c0909341SAndroid Build Coastguard Worker.main: 394*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 1, 3, 8, 9, 10, _, 799_3406, 4017_2276 395*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 0, 2, 8, 9, 10, _, 2896_1567, 2896_3784 396*c0909341SAndroid Build Coastguard Worker.main2: 397*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m1, m3, m0, m2 398*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m1, m3, m0, m2 399*c0909341SAndroid Build Coastguard Worker punpcklqdq m8, m1, m3 ; t4a t7a 400*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m3 ; t5a t6a 401*c0909341SAndroid Build Coastguard Worker psubd m3, m8, m1 ; t5a t6a 402*c0909341SAndroid Build Coastguard Worker paddd m8, m1 ; t4 t7 403*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m14 404*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m2, m0 ; t3 t2 405*c0909341SAndroid Build Coastguard Worker pminsd m3, m15 406*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m0 ; t0 t1 407*c0909341SAndroid Build Coastguard Worker pmulld m3, m12 408*c0909341SAndroid Build Coastguard Worker paddd m0, m2, m1 ; dct4 out0 out1 409*c0909341SAndroid Build Coastguard Worker psubd m2, m1 ; dct4 out3 out2 410*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m8, m0, m2 411*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m8, m0, m2 412*c0909341SAndroid Build Coastguard Worker.main3: 413*c0909341SAndroid Build Coastguard Worker 
; Remainder of idct_8x8_internal_10bpc (.main3 body and .main_end). The head of
; that function lies above this span; these instructions are unchanged.
    pshufd              m1, m3, q1032
    paddd               m3, m13
    psubd               m9, m3, m1
    paddd               m3, m1
    psrad               m9, 12
    psrad               m3, 12
    punpckhqdq          m1, m8, m3 ; t7 t6
    shufpd              m8, m9, 0xaa ; t4 t5
    ret
.main_end:
    paddd               m0, m11
    paddd               m2, m11
    psubd               m3, m0, m1 ; out7 out6
    paddd               m0, m1     ; out0 out1
    paddd               m1, m2, m8 ; out3 out2
    psubd               m2, m8     ; out4 out5
    REPX                {vpsravd x, m11}, m0, m2, m3, m1
    ret

INV_TXFM_8X8_FN adst, dct
INV_TXFM_8X8_FN adst, flipadst
INV_TXFM_8X8_FN adst, identity
INV_TXFM_8X8_FN adst, adst

;-----------------------------------------------------------------------
; iadst_8x8_internal_10bpc
; Pass 1 (falls through from the entry point): loads the coefficients via
;   idct_8x8 .load, regroups them into the row pairs noted on each permute,
;   runs .main, then interleaves the results, biases them by m11, halves
;   them (psrad 1), packs to words and jumps to the second pass via tx2q.
; .pass1_end is also the join point for iflipadst_8x8 pass 1.
; .pass2 / .main_pass2: second pass; the arithmetic is delegated to the
;   8bpc kernel with r5 repointed at o_base_8bpc.
; .main: ADST8 kernel, also used by iflipadst_8x8 and the 8x16 fast paths.
; NOTE(review): the contents of m0-m6 and m11-m15 on return from
;   idct_8x8 .load are defined outside this span - confirm there.
;-----------------------------------------------------------------------
cglobal iadst_8x8_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
    call m(idct_8x8_internal_10bpc).load
    vpermi2q            m1, m6, m2 ; 7 5
    vpermi2q            m3, m4, m0 ; 3 1
    vpermt2q            m0, m5, m4 ; 0 2
    vpermt2q            m2, m5, m6 ; 4 6
    call .main
    punpckldq           m1, m2, m4 ; out4 out6
    punpckhdq           m2, m0     ; -out5 -out7
    punpckldq           m0, m3     ; out0 out2
    punpckhdq           m4, m3     ; -out1 -out3
    ; m11 - x both undoes the negation of the odd outputs and adds the bias
    paddd               m1, m11
    psubd               m3, m11, m2
    paddd               m0, m11
    psubd               m4, m11, m4
.pass1_end:
    REPX                {psrad x, 1}, m1, m0, m3, m4
    packssdw            m0, m1     ; 0 2 4 6
    packssdw            m4, m3     ; 1 3 5 7
    psrlq               m1, [o(permB)], 8
    punpckhwd           m3, m0, m4
    punpcklwd           m0, m4
    psrlq               m2, m1, 32
    vpermi2q            m1, m0, m3
    vpermt2q            m0, m2, m3
    jmp                 tx2q
.pass2:
    call .main_pass2
    ; no o() wrapper here: .main_pass2 has replaced r5 with o_base_8bpc
    movu                m10, [permC+2]
    vbroadcasti32x8     m12, [pw_2048_m2048+16]
    jmp m(idct_8x8_internal_10bpc).end
.main_pass2:
    ; ym2/ym3 = upper 256 bits of m0/m1; ym4/ym5 = ym0/ym1 with the qwords
    ; of each 128-bit lane swapped; then tail-jump into the 8bpc kernel
    vextracti32x8       ym2, m0, 1
    vextracti32x8       ym3, m1, 1
    lea                 r5, [o_base_8bpc]
    pshufd              ym4, ym0, q1032
    pshufd              ym5, ym1, q1032
    jmp m(iadst_8x8_internal_8bpc).main_pass2
ALIGN function_align
.main:
    ITX_MULSUB_2D       1, 0, 4, 5, 6, 13, 401_1931, 4076_3612
    ITX_MULSUB_2D       3, 2, 4, 5, 6, 13, 3166_3920, 2598_1189
    psubd               m4, m0, m2 ; t4 t6
    paddd               m0, m2     ; t0 t2
    psubd               m2, m1, m3 ; t5 t7
    paddd               m1, m3     ; t1 t3
    REPX                {pmaxsd x, m14}, m4, m2, m0, m1
    REPX                {pminsd x, m15}, m4, m2, m0, m1
    ; m5 = -m4, so the second shufpd below can pick up -t6
    pxor                m5, m5
    psubd               m5, m4
    shufpd              m4, m2, 0xaa ; t4 t7
    shufpd              m2, m5, 0xaa ; t5 -t6
    ITX_MULSUB_2D       4, 2, 3, 5, 6, 13, 1567, 3784
    punpckhqdq          m3, m0, m1
    punpcklqdq          m0, m1
    psubd               m1, m0, m3 ; t2 t3
    paddd               m0, m3     ; out0 -out7
    punpckhqdq          m3, m4, m2 ; t7a t6a
    punpcklqdq          m4, m2     ; t5a t4a
    psubd               m2, m4, m3 ; t7 t6
    paddd               m4, m3     ; out6 -out1
    REPX                {pmaxsd x, m14}, m1, m2
    REPX                {pminsd x, m15}, m1, m2
    shufpd              m3, m1, m2, 0xaa
    shufpd              m1, m2, 0x55
    pmulld              m3, m12
    pmulld              m1, m12
    paddd               m3, m13
    psubd               m2, m3, m1
    paddd               m3, m1
    psrad               m2, 12     ; out4 -out5
    pshufd              m3, m3, q1032
    psrad               m3, 12     ; out2 -out3
    ret

INV_TXFM_8X8_FN flipadst, dct
INV_TXFM_8X8_FN flipadst, adst
INV_TXFM_8X8_FN flipadst, identity
INV_TXFM_8X8_FN flipadst, flipadst

;-----------------------------------------------------------------------
; iflipadst_8x8_internal_10bpc
; Pass 1 reuses the iadst_8x8 load/permute/.main sequence, interleaves the
;   outputs in the reversed order shown in the comments, and joins
;   iadst_8x8 .pass1_end.
; .pass2 reuses iadst_8x8 .main_pass2, then scales with pw_m2048_2048 and
;   writes two 8x4 groups through the idct_8x8 write helpers.
;-----------------------------------------------------------------------
cglobal iflipadst_8x8_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
    call m(idct_8x8_internal_10bpc).load
    vpermi2q            m1, m6, m2 ; 7 5
    vpermi2q            m3, m4, m0 ; 3 1
    vpermt2q            m0, m5, m4 ; 0 2
    vpermt2q            m2, m5, m6 ; 4 6
    call m(iadst_8x8_internal_10bpc).main
    punpckhdq           m1, m3, m4 ; -out3 -out1
    punpckldq           m3, m0     ; out2 out0
    punpckhdq           m0, m2     ; -out7 -out5
    punpckldq           m4, m2     ; out6 out4
    psubd               m1, m11, m1
    paddd               m3, m11
    psubd               m0, m11, m0
    paddd               m4, m11
    jmp m(iadst_8x8_internal_10bpc).pass1_end
.pass2:
    call m(iadst_8x8_internal_10bpc).main_pass2
    ; r5 now holds o_base_8bpc (set in .main_pass2), hence no o() wrapper
    movu                m10, [permC+1]
    vbroadcasti32x8     m12, [pw_m2048_2048+16]
    lea                 r6, [strideq*3]
    vpermt2q            m0, m10, m1 ; 7 6 5 4
    vpbroadcastd        m11, [pixel_10bpc_max]
    vpermt2q            m2, m10, m3 ; 3 2 1 0
    pxor                m10, m10
    pmulhrsw            m8, m12, m2
    call m(idct_8x8_internal_10bpc).write_8x4_start
    pmulhrsw            m8, m12, m0
    jmp m(idct_8x8_internal_10bpc).write_8x4

INV_TXFM_8X8_FN identity, dct
INV_TXFM_8X8_FN identity, adst
INV_TXFM_8X8_FN identity, flipadst
INV_TXFM_8X8_FN identity, identity

;-----------------------------------------------------------------------
; iidentity_8x8_internal_10bpc
; Pass 1 only packs the dword coefficients to words and reorders them with
;   the idtx8x8p byte permutation; no arithmetic is applied here.
; .pass2 reorders via permC+2, loads pw_4096 into m12 and joins
;   idct_8x8 .end2.
;-----------------------------------------------------------------------
cglobal iidentity_8x8_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
    mova                m1, [cq+64*0]
    packssdw            m1, [cq+64*2] ; 0 4 1 5
    mova                m2, [cq+64*1] ; 2 6 3 7
    packssdw            m2, [cq+64*3]
    mova                m0, [o(idtx8x8p)]
    vpermb              m1, m0, m1
    vpermb              m2, m0, m2
    punpckldq           m0, m1, m2 ; 0 1 4 5
    punpckhdq           m1, m2     ; 2 3 6 7
    jmp                 tx2q
.pass2:
    movu                m3, [o(permC+2)]
    vpbroadcastd        m12, [o(pw_4096)]
    psrlq               m2, m3, 32
    vpermi2q            m2, m0, m1
    vpermt2q            m0, m3, m1
    jmp m(idct_8x8_internal_10bpc).end2

; INV_TXFM_8X16_FN type1, type2[, eob_offset = 0]
; Instantiates the 8x16 entry point through INV_TXFM_FN. For dct_dct it also
; emits the DC-only path: the DC coefficient is scaled by 181 with rounding
; (+128, >>8) and multiplied by 181 again, [cq] is overwritten with eobd,
; bit 4 of r3d is set, and control jumps to the 8x8 .dconly tail.
; NOTE(review): the meaning of r3d for .dconly is defined outside this span.
%macro INV_TXFM_8X16_FN 2-3 0 ; type1, type2, eob_offset
    INV_TXFM_FN         %1, %2, %3, 8x16
%ifidn %1_%2, dct_dct
    imul                r6d, [cq], 181
    mov                 [cq], eobd ; 0
    or                  r3d, 16
    add                 r6d, 128
    sar                 r6d, 8
    imul                r6d, 181
    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly
%endif
%endmacro

INV_TXFM_8X16_FN dct, dct
INV_TXFM_8X16_FN dct, identity, 35
INV_TXFM_8X16_FN dct, flipadst
INV_TXFM_8X16_FN dct, adst

;-----------------------------------------------------------------------
; idct_8x16_internal_10bpc
; Entry: eob < 43 takes .fast, which loads only the low 256 bits of each
;   coefficient row and reuses the 8x8 kernel; otherwise .load, .main and
;   .main_end run on full 512-bit rows.
; .pass1_end packs the eight dword results to words and jumps to tx2q.
; .pass2 reorders through idct8x16p, delegates to the 8bpc kernel with
;   r5 = o_base_8bpc, then writes four 8x4 groups.
; Local helpers: .round_input_fast, .load/.load2/.round, .main_fast2(_rect2),
;   .main_fast(_rect2), .main(_rect2), .main2, .main_end/.main_end2.
;   .load, .pass1_end and .round_input_fast are also used by the adst and
;   flipadst 8x16 functions.
; Register roles set up by the loaders: m12 = pd_2896, m13 = pd_2048,
;   m14/m15 = clip_18b_min/max; m11 = pd_1 (set in .round_input_fast and
;   .main_end).
;-----------------------------------------------------------------------
cglobal idct_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
%undef cmp
    cmp                 eobd, 43
    jl .fast
    call .load
    call .main
    call .main_end
.pass1_end:
    packssdw            m0, m4
    packssdw            m1, m5
    packssdw            m2, m6
    packssdw            m3, m7
    jmp                 tx2q
.pass2:
    mova                m8, [o(idct8x16p)]
    REPX                {vpermb x, m8, x}, m0, m1, m2, m3
    punpckhdq           m5, m0, m1
    punpckldq           m0, m1
    punpckhdq           m4, m2, m3
    punpckldq           m2, m3
    punpcklqdq          m8, m0, m2 ; 15 1
    punpckhqdq          m0, m2     ; 7 9
    punpckhqdq          m1, m5, m4 ; 3 13
    punpcklqdq          m5, m4     ; 11 5
    ; from here on r5 addresses the 8bpc tables, so o() is no longer used
    lea                 r5, [o_base_8bpc]
    vextracti32x8       ym7, m8, 1 ; 14 2
    vextracti32x8       ym3, m0, 1 ; 6 10
    vextracti32x8       ym6, m1, 1 ; 12 4
    vextracti32x8       ym9, m5, 1 ; 8 0
    call m(idct_8x16_internal_8bpc).main2
    mova                m8, [permC]
    vpbroadcastd        m12, [pw_2048]
    vpermt2q            m0, m8, m1
    lea                 r6, [strideq*3]
    vpermt2q            m2, m8, m3
    vpbroadcastd        m11, [pixel_10bpc_max]
    vpermt2q            m4, m8, m5
    pxor                m10, m10
    vpermt2q            m6, m8, m7
    pmulhrsw            m8, m12, m0
    call m(idct_8x8_internal_10bpc).write_8x4_start
    pmulhrsw            m8, m12, m2
    call m(idct_8x8_internal_10bpc).write_8x4
    pmulhrsw            m8, m12, m4
    call m(idct_8x8_internal_10bpc).write_8x4
    pmulhrsw            m8, m12, m6
    jmp m(idct_8x8_internal_10bpc).write_8x4
.fast:
    ; only the low 256 bits (ym) of each coefficient row are read
    mova                ym0, [cq+64*0]
    mova                ym4, [cq+64*2]
    mova                ym1, [cq+64*1]
    mova                ym5, [cq+64*5]
    mova                ym2, [cq+64*4]
    mova                ym6, [cq+64*6]
    mova                ym3, [cq+64*7]
    mova                ym7, [cq+64*3]
    call .round_input_fast
    call m(idct_8x8_internal_10bpc).main
    call m(idct_8x8_internal_10bpc).main_end
    movu                m6, [o(permC+3)]
    packssdw            m3, m1, m3
    packssdw            m1, m0, m2
    vprolq              m3, 32
    vpermd              m1, m6, m1
    vpermd              m3, m6, m3
    mova                ym0, ym1   ; 0 4
    vextracti32x8       ym1, m1, 1 ; 1 5
    mova                ym2, ym3   ; 2 6
    vextracti32x8       ym3, m3, 1 ; 3 7
    jmp                 tx2q
ALIGN function_align
.round_input_fast:
    ; Merges the eight 256-bit row halves pairwise into m0-m3, applies
    ; (x * 2896 + 2048) >> 12, and leaves the constants in m11-m15.
    movshdup            m8, [o(permB)]
    vpbroadcastd        m12, [o(pd_2896)]
    vpermt2q            m0, m8, m4
    vpermt2q            m1, m8, m5
    vpermt2q            m2, m8, m6
    vpermt2q            m3, m8, m7
    vpbroadcastd        m13, [o(pd_2048)]
    REPX                {pmulld x, m12}, m0, m1, m2, m3
    vpbroadcastd        m14, [o(clip_18b_min)]
    vpbroadcastd        m15, [o(clip_18b_max)]
    REPX                {paddd x, m13}, m0, m1, m2, m3
    vpbroadcastd        m11, [o(pd_1)]
    REPX                {psrad x, 12 }, m0, m1, m2, m3
    ret
ALIGN function_align
.load:
    vpbroadcastd        m14, [o(clip_18b_min)]
    vpbroadcastd        m15, [o(clip_18b_max)]
.load2:
    ; m0-m7 = (row * 2896 + 2048) >> 12 for the eight 64-byte rows at cq
    vpbroadcastd        m12, [o(pd_2896)]
    pmulld              m0, m12, [cq+64*0]
    pmulld              m1, m12, [cq+64*1]
    pmulld              m2, m12, [cq+64*2]
    pmulld              m3, m12, [cq+64*3]
    vpbroadcastd        m13, [o(pd_2048)]
    pmulld              m4, m12, [cq+64*4]
    pmulld              m5, m12, [cq+64*5]
    pmulld              m6, m12, [cq+64*6]
    pmulld              m7, m12, [cq+64*7]
.round:
    REPX                {paddd x, m13}, m0, m1, m2, m3
    REPX                {psrad x, 12 }, m0, m1, m2, m3
    REPX                {paddd x, m13}, m4, m5, m6, m7
    REPX                {psrad x, 12 }, m4, m5, m6, m7
    ret
ALIGN function_align
.main_fast2_rect2:
    REPX                {paddd x, m13}, m0, m1
    REPX                {psrad x, 12 }, m0, m1
.main_fast2:
    ; variant that consumes only m0 and m1
    pmulld              m0, m12
    pmulld              m6, m1, [o(pd_4017)] {1to16} ; t7a
    pmulld              m8, m1, [o(pd_799)] {1to16} ; t4a
    REPX                {paddd x, m13}, m0, m6, m8
    REPX                {psrad x, 12 }, m0, m6, m8
    pmulld              m5, m6, m12
    pmulld              m1, m8, m12
    paddd               m5, m13
    psubd               m4, m5, m1
    paddd               m5, m1
    REPX                {psrad x, 12 }, m4, m5
    REPX                {mova x, m0 }, m1, m2, m3
    ret
.main_fast_rect2:
    REPX                {paddd x, m13}, m0, m1, m2, m3
    REPX                {psrad x, 12 }, m0, m1, m2, m3
.main_fast:
    ; variant that consumes only m0-m3
    pmulld              m0, m12
    pmulld              m5, m3, [o(pd_2276)] {1to16} ; t5a
    pmulld              m3, [o(pd_3406)] {1to16} ; t6a
    pmulld              m7, m1, [o(pd_4017)] {1to16} ; t7a
    pmulld              m1, [o(pd_799)] {1to16} ; t4a
    pmulld              m6, m2, [o(pd_3784)] {1to16} ; t3
    pmulld              m2, [o(pd_1567)] {1to16} ; t2
    paddd               m0, m13
    psubd               m5, m13, m5 ; negate and add the rounding term in one step
    psrad               m0, 12     ; t0
    mova                m9, m0     ; t1
    jmp .main2
.main_rect2:
    call .round
.main:
    pmulld              m0, m12
    ITX_MULSUB_2D       5, 3, 8, 9, 10, _, 3406, 2276 ; t5a t6a
    ITX_MULSUB_2D       1, 7, 8, 9, 10, _, 799, 4017 ; t4a t7a
    ITX_MULSUB_2D       2, 6, 8, 9, 10, _, 1567, 3784 ; t2 t3
    pmulld              m4, m12
    paddd               m0, m13
    paddd               m5, m13
    psubd               m9, m0, m4 ; t1
    paddd               m0, m4     ; t0
    psrad               m9, 12
    psrad               m0, 12
.main2:
    ; m5 already carries its rounding term from both callers, so only
    ; m3, m1 and m7 are rounded here
    REPX                {paddd x, m13}, m3, m1, m7
    REPX                {psrad x, 12 }, m5, m1, m3, m7
    paddd               m8, m1, m5 ; t4
    psubd               m1, m5     ; t5a
    psubd               m5, m7, m3 ; t6a
    paddd               m7, m3     ; t7
    pmaxsd              m5, m14
    pmaxsd              m1, m14
    paddd               m2, m13
    paddd               m6, m13
    pminsd              m5, m15
    pminsd              m1, m15
    pmulld              m5, m12
    pmulld              m1, m12
    pmaxsd              m8, m14
    pmaxsd              m7, m14
    pminsd              m8, m15
    paddd               m5, m13
    psubd               m4, m5, m1
    paddd               m5, m1
    REPX                {psrad x, 12 }, m2, m6, m5, m4
    paddd               m1, m9, m2 ; dct4 out1
    psubd               m2, m9, m2 ; dct4 out2
    psubd               m3, m0, m6 ; dct4 out3
    paddd               m0, m6     ; dct4 out0
    pminsd              m6, m15, m7 ; upper clamp of t7, result moved to m6
    REPX                {pmaxsd x, m14}, m0, m1, m2, m3
    REPX                {pminsd x, m15}, m0, m1, m2, m3
    ret
.main_end:
    vpbroadcastd        m11, [o(pd_1)]
.main_end2:
    ; final butterflies; m11 is both the rounding bias and the shift count
    REPX                {paddd x, m11}, m0, m1, m2, m3
    psubd               m7, m0, m6 ; out7
    paddd               m0, m6     ; out0
    psubd               m6, m1, m5 ; out6
    paddd               m1, m5     ; out1
    psubd               m5, m2, m4 ; out5
    paddd               m2, m4     ; out2
    psubd               m4, m3, m8 ; out4
    paddd               m3, m8     ; out3
    REPX                {vpsravd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
    ret

INV_TXFM_8X16_FN adst, dct
INV_TXFM_8X16_FN adst, identity, 35
INV_TXFM_8X16_FN adst, flipadst
INV_TXFM_8X16_FN adst, adst

;-----------------------------------------------------------------------
; iadst_8x16_internal_10bpc
; Entry: eob < 43 takes .fast (256-bit row halves, iadst_8x8 kernel);
;   otherwise the rows are loaded with idct_8x16 .load and run through
;   .main, whose outputs are shifted here and packed at
;   idct_8x16 .pass1_end.
; .main returns its results unshifted with the rounding bias already added:
;   m0, m1, m10, m11 need >> 1 and m2, m3, m8, m9 need >> 12 (see the
;   shifts applied right after the call).
; .pass2 reorders through iadst8x16p, delegates to the 8bpc kernel with
;   r5 = o_base_8bpc, then writes four 8x4 groups from .pass2_end.
; .fast_main is also used by iflipadst_8x16.
;-----------------------------------------------------------------------
cglobal iadst_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
%undef cmp
    cmp                 eobd, 43
    jl .fast
    call m(idct_8x16_internal_10bpc).load
    call .main
    psrad               m0, 1
    psrad               m1, 1
    psrad               m6, m10, 1
    psrad               m7, m11, 1
    psrad               m2, 12
    psrad               m3, 12
    psrad               m4, m8, 12
    psrad               m5, m9, 12
    jmp m(idct_8x16_internal_10bpc).pass1_end
.fast:
    call .fast_main
    punpcklqdq          m1, m2, m4 ; out4 out6
    punpckhqdq          m2, m0     ; -out5 -out7
    punpcklqdq          m0, m3     ; out0 out2
    punpckhqdq          m4, m3     ; -out1 -out3
    ; m11 - x both undoes the negation of the odd outputs and adds the bias
    paddd               m1, m11
    psubd               m3, m11, m2
    paddd               m0, m11
    psubd               m4, m11, m4
.fast_end:
    movu                m5, [o(permC+3)]
    REPX                {psrad x, 1}, m1, m0, m3, m4
    packssdw            m2, m0, m1 ; 0 2 4 6
    packssdw            m3, m4, m3 ; 1 3 5 7
    vpermd              m2, m5, m2
    vpermd              m3, m5, m3
    mova                ym0, ym2
    vextracti32x8       ym2, m2, 1
    mova                ym1, ym3
    vextracti32x8       ym3, m3, 1
    jmp                 tx2q
.pass2:
    call .pass2_main
    ; r5 now holds o_base_8bpc (set in .pass2_main), hence no o() wrapper
    movu                m4, [permB+2]
    vbroadcasti32x8     m12, [pw_2048_m2048+16]
    psrlq               m7, m4, 8
    vpermi2q            m4, m0, m3 ; 0 1 2 3
    psrlq               m5, m7, 24
    vpermi2q            m7, m0, m3 ; 12 13 14 15
    psrlq               m6, m5, 8
    vpermq              m5, m5, m1 ; 4 5 6 7
    vpermq              m6, m6, m2 ; 8 9 10 11
.pass2_end:
    vpbroadcastd        m11, [pixel_10bpc_max]
    pxor                m10, m10
    lea                 r6, [strideq*3]
    pmulhrsw            m8, m12, m4
    call m(idct_8x8_internal_10bpc).write_8x4_start
    pmulhrsw            m8, m12, m5
    call m(idct_8x8_internal_10bpc).write_8x4
    pmulhrsw            m8, m12, m6
    call m(idct_8x8_internal_10bpc).write_8x4
    pmulhrsw            m8, m12, m7
    jmp m(idct_8x8_internal_10bpc).write_8x4
ALIGN function_align
.main:
    ITX_MULSUB_2D       7, 0, 8, 9, 10, 13, 401, 4076 ; t1a, t0a
    ITX_MULSUB_2D       1, 6, 8, 9, 10, 13, 3920, 1189 ; t7a, t6a
    ITX_MULSUB_2D       5, 2, 8, 9, 10, 13, 1931, 3612 ; t3a, t2a
    ITX_MULSUB_2D       3, 4, 8, 9, 10, 13, 3166, 2598 ; t5a, t4a
    psubd               m8, m2, m6 ; t6
    paddd               m2, m6     ; t2
    psubd               m6, m0, m4 ; t4
    paddd               m0, m4     ; t0
    psubd               m4, m5, m1 ; t7
    paddd               m5, m1     ; t3
    psubd               m1, m7, m3 ; t5
    paddd               m7, m3     ; t1
    REPX                {pmaxsd x, m14}, m6, m1, m8, m4, m2, m0, m5, m7
    REPX                {pminsd x, m15}, m6, m1, m8, m4, m2, m0, m5, m7
    ; m10/m11 hold the two coefficients as registers so the pair can be
    ; passed in both orders to the two rotations below
    vpbroadcastd        m10, [o(pd_1567)]
    vpbroadcastd        m11, [o(pd_3784)]
    ITX_MULSUB_2D       6, 1, 3, 9, _, 13, 10, 11 ; t5a, t4a
    ITX_MULSUB_2D       4, 8, 3, 9, _, 13, 11, 10 ; t6a, t7a
    vpbroadcastd        m12, [o(pd_1448)]
    psubd               m9, m6, m8 ; t7
    paddd               m6, m8     ; out6
    psubd               m3, m7, m5 ; t3
    paddd               m7, m5     ; -out7
    psubd               m5, m0, m2 ; t2
    paddd               m0, m2     ; out0
    psubd               m2, m1, m4 ; t6
    paddd               m1, m4     ; -out1
    REPX                {pmaxsd x, m14}, m5, m3, m2, m9
    REPX                {pminsd x, m15}, m5, m3, m2, m9
    REPX                {pmulld x, m12}, m5, m3, m2, m9
    vpbroadcastd        m4, [o(pd_1)]
    psubd               m8, m5, m3 ; (t2 - t3) * 1448
    paddd               m3, m5     ; (t2 + t3) * 1448
    psubd               m5, m2, m9 ; (t6 - t7) * 1448
    paddd               m2, m9     ; (t6 + t7) * 1448
    ; add the rounding bias (and fix the sign of the negated outputs) now;
    ; the caller applies the final shifts
    vpbroadcastd        m9, [o(pd_3072)]
    paddd               m0, m4
    psubd               m1, m4, m1
    paddd               m10, m6, m4
    psubd               m11, m4, m7
    paddd               m2, m9
    paddd               m8, m9
    vpbroadcastd        m9, [o(pd_3071)]
    psubd               m3, m9, m3
    psubd               m9, m5
    ret
ALIGN function_align
.fast_main:
    ; only the low 256 bits (ym) of each coefficient row are read
    mova                ym0, [cq+64*0]
    mova                ym4, [cq+64*2]
    mova                ym1, [cq+64*7]
    mova                ym5, [cq+64*5]
    mova                ym2, [cq+64*4]
    mova                ym6, [cq+64*6]
    mova                ym3, [cq+64*3]
    mova                ym7, [cq+64*1]
    call m(idct_8x16_internal_10bpc).round_input_fast
    jmp m(iadst_8x8_internal_10bpc).main
ALIGN function_align
.pass2_main:
    mova                m8, [o(iadst8x16p)]
    REPX                {vpermb x, m8, x}, m0, m1, m2, m3
    ; pw_2896x8 is fetched through o() before r5 is repointed below
    vpbroadcastd        m10, [o(pw_2896x8)]
    punpckhdq           m5, m0, m1
    punpckldq           m0, m1
    punpckhdq           m1, m2, m3
    punpckldq           m2, m3
    lea                 r5, [o_base_8bpc]
    punpckhqdq          m4, m0, m2 ; 12 3 14 1
    punpcklqdq          m0, m2     ; 0 15 2 13
    punpckhqdq          m6, m5, m1 ; 8 7 10 5
    punpcklqdq          m5, m1     ; 4 11 6 9
    call m(iadst_8x16_internal_8bpc).main2
    paddsw              m1, m2, m4
    psubsw              m2, m4
    pmulhrsw            m1, m10    ; -out7 out4 out6 -out5
    pmulhrsw            m2, m10    ; out8 -out11 -out9 out10
    ret

; ---------------------------------------------------------------------------
; 8x16 flipadst, 10 bpc
;
; Pass 1 selects a path on eob: below 43 it runs iadst_8x16 .fast_main,
; otherwise idct_8x16 .load followed by iadst_8x16 .main.  On the full path
; the .main results are copied back in reversed register order
; (new m0..m7 = old m11, m10, m9, m8, m3, m2, m1, m0); values taken from
; m0/m1/m10/m11 are shifted right by 1, those from m2/m3/m8/m9 by 12.
; Pass 2 reuses iadst_8x16 .pass2_main / .pass2_end and only supplies its own
; qword permute indices (derived from permB+2) and the m12 constant.
; ---------------------------------------------------------------------------
INV_TXFM_8X16_FN flipadst, dct
INV_TXFM_8X16_FN flipadst, identity, 35
INV_TXFM_8X16_FN flipadst, adst
INV_TXFM_8X16_FN flipadst, flipadst

cglobal iflipadst_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
%undef cmp
    cmp              eobd, 43
    jl               .fast
    call             m(idct_8x16_internal_10bpc).load
    call             m(iadst_8x16_internal_10bpc).main
    ; reverse the output order while applying the final shift
    psrad            m7, m0, 1
    psrad            m0, m11, 1
    psrad            m6, m1, 1
    psrad            m1, m10, 1
    psrad            m5, m2, 12
    psrad            m2, m9, 12
    psrad            m4, m3, 12
    psrad            m3, m8, 12
    jmp              m(idct_8x16_internal_10bpc).pass1_end
.fast:
    call             m(iadst_8x16_internal_10bpc).fast_main
    punpckhqdq       m1, m3, m4             ; -out3 -out1
    punpcklqdq       m3, m0                 ; out2 out0
    punpckhqdq       m0, m2                 ; -out7 -out5
    punpcklqdq       m4, m2                 ; out6 out4
    ; m11 - x negates the "-out" registers; the others just get m11 added
    psubd            m1, m11, m1
    paddd            m3, m11
    psubd            m0, m11, m0
    paddd            m4, m11
    jmp              m(iadst_8x16_internal_10bpc).fast_end
.pass2:
    call             m(iadst_8x16_internal_10bpc).pass2_main
    movu             m7, [permB+2]
    vbroadcasti32x8  m12, [pw_m2048_2048+16]
    ; m4/m5/m6 are successively shifted copies of the m7 index vector
    psrlq            m4, m7, 8
    vpermi2q         m7, m3, m0             ; 3 2 1 0
    psrlq            m5, m4, 24
    vpermi2q         m4, m3, m0             ; 15 14 13 12
    psrlq            m6, m5, 8
    vpermq           m5, m5, m2             ; 11 10 9 8
    vpermq           m6, m6, m1             ; 7 6 5 4
    jmp              m(iadst_8x16_internal_10bpc).pass2_end

; ---------------------------------------------------------------------------
; 8x16 identity, 10 bpc
;
; Pass 1 is entirely delegated to idct_8x16 (.load2, then .pass1_end).
; Pass 2 computes 2*x + pmulhrsw(x, pw_1697x16) with saturating word adds,
; transposes m0-m3, and then for each of the four registers rounds with
; pw_2048 and adds the result to four 16-byte destination rows that are
; four strides apart, clamping to [0, pixel_10bpc_max].
; .write_8x4 steps dstq by one stride and cq by 128 bytes before every write
; except the first; each write also zeroes 128 bytes at cq.
; ---------------------------------------------------------------------------
INV_TXFM_8X16_FN identity, dct
INV_TXFM_8X16_FN identity, adst
INV_TXFM_8X16_FN identity, flipadst
INV_TXFM_8X16_FN identity, identity

cglobal iidentity_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
    call             m(idct_8x16_internal_10bpc).load2
    jmp              m(idct_8x16_internal_10bpc).pass1_end
.pass2:
    vpbroadcastd     m8, [o(pw_1697x16)]
    pmulhrsw         m4, m8, m0
    pmulhrsw         m5, m8, m1
    pmulhrsw         m6, m8, m2
    pmulhrsw         m7, m8, m3
    REPX             {paddsw x, x}, m0, m1, m2, m3
    paddsw           m0, m4
    paddsw           m1, m5
    paddsw           m2, m6
    paddsw           m3, m7
    vpbroadcastd     m7, [o(pw_2048)]       ; rounding multiplier for the writes
    punpckhwd        m4, m0, m1
    punpcklwd        m0, m1
    punpckhwd        m1, m2, m3
    punpcklwd        m2, m3
    vpbroadcastd     m6, [o(pixel_10bpc_max)] ; upper clamp used by .write_8x4
    punpckhdq        m3, m0, m2
    punpckldq        m0, m2
    punpckldq        m2, m4, m1
    punpckhdq        m4, m1
    pxor             m5, m5                 ; zero: lower clamp and coefficient clear
    punpckhqdq       m1, m0, m2             ; 1 5 9 13
    punpcklqdq       m0, m2                 ; 0 4 8 12
    punpcklqdq       m2, m3, m4             ; 2 6 10 14
    punpckhqdq       m3, m4                 ; 3 7 11 15
    lea              r6, [strideq*3]
    pmulhrsw         m0, m7
    call             .write_8x4_start
    pmulhrsw         m0, m7, m1
    call             .write_8x4
    pmulhrsw         m0, m7, m2
    call             .write_8x4
    pmulhrsw         m0, m7, m3
    ; the last group falls through; the ret below ends the function
.write_8x4:
    add              dstq, strideq
    add              cq, 64*2
.write_8x4_start:
    mova             xm4, [dstq+strideq*0]
    vinserti32x4     ym4, [dstq+strideq*4], 1
    vinserti32x4     m4, [dstq+strideq*8], 2
    vinserti32x4     m4, [dstq+r6*4 ], 3
    mova             [cq+64*0], m5
    mova             [cq+64*1], m5
    paddw            m4, m0
    pmaxsw           m4, m5
    pminsw           m4, m6
    mova             [dstq+strideq*0], xm4
    vextracti32x4    [dstq+strideq*4], ym4, 1
    vextracti32x4    [dstq+strideq*8], m4, 2
    vextracti32x4    [dstq+r6*4 ], m4, 3
    ret

; ---------------------------------------------------------------------------
; INV_TXFM_16X8_FN type1, type2 [, eob_offset]
;
; Forwards to INV_TXFM_FN for the 16x8 size.  For the dct_dct pairing it also
; emits the DC-only tail: the first coefficient is scaled (imul 181 plus the
; add/sar steps below), broadcast as words, and added to the destination two
; rows per iteration.  r3d is OR-ed with 8 and counts down by 2 per loop
; iteration.  The paddsw/psubusw pair uses the dconly_10bpc bias in m2 on both
; sides (presumably to clamp to the pixel range - confirm against the
; constant's definition).
; ---------------------------------------------------------------------------
%macro INV_TXFM_16X8_FN 2-3 0 ; type1, type2, eob_offset
    INV_TXFM_FN      %1, %2, %3, 16x8
%ifidn %1_%2, dct_dct
    imul             r6d, [cq], 181
    mov              [cq], eobd             ; 0
    or               r3d, 8
.dconly:
    add              r6d, 128
    sar              r6d, 8
    imul             r6d, 181
    add              r6d, 384
    sar              r6d, 9
.dconly2:
    vpbroadcastd     m2, [o(dconly_10bpc)]
    imul             r6d, 181
    add              r6d, 2176
    sar              r6d, 12
    vpbroadcastw     m1, r6d
    paddsw           m1, m2
.dconly_loop:
    mova             ym0, [dstq+strideq*0]
    vinserti32x8     m0, [dstq+strideq*1], 1
    paddsw           m0, m1
    psubusw          m0, m2
    mova             [dstq+strideq*0], ym0
    vextracti32x8    [dstq+strideq*1], m0, 1
    lea              dstq, [dstq+strideq*2]
    sub              r3d, 2
    jg               .dconly_loop
    RET
%endif
%endmacro

; ---------------------------------------------------------------------------
; 16x8 dct, 10 bpc
;
; Pass 1 loads the coefficients pre-multiplied by pd_2896 and rounds them with
; (+pd_2048) >> 12, clearing the coefficient buffer as it goes.  With eob < 43
; only the first four 64-byte rows are read (.fast).  The result of
; idct_8x8 .main[_fast] plus the local .main[_fast] goes through
; idct_8x16 .main_end and .transpose_16x8, then control jumps to tx2q.
; Pass 2 repoints r5 at o_base_8bpc and calls the 8bpc idct_16x8 .main; the
; constants used after that point are addressed without o().
;
; Shared helpers (also jumped to / called from other functions in this file):
;   .pass1_end2          expects permute indices in m8/m9, copies them to
;                        m11/m10 and transposes
;   .write_16x4          rounds m8/m9 with m11, then falls into _noround
;   .write_16x4_noround  adds m8/m9 to four 32-byte rows, clamps to
;                        [m12, m13], advances dstq by four strides
;   .main/.main_fast     odd-half butterflies (labelled t8..t15 in the comments)
;   .transpose_16x8      packs dwords to words and interleaves into m0-m3
; ---------------------------------------------------------------------------
INV_TXFM_16X8_FN dct, dct
INV_TXFM_16X8_FN dct, identity, -21
INV_TXFM_16X8_FN dct, flipadst
INV_TXFM_16X8_FN dct, adst

cglobal idct_16x8_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
%undef cmp
    vpbroadcastd     m12, [o(pd_2896)]
    pmulld           m4, m12, [cq+64*0]     ; 0 1
    pmulld           m9, m12, [cq+64*1]     ; 2 3
    pmulld           m8, m12, [cq+64*2]     ; 4 5
    pmulld           m7, m12, [cq+64*3]     ; 6 7
    vpbroadcastd     m13, [o(pd_2048)]
    pxor             m2, m2
    mova             m15, [o(permB)]
    REPX             {mova [cq+64*x], m2}, 0, 1, 2, 3
    psrlq            m0, m15, 32
    REPX             {paddd x, m13}, m4, m9, m8, m7
    vpbroadcastd     m14, [o(clip_18b_min)]
    REPX             {psrad x, 12 }, m4, m8, m9, m7
    mova             m1, m0
    vpermi2q         m0, m4, m8             ; 0 4
    cmp              eobd, 43
    jl               .fast
    pmulld           m5, m12, [cq+64*4]     ; 8 9
    pmulld           m10, m12, [cq+64*5]    ; 10 11
    pmulld           m11, m12, [cq+64*6]    ; 12 13
    pmulld           m6, m12, [cq+64*7]     ; 14 15
    REPX             {mova [cq+64*x], m2}, 4, 5, 6, 7
    REPX             {paddd x, m13}, m5, m10, m11, m6
    REPX             {psrad x, 12 }, m10, m5, m11, m6
    mova             m2, m1
    vpermi2q         m1, m9, m10            ; 2 10
    mova             m3, m2
    vpermi2q         m2, m5, m11            ; 8 12
    vpermi2q         m3, m6, m7             ; 14 6
    vpermt2q         m4, m15, m11           ; 1 13
    vpermt2q         m6, m15, m9            ; 15 3
    vpermt2q         m5, m15, m8            ; 9 5
    vpermt2q         m7, m15, m10           ; 7 11
    ; m15 stops being a permute index here and becomes the upper clip bound
    vpbroadcastd     m15, [o(clip_18b_max)]
    call             m(idct_8x8_internal_10bpc).main
    call             .main
    jmp              .pass1_end
.fast:
    vpermi2q         m1, m9, m7             ; 2 6
    vpermt2q         m4, m15, m9            ; 1 3
    vpermt2q         m7, m15, m8            ; 7 5
    vpbroadcastd     m15, [o(clip_18b_max)]
    call             m(idct_8x8_internal_10bpc).main_fast
    call             .main_fast
.pass1_end:
    call             m(idct_8x16_internal_10bpc).main_end
    mova             m8, [o(permA)]
    psrlq            m9, m8, 8
.pass1_end2:
    mova             m10, m9
    mova             m11, m8
    call             .transpose_16x8
    jmp              tx2q
.pass2:
    lea              r5, [o_base_8bpc]
    call             m(idct_16x8_internal_8bpc).main
    movshdup         m4, [permC]
    vpbroadcastd     m11, [pw_2048]
    psrlq            m5, m4, 8
.end:
    vpbroadcastd     m13, [pixel_10bpc_max]
    pxor             m12, m12
    vpermq           m8, m4, m0
    vpermq           m9, m5, m1
    lea              r6, [strideq*3]
    call             .write_16x4
    vpermq           m8, m4, m2
    vpermq           m9, m5, m3
    ; second group of rows falls through into the writer
.write_16x4:
    pmulhrsw         m8, m11
    pmulhrsw         m9, m11
.write_16x4_noround:
    mova             ym10, [dstq+strideq*0]
    vinserti32x8     m10, [dstq+strideq*1], 1
    paddw            m8, m10
    mova             ym10, [dstq+strideq*2]
    vinserti32x8     m10, [dstq+r6 ], 1
    paddw            m9, m10
    pmaxsw           m8, m12
    pmaxsw           m9, m12
    pminsw           m8, m13
    pminsw           m9, m13
    mova             [dstq+strideq*0], ym8
    vextracti32x8    [dstq+strideq*1], m8, 1
    mova             [dstq+strideq*2], ym9
    vextracti32x8    [dstq+r6 ], m9, 1
    lea              dstq, [dstq+strideq*4]
    ret
ALIGN function_align
.main_fast: ; bottom half is zero
    vbroadcasti32x4  m6, [o(pd_4076_3920)]
    vbroadcasti32x4  m3, [o(pd_401_m1189)]
    vbroadcasti32x4  m5, [o(pd_m2598_1931)]
    vbroadcasti32x4  m9, [o(pd_3166_3612)]
    pmulld           m6, m4                 ; t15a t12a
    pmulld           m4, m3                 ; t8a t11a
    pmulld           m5, m7                 ; t9a t10a
    pmulld           m7, m9                 ; t14a t13a
    jmp              .main2
.main:
    ITX_MULSUB_2D    4, 6, 3, 9, 10, _, 401_3920, 4076_1189
    ITX_MULSUB_2D    5, 7, 3, 9, 10, _, 3166_1931, 2598_3612
.main2:
    REPX             {paddd x, m13}, m4, m6, m5, m7
    REPX             {psrad x, 12 }, m4, m5, m6, m7
    paddd            m9, m4, m5             ; t8 t11
    psubd            m4, m5                 ; t9 t10
    psubd            m5, m6, m7             ; t14 t13
    paddd            m6, m7                 ; t15 t12
    REPX             {pmaxsd x, m14}, m5, m4, m9, m6
    REPX             {pminsd x, m15}, m5, m4, m9, m6
.main3:
    psubd            m3, m0, m1             ; dct8 out7 out6
    paddd            m0, m1                 ; dct8 out0 out1
    vbroadcasti32x4  m7, [o(pd_3784_m3784)]
    pmulld           m7, m5
    vpmulld          m5, [o(pd_1567)] {1to16}
    paddd            m1, m2, m8             ; dct8 out3 out2
    psubd            m2, m8                 ; dct8 out4 out5
    vbroadcasti32x4  m8, [o(pd_1567_m1567)]
    pmulld           m8, m4
    vpmulld          m4, [o(pd_3784)] {1to16}
    REPX             {pmaxsd x, m14}, m0, m1
    REPX             {pminsd x, m15}, m0, m1
    paddd            m7, m13
    paddd            m5, m13
    paddd            m7, m8
    psubd            m5, m4
    psrad            m7, 12                 ; t14a t10a
    psrad            m5, 12                 ; t9a t13a
    punpckhqdq       m4, m9, m7
    punpcklqdq       m8, m9, m5
    punpckhqdq       m5, m6, m5
    punpcklqdq       m6, m7
    psubd            m7, m8, m4             ; t11a t10
    paddd            m8, m4                 ; t8a t9
    psubd            m4, m6, m5             ; t12a t13
    paddd            m6, m5                 ; t15a t14
    REPX             {pmaxsd x, m14}, m4, m7
    REPX             {pminsd x, m15}, m4, m7
    pmulld           m4, m12
    pmulld           m7, m12
    REPX             {pmaxsd x, m14}, m2, m3, m6, m8
    REPX             {pminsd x, m15}, m2, m3, m6, m8
    paddd            m4, m13
    paddd            m5, m4, m7
    psubd            m4, m7
    psrad            m4, 12                 ; t11 t10a
    psrad            m5, 12                 ; t12 t13a
    ret
ALIGN function_align
.transpose_16x8:
    packssdw         m0, m4
    packssdw         m1, m5
    packssdw         m2, m6
    packssdw         m3, m7
    ; m8-m11 hold the dword permute indices set up by the caller
    vpermi2d         m8, m0, m2
    vpermt2d         m0, m9, m2
    vpermi2d         m10, m1, m3
    vpermi2d         m11, m1, m3
    punpckhwd        m3, m8, m0
    punpcklwd        m1, m8, m0
    punpckhwd        m4, m10, m11
    punpcklwd        m2, m10, m11
    punpckldq        m0, m1, m2
    punpckhdq        m1, m2
    punpckldq        m2, m3, m4
    punpckhdq        m3, m4
    ret

INV_TXFM_16X8_FN adst, dct
INV_TXFM_16X8_FN adst, identity, -21
Coastguard WorkerINV_TXFM_16X8_FN adst, flipadst 1253*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, adst 1254*c0909341SAndroid Build Coastguard Worker 1255*c0909341SAndroid Build Coastguard Workercglobal iadst_16x8_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2 1256*c0909341SAndroid Build Coastguard Worker%undef cmp 1257*c0909341SAndroid Build Coastguard Worker call .main_pass1 1258*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [o(pd_1)] 1259*c0909341SAndroid Build Coastguard Worker paddd m0, m9 1260*c0909341SAndroid Build Coastguard Worker psubd m1, m9, m1 1261*c0909341SAndroid Build Coastguard Worker paddd m2, m9 1262*c0909341SAndroid Build Coastguard Worker psubd m3, m9, m3 1263*c0909341SAndroid Build Coastguard Worker paddd m4, m9, m5 1264*c0909341SAndroid Build Coastguard Worker psubd m5, m9, m6 1265*c0909341SAndroid Build Coastguard Worker paddd m6, m9, m7 1266*c0909341SAndroid Build Coastguard Worker psubd m7, m9, m8 1267*c0909341SAndroid Build Coastguard Worker.pass1_end: 1268*c0909341SAndroid Build Coastguard Worker mova m9, [o(permA)] 1269*c0909341SAndroid Build Coastguard Worker psrlq m8, m9, 8 1270*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 1}, m0, m4, m1, m5, m2, m6, m3, m7 1271*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x8_internal_10bpc).pass1_end2 1272*c0909341SAndroid Build Coastguard Worker.pass2: 1273*c0909341SAndroid Build Coastguard Worker call .main_pass2 1274*c0909341SAndroid Build Coastguard Worker vpermq m8, m11, m0 1275*c0909341SAndroid Build Coastguard Worker vpermq m9, m11, m1 1276*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_noround 1277*c0909341SAndroid Build Coastguard Worker vpermq m8, m11, m2 1278*c0909341SAndroid Build Coastguard Worker vpermq m9, m11, m3 1279*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x8_internal_10bpc).write_16x4_noround 1280*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1281*c0909341SAndroid 
Build Coastguard Worker.main_pass1: 1282*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 1283*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+64*0] 1284*c0909341SAndroid Build Coastguard Worker pmulld m7, m12, [cq+64*1] 1285*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+64*2] 1286*c0909341SAndroid Build Coastguard Worker pmulld m5, m12, [cq+64*3] 1287*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 1288*c0909341SAndroid Build Coastguard Worker pxor m4, m4 1289*c0909341SAndroid Build Coastguard Worker mova m10, [o(permB)] 1290*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+64*x], m4}, 0, 1, 2, 3 1291*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m2, m7, m1, m5 1292*c0909341SAndroid Build Coastguard Worker psrlq m6, m10, 32 1293*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m2, m7, m1, m5 1294*c0909341SAndroid Build Coastguard Worker mova m0, m6 1295*c0909341SAndroid Build Coastguard Worker vpermi2q m0, m2, m7 ; 0 2 1296*c0909341SAndroid Build Coastguard Worker vpermt2q m7, m10, m2 ; 3 1 1297*c0909341SAndroid Build Coastguard Worker mova m2, m6 1298*c0909341SAndroid Build Coastguard Worker vpermi2q m2, m1, m5 ; 4 6 1299*c0909341SAndroid Build Coastguard Worker vpermt2q m5, m10, m1 ; 7 5 1300*c0909341SAndroid Build Coastguard Worker cmp eobd, 43 1301*c0909341SAndroid Build Coastguard Worker jl .main_fast 1302*c0909341SAndroid Build Coastguard Worker pmulld m8, m12, [cq+64*4] 1303*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+64*5] 1304*c0909341SAndroid Build Coastguard Worker pmulld m9, m12, [cq+64*6] 1305*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+64*7] 1306*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+64*x], m4}, 4, 5, 6, 7 1307*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m8, m3, m9, m1 1308*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m8, m3, m9, m1 1309*c0909341SAndroid 
Build Coastguard Worker mova m4, m6 1310*c0909341SAndroid Build Coastguard Worker vpermi2q m4, m8, m3 ; 8 10 1311*c0909341SAndroid Build Coastguard Worker vpermt2q m3, m10, m8 ; 11 9 1312*c0909341SAndroid Build Coastguard Worker vpermi2q m6, m9, m1 ; 12 14 1313*c0909341SAndroid Build Coastguard Worker vpermt2q m1, m10, m9 ; 15 13 1314*c0909341SAndroid Build Coastguard Worker.main: 1315*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 1, 0, 8, 9, 10, _, 201_995, 4091_3973, 1 1316*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 2, 8, 9, 10, _, 1751_2440, 3703_3290, 1 1317*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 5, 4, 8, 9, 10, _, 3035_3513, 2751_2106 1318*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 6, 8, 9, 10, _, 3857_4052, 1380_601 1319*c0909341SAndroid Build Coastguard Worker jmp .main2 1320*c0909341SAndroid Build Coastguard Worker.main_fast: 1321*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m1, [o(pd_4091_3973)] 1322*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m8, [o(pd_201_995)] 1323*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m3, [o(pd_3703_3290)] 1324*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m9, [o(pd_1751_2440)] 1325*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m4, [o(pd_2751_2106)] 1326*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m10, [o(pd_3035_3513)] 1327*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m6, [o(pd_1380_601)] 1328*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m11, [o(pd_3857_4052)] 1329*c0909341SAndroid Build Coastguard Worker pmulld m1, m0 1330*c0909341SAndroid Build Coastguard Worker pmulld m0, m8 1331*c0909341SAndroid Build Coastguard Worker pmulld m3, m2 1332*c0909341SAndroid Build Coastguard Worker pmulld m2, m9 1333*c0909341SAndroid Build Coastguard Worker pmulld m4, m5 1334*c0909341SAndroid Build Coastguard Worker pmulld m5, m10 1335*c0909341SAndroid Build Coastguard Worker pmulld m6, m7 
1336*c0909341SAndroid Build Coastguard Worker pmulld m7, m11 1337*c0909341SAndroid Build Coastguard Worker.main2: 1338*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 1339*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 1340*c0909341SAndroid Build Coastguard Worker REPX {psubd x, m13, x}, m1, m3 1341*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13 }, m0, m2, m4, m5, m6, m7 1342*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m4, m1, m5, m2, m6, m3, m7 1343*c0909341SAndroid Build Coastguard Worker psubd m8, m0, m4 ; t8a t10a 1344*c0909341SAndroid Build Coastguard Worker paddd m0, m4 ; t0a t2a 1345*c0909341SAndroid Build Coastguard Worker psubd m4, m1, m5 ; t9a t11a 1346*c0909341SAndroid Build Coastguard Worker paddd m1, m5 ; t1a t3a 1347*c0909341SAndroid Build Coastguard Worker psubd m5, m2, m6 ; t12a t14a 1348*c0909341SAndroid Build Coastguard Worker paddd m2, m6 ; t4a t6a 1349*c0909341SAndroid Build Coastguard Worker psubd m6, m3, m7 ; t13a t15a 1350*c0909341SAndroid Build Coastguard Worker paddd m3, m7 ; t5a t7a 1351*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m8, m4, m5, m6 1352*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m8, m4, m5, m6 1353*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m11, [o(pd_4017_2276)] 1354*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m10, [o(pd_799_3406)] 1355*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 8, 4, 7, 9, _, 13, 10, 11 1356*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 6, 5, 7, 9, _, 13, 11, 10 1357*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m0, m2, m1, m3 1358*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m0, m2, m1, m3 1359*c0909341SAndroid Build Coastguard Worker psubd m7, m0, m2 ; t4 t6 1360*c0909341SAndroid Build Coastguard Worker paddd m0, m2 ; t0 t2 1361*c0909341SAndroid Build Coastguard Worker psubd m2, m1, m3 ; t5 t7 
1362*c0909341SAndroid Build Coastguard Worker paddd m1, m3 ; t1 t3 1363*c0909341SAndroid Build Coastguard Worker psubd m3, m4, m6 ; t12a t14a 1364*c0909341SAndroid Build Coastguard Worker paddd m4, m6 ; t8a t10a 1365*c0909341SAndroid Build Coastguard Worker psubd m6, m8, m5 ; t13a t15a 1366*c0909341SAndroid Build Coastguard Worker paddd m8, m5 ; t9a t11a 1367*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m7, m3, m2, m6 1368*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m7, m3, m2, m6 1369*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, m3, m7 ; t12a t4 1370*c0909341SAndroid Build Coastguard Worker punpckhqdq m3, m7 ; t14a t6 1371*c0909341SAndroid Build Coastguard Worker punpckhqdq m7, m6, m2 ; t15a t7 1372*c0909341SAndroid Build Coastguard Worker punpcklqdq m6, m2 ; t13a t5 1373*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_1567)] 1374*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_3784)] 1375*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 3, 2, 9, 10, 13, 10, 11 1376*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 5, 6, 2, 9, 10, 13, 11, 10 1377*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m0, m4, m1, m8 1378*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m0, m4, m1, m8 1379*c0909341SAndroid Build Coastguard Worker punpckhqdq m2, m4, m0 ; t10a t2 1380*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m0 ; t8a t0 1381*c0909341SAndroid Build Coastguard Worker punpckhqdq m0, m8, m1 ; t11a t3 1382*c0909341SAndroid Build Coastguard Worker punpcklqdq m8, m1 ; t9a t1 1383*c0909341SAndroid Build Coastguard Worker paddd m1, m6, m7 ; out2 -out3 1384*c0909341SAndroid Build Coastguard Worker psubd m6, m7 ; t14a t6 1385*c0909341SAndroid Build Coastguard Worker paddd m7, m5, m3 ; -out13 out12 1386*c0909341SAndroid Build Coastguard Worker psubd m5, m3 ; t15a t7 1387*c0909341SAndroid Build Coastguard Worker psubd m3, m8, m0 ; t11 t3a 
1388*c0909341SAndroid Build Coastguard Worker paddd m8, m0 ; out14 -out15 1389*c0909341SAndroid Build Coastguard Worker paddd m0, m4, m2 ; -out1 out0 1390*c0909341SAndroid Build Coastguard Worker psubd m4, m2 ; t10 t2a 1391*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m6, m5, m3, m4 1392*c0909341SAndroid Build Coastguard Worker mov r6d, 0x3333 1393*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m6, m5, m3, m4 1394*c0909341SAndroid Build Coastguard Worker kmovw k1, r6d 1395*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m12}, m6, m5, m3, m4 1396*c0909341SAndroid Build Coastguard Worker pxor m9, m9 1397*c0909341SAndroid Build Coastguard Worker REPX {vpsubd x{k1}, m9, x}, m0, m1, m7, m8 1398*c0909341SAndroid Build Coastguard Worker paddd m6, m13 1399*c0909341SAndroid Build Coastguard Worker paddd m4, m13 1400*c0909341SAndroid Build Coastguard Worker paddd m2, m6, m5 ; -out5 out4 1401*c0909341SAndroid Build Coastguard Worker psubd m6, m5 ; out10 -out11 1402*c0909341SAndroid Build Coastguard Worker psubd m5, m4, m3 ; -out9 out8 1403*c0909341SAndroid Build Coastguard Worker paddd m3, m4 ; out6 -out7 1404*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m2, m3, m5, m6 1405*c0909341SAndroid Build Coastguard Worker REPX {vpsubd x{k1}, m9, x}, m2, m3, m5, m6 1406*c0909341SAndroid Build Coastguard Worker ret 1407*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1408*c0909341SAndroid Build Coastguard Worker.main_pass2: 1409*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 1410*c0909341SAndroid Build Coastguard Worker pshufd m4, m0, q1032 1411*c0909341SAndroid Build Coastguard Worker pshufd m5, m1, q1032 1412*c0909341SAndroid Build Coastguard Worker call m(iadst_16x8_internal_8bpc).main_pass2 1413*c0909341SAndroid Build Coastguard Worker movshdup m11, [permC] 1414*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m6 1415*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m6 
; NOTE(review): the line below is the tail of iadst_16x8_internal_10bpc
; (.main_pass2). That routine begins before this block, so the fragment is
; kept exactly as found.
1416*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [pixel_10bpc_max] 1417*c0909341SAndroid Build Coastguard Worker pxor m12, m12 1418*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*3] 1419*c0909341SAndroid Build Coastguard Worker ret

; Instantiate the 16x8 entry points whose first transform is flipadst.
; The optional third argument is forwarded to INV_TXFM_16X8_FN (defined
; outside this block) - presumably an eob offset, as in the 16x16 variant of
; the macro further down; confirm against that definition.
INV_TXFM_16X8_FN flipadst, dct
INV_TXFM_16X8_FN flipadst, identity, -21
INV_TXFM_16X8_FN flipadst, adst
INV_TXFM_16X8_FN flipadst, flipadst

;-----------------------------------------------------------------------
; iflipadst_16x8_internal_10bpc
; cglobal args: dst, stride, c, eob, tx2
;
; Pass 1 reuses iadst_16x8's .main_pass1, then moves its eight result
; registers (m0-m3, m5-m8) into m0-m7 in reversed order, adding each one to
; or subtracting it from a broadcast pd_1 on the way, and joins iadst_16x8's
; .pass1_end.
; .pass2 reuses iadst_16x8's .main_pass2 and writes the rows with
; write_16x4_noround, m3/m2 first and m1/m0 last.
;-----------------------------------------------------------------------
cglobal iflipadst_16x8_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
    call                m(iadst_16x8_internal_10bpc).main_pass1
    vpbroadcastd        m9, [o(pd_1)]
    ; new m4..m0 are built back to front so no source register is
    ; overwritten before it has been read
    psubd               m4, m9, m3              ; m4 = 1 - m3
    paddd               m3, m9, m5              ; m3 = 1 + m5
    paddd               m5, m9, m2              ; m5 = 1 + m2
    psubd               m2, m9, m6              ; m2 = 1 - m6
    psubd               m6, m9, m1              ; m6 = 1 - m1
    paddd               m1, m9, m7              ; m1 = 1 + m7
    paddd               m7, m9, m0              ; m7 = 1 + m0
    psubd               m0, m9, m8              ; m0 = 1 - m8
    jmp                 m(iadst_16x8_internal_10bpc).pass1_end
.pass2:
    call                m(iadst_16x8_internal_10bpc).main_pass2
    ; m11 holds the permC control loaded by .main_pass2; shifting every
    ; qword right by 8 bits exposes a different byte of indices to vpermq
    psrlq               m11, 8
    vpermq              m8, m11, m3
    vpermq              m9, m11, m2
    call                m(idct_16x8_internal_10bpc).write_16x4_noround
    vpermq              m8, m11, m1
    vpermq              m9, m11, m0
    jmp                 m(idct_16x8_internal_10bpc).write_16x4_noround ; tail call

; Instantiate the 16x8 entry points whose first transform is identity.
INV_TXFM_16X8_FN identity, dct
INV_TXFM_16X8_FN identity, adst
INV_TXFM_16X8_FN identity, flipadst
INV_TXFM_16X8_FN identity, identity

;-----------------------------------------------------------------------
; iidentity_16x8_internal_10bpc
; cglobal args: dst, stride, c, eob, tx2
;
; Pass 1 loads the coefficients through idct_8x16's .load2, multiplies m0-m7
; by pd_5793, clears eight 64-byte rows of the coefficient buffer, rounds
; through idct_8x16's .round, transposes and jumps to the second-pass
; handler in tx2q.
; .pass2 only sets up m4/m5/m11 and continues in idct_16x8's .end.
;-----------------------------------------------------------------------
cglobal iidentity_16x8_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
    call                m(idct_8x16_internal_10bpc).load2
    vpbroadcastd        m8, [o(pd_5793)]
    vpbroadcastd        m13, [o(pd_3072)]       ; presumably the bias consumed by .round - confirm
    pxor                m10, m10                ; zero source for the buffer clear
    REPX {pmulld x, m8}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {mova [cq+64*x], m10}, 0, 1, 2, 3, 4, 5, 6, 7
    call                m(idct_8x16_internal_10bpc).round
    ; m8/m10 = permA >> 16, m9/m11 = permA >> 24 (per-qword shifts): the
    ; four permutation controls handed to transpose_16x8
    psrlq               m8, [o(permA)], 16
    psrlq               m9, m8, 8
    mova                m10, m8
    mova                m11, m9
    call                m(idct_16x8_internal_10bpc).transpose_16x8
    jmp                 tx2q
.pass2:
    movshdup            m4, [o(permC)]
    vpbroadcastd        m11, [o(pw_4096)]
    mova                m5, m4
    jmp                 m(idct_16x8_internal_10bpc).end

; INV_TXFM_16X16_FN type1, type2[, eob_offset = 0]
; Emits the 16x16 entry point for one transform pair via INV_TXFM_FN.
; For dct_dct it also emits a DC-only path:
;   r6d = ([cq] * 181 + 640) >> 10
; The DC coefficient is overwritten with eobd, r3d gets bit 4 set and
; control continues in the 16x8 function's .dconly2.
%macro INV_TXFM_16X16_FN 2-3 0 ; type1, type2, eob_offset
    INV_TXFM_FN         %1, %2, %3, 16x16
%ifidn %1_%2, dct_dct
    imul                r6d, [cq], 181
    mov                 [cq], eobd ; 0
    or                  r3d, 16                 ; presumably the row count for .dconly2 - confirm
    add                 r6d, 640
    sar                 r6d, 10
    jmp                 m(inv_txfm_add_dct_dct_16x8_10bpc).dconly2
%endif
%endmacro

INV_TXFM_16X16_FN dct, dct
INV_TXFM_16X16_FN dct, identity, 28
INV_TXFM_16X16_FN dct, flipadst
INV_TXFM_16X16_FN dct, adst

;-----------------------------------------------------------------------
; idct_16x16_internal_10bpc
; cglobal args: dst, stride, c, eob, tx2
;
; Constants kept live through pass 1:
;   m12 = pd_2896   m13 = pd_2048 (rounding bias for the >>12 steps)
;   m14 = clip_18b_min   m15 = clip_18b_max
;
; Pass 1, eob >= 36: even rows go through idct_8x16's .main, odd rows
; through .main below; .main_end combines, shifts and packs the result to
; words, .main_end3 transposes, and the coefficient buffer is zeroed before
; jumping to tx2q.
; Pass 1, eob < 36 (.fast): only the low 256 bits of rows 0-7 are read.
; Pass 2 runs the 8bpc idct_16x16 .main on the packed words and writes the
; result four rows at a time.
;
; The .main*, .main_end* and .round* labels are also entered from other
; functions in this file.
;-----------------------------------------------------------------------
cglobal idct_16x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
%undef cmp
    vpbroadcastd        m13, [o(pd_2048)]
    vpbroadcastd        m12, [o(pd_2896)]
    vpbroadcastd        m14, [o(clip_18b_min)]
    vpbroadcastd        m15, [o(clip_18b_max)]
    cmp                 eobd, 36
    jl .fast
    ; even-numbered rows
    mova                m0, [cq+64* 0]
    mova                m1, [cq+64* 2]
    mova                m2, [cq+64* 4]
    mova                m3, [cq+64* 6]
    mova                m4, [cq+64* 8]
    mova                m5, [cq+64*10]
    mova                m6, [cq+64*12]
    mova                m7, [cq+64*14]
%if WIN64
    ; Row 0 is already in m0, so its slot in the coefficient buffer is reused
    ; to stash xmm6/xmm7 until .pass1_end. Presumably needed because they are
    ; callee-saved on Win64 and get clobbered through m16+ - confirm against
    ; the register mapping in x86inc.
    movaps              [cq+16*0], xmm6
    movaps              [cq+16*1], xmm7
%endif
    call                m(idct_8x16_internal_10bpc).main
    ; odd-numbered rows
    mova                m16, [cq+64* 1]
    mova                m17, [cq+64* 3]
    mova                m18, [cq+64* 5]
    mova                m19, [cq+64* 7]
    mova                m20, [cq+64* 9]
    mova                m21, [cq+64*11]
    mova                m22, [cq+64*13]
    mova                m23, [cq+64*15]
    call .main
    call .main_end
.pass1_end:
%if WIN64
    movaps              xmm6, [cq+16*0]
    movaps              xmm7, [cq+16*1]
%endif
    vzeroupper
.pass1_end2:
    call .main_end3
.pass1_end3:
    ; zero cq[0 .. 64*16): four 64-byte stores per iteration, walking
    ; downwards from offset 64*12 until r6d turns negative
    mov                 r6d, 64*12
    pxor                m8, m8
.zero_loop:
    mova                [cq+r6+64*3], m8
    mova                [cq+r6+64*2], m8
    mova                [cq+r6+64*1], m8
    mova                [cq+r6+64*0], m8
    sub                 r6d, 64*4
    jge .zero_loop
    jmp                 tx2q
.pass2:
    ; r5 is re-pointed at o_base_8bpc before calling the 8bpc routine; the
    ; constants below are then addressed without o()
    lea                 r5, [o_base_8bpc]
    call                m(idct_16x16_internal_8bpc).main
    movshdup            m12, [permC]
    vpbroadcastd        m11, [pw_2048]
    psrlq               m13, m12, 8
    ; permute qwords and move the rows into the registers .pass2_end reads:
    ; m8,m7 / m6,m5 / m3,m2 / m1,m0
    vpermq              m8, m12, m0
    vpermq              m0, m13, m7
    vpermq              m7, m13, m1
    vpermq              m1, m12, m6
    vpermq              m6, m12, m2
    vpermq              m2, m13, m5
    vpermq              m5, m13, m3
    vpermq              m3, m12, m4
.pass2_end:
    ; Entered with m11 = word scale factor and the rows in m8, m7, m6, m5,
    ; m3, m2, m1, m0. Each write_16x4_noround call takes its input in m8/m9.
    lea                 r6, [strideq*3]
    vpbroadcastd        m13, [pixel_10bpc_max]  ; presumably the upper clamp used by the writer
    pxor                m12, m12                ; presumably the lower clamp used by the writer
    pmulhrsw            m8, m11, m8
    pmulhrsw            m9, m11, m7
    call                m(idct_16x8_internal_10bpc).write_16x4_noround
    pmulhrsw            m8, m11, m6
    pmulhrsw            m9, m11, m5
    call                m(idct_16x8_internal_10bpc).write_16x4_noround
    pmulhrsw            m8, m11, m3
    pmulhrsw            m9, m11, m2
    call                m(idct_16x8_internal_10bpc).write_16x4_noround
    pmulhrsw            m8, m11, m1
    pmulhrsw            m9, m11, m0
    jmp                 m(idct_16x8_internal_10bpc).write_16x4_noround ; tail call
.fast:
    ; eob < 36: load only the low 256 bits of rows 0-7 and pair them up in
    ; full registers with vpermt2q (control = permB after movshdup)
    mova                ym0, [cq+64*0]
    mova                ym2, [cq+64*4]
    movshdup            m8, [o(permB)]
    mova                ym1, [cq+64*2]
    mova                ym3, [cq+64*6]
    mova                ym4, [cq+64*1]
    mova                ym5, [cq+64*3]
    mova                ym6, [cq+64*5]
    mova                ym7, [cq+64*7]
    vpermt2q            m0, m8, m2 ; 0 4
    vpermt2q            m1, m8, m3 ; 2 6
    vpermt2q            m4, m8, m5 ; 1 3
    vpermt2q            m7, m8, m6 ; 7 5
    call                m(idct_8x8_internal_10bpc).main_fast
    call                m(idct_16x8_internal_10bpc).main_fast
    vpbroadcastd        m11, [o(pd_2)]
    call                m(idct_8x16_internal_10bpc).main_end2
    mova                m8, [o(permA)]
    psrlq               m9, m8, 8
    jmp                 m(iadst_16x16_internal_10bpc).pass1_fast_end2
ALIGN function_align
; ---- odd half of the 16-point DCT; inputs in m16-m23 --------------------
; Every *_rect2 entry first round-shifts its inputs by 12 (adds m13, then
; psrad 12) and falls through to the entry below it.
.main_fast2_rect2:
    REPX {paddd x, m13}, m16, m17
    REPX {psrad x, 12 }, m16, m17
.main_fast2:
    ; only m16 and m17 are used as inputs
    pmulld              m22, m16, [o(pd_4076)] {1to16} ; t15a
    pmulld              m9, m16, [o(pd_401)] {1to16}   ; t8a
    pmulld              m18, m17, [o(pd_1189)] {1to16} ; t11a
    pmulld              m17, [o(pd_3920)] {1to16}      ; t12a
    psubd               m18, m13, m18           ; negate the product and add the bias in one step
    REPX {paddd x, m13}, m22, m9, m17
    REPX {psrad x, 12 }, m18, m22, m9, m17

    ; fill the remaining registers .main3 reads by copying the four values
    ; computed above
    mova                m20, m9
    mova                m16, m18
    mova                m23, m22
    mova                m19, m17
    jmp .main3
.main_fast_rect2:
    REPX {paddd x, m13}, m16, m17, m18, m19
    REPX {psrad x, 12 }, m16, m17, m18, m19
.main_fast:
    ; only m16-m19 are used as inputs
    pmulld              m23, m16, [o(pd_4076)] {1to16} ; t15a
    pmulld              m16, [o(pd_401)] {1to16}       ; t8a
    pmulld              m20, m19, [o(pd_2598)] {1to16} ; t9a
    pmulld              m19, [o(pd_3166)] {1to16}      ; t14a
    pmulld              m22, m17, [o(pd_1189)] {1to16} ; t11a
    pmulld              m17, [o(pd_3920)] {1to16}      ; t12a
    pmulld              m21, m18, [o(pd_3612)] {1to16} ; t13a
    pmulld              m18, [o(pd_1931)] {1to16}      ; t10a
    ; m20/m22 are negated and biased here, which is why .round2 (the entry
    ; that skips the bias for these two registers) is used
    psubd               m20, m13, m20
    psubd               m22, m13, m22
    call .round2
    jmp .main2
.main_rect2:
    call .round
.main:
    ITX_MULSUB_2D 16, 23, 7, 9, 10, _, 401, 4076 ; t8a, t15a
    ITX_MULSUB_2D 20, 19, 7, 9, 10, _, 3166, 2598 ; t9a, t14a
    ITX_MULSUB_2D 22, 17, 7, 9, 10, _, 3920, 1189 ; t11a, t12a
    ITX_MULSUB_2D 18, 21, 7, 9, 10, _, 1931, 3612 ; t10a, t13a
    call .round
.main2:
    paddd               m9, m20, m16  ; t8
    psubd               m20, m16, m20 ; t9
    psubd               m16, m22, m18 ; t10
    paddd               m18, m22      ; t11
    paddd               m22, m23, m19 ; t15
    psubd               m23, m19      ; t14
    psubd               m19, m17, m21 ; t13
    paddd               m17, m21      ; t12
    ; clamp every intermediate to [m14, m15]
    REPX {pmaxsd x, m14}, m20, m23, m16, m19
    REPX {pminsd x, m15}, m20, m23, m16, m19
    REPX {pmaxsd x, m14}, m9, m18, m22, m17
    REPX {pminsd x, m15}, m9, m18, m22, m17
.main3:
    vpbroadcastd        m11, [o(pd_3784)]
    vpbroadcastd        m10, [o(pd_1567)]
    ITX_MULSUB_2D 23, 20, 21, 7, _, 13, 10, 11
    ITX_MULSUB_2D 19, 16, 21, 7, _, 13, 10, 11, 2
    paddd               m21, m20, m19 ; t14
    psubd               m20, m19      ; t13
    psubd               m19, m9, m18  ; t11a
    paddd               m9, m18       ; t8a
    psubd               m18, m23, m16 ; t10
    paddd               m16, m23      ; t9
    psubd               m23, m22, m17 ; t12a
    paddd               m22, m17      ; t15a
    REPX {pmaxsd x, m14}, m20, m23, m18, m19
    REPX {pminsd x, m15}, m20, m23, m18, m19
    REPX {pmulld x, m12}, m20, m23, m18, m19
    ; from here the last butterfly stage of the even half ("dct8 outN",
    ; m0-m8) is interleaved with the remaining odd-half work
    psubd               m7, m0, m6 ; dct8 out7
    paddd               m0, m6     ; dct8 out0
    psubd               m6, m1, m5 ; dct8 out6
    paddd               m1, m5     ; dct8 out1
    REPX {pmaxsd x, m14}, m7, m0, m6, m1
    psubd               m5, m2, m4 ; dct8 out5
    paddd               m2, m4     ; dct8 out2
    REPX {pminsd x, m15}, m7, m0, m6, m1
    psubd               m4, m3, m8 ; dct8 out4
    paddd               m3, m8     ; dct8 out3
    REPX {pmaxsd x, m14}, m5, m2, m4, m3
    ; bias only m20/m23: each sum and difference below contains exactly one
    ; of them, so all four results are rounded by the final psrad
    paddd               m20, m13
    paddd               m23, m13
    REPX {pminsd x, m15}, m5, m2, m4, m3
    psubd               m17, m20, m18 ; t10a
    paddd               m20, m18      ; t13a
    REPX {pmaxsd x, m14}, m22, m21, m16, m9
    psubd               m18, m23, m19 ; t11
    paddd               m19, m23      ; t12
    REPX {pminsd x, m15}, m22, m21, m16, m9
    REPX {psrad x, 12 }, m20, m19, m18, m17
    ret
; ---- final butterfly, shift and pack ------------------------------------
; In:  m0-m7 = even-half outputs; m22, m21, m20, m19, m18, m17, m16, m9 =
;      odd-half values, in the order they are paired with m0..m7
; Out: m0-m7 = words, register n packing out[n] with out[n+8]
.main_end:
    vpbroadcastd        m11, [o(pd_2)]
.main_end2:
    ; m11 is used both as the rounding bias added here and as the per-lane
    ; shift count of vpsravd below
    REPX {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
    psubd               m23, m0, m22 ; out15
    paddd               m0, m22      ; out0
    psubd               m22, m1, m21 ; out14
    paddd               m1, m21      ; out1
    psubd               m21, m2, m20 ; out13
    paddd               m2, m20      ; out2
    psubd               m20, m3, m19 ; out12
    paddd               m3, m19      ; out3
    psubd               m19, m4, m18 ; out11
    paddd               m4, m18      ; out4
    psubd               m18, m5, m17 ; out10
    paddd               m5, m17      ; out5
    psubd               m17, m6, m16 ; out9
    paddd               m6, m16      ; out6
    psubd               m16, m7, m9  ; out8
    paddd               m7, m9       ; out7
    REPX {vpsravd x, m11}, m0, m16, m1, m17, m2, m18, m3, m19, \
                           m4, m20, m5, m21, m6, m22, m7, m23
    packssdw            m0, m16
    packssdw            m1, m17
    packssdw            m2, m18
    packssdw            m3, m19
    packssdw            m4, m20
    packssdw            m5, m21
    packssdw            m6, m22
    packssdw            m7, m23
    ret
; ---- transpose of the packed words in m0-m7 (m8 is scratch) -------------
; Word and dword interleaves followed by 128-bit lane shuffles; the trailing
; comments on the last eight shuffles are the row pairs left in each
; register.
.main_end3:
    punpckhwd           m8, m0, m1
    punpcklwd           m0, m1
    punpckhwd           m1, m2, m3
    punpcklwd           m2, m3
    punpckhwd           m3, m4, m5
    punpcklwd           m4, m5
    punpcklwd           m5, m6, m7
    punpckhwd           m6, m7
    punpckhdq           m7, m0, m2
    punpckldq           m0, m2
    punpckhdq           m2, m8, m1
    punpckldq           m8, m1
    punpckhdq           m1, m4, m5
    punpckldq           m4, m5
    punpckhdq           m5, m3, m6
    punpckldq           m3, m6
    vshufi32x4          m6, m0, m4, q3232
    vinserti32x8        m0, ym4, 1
    vinserti32x8        m4, m8, ym3, 1
    vshufi32x4          m8, m3, q3232
    vinserti32x8        m3, m7, ym1, 1
    vshufi32x4          m7, m1, q3232
    vshufi32x4          m1, m2, m5, q3232
    vinserti32x8        m2, ym5, 1
    vshufi32x4          m5, m7, m1, q2020 ; 10 11
    vshufi32x4          m7, m1, q3131     ; 14 15
    vshufi32x4          m1, m3, m2, q2020 ; 2 3
    vshufi32x4          m3, m2, q3131     ; 6 7
    vshufi32x4          m2, m0, m4, q3131 ; 4 5
    vshufi32x4          m0, m4, q2020     ; 0 1
    vshufi32x4          m4, m6, m8, q2020 ; 8 9
    vshufi32x4          m6, m8, q3131     ; 12 13
    ret
ALIGN function_align
; ---- round m16-m23: add the m13 bias, arithmetic shift right by 12 ------
; .round  biases all eight registers
; .round2 skips the bias for m20/m22 (the caller has already applied it)
; .round3 skips the bias for m16/m18/m20/m22
.round:
    paddd               m20, m13
    paddd               m22, m13
.round2:
    paddd               m16, m13
    paddd               m18, m13
.round3:
    REPX {psrad x, 12 }, m16, m18, m20, m22
    REPX {paddd x, m13}, m17, m19, m21, m23
    REPX {psrad x, 12 }, m17, m19, m21, m23
    ret

; NOTE(review): everything from here on belongs to
; iadst_16x16_internal_10bpc, which continues past this block; the fragment
; is kept exactly as found.
1762*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, dct 1763*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, flipadst 1764*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, adst 1765*c0909341SAndroid Build Coastguard Worker 1766*c0909341SAndroid Build Coastguard Workercglobal iadst_16x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2 1767*c0909341SAndroid Build Coastguard Worker%undef cmp 1768*c0909341SAndroid Build Coastguard Worker cmp eobd, 36 1769*c0909341SAndroid Build Coastguard Worker jl .fast 1770*c0909341SAndroid Build Coastguard Worker call .main_pass1 1771*c0909341SAndroid Build Coastguard Worker packssdw m0, m16 1772*c0909341SAndroid Build Coastguard Worker packssdw m1, m17 1773*c0909341SAndroid Build Coastguard Worker packssdw m2, m18 1774*c0909341SAndroid Build Coastguard Worker packssdw m3, m19 1775*c0909341SAndroid Build Coastguard Worker packssdw m4, m5, m20 1776*c0909341SAndroid Build Coastguard Worker packssdw m5, m6, m21 1777*c0909341SAndroid Build Coastguard Worker packssdw m6, m7, m22 1778*c0909341SAndroid Build Coastguard Worker packssdw m7, m8, m23 1779*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x16_internal_10bpc).pass1_end 1780*c0909341SAndroid Build Coastguard Worker.fast: 1781*c0909341SAndroid Build Coastguard Worker call .main_pass1_fast 1782*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [o(pd_2)] 1783*c0909341SAndroid Build Coastguard Worker paddd m0, m9 1784*c0909341SAndroid Build
Coastguard Worker psubd m1, m9, m1 1785*c0909341SAndroid Build Coastguard Worker paddd m2, m9 1786*c0909341SAndroid Build Coastguard Worker psubd m3, m9, m3 1787*c0909341SAndroid Build Coastguard Worker paddd m4, m9, m5 1788*c0909341SAndroid Build Coastguard Worker psubd m5, m9, m6 1789*c0909341SAndroid Build Coastguard Worker paddd m6, m9, m7 1790*c0909341SAndroid Build Coastguard Worker psubd m7, m9, m8 1791*c0909341SAndroid Build Coastguard Worker.pass1_fast_end: 1792*c0909341SAndroid Build Coastguard Worker mova m9, [o(permA)] 1793*c0909341SAndroid Build Coastguard Worker psrlq m8, m9, 8 1794*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m0, m1, m2, m3, m4, m5, m6, m7 1795*c0909341SAndroid Build Coastguard Worker.pass1_fast_end2: 1796*c0909341SAndroid Build Coastguard Worker mova m10, m9 1797*c0909341SAndroid Build Coastguard Worker mova m11, m8 1798*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).transpose_16x8 1799*c0909341SAndroid Build Coastguard Worker pxor m4, m4 1800*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 1801*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+64*x], ym4}, 0, 1, 2, 3, 4, 5, 6, 7 1802*c0909341SAndroid Build Coastguard Worker jmp tx2q 1803*c0909341SAndroid Build Coastguard Worker.pass2: 1804*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 1805*c0909341SAndroid Build Coastguard Worker call m(iadst_16x16_internal_8bpc).main_pass2b 1806*c0909341SAndroid Build Coastguard Worker movshdup m12, [permC] 1807*c0909341SAndroid Build Coastguard Worker mova m11, [pw_2048_m2048] 1808*c0909341SAndroid Build Coastguard Worker psrlq m13, m12, 8 1809*c0909341SAndroid Build Coastguard Worker vpermq m8, m13, m0 1810*c0909341SAndroid Build Coastguard Worker vpermq m0, m12, m7 1811*c0909341SAndroid Build Coastguard Worker vpermq m7, m13, m1 1812*c0909341SAndroid Build Coastguard Worker vpermq m1, m12, m6 1813*c0909341SAndroid Build Coastguard Worker vpermq m6, m13, m2 
1814*c0909341SAndroid Build Coastguard Worker vpermq m2, m12, m5 1815*c0909341SAndroid Build Coastguard Worker vpermq m5, m13, m3 1816*c0909341SAndroid Build Coastguard Worker vpermq m3, m12, m4 1817*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x16_internal_10bpc).pass2_end 1818*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1819*c0909341SAndroid Build Coastguard Worker.main_pass1: 1820*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 0] 1821*c0909341SAndroid Build Coastguard Worker%if WIN64 1822*c0909341SAndroid Build Coastguard Worker movaps [cq+16*0], xmm6 1823*c0909341SAndroid Build Coastguard Worker movaps [cq+16*1], xmm7 1824*c0909341SAndroid Build Coastguard Worker%endif 1825*c0909341SAndroid Build Coastguard Worker mova m23, [cq+64*15] 1826*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 1827*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 23, 0, 8, 9, 10, 13, 201, 4091 ; t1 t0 1828*c0909341SAndroid Build Coastguard Worker mova m7, [cq+64* 7] 1829*c0909341SAndroid Build Coastguard Worker mova m16, [cq+64* 8] 1830*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 16, 8, 9, 10, 13, 3035, 2751 ; t9 t8 1831*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64* 2] 1832*c0909341SAndroid Build Coastguard Worker mova m21, [cq+64*13] 1833*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 21, 2, 8, 9, 10, 13, 995, 3973 ; t3 t2 1834*c0909341SAndroid Build Coastguard Worker mova m5, [cq+64* 5] 1835*c0909341SAndroid Build Coastguard Worker mova m18, [cq+64*10] 1836*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 5, 18, 8, 9, 10, 13, 3513, 2106 ; t11 t10 1837*c0909341SAndroid Build Coastguard Worker mova m4, [cq+64* 4] 1838*c0909341SAndroid Build Coastguard Worker mova m19, [cq+64*11] 1839*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 19, 4, 8, 9, 10, 13, 1751, 3703 ; t5 t4 1840*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64* 3] 1841*c0909341SAndroid Build Coastguard 
Worker mova m20, [cq+64*12] 1842*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 20, 8, 9, 10, 13, 3857, 1380 ; t13 t12 1843*c0909341SAndroid Build Coastguard Worker mova m6, [cq+64* 6] 1844*c0909341SAndroid Build Coastguard Worker mova m17, [cq+64* 9] 1845*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 17, 6, 8, 9, 10, 13, 2440, 3290 ; t7 t6 1846*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64* 1] 1847*c0909341SAndroid Build Coastguard Worker mova m22, [cq+64*14] 1848*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 1, 22, 8, 9, 10, 13, 4052, 601 ; t15 t14 1849*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 1850*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 1851*c0909341SAndroid Build Coastguard Worker psubd m9, m23, m7 ; t9a 1852*c0909341SAndroid Build Coastguard Worker paddd m23, m7 ; t1a 1853*c0909341SAndroid Build Coastguard Worker psubd m7, m2, m18 ; t10a 1854*c0909341SAndroid Build Coastguard Worker paddd m18, m2 ; t2a 1855*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m9, m23, m7, m18 1856*c0909341SAndroid Build Coastguard Worker psubd m2, m17, m1 ; t15a 1857*c0909341SAndroid Build Coastguard Worker paddd m17, m1 ; t7a 1858*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m9, m23, m7, m18 1859*c0909341SAndroid Build Coastguard Worker psubd m1, m21, m5 ; t11a 1860*c0909341SAndroid Build Coastguard Worker paddd m21, m5 ; t3a 1861*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m2, m17, m1, m21 1862*c0909341SAndroid Build Coastguard Worker psubd m5, m4, m20 ; t12a 1863*c0909341SAndroid Build Coastguard Worker paddd m4, m20 ; t4a 1864*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m2, m17, m1, m21 1865*c0909341SAndroid Build Coastguard Worker psubd m20, m19, m3 ; t13a 1866*c0909341SAndroid Build Coastguard Worker paddd m19, m3 ; t5a 1867*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m5, m4, 
m20, m19 1868*c0909341SAndroid Build Coastguard Worker psubd m8, m6, m22 ; t14a 1869*c0909341SAndroid Build Coastguard Worker paddd m6, m22 ; t6a 1870*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m5, m4, m20, m19 1871*c0909341SAndroid Build Coastguard Worker psubd m22, m0, m16 ; t8a 1872*c0909341SAndroid Build Coastguard Worker paddd m16, m0 ; t0a 1873*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m8, m6, m22, m16 1874*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_4017)] 1875*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_799)] 1876*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m8, m6, m22, m16 1877*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 22, 9, 0, 3, _, 13, 10, 11 ; t9 t8 1878*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 20, 5, 0, 3, _, 13, 11, 10 ; t12 t13 1879*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_2276)] 1880*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_3406)] 1881*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 1, 0, 3, _, 13, 10, 11 ; t11 t10 1882*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 2, 8, 0, 3, _, 13, 11, 10 ; t14 t15 1883*c0909341SAndroid Build Coastguard Worker paddd m0, m16, m4 ; t0 1884*c0909341SAndroid Build Coastguard Worker psubd m16, m4 ; t4 1885*c0909341SAndroid Build Coastguard Worker psubd m3, m23, m19 ; t5 1886*c0909341SAndroid Build Coastguard Worker paddd m23, m19 ; t1 1887*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m0, m16, m3, m23 1888*c0909341SAndroid Build Coastguard Worker psubd m19, m18, m6 ; t6 1889*c0909341SAndroid Build Coastguard Worker paddd m18, m6 ; t2 1890*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m0, m16, m3, m23 1891*c0909341SAndroid Build Coastguard Worker psubd m6, m21, m17 ; t7 1892*c0909341SAndroid Build Coastguard Worker paddd m21, m17 ; t3 1893*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd 
x, m14}, m19, m18, m6, m21 1894*c0909341SAndroid Build Coastguard Worker paddd m17, m9, m20 ; t8a 1895*c0909341SAndroid Build Coastguard Worker psubd m9, m20 ; t12a 1896*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m19, m18, m6, m21 1897*c0909341SAndroid Build Coastguard Worker psubd m20, m22, m5 ; t13a 1898*c0909341SAndroid Build Coastguard Worker paddd m22, m5 ; t9a 1899*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m17, m9, m20, m22 1900*c0909341SAndroid Build Coastguard Worker psubd m5, m1, m2 ; t14a 1901*c0909341SAndroid Build Coastguard Worker paddd m1, m2 ; t10a 1902*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m17, m9, m20, m22 1903*c0909341SAndroid Build Coastguard Worker psubd m2, m7, m8 ; t15a 1904*c0909341SAndroid Build Coastguard Worker paddd m7, m8 ; t11a 1905*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m5, m1, m2, m7 1906*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_3784)] 1907*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_1567)] 1908*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m5, m1, m2, m7 1909*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 16, 3, 4, 8, _, 13, 10, 11 ; t5a t4a 1910*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 6, 19, 4, 8, _, 13, 11, 10 ; t6a t7a 1911*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 9, 20, 4, 8, _, 13, 10, 11 ; t13 t12 1912*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 2, 5, 4, 8, _, 13, 11, 10 ; t14 t15 1913*c0909341SAndroid Build Coastguard Worker psubd m8, m0, m18 ; t2a 1914*c0909341SAndroid Build Coastguard Worker paddd m0, m18 ; out0 1915*c0909341SAndroid Build Coastguard Worker psubd m18, m23, m21 ; t3a 1916*c0909341SAndroid Build Coastguard Worker paddd m23, m21 ; -out15 1917*c0909341SAndroid Build Coastguard Worker paddd m21, m9, m5 ; -out13 1918*c0909341SAndroid Build Coastguard Worker psubd m9, m5 ; t15a 1919*c0909341SAndroid Build Coastguard 
Worker psubd m5, m3, m6 ; t6 1920*c0909341SAndroid Build Coastguard Worker paddd m3, m6 ; -out3 1921*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m8, m18, m9, m5 1922*c0909341SAndroid Build Coastguard Worker psubd m6, m20, m2 ; t14a 1923*c0909341SAndroid Build Coastguard Worker paddd m2, m20 ; out2 1924*c0909341SAndroid Build Coastguard Worker paddd m20, m16, m19 ; out12 1925*c0909341SAndroid Build Coastguard Worker psubd m16, m19 ; t7 1926*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m8, m18, m9, m5 1927*c0909341SAndroid Build Coastguard Worker psubd m19, m22, m7 ; t11 1928*c0909341SAndroid Build Coastguard Worker paddd m22, m7 ; out14 1929*c0909341SAndroid Build Coastguard Worker psubd m7, m17, m1 ; t10 1930*c0909341SAndroid Build Coastguard Worker paddd m1, m17 ; -out1 1931*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m6, m16, m19, m7 1932*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_1448)] 1933*c0909341SAndroid Build Coastguard Worker vpbroadcastd m4, [o(pd_2)] 1934*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_5120)] 1935*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_5119)] 1936*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m6, m16, m19, m7 1937*c0909341SAndroid Build Coastguard Worker psubd m17, m7, m19 ; -out9 1938*c0909341SAndroid Build Coastguard Worker paddd m7, m19 ; out6 1939*c0909341SAndroid Build Coastguard Worker psubd m19, m5, m16 ; -out11 1940*c0909341SAndroid Build Coastguard Worker paddd m5, m16 ; out4 1941*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m12}, m17, m7, m19, m5 1942*c0909341SAndroid Build Coastguard Worker psubd m16, m8, m18 ; out8 1943*c0909341SAndroid Build Coastguard Worker paddd m8, m18 ; -out7 1944*c0909341SAndroid Build Coastguard Worker psubd m18, m6, m9 ; out10 1945*c0909341SAndroid Build Coastguard Worker paddd m6, m9 ; -out5 1946*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, 
m12}, m16, m8, m18, m6 1947*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m4 }, m0, m2, m20, m22 1948*c0909341SAndroid Build Coastguard Worker REPX {psubd x, m4, x}, m1, m3, m21, m23 1949*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m10 }, m7, m5, m16, m18 1950*c0909341SAndroid Build Coastguard Worker REPX {psubd x, m11, x}, m17, m19, m8, m6 1951*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2 }, m20, m22, m0, m2, m21, m23, m1, m3 1952*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 13}, m17, m19, m5, m7, m16, m18, m6, m8 1953*c0909341SAndroid Build Coastguard Worker ret 1954*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1955*c0909341SAndroid Build Coastguard Worker.main_pass1_fast: 1956*c0909341SAndroid Build Coastguard Worker mova ym0, [cq+64*0] 1957*c0909341SAndroid Build Coastguard Worker mova ym1, [cq+64*2] 1958*c0909341SAndroid Build Coastguard Worker movshdup m8, [o(permB)] 1959*c0909341SAndroid Build Coastguard Worker mova ym6, [cq+64*1] 1960*c0909341SAndroid Build Coastguard Worker mova ym7, [cq+64*3] 1961*c0909341SAndroid Build Coastguard Worker mova ym2, [cq+64*4] 1962*c0909341SAndroid Build Coastguard Worker mova ym3, [cq+64*6] 1963*c0909341SAndroid Build Coastguard Worker mova ym4, [cq+64*5] 1964*c0909341SAndroid Build Coastguard Worker mova ym5, [cq+64*7] 1965*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m8, m1 ; 0 2 1966*c0909341SAndroid Build Coastguard Worker vpermt2q m7, m8, m6 ; 3 1 1967*c0909341SAndroid Build Coastguard Worker vpermt2q m2, m8, m3 ; 4 6 1968*c0909341SAndroid Build Coastguard Worker vpermt2q m5, m8, m4 ; 7 5 1969*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 1970*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 1971*c0909341SAndroid Build Coastguard Worker jmp m(iadst_16x8_internal_10bpc).main_fast 1972*c0909341SAndroid Build Coastguard Worker 1973*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, dct 
INV_TXFM_16X16_FN flipadst, adst
INV_TXFM_16X16_FN flipadst, flipadst

;-----------------------------------------------------------------------
; iflipadst_16x16_internal_10bpc
; Named arguments: dst, stride, c (coefficients), eob, tx2.
; Pass 1 reuses the iadst_16x16 10bpc kernels and only differs in how the
; results are paired before jumping into the shared idct_16x16 tail.
; eob < 36 selects the reduced (.fast) path.
;-----------------------------------------------------------------------
cglobal iflipadst_16x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
%undef cmp
    cmp                 eobd, 36
    jl .fast
    call m(iadst_16x16_internal_10bpc).main_pass1
    ; Pack dwords to words with signed saturation. The pairings follow the
    ; out* labels documented at the end of iadst_16x16 .main_pass1.
    packssdw            m4, m19, m3             ; out11 out3
    packssdw            m3, m20, m5             ; out12 out4
    packssdw            m5, m18, m2             ; out10 out2
    packssdw            m2, m21, m6             ; out13 out5
    packssdw            m6, m17, m1             ; out9  out1
    packssdw            m1, m22, m7             ; out14 out6
    packssdw            m7, m16, m0             ; out8  out0
    packssdw            m0, m23, m8             ; out15 out7
    jmp m(idct_16x16_internal_10bpc).pass1_end
.fast:
    call m(iadst_16x16_internal_10bpc).main_pass1_fast
    vpbroadcastd        m9, [o(pd_2)]
    ; m9 = 2 in every dword. paddd computes (2 + x), psubd computes (2 - x).
    ; NOTE(review): presumably rounding bias plus sign fix-up for the
    ; negated outputs of the shared kernel - confirm against
    ; iadst_16x8 .main_fast, which is not visible here.
    psubd               m4, m9, m3
    paddd               m3, m9, m5
    paddd               m5, m9, m2
    psubd               m2, m9, m6
    psubd               m6, m9, m1
    paddd               m1, m9, m7
    paddd               m7, m9, m0
    psubd               m0, m9, m8
    jmp m(iadst_16x16_internal_10bpc).pass1_fast_end
.pass2:
    ; Second pass runs the 8bpc kernel, so r5 is re-pointed at the 8bpc
    ; constant base first.
    lea                 r5, [o_base_8bpc]
    call m(iadst_16x16_internal_8bpc).main_pass2b
    movshdup            m12, [permC]
    movu                m11, [pw_m2048_2048]
    psrlq               m13, m12, 8             ; second permutation: m12 >> 8 per qword
    ; m4-m7 are permuted with m13 and moved up one register (m5-m8);
    ; m0-m3 are permuted in place with m12.
    vpermq              m8, m13, m7
    vpermq              m7, m13, m6
    vpermq              m6, m13, m5
    vpermq              m5, m13, m4
    vpermq              m3, m12, m3
    vpermq              m2, m12, m2
    vpermq              m1, m12, m1
    vpermq              m0, m12, m0
    jmp m(idct_16x16_internal_10bpc).pass2_end

INV_TXFM_16X16_FN identity, dct, -92
INV_TXFM_16X16_FN identity, identity

;-----------------------------------------------------------------------
; iidentity_16x16_internal_10bpc
; Named arguments: dst, stride, c (coefficients), eob, tx2.
; Pass 1 scales each coefficient as (x * 5793 + 5120) >> 13 and packs the
; results to words. eob < 36 selects the .fast path, which only reads the
; 32-byte halves loaded by .pass1_main_fast.
; Register roles in pass 1: m10 = pd_5793, m11 = pd_5120, r6 = read cursor.
;-----------------------------------------------------------------------
cglobal iidentity_16x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
%undef cmp
    vpbroadcastd        m10, [o(pd_5793)]
    vpbroadcastd        m11, [o(pd_5120)]
    mov                 r6, cq
    cmp                 eobd, 36
    jl .fast
    ; Four calls; each one consumes the 64-byte rows at r6+64*{0,1,8,9}
    ; and advances r6 by two rows.
    call .pass1_main
    packssdw            m0, m6, m8
    packssdw            m1, m7, m9
    call .pass1_main
    packssdw            m2, m6, m8
    packssdw            m3, m7, m9
    call .pass1_main
    packssdw            m4, m6, m8
    packssdw            m5, m7, m9
    call .pass1_main
    packssdw            m6, m8
    packssdw            m7, m9
    jmp m(idct_16x16_internal_10bpc).pass1_end2
.fast:
    ; Four calls; each one advances r6 by one 64-byte row.
    call .pass1_main_fast
    packssdw            m0, m6, m7
    call .pass1_main_fast
    packssdw            m1, m6, m7
    call .pass1_main_fast
    packssdw            m2, m6, m7
    call .pass1_main_fast
    packssdw            m3, m6, m7
    ; Word/dword interleave followed by 128-bit lane shuffles of m0-m3.
    punpckhwd           m4, m0, m1
    punpcklwd           m0, m1
    punpckhwd           m1, m2, m3
    punpcklwd           m2, m3
    punpckldq           m3, m4, m1
    punpckhdq           m4, m1
    punpckhdq           m1, m0, m2
    punpckldq           m0, m2
    pxor                m7, m7
    vshufi32x4          m2, m0, m3, q3131
    vshufi32x4          m0, m3, q2020
    vshufi32x4          m3, m1, m4, q3131
    vshufi32x4          m1, m4, q2020
    REPX {mova x, m7}, m4, m5, m6               ; m4-m7 are all zero from here on
    jmp m(idct_16x16_internal_10bpc).pass1_end3
.pass2:
    movshdup            m14, [o(permC)]
    vpbroadcastd        m15, [o(pw_1697x16)]
    lea                 r6, [strideq*3]
    vpbroadcastd        m11, [o(pw_2048)]
    pxor                m12, m12
    vpbroadcastd        m13, [pixel_10bpc_max]
    ; Process the eight word registers two at a time. The last pair falls
    ; through into .pass2_main, whose tail jump ends the function.
    vpermq              m8, m14, m0
    vpermq              m9, m14, m1
    call .pass2_main
    vpermq              m8, m14, m2
    vpermq              m9, m14, m3
    call .pass2_main
    vpermq              m8, m14, m4
    vpermq              m9, m14, m5
    call .pass2_main
    vpermq              m8, m14, m6
    vpermq              m9, m14, m7
.pass2_main:
    ; m8/m9 = sat(2*x) + pmulhrsw(x, pw_1697x16), all in 16-bit lanes.
    pmulhrsw            m0, m15, m8
    pmulhrsw            m1, m15, m9
    paddsw              m8, m8
    paddsw              m9, m9
    paddsw              m8, m0
    paddsw              m9, m1
    jmp m(idct_16x8_internal_10bpc).write_16x4
ALIGN function_align
.pass1_main:
    ; In:  r6 = coefficient cursor, m10 = multiplier, m11 = rounding bias.
    ; Out: m6-m9 = (row * m10 + m11) >> 13 for rows r6+64*{0,1,8,9};
    ;      r6 advanced by 64*2.
    pmulld              m6, m10, [r6+64*0]
    pmulld              m7, m10, [r6+64*1]
    pmulld              m8, m10, [r6+64*8]
    pmulld              m9, m10, [r6+64*9]
    add                 r6, 64*2
    REPX {paddd x, m11}, m6, m7, m8, m9
    REPX {psrad x, 13 }, m6, m8, m7, m9
    ret
ALIGN function_align
.pass1_main_fast:
    ; In:  r6 = coefficient cursor, m10 = multiplier, m11 = rounding bias.
    ; Out: m6 = 32-byte halves from r6+64*0 (low) and r6+64*4 (high),
    ;      m7 = 32-byte halves from r6+64*8 (low) and r6+64*12 (high),
    ;      both scaled as (x * m10 + m11) >> 13; r6 advanced by 64.
    mova                ym6, [r6+64* 0]
    vinserti32x8        m6, [r6+64* 4], 1
    mova                ym7, [r6+64* 8]
    vinserti32x8        m7, [r6+64*12], 1
    add                 r6, 64
    REPX {pmulld x, m10}, m6, m7
    REPX {paddd x, m11}, m6, m7
    REPX {psrad x, 13 }, m6, m7
    ret

;-----------------------------------------------------------------------
; inv_txfm_add_dct_dct_8x32_10bpc
; Named arguments: dst, stride, c (coefficients), eob.
; Dispatch on eob:
;   eob == 0   -> .dconly
;   eob <  43  -> .fast  (one idct_8x8 10bpc pass over 32-byte row halves)
;   eob < 107  -> one .pass1_main call, then the 8bpc *_fast second pass
;   otherwise  -> .full  (two .pass1_main calls, full 8bpc second pass)
; The second pass always runs the 8bpc kernels, hence the r5 reload with
; o_base_8bpc before each of those calls.
; m21 is kept zero and is used to clear coefficients as they are consumed.
;-----------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_8x32_10bpc, 4, 7, 22, dst, stride, c, eob
%undef cmp
    lea                 r5, [o_base]
    test                eobd, eobd
    jz .dconly
    vpbroadcastd        m12, [o(pd_2896)]
    vpbroadcastd        m13, [o(pd_2048)]
    vpbroadcastd        m14, [o(clip_18b_min)]
    vpbroadcastd        m15, [o(clip_18b_max)]
    vpbroadcastd        m11, [o(pd_2)]
    mova                m20, [o(idct8x32p)]
    pxor                m21, m21
    cmp                 eobd, 43
    jl .fast
    call .pass1_main
    punpcklwd           m16, m0, m1
    punpcklwd           m17, m2, m3
    punpckhwd           m18, m0, m1
    punpckhwd           m19, m2, m3
    cmp                 eobd, 107
    jge .full
    punpckldq           m0, m16, m17            ;  0  2
    punpckhdq           m1, m16, m17            ;  4  6
    punpckldq           m2, m18, m19            ;  8 10
    punpckhdq           m3, m18, m19            ; 12 14
    lea                 r5, [o_base_8bpc]
    ; Upper 256 bits of m0-m3 are split off into ym14-ym17 as inputs for
    ; the 8bpc kernels called below.
    vextracti32x8       ym14, m0, 1
    vextracti32x8       ym15, m1, 1
    vextracti32x8       ym16, m2, 1
    vextracti32x8       ym17, m3, 1
    call m(idct_8x16_internal_8bpc).main_fast
    call m(inv_txfm_add_dct_dct_8x32_8bpc).main_fast
    jmp .end
.full:
    add                 cq, 64                  ; second 64-byte half of each 128-byte row
    call .pass1_main
    punpcklwd           m5, m0, m1
    punpcklwd           m6, m2, m3
    punpckhwd           m7, m0, m1
    punpckhwd           m8, m2, m3
    punpckldq           m0, m16, m17            ;  0  2
    punpckhdq           m1, m16, m17            ;  4  6
    punpckldq           m2, m18, m19            ;  8 10
    punpckhdq           m3, m18, m19            ; 12 14
    punpckldq           m4, m5, m6              ; 16 18
    punpckhdq           m5, m6                  ; 20 22
    punpckldq           m6, m7, m8              ; 24 26
    punpckhdq           m7, m8                  ; 28 30
    lea                 r5, [o_base_8bpc]
    vextracti32x8       ym14, m0, 1
    vextracti32x8       ym15, m1, 1
    vextracti32x8       ym16, m2, 1
    vextracti32x8       ym17, m3, 1
    vextracti32x8       ym18, m4, 1
    vextracti32x8       ym19, m5, 1
    vextracti32x8       ym20, m6, 1
    vextracti32x8       ym21, m7, 1
    call m(idct_8x16_internal_8bpc).main
    REPX {pshufd x, x, q1032}, ym18, ym19, ym20, ym21
    call m(inv_txfm_add_dct_dct_8x32_8bpc).main
    jmp .end
.fast:
    movshdup            m8, [o(permB)]
    mova                ym1, [cq+128*1]
    mova                ym5, [cq+128*5]
    mova                ym7, [cq+128*3]
    mova                ym3, [cq+128*7]
    mova                ym0, [cq+128*0]
    mova                ym4, [cq+128*2]
    mova                ym2, [cq+128*4]
    mova                ym6, [cq+128*6]
    vpermt2q            m1, m8, m5              ; 1 5
    vpermt2q            m3, m8, m7              ; 7 3
    vpermt2q            m0, m8, m4              ; 0 2
    vpermt2q            m2, m8, m6              ; 4 6
    ; Clear the 32-byte row halves that were just loaded (ym21 is zero).
    mova                [cq+128*0], ym21
    REPX {vmovdqa32 [cq+128*x], ym21}, 1, 2, 3, 4, 5, 6, 7
    call m(idct_8x8_internal_10bpc).main
    call m(idct_8x8_internal_10bpc).main_end
    packssdw            m0, m2
    packssdw            m1, m3
    vpermb              m0, m20, m0
    vprold              m20, 16                 ; rotate each dword of the byte permutation by 16 bits
    vpermb              m2, m20, m1
    punpckhdq           m1, m0, m2
    punpckldq           m0, m2
    lea                 r5, [o_base_8bpc]
    vextracti32x8       ym14, m0, 1
    vextracti32x8       ym15, m1, 1
    call m(idct_8x16_internal_8bpc).main_fast2
    call m(inv_txfm_add_dct_dct_8x32_8bpc).main_fast2
.end:
    call m(inv_txfm_add_dct_dct_8x32_8bpc).main_end ; performs vzeroupper
    ; Output stage: dstq walks the first 16 rows, r3 = dstq + 16*stride
    ; walks the last 16. m11 = 0 and m12 = pixel_10bpc_max are the clamp
    ; bounds. m10 is the pmulhrsw multiplier.
    ; NOTE(review): m10 is not written in this function; presumably it is
    ; set up by the main_end call above - confirm.
    lea                 r3, [strideq*2]
    vpbroadcastd        m12, [pixel_10bpc_max]
    lea                 r6, [strideq*3]
    pxor                m11, m11
    lea                 r3, [dstq+r3*8]
    pmulhrsw            m0, m10
    pmulhrsw            m1, m10
    call .write_8x4x2
    pmulhrsw            m0, m10, m2
    pmulhrsw            m1, m10, m3
    call .write_8x4x2
    pmulhrsw            m0, m10, m4
    pmulhrsw            m1, m10, m5
    call .write_8x4x2
    pmulhrsw            m0, m10, m6
    pmulhrsw            m1, m10, m7
    ; The fourth group falls through; the helper's ret returns to the caller
    ; of this function.
.write_8x4x2:
    ; In:  m0 = residual for 4 rows at dstq (lane n -> row n),
    ;      m1 = residual for 4 rows at r3   (lane n -> row 3-n),
    ;      r6 = stride*3, m11 = 0, m12 = pixel max.
    ; Adds the residual to the destination, clamps to [m11, m12], stores,
    ; and advances both dstq and r3 by 4 rows.
    mova                xm8, [dstq+strideq*0]
    vinserti32x4        ym8, [dstq+strideq*1], 1
    vinserti32x4        m8, [dstq+strideq*2], 2
    vinserti32x4        m8, [dstq+r6       ], 3
    mova                xm9, [r3  +r6       ]
    vinserti32x4        ym9, [r3  +strideq*2], 1
    vinserti32x4        m9, [r3  +strideq*1], 2
    vinserti32x4        m9, [r3  +strideq*0], 3
    paddw               m8, m0
    paddw               m9, m1
    pmaxsw              m8, m11
    pmaxsw              m9, m11
    pminsw              m8, m12
    pminsw              m9, m12
    mova                [dstq+strideq*0], xm8
    vextracti32x4       [dstq+strideq*1], ym8, 1
    vextracti32x4       [dstq+strideq*2], m8, 2
    vextracti32x4       [dstq+r6       ], m8, 3
    lea                 dstq, [dstq+strideq*4]
    vextracti32x4       [r3  +strideq*0], m9, 3
    vextracti32x4       [r3  +strideq*1], m9, 2
    vextracti32x4       [r3  +strideq*2], ym9, 1
    mova                [r3  +r6       ], xm9
    lea                 r3, [r3+strideq*4]
    ret
.dconly:
    ; Reached only when eob == 0, so the store of eobd writes zero over the
    ; DC coefficient after it has been read.
    imul                r6d, [cq], 181
    mov                 [cq], eobd
    or                  r3d, 32                 ; NOTE(review): presumably the row count consumed by .dconly2 - confirm
    add                 r6d, 640
    sar                 r6d, 10
    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly2
ALIGN function_align
.pass1_main:
    ; Loads eight 64-byte rows at cq+128*{0..7}, zeroes them in memory
    ; (m21 = 0), runs the 10bpc idct_8x16 kernel, then packs m0-m7 down to
    ; words in m0-m3 and byte-permutes each with m20.
    mova                m0, [cq+128*0]
    mova                m1, [cq+128*1]
    mova                m2, [cq+128*2]
    mova                m3, [cq+128*3]
    mova                m4, [cq+128*4]
    mova                m5, [cq+128*5]
    mova                m6, [cq+128*6]
    mova                m7, [cq+128*7]
    REPX {mova [cq+128*x], m21}, 0, 1, 2, 3, 4, 5, 6, 7
    call m(idct_8x16_internal_10bpc).main
    call m(idct_8x16_internal_10bpc).main_end2
    packssdw            m0, m4
    packssdw            m1, m5
    packssdw            m2, m6
    packssdw            m3, m7
    REPX {vpermb x, m20, x}, m0, m1, m2, m3
    ret

;-----------------------------------------------------------------------
; inv_txfm_add_identity_identity_8x32_10bpc
; Named arguments: dst, stride, c (coefficients), eob.
; Each .loop iteration consumes eight 64-byte coefficient rows (clearing
; them), computes (x + 5) >> 3 on the saturated-packed words, transposes,
; and adds the result to 16 destination rows with clamping to
; [0, pixel_10bpc_max].
; Loop count: eobd is biased by -107 up front. "add eobd, 0x80000000" sets
; carry only when bit 31 is already set, so the loop runs once when
; eob < 107 and exactly twice otherwise.
; Register roles: r4 = stride*3, r5 = stride*5, r6 = stride*7,
;                 r7 = dstq + stride*8, m9 = pw_5, m10 = 0, m11 = pixel max.
;-----------------------------------------------------------------------
cglobal inv_txfm_add_identity_identity_8x32_10bpc, 4, 8, 12, dst, stride, c, eob
    vpbroadcastd        m9, [pw_5]
    lea                 r4, [strideq*3]
    pxor                m10, m10
    lea                 r5, [strideq*5]
    vpbroadcastd        m11, [pixel_10bpc_max]
    sub                 eobd, 107
    lea                 r6, [strideq+r4*2]
.loop:
    mova                m0, [cq+128*0]
    packssdw            m0, [cq+128*1]
    mova                m1, [cq+128*2]
    packssdw            m1, [cq+128*3]
    mova                m2, [cq+128*4]
    packssdw            m2, [cq+128*5]
    mova                m3, [cq+128*6]
    packssdw            m3, [cq+128*7]
    lea                 r7, [dstq+strideq*8]
    REPX {mova [cq+128*x], m10}, 0, 1, 2, 3
    REPX {paddsw x, m9}, m0, m1, m2, m3
    REPX {mova [cq+128*x], m10}, 4, 5, 6, 7
    REPX {psraw x, 3 }, m0, m1, m2, m3
    add                 cq, 64
    ; Destination loads (into m4-m7) are interleaved with the transpose of
    ; m0-m3. Lane n of m4..m7 holds rows {0,1,2,3}+4*(n&1) at dstq (n < 2)
    ; or r7 (n >= 2).
    mova                xm4, [dstq+strideq*0]
    mova                xm5, [dstq+strideq*1]
    mova                xm6, [dstq+strideq*2]
    mova                xm7, [dstq+r4     *1]
    punpckhwd           m8, m0, m1
    vinserti32x4        ym4, [dstq+strideq*4], 1
    punpcklwd           m0, m1
    vinserti32x4        ym5, [dstq+r5     *1], 1
    punpckhwd           m1, m2, m3
    vinserti32x4        ym6, [dstq+r4     *2], 1
    punpcklwd           m2, m3
    vinserti32x4        ym7, [dstq+r6     *1], 1
    punpckhwd           m3, m0, m8
    vinserti32x4        m4, [r7  +strideq*0], 2
    punpcklwd           m0, m8
    vinserti32x4        m5, [r7  +strideq*1], 2
    punpckhwd           m8, m2, m1
    vinserti32x4        m6, [r7  +strideq*2], 2
    punpcklwd           m2, m1
    vinserti32x4        m7, [r7  +r4     *1], 2
    punpckhqdq          m1, m0, m2
    vinserti32x4        m4, [r7  +strideq*4], 3
    punpcklqdq          m0, m2
    vinserti32x4        m5, [r7  +r5     *1], 3
    punpcklqdq          m2, m3, m8
    vinserti32x4        m6, [r7  +r4     *2], 3
    punpckhqdq          m3, m8
    vinserti32x4        m7, [r7  +r6     *1], 3
    paddw               m0, m4
    paddw               m1, m5
    paddw               m2, m6
    paddw               m3, m7
    REPX {pmaxsw x, m10}, m0, m1, m2, m3
    REPX {pminsw x, m11}, m0, m1, m2, m3
    mova                [dstq+strideq*0], xm0
    mova                [dstq+strideq*1], xm1
    mova                [dstq+strideq*2], xm2
    mova                [dstq+r4     *1], xm3
    vextracti32x4       [dstq+strideq*4], ym0, 1
    vextracti32x4       [dstq+r5     *1], ym1, 1
    vextracti32x4       [dstq+r4     *2], ym2, 1
    vextracti32x4       [dstq+r6     *1], ym3, 1
    lea                 dstq, [r7+strideq*8]    ; advance dst by 16 rows for the next iteration
    vextracti32x4       [r7  +strideq*0], m0, 2
    vextracti32x4       [r7  +strideq*1], m1, 2
    vextracti32x4       [r7  +strideq*2], m2, 2
    vextracti32x4       [r7  +r4     *1], m3, 2
    vextracti32x4       [r7  +strideq*4], m0, 3
    vextracti32x4       [r7  +r5     *1], m1, 3
    vextracti32x4       [r7  +r4     *2], m2, 3
    vextracti32x4       [r7  +r6     *1], m3, 3
    add                 eobd, 0x80000000        ; carry set <=> bit 31 was already set
    jnc .loop
    RET

cglobal inv_txfm_add_dct_dct_32x8_10bpc, 4, 7, 0, dst, stride, c, eob
%undef cmp
    lea                 r5, [o_base]
    test                eobd, eobd
    jz .dconly
    mova                m11, [o(permB)]
    mova                m0, [cq+64* 0]          ;  0  1
    mova                m4, [cq+64* 1]          ;  2  3
    mova                m1, [cq+64* 2]          ;  4  5
    mova                m8, [cq+64* 3]          ;  6  7
    vpbroadcastd        m12, [o(pd_2896)]
    vpbroadcastd        m13, [o(pd_2048)]
    vpbroadcastd        m14, [o(clip_18b_min)]
    vpbroadcastd        m15, [o(clip_18b_max)]
    psrlq               m10, m11, 32
%if WIN64
    movaps              [cq+16*0], xmm6
    movaps              [cq+16*1], xmm7
%endif
mova m16, m11 2374*c0909341SAndroid Build Coastguard Worker vpermi2q m16, m0, m1 ; 1 5 2375*c0909341SAndroid Build Coastguard Worker mova m17, m11 2376*c0909341SAndroid Build Coastguard Worker vpermi2q m17, m8, m4 ; 7 3 2377*c0909341SAndroid Build Coastguard Worker cmp eobd, 43 2378*c0909341SAndroid Build Coastguard Worker jl .fast 2379*c0909341SAndroid Build Coastguard Worker mova m18, [cq+64* 4] ; 8 9 2380*c0909341SAndroid Build Coastguard Worker mova m20, [cq+64* 5] ; 10 11 2381*c0909341SAndroid Build Coastguard Worker mova m6, [cq+64* 6] ; 12 13 2382*c0909341SAndroid Build Coastguard Worker mova m7, [cq+64* 7] ; 14 15 2383*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m10, m18 ; 0 8 2384*c0909341SAndroid Build Coastguard Worker vpermt2q m18, m11, m6 ; 9 13 2385*c0909341SAndroid Build Coastguard Worker mova m19, m11 2386*c0909341SAndroid Build Coastguard Worker vpermi2q m19, m7, m20 ; 15 11 2387*c0909341SAndroid Build Coastguard Worker cmp eobd, 107 2388*c0909341SAndroid Build Coastguard Worker jge .full 2389*c0909341SAndroid Build Coastguard Worker vpermt2q m1, m10, m6 ; 4 12 2390*c0909341SAndroid Build Coastguard Worker vpermt2q m4, m10, m8 ; 2 6 2391*c0909341SAndroid Build Coastguard Worker vpermt2q m7, m10, m20 ; 14 10 2392*c0909341SAndroid Build Coastguard Worker mov r6d, 64*1 2393*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main_fast 2394*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).main_fast 2395*c0909341SAndroid Build Coastguard Worker call .main_fast 2396*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_end 2397*c0909341SAndroid Build Coastguard Worker jmp .end 2398*c0909341SAndroid Build Coastguard Worker.full: 2399*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64* 8] ; 16 17 2400*c0909341SAndroid Build Coastguard Worker mova m5, [cq+64* 9] ; 18 19 2401*c0909341SAndroid Build Coastguard Worker mova m9, [cq+64*10] ; 20 21 2402*c0909341SAndroid Build 
Coastguard Worker mova m21, [cq+64*11] ; 22 23 2403*c0909341SAndroid Build Coastguard Worker vpermt2q m1, m10, m9 ; 4 20 2404*c0909341SAndroid Build Coastguard Worker vpermt2q m7, m10, m21 ; 14 22 2405*c0909341SAndroid Build Coastguard Worker vpermt2q m21, m11, m5 ; 23 19 2406*c0909341SAndroid Build Coastguard Worker vpermt2q m5, m10, m20 ; 18 10 2407*c0909341SAndroid Build Coastguard Worker mova m20, m11 2408*c0909341SAndroid Build Coastguard Worker vpermi2q m20, m2, m9 ; 17 21 2409*c0909341SAndroid Build Coastguard Worker mova m22, [cq+64*12] ; 24 25 2410*c0909341SAndroid Build Coastguard Worker mova m9, [cq+64*13] ; 26 27 2411*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*14] ; 28 29 2412*c0909341SAndroid Build Coastguard Worker mova m23, [cq+64*15] ; 30 31 2413*c0909341SAndroid Build Coastguard Worker vpermt2q m2, m10, m22 ; 16 24 2414*c0909341SAndroid Build Coastguard Worker vpermt2q m22, m11, m3 ; 25 29 2415*c0909341SAndroid Build Coastguard Worker vpermt2q m3, m10, m6 ; 28 12 2416*c0909341SAndroid Build Coastguard Worker vpermt2q m4, m10, m9 ; 2 26 2417*c0909341SAndroid Build Coastguard Worker mova m6, m10 2418*c0909341SAndroid Build Coastguard Worker vpermi2q m6, m23, m8 ; 30 6 2419*c0909341SAndroid Build Coastguard Worker vpermt2q m23, m11, m9 ; 31 27 2420*c0909341SAndroid Build Coastguard Worker mov r6d, 64*3 2421*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main 2422*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).main 2423*c0909341SAndroid Build Coastguard Worker call .main 2424*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_end 2425*c0909341SAndroid Build Coastguard Worker jmp .end 2426*c0909341SAndroid Build Coastguard Worker.fast: 2427*c0909341SAndroid Build Coastguard Worker vpermq m0, m10, m0 ; 0 0 2428*c0909341SAndroid Build Coastguard Worker vpermq m1, m10, m1 ; 4 4 2429*c0909341SAndroid Build Coastguard Worker vpermt2q m4, m10, m8 ; 2 6 
2430*c0909341SAndroid Build Coastguard Worker xor r6d, r6d 2431*c0909341SAndroid Build Coastguard Worker call .main_fast2 2432*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_end 2433*c0909341SAndroid Build Coastguard Worker.end: 2434*c0909341SAndroid Build Coastguard Worker%if WIN64 2435*c0909341SAndroid Build Coastguard Worker movaps xmm6, [cq+16*0] 2436*c0909341SAndroid Build Coastguard Worker movaps xmm7, [cq+16*1] 2437*c0909341SAndroid Build Coastguard Worker%endif 2438*c0909341SAndroid Build Coastguard Worker vzeroupper 2439*c0909341SAndroid Build Coastguard Worker call .transpose_8x32 2440*c0909341SAndroid Build Coastguard Worker pxor m14, m14 2441*c0909341SAndroid Build Coastguard Worker.zero_loop: 2442*c0909341SAndroid Build Coastguard Worker mova [cq+r6*4+64*3], m14 2443*c0909341SAndroid Build Coastguard Worker mova [cq+r6*4+64*2], m14 2444*c0909341SAndroid Build Coastguard Worker mova [cq+r6*4+64*1], m14 2445*c0909341SAndroid Build Coastguard Worker mova [cq+r6*4+64*0], m14 2446*c0909341SAndroid Build Coastguard Worker sub r6d, 64 2447*c0909341SAndroid Build Coastguard Worker jge .zero_loop 2448*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 2449*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m0, m2 2450*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m2 2451*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m3, m4 2452*c0909341SAndroid Build Coastguard Worker punpckhqdq m3, m4 2453*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m5, m7 2454*c0909341SAndroid Build Coastguard Worker punpckhqdq m5, m7 2455*c0909341SAndroid Build Coastguard Worker punpckhqdq m7, m6, m8 2456*c0909341SAndroid Build Coastguard Worker punpcklqdq m6, m8 2457*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_8bpc).main 2458*c0909341SAndroid Build Coastguard Worker pxor m12, m12 2459*c0909341SAndroid Build Coastguard Worker.write_32x8_start: 2460*c0909341SAndroid Build Coastguard 
Worker vpbroadcastd m11, [pw_2048] 2461*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [pixel_10bpc_max] 2462*c0909341SAndroid Build Coastguard Worker lea r3, [strideq*3] 2463*c0909341SAndroid Build Coastguard Worker.write_32x8: 2464*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11 2465*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11 2466*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11 2467*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11 2468*c0909341SAndroid Build Coastguard Worker call .write_32x4 2469*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m4 2470*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m5 2471*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m6 2472*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11, m7 2473*c0909341SAndroid Build Coastguard Worker.write_32x4: 2474*c0909341SAndroid Build Coastguard Worker paddw m0, [dstq+strideq*0] 2475*c0909341SAndroid Build Coastguard Worker paddw m1, [dstq+strideq*1] 2476*c0909341SAndroid Build Coastguard Worker paddw m2, [dstq+strideq*2] 2477*c0909341SAndroid Build Coastguard Worker paddw m3, [dstq+r3 ] 2478*c0909341SAndroid Build Coastguard Worker REPX {pmaxsw x, m12}, m0, m1, m2, m3 2479*c0909341SAndroid Build Coastguard Worker REPX {pminsw x, m13}, m0, m1, m2, m3 2480*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], m0 2481*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1], m1 2482*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*2], m2 2483*c0909341SAndroid Build Coastguard Worker mova [dstq+r3 ], m3 2484*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 2485*c0909341SAndroid Build Coastguard Worker ret 2486*c0909341SAndroid Build Coastguard Worker.dconly: 2487*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 2488*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 2489*c0909341SAndroid Build Coastguard Worker or r3d, 8 2490*c0909341SAndroid 
Build Coastguard Worker add r6d, 640 2491*c0909341SAndroid Build Coastguard Worker sar r6d, 10 2492*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x16_10bpc).dconly2 2493*c0909341SAndroid Build Coastguard WorkerALIGN function_align 2494*c0909341SAndroid Build Coastguard Worker.main_fast3: 2495*c0909341SAndroid Build Coastguard Worker ; assuming m0=in0 in0, m4=in2 in2, and m16=in1 in3 2496*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m5, [o(pd_401_4076)] 2497*c0909341SAndroid Build Coastguard Worker pmulld m3, m0, m12 2498*c0909341SAndroid Build Coastguard Worker pmulld m4, m5 2499*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m3, m4 2500*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m3, m4 ; m3=idct8:t0-7, m4=t8a t15a 2501*c0909341SAndroid Build Coastguard Worker 2502*c0909341SAndroid Build Coastguard Worker ; t8a t15a -> t8/9 t14/15 2503*c0909341SAndroid Build Coastguard Worker 2504*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m5, [o(pd_3784_m3784)] 2505*c0909341SAndroid Build Coastguard Worker pshufd m7, m4, q1032 2506*c0909341SAndroid Build Coastguard Worker pmulld m6, m4, [o(pd_1567)]{bcstd} 2507*c0909341SAndroid Build Coastguard Worker pmulld m5, m7 2508*c0909341SAndroid Build Coastguard Worker paddd m6, m13 2509*c0909341SAndroid Build Coastguard Worker paddd m5, m6 2510*c0909341SAndroid Build Coastguard Worker psrad m5, 12 ; m5=t9a t14a 2511*c0909341SAndroid Build Coastguard Worker 2512*c0909341SAndroid Build Coastguard Worker ; t14a t9a -> t13/14 t9/10 [m5] & t8 15 -> t8/11a t12/15a [m4] 2513*c0909341SAndroid Build Coastguard Worker 2514*c0909341SAndroid Build Coastguard Worker shufps m6, m4, m5, q1032 ; t12 t13 2515*c0909341SAndroid Build Coastguard Worker shufps m8, m4, m5, q3210 ; t11a t10 2516*c0909341SAndroid Build Coastguard Worker pmulld m9, m6, m12 2517*c0909341SAndroid Build Coastguard Worker pmulld m7, m8, m12 2518*c0909341SAndroid Build Coastguard Worker paddd m9, m13 
2519*c0909341SAndroid Build Coastguard Worker paddd m5, m9, m7 ; t12 t13a 2520*c0909341SAndroid Build Coastguard Worker psubd m4, m9, m7 ; t11 t10a 2521*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m5, m4 2522*c0909341SAndroid Build Coastguard Worker 2523*c0909341SAndroid Build Coastguard Worker psubd m7, m3, m6 ; dct16 out15 out14 2524*c0909341SAndroid Build Coastguard Worker paddd m0, m3, m6 ; dct16 out0 out1 2525*c0909341SAndroid Build Coastguard Worker psubd m6, m3, m5 ; dct16 out12 out13 2526*c0909341SAndroid Build Coastguard Worker paddd m1, m3, m5 ; dct16 out3 out2 2527*c0909341SAndroid Build Coastguard Worker psubd m5, m3, m4 ; dct16 out11 out10 2528*c0909341SAndroid Build Coastguard Worker paddd m2, m3, m4 ; dct16 out4 out5 2529*c0909341SAndroid Build Coastguard Worker psubd m4, m3, m8 ; dct16 out8 out9 2530*c0909341SAndroid Build Coastguard Worker paddd m3, m8 ; dct16 out7 out6 2531*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m7, m0, m6, m1, m5, m2, m4, m3 2532*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m0, m1, m2, m3, m4, m5, m6, m7 2533*c0909341SAndroid Build Coastguard Worker 2534*c0909341SAndroid Build Coastguard Worker ; idct32_bottomhalf 2535*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m18, [o(pd_201_m601)] 2536*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m19, [o(pd_4091_4052)] 2537*c0909341SAndroid Build Coastguard Worker pmulld m17, m16, m19 2538*c0909341SAndroid Build Coastguard Worker pmulld m16, m18 2539*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m17, m16 2540*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m17, m16 2541*c0909341SAndroid Build Coastguard Worker 2542*c0909341SAndroid Build Coastguard Worker ; m17: t31a t24a -> t30/31 t24/25, m16: t16a t23a -> t16/17 t22/23 [step2] 2543*c0909341SAndroid Build Coastguard Worker 2544*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m10, [o(pd_799_m2276)] 2545*c0909341SAndroid 
Build Coastguard Worker vbroadcasti32x4 m11, [o(pd_4017_3406)] 2546*c0909341SAndroid Build Coastguard Worker pmulld m18, m17, m10 2547*c0909341SAndroid Build Coastguard Worker pmulld m19, m17, m11 2548*c0909341SAndroid Build Coastguard Worker pmulld m8, m16, m11 2549*c0909341SAndroid Build Coastguard Worker pmulld m9, m16, m10 2550*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m18, m19 2551*c0909341SAndroid Build Coastguard Worker psubd m18, m8 2552*c0909341SAndroid Build Coastguard Worker paddd m19, m9 2553*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m18, m19 2554*c0909341SAndroid Build Coastguard Worker 2555*c0909341SAndroid Build Coastguard Worker ; m17=t31 t24 -> t28/31a t24/27a, m16=t16 t23 -> t16/19a t20/23a 2556*c0909341SAndroid Build Coastguard Worker ; m18=t17a t22a -> t17/18 t21/22, m19=t30a t25a -> t29/30 t25/26 2557*c0909341SAndroid Build Coastguard Worker 2558*c0909341SAndroid Build Coastguard Worker punpckhqdq m23, m17, m19 ; t24a t25 [or t27a t26] 2559*c0909341SAndroid Build Coastguard Worker punpcklqdq m20, m16, m18 ; t16a t17 [or t19a t18] 2560*c0909341SAndroid Build Coastguard Worker punpckhqdq m22, m16, m18 ; t23a t22 [or t20a t21] 2561*c0909341SAndroid Build Coastguard Worker punpcklqdq m16, m17, m19 ; t28a t29 [or t31a t30] 2562*c0909341SAndroid Build Coastguard Worker mova m21, m23 2563*c0909341SAndroid Build Coastguard Worker mova m18, m20 2564*c0909341SAndroid Build Coastguard Worker mova m17, m22 2565*c0909341SAndroid Build Coastguard Worker mova m19, m16 2566*c0909341SAndroid Build Coastguard Worker 2567*c0909341SAndroid Build Coastguard Worker jmp .main4 2568*c0909341SAndroid Build Coastguard Worker.main_fast2: ; bottom three-quarters are zero 2569*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m8, [o(pd_799_4017)] 2570*c0909341SAndroid Build Coastguard Worker pmulld m8, m1 ; t4 t7 2571*c0909341SAndroid Build Coastguard Worker vpmulld m0, [o(pd_2896)] {1to16} ; t0 t1 2572*c0909341SAndroid Build 
Coastguard Worker REPX {paddd x, m13}, m8, m0 2573*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m8, m0 2574*c0909341SAndroid Build Coastguard Worker pmulld m3, m8, m12 2575*c0909341SAndroid Build Coastguard Worker mova m2, m0 ; t3 t2 2576*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main3 2577*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m6, [o(pd_4076_3920)] 2578*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m3, [o(pd_401_m1189)] 2579*c0909341SAndroid Build Coastguard Worker pmulld m6, m4 ; t15 t12 2580*c0909341SAndroid Build Coastguard Worker pmulld m4, m3 ; t9 t10 2581*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m6, m4 2582*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m6, m4 2583*c0909341SAndroid Build Coastguard Worker mova m5, m6 ; t14 t13 2584*c0909341SAndroid Build Coastguard Worker mova m9, m4 ; t8 t11 2585*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).main3 2586*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m23, [o(pd_4091_3973)] 2587*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m7, [o(pd_201_995)] 2588*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m22, [o(pd_1380_601)] 2589*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m9, [o(pd_3857_4052)] 2590*c0909341SAndroid Build Coastguard Worker pmulld m23, m16 ; t16 t20 2591*c0909341SAndroid Build Coastguard Worker pmulld m16, m7 ; t31 t27 2592*c0909341SAndroid Build Coastguard Worker pmulld m22, m17 ; -t19 -t25 2593*c0909341SAndroid Build Coastguard Worker pmulld m17, m9 ; t28 t24 2594*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m23, m16, m17 2595*c0909341SAndroid Build Coastguard Worker psubd m22, m13, m22 2596*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m23, m16, m22, m17 2597*c0909341SAndroid Build Coastguard Worker mova m20, m23 ; t30 t26 2598*c0909341SAndroid Build Coastguard Worker mova m9, m16 ; 
t17 t21 2599*c0909341SAndroid Build Coastguard Worker mova m19, m22 ; t18 t22 2600*c0909341SAndroid Build Coastguard Worker mova m18, m17 ; t29 t25 2601*c0909341SAndroid Build Coastguard Worker jmp .main3 2602*c0909341SAndroid Build Coastguard Worker.main_fast: ; bottom half is zero 2603*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m23, [o(pd_4091_3973)] 2604*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m7, [o(pd_201_995)] 2605*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m20, [o(pd_2751_2106)] 2606*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m9, [o(pd_3035_3513)] 2607*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m21, [o(pd_3703_3290)] 2608*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m10, [o(pd_1751_2440)] 2609*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m22, [o(pd_1380_601)] 2610*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m11, [o(pd_3857_4052)] 2611*c0909341SAndroid Build Coastguard Worker pmulld m23, m16 ; t16a t20a 2612*c0909341SAndroid Build Coastguard Worker pmulld m16, m7 ; t31a t27a 2613*c0909341SAndroid Build Coastguard Worker pmulld m20, m19 ; -t17a -t21a 2614*c0909341SAndroid Build Coastguard Worker pmulld m19, m9 ; t30a t26a 2615*c0909341SAndroid Build Coastguard Worker pmulld m21, m18 ; t18a t22a 2616*c0909341SAndroid Build Coastguard Worker pmulld m18, m10 ; t29a t25a 2617*c0909341SAndroid Build Coastguard Worker pmulld m22, m17 ; -t19a -t25a 2618*c0909341SAndroid Build Coastguard Worker pmulld m17, m11 ; t28a t24a 2619*c0909341SAndroid Build Coastguard Worker psubd m20, m13, m20 2620*c0909341SAndroid Build Coastguard Worker psubd m22, m13, m22 2621*c0909341SAndroid Build Coastguard Worker jmp .main2 2622*c0909341SAndroid Build Coastguard Worker.main: 2623*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 16, 23, 7, 9, 10, _, 201_995, 4091_3973 2624*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 20, 19, 7, 9, 10, _, 3035_3513, 2751_2106 
2625*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 18, 21, 7, 9, 10, _, 1751_2440, 3703_3290 2626*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 22, 17, 7, 9, 10, _, 3857_4052, 1380_601 2627*c0909341SAndroid Build Coastguard Worker paddd m20, m13 2628*c0909341SAndroid Build Coastguard Worker paddd m22, m13 2629*c0909341SAndroid Build Coastguard Worker.main2: 2630*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m16, m23, m19 2631*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m16, m20, m23, m19 2632*c0909341SAndroid Build Coastguard Worker psubd m9, m16, m20 ; t17 t21 2633*c0909341SAndroid Build Coastguard Worker paddd m16, m20 ; t16 t20 2634*c0909341SAndroid Build Coastguard Worker psubd m20, m23, m19 ; t30 t26 2635*c0909341SAndroid Build Coastguard Worker paddd m23, m19 ; t31 t27 2636*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m9, m16, m20, m23 2637*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m21, m18, m17 2638*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m18, m22, m21, m17 2639*c0909341SAndroid Build Coastguard Worker psubd m19, m22, m18 ; t18 t22 2640*c0909341SAndroid Build Coastguard Worker paddd m22, m18 ; t19 t23 2641*c0909341SAndroid Build Coastguard Worker psubd m18, m17, m21 ; t29 t25 2642*c0909341SAndroid Build Coastguard Worker paddd m17, m21 ; t28 t24 2643*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m19, m22, m18, m17 2644*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m20, m9, m18, m19, m16, m23, m22, m17 2645*c0909341SAndroid Build Coastguard Worker.main3: 2646*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m11, [o(pd_4017_2276)] 2647*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m10, [o(pd_799_3406)] 2648*c0909341SAndroid Build Coastguard Worker psubd m7, m0, m6 ; dct16 out15 out14 2649*c0909341SAndroid Build Coastguard Worker paddd m0, m6 ; dct16 out0 out1 2650*c0909341SAndroid Build Coastguard 
Worker psubd m6, m1, m5 ; dct16 out12 out13 2651*c0909341SAndroid Build Coastguard Worker paddd m1, m5 ; dct16 out3 out2 2652*c0909341SAndroid Build Coastguard Worker psubd m5, m2, m4 ; dct16 out11 out10 2653*c0909341SAndroid Build Coastguard Worker paddd m2, m4 ; dct16 out4 out5 2654*c0909341SAndroid Build Coastguard Worker psubd m4, m3, m8 ; dct16 out8 out9 2655*c0909341SAndroid Build Coastguard Worker paddd m3, m8 ; dct16 out7 out6 2656*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 20, 9, 8, 21, _, 13, 10, 11 2657*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 18, 19, 8, 21, _, 13, 10, 11, 2 2658*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m7, m0, m6, m1, m5, m2, m4, m3 2659*c0909341SAndroid Build Coastguard Worker punpckhqdq m21, m16, m20 ; t20 t21a 2660*c0909341SAndroid Build Coastguard Worker punpcklqdq m16, m20 ; t16 t17a 2661*c0909341SAndroid Build Coastguard Worker punpcklqdq m20, m22, m19 ; t19 t18a 2662*c0909341SAndroid Build Coastguard Worker punpckhqdq m22, m19 ; t23 t22a 2663*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m0, m1, m2, m3, m4, m5, m6, m7 2664*c0909341SAndroid Build Coastguard Worker punpcklqdq m19, m23, m9 ; t31 t30a 2665*c0909341SAndroid Build Coastguard Worker punpckhqdq m23, m9 ; t27 t26a 2666*c0909341SAndroid Build Coastguard Worker punpckhqdq m9, m17, m18 ; t24 t25a 2667*c0909341SAndroid Build Coastguard Worker punpcklqdq m17, m18 ; t28 t29a 2668*c0909341SAndroid Build Coastguard Worker psubd m18, m16, m20 ; t19a t18 2669*c0909341SAndroid Build Coastguard Worker paddd m20, m16 ; t16a t17 2670*c0909341SAndroid Build Coastguard Worker psubd m16, m19, m17 ; t28a t29 2671*c0909341SAndroid Build Coastguard Worker paddd m19, m17 ; t31a t30 2672*c0909341SAndroid Build Coastguard Worker psubd m17, m22, m21 ; t20a t21 2673*c0909341SAndroid Build Coastguard Worker paddd m22, m21 ; t23a t22 2674*c0909341SAndroid Build Coastguard Worker psubd m21, m9, m23 ; t27a t26 2675*c0909341SAndroid 
Build Coastguard Worker paddd m23, m9 ; t24a t25 2676*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m18, m16, m17, m21 2677*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m16, m18, m21, m17 2678*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m20, m22, m19, m23 2679*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m20, m22, m19, m23 2680*c0909341SAndroid Build Coastguard Worker.main4: 2681*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_3784)] 2682*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_1567)] 2683*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 16, 18, 8, 9, _, 13, 10, 11 2684*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 21, 17, 8, 9, _, 13, 10, 11, 2 2685*c0909341SAndroid Build Coastguard Worker paddd m9, m20, m22 ; t16 t17a 2686*c0909341SAndroid Build Coastguard Worker psubd m20, m22 ; t23 t22a 2687*c0909341SAndroid Build Coastguard Worker paddd m22, m19, m23 ; t31 t30a 2688*c0909341SAndroid Build Coastguard Worker psubd m19, m23 ; t24 t25a 2689*c0909341SAndroid Build Coastguard Worker psubd m23, m16, m17 ; t20a t21 2690*c0909341SAndroid Build Coastguard Worker paddd m16, m17 ; t19a t18 2691*c0909341SAndroid Build Coastguard Worker psubd m17, m18, m21 ; t27a t26 2692*c0909341SAndroid Build Coastguard Worker paddd m21, m18 ; t28a t29 2693*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m20, m19, m23, m17 2694*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m19, m20, m17, m23 2695*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m12}, m19, m20, m17, m23 2696*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m22, m21, m16, m9 2697*c0909341SAndroid Build Coastguard Worker paddd m19, m13 2698*c0909341SAndroid Build Coastguard Worker paddd m17, m13 2699*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m22, m21, m16, m9 2700*c0909341SAndroid Build Coastguard Worker psubd m18, m19, m20 
; t23a t22 2701*c0909341SAndroid Build Coastguard Worker paddd m19, m20 ; t24a t25 2702*c0909341SAndroid Build Coastguard Worker paddd m20, m17, m23 ; t27 t26a 2703*c0909341SAndroid Build Coastguard Worker psubd m17, m23 ; t20 t21a 2704*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m20, m19, m18, m17 2705*c0909341SAndroid Build Coastguard Worker ret 2706*c0909341SAndroid Build Coastguard Worker.transpose_8x32: 2707*c0909341SAndroid Build Coastguard Worker mova m10, [o(idct32x8p)] 2708*c0909341SAndroid Build Coastguard Worker psrlw m8, m10, 8 2709*c0909341SAndroid Build Coastguard Worker mova m9, m8 2710*c0909341SAndroid Build Coastguard Worker vpermi2w m8, m1, m5 2711*c0909341SAndroid Build Coastguard Worker vpermt2w m1, m10, m5 2712*c0909341SAndroid Build Coastguard Worker vprold m5, m9, 16 2713*c0909341SAndroid Build Coastguard Worker vpermi2w m9, m3, m7 2714*c0909341SAndroid Build Coastguard Worker vpermt2w m3, m10, m7 2715*c0909341SAndroid Build Coastguard Worker vprold m10, 16 2716*c0909341SAndroid Build Coastguard Worker mova m7, m5 2717*c0909341SAndroid Build Coastguard Worker vpermi2w m5, m0, m4 2718*c0909341SAndroid Build Coastguard Worker vpermt2w m0, m10, m4 2719*c0909341SAndroid Build Coastguard Worker vpermi2w m7, m2, m6 2720*c0909341SAndroid Build Coastguard Worker vpermt2w m2, m10, m6 2721*c0909341SAndroid Build Coastguard Worker punpckhdq m6, m5, m8 2722*c0909341SAndroid Build Coastguard Worker punpckldq m5, m8 2723*c0909341SAndroid Build Coastguard Worker punpckhdq m8, m7, m9 2724*c0909341SAndroid Build Coastguard Worker punpckldq m7, m9 2725*c0909341SAndroid Build Coastguard Worker punpckhdq m4, m2, m3 2726*c0909341SAndroid Build Coastguard Worker punpckldq m2, m3 2727*c0909341SAndroid Build Coastguard Worker punpckhdq m3, m0, m1 2728*c0909341SAndroid Build Coastguard Worker punpckldq m0, m1 2729*c0909341SAndroid Build Coastguard Worker ret 2730*c0909341SAndroid Build Coastguard Worker 2731*c0909341SAndroid Build Coastguard 
;-----------------------------------------------------------------------
; inv_txfm_add_identity_identity_32x8_10bpc(dst, stride, c, eob)
;
; Each .loop iteration:
;   - reads eight 64-byte vectors of 32-bit coefficients from cq and
;     packs them to 16 bit with signed saturation (packssdw)
;   - scales them with pmulhrsw by pw_4096
;   - reorders words with the idtx32x8p table (m6) and the same table
;     shifted right by 8 bits per word (m7)
;   - adds the result to 8 rows x 32 bytes of dst, clamps to
;     [0, pixel_10bpc_max] and stores it back
;   - zeroes the 512 coefficient bytes it consumed
;   - advances cq by 64*8 bytes and dstq by 32 bytes
;
; Iteration count: eobd is biased by -107 up front and 0x80000000 is
; added after every pass. With eob < 107 bit 31 is already set, so the
; first add carries and the loop runs once; otherwise the first add only
; sets bit 31 and the second one carries, giving two passes.
;
; Row offsets: r4 = stride*3, r5 = stride*5, r6 = stride*7.
;-----------------------------------------------------------------------
cglobal inv_txfm_add_identity_identity_32x8_10bpc, 4, 7, 10, dst, stride, c, eob
    vpbroadcastd    m5, [pw_4096]
    lea             r4, [strideq*3]
    mova            m6, [idtx32x8p]
    lea             r5, [strideq*5]
    vpbroadcastd    m9, [pixel_10bpc_max]
    lea             r6, [strideq+r4*2]
    pxor            m8, m8              ; zero: coefficient clear value and lower clamp bound
    sub             eobd, 107
    psrlw           m7, m6, 8
.loop:
    mova            m0, [cq+64*0]
    packssdw        m0, [cq+64*1] ; 02 13
    mova            m1, [cq+64*2]
    packssdw        m1, [cq+64*3] ; 46 57
    mova            m2, [cq+64*4]
    packssdw        m2, [cq+64*5] ; 8a 9b
    mova            m3, [cq+64*6]
    packssdw        m3, [cq+64*7] ; ce df
    REPX {pmulhrsw x, m5}, m0, m1, m2, m3
    REPX {mova [cq+64*x], m8}, 0, 1, 2, 3
    mova            m4, m6
    vpermi2w        m4, m1, m3
    vpermt2w        m1, m7, m3
    REPX {mova [cq+64*x], m8}, 4, 5, 6, 7
    mova            m3, m7
    vpermi2w        m3, m0, m2
    vpermt2w        m0, m6, m2
    add             cq, 64*8
    punpcklqdq      m2, m3, m1 ; 4 5
    punpckhqdq      m3, m1 ; 6 7
    punpckhqdq      m1, m0, m4 ; 2 3
    punpcklqdq      m0, m4 ; 0 1
    ; each zmm holds two 32-byte dst rows: low half = even row, high half = odd row
    mova            ym4, [dstq+strideq*0]
    vinserti32x8    m4, [dstq+strideq*1], 1
    paddw           m0, m4
    mova            ym4, [dstq+strideq*2]
    vinserti32x8    m4, [dstq+r4 *1], 1
    paddw           m1, m4
    mova            ym4, [dstq+strideq*4]
    vinserti32x8    m4, [dstq+r5 *1], 1
    paddw           m2, m4
    mova            ym4, [dstq+r4 *2]
    vinserti32x8    m4, [dstq+r6 *1], 1
    paddw           m3, m4
    REPX {pmaxsw x, m8}, m0, m1, m2, m3
    REPX {pminsw x, m9}, m0, m1, m2, m3
    mova            [dstq+strideq*0], ym0
    vextracti32x8   [dstq+strideq*1], m0, 1
    mova            [dstq+strideq*2], ym1
    vextracti32x8   [dstq+r4 *1], m1, 1
    mova            [dstq+strideq*4], ym2
    vextracti32x8   [dstq+r5 *1], m2, 1
    mova            [dstq+r4 *2], ym3
    vextracti32x8   [dstq+r6 *1], m3, 1
    add             dstq, 32
    add             eobd, 0x80000000
    jnc             .loop
    RET

; --- start of inv_txfm_add_dct_dct_16x32_10bpc; the function continues past this span ---
cglobal inv_txfm_add_dct_dct_16x32_10bpc, 4, 7, 0, dst, stride, c, eob
%undef cmp
    lea             r5, [o_base]
    test            eobd, eobd
    jz              .dconly
    vpbroadcastd    m12, [o(pd_2896)]
    vpbroadcastd    m13, [o(pd_2048)]
    vpbroadcastd    m14, [o(clip_18b_min)]
    vpbroadcastd    m15, [o(clip_18b_max)]
%if WIN64
    movaps          [rsp+ 8], xmm6
    movaps          [rsp+24], xmm7
%endif
    cmp             eobd, 36
    jl              .fast
    call            .pass1
    cmp             eobd, 151
    jge             .full
    lea             r5, [o_base_8bpc]
    pxor            m9, m9
    punpcklwd       m8, m1, m1 ; 2
    punpckhwd       m14, m1, m1 ; 3
    punpcklwd       m1, m3, m3 ; 6
    punpckhwd       m15, m3, m3 ; 7
    punpcklwd       m3, m6, m6 ; 12
    punpckhwd       m19, m6, m6 ; 13
punpcklwd m6, m9, m4 ; __ 8 2818*c0909341SAndroid Build Coastguard Worker punpckhwd m20, m4, m4 ; 9 2819*c0909341SAndroid Build Coastguard Worker punpckhwd m16, m5, m5 ; 11 2820*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m5 ; 10 2821*c0909341SAndroid Build Coastguard Worker punpcklwd m9, m0 ; __ 0 2822*c0909341SAndroid Build Coastguard Worker punpckhwd m21, m0, m0 ; 1 2823*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m7, m7 ; 14 2824*c0909341SAndroid Build Coastguard Worker punpckhwd m17, m7, m7 ; 15 2825*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m2, m2 ; 4 2826*c0909341SAndroid Build Coastguard Worker punpckhwd m18, m2, m2 ; 5 2827*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main_fast 2828*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast 2829*c0909341SAndroid Build Coastguard Worker mov r6d, 64*3 2830*c0909341SAndroid Build Coastguard Worker pxor m8, m8 2831*c0909341SAndroid Build Coastguard Worker.zero_loop: 2832*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+r6*8+128*x], m8}, 3, 2, 1, 0 2833*c0909341SAndroid Build Coastguard Worker sub r6d, 64 2834*c0909341SAndroid Build Coastguard Worker jge .zero_loop 2835*c0909341SAndroid Build Coastguard Worker jmp .pass2_end 2836*c0909341SAndroid Build Coastguard Worker.full: 2837*c0909341SAndroid Build Coastguard Worker mova [cq+128*0], m0 2838*c0909341SAndroid Build Coastguard Worker mova [cq+128*1], m1 2839*c0909341SAndroid Build Coastguard Worker mova [cq+128*2], m2 2840*c0909341SAndroid Build Coastguard Worker mova [cq+128*3], m3 2841*c0909341SAndroid Build Coastguard Worker mova [cq+128*4], m4 2842*c0909341SAndroid Build Coastguard Worker mova [cq+128*5], m5 2843*c0909341SAndroid Build Coastguard Worker mova [cq+128*6], m6 2844*c0909341SAndroid Build Coastguard Worker mova [cq+128*7], m7 2845*c0909341SAndroid Build Coastguard Worker add cq, 64 2846*c0909341SAndroid Build Coastguard Worker call 
.pass1 2847*c0909341SAndroid Build Coastguard Worker mova m9, [cq-64* 1] ; 0 1 2848*c0909341SAndroid Build Coastguard Worker mova m14, [cq+64* 1] ; 2 3 2849*c0909341SAndroid Build Coastguard Worker mova m18, [cq+64* 3] ; 4 5 2850*c0909341SAndroid Build Coastguard Worker mova m15, [cq+64* 5] ; 6 7 2851*c0909341SAndroid Build Coastguard Worker mova m20, [cq+64* 7] ; 8 9 2852*c0909341SAndroid Build Coastguard Worker mova m16, [cq+64* 9] ; 10 11 2853*c0909341SAndroid Build Coastguard Worker mova m22, [cq+64*11] ; 12 13 2854*c0909341SAndroid Build Coastguard Worker mova m19, [cq+64*13] ; 14 15 2855*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 2856*c0909341SAndroid Build Coastguard Worker punpcklwd m8, m7, m14 ; 30 2 2857*c0909341SAndroid Build Coastguard Worker punpckhwd m21, m7, m9 ; 31 1 2858*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m6, m18 ; 28 4 2859*c0909341SAndroid Build Coastguard Worker punpckhwd m14, m6 ; 3 29 2860*c0909341SAndroid Build Coastguard Worker punpcklwd m9, m0, m9 ; 16 0 2861*c0909341SAndroid Build Coastguard Worker punpckhwd m17, m19, m0 ; 15 17 2862*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m19, m1 ; 14 18 2863*c0909341SAndroid Build Coastguard Worker punpckhwd m19, m1, m22 ; 19 13 2864*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m15, m5 ; 6 26 2865*c0909341SAndroid Build Coastguard Worker punpckhwd m18, m5, m18 ; 27 5 2866*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m4, m20 ; 24 8 2867*c0909341SAndroid Build Coastguard Worker punpckhwd m15, m4 ; 7 25 2868*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m3, m16 ; 22 10 2869*c0909341SAndroid Build Coastguard Worker punpckhwd m20, m3, m20 ; 23 9 2870*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m22, m2 ; 12 20 2871*c0909341SAndroid Build Coastguard Worker punpckhwd m16, m2 ; 11 21 2872*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main2 2873*c0909341SAndroid Build Coastguard Worker call 
m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf 2874*c0909341SAndroid Build Coastguard Worker mov r6d, 32*7 2875*c0909341SAndroid Build Coastguard Worker pxor m8, m8 2876*c0909341SAndroid Build Coastguard Worker.full_zero_loop: 2877*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+r6*8+64*x], m8}, 2, 1, 0, -1 2878*c0909341SAndroid Build Coastguard Worker sub r6d, 32 2879*c0909341SAndroid Build Coastguard Worker jge .full_zero_loop 2880*c0909341SAndroid Build Coastguard Worker jmp .pass2_end 2881*c0909341SAndroid Build Coastguard Worker.fast: 2882*c0909341SAndroid Build Coastguard Worker mova ym0, [cq+128*0] 2883*c0909341SAndroid Build Coastguard Worker mova ym2, [cq+128*4] 2884*c0909341SAndroid Build Coastguard Worker movshdup m8, [o(permB)] 2885*c0909341SAndroid Build Coastguard Worker mova ym1, [cq+128*2] 2886*c0909341SAndroid Build Coastguard Worker mova ym3, [cq+128*6] 2887*c0909341SAndroid Build Coastguard Worker mova ym4, [cq+128*1] 2888*c0909341SAndroid Build Coastguard Worker mova ym5, [cq+128*3] 2889*c0909341SAndroid Build Coastguard Worker mova ym6, [cq+128*5] 2890*c0909341SAndroid Build Coastguard Worker mova ym7, [cq+128*7] 2891*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m8, m2 ; 0 4 2892*c0909341SAndroid Build Coastguard Worker vpermt2q m1, m8, m3 ; 2 6 2893*c0909341SAndroid Build Coastguard Worker vpermt2q m4, m8, m5 ; 1 3 2894*c0909341SAndroid Build Coastguard Worker vpermt2q m7, m8, m6 ; 7 5 2895*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m12}, m0, m1, m4, m7 2896*c0909341SAndroid Build Coastguard Worker pxor ym16, ym16 2897*c0909341SAndroid Build Coastguard Worker mova [cq+128*0], ym16 2898*c0909341SAndroid Build Coastguard Worker REPX {vmovdqa32 [cq+128*x], ym16}, 1, 2, 3, 4, 5, 6, 7 2899*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m0, m1, m4, m7 2900*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1, m4, m7 2901*c0909341SAndroid Build Coastguard Worker call 
m(idct_8x8_internal_10bpc).main_fast 2902*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).main_fast 2903*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_1)] 2904*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_end2 2905*c0909341SAndroid Build Coastguard Worker mova m8, [o(idct8x32p)] 2906*c0909341SAndroid Build Coastguard Worker packssdw m0, m4 2907*c0909341SAndroid Build Coastguard Worker packssdw m1, m5 2908*c0909341SAndroid Build Coastguard Worker packssdw m2, m6 2909*c0909341SAndroid Build Coastguard Worker packssdw m3, m7 2910*c0909341SAndroid Build Coastguard Worker mova m6, [dup16_perm] 2911*c0909341SAndroid Build Coastguard Worker vpermb m0, m8, m0 2912*c0909341SAndroid Build Coastguard Worker vpermb m2, m8, m2 2913*c0909341SAndroid Build Coastguard Worker vprold m8, 16 2914*c0909341SAndroid Build Coastguard Worker vpermb m1, m8, m1 2915*c0909341SAndroid Build Coastguard Worker vpermb m3, m8, m3 2916*c0909341SAndroid Build Coastguard Worker punpckldq m4, m0, m2 2917*c0909341SAndroid Build Coastguard Worker punpckhdq m0, m2 2918*c0909341SAndroid Build Coastguard Worker punpckldq m2, m1, m3 2919*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m3 2920*c0909341SAndroid Build Coastguard Worker punpckldq m21, m4, m2 2921*c0909341SAndroid Build Coastguard Worker punpckhdq m14, m4, m2 2922*c0909341SAndroid Build Coastguard Worker punpckldq m18, m0, m1 2923*c0909341SAndroid Build Coastguard Worker punpckhdq m15, m0, m1 2924*c0909341SAndroid Build Coastguard Worker vpermb m8, m6, m14 ; 2 2925*c0909341SAndroid Build Coastguard Worker vpermb m1, m6, m15 ; 6 2926*c0909341SAndroid Build Coastguard Worker vpermb m7, m6, m18 ; 4 2927*c0909341SAndroid Build Coastguard Worker pmovzxwd m9, ym21 ; 0 2928*c0909341SAndroid Build Coastguard Worker vpord m6, [o(pb_32)] {1to16} 2929*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 2930*c0909341SAndroid Build Coastguard Worker vpermb m21, 
m6, m21 ; 1 2931*c0909341SAndroid Build Coastguard Worker vpermb m15, m6, m15 ; 7 2932*c0909341SAndroid Build Coastguard Worker vpermb m18, m6, m18 ; 5 2933*c0909341SAndroid Build Coastguard Worker vpermb m14, m6, m14 ; 3 2934*c0909341SAndroid Build Coastguard Worker pslld m9, 16 2935*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main_fast2 2936*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast2 2937*c0909341SAndroid Build Coastguard Worker.pass2_end: 2938*c0909341SAndroid Build Coastguard Worker movshdup m22, [permC] 2939*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pw_2048] 2940*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [pixel_10bpc_max] 2941*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*3] 2942*c0909341SAndroid Build Coastguard Worker pxor m12, m12 2943*c0909341SAndroid Build Coastguard Worker psrlq m23, m22, 8 2944*c0909341SAndroid Build Coastguard Worker vpermq m8, m22, m0 2945*c0909341SAndroid Build Coastguard Worker vpermq m9, m23, m1 2946*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 2947*c0909341SAndroid Build Coastguard Worker vpermq m8, m22, m2 2948*c0909341SAndroid Build Coastguard Worker vpermq m9, m23, m3 2949*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 2950*c0909341SAndroid Build Coastguard Worker vpermq m8, m22, m4 2951*c0909341SAndroid Build Coastguard Worker vpermq m9, m23, m5 2952*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 2953*c0909341SAndroid Build Coastguard Worker vpermq m8, m22, m6 2954*c0909341SAndroid Build Coastguard Worker vpermq m9, m23, m7 2955*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 2956*c0909341SAndroid Build Coastguard Worker vpermq m8, m22, m14 2957*c0909341SAndroid Build Coastguard Worker vpermq m9, m23, m15 2958*c0909341SAndroid Build Coastguard 
Worker call m(idct_16x8_internal_10bpc).write_16x4 2959*c0909341SAndroid Build Coastguard Worker vpermq m8, m22, m16 2960*c0909341SAndroid Build Coastguard Worker vpermq m9, m23, m17 2961*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 2962*c0909341SAndroid Build Coastguard Worker vpermq m8, m22, m18 2963*c0909341SAndroid Build Coastguard Worker vpermq m9, m23, m19 2964*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 2965*c0909341SAndroid Build Coastguard Worker vpermq m8, m22, m20 2966*c0909341SAndroid Build Coastguard Worker vpermq m9, m23, m21 2967*c0909341SAndroid Build Coastguard Worker%if WIN64 2968*c0909341SAndroid Build Coastguard Worker movaps xmm6, [rsp+ 8] 2969*c0909341SAndroid Build Coastguard Worker movaps xmm7, [rsp+24] 2970*c0909341SAndroid Build Coastguard Worker%endif 2971*c0909341SAndroid Build Coastguard Worker vzeroupper 2972*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x8_internal_10bpc).write_16x4 2973*c0909341SAndroid Build Coastguard Worker.pass1: 2974*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 0] 2975*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+128* 2] 2976*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+128* 4] 2977*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128* 6] 2978*c0909341SAndroid Build Coastguard Worker pmulld m4, m12, [cq+128* 8] 2979*c0909341SAndroid Build Coastguard Worker pmulld m5, m12, [cq+128*10] 2980*c0909341SAndroid Build Coastguard Worker pmulld m6, m12, [cq+128*12] 2981*c0909341SAndroid Build Coastguard Worker pmulld m7, m12, [cq+128*14] 2982*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_rect2 2983*c0909341SAndroid Build Coastguard Worker pmulld m16, m12, [cq+128* 1] 2984*c0909341SAndroid Build Coastguard Worker pmulld m17, m12, [cq+128* 3] 2985*c0909341SAndroid Build Coastguard Worker pmulld m18, m12, [cq+128* 5] 2986*c0909341SAndroid 
Build Coastguard Worker pmulld m19, m12, [cq+128* 7] 2987*c0909341SAndroid Build Coastguard Worker pmulld m20, m12, [cq+128* 9] 2988*c0909341SAndroid Build Coastguard Worker pmulld m21, m12, [cq+128*11] 2989*c0909341SAndroid Build Coastguard Worker pmulld m22, m12, [cq+128*13] 2990*c0909341SAndroid Build Coastguard Worker pmulld m23, m12, [cq+128*15] 2991*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_rect2 2992*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_1)] 2993*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_end2 2994*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x16_internal_10bpc).main_end3 2995*c0909341SAndroid Build Coastguard Worker.dconly: 2996*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 2997*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 2998*c0909341SAndroid Build Coastguard Worker or r3d, 32 2999*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_16x8_10bpc).dconly 3000*c0909341SAndroid Build Coastguard Worker 3001*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_16x32_10bpc, 4, 7, 16, dst, stride, c, eob 3002*c0909341SAndroid Build Coastguard Worker%undef cmp 3003*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pw_2896x8] 3004*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pw_1697x16] 3005*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [pw_8192] 3006*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [pixel_10bpc_max] 3007*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*9] 3008*c0909341SAndroid Build Coastguard Worker pxor m14, m14 3009*c0909341SAndroid Build Coastguard Worker paddw m12, m13, m13 ; pw_16384 3010*c0909341SAndroid Build Coastguard Worker cmp eobd, 151 3011*c0909341SAndroid Build Coastguard Worker jl .main 3012*c0909341SAndroid Build Coastguard Worker call .main 3013*c0909341SAndroid Build Coastguard Worker add cq, 
64-128*4 3014*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*8] 3015*c0909341SAndroid Build Coastguard Worker.main: 3016*c0909341SAndroid Build Coastguard Worker call .main_internal 3017*c0909341SAndroid Build Coastguard Worker add cq, 128*4 3018*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m13, m2 3019*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m13, m4 3020*c0909341SAndroid Build Coastguard Worker pmulhrsw m5, m13, m6 3021*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, m13, m8 3022*c0909341SAndroid Build Coastguard Worker call .main_internal 3023*c0909341SAndroid Build Coastguard Worker.main2: 3024*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m13 3025*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m13 3026*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m13 3027*c0909341SAndroid Build Coastguard Worker pmulhrsw m8, m13 3028*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m1, m2 ; 0 8 3029*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m2 ; 1 9 3030*c0909341SAndroid Build Coastguard Worker call .write_16x2x2 3031*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m3, m4 ; 2 10 3032*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m3, m4 ; 3 11 3033*c0909341SAndroid Build Coastguard Worker call .write_16x2x2 3034*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m5, m6 ; 4 12 3035*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m5, m6 ; 5 13 3036*c0909341SAndroid Build Coastguard Worker call .write_16x2x2 3037*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m7, m8 ; 6 14 3038*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m7, m8 ; 7 15 3039*c0909341SAndroid Build Coastguard Worker.write_16x2x2: 3040*c0909341SAndroid Build Coastguard Worker mova ym2, [dstq+strideq*0] 3041*c0909341SAndroid Build Coastguard Worker vinserti32x8 m2, [dstq+strideq*8], 1 3042*c0909341SAndroid Build Coastguard Worker mova ym9, [dstq+strideq*1] 3043*c0909341SAndroid 
Build Coastguard Worker vinserti32x8 m9, [dstq+r6 ], 1 3044*c0909341SAndroid Build Coastguard Worker paddw m0, m2 3045*c0909341SAndroid Build Coastguard Worker paddw m1, m9 3046*c0909341SAndroid Build Coastguard Worker pmaxsw m0, m14 3047*c0909341SAndroid Build Coastguard Worker pmaxsw m1, m14 3048*c0909341SAndroid Build Coastguard Worker pminsw m0, m15 3049*c0909341SAndroid Build Coastguard Worker pminsw m1, m15 3050*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], ym0 3051*c0909341SAndroid Build Coastguard Worker vextracti32x8 [dstq+strideq*8], m0, 1 3052*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1], ym1 3053*c0909341SAndroid Build Coastguard Worker vextracti32x8 [dstq+r6 ], m1, 1 3054*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 3055*c0909341SAndroid Build Coastguard Worker ret 3056*c0909341SAndroid Build Coastguard Worker.main_internal: 3057*c0909341SAndroid Build Coastguard Worker mova m8, [cq+128* 0] 3058*c0909341SAndroid Build Coastguard Worker packssdw m8, [cq+128* 8] 3059*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128* 1] 3060*c0909341SAndroid Build Coastguard Worker packssdw m6, [cq+128* 9] 3061*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 2] 3062*c0909341SAndroid Build Coastguard Worker packssdw m0, [cq+128*10] 3063*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128* 3] 3064*c0909341SAndroid Build Coastguard Worker packssdw m2, [cq+128*11] 3065*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m10}, m8, m6, m0, m2 3066*c0909341SAndroid Build Coastguard Worker REPX {vpermq x, x, q3120}, m8, m6, m0, m2 3067*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m11, m8 3068*c0909341SAndroid Build Coastguard Worker pmulhrsw m9, m11, m6 3069*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+128*x], m14}, 0, 1, 2, 3 3070*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m12 3071*c0909341SAndroid Build Coastguard Worker pmulhrsw m9, m12 
3072*c0909341SAndroid Build Coastguard Worker paddsw m8, m4 3073*c0909341SAndroid Build Coastguard Worker paddsw m6, m9 3074*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m11, m0 3075*c0909341SAndroid Build Coastguard Worker pmulhrsw m9, m11, m2 3076*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+128*x], m14}, 8, 9, 10, 11 3077*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m12 3078*c0909341SAndroid Build Coastguard Worker pmulhrsw m9, m12 3079*c0909341SAndroid Build Coastguard Worker paddsw m0, m4 3080*c0909341SAndroid Build Coastguard Worker paddsw m2, m9 3081*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m8, m6 3082*c0909341SAndroid Build Coastguard Worker punpckhwd m8, m6 3083*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m0, m2 3084*c0909341SAndroid Build Coastguard Worker punpckhwd m0, m2 3085*c0909341SAndroid Build Coastguard Worker punpckldq m2, m4, m6 ; 0 1 3086*c0909341SAndroid Build Coastguard Worker punpckhdq m4, m6 ; 2 3 3087*c0909341SAndroid Build Coastguard Worker punpckldq m6, m8, m0 ; 4 5 3088*c0909341SAndroid Build Coastguard Worker punpckhdq m8, m0 ; 6 7 3089*c0909341SAndroid Build Coastguard Worker ret 3090*c0909341SAndroid Build Coastguard Worker 3091*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x16_10bpc, 4, 7, 0, dst, stride, c, eob 3092*c0909341SAndroid Build Coastguard Worker%undef cmp 3093*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 3094*c0909341SAndroid Build Coastguard Worker test eobd, eobd 3095*c0909341SAndroid Build Coastguard Worker jz .dconly 3096*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 3097*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 3098*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 3099*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 3100*c0909341SAndroid Build Coastguard Worker%if WIN64 3101*c0909341SAndroid Build Coastguard 
Worker movaps [rsp+ 8], xmm6 3102*c0909341SAndroid Build Coastguard Worker movaps [rsp+24], xmm7 3103*c0909341SAndroid Build Coastguard Worker%endif 3104*c0909341SAndroid Build Coastguard Worker mov r6d, 8*12 3105*c0909341SAndroid Build Coastguard Worker cmp eobd, 36 3106*c0909341SAndroid Build Coastguard Worker jl .fast 3107*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+64* 0] 3108*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+64* 4] 3109*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+64* 8] 3110*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+64*12] 3111*c0909341SAndroid Build Coastguard Worker pmulld m16, m12, [cq+64* 2] 3112*c0909341SAndroid Build Coastguard Worker pmulld m17, m12, [cq+64* 6] 3113*c0909341SAndroid Build Coastguard Worker pmulld m18, m12, [cq+64*10] 3114*c0909341SAndroid Build Coastguard Worker pmulld m19, m12, [cq+64*14] 3115*c0909341SAndroid Build Coastguard Worker cmp eobd, 151 3116*c0909341SAndroid Build Coastguard Worker jge .full 3117*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_fast_rect2 3118*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_fast_rect2 3119*c0909341SAndroid Build Coastguard Worker call .idct16_sumsub 3120*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 3121*c0909341SAndroid Build Coastguard Worker call .main_fast_rect2 3122*c0909341SAndroid Build Coastguard Worker jmp .pass1_end 3123*c0909341SAndroid Build Coastguard Worker.full: 3124*c0909341SAndroid Build Coastguard Worker pmulld m4, m12, [cq+64*16] 3125*c0909341SAndroid Build Coastguard Worker pmulld m5, m12, [cq+64*20] 3126*c0909341SAndroid Build Coastguard Worker pmulld m6, m12, [cq+64*24] 3127*c0909341SAndroid Build Coastguard Worker pmulld m7, m12, [cq+64*28] 3128*c0909341SAndroid Build Coastguard Worker pmulld m20, m12, [cq+64*18] 3129*c0909341SAndroid Build Coastguard Worker pmulld m21, m12, [cq+64*22] 3130*c0909341SAndroid 
Build Coastguard Worker pmulld m22, m12, [cq+64*26] 3131*c0909341SAndroid Build Coastguard Worker pmulld m23, m12, [cq+64*30] 3132*c0909341SAndroid Build Coastguard Worker add r6d, 8*16 3133*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_rect2 3134*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_rect2 3135*c0909341SAndroid Build Coastguard Worker call .idct16_sumsub 3136*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 3137*c0909341SAndroid Build Coastguard Worker pmulld m16, m12, [cq+64*17] 3138*c0909341SAndroid Build Coastguard Worker pmulld m17, m12, [cq+64*19] 3139*c0909341SAndroid Build Coastguard Worker pmulld m18, m12, [cq+64*21] 3140*c0909341SAndroid Build Coastguard Worker pmulld m19, m12, [cq+64*23] 3141*c0909341SAndroid Build Coastguard Worker pmulld m20, m12, [cq+64*25] 3142*c0909341SAndroid Build Coastguard Worker pmulld m21, m12, [cq+64*27] 3143*c0909341SAndroid Build Coastguard Worker pmulld m22, m12, [cq+64*29] 3144*c0909341SAndroid Build Coastguard Worker pmulld m23, m12, [cq+64*31] 3145*c0909341SAndroid Build Coastguard Worker call .main_rect2 3146*c0909341SAndroid Build Coastguard Worker.pass1_end: 3147*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_1)] 3148*c0909341SAndroid Build Coastguard Worker lea r4, [cq+64] 3149*c0909341SAndroid Build Coastguard Worker call .idct32_pass1_end 3150*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 3151*c0909341SAndroid Build Coastguard Worker punpckhqdq m19, m5, m16 ; 11 3152*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, m16 ; 10 3153*c0909341SAndroid Build Coastguard Worker punpckhqdq m16, m2, m1 ; 5 3154*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m1 ; 4 3155*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m15, m4 ; 2 3156*c0909341SAndroid Build Coastguard Worker punpckhqdq m15, m4 ; 3 3157*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m14, m18 ; 8 
3158*c0909341SAndroid Build Coastguard Worker punpckhqdq m18, m14, m18 ; 9 3159*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m0, m20 ; 1 3160*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m20 ; 0 3161*c0909341SAndroid Build Coastguard Worker punpckhqdq m20, m6, m17 ; 13 3162*c0909341SAndroid Build Coastguard Worker punpcklqdq m6, m17 ; 12 3163*c0909341SAndroid Build Coastguard Worker punpckhqdq m17, m3, m21 ; 7 3164*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, m21 ; 6 3165*c0909341SAndroid Build Coastguard Worker punpckhqdq m21, m7, m8 ; 15 3166*c0909341SAndroid Build Coastguard Worker punpcklqdq m7, m8 ; 14 3167*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_8bpc).main 3168*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf 3169*c0909341SAndroid Build Coastguard Worker jmp .end 3170*c0909341SAndroid Build Coastguard Worker.fast: 3171*c0909341SAndroid Build Coastguard Worker pmulld ym0, ym12, [cq+64*0] 3172*c0909341SAndroid Build Coastguard Worker pmulld ym1, ym12, [cq+64*4] 3173*c0909341SAndroid Build Coastguard Worker movshdup m7, [o(permB)] 3174*c0909341SAndroid Build Coastguard Worker mova ym4, [cq+64*2] 3175*c0909341SAndroid Build Coastguard Worker mova ym5, [cq+64*6] 3176*c0909341SAndroid Build Coastguard Worker mova ym16, [cq+64*1] 3177*c0909341SAndroid Build Coastguard Worker mova ym2, [cq+64*5] 3178*c0909341SAndroid Build Coastguard Worker mova ym3, [cq+64*3] 3179*c0909341SAndroid Build Coastguard Worker mova ym17, [cq+64*7] 3180*c0909341SAndroid Build Coastguard Worker vpermt2q m4, m7, m5 ; 2 6 3181*c0909341SAndroid Build Coastguard Worker vpermt2q m16, m7, m2 ; 1 5 3182*c0909341SAndroid Build Coastguard Worker vpermt2q m17, m7, m3 ; 7 3 3183*c0909341SAndroid Build Coastguard Worker paddd ym0, ym13 3184*c0909341SAndroid Build Coastguard Worker paddd ym1, ym13 3185*c0909341SAndroid Build Coastguard Worker psrad ym0, 12 3186*c0909341SAndroid Build 
Coastguard Worker psrad ym1, 12 3187*c0909341SAndroid Build Coastguard Worker vpermq m0, m7, m0 ; 0 0 3188*c0909341SAndroid Build Coastguard Worker vpermq m1, m7, m1 ; 4 4 3189*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m12}, m4, m16, m17 3190*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m4, m16, m17 3191*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m4, m16, m17 3192*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).main_fast2 3193*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_1)] 3194*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_end2 3195*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).transpose_8x32 3196*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 3197*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m0, m2 ; 1 3198*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m2 ; 0 3199*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m3, m4 ; 2 3200*c0909341SAndroid Build Coastguard Worker punpckhqdq m15, m3, m4 ; 3 3201*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m5, m7 ; 4 3202*c0909341SAndroid Build Coastguard Worker punpckhqdq m16, m5, m7 ; 5 3203*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, m6, m8 ; 6 3204*c0909341SAndroid Build Coastguard Worker punpckhqdq m17, m6, m8 ; 7 3205*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf_fast 3206*c0909341SAndroid Build Coastguard Worker.end: 3207*c0909341SAndroid Build Coastguard Worker%if WIN64 3208*c0909341SAndroid Build Coastguard Worker movaps xmm6, [rsp+ 8] 3209*c0909341SAndroid Build Coastguard Worker movaps xmm7, [rsp+24] 3210*c0909341SAndroid Build Coastguard Worker%endif 3211*c0909341SAndroid Build Coastguard Worker pxor m12, m12 3212*c0909341SAndroid Build Coastguard Worker.zero_loop: 3213*c0909341SAndroid Build Coastguard Worker mova 
[cq+r6*8+64*3], m12 3214*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+64*2], m12 3215*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+64*1], m12 3216*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+64*0], m12 3217*c0909341SAndroid Build Coastguard Worker sub r6d, 8*4 3218*c0909341SAndroid Build Coastguard Worker jge .zero_loop 3219*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x8_start 3220*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m14 3221*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m15 3222*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m16 3223*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11, m17 3224*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x4 3225*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m18 3226*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m19 3227*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m20 3228*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11, m21 3229*c0909341SAndroid Build Coastguard Worker vzeroupper 3230*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x4 3231*c0909341SAndroid Build Coastguard Worker.dconly: 3232*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 3233*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 3234*c0909341SAndroid Build Coastguard Worker or r3d, 16 3235*c0909341SAndroid Build Coastguard Worker.dconly3: 3236*c0909341SAndroid Build Coastguard Worker add r6d, 128 3237*c0909341SAndroid Build Coastguard Worker sar r6d, 8 3238*c0909341SAndroid Build Coastguard Worker imul r6d, 181 3239*c0909341SAndroid Build Coastguard Worker add r6d, 384 3240*c0909341SAndroid Build Coastguard Worker sar r6d, 9 3241*c0909341SAndroid Build Coastguard Worker.dconly2: 3242*c0909341SAndroid Build Coastguard Worker vpbroadcastd m3, [o(dconly_10bpc)] 3243*c0909341SAndroid Build 
Coastguard Worker imul r6d, 181 3244*c0909341SAndroid Build Coastguard Worker add r6d, 2176 3245*c0909341SAndroid Build Coastguard Worker sar r6d, 12 3246*c0909341SAndroid Build Coastguard Worker vpbroadcastw m2, r6d 3247*c0909341SAndroid Build Coastguard Worker paddsw m2, m3 3248*c0909341SAndroid Build Coastguard Worker.dconly_loop: 3249*c0909341SAndroid Build Coastguard Worker paddsw m0, m2, [dstq+strideq*0] 3250*c0909341SAndroid Build Coastguard Worker paddsw m1, m2, [dstq+strideq*1] 3251*c0909341SAndroid Build Coastguard Worker psubusw m0, m3 3252*c0909341SAndroid Build Coastguard Worker psubusw m1, m3 3253*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], m0 3254*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1], m1 3255*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 3256*c0909341SAndroid Build Coastguard Worker sub r3d, 2 3257*c0909341SAndroid Build Coastguard Worker jg .dconly_loop 3258*c0909341SAndroid Build Coastguard Worker RET 3259*c0909341SAndroid Build Coastguard WorkerALIGN function_align 3260*c0909341SAndroid Build Coastguard Worker.idct16_sumsub: 3261*c0909341SAndroid Build Coastguard Worker psubd m23, m0, m22 ; t15 3262*c0909341SAndroid Build Coastguard Worker paddd m0, m22 ; t0 3263*c0909341SAndroid Build Coastguard Worker psubd m22, m1, m21 ; t14 3264*c0909341SAndroid Build Coastguard Worker paddd m1, m21 ; t1 3265*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m23, m0, m22, m1 3266*c0909341SAndroid Build Coastguard Worker psubd m21, m2, m20 ; t13 3267*c0909341SAndroid Build Coastguard Worker paddd m2, m20 ; t2 3268*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m23, m0, m22, m1 3269*c0909341SAndroid Build Coastguard Worker psubd m20, m3, m19 ; t12 3270*c0909341SAndroid Build Coastguard Worker paddd m3, m19 ; t3 3271*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m21, m2, m20, m3 3272*c0909341SAndroid Build Coastguard Worker psubd m19, m4, m18 ; t11 
3273*c0909341SAndroid Build Coastguard Worker paddd m4, m18 ; t4 3274*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m21, m2, m20, m3 3275*c0909341SAndroid Build Coastguard Worker psubd m18, m5, m17 ; t10 3276*c0909341SAndroid Build Coastguard Worker paddd m5, m17 ; t5 3277*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m19, m4, m18, m5 3278*c0909341SAndroid Build Coastguard Worker psubd m17, m6, m16 ; t9 3279*c0909341SAndroid Build Coastguard Worker paddd m6, m16 ; t6 3280*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m19, m4, m18, m5 3281*c0909341SAndroid Build Coastguard Worker psubd m16, m7, m9 ; t8 3282*c0909341SAndroid Build Coastguard Worker paddd m7, m9 ; t7 3283*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m17, m6, m16, m7 3284*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m17, m6, m16, m7 3285*c0909341SAndroid Build Coastguard Worker ret 3286*c0909341SAndroid Build Coastguard Worker.idct32_pass1_end: 3287*c0909341SAndroid Build Coastguard Worker psrlq m12, [o(permC)], 24 ; 0 2 8 10 1 3 9 11 3288*c0909341SAndroid Build Coastguard Worker psrlq m13, m12, 32 ; 4 6 12 14 5 7 13 15 3289*c0909341SAndroid Build Coastguard Worker%macro IDCT32_PASS1_END 2 ; low, high 3290*c0909341SAndroid Build Coastguard Worker paddd m8, m11, [r4+128*%1] 3291*c0909341SAndroid Build Coastguard Worker paddd m9, m11, [cq+128*%1] 3292*c0909341SAndroid Build Coastguard Worker psubd m10, m8, m%1 ; out 16+n 3293*c0909341SAndroid Build Coastguard Worker paddd m8, m%1 ; out 15-n 3294*c0909341SAndroid Build Coastguard Worker paddd m%1, m9, m%2 ; out 0+n 3295*c0909341SAndroid Build Coastguard Worker psubd m9, m%2 ; out 31-n 3296*c0909341SAndroid Build Coastguard Worker REPX {vpsravd x, m11}, m10, m%1, m8, m9 3297*c0909341SAndroid Build Coastguard Worker packssdw m%1, m10 ; 0+n 16+n 3298*c0909341SAndroid Build Coastguard Worker packssdw m%2, m8, m9 ; 15-n 31-n 3299*c0909341SAndroid Build Coastguard 
Worker%endmacro 3300*c0909341SAndroid Build Coastguard Worker IDCT32_PASS1_END 0, 23 ; 0 16, 15 31 3301*c0909341SAndroid Build Coastguard Worker IDCT32_PASS1_END 7, 16 ; 7 23, 8 24 3302*c0909341SAndroid Build Coastguard Worker IDCT32_PASS1_END 1, 22 ; 1 17, 14 30 3303*c0909341SAndroid Build Coastguard Worker IDCT32_PASS1_END 6, 17 ; 6 22, 9 25 3304*c0909341SAndroid Build Coastguard Worker IDCT32_PASS1_END 2, 21 ; 2 18, 13 29 3305*c0909341SAndroid Build Coastguard Worker IDCT32_PASS1_END 5, 18 ; 5 21, 10 26 3306*c0909341SAndroid Build Coastguard Worker IDCT32_PASS1_END 3, 20 ; 3 19, 12 28 3307*c0909341SAndroid Build Coastguard Worker IDCT32_PASS1_END 4, 19 ; 4 20, 11 27 3308*c0909341SAndroid Build Coastguard Worker.transpose_16x32: 3309*c0909341SAndroid Build Coastguard Worker mova m14, m13 3310*c0909341SAndroid Build Coastguard Worker vpermi2q m14, m0, m16 3311*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m12, m16 3312*c0909341SAndroid Build Coastguard Worker mova m15, m13 3313*c0909341SAndroid Build Coastguard Worker vpermi2q m15, m1, m17 3314*c0909341SAndroid Build Coastguard Worker vpermt2q m1, m12, m17 3315*c0909341SAndroid Build Coastguard Worker mova m16, m13 3316*c0909341SAndroid Build Coastguard Worker vpermi2q m16, m2, m18 3317*c0909341SAndroid Build Coastguard Worker vpermt2q m2, m12, m18 3318*c0909341SAndroid Build Coastguard Worker mova m17, m13 3319*c0909341SAndroid Build Coastguard Worker vpermi2q m17, m3, m19 3320*c0909341SAndroid Build Coastguard Worker vpermt2q m3, m12, m19 3321*c0909341SAndroid Build Coastguard Worker mova m18, m13 3322*c0909341SAndroid Build Coastguard Worker vpermi2q m18, m4, m20 3323*c0909341SAndroid Build Coastguard Worker vpermt2q m4, m12, m20 3324*c0909341SAndroid Build Coastguard Worker mova m19, m13 3325*c0909341SAndroid Build Coastguard Worker vpermi2q m19, m5, m21 3326*c0909341SAndroid Build Coastguard Worker vpermt2q m5, m12, m21 3327*c0909341SAndroid Build Coastguard Worker mova m20, m13 3328*c0909341SAndroid 
Build Coastguard Worker vpermi2q m20, m6, m22 3329*c0909341SAndroid Build Coastguard Worker vpermt2q m6, m12, m22 3330*c0909341SAndroid Build Coastguard Worker mova m21, m13 3331*c0909341SAndroid Build Coastguard Worker vpermi2q m21, m7, m23 3332*c0909341SAndroid Build Coastguard Worker vpermt2q m7, m12, m23 3333*c0909341SAndroid Build Coastguard Worker punpckhwd m8, m2, m3 ; c04 d04 c05 d05 c06 d06 c07 d07 3334*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3 ; c00 d00 c01 d01 c02 d02 c03 d03 3335*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m0, m1 ; a04 b04 a05 b05 a06 b06 a07 b07 3336*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1 ; a00 b00 a01 b01 a02 b02 a03 b03 3337*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m4, m5 ; e04 f04 e05 f05 e06 f06 e07 f07 3338*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5 ; e00 f00 e01 f01 e02 f02 e03 f03 3339*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m6, m7 ; g04 h04 g05 h05 g06 h06 g07 h07 3340*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7 ; g00 h00 g01 h01 g02 h02 g03 h03 3341*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m14, m15 ; a12 b12 a13 b13 a14 b14 a15 b15 3342*c0909341SAndroid Build Coastguard Worker punpcklwd m14, m15 ; a08 b08 a09 b09 a10 b10 a11 b11 3343*c0909341SAndroid Build Coastguard Worker punpckhwd m15, m16, m17 ; c12 d12 c13 d13 c14 d14 c15 d15 3344*c0909341SAndroid Build Coastguard Worker punpcklwd m16, m17 ; c08 d08 c09 d09 c10 d10 c11 d11 3345*c0909341SAndroid Build Coastguard Worker punpckhwd m17, m18, m19 ; e12 f12 e13 f13 e14 f14 e15 f15 3346*c0909341SAndroid Build Coastguard Worker punpcklwd m18, m19 ; e08 f08 e09 f09 e10 f10 e11 f11 3347*c0909341SAndroid Build Coastguard Worker punpckhwd m19, m20, m21 ; g12 h12 g13 h13 g14 h14 g15 h15 3348*c0909341SAndroid Build Coastguard Worker punpcklwd m20, m21 ; g08 h08 g09 h09 g10 h10 g11 h11 3349*c0909341SAndroid Build Coastguard Worker punpckhdq m21, m1, m5 ; e06 f06 g06 h06 
e07 f07 g07 h07 3350*c0909341SAndroid Build Coastguard Worker punpckldq m1, m5 ; e04 f04 g04 h04 e05 f05 g05 h05 3351*c0909341SAndroid Build Coastguard Worker punpckhdq m5, m14, m16 ; a10 b10 c10 d10 a11 b11 c11 d11 3352*c0909341SAndroid Build Coastguard Worker punpckldq m14, m16 ; a08 b08 c08 d08 a09 b09 c09 d09 3353*c0909341SAndroid Build Coastguard Worker punpckhdq m16, m18, m20 ; e10 f10 g10 h10 e11 f11 g11 h11 3354*c0909341SAndroid Build Coastguard Worker punpckldq m18, m20 ; e08 f08 g08 h08 e09 f09 g09 h09 3355*c0909341SAndroid Build Coastguard Worker punpckldq m20, m4, m6 ; e00 f00 g00 h00 e01 f01 g01 h01 3356*c0909341SAndroid Build Coastguard Worker punpckhdq m4, m6 ; e02 f02 g02 h02 e03 f03 g03 h03 3357*c0909341SAndroid Build Coastguard Worker punpckldq m6, m7, m15 ; a12 b12 c12 d12 a13 b13 c13 d13 3358*c0909341SAndroid Build Coastguard Worker punpckhdq m7, m15 ; a14 b14 c14 d14 a15 b15 c15 d15 3359*c0909341SAndroid Build Coastguard Worker punpckhdq m15, m0, m2 ; a02 b02 c02 d02 a03 b03 c03 d03 3360*c0909341SAndroid Build Coastguard Worker punpckldq m0, m2 ; a00 b00 c00 d00 a01 b01 c01 d01 3361*c0909341SAndroid Build Coastguard Worker punpckldq m2, m3, m8 ; a04 b04 c04 d04 a05 b05 c05 d05 3362*c0909341SAndroid Build Coastguard Worker punpckhdq m3, m8 ; a06 b06 c06 d06 a07 b07 c07 d07 3363*c0909341SAndroid Build Coastguard Worker punpckhdq m8, m17, m19 ; e14 f14 g14 h14 e15 f15 g15 h15 3364*c0909341SAndroid Build Coastguard Worker punpckldq m17, m19 ; e12 f12 g12 h12 e13 f13 g13 h13 3365*c0909341SAndroid Build Coastguard Worker ret 3366*c0909341SAndroid Build Coastguard Worker.pass1_load_spill: 3367*c0909341SAndroid Build Coastguard Worker mova [cq+64* 0], m0 3368*c0909341SAndroid Build Coastguard Worker mova [cq+64* 2], m1 3369*c0909341SAndroid Build Coastguard Worker mova [cq+64* 4], m2 3370*c0909341SAndroid Build Coastguard Worker mova [cq+64* 6], m3 3371*c0909341SAndroid Build Coastguard Worker mova [cq+64* 8], m4 3372*c0909341SAndroid Build Coastguard 
Worker mova [cq+64*10], m5 3373*c0909341SAndroid Build Coastguard Worker mova [cq+64*12], m6 3374*c0909341SAndroid Build Coastguard Worker mova [cq+64*14], m7 3375*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+64* 1] 3376*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+64* 3] 3377*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+64* 5] 3378*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+64* 7] 3379*c0909341SAndroid Build Coastguard Worker pmulld m4, m12, [cq+64* 9] 3380*c0909341SAndroid Build Coastguard Worker pmulld m5, m12, [cq+64*11] 3381*c0909341SAndroid Build Coastguard Worker pmulld m6, m12, [cq+64*13] 3382*c0909341SAndroid Build Coastguard Worker pmulld m7, m12, [cq+64*15] 3383*c0909341SAndroid Build Coastguard Worker mova [cq+64* 1], m23 3384*c0909341SAndroid Build Coastguard Worker mova [cq+64* 3], m22 3385*c0909341SAndroid Build Coastguard Worker mova [cq+64* 5], m21 3386*c0909341SAndroid Build Coastguard Worker mova [cq+64* 7], m20 3387*c0909341SAndroid Build Coastguard Worker mova [cq+64* 9], m19 3388*c0909341SAndroid Build Coastguard Worker mova [cq+64*11], m18 3389*c0909341SAndroid Build Coastguard Worker mova [cq+64*13], m17 3390*c0909341SAndroid Build Coastguard Worker mova [cq+64*15], m16 3391*c0909341SAndroid Build Coastguard Worker ret 3392*c0909341SAndroid Build Coastguard Worker.main_fast2_rect2: 3393*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m0, m1, m2, m3 3394*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1, m2, m3 3395*c0909341SAndroid Build Coastguard Worker.main_fast2: ; bottom 3/4 is zero 3396*c0909341SAndroid Build Coastguard Worker pmulld m23, m0, [o(pd_4091)] {1to16} ; t31a 3397*c0909341SAndroid Build Coastguard Worker pmulld m0, [o(pd_201)] {1to16} ; t16a 3398*c0909341SAndroid Build Coastguard Worker pmulld m20, m3, [o(pd_1380)] {1to16} ; t19a 3399*c0909341SAndroid Build Coastguard Worker pmulld m3, [o(pd_3857)] {1to16} ; t28a 
3400*c0909341SAndroid Build Coastguard Worker pmulld m21, m2, [o(pd_3973)] {1to16} ; t27a 3401*c0909341SAndroid Build Coastguard Worker pmulld m2, [o(pd_995)] {1to16} ; t20a 3402*c0909341SAndroid Build Coastguard Worker pmulld m6, m1, [o(pd_601)] {1to16} ; t23a 3403*c0909341SAndroid Build Coastguard Worker pmulld m17, m1, [o(pd_4052)] {1to16} ; t24a 3404*c0909341SAndroid Build Coastguard Worker REPX {psubd x, m13, x}, m20, m6 3405*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m23, m0, m3, m21, m2, m17 3406*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m20, m6, m23, m0, m3, m21, m2, m17 3407*c0909341SAndroid Build Coastguard Worker mova m8, m0 3408*c0909341SAndroid Build Coastguard Worker mova m16, m23 3409*c0909341SAndroid Build Coastguard Worker mova m7, m20 3410*c0909341SAndroid Build Coastguard Worker mova m4, m3 3411*c0909341SAndroid Build Coastguard Worker mova m19, m2 3412*c0909341SAndroid Build Coastguard Worker mova m18, m21 3413*c0909341SAndroid Build Coastguard Worker mova m5, m6 3414*c0909341SAndroid Build Coastguard Worker mova m22, m17 3415*c0909341SAndroid Build Coastguard Worker jmp .main3 3416*c0909341SAndroid Build Coastguard Worker.main_fast_rect2: 3417*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).round 3418*c0909341SAndroid Build Coastguard Worker.main_fast: ; bottom half is zero 3419*c0909341SAndroid Build Coastguard Worker pmulld m23, m0, [o(pd_4091)] {1to16} ; t31a 3420*c0909341SAndroid Build Coastguard Worker pmulld m0, [o(pd_201)] {1to16} ; t16a 3421*c0909341SAndroid Build Coastguard Worker pmulld m16, m7, [o(pd_2751)] {1to16} ; t17a 3422*c0909341SAndroid Build Coastguard Worker pmulld m7, [o(pd_3035)] {1to16} ; t30a 3423*c0909341SAndroid Build Coastguard Worker pmulld m19, m4, [o(pd_3703)] {1to16} ; t29a 3424*c0909341SAndroid Build Coastguard Worker pmulld m4, [o(pd_1751)] {1to16} ; t18a 3425*c0909341SAndroid Build Coastguard Worker pmulld m20, m3, [o(pd_1380)] {1to16} ; t19a 
3426*c0909341SAndroid Build Coastguard Worker pmulld m3, [o(pd_3857)] {1to16} ; t28a 3427*c0909341SAndroid Build Coastguard Worker pmulld m21, m2, [o(pd_3973)] {1to16} ; t27a 3428*c0909341SAndroid Build Coastguard Worker pmulld m2, [o(pd_995)] {1to16} ; t20a 3429*c0909341SAndroid Build Coastguard Worker pmulld m18, m5, [o(pd_2106)] {1to16} ; t21a 3430*c0909341SAndroid Build Coastguard Worker pmulld m5, [o(pd_3513)] {1to16} ; t26a 3431*c0909341SAndroid Build Coastguard Worker pmulld m17, m6, [o(pd_3290)] {1to16} ; t25a 3432*c0909341SAndroid Build Coastguard Worker pmulld m6, [o(pd_2440)] {1to16} ; t22a 3433*c0909341SAndroid Build Coastguard Worker pmulld m22, m1, [o(pd_601)] {1to16} ; t23a 3434*c0909341SAndroid Build Coastguard Worker pmulld m1, [o(pd_4052)] {1to16} ; t24a 3435*c0909341SAndroid Build Coastguard Worker REPX {psubd x, m13, x}, m16, m20, m18, m22 3436*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).round3 3437*c0909341SAndroid Build Coastguard Worker jmp .main2 3438*c0909341SAndroid Build Coastguard Worker.main_rect2: 3439*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).round 3440*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).round 3441*c0909341SAndroid Build Coastguard Worker.main: 3442*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 0, 23, 8, 9, 10, _, 201, 4091 ; t16a, t31a 3443*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 16, 7, 8, 9, 10, _, 3035, 2751 ; t17a, t30a 3444*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 4, 19, 8, 9, 10, _, 1751, 3703 ; t18a, t29a 3445*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 20, 3, 8, 9, 10, _, 3857, 1380 ; t19a, t28a 3446*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 2, 21, 8, 9, 10, _, 995, 3973 ; t20a, t27a 3447*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 18, 5, 8, 9, 10, _, 3513, 2106 ; t21a, t26a 3448*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 6, 17, 8, 9, 10, _, 
2440, 3290 ; t22a, t25a 3449*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 22, 1, 8, 9, 10, _, 4052, 601 ; t23a, t24a 3450*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).round 3451*c0909341SAndroid Build Coastguard Worker.main2: 3452*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).round 3453*c0909341SAndroid Build Coastguard Worker psubd m8, m0, m16 ; t17 3454*c0909341SAndroid Build Coastguard Worker paddd m0, m16 ; t16 3455*c0909341SAndroid Build Coastguard Worker psubd m16, m23, m7 ; t30 3456*c0909341SAndroid Build Coastguard Worker paddd m23, m7 ; t31 3457*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m8, m0, m16, m23 3458*c0909341SAndroid Build Coastguard Worker paddd m7, m20, m4 ; t19 3459*c0909341SAndroid Build Coastguard Worker psubd m20, m4 ; t18 3460*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m8, m0, m16, m23 3461*c0909341SAndroid Build Coastguard Worker paddd m4, m3, m19 ; t28 3462*c0909341SAndroid Build Coastguard Worker psubd m3, m19 ; t29 3463*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m7, m20, m4, m3 3464*c0909341SAndroid Build Coastguard Worker psubd m19, m2, m18 ; t21 3465*c0909341SAndroid Build Coastguard Worker paddd m2, m18 ; t20 3466*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m7, m20, m4, m3 3467*c0909341SAndroid Build Coastguard Worker psubd m18, m21, m5 ; t26 3468*c0909341SAndroid Build Coastguard Worker paddd m21, m5 ; t27 3469*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m19, m2, m18, m21 3470*c0909341SAndroid Build Coastguard Worker psubd m5, m22, m6 ; t22 3471*c0909341SAndroid Build Coastguard Worker paddd m6, m22 ; t23 3472*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m19, m2, m18, m21 3473*c0909341SAndroid Build Coastguard Worker psubd m22, m1, m17 ; t25 3474*c0909341SAndroid Build Coastguard Worker paddd m17, m1 ; t24 3475*c0909341SAndroid Build Coastguard Worker 
REPX {pmaxsd x, m14}, m5, m6, m22, m17 3476*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m5, m6, m22, m17 3477*c0909341SAndroid Build Coastguard Worker.main3: 3478*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_4017)] 3479*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_799)] 3480*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 16, 8, 9, 1, _, 13, 10, 11 ; t17a, t30a 3481*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 20, 9, 1, _, 13, 10, 11, 2 ; t29a, t18a 3482*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_2276)] 3483*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_3406)] 3484*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 18, 19, 9, 1, _, 13, 10, 11 ; t21a, t26a 3485*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 22, 5, 9, 1, _, 13, 10, 11, 2 ; t25a, t22a 3486*c0909341SAndroid Build Coastguard Worker paddd m1, m6, m2 ; t23a 3487*c0909341SAndroid Build Coastguard Worker psubd m6, m2 ; t20a 3488*c0909341SAndroid Build Coastguard Worker psubd m2, m17, m21 ; t27a 3489*c0909341SAndroid Build Coastguard Worker paddd m17, m21 ; t24a 3490*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m1, m6, m2, m17 3491*c0909341SAndroid Build Coastguard Worker psubd m21, m23, m4 ; t28a 3492*c0909341SAndroid Build Coastguard Worker paddd m23, m4 ; t31a 3493*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m1, m6, m2, m17 3494*c0909341SAndroid Build Coastguard Worker psubd m4, m16, m20 ; t18 3495*c0909341SAndroid Build Coastguard Worker paddd m16, m20 ; t17 3496*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m21, m23, m4, m16 3497*c0909341SAndroid Build Coastguard Worker psubd m20, m0, m7 ; t19a 3498*c0909341SAndroid Build Coastguard Worker paddd m0, m7 ; t16a 3499*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m21, m23, m4, m16 3500*c0909341SAndroid Build Coastguard Worker psubd m7, m8, m3 ; t29 
3501*c0909341SAndroid Build Coastguard Worker paddd m3, m8 ; t30 3502*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m20, m0, m7, m3 3503*c0909341SAndroid Build Coastguard Worker paddd m8, m5, m18 ; t22 3504*c0909341SAndroid Build Coastguard Worker psubd m5, m18 ; t21 3505*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m20, m0, m7, m3 3506*c0909341SAndroid Build Coastguard Worker psubd m18, m22, m19 ; t26 3507*c0909341SAndroid Build Coastguard Worker paddd m22, m19 ; t25 3508*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m8, m5, m18, m22 3509*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_3784)] 3510*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_1567)] 3511*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m8, m5, m18, m22 3512*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 21, 20, 9, 19, _, 13, 10, 11 ; t19, t28 3513*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 2, 6, 9, 19, _, 13, 10, 11, 2 ; t27, t20 3514*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 4, 9, 19, _, 13, 10, 11 ; t18a, t29a 3515*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 18, 5, 9, 19, _, 13, 10, 11, 2 ; t26a, t21a 3516*c0909341SAndroid Build Coastguard Worker psubd m19, m0, m1 ; t23 3517*c0909341SAndroid Build Coastguard Worker paddd m0, m1 ; t16 3518*c0909341SAndroid Build Coastguard Worker paddd m1, m8, m16 ; t17a 3519*c0909341SAndroid Build Coastguard Worker psubd m8, m16, m8 ; t22a 3520*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m19, m0, m1, m8 3521*c0909341SAndroid Build Coastguard Worker psubd m16, m23, m17 ; t24 3522*c0909341SAndroid Build Coastguard Worker paddd m23, m17 ; t31 3523*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m19, m0, m1, m8 3524*c0909341SAndroid Build Coastguard Worker psubd m17, m3, m22 ; t25a 3525*c0909341SAndroid Build Coastguard Worker paddd m22, m3 ; t30a 3526*c0909341SAndroid Build Coastguard 
Worker REPX {pmaxsd x, m14}, m16, m23, m17, m22 3527*c0909341SAndroid Build Coastguard Worker paddd m3, m6, m21 ; t19a 3528*c0909341SAndroid Build Coastguard Worker psubd m6, m21, m6 ; t20a 3529*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m16, m23, m17, m22 3530*c0909341SAndroid Build Coastguard Worker paddd m21, m18, m4 ; t29 3531*c0909341SAndroid Build Coastguard Worker psubd m18, m4, m18 ; t26 3532*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m3, m6, m21, m18 3533*c0909341SAndroid Build Coastguard Worker psubd m4, m20, m2 ; t27a 3534*c0909341SAndroid Build Coastguard Worker paddd m20, m2 ; t28a 3535*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m3, m6, m21, m18 3536*c0909341SAndroid Build Coastguard Worker paddd m2, m7, m5 ; t18 3537*c0909341SAndroid Build Coastguard Worker psubd m7, m5 ; t21 3538*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m4, m20, m2, m7 3539*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m4, m20, m2, m7 3540*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m12}, m18, m16, m4, m17, m7, m19, m6, m8 3541*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m18, m16, m4, m17 3542*c0909341SAndroid Build Coastguard Worker psubd m5, m18, m7 ; t21a 3543*c0909341SAndroid Build Coastguard Worker paddd m18, m7 ; t26a 3544*c0909341SAndroid Build Coastguard Worker psubd m7, m16, m19 ; t23a 3545*c0909341SAndroid Build Coastguard Worker paddd m16, m19 ; t24a 3546*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m5, m18, m7, m16 3547*c0909341SAndroid Build Coastguard Worker paddd m19, m4, m6 ; t27 3548*c0909341SAndroid Build Coastguard Worker psubd m4, m6 ; t20 3549*c0909341SAndroid Build Coastguard Worker psubd m6, m17, m8 ; t22 3550*c0909341SAndroid Build Coastguard Worker paddd m17, m8 ; t25 3551*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m19, m4, m6, m17 3552*c0909341SAndroid Build Coastguard Worker ret 
;-----------------------------------------------------------------------------
; inv_txfm_add_identity_identity_32x16_10bpc(dst, stride, c, eob)
;
; Identity/identity inverse transform for a 32x16 block at 10 bpc (per the
; function name). cglobal declares 4 arguments, 7 GPRs and 16 vector registers.
;
; Flow:
;   eob <  151: a single .main pass.
;   eob >= 151: .main is called once, then cq is advanced by 64*12, dstq is set
;               to the original dst + 32 bytes, and execution falls through
;               into .main for a second pass.
;
; Register roles set up here:
;   m10 = pw_2896x8, m11 = pw_1697x16, m13 = pw_2048  (pmulhrsw multipliers)
;   m14 = 0 (used to clear consumed coefficients)
;   m15 = pixel_10bpc_max, r6 = stride*9
;         (not used in this function body; presumably consumed by the
;          tail-called 16x32 .main2 -- confirm there)
;   r4  = saved dst for the second pass
;
; .main          : runs .main_internal on two consecutive 64*4-byte coefficient
;                  groups, leaving the first result in m1/m3/m5/m7 (already
;                  multiplied by m13 via pmulhrsw) and the second in
;                  m2/m4/m6/m8, then tail-jumps to
;                  m(inv_txfm_add_identity_identity_16x32_10bpc).main2.
; .main_internal : In:  cq, m10, m11, m14
;                  Out: m2, m4, m6, m8 (row pairs 0 1 / 2 3 / 4 5 / 6 7)
;                  Clobbers: m0, m9
;                  Stores m14 to [cq+64*{0..3}] and [cq+64*{8..11}].
;-----------------------------------------------------------------------------
cglobal inv_txfm_add_identity_identity_32x16_10bpc, 4, 7, 16, dst, stride, c, eob
%undef cmp                                  ; NOTE(review): drops any macro named cmp so the
                                            ; raw instruction is emitted; reason not visible here
    vpbroadcastd        m10, [pw_2896x8]
    vpbroadcastd        m11, [pw_1697x16]
    vpbroadcastd        m13, [pw_2048]
    vpbroadcastd        m15, [pixel_10bpc_max]
    lea                  r6, [strideq*9]
    pxor                m14, m14
    cmp                eobd, 151
    jl .main                                ; low eob: one pass only (signed compare, as in the original)
    mov                  r4, dstq           ; keep the original dst for the second pass
    call .main
    add                  cq, 64*12
    lea                dstq, [r4+32]        ; second pass writes 32 bytes to the right
.main:
    call .main_internal
    add                  cq, 64*4
    ; Scale the first group's results now: the second call overwrites m2..m8.
    pmulhrsw             m1, m13, m2
    pmulhrsw             m3, m13, m4
    pmulhrsw             m5, m13, m6
    pmulhrsw             m7, m13, m8
    call .main_internal
    jmp m(inv_txfm_add_identity_identity_16x32_10bpc).main2
.main_internal:
    ; Load four rows and pack each with the row 8 positions further on:
    ; 32-bit coefficients -> 16-bit with signed saturation.
    mova                 m8, [cq+64* 0]
    packssdw             m8, [cq+64* 8]
    mova                 m6, [cq+64* 1]
    packssdw             m6, [cq+64* 9]
    mova                 m0, [cq+64* 2]
    packssdw             m0, [cq+64*10]
    mova                 m2, [cq+64* 3]
    packssdw             m2, [cq+64*11]
    REPX {pmulhrsw x, m10}, m8, m6, m0, m2  ; rounded multiply by pw_2896x8
    REPX {paddsw x, x }, m8, m6, m0, m2     ; saturating doubling
    REPX {vpermq x, x, q3120}, m8, m6, m0, m2 ; qword reorder after the pack
    ; x = 2*x + pmulhrsw(x, pw_1697x16), computed with saturating adds.
    pmulhrsw             m4, m11, m8
    pmulhrsw             m9, m11, m6
    paddsw               m8, m8
    paddsw               m6, m6
    REPX {mova [cq+64*x], m14}, 0, 1, 2, 3  ; clear consumed coefficients
    paddsw               m8, m4
    paddsw               m6, m9
    pmulhrsw             m4, m11, m0
    pmulhrsw             m9, m11, m2
    paddsw               m0, m0
    paddsw               m2, m2
    REPX {mova [cq+64*x], m14}, 8, 9, 10, 11 ; clear consumed coefficients
    paddsw               m0, m4
    paddsw               m2, m9
    ; Word/dword interleave of the four rows into the output register layout.
    punpcklwd            m4, m8, m6
    punpckhwd            m8, m6
    punpcklwd            m6, m0, m2
    punpckhwd            m0, m2
    punpckldq            m2, m4, m6         ; 0 1
    punpckhdq            m4, m6             ; 2 3
    punpckldq            m6, m8, m0         ; 4 5
    punpckhdq            m8, m0             ; 6 7
    ret
; NOTE(review): the text below is the unmodified remainder of this physical source line. It belongs to the next function, which is outside this edit, and still carries extraction residue.
3612*c0909341SAndroid Build Coastguard Worker 3613*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x32_10bpc, 4, 7, 0, dst, stride, c, eob 3614*c0909341SAndroid Build Coastguard Worker%undef cmp 3615*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 3616*c0909341SAndroid Build Coastguard Worker test eobd, eobd 3617*c0909341SAndroid Build Coastguard Worker jz .dconly 3618*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 3619*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 3620*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 3621*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 3622*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 30 3623*c0909341SAndroid Build Coastguard Worker cmp eobd, 136 3624*c0909341SAndroid Build Coastguard Worker jl .fast 3625*c0909341SAndroid Build Coastguard Worker add cq, 64 3626*c0909341SAndroid Build Coastguard Worker cmp eobd, 543 3627*c0909341SAndroid Build Coastguard Worker jge .full 3628*c0909341SAndroid Build Coastguard Worker call .pass1_fast ; bottomright 16x16 zero 3629*c0909341SAndroid Build Coastguard Worker mov r6d, 16*12 3630*c0909341SAndroid Build Coastguard Worker jmp .lefthalf 3631*c0909341SAndroid Build Coastguard Worker.full: 3632*c0909341SAndroid Build Coastguard Worker call .pass1 3633*c0909341SAndroid Build Coastguard Worker mov r6d, 16*28 3634*c0909341SAndroid Build Coastguard Worker.lefthalf: 3635*c0909341SAndroid Build Coastguard Worker mova [cq+128* 0], m0 3636*c0909341SAndroid Build Coastguard Worker mova [cq+128* 1], m1 3637*c0909341SAndroid Build Coastguard Worker mova [cq+128* 2], m2 3638*c0909341SAndroid Build Coastguard Worker mova [cq+128* 3], m3 3639*c0909341SAndroid Build Coastguard Worker mova [cq+128* 4], m14 3640*c0909341SAndroid Build Coastguard Worker mova [cq+128* 5], m15 3641*c0909341SAndroid Build Coastguard Worker mova [cq+128* 6], 
m16 3642*c0909341SAndroid Build Coastguard Worker mova [cq+128* 7], m17 3643*c0909341SAndroid Build Coastguard Worker mova [cq+128* 8], m22 3644*c0909341SAndroid Build Coastguard Worker mova [cq+128* 9], m23 3645*c0909341SAndroid Build Coastguard Worker mova [cq+128*10], m24 3646*c0909341SAndroid Build Coastguard Worker mova [cq+128*11], m25 3647*c0909341SAndroid Build Coastguard Worker mova [cq+128*12], m26 3648*c0909341SAndroid Build Coastguard Worker mova [cq+128*13], m27 3649*c0909341SAndroid Build Coastguard Worker mova [cq+128*14], m28 3650*c0909341SAndroid Build Coastguard Worker mova [cq+128*15], m29 3651*c0909341SAndroid Build Coastguard Worker sub cq, 64 3652*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 3653*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 3654*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 3655*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 3656*c0909341SAndroid Build Coastguard Worker call .pass1 3657*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 3658*c0909341SAndroid Build Coastguard Worker call .pass2_start 3659*c0909341SAndroid Build Coastguard Worker pxor m12, m12 3660*c0909341SAndroid Build Coastguard Worker.right_zero_loop: 3661*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+64+128*3], m12 3662*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+64+128*2], m12 3663*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+64+128*1], m12 3664*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+64+128*0], m12 3665*c0909341SAndroid Build Coastguard Worker sub r6d, 16*4 3666*c0909341SAndroid Build Coastguard Worker jge .right_zero_loop 3667*c0909341SAndroid Build Coastguard Worker mov r6d, 16*28 3668*c0909341SAndroid Build Coastguard Worker jmp .end2 3669*c0909341SAndroid Build Coastguard Worker.pass2_start: 3670*c0909341SAndroid Build Coastguard Worker mova m4, [cq+64+128* 0] 3671*c0909341SAndroid 
Build Coastguard Worker mova m5, [cq+64+128* 1] 3672*c0909341SAndroid Build Coastguard Worker mova m6, [cq+64+128* 2] 3673*c0909341SAndroid Build Coastguard Worker mova m7, [cq+64+128* 3] 3674*c0909341SAndroid Build Coastguard Worker mova m18, [cq+64+128* 4] 3675*c0909341SAndroid Build Coastguard Worker mova m19, [cq+64+128* 5] 3676*c0909341SAndroid Build Coastguard Worker mova m20, [cq+64+128* 6] 3677*c0909341SAndroid Build Coastguard Worker mova m21, [cq+64+128* 7] 3678*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_8bpc).main 3679*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf 3680*c0909341SAndroid Build Coastguard Worker mova [cq+128*0], m14 3681*c0909341SAndroid Build Coastguard Worker mova [cq+128*1], m15 3682*c0909341SAndroid Build Coastguard Worker mova [cq+128*2], m16 3683*c0909341SAndroid Build Coastguard Worker mova [cq+128*3], m17 3684*c0909341SAndroid Build Coastguard Worker mova [cq+128*4], m18 3685*c0909341SAndroid Build Coastguard Worker mova [cq+128*5], m19 3686*c0909341SAndroid Build Coastguard Worker mova [cq+128*6], m20 3687*c0909341SAndroid Build Coastguard Worker mova [cq+128*7], m21 3688*c0909341SAndroid Build Coastguard Worker mova m14, [cq+64+128* 8] 3689*c0909341SAndroid Build Coastguard Worker mova m15, [cq+64+128* 9] 3690*c0909341SAndroid Build Coastguard Worker mova m16, [cq+64+128*10] 3691*c0909341SAndroid Build Coastguard Worker mova m17, [cq+64+128*11] 3692*c0909341SAndroid Build Coastguard Worker mova m18, [cq+64+128*12] 3693*c0909341SAndroid Build Coastguard Worker mova m19, [cq+64+128*13] 3694*c0909341SAndroid Build Coastguard Worker mova m20, [cq+64+128*14] 3695*c0909341SAndroid Build Coastguard Worker mova m21, [cq+64+128*15] 3696*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x32_8bpc).main_oddhalf 3697*c0909341SAndroid Build Coastguard Worker.fast: ; topleft 16x16 nonzero 3698*c0909341SAndroid Build Coastguard Worker cmp eobd, 36 
3699*c0909341SAndroid Build Coastguard Worker jl .fast2 3700*c0909341SAndroid Build Coastguard Worker call .pass1_fast 3701*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 3702*c0909341SAndroid Build Coastguard Worker call .pass2_fast_start 3703*c0909341SAndroid Build Coastguard Worker jmp .end 3704*c0909341SAndroid Build Coastguard Worker.pass2_fast_start: 3705*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf_fast 3706*c0909341SAndroid Build Coastguard Worker mova [cq+128*0], m14 3707*c0909341SAndroid Build Coastguard Worker mova [cq+128*1], m15 3708*c0909341SAndroid Build Coastguard Worker mova [cq+128*2], m16 3709*c0909341SAndroid Build Coastguard Worker mova [cq+128*3], m17 3710*c0909341SAndroid Build Coastguard Worker mova [cq+128*4], m18 3711*c0909341SAndroid Build Coastguard Worker mova [cq+128*5], m19 3712*c0909341SAndroid Build Coastguard Worker mova [cq+128*6], m20 3713*c0909341SAndroid Build Coastguard Worker mova [cq+128*7], m21 3714*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x32_8bpc).main_oddhalf_fast 3715*c0909341SAndroid Build Coastguard Worker.fast2: ; topleft 8x8 nonzero 3716*c0909341SAndroid Build Coastguard Worker movshdup m7, [o(permB)] 3717*c0909341SAndroid Build Coastguard Worker mova ym0, [cq+128*0] 3718*c0909341SAndroid Build Coastguard Worker mova ym1, [cq+128*4] 3719*c0909341SAndroid Build Coastguard Worker mova ym4, [cq+128*2] 3720*c0909341SAndroid Build Coastguard Worker mova ym5, [cq+128*6] 3721*c0909341SAndroid Build Coastguard Worker mova ym16, [cq+128*1] 3722*c0909341SAndroid Build Coastguard Worker mova ym2, [cq+128*5] 3723*c0909341SAndroid Build Coastguard Worker mova ym3, [cq+128*3] 3724*c0909341SAndroid Build Coastguard Worker mova ym17, [cq+128*7] 3725*c0909341SAndroid Build Coastguard Worker mov r6d, 16*4 3726*c0909341SAndroid Build Coastguard Worker vpermq m0, m7, m0 ; 0 0 3727*c0909341SAndroid Build Coastguard Worker vpermq m1, m7, m1 ; 4 
4 3728*c0909341SAndroid Build Coastguard Worker vpermt2q m4, m7, m5 ; 2 6 3729*c0909341SAndroid Build Coastguard Worker vpermt2q m16, m7, m2 ; 1 5 3730*c0909341SAndroid Build Coastguard Worker vpermt2q m17, m7, m3 ; 7 3 3731*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).main_fast2 3732*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_end 3733*c0909341SAndroid Build Coastguard Worker call .pass2_fast2_start 3734*c0909341SAndroid Build Coastguard Worker.end: 3735*c0909341SAndroid Build Coastguard Worker pxor m12, m12 3736*c0909341SAndroid Build Coastguard Worker.end2: 3737*c0909341SAndroid Build Coastguard Worker call .pass2_end 3738*c0909341SAndroid Build Coastguard Worker.zero_loop: 3739*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+128*3], m12 3740*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+128*2], m12 3741*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+128*1], m12 3742*c0909341SAndroid Build Coastguard Worker mova [cq+r6*8+128*0], m12 3743*c0909341SAndroid Build Coastguard Worker sub r6d, 16*4 3744*c0909341SAndroid Build Coastguard Worker jge .zero_loop 3745*c0909341SAndroid Build Coastguard Worker WIN64_RESTORE_XMM 3746*c0909341SAndroid Build Coastguard Worker vzeroupper 3747*c0909341SAndroid Build Coastguard Worker ret 3748*c0909341SAndroid Build Coastguard Worker.pass2_fast2_start: 3749*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).transpose_8x32 3750*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 3751*c0909341SAndroid Build Coastguard Worker punpckhqdq m22, m0, m2 ; 1 3752*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m2 ; 0 3753*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m5, m7 ; 4 3754*c0909341SAndroid Build Coastguard Worker punpckhqdq m24, m5, m7 ; 5 3755*c0909341SAndroid Build Coastguard Worker punpcklqdq m14, m3, m4 ; 2 3756*c0909341SAndroid Build Coastguard Worker punpckhqdq m23, m3, m4 
; 3 3757*c0909341SAndroid Build Coastguard Worker punpcklqdq m15, m6, m8 ; 6 3758*c0909341SAndroid Build Coastguard Worker punpckhqdq m25, m6, m8 ; 7 3759*c0909341SAndroid Build Coastguard Worker mova m10, m13 3760*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf_fast2 3761*c0909341SAndroid Build Coastguard Worker mova [cq+128*0], m14 3762*c0909341SAndroid Build Coastguard Worker mova [cq+128*1], m15 3763*c0909341SAndroid Build Coastguard Worker mova [cq+128*2], m16 3764*c0909341SAndroid Build Coastguard Worker mova [cq+128*3], m17 3765*c0909341SAndroid Build Coastguard Worker mova [cq+128*4], m18 3766*c0909341SAndroid Build Coastguard Worker mova [cq+128*5], m19 3767*c0909341SAndroid Build Coastguard Worker mova [cq+128*6], m20 3768*c0909341SAndroid Build Coastguard Worker mova [cq+128*7], m21 3769*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x32_8bpc).main_oddhalf_fast2 3770*c0909341SAndroid Build Coastguard Worker.pass2_end: 3771*c0909341SAndroid Build Coastguard Worker psubsw m9, m0, m29 ; out31 3772*c0909341SAndroid Build Coastguard Worker paddsw m0, m29 ; out0 3773*c0909341SAndroid Build Coastguard Worker psubsw m29, m1, m28 ; out30 3774*c0909341SAndroid Build Coastguard Worker paddsw m1, m28 ; out1 3775*c0909341SAndroid Build Coastguard Worker psubsw m28, m2, m27 ; out29 3776*c0909341SAndroid Build Coastguard Worker paddsw m2, m27 ; out2 3777*c0909341SAndroid Build Coastguard Worker psubsw m27, m3, m26 ; out28 3778*c0909341SAndroid Build Coastguard Worker paddsw m3, m26 ; out3 3779*c0909341SAndroid Build Coastguard Worker psubsw m26, m4, m25 ; out27 3780*c0909341SAndroid Build Coastguard Worker paddsw m4, m25 ; out4 3781*c0909341SAndroid Build Coastguard Worker psubsw m25, m5, m24 ; out26 3782*c0909341SAndroid Build Coastguard Worker paddsw m5, m24 ; out5 3783*c0909341SAndroid Build Coastguard Worker psubsw m24, m6, m23 ; out25 3784*c0909341SAndroid Build Coastguard Worker paddsw m6, m23 ; 
out6 3785*c0909341SAndroid Build Coastguard Worker psubsw m23, m7, m22 ; out24 3786*c0909341SAndroid Build Coastguard Worker paddsw m7, m22 ; out7 3787*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x8_start 3788*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128*0] 3789*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128*1] 3790*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*2] 3791*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*3] 3792*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128*4] 3793*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*5] 3794*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*6] 3795*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*7] 3796*c0909341SAndroid Build Coastguard Worker psubsw m22, m0, m21 ; out23 3797*c0909341SAndroid Build Coastguard Worker paddsw m0, m21 ; out8 3798*c0909341SAndroid Build Coastguard Worker psubsw m21, m1, m20 ; out22 3799*c0909341SAndroid Build Coastguard Worker paddsw m1, m20 ; out9 3800*c0909341SAndroid Build Coastguard Worker psubsw m20, m2, m19 ; out21 3801*c0909341SAndroid Build Coastguard Worker paddsw m2, m19 ; out10 3802*c0909341SAndroid Build Coastguard Worker psubsw m19, m3, m18 ; out20 3803*c0909341SAndroid Build Coastguard Worker paddsw m3, m18 ; out11 3804*c0909341SAndroid Build Coastguard Worker psubsw m18, m4, m17 ; out19 3805*c0909341SAndroid Build Coastguard Worker paddsw m4, m17 ; out12 3806*c0909341SAndroid Build Coastguard Worker psubsw m17, m5, m16 ; out18 3807*c0909341SAndroid Build Coastguard Worker paddsw m5, m16 ; out13 3808*c0909341SAndroid Build Coastguard Worker psubsw m16, m6, m15 ; out17 3809*c0909341SAndroid Build Coastguard Worker paddsw m6, m15 ; out14 3810*c0909341SAndroid Build Coastguard Worker psubsw m15, m7, m14 ; out16 3811*c0909341SAndroid Build Coastguard Worker paddsw m7, m14 ; out15 3812*c0909341SAndroid Build Coastguard Worker call 
m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x8 3813*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m15 3814*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m16 3815*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m17 3816*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11, m18 3817*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x4 3818*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m19 3819*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m20 3820*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m21 3821*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11, m22 3822*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x4 3823*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m23 3824*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m24 3825*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m25 3826*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11, m26 3827*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x4 3828*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m27 3829*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m28 3830*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m29 3831*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11, m9 3832*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x4 3833*c0909341SAndroid Build Coastguard Worker.dconly: 3834*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 3835*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 3836*c0909341SAndroid Build Coastguard Worker or r3d, 32 3837*c0909341SAndroid Build Coastguard Worker add r6d, 640 3838*c0909341SAndroid Build Coastguard Worker sar r6d, 10 3839*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x16_10bpc).dconly2 3840*c0909341SAndroid Build 
Coastguard Worker.pass1_fast: 3841*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 0] 3842*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 4] 3843*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128* 8] 3844*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*12] 3845*c0909341SAndroid Build Coastguard Worker mov r6d, 16*12 3846*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_fast 3847*c0909341SAndroid Build Coastguard Worker mova m16, [cq+128* 2] 3848*c0909341SAndroid Build Coastguard Worker mova m17, [cq+128* 6] 3849*c0909341SAndroid Build Coastguard Worker mova m18, [cq+128*10] 3850*c0909341SAndroid Build Coastguard Worker mova m19, [cq+128*14] 3851*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_fast 3852*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 3853*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).main_fast 3854*c0909341SAndroid Build Coastguard Worker jmp .pass1_end 3855*c0909341SAndroid Build Coastguard Worker.pass1: 3856*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 0] 3857*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 4] 3858*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128* 8] 3859*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*12] 3860*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128*16] 3861*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*20] 3862*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*24] 3863*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*28] 3864*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main 3865*c0909341SAndroid Build Coastguard Worker mova m16, [cq+128* 2] 3866*c0909341SAndroid Build Coastguard Worker mova m17, [cq+128* 6] 3867*c0909341SAndroid Build Coastguard Worker mova m18, [cq+128*10] 3868*c0909341SAndroid Build Coastguard Worker mova m19, [cq+128*14] 
3869*c0909341SAndroid Build Coastguard Worker mova m20, [cq+128*18] 3870*c0909341SAndroid Build Coastguard Worker mova m21, [cq+128*22] 3871*c0909341SAndroid Build Coastguard Worker mova m22, [cq+128*26] 3872*c0909341SAndroid Build Coastguard Worker mova m23, [cq+128*30] 3873*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main 3874*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 3875*c0909341SAndroid Build Coastguard Worker mova m16, [cq+128*17] 3876*c0909341SAndroid Build Coastguard Worker mova m17, [cq+128*19] 3877*c0909341SAndroid Build Coastguard Worker mova m18, [cq+128*21] 3878*c0909341SAndroid Build Coastguard Worker mova m19, [cq+128*23] 3879*c0909341SAndroid Build Coastguard Worker mova m20, [cq+128*25] 3880*c0909341SAndroid Build Coastguard Worker mova m21, [cq+128*27] 3881*c0909341SAndroid Build Coastguard Worker mova m22, [cq+128*29] 3882*c0909341SAndroid Build Coastguard Worker mova m23, [cq+128*31] 3883*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).main 3884*c0909341SAndroid Build Coastguard Worker.pass1_end: 3885*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_2)] 3886*c0909341SAndroid Build Coastguard Worker lea r4, [cq+128*8] 3887*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).idct32_pass1_end 3888*c0909341SAndroid Build Coastguard Worker punpckhqdq m22, m0, m20 ; 1 3889*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m20 ; 0 3890*c0909341SAndroid Build Coastguard Worker punpckhqdq m24, m2, m1 ; 5 3891*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m2, m1 ; 4 3892*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m14, m18 ; 8 3893*c0909341SAndroid Build Coastguard Worker punpckhqdq m26, m14, m18 ; 9 3894*c0909341SAndroid Build Coastguard Worker punpcklqdq m14, m15, m4 ; 2 3895*c0909341SAndroid Build Coastguard Worker punpckhqdq m23, m15, m4 ; 3 3896*c0909341SAndroid Build Coastguard Worker 
punpckhqdq m25, m3, m21 ; 7 3897*c0909341SAndroid Build Coastguard Worker punpcklqdq m15, m3, m21 ; 6 3898*c0909341SAndroid Build Coastguard Worker punpckhqdq m28, m6, m17 ; 13 3899*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, m6, m17 ; 12 3900*c0909341SAndroid Build Coastguard Worker punpckhqdq m27, m5, m16 ; 11 3901*c0909341SAndroid Build Coastguard Worker punpcklqdq m16, m5, m16 ; 10 3902*c0909341SAndroid Build Coastguard Worker punpckhqdq m29, m7, m8 ; 15 3903*c0909341SAndroid Build Coastguard Worker punpcklqdq m17, m7, m8 ; 14 3904*c0909341SAndroid Build Coastguard Worker ret 3905*c0909341SAndroid Build Coastguard Worker.pass1_load_spill: 3906*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).idct16_sumsub 3907*c0909341SAndroid Build Coastguard Worker mova [cq+128* 0], m0 3908*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 1] 3909*c0909341SAndroid Build Coastguard Worker mova [cq+128* 1], m1 3910*c0909341SAndroid Build Coastguard Worker mova [cq+128* 2], m2 3911*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 3] 3912*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128* 5] 3913*c0909341SAndroid Build Coastguard Worker mova [cq+128* 3], m3 3914*c0909341SAndroid Build Coastguard Worker mova [cq+128* 4], m4 3915*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128* 7] 3916*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128* 9] 3917*c0909341SAndroid Build Coastguard Worker mova [cq+128* 5], m5 3918*c0909341SAndroid Build Coastguard Worker mova [cq+128* 6], m6 3919*c0909341SAndroid Build Coastguard Worker mova [cq+128* 7], m7 3920*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*11] 3921*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*13] 3922*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*15] 3923*c0909341SAndroid Build Coastguard Worker mova [cq+128* 8], m23 3924*c0909341SAndroid Build Coastguard Worker mova [cq+128* 9], m22 3925*c0909341SAndroid Build 
; (Tail of the preceding function's .pass1_load_spill helper: the remaining
; spills of m21..m16 to cq+128*10..15, followed by its return.)
    mova         [cq+128*10], m21
    mova         [cq+128*11], m20
    mova         [cq+128*12], m19
    mova         [cq+128*13], m18
    mova         [cq+128*14], m17
    mova         [cq+128*15], m16
    ret

;------------------------------------------------------------------------------
; inv_txfm_add_identity_identity_32x32_10bpc(dst, stride, c, eob)
;
; Declared through cglobal with 4 arguments, 7 GPRs and 16 vector registers.
;
; Register setup performed here:
;   m13 = broadcast of pw_8192          (multiplier for the pmulhrsw in .main)
;   m14 = 0                             (stored over consumed coefficients)
;   m15 = broadcast of pixel_10bpc_max  (not read in this block; presumably
;                                        consumed by the 16x32 .main2 tail)
;   r6  = stride*9
;   r4  = copy of the incoming dst (only on the eob >= 136 path)
;
; Number of .main passes, selected by eob:
;   eob <  136 : 1 pass  (entered with jl, so .main's return leaves the function)
;   eob <  543 : 3 passes
;   otherwise  : 4 passes (the last one is reached by falling through)
;------------------------------------------------------------------------------
cglobal inv_txfm_add_identity_identity_32x32_10bpc, 4, 7, 16, dst, stride, c, eob
%undef cmp
    vpbroadcastd m13, [pw_8192]
    vpbroadcastd m15, [pixel_10bpc_max]
    pxor         m14, m14
    lea          r6, [strideq*9]
    cmp          eobd, 136
    jl .main                            ; small eob: a single pass is enough
    mov          r4, dstq               ; remember the original dst for pass 3
    call .main
    ; .main itself adds 128*4 to cq, so this nets to +64 bytes from the
    ; previous pass (assuming .main2 leaves cq untouched - TODO confirm).
    add          cq, 64-128*4
    lea          dstq, [dstq+strideq*8]
    call .main
    add          cq, 128*12-64
    lea          dstq, [r4+32]          ; 32 bytes to the right of the saved dst
    cmp          eobd, 543
    jl .main                            ; third and final pass
    call .main
    add          cq, 64-128*4
    lea          dstq, [dstq+strideq*8]
    ; fall through into the fourth pass

; One pass: two .main_internal invocations 128*4 bytes apart. The first
; result set (m2/m4/m6/m8) is scaled by m13 into m1/m3/m5/m7 before the second
; invocation overwrites m2/m4/m6/m8. Finishes by tail-jumping into .main2 of
; the 16x32 identity transform (defined elsewhere in the file).
.main:
    call .main_internal
    add          cq, 128*4
    pmulhrsw     m1, m13, m2
    pmulhrsw     m3, m13, m4
    pmulhrsw     m5, m13, m6
    pmulhrsw     m7, m13, m8
    call .main_internal
    jmp m(inv_txfm_add_identity_identity_16x32_10bpc).main2

; Loads the four rows at cq+128*{0..3}, packs each with the row 128*8 bytes
; further on (packssdw: dwords -> words with signed saturation), reorders the
; qwords with vpermq q3120, clears all eight source rows with m14 and then
; interleaves words followed by dwords.
;   Out:    m2, m4, m6, m8 (pairs "0 1", "2 3", "4 5", "6 7")
;   Clobb:  m0
.main_internal:
    mova         m8, [cq+128* 0]
    packssdw     m8, [cq+128* 8]
    mova         m6, [cq+128* 1]
    packssdw     m6, [cq+128* 9]
    mova         m0, [cq+128* 2]
    packssdw     m0, [cq+128*10]
    mova         m2, [cq+128* 3]
    packssdw     m2, [cq+128*11]
    REPX {vpermq x, x, q3120}, m8, m6, m0, m2
    REPX {mova [cq+128*x], m14}, 0, 1, 2, 3
    punpcklwd    m4, m8, m6
    punpckhwd    m8, m6
    punpcklwd    m6, m0, m2
    punpckhwd    m0, m2
    REPX {mova [cq+128*x], m14}, 8, 9, 10, 11
    punpckldq    m2, m4, m6 ; 0 1
    punpckhdq    m4, m6     ; 2 3
    punpckldq    m6, m8, m0 ; 4 5
    punpckhdq    m8, m0     ; 6 7
    ret

Workercglobal inv_txfm_add_dct_dct_16x64_10bpc, 4, 7, 0, dst, stride, c, eob 3985*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 3986*c0909341SAndroid Build Coastguard Worker test eobd, eobd 3987*c0909341SAndroid Build Coastguard Worker jz .dconly 3988*c0909341SAndroid Build Coastguard Worker 3989*c0909341SAndroid Build Coastguard Worker PROLOGUE 4, 7, 32, -8*mmsize, dst, stride, c, eob 3990*c0909341SAndroid Build Coastguard Worker%undef cmp 3991*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 3992*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 3993*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 3994*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 3995*c0909341SAndroid Build Coastguard Worker cmp eobd, 36 3996*c0909341SAndroid Build Coastguard Worker jl .fast 3997*c0909341SAndroid Build Coastguard Worker call .pass1 3998*c0909341SAndroid Build Coastguard Worker cmp eobd, 151 3999*c0909341SAndroid Build Coastguard Worker jge .full 4000*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 4001*c0909341SAndroid Build Coastguard Worker 4002*c0909341SAndroid Build Coastguard Worker punpckhwd m22, m0, m0 4003*c0909341SAndroid Build Coastguard Worker punpckhwd m23, m1, m1 4004*c0909341SAndroid Build Coastguard Worker punpckhwd m24, m2, m2 4005*c0909341SAndroid Build Coastguard Worker punpckhwd m25, m3, m3 4006*c0909341SAndroid Build Coastguard Worker punpckhwd m26, m4, m4 4007*c0909341SAndroid Build Coastguard Worker punpckhwd m27, m5, m5 4008*c0909341SAndroid Build Coastguard Worker punpckhwd m28, m6, m6 4009*c0909341SAndroid Build Coastguard Worker punpckhwd m29, m7, m7 4010*c0909341SAndroid Build Coastguard Worker punpcklwd m21, m1, m1 4011*c0909341SAndroid Build Coastguard Worker punpcklwd m14, m3, m3 4012*c0909341SAndroid Build Coastguard Worker punpcklwd m18, m5, m5 4013*c0909341SAndroid Build Coastguard Worker punpcklwd m15, m7, m7 
4014*c0909341SAndroid Build Coastguard Worker pxor m9, m9 4015*c0909341SAndroid Build Coastguard Worker punpcklwd m9, m9, m0 4016*c0909341SAndroid Build Coastguard Worker punpcklwd m8, m2, m2 4017*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m4, m4 4018*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m6, m6 4019*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main_fast2 4020*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast2 4021*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*0], m14 4022*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*1], m15 4023*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*2], m16 4024*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*3], m17 4025*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*4], m18 4026*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*5], m19 4027*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*6], m20 4028*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*7], m21 4029*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_8bpc).main_oddhalf_fast 4030*c0909341SAndroid Build Coastguard Worker 4031*c0909341SAndroid Build Coastguard Worker pxor m12, m12 4032*c0909341SAndroid Build Coastguard Worker mov r3d, 64*3 4033*c0909341SAndroid Build Coastguard Worker.zero_loop: 4034*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+r3*8+128*x], m12}, 0, 1, 2, 3 4035*c0909341SAndroid Build Coastguard Worker sub r3d, 64 4036*c0909341SAndroid Build Coastguard Worker jge .zero_loop 4037*c0909341SAndroid Build Coastguard Worker 4038*c0909341SAndroid Build Coastguard Worker jmp .pass2_end 4039*c0909341SAndroid Build Coastguard Worker.full: 4040*c0909341SAndroid Build Coastguard Worker mova [cq+128*0], m0 4041*c0909341SAndroid Build Coastguard Worker mova [cq+128*1], m1 4042*c0909341SAndroid Build Coastguard Worker mova [cq+128*2], m2 
4043*c0909341SAndroid Build Coastguard Worker mova [cq+128*3], m3 4044*c0909341SAndroid Build Coastguard Worker mova [cq+128*4], m4 4045*c0909341SAndroid Build Coastguard Worker mova [cq+128*5], m5 4046*c0909341SAndroid Build Coastguard Worker mova [cq+128*6], m6 4047*c0909341SAndroid Build Coastguard Worker mova [cq+128*7], m7 4048*c0909341SAndroid Build Coastguard Worker add cq, 64 4049*c0909341SAndroid Build Coastguard Worker call .pass1 4050*c0909341SAndroid Build Coastguard Worker sub cq, 64 4051*c0909341SAndroid Build Coastguard Worker mova m22, [cq+128*0] ; 0 1 4052*c0909341SAndroid Build Coastguard Worker mova m23, [cq+128*1] ; 2 3 4053*c0909341SAndroid Build Coastguard Worker mova m24, [cq+128*2] ; 4 5 4054*c0909341SAndroid Build Coastguard Worker mova m25, [cq+128*3] ; 6 7 4055*c0909341SAndroid Build Coastguard Worker mova m26, [cq+128*4] ; 8 9 4056*c0909341SAndroid Build Coastguard Worker mova m27, [cq+128*5] ; 10 11 4057*c0909341SAndroid Build Coastguard Worker mova m28, [cq+128*6] ; 12 13 4058*c0909341SAndroid Build Coastguard Worker mova m29, [cq+128*7] ; 14 15 4059*c0909341SAndroid Build Coastguard Worker mova [cq+64* 8], m0 4060*c0909341SAndroid Build Coastguard Worker mova [cq+64* 9], m1 4061*c0909341SAndroid Build Coastguard Worker mova [cq+64*10], m2 4062*c0909341SAndroid Build Coastguard Worker mova [cq+64*11], m3 4063*c0909341SAndroid Build Coastguard Worker mova [cq+64*12], m4 4064*c0909341SAndroid Build Coastguard Worker mova [cq+64*13], m5 4065*c0909341SAndroid Build Coastguard Worker mova [cq+64*14], m6 4066*c0909341SAndroid Build Coastguard Worker mova [cq+64*15], m7 4067*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 4068*c0909341SAndroid Build Coastguard Worker 4069*c0909341SAndroid Build Coastguard Worker punpcklwd m20, m1, m1 4070*c0909341SAndroid Build Coastguard Worker punpcklwd m16, m3, m3 4071*c0909341SAndroid Build Coastguard Worker punpcklwd m19, m5, m5 4072*c0909341SAndroid Build Coastguard Worker punpcklwd m17, 
m7, m7 4073*c0909341SAndroid Build Coastguard Worker punpcklwd m8, m24, m24 ; 4 4074*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m2, m2 ; 20 4075*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m28, m28 ; 12 4076*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m26, m26 ; 8 4077*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m4, m4 ; 24 4078*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m6, m6 ; 28 4079*c0909341SAndroid Build Coastguard Worker pxor m9, m9 4080*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m9, m0 ; __ 16 4081*c0909341SAndroid Build Coastguard Worker mova m0, m4 4082*c0909341SAndroid Build Coastguard Worker punpcklwd m9, m9, m22 ; __ 0 4083*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main_fast 4084*c0909341SAndroid Build Coastguard Worker punpcklwd m21, m23, m23 ; 2 4085*c0909341SAndroid Build Coastguard Worker punpcklwd m15, m29, m29 ; 14 4086*c0909341SAndroid Build Coastguard Worker punpcklwd m18, m27, m27 ; 10 4087*c0909341SAndroid Build Coastguard Worker punpcklwd m14, m25, m25 ; 6 4088*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast 4089*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*0], m14 4090*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*1], m15 4091*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*2], m16 4092*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*3], m17 4093*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*4], m18 4094*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*5], m19 4095*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*6], m20 4096*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*7], m21 4097*c0909341SAndroid Build Coastguard Worker mova m21, [cq+64*15] 4098*c0909341SAndroid Build Coastguard Worker mova m14, [cq+64* 8] 4099*c0909341SAndroid Build Coastguard Worker mova m17, [cq+64*11] 4100*c0909341SAndroid Build 
Coastguard Worker mova m18, [cq+64*12] 4101*c0909341SAndroid Build Coastguard Worker mova m19, [cq+64*13] 4102*c0909341SAndroid Build Coastguard Worker mova m16, [cq+64*10] 4103*c0909341SAndroid Build Coastguard Worker mova m15, [cq+64* 9] 4104*c0909341SAndroid Build Coastguard Worker mova m20, [cq+64*14] 4105*c0909341SAndroid Build Coastguard Worker REPX {punpckhwd x, x}, m22, m21, m14, m29, m26, m17, m18, m25, \ 4106*c0909341SAndroid Build Coastguard Worker m24, m19, m16, m27, m28, m15, m20, m23 4107*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_8bpc).main_oddhalf 4108*c0909341SAndroid Build Coastguard Worker 4109*c0909341SAndroid Build Coastguard Worker pxor m12, m12 4110*c0909341SAndroid Build Coastguard Worker mov r3d, 32*7 4111*c0909341SAndroid Build Coastguard Worker.full_zero_loop: 4112*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+r3*8+64*x], m12}, 0, 1, 2, 3 4113*c0909341SAndroid Build Coastguard Worker sub r3d, 32 4114*c0909341SAndroid Build Coastguard Worker jge .full_zero_loop 4115*c0909341SAndroid Build Coastguard Worker 4116*c0909341SAndroid Build Coastguard Worker jmp .pass2_end 4117*c0909341SAndroid Build Coastguard Worker.fast: 4118*c0909341SAndroid Build Coastguard Worker mova ym0, [cq+128*0] 4119*c0909341SAndroid Build Coastguard Worker mova ym2, [cq+128*4] 4120*c0909341SAndroid Build Coastguard Worker movshdup m8, [o(permB)] 4121*c0909341SAndroid Build Coastguard Worker mova ym1, [cq+128*2] 4122*c0909341SAndroid Build Coastguard Worker mova ym3, [cq+128*6] 4123*c0909341SAndroid Build Coastguard Worker mova ym4, [cq+128*1] 4124*c0909341SAndroid Build Coastguard Worker mova ym5, [cq+128*3] 4125*c0909341SAndroid Build Coastguard Worker mova ym6, [cq+128*5] 4126*c0909341SAndroid Build Coastguard Worker mova ym7, [cq+128*7] 4127*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m8, m2 ; 0 4 4128*c0909341SAndroid Build Coastguard Worker vpermt2q m1, m8, m3 ; 2 6 4129*c0909341SAndroid Build Coastguard Worker 
vpermt2q m4, m8, m5 ; 1 3 4130*c0909341SAndroid Build Coastguard Worker vpermt2q m7, m8, m6 ; 7 5 4131*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main_fast 4132*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).main_fast 4133*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_2)] 4134*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_end2 4135*c0909341SAndroid Build Coastguard Worker mova m8, [o(idct8x32p)] 4136*c0909341SAndroid Build Coastguard Worker packssdw m0, m4 4137*c0909341SAndroid Build Coastguard Worker packssdw m1, m5 4138*c0909341SAndroid Build Coastguard Worker packssdw m2, m6 4139*c0909341SAndroid Build Coastguard Worker packssdw m3, m7 4140*c0909341SAndroid Build Coastguard Worker mova m6, [dup16_perm] 4141*c0909341SAndroid Build Coastguard Worker vpermb m0, m8, m0 4142*c0909341SAndroid Build Coastguard Worker vpermb m2, m8, m2 4143*c0909341SAndroid Build Coastguard Worker vprold m8, 16 4144*c0909341SAndroid Build Coastguard Worker vpermb m1, m8, m1 4145*c0909341SAndroid Build Coastguard Worker vpermb m3, m8, m3 4146*c0909341SAndroid Build Coastguard Worker punpckldq m4, m0, m2 4147*c0909341SAndroid Build Coastguard Worker punpckhdq m0, m2 4148*c0909341SAndroid Build Coastguard Worker punpckldq m2, m1, m3 4149*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m3 4150*c0909341SAndroid Build Coastguard Worker punpckldq m21, m4, m2 4151*c0909341SAndroid Build Coastguard Worker punpckhdq m14, m4, m2 4152*c0909341SAndroid Build Coastguard Worker punpckldq m18, m0, m1 4153*c0909341SAndroid Build Coastguard Worker punpckhdq m15, m0, m1 4154*c0909341SAndroid Build Coastguard Worker vpord m7, m6, [o(pb_32)] {1to16} 4155*c0909341SAndroid Build Coastguard Worker vpermb m22, m7, m21 ; 1 4156*c0909341SAndroid Build Coastguard Worker pmovzxwd m9, ym21 ; 0 4157*c0909341SAndroid Build Coastguard Worker vpermb m8, m6, m18 ; 4 4158*c0909341SAndroid Build Coastguard 
Worker vpermb m24, m7, m18 ; 5 4159*c0909341SAndroid Build Coastguard Worker vpermb m21, m6, m14 ; 2 4160*c0909341SAndroid Build Coastguard Worker vpermb m23, m7, m14 ; 3 4161*c0909341SAndroid Build Coastguard Worker vpermb m14, m6, m15 ; 6 4162*c0909341SAndroid Build Coastguard Worker vpermb m25, m7, m15 ; 7 4163*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 4164*c0909341SAndroid Build Coastguard Worker pslld m9, 16 4165*c0909341SAndroid Build Coastguard Worker 4166*c0909341SAndroid Build Coastguard Worker pxor m7, m7 4167*c0909341SAndroid Build Coastguard Worker REPX {mova x, m7}, m1, m18, m15, m26, m27, m28, m29 4168*c0909341SAndroid Build Coastguard Worker 4169*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main_fast2 4170*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast2 4171*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*0], m14 4172*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*1], m15 4173*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*2], m16 4174*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*3], m17 4175*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*4], m18 4176*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*5], m19 4177*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*6], m20 4178*c0909341SAndroid Build Coastguard Worker mova [rsp+mmsize*7], m21 4179*c0909341SAndroid Build Coastguard Worker 4180*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_8bpc).main_oddhalf_fast 4181*c0909341SAndroid Build Coastguard Worker 4182*c0909341SAndroid Build Coastguard Worker pxor m12, m12 4183*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+128*x], ym12}, 0, 1, 2, 3, 4, 5, 6, 7 4184*c0909341SAndroid Build Coastguard Worker.pass2_end: 4185*c0909341SAndroid Build Coastguard Worker movshdup m30, [permC] 4186*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, 
[pw_2048] 4187*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [pixel_10bpc_max] 4188*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*3] 4189*c0909341SAndroid Build Coastguard Worker psrlq m31, m30, 8 4190*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m0 4191*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m1 4192*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4193*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m2 4194*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m3 4195*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4196*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m4 4197*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m5 4198*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4199*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m6 4200*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m7 4201*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4202*c0909341SAndroid Build Coastguard Worker 4203*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+mmsize*0] 4204*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+mmsize*1] 4205*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+mmsize*2] 4206*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+mmsize*3] 4207*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+mmsize*4] 4208*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+mmsize*5] 4209*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+mmsize*6] 4210*c0909341SAndroid Build Coastguard Worker mova m8, [rsp+mmsize*7] 4211*c0909341SAndroid Build Coastguard Worker 4212*c0909341SAndroid Build Coastguard Worker paddsw m0, m1, m21 4213*c0909341SAndroid Build Coastguard Worker psubsw m21, m1, m21 4214*c0909341SAndroid Build Coastguard Worker paddsw m1, m2, m20 4215*c0909341SAndroid Build Coastguard Worker 
psubsw m20, m2, m20 4216*c0909341SAndroid Build Coastguard Worker paddsw m2, m3, m19 4217*c0909341SAndroid Build Coastguard Worker psubsw m19, m3, m19 4218*c0909341SAndroid Build Coastguard Worker paddsw m3, m4, m18 4219*c0909341SAndroid Build Coastguard Worker psubsw m18, m4, m18 4220*c0909341SAndroid Build Coastguard Worker paddsw m4, m5, m17 4221*c0909341SAndroid Build Coastguard Worker psubsw m17, m5, m17 4222*c0909341SAndroid Build Coastguard Worker paddsw m5, m6, m16 4223*c0909341SAndroid Build Coastguard Worker psubsw m16, m6, m16 4224*c0909341SAndroid Build Coastguard Worker paddsw m6, m7, m15 4225*c0909341SAndroid Build Coastguard Worker psubsw m15, m7, m15 4226*c0909341SAndroid Build Coastguard Worker paddsw m7, m8, m14 4227*c0909341SAndroid Build Coastguard Worker psubsw m14, m8, m14 4228*c0909341SAndroid Build Coastguard Worker 4229*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m0 4230*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m1 4231*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4232*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m2 4233*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m3 4234*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4235*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m4 4236*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m5 4237*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4238*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m6 4239*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m7 4240*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4241*c0909341SAndroid Build Coastguard Worker 4242*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m14 4243*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m15 4244*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 
4245*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m16 4246*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m17 4247*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4248*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m18 4249*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m19 4250*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4251*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m20 4252*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m21 4253*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4254*c0909341SAndroid Build Coastguard Worker 4255*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m22 4256*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m23 4257*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4258*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m24 4259*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m25 4260*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4261*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m26 4262*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m27 4263*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4264*c0909341SAndroid Build Coastguard Worker vpermq m8, m30, m28 4265*c0909341SAndroid Build Coastguard Worker vpermq m9, m31, m29 4266*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4 4267*c0909341SAndroid Build Coastguard Worker RET 4268*c0909341SAndroid Build Coastguard Worker.pass1: 4269*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 0] 4270*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 2] 4271*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128* 4] 4272*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128* 6] 4273*c0909341SAndroid Build 
; Tail of the preceding function (its head lies above this span). The
; instructions are unchanged; only the fused line-number residue was removed.
    mova         m4, [cq+128* 8]
    mova         m5, [cq+128*10]
    mova         m6, [cq+128*12]
    mova         m7, [cq+128*14]
    call         m(idct_8x16_internal_10bpc).main
    mova         m16, [cq+128* 1]
    mova         m17, [cq+128* 3]
    mova         m18, [cq+128* 5]
    mova         m19, [cq+128* 7]
    mova         m20, [cq+128* 9]
    mova         m21, [cq+128*11]
    mova         m22, [cq+128*13]
    mova         m23, [cq+128*15]
    call         m(idct_16x16_internal_10bpc).main
    call         m(idct_16x16_internal_10bpc).main_end
    jmp          m(idct_16x16_internal_10bpc).main_end3
.dconly:
    imul         r6d, [cq], 181
    mov          [cq], eobd
    or           r3d, 64
    add          r6d, 640
    sar          r6d, 10
    jmp          m(inv_txfm_add_dct_dct_16x8_10bpc).dconly2

;-----------------------------------------------------------------------------
; inv_txfm_add_dct_dct_32x64_10bpc(dst, stride, c, eob)
;
; Arguments (cglobal order, so r0..r3): dst, stride, c (coefficients), eob.
;
; Dispatch on eob:
;   eob == 0    .dconly   tail-jumps to the 32x16 10bpc function's .dconly3
;   eob <  36   .fast2    "topleft 8x8 nonzero"
;   eob < 136   .fast     "topleft 16x16 nonzero"
;   eob < 543   right half through .pass1_fast ("bottomright 16x16 zero"),
;               left half through .pass1
;   otherwise   .pass1 for both halves
;
; "Right half" means the +64 byte half of every 128-byte coefficient row
; (see `add cq, 64` and .right_zero_loop); "left half" is the +0 half.
;
; Pass 1 uses the 10bpc idct helpers. Pass 2 re-points r5 at o_base_8bpc and
; reuses the 8bpc 32x64 / 32x16 / 32x32 helpers, then .pass2_end adds the
; result to dst with clamping to [0, pixel_10bpc_max].
;
; r3d (the eob register) is recycled as the counter for the coefficient
; clearing loops: 16*28, 16*12 or 16*4, i.e. 32, 16 or 8 rows of 128 bytes.
;
; Stack: 40 slots of 64 bytes. Slots 0..31 are walked by .pass2_end through
; stklo/stkhi (assuming mmsize == 64 here - TODO confirm); slots 32..39 hold
; m14..m21 spilled by the .pass2_*start routines.
;-----------------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_32x64_10bpc, 4, 7, 0, dst, stride, c, eob
    lea          r5, [o_base]
    test         eobd, eobd
    jz           .dconly                  ; eob == 0: DC-only shortcut
    PROLOGUE     4, 7, 32, -64*40, dst, stride, c, eob
%undef cmp
    vpbroadcastd m12, [o(pd_2896)]
    vpbroadcastd m13, [o(pd_2048)]
    vpbroadcastd m14, [o(clip_18b_min)]
    vpbroadcastd m15, [o(clip_18b_max)]
    cmp          eobd, 136
    jl           .fast
    add          cq, 64                   ; process the right half first
    cmp          eobd, 543
    jge          .full
    call         .pass1_fast              ; bottomright 16x16 zero
    jmp          .lefthalf
.full:
    call         .pass1
    mov          r3d, 16*28               ; clear counter: 32 rows
.lefthalf:
    ; cq still points at the right half: park the 16 registers returned by
    ; .pass1_end in right-half rows 0..15. .pass2_start reloads part of them
    ; from [cq+128*k+64] after cq has been moved back.
    mova         [cq+128* 0], m27
    mova         [cq+128* 1], m14
    mova         [cq+128* 2], m28
    mova         [cq+128* 3], m15
    mova         [cq+128* 4], m22
    mova         [cq+128* 5], m23
    mova         [cq+128* 6], m24
    mova         [cq+128* 7], m25
    mova         [cq+128* 8], m0
    mova         [cq+128* 9], m26
    mova         [cq+128*10], m20
    mova         [cq+128*11], m21
    mova         [cq+128*12], m18
    mova         [cq+128*13], m16
    mova         [cq+128*14], m17
    mova         [cq+128*15], m3
    sub          cq, 64                   ; back to the left half
    ; .pass1_end overwrote m14/m15 with results, so the constants are
    ; reloaded (m12/m13 as well) before the second first-pass call.
    vpbroadcastd m12, [o(pd_2896)]
    vpbroadcastd m13, [o(pd_2048)]
    vpbroadcastd m14, [o(clip_18b_min)]
    vpbroadcastd m15, [o(clip_18b_max)]
    call         .pass1
    call         .pass2_start

    pxor         m31, m31                 ; zero: clears coefs, later the lower clamp
.right_zero_loop:
    ; r3*8 is a byte offset; each iteration clears four right-half rows.
    REPX {mova [cq+r3*8+64+128*x], m31}, 0, 1, 2, 3
    sub          r3d, 16*4
    jge          .right_zero_loop
    mov          r3d, 16*28               ; the left half is always cleared in full
    jmp          .left_zero_loop

; Second-pass setup when both halves went through pass 1. Called, so
; [rsp+gprsize] presumably skips the pushed return address.
.pass2_start:
    vpbroadcastd m10, [o(pd_2048)]        ; loaded before r5 changes base
    lea          r5, [o_base_8bpc]

    lea          r4, [rsp+gprsize]
    mova         m1, [cq+128*15+64]
    mova         m2, [cq+128* 8+64]
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1
    mova         m0, m21
    mova         m1, [cq+128*12+64]
    mova         m2, [cq+128*11+64]
    mova         m3, m18
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1
    mova         m0, m20
    mova         m1, [cq+128*13+64]
    mova         m2, [cq+128*10+64]
    mova         m3, m16
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1
    mova         m0, m26
    mova         m1, [cq+128*14+64]
    mova         m2, [cq+128* 9+64]
    mova         m3, m17
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part2

    mova         m0, m27
    mova         m1, m28
    mova         m2, [cq+128* 0+64]
    mova         m3, [cq+128* 2+64]
    mova         m16, [cq+128* 1+64]
    mova         m17, [cq+128* 3+64]
    call         m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf_fast
    mova         m26, [cq+128* 4+64]
    mova         m27, [cq+128* 5+64]
    mova         m28, [cq+128* 6+64]
    mova         m29, [cq+128* 7+64]
    ; Spill m14..m21 to stack slots 32..39; .pass2_end reloads them as m0..m7.
    mova         [rsp+64*32+gprsize], m14
    mova         [rsp+64*33+gprsize], m15
    mova         [rsp+64*34+gprsize], m16
    mova         [rsp+64*35+gprsize], m17
    mova         [rsp+64*36+gprsize], m18
    mova         [rsp+64*37+gprsize], m19
    mova         [rsp+64*38+gprsize], m20
    mova         [rsp+64*39+gprsize], m21
    ; Tail call: the helper's ret returns to the caller of .pass2_start.
    jmp          m(inv_txfm_add_dct_dct_32x32_8bpc).main_oddhalf_fast

.fast: ; topleft 16x16 nonzero
    cmp          eobd, 36
    jl           .fast2
    call         .pass1_fast              ; also sets r3d = 16*12
    vpbroadcastd m10, [o(pd_2048)]
    call         .pass2_fast_start
    jmp          .end

.fast2: ; topleft 8x8 nonzero
    movshdup     m7, [o(permB)]
    ; Only the low 256 bits (ym) of rows 0..7 are loaded.
    mova         ym0, [cq+128*0]
    mova         ym1, [cq+128*4]
    mova         ym4, [cq+128*2]
    mova         ym5, [cq+128*6]
    mova         ym16, [cq+128*1]
    mova         ym2, [cq+128*5]
    mova         ym3, [cq+128*3]
    mova         ym17, [cq+128*7]
    mov          r3d, 16*4                ; clear counter: 8 rows
    vpermq       m0, m7, m0               ; 0 0
    vpermq       m1, m7, m1               ; 4 4
    vpermt2q     m4, m7, m5               ; 2 6
    vpermt2q     m16, m7, m2              ; 1 5
    vpermt2q     m17, m7, m3              ; 7 3
    ; x = (x * 2896 + 2048) >> 12
    REPX {pmulld x, m12}, m0, m1, m4, m16, m17
    REPX {paddd x, m13}, m0, m1, m4, m16, m17
    REPX {psrad x, 12 }, m0, m1, m4, m16, m17
    call         m(inv_txfm_add_dct_dct_32x8_10bpc).main_fast2
    vpbroadcastd m11, [o(pd_1)]
    call         m(idct_16x16_internal_10bpc).main_end2

    call         m(inv_txfm_add_dct_dct_32x8_10bpc).transpose_8x32
    punpcklqdq   m27, m0, m2              ; 0
    punpckhqdq   m0, m2                   ; 1
    punpcklqdq   m22, m3, m4              ; 2
    punpckhqdq   m26, m3, m4              ; 3
    punpcklqdq   m14, m5, m7              ; 4
    punpckhqdq   m20, m5, m7              ; 5
    punpcklqdq   m23, m6, m8              ; 6
    punpckhqdq   m21, m6, m8              ; 7

    mova         m10, m13                 ; m13 still holds pd_2048
    call         .pass2_fast2_start
.end:

    pxor         m31, m31                 ; zero: clears coefs, later the lower clamp

.left_zero_loop:
    ; r3*8 is a byte offset; each iteration clears four left-half rows.
    REPX {mova [cq+r3*8+128*x], m31}, 0, 1, 2, 3
    sub          r3d, 16*4
    jge          .left_zero_loop

    call         .pass2_end
    RET

; Final idct64 butterflies plus add-to-destination. Expects m31 == 0 (set
; right before the zero loops above). From here on r3..r6 are renamed to
; dst2, stride32, stklo, stkhi; r5 no longer holds a table base, which is
; why the two constants below are addressed without o().
.pass2_end:
    DEFINE_ARGS dst, stride, _, dst2, stride32, stklo, stkhi
    vpbroadcastd m30, [pixel_10bpc_max]   ; upper clamp
    vpbroadcastd m13, [pw_2048]           ; pmulhrsw by 2048 == (x + 8) >> 4

    mov          stride32q, strideq
    shl          stride32q, 5             ; stride32 = 32 * stride
    lea          stkhiq, [rsp+31*mmsize+gprsize]
    lea          dst2q, [dstq+stride32q]
    lea          stkloq, [rsp+gprsize]
    sub          dst2q, strideq           ; dst31

    paddsw       m8, m0, m29              ; t0[idct32]
    psubsw       m9, m0, m29              ; t31[idct32]
    call         .end_sumsub_write
    paddsw       m8, m1, m28              ; t1[idct32]
    psubsw       m9, m1, m28              ; t30[idct32]
    call         .end_sumsub_write
    paddsw       m8, m2, m27              ; t2[idct32]
    psubsw       m9, m2, m27              ; t29[idct32]
    call         .end_sumsub_write
    paddsw       m8, m3, m26              ; t3[idct32]
    psubsw       m9, m3, m26              ; t28[idct32]
    call         .end_sumsub_write
    paddsw       m8, m4, m25              ; t4[idct32]
    psubsw       m9, m4, m25              ; t27[idct32]
    call         .end_sumsub_write
    paddsw       m8, m5, m24              ; t5[idct32]
    psubsw       m9, m5, m24              ; t26[idct32]
    call         .end_sumsub_write
    paddsw       m8, m6, m23              ; t6[idct32]
    psubsw       m9, m6, m23              ; t25[idct32]
    call         .end_sumsub_write
    paddsw       m8, m7, m22              ; t7[idct32]
    psubsw       m9, m7, m22              ; t24[idct32]
    call         .end_sumsub_write
    ; Reload the eight registers spilled to slots 32..39 by .pass2_*start.
    mova         m0, [rsp+64*32+gprsize]
    mova         m1, [rsp+64*33+gprsize]
    mova         m2, [rsp+64*34+gprsize]
    mova         m3, [rsp+64*35+gprsize]
    mova         m4, [rsp+64*36+gprsize]
    mova         m5, [rsp+64*37+gprsize]
    mova         m6, [rsp+64*38+gprsize]
    mova         m7, [rsp+64*39+gprsize]
    paddsw       m8, m0, m21              ; t8[idct32]
    psubsw       m9, m0, m21              ; t23[idct32]
    call         .end_sumsub_write
    paddsw       m8, m1, m20              ; t9[idct32]
    psubsw       m9, m1, m20              ; t22[idct32]
    call         .end_sumsub_write
    paddsw       m8, m2, m19              ; t10[idct32]
    psubsw       m9, m2, m19              ; t21[idct32]
    call         .end_sumsub_write
    paddsw       m8, m3, m18              ; t11[idct32]
    psubsw       m9, m3, m18              ; t20[idct32]
    call         .end_sumsub_write
    paddsw       m8, m4, m17              ; t12[idct32]
    psubsw       m9, m4, m17              ; t19[idct32]
    call         .end_sumsub_write
    paddsw       m8, m5, m16              ; t13[idct32]
    psubsw       m9, m5, m16              ; t18[idct32]
    call         .end_sumsub_write
    paddsw       m8, m6, m15              ; t14[idct32]
    psubsw       m9, m6, m15              ; t17[idct32]
    call         .end_sumsub_write
    paddsw       m8, m7, m14              ; t15[idct32]
    psubsw       m9, m7, m14              ; t16[idct32]
    ; fall-through: the 16th pair uses the ret below to leave .pass2_end

; In:  m8 = t(n), m9 = t(31-n) of the idct32 half; stklo/stkhi point at
;      t(32+n) / t(63-n); m13 = pw_2048; m30 = pixel max; m31 = 0.
; Out: rows n, 31-n, 32+n and 63-n of dst updated; pointers stepped by one.
; Clobbers m8..m12.
.end_sumsub_write:
    mova         m10, [stkhiq]            ; t63-n
    mova         m12, [stkloq]            ; t32+n
    psubsw       m11, m8, m10             ; out63-n
    paddsw       m8, m10                  ; out0 +n
    psubsw       m10, m9, m12             ; out32+n
    paddsw       m9, m12                  ; out31-n
    REPX {pmulhrsw x, m13}, m11, m8, m10, m9
    paddw        m8, [dstq]
    paddw        m9, [dst2q]
    paddw        m10, [dstq+stride32q]
    paddw        m11, [dst2q+stride32q]
    REPX {pminsw x, m30}, m11, m8, m10, m9
    REPX {pmaxsw x, m31}, m11, m8, m10, m9
    mova         [dstq], m8
    mova         [dst2q], m9
    mova         [dstq+stride32q], m10
    mova         [dst2q+stride32q], m11
    add          stkloq, mmsize
    sub          stkhiq, mmsize
    add          dstq, strideq
    sub          dst2q, strideq
    ret

; Second-pass setup for the .fast path (only the left half was transformed).
; Same shape as .pass2_start but with the *_fast / *_fast2 8bpc helpers and
; no reloads from the coefficient buffer.
.pass2_fast_start:
    lea          r5, [o_base_8bpc]
    lea          r4, [rsp+gprsize]
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1_fast
    mova         m0, m21
    mova         m3, m18
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1_fast
    mova         m0, m20
    mova         m3, m16
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1_fast
    mova         m0, m26
    mova         m3, m17
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1_fast
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part2

    mova         m0, m27
    mova         m1, m28
    call         m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf_fast2
    mova         [rsp+64*32+gprsize], m14
    mova         [rsp+64*33+gprsize], m15
    mova         [rsp+64*34+gprsize], m16
    mova         [rsp+64*35+gprsize], m17
    mova         [rsp+64*36+gprsize], m18
    mova         [rsp+64*37+gprsize], m19
    mova         [rsp+64*38+gprsize], m20
    mova         [rsp+64*39+gprsize], m21
    jmp          m(inv_txfm_add_dct_dct_32x32_8bpc).main_oddhalf_fast2

; Second-pass setup for the .fast2 path; uses the *_fast2 / *_fast3 helpers.
.pass2_fast2_start:
    lea          r5, [o_base_8bpc]
    lea          r4, [rsp+gprsize]
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1_fast2
    mova         m0, m21
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1_fast2
    mova         m0, m20
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1_fast2
    mova         m0, m26
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part1_fast2
    call         m(inv_txfm_add_dct_dct_32x64_8bpc).main_part2

    mova         m0, m27
    call         m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf_fast3
    mova         [rsp+64*32+gprsize], m14
    mova         [rsp+64*33+gprsize], m15
    mova         [rsp+64*34+gprsize], m16
    mova         [rsp+64*35+gprsize], m17
    mova         [rsp+64*36+gprsize], m18
    mova         [rsp+64*37+gprsize], m19
    mova         [rsp+64*38+gprsize], m20
    mova         [rsp+64*39+gprsize], m21
    jmp          m(inv_txfm_add_dct_dct_32x32_8bpc).main_oddhalf_fast3

; eob == 0. Reached before PROLOGUE has run.
.dconly:
    ; Restore the argument names that .pass2_end's DEFINE_ARGS redefined
    ; earlier in the file text.
    DEFINE_ARGS dst, stride, c, eob
    imul         r6d, [cq], 181
    mov          [cq], eobd               ; eob is 0 here, so this clears the DC coef
    or           r3d, 64                  ; r3d = 64; presumably the row count for .dconly3
    jmp          m(inv_txfm_add_dct_dct_32x16_10bpc).dconly3

; First pass over one half when only rows 0..15 are read. Inputs are
; pre-multiplied by m12 (pd_2896); any rounding/shift is presumably done
; inside the *_rect2 helpers, which are not visible here.
; Sets r3d = 16*12 (clear counter: 16 rows).
.pass1_fast:
    pmulld       m0, m12, [cq+128* 0]
    pmulld       m1, m12, [cq+128* 4]
    pmulld       m2, m12, [cq+128* 8]
    pmulld       m3, m12, [cq+128*12]
    mov          r3d, 16*12
    call         m(idct_8x16_internal_10bpc).main_fast_rect2
    pmulld       m16, m12, [cq+128* 2]
    pmulld       m17, m12, [cq+128* 6]
    pmulld       m18, m12, [cq+128*10]
    pmulld       m19, m12, [cq+128*14]
    call         m(idct_16x16_internal_10bpc).main_fast_rect2
    call         .pass1_load_spill
    call         m(inv_txfm_add_dct_dct_32x16_10bpc).main_fast_rect2
    jmp          .pass1_end

; First pass over one half reading all 32 rows: rows 0,4,..,28, then
; 2,6,..,30, then (via .pass1_load_spill) 1,3,..,15, then 17,19,..,31.
.pass1:
    pmulld       m0, m12, [cq+128* 0]
    pmulld       m1, m12, [cq+128* 4]
    pmulld       m2, m12, [cq+128* 8]
    pmulld       m3, m12, [cq+128*12]
    pmulld       m4, m12, [cq+128*16]
    pmulld       m5, m12, [cq+128*20]
    pmulld       m6, m12, [cq+128*24]
    pmulld       m7, m12, [cq+128*28]
    call         m(idct_8x16_internal_10bpc).main_rect2
    pmulld       m16, m12, [cq+128* 2]
    pmulld       m17, m12, [cq+128* 6]
    pmulld       m18, m12, [cq+128*10]
    pmulld       m19, m12, [cq+128*14]
    pmulld       m20, m12, [cq+128*18]
    pmulld       m21, m12, [cq+128*22]
    pmulld       m22, m12, [cq+128*26]
    pmulld       m23, m12, [cq+128*30]
    call         m(idct_16x16_internal_10bpc).main_rect2
    call         .pass1_load_spill
    pmulld       m16, m12, [cq+128*17]
    pmulld       m17, m12, [cq+128*19]
    pmulld       m18, m12, [cq+128*21]
    pmulld       m19, m12, [cq+128*23]
    pmulld       m20, m12, [cq+128*25]
    pmulld       m21, m12, [cq+128*27]
    pmulld       m22, m12, [cq+128*29]
    pmulld       m23, m12, [cq+128*31]
    call         m(inv_txfm_add_dct_dct_32x16_10bpc).main_rect2
    ; falls through into .pass1_end

; Shared end of the first pass. The 16 results are left in (by index)
;   0:m27  1:m0   2:m22  3:m26  4:m14  5:m20  6:m23  7:m21
;   8:m28  9:m18 10:m24 11:m16 12:m15 13:m17 14:m25 15:m3
; which overwrites the clip constants held in m14/m15.
.pass1_end:
    vpbroadcastd m11, [o(pd_1)]
    lea          r4, [cq+128*8]
    call         m(inv_txfm_add_dct_dct_32x16_10bpc).idct32_pass1_end
    punpcklqdq   m27, m0, m20             ; 0
    punpckhqdq   m0, m20                  ; 1
    punpcklqdq   m24, m5, m16             ; 10
    punpckhqdq   m16, m5, m16             ; 11
    punpcklqdq   m23, m3, m21             ; 6
    punpckhqdq   m21, m3, m21             ; 7
    punpcklqdq   m25, m7, m8              ; 14
    punpckhqdq   m3, m7, m8               ; 15
    punpcklqdq   m22, m15, m4             ; 2
    punpckhqdq   m26, m15, m4             ; 3
    punpcklqdq   m15, m6, m17             ; 12
    punpckhqdq   m17, m6, m17             ; 13
    punpcklqdq   m28, m14, m18            ; 8
    punpckhqdq   m18, m14, m18            ; 9
    punpcklqdq   m14, m2, m1              ; 4
    punpckhqdq   m20, m2, m1              ; 5
    ret

; Stores m0..m7 and m23..m16 into rows 0..15 of the coefficient buffer while
; loading the odd rows 1,3,..,15 (times m12) into m0..m7. The interleaving is
; deliberate: every odd row is read before its slot is overwritten.
.pass1_load_spill:
    call         m(inv_txfm_add_dct_dct_32x16_10bpc).idct16_sumsub
    mova         [cq+128* 0], m0
    pmulld       m0, m12, [cq+128* 1]
    mova         [cq+128* 1], m1
    mova         [cq+128* 2], m2
    pmulld       m1, m12, [cq+128* 3]
    pmulld       m2, m12, [cq+128* 5]
    mova         [cq+128* 3], m3
    mova         [cq+128* 4], m4
    pmulld       m3, m12, [cq+128* 7]
    pmulld       m4, m12, [cq+128* 9]
    mova         [cq+128* 5], m5
    mova         [cq+128* 6], m6
    mova         [cq+128* 7], m7
    pmulld       m5, m12, [cq+128*11]
    pmulld       m6, m12, [cq+128*13]
    pmulld       m7, m12, [cq+128*15]
    mova         [cq+128* 8], m23
    mova         [cq+128* 9], m22
    mova         [cq+128*10], m21
    mova         [cq+128*11], m20
    mova         [cq+128*12], m19
    mova         [cq+128*13], m18
    mova         [cq+128*14], m17
    mova         [cq+128*15], m16
    ret

; Head of the next function (its body continues below this span); unchanged.
cglobal inv_txfm_add_dct_dct_64x16_10bpc, 4, 7, 0, dst, stride, c, eob
4688*c0909341SAndroid Build Coastguard Worker%undef cmp 4689*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 4690*c0909341SAndroid Build Coastguard Worker test eobd, eobd 4691*c0909341SAndroid Build Coastguard Worker jz .dconly 4692*c0909341SAndroid Build Coastguard Worker 4693*c0909341SAndroid Build Coastguard Worker PROLOGUE 4, 7, 32, -64*32, dst, stride, c, eob 4694*c0909341SAndroid Build Coastguard Worker%undef cmp 4695*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 4696*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 4697*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 4698*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 4699*c0909341SAndroid Build Coastguard Worker cmp eobd, 36 4700*c0909341SAndroid Build Coastguard Worker jl .fast ; 8x8 4701*c0909341SAndroid Build Coastguard Worker cmp eobd, 151 4702*c0909341SAndroid Build Coastguard Worker jge .full ; 16x16 4703*c0909341SAndroid Build Coastguard Worker lea r4, [idct64_mul_16bpc] 4704*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+4*64] 4705*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 1] 4706*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*15] 4707*c0909341SAndroid Build Coastguard Worker call .main_part1_fast 4708*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 7] 4709*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64* 9] 4710*c0909341SAndroid Build Coastguard Worker call .main_part1_fast 4711*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 5] 4712*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*11] 4713*c0909341SAndroid Build Coastguard Worker call .main_part1_fast 4714*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 3] 4715*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*13] 4716*c0909341SAndroid Build Coastguard Worker call .main_part1_fast 4717*c0909341SAndroid Build Coastguard Worker call .main_part2 
4718*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 0] 4719*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64* 8] 4720*c0909341SAndroid Build Coastguard Worker mova m16, [cq+64* 4] 4721*c0909341SAndroid Build Coastguard Worker mova m17, [cq+64*12] 4722*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_fast2 4723*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_fast2 4724*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).idct16_sumsub 4725*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 4726*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).main_fast2 4727*c0909341SAndroid Build Coastguard Worker mov r6d, 12*8 4728*c0909341SAndroid Build Coastguard Worker jmp .idct64_end 4729*c0909341SAndroid Build Coastguard Worker.full: 4730*c0909341SAndroid Build Coastguard Worker lea r4, [idct64_mul_16bpc] 4731*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+4*64] 4732*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 1] 4733*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*31] 4734*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*17] 4735*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*15] 4736*c0909341SAndroid Build Coastguard Worker call .main_part1 4737*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 7] 4738*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*25] 4739*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*23] 4740*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64* 9] 4741*c0909341SAndroid Build Coastguard Worker call .main_part1 4742*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 5] 4743*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*27] 4744*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*21] 4745*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*11] 4746*c0909341SAndroid Build Coastguard Worker call 
.main_part1 4747*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 3] 4748*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*29] 4749*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*19] 4750*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*13] 4751*c0909341SAndroid Build Coastguard Worker call .main_part1 4752*c0909341SAndroid Build Coastguard Worker call .main_part2 4753*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 0] 4754*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64* 8] 4755*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*16] 4756*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*24] 4757*c0909341SAndroid Build Coastguard Worker mova m16, [cq+64* 4] 4758*c0909341SAndroid Build Coastguard Worker mova m17, [cq+64*12] 4759*c0909341SAndroid Build Coastguard Worker mova m18, [cq+64*20] 4760*c0909341SAndroid Build Coastguard Worker mova m19, [cq+64*28] 4761*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_fast 4762*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_fast 4763*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).idct16_sumsub 4764*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 4765*c0909341SAndroid Build Coastguard Worker mova m4, [cq+64*18] 4766*c0909341SAndroid Build Coastguard Worker mova m5, [cq+64*22] 4767*c0909341SAndroid Build Coastguard Worker mova m6, [cq+64*26] 4768*c0909341SAndroid Build Coastguard Worker mova m7, [cq+64*30] 4769*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).main_fast 4770*c0909341SAndroid Build Coastguard Worker mov r6d, 28*8 4771*c0909341SAndroid Build Coastguard Worker jmp .idct64_end 4772*c0909341SAndroid Build Coastguard Worker.dconly: 4773*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 4774*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 4775*c0909341SAndroid Build Coastguard Worker or 
r3d, 16 4776*c0909341SAndroid Build Coastguard Worker.dconly1: 4777*c0909341SAndroid Build Coastguard Worker add r6d, 640 4778*c0909341SAndroid Build Coastguard Worker sar r6d, 10 4779*c0909341SAndroid Build Coastguard Worker.dconly2: 4780*c0909341SAndroid Build Coastguard Worker vpbroadcastd m3, [o(dconly_10bpc)] 4781*c0909341SAndroid Build Coastguard Worker imul r6d, 181 4782*c0909341SAndroid Build Coastguard Worker add r6d, 2176 4783*c0909341SAndroid Build Coastguard Worker sar r6d, 12 4784*c0909341SAndroid Build Coastguard Worker vpbroadcastw m2, r6d 4785*c0909341SAndroid Build Coastguard Worker paddsw m2, m3 4786*c0909341SAndroid Build Coastguard Worker.dconly_loop: 4787*c0909341SAndroid Build Coastguard Worker paddsw m0, m2, [dstq+64*0] 4788*c0909341SAndroid Build Coastguard Worker paddsw m1, m2, [dstq+64*1] 4789*c0909341SAndroid Build Coastguard Worker psubusw m0, m3 4790*c0909341SAndroid Build Coastguard Worker psubusw m1, m3 4791*c0909341SAndroid Build Coastguard Worker mova [dstq+64*0], m0 4792*c0909341SAndroid Build Coastguard Worker mova [dstq+64*1], m1 4793*c0909341SAndroid Build Coastguard Worker add dstq, strideq 4794*c0909341SAndroid Build Coastguard Worker dec r3d 4795*c0909341SAndroid Build Coastguard Worker jg .dconly_loop 4796*c0909341SAndroid Build Coastguard Worker ret 4797*c0909341SAndroid Build Coastguard Worker.pass1_load_spill: 4798*c0909341SAndroid Build Coastguard Worker mova [cq+64* 0], m0 4799*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 2] 4800*c0909341SAndroid Build Coastguard Worker mova [cq+64* 2], m1 4801*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64* 6] 4802*c0909341SAndroid Build Coastguard Worker mova [cq+64* 4], m2 4803*c0909341SAndroid Build Coastguard Worker mova [cq+64* 6], m3 4804*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*10] 4805*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*14] 4806*c0909341SAndroid Build Coastguard Worker mova [cq+64* 8], m4 4807*c0909341SAndroid Build 
Coastguard Worker mova [cq+64*10], m5 4808*c0909341SAndroid Build Coastguard Worker mova [cq+64*12], m6 4809*c0909341SAndroid Build Coastguard Worker mova [cq+64*14], m7 4810*c0909341SAndroid Build Coastguard Worker mova [cq+64* 1], m23 4811*c0909341SAndroid Build Coastguard Worker mova [cq+64* 3], m22 4812*c0909341SAndroid Build Coastguard Worker mova [cq+64* 5], m21 4813*c0909341SAndroid Build Coastguard Worker mova [cq+64* 7], m20 4814*c0909341SAndroid Build Coastguard Worker mova [cq+64* 9], m19 4815*c0909341SAndroid Build Coastguard Worker mova [cq+64*11], m18 4816*c0909341SAndroid Build Coastguard Worker mova [cq+64*13], m17 4817*c0909341SAndroid Build Coastguard Worker mova [cq+64*15], m16 4818*c0909341SAndroid Build Coastguard Worker ret 4819*c0909341SAndroid Build Coastguard WorkerALIGN function_align 4820*c0909341SAndroid Build Coastguard Worker.main_part1_fast_rect2: 4821*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m0, m3 4822*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m3 4823*c0909341SAndroid Build Coastguard Worker.main_part1_fast: 4824*c0909341SAndroid Build Coastguard Worker pmulld m7, m0, [r4+4*0]{bcstd} ; t63a 4825*c0909341SAndroid Build Coastguard Worker pmulld m0, [r4+4*1]{bcstd} ; t32a 4826*c0909341SAndroid Build Coastguard Worker pmulld m4, m3, [r4+4*6]{bcstd} ; t60a 4827*c0909341SAndroid Build Coastguard Worker pmulld m3, [r4+4*7]{bcstd} ; t35a 4828*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [r4+4*8] 4829*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [r4+4*9] 4830*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m7, m0, m4, m3 4831*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m7, m0, m4, m3 4832*c0909341SAndroid Build Coastguard Worker mova m8, m0 4833*c0909341SAndroid Build Coastguard Worker mova m1, m7 4834*c0909341SAndroid Build Coastguard Worker mova m6, m3 4835*c0909341SAndroid Build Coastguard Worker mova m2, m4 4836*c0909341SAndroid Build 
Coastguard Worker jmp .main_part1b 4837*c0909341SAndroid Build Coastguard Worker.main_part1_rect2: 4838*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m0, m1, m2, m3 4839*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1, m2, m3 4840*c0909341SAndroid Build Coastguard Worker.main_part1: ; idct64 steps 1-5 4841*c0909341SAndroid Build Coastguard Worker ; in1/31/17/15 -> t32a/33/34a/35/60/61a/62/63a 4842*c0909341SAndroid Build Coastguard Worker ; in7/25/23/ 9 -> t56a/57/58a/59/36/37a/38/39a 4843*c0909341SAndroid Build Coastguard Worker ; in5/27/21/11 -> t40a/41/42a/43/52/53a/54/55a 4844*c0909341SAndroid Build Coastguard Worker ; in3/29/19/13 -> t48a/49/50a/51/44/45a/46/47a 4845*c0909341SAndroid Build Coastguard Worker pmulld m7, m0, [r4+4*0]{bcstd} ; t63a 4846*c0909341SAndroid Build Coastguard Worker pmulld m0, [r4+4*1]{bcstd} ; t32a 4847*c0909341SAndroid Build Coastguard Worker pmulld m6, m1, [r4+4*2]{bcstd} ; t62a 4848*c0909341SAndroid Build Coastguard Worker pmulld m1, [r4+4*3]{bcstd} ; t33a 4849*c0909341SAndroid Build Coastguard Worker pmulld m5, m2, [r4+4*4]{bcstd} ; t61a 4850*c0909341SAndroid Build Coastguard Worker pmulld m2, [r4+4*5]{bcstd} ; t34a 4851*c0909341SAndroid Build Coastguard Worker pmulld m4, m3, [r4+4*6]{bcstd} ; t60a 4852*c0909341SAndroid Build Coastguard Worker pmulld m3, [r4+4*7]{bcstd} ; t35a 4853*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [r4+4*8] 4854*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [r4+4*9] 4855*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m7, m0, m6, m1, m5, m2, m4, m3 4856*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1, m7, m6, m2, m3, m5, m4 4857*c0909341SAndroid Build Coastguard Worker psubd m8, m0, m1 ; t33 4858*c0909341SAndroid Build Coastguard Worker paddd m0, m1 ; t32 4859*c0909341SAndroid Build Coastguard Worker psubd m1, m7, m6 ; t62 4860*c0909341SAndroid Build Coastguard Worker paddd m7, m6 ; t63 4861*c0909341SAndroid 
Build Coastguard Worker psubd m6, m3, m2 ; t34 4862*c0909341SAndroid Build Coastguard Worker paddd m3, m2 ; t35 4863*c0909341SAndroid Build Coastguard Worker psubd m2, m4, m5 ; t61 4864*c0909341SAndroid Build Coastguard Worker paddd m4, m5 ; t60 4865*c0909341SAndroid Build Coastguard Worker.main_part1b: 4866*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m8, m1, m6, m2 4867*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m8, m1, m6, m2 4868*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 1, 8, 5, 9, _, 13, 10, 11 ; t33a, t62a 4869*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 2, 6, 5, 9, _, 13, 10, 11, 2 ; t61a, t34a 4870*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m0, m3, m7, m4 4871*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m0, m3, m7, m4 4872*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [r4+4*10] 4873*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [r4+4*11] 4874*c0909341SAndroid Build Coastguard Worker psubd m5, m0, m3 ; t35a 4875*c0909341SAndroid Build Coastguard Worker paddd m0, m3 ; t32a 4876*c0909341SAndroid Build Coastguard Worker psubd m3, m7, m4 ; t60a 4877*c0909341SAndroid Build Coastguard Worker paddd m7, m4 ; t63a 4878*c0909341SAndroid Build Coastguard Worker psubd m4, m1, m6 ; t34 4879*c0909341SAndroid Build Coastguard Worker paddd m1, m6 ; t33 4880*c0909341SAndroid Build Coastguard Worker psubd m6, m8, m2 ; t61 4881*c0909341SAndroid Build Coastguard Worker paddd m8, m2 ; t62 4882*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m5, m3, m4, m6 4883*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m5, m3, m4, m6 4884*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 5, 2, 9, _, 13, 10, 11 ; t35, t60 4885*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 6, 4, 2, 9, _, 13, 10, 11 ; t34a, t61a 4886*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m0, m7, m1, m8 4887*c0909341SAndroid Build 
Coastguard Worker REPX {pminsd x, m15}, m0, m7, m1, m8 4888*c0909341SAndroid Build Coastguard Worker add r4, 4*12 4889*c0909341SAndroid Build Coastguard Worker mova [r6-64*4], m0 4890*c0909341SAndroid Build Coastguard Worker mova [r6+64*3], m7 4891*c0909341SAndroid Build Coastguard Worker mova [r6-64*3], m1 4892*c0909341SAndroid Build Coastguard Worker mova [r6+64*2], m8 4893*c0909341SAndroid Build Coastguard Worker mova [r6-64*2], m6 4894*c0909341SAndroid Build Coastguard Worker mova [r6+64*1], m4 4895*c0909341SAndroid Build Coastguard Worker mova [r6-64*1], m3 4896*c0909341SAndroid Build Coastguard Worker mova [r6+64*0], m5 4897*c0909341SAndroid Build Coastguard Worker add r6, 64*8 4898*c0909341SAndroid Build Coastguard Worker ret 4899*c0909341SAndroid Build Coastguard Worker.main_part2: ; idct64 steps 6-9 4900*c0909341SAndroid Build Coastguard Worker lea r4, [r6+64*3] 4901*c0909341SAndroid Build Coastguard Worker sub r6, 64*4 4902*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_1567] 4903*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_3784] 4904*c0909341SAndroid Build Coastguard Worker.main_part2_loop: 4905*c0909341SAndroid Build Coastguard Worker mova m0, [r6-64*32] ; t32a 4906*c0909341SAndroid Build Coastguard Worker mova m1, [r4-64*24] ; t39a 4907*c0909341SAndroid Build Coastguard Worker mova m2, [r4-64*32] ; t63a 4908*c0909341SAndroid Build Coastguard Worker mova m3, [r6-64*24] ; t56a 4909*c0909341SAndroid Build Coastguard Worker mova m4, [r6-64*16] ; t40a 4910*c0909341SAndroid Build Coastguard Worker mova m5, [r4-64* 8] ; t47a 4911*c0909341SAndroid Build Coastguard Worker mova m6, [r4-64*16] ; t55a 4912*c0909341SAndroid Build Coastguard Worker mova m7, [r6-64* 8] ; t48a 4913*c0909341SAndroid Build Coastguard Worker psubd m8, m0, m1 ; t39 4914*c0909341SAndroid Build Coastguard Worker paddd m0, m1 ; t32 4915*c0909341SAndroid Build Coastguard Worker psubd m1, m2, m3 ; t56 4916*c0909341SAndroid Build Coastguard Worker paddd m2, m3 
; t63 4917*c0909341SAndroid Build Coastguard Worker psubd m3, m5, m4 ; t40 4918*c0909341SAndroid Build Coastguard Worker paddd m5, m4 ; t47 4919*c0909341SAndroid Build Coastguard Worker psubd m4, m7, m6 ; t55 4920*c0909341SAndroid Build Coastguard Worker paddd m7, m6 ; t48 4921*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m8, m1, m3, m4 4922*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m8, m1, m3, m4 4923*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 1, 8, 6, 9, _, 13, 10, 11 ; t39a, t56a 4924*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 4, 3, 6, 9, _, 13, 10, 11, 2 ; t55a, t40a 4925*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m0, m2, m5, m7 4926*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m0, m5, m2, m7 4927*c0909341SAndroid Build Coastguard Worker psubd m6, m2, m7 ; t48a 4928*c0909341SAndroid Build Coastguard Worker paddd m2, m7 ; t63a 4929*c0909341SAndroid Build Coastguard Worker psubd m7, m0, m5 ; t47a 4930*c0909341SAndroid Build Coastguard Worker paddd m0, m5 ; t32a 4931*c0909341SAndroid Build Coastguard Worker psubd m5, m8, m4 ; t55 4932*c0909341SAndroid Build Coastguard Worker paddd m8, m4 ; t56 4933*c0909341SAndroid Build Coastguard Worker psubd m4, m1, m3 ; t40 4934*c0909341SAndroid Build Coastguard Worker paddd m1, m3 ; t39 4935*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m6, m7, m5, m4 4936*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m6, m7, m5, m4 4937*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m12}, m6, m7, m5, m4 4938*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m2, m0, m8, m1 4939*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m2, m0, m8, m1 4940*c0909341SAndroid Build Coastguard Worker paddd m6, m13 4941*c0909341SAndroid Build Coastguard Worker paddd m5, m13 4942*c0909341SAndroid Build Coastguard Worker psubd m3, m6, m7 ; t47 4943*c0909341SAndroid Build Coastguard Worker 
paddd m6, m7 ; t48 4944*c0909341SAndroid Build Coastguard Worker psubd m7, m5, m4 ; t40a 4945*c0909341SAndroid Build Coastguard Worker paddd m5, m4 ; t55a 4946*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m3, m6, m7, m5 4947*c0909341SAndroid Build Coastguard Worker mova [r4-64* 8], m2 4948*c0909341SAndroid Build Coastguard Worker mova [r6-64*32], m0 4949*c0909341SAndroid Build Coastguard Worker mova [r6-64* 8], m8 4950*c0909341SAndroid Build Coastguard Worker mova [r4-64*32], m1 4951*c0909341SAndroid Build Coastguard Worker mova [r4-64*24], m3 4952*c0909341SAndroid Build Coastguard Worker mova [r6-64*16], m6 4953*c0909341SAndroid Build Coastguard Worker mova [r6-64*24], m7 4954*c0909341SAndroid Build Coastguard Worker mova [r4-64*16], m5 4955*c0909341SAndroid Build Coastguard Worker add r6, 64 4956*c0909341SAndroid Build Coastguard Worker sub r4, 64 4957*c0909341SAndroid Build Coastguard Worker cmp r6, r4 4958*c0909341SAndroid Build Coastguard Worker jl .main_part2_loop 4959*c0909341SAndroid Build Coastguard Worker ret 4960*c0909341SAndroid Build Coastguard Worker.idct64_main_end: 4961*c0909341SAndroid Build Coastguard Worker%macro IDCT64_PASS1_END 9 ; src idx (x128 bytes off src base), in reg (reused for t32+n), t32+n slot, t63-n slot (x64 bytes off r3), regs for out0+n, out31-n, out32+n, out63-n, src base 4962*c0909341SAndroid Build Coastguard Worker mova m%5, [%9+%1*128] ; t0+n [idct32] + idct64 rounding 4963*c0909341SAndroid Build Coastguard Worker psubd m%6, m%5, m%2 ; out31-n [idct32] = t31-n [idct64] 4964*c0909341SAndroid Build Coastguard Worker paddd m%5, m%2 ; out0+n [idct32] = t0+n [idct64] 4965*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m%6, m%5 4966*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m%6, m%5 4967*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m%6, m%5 4968*c0909341SAndroid Build Coastguard Worker mova m%2, [r3+%3*64] ; t32+n [idct64] 4969*c0909341SAndroid Build Coastguard Worker mova m%7, [r3+%4*64] ; t63-n [idct64] 4970*c0909341SAndroid Build Coastguard Worker psubd m%8, m%5, m%7 ; out63-n 4971*c0909341SAndroid Build Coastguard
Worker paddd m%5, m%7 ; out0+n 4972*c0909341SAndroid Build Coastguard Worker psubd m%7, m%6, m%2 ; out32+n 4973*c0909341SAndroid Build Coastguard Worker paddd m%6, m%2 ; out31-n 4974*c0909341SAndroid Build Coastguard Worker REPX {vpsravd x, m11}, m%8, m%5, m%7, m%6 4975*c0909341SAndroid Build Coastguard Worker%endmacro 4976*c0909341SAndroid Build Coastguard Worker 4977*c0909341SAndroid Build Coastguard Worker%macro IDCT64_PASS1_ENDx4 1 ; n; instantiated below with n = 0..3 4978*c0909341SAndroid Build Coastguard Worker%assign %%m1 %1 ; t32+n 4979*c0909341SAndroid Build Coastguard Worker%assign %%m2 (7-%1) ; t39-n 4980*c0909341SAndroid Build Coastguard Worker%assign %%m3 (8+%1) ; t40+n 4981*c0909341SAndroid Build Coastguard Worker%assign %%m4 (15-%1) ; t47-n 4982*c0909341SAndroid Build Coastguard Worker%assign %%m5 (16+%1) ; t48+n 4983*c0909341SAndroid Build Coastguard Worker%assign %%m6 (23-%1) ; t55-n 4984*c0909341SAndroid Build Coastguard Worker%assign %%m7 (24+%1) ; t56+n 4985*c0909341SAndroid Build Coastguard Worker%assign %%m8 (31-%1) ; t63-n 4986*c0909341SAndroid Build Coastguard Worker 4987*c0909341SAndroid Build Coastguard Worker%assign %%r1 %1 ; t16+n 4988*c0909341SAndroid Build Coastguard Worker%assign %%r2 (7-%1) ; t23-n 4989*c0909341SAndroid Build Coastguard Worker%assign %%r3 (16+%1) ; t24+n 4990*c0909341SAndroid Build Coastguard Worker%assign %%r4 (23-%1) ; t31-n 4991*c0909341SAndroid Build Coastguard Worker 4992*c0909341SAndroid Build Coastguard Worker%assign %%c1 (%1) ; t0/8+n 4993*c0909341SAndroid Build Coastguard Worker%assign %%c2 (7-%1) ; t7/15-n 4994*c0909341SAndroid Build Coastguard Worker 4995*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_END %%c1, %%r4, %%m1, %%m8, 24, 25, 26, 27, cq ; out0/31/32/63 4996*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_END %%c1, %%r1, %%m4, %%m5, 28, 29, 30, 31, r4 ; out15/16/47/48 4997*c0909341SAndroid Build Coastguard Worker packssdw m %+ %%r1, m24, m29 4998*c0909341SAndroid Build Coastguard Worker packssdw m %+ %%r4, m28, m25
4999*c0909341SAndroid Build Coastguard Worker packssdw m26, m31 5000*c0909341SAndroid Build Coastguard Worker packssdw m30, m27 5001*c0909341SAndroid Build Coastguard Worker mova [r3+%%m5*mmsize], m26 5002*c0909341SAndroid Build Coastguard Worker mova [r3+%%m8*mmsize], m30 5003*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_END %%c2, %%r3, %%m2, %%m7, 24, 25, 26, 27, cq ; out7/24/39/56 5004*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_END %%c2, %%r2, %%m3, %%m6, 28, 29, 30, 31, r4 ; out8/23/40/55 5005*c0909341SAndroid Build Coastguard Worker packssdw m %+ %%r2, m24, m29 5006*c0909341SAndroid Build Coastguard Worker packssdw m %+ %%r3, m28, m25 5007*c0909341SAndroid Build Coastguard Worker packssdw m26, m31 5008*c0909341SAndroid Build Coastguard Worker packssdw m30, m27 5009*c0909341SAndroid Build Coastguard Worker mova [r3+%%m6*mmsize], m26 5010*c0909341SAndroid Build Coastguard Worker mova [r3+%%m7*mmsize], m30 5011*c0909341SAndroid Build Coastguard Worker%endmacro 5012*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_ENDx4 0 5013*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_ENDx4 1 5014*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_ENDx4 2 5015*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_ENDx4 3 5016*c0909341SAndroid Build Coastguard Worker ret 5017*c0909341SAndroid Build Coastguard Worker.idct64_end: 5018*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_2)] 5019*c0909341SAndroid Build Coastguard Worker lea r4, [cq+64] 5020*c0909341SAndroid Build Coastguard Worker mov r3, rsp 5021*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 5022*c0909341SAndroid Build Coastguard Worker call .idct64_main_end 5023*c0909341SAndroid Build Coastguard Worker 5024*c0909341SAndroid Build Coastguard Worker pxor m12, m12 5025*c0909341SAndroid Build Coastguard Worker.zero_loop: 5026*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+r6*8+64*x], m12}, 0, 1, 2, 3 5027*c0909341SAndroid Build Coastguard
Worker sub r6d, 8*4 5028*c0909341SAndroid Build Coastguard Worker jge .zero_loop 5029*c0909341SAndroid Build Coastguard Worker 5030*c0909341SAndroid Build Coastguard Worker lea r3, [strideq*3] 5031*c0909341SAndroid Build Coastguard Worker mov r4, dstq 5032*c0909341SAndroid Build Coastguard Worker call .pass2 5033*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+16*mmsize] 5034*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+17*mmsize] 5035*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+18*mmsize] 5036*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+19*mmsize] 5037*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+20*mmsize] 5038*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+21*mmsize] 5039*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+22*mmsize] 5040*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+23*mmsize] 5041*c0909341SAndroid Build Coastguard Worker mova m16, [rsp+24*mmsize] 5042*c0909341SAndroid Build Coastguard Worker mova m17, [rsp+25*mmsize] 5043*c0909341SAndroid Build Coastguard Worker mova m18, [rsp+26*mmsize] 5044*c0909341SAndroid Build Coastguard Worker mova m19, [rsp+27*mmsize] 5045*c0909341SAndroid Build Coastguard Worker mova m20, [rsp+28*mmsize] 5046*c0909341SAndroid Build Coastguard Worker mova m21, [rsp+29*mmsize] 5047*c0909341SAndroid Build Coastguard Worker mova m22, [rsp+30*mmsize] 5048*c0909341SAndroid Build Coastguard Worker mova m23, [rsp+31*mmsize] 5049*c0909341SAndroid Build Coastguard Worker lea dstq, [r4+64] 5050*c0909341SAndroid Build Coastguard Worker call .pass2 5051*c0909341SAndroid Build Coastguard Worker RET 5052*c0909341SAndroid Build Coastguard Worker.pass2: 5053*c0909341SAndroid Build Coastguard Worker psrlq m12, [permC], 24 ; 0 2 8 10 1 3 9 11 5054*c0909341SAndroid Build Coastguard Worker psrlq m13, m12, 32 ; 4 6 12 14 5 7 13 15 5055*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).transpose_16x32 5056*c0909341SAndroid Build Coastguard 
Worker 5057*c0909341SAndroid Build Coastguard Worker punpckhqdq m19, m5, m16 ; 11 5058*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, m16 ; 10 5059*c0909341SAndroid Build Coastguard Worker punpckhqdq m16, m2, m1 ; 5 5060*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m1 ; 4 5061*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m15, m4 ; 2 5062*c0909341SAndroid Build Coastguard Worker punpckhqdq m15, m4 ; 3 5063*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m14, m18 ; 8 5064*c0909341SAndroid Build Coastguard Worker punpckhqdq m18, m14, m18 ; 9 5065*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m0, m20 ; 1 5066*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m20 ; 0 5067*c0909341SAndroid Build Coastguard Worker punpckhqdq m20, m6, m17 ; 13 5068*c0909341SAndroid Build Coastguard Worker punpcklqdq m6, m17 ; 12 5069*c0909341SAndroid Build Coastguard Worker punpckhqdq m17, m3, m21 ; 7 5070*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, m21 ; 6 5071*c0909341SAndroid Build Coastguard Worker punpckhqdq m21, m7, m8 ; 15 5072*c0909341SAndroid Build Coastguard Worker punpcklqdq m7, m8 ; 14 5073*c0909341SAndroid Build Coastguard Worker 5074*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_8bpc).main 5075*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf 5076*c0909341SAndroid Build Coastguard Worker.write: 5077*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pw_2048] 5078*c0909341SAndroid Build Coastguard Worker pxor m12, m12 5079*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [pixel_10bpc_max] 5080*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x8 5081*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m14 5082*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m15 5083*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m16 5084*c0909341SAndroid Build 
Coastguard Worker pmulhrsw m3, m11, m17 5085*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x4 5086*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m18 5087*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m19 5088*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m20 5089*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11, m21 5090*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x8_10bpc).write_32x4 5091*c0909341SAndroid Build Coastguard Worker.fast: ; 8x8 packed 5092*c0909341SAndroid Build Coastguard Worker movshdup m7, [o(permB)] 5093*c0909341SAndroid Build Coastguard Worker mova ym0, [cq+64*1] 5094*c0909341SAndroid Build Coastguard Worker mova ym2, [cq+64*5] 5095*c0909341SAndroid Build Coastguard Worker mova ym3, [cq+64*3] 5096*c0909341SAndroid Build Coastguard Worker mova ym1, [cq+64*7] 5097*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m7, m2 ; 1 5 5098*c0909341SAndroid Build Coastguard Worker vpermt2q m1, m7, m3 ; 7 3 5099*c0909341SAndroid Build Coastguard Worker call .main_oddhalf_packed 5100*c0909341SAndroid Build Coastguard Worker mova [rsp+ 0*mmsize], m0 5101*c0909341SAndroid Build Coastguard Worker mova [rsp+ 1*mmsize], m1 5102*c0909341SAndroid Build Coastguard Worker mova [rsp+ 2*mmsize], m2 5103*c0909341SAndroid Build Coastguard Worker mova [rsp+ 3*mmsize], m3 5104*c0909341SAndroid Build Coastguard Worker mova [rsp+ 4*mmsize], m4 5105*c0909341SAndroid Build Coastguard Worker mova [rsp+ 5*mmsize], m5 5106*c0909341SAndroid Build Coastguard Worker mova [rsp+ 6*mmsize], m6 5107*c0909341SAndroid Build Coastguard Worker mova [rsp+ 7*mmsize], m7 5108*c0909341SAndroid Build Coastguard Worker mova [rsp+ 8*mmsize], m16 5109*c0909341SAndroid Build Coastguard Worker mova [rsp+ 9*mmsize], m17 5110*c0909341SAndroid Build Coastguard Worker mova [rsp+10*mmsize], m18 5111*c0909341SAndroid Build Coastguard Worker mova [rsp+11*mmsize], m19 5112*c0909341SAndroid 
Build Coastguard Worker mova [rsp+12*mmsize], m20 5113*c0909341SAndroid Build Coastguard Worker mova [rsp+13*mmsize], m21 5114*c0909341SAndroid Build Coastguard Worker mova [rsp+14*mmsize], m22 5115*c0909341SAndroid Build Coastguard Worker mova [rsp+15*mmsize], m23 5116*c0909341SAndroid Build Coastguard Worker 5117*c0909341SAndroid Build Coastguard Worker movshdup m7, [o(permB)] 5118*c0909341SAndroid Build Coastguard Worker mova ym0, [cq+64*0] 5119*c0909341SAndroid Build Coastguard Worker mova ym4, [cq+64*4] 5120*c0909341SAndroid Build Coastguard Worker mova ym16, [cq+64*2] 5121*c0909341SAndroid Build Coastguard Worker mova ym5, [cq+64*6] 5122*c0909341SAndroid Build Coastguard Worker vpermt2q m16, m7, m5 ; 2 6 5123*c0909341SAndroid Build Coastguard Worker vpermq m0, m7, m0 ; 0 0 5124*c0909341SAndroid Build Coastguard Worker vpermq m4, m7, m4 ; 4 4 5125*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).main_fast3 5126*c0909341SAndroid Build Coastguard Worker ; m0-7,9,16-22 contain un-sumsub'ed dct32 output data 5127*c0909341SAndroid Build Coastguard Worker 5128*c0909341SAndroid Build Coastguard Worker ; zero input coefs 5129*c0909341SAndroid Build Coastguard Worker pxor m12, m12 5130*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+x*64], ym12}, 0, 1, 2, 3, 4, 5, 6, 7 5131*c0909341SAndroid Build Coastguard Worker 5132*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_2)] 5133*c0909341SAndroid Build Coastguard Worker call .main_end 5134*c0909341SAndroid Build Coastguard Worker lea r3, [strideq*3] 5135*c0909341SAndroid Build Coastguard Worker mov r4, dstq 5136*c0909341SAndroid Build Coastguard Worker call .pass2_fast 5137*c0909341SAndroid Build Coastguard Worker mova m0, m24 5138*c0909341SAndroid Build Coastguard Worker mova m1, m25 5139*c0909341SAndroid Build Coastguard Worker mova m2, m26 5140*c0909341SAndroid Build Coastguard Worker mova m3, m27 5141*c0909341SAndroid Build Coastguard Worker mova m4, m28 
5142*c0909341SAndroid Build Coastguard Worker mova m5, m29 5143*c0909341SAndroid Build Coastguard Worker mova m6, m30 5144*c0909341SAndroid Build Coastguard Worker mova m7, m31 5145*c0909341SAndroid Build Coastguard Worker lea dstq, [r4+64] 5146*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 5147*c0909341SAndroid Build Coastguard Worker call .pass2_fast 5148*c0909341SAndroid Build Coastguard Worker RET 5149*c0909341SAndroid Build Coastguard Worker.pass2_fast: 5150*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).transpose_8x32 5151*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 5152*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m0, m2 ; 1 5153*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m2 ; 0 5154*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m3, m4 ; 2 5155*c0909341SAndroid Build Coastguard Worker punpckhqdq m15, m3, m4 ; 3 5156*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m5, m7 ; 4 5157*c0909341SAndroid Build Coastguard Worker punpckhqdq m16, m5, m7 ; 5 5158*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, m6, m8 ; 6 5159*c0909341SAndroid Build Coastguard Worker punpckhqdq m17, m6, m8 ; 7 5160*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_8bpc).main_oddhalf_fast 5161*c0909341SAndroid Build Coastguard Worker jmp .write 5162*c0909341SAndroid Build Coastguard Worker.main_end: 5163*c0909341SAndroid Build Coastguard Worker 5164*c0909341SAndroid Build Coastguard Worker%macro IDCT64_PASS1_PACKED_END 7 ; in/out0+n, in/out31-n, out32+n, out63-n, tmp, t32+n slot, t63-n slot (x64 bytes off rsp+gprsize) 5165*c0909341SAndroid Build Coastguard Worker psubd m%5, m%1, m%2 ; out31-n [idct32] = t31-n [idct64] 5166*c0909341SAndroid Build Coastguard Worker paddd m%1, m%2 ; out0+n [idct32] = t0+n [idct64] 5167*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m%5, m%1 5168*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m%5, m%1 5169*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m%5, m%1
5170*c0909341SAndroid Build Coastguard Worker mova m%2, [rsp+%6*64+gprsize] ; t32+n [idct64]; +gprsize presumably skips the return address pushed by the call into .main_end - TODO confirm 5171*c0909341SAndroid Build Coastguard Worker mova m%3, [rsp+%7*64+gprsize] ; t63-n [idct64] 5172*c0909341SAndroid Build Coastguard Worker psubd m%4, m%1, m%3 ; out63-n 5173*c0909341SAndroid Build Coastguard Worker paddd m%1, m%3 ; out0+n 5174*c0909341SAndroid Build Coastguard Worker psubd m%3, m%5, m%2 ; out32+n 5175*c0909341SAndroid Build Coastguard Worker paddd m%2, m%5 ; out31-n 5176*c0909341SAndroid Build Coastguard Worker REPX {vpsravd x, m11}, m%4, m%1, m%3, m%2 5177*c0909341SAndroid Build Coastguard Worker%endmacro 5178*c0909341SAndroid Build Coastguard Worker 5179*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_PACKED_END 0, 22, 24, 10, 12, 0, 15 ; out0/1,31/30,32/33,63/62 5180*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_PACKED_END 7, 9, 31, 13, 12, 7, 8 ; out15/14,16/17,47/46,48/49 5181*c0909341SAndroid Build Coastguard Worker packssdw m0, m9 5182*c0909341SAndroid Build Coastguard Worker packssdw m7, m22 5183*c0909341SAndroid Build Coastguard Worker packssdw m24, m13 5184*c0909341SAndroid Build Coastguard Worker packssdw m31, m10 5185*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_PACKED_END 1, 21, 25, 10, 12, 1, 14 ; out3/2,28/29,35/34,60/61 5186*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_PACKED_END 6, 16, 30, 13, 12, 6, 9 ; out12/13,19/18,44/45,51/50 5187*c0909341SAndroid Build Coastguard Worker packssdw m1, m16 5188*c0909341SAndroid Build Coastguard Worker packssdw m6, m21 5189*c0909341SAndroid Build Coastguard Worker packssdw m25, m13 5190*c0909341SAndroid Build Coastguard Worker packssdw m30, m10 5191*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_PACKED_END 2, 20, 26, 10, 12, 2, 13 ; out4/5,27/26,36/37,59/58 5192*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_PACKED_END 5, 17, 29, 13, 12, 5, 10 ; out11/10,20/21,43/42,52/53 5193*c0909341SAndroid Build Coastguard Worker packssdw m2, m17 5194*c0909341SAndroid
Build Coastguard Worker packssdw m5, m20 5195*c0909341SAndroid Build Coastguard Worker packssdw m26, m13 5196*c0909341SAndroid Build Coastguard Worker packssdw m29, m10 5197*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_PACKED_END 3, 19, 27, 10, 12, 3, 12 ; out7/6,24/25,39/38,56/57 5198*c0909341SAndroid Build Coastguard Worker IDCT64_PASS1_PACKED_END 4, 18, 28, 13, 12, 4, 11 ; out8/9,23/22,40/41,55/54 5199*c0909341SAndroid Build Coastguard Worker packssdw m3, m18 5200*c0909341SAndroid Build Coastguard Worker packssdw m4, m19 5201*c0909341SAndroid Build Coastguard Worker packssdw m27, m13 5202*c0909341SAndroid Build Coastguard Worker packssdw m28, m10 5203*c0909341SAndroid Build Coastguard Worker ret 5204*c0909341SAndroid Build Coastguard Worker.main_oddhalf_packed_rect2: 5205*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m0, m1 5206*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1 5207*c0909341SAndroid Build Coastguard Worker.main_oddhalf_packed: 5208*c0909341SAndroid Build Coastguard Worker ; m0=in1 in5, m1=in7 in3 5209*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m2, [o(pd_101_501)] 5210*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m3, [o(pd_m700_m301)] 5211*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m4, [o(pd_4095_4065)] 5212*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m5, [o(pd_4036_4085)] 5213*c0909341SAndroid Build Coastguard Worker pmulld m2, m0 5214*c0909341SAndroid Build Coastguard Worker pmulld m3, m1 5215*c0909341SAndroid Build Coastguard Worker pmulld m0, m4 5216*c0909341SAndroid Build Coastguard Worker pmulld m1, m5 5217*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m2, m3, m0, m1 5218*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m2, m3, m0, m1 5219*c0909341SAndroid Build Coastguard Worker 5220*c0909341SAndroid Build Coastguard Worker ; m2=t32a t40a -> t32/33 t40/41, m3=t39a t47a -> t38/39 t46/47 5221*c0909341SAndroid Build 
Coastguard Worker ; m0=t63a t55a -> t62/63 t54/55, m1=t56a t48a -> t56/57 t48/49 5222*c0909341SAndroid Build Coastguard Worker ; end of step 1-2 5223*c0909341SAndroid Build Coastguard Worker 5224*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m10, [o(pd_401_1931)] 5225*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m11, [o(pd_4076_3612)] 5226*c0909341SAndroid Build Coastguard Worker mova m4, m0 5227*c0909341SAndroid Build Coastguard Worker mova m5, m2 5228*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 4, 5, 8, 9, _, 13, 10, 11 5229*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m10, [o(pd_3166_3920)] 5230*c0909341SAndroid Build Coastguard Worker vbroadcasti32x4 m11, [o(pd_2598_1189)] 5231*c0909341SAndroid Build Coastguard Worker mova m6, m3 5232*c0909341SAndroid Build Coastguard Worker mova m7, m1 5233*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 6, 8, 9, _, 13, 10, 11, 2 5234*c0909341SAndroid Build Coastguard Worker 5235*c0909341SAndroid Build Coastguard Worker ; m4=t33a t41a -> t41/42 t33/34, m5=t63a t54a -> t61/62 t53/54 5236*c0909341SAndroid Build Coastguard Worker ; m6=t38a t46a -> t37/38 t45/46, m7=t57a t49a -> t57/58 t49/50 5237*c0909341SAndroid Build Coastguard Worker ; and from earlier: 5238*c0909341SAndroid Build Coastguard Worker ; m0=t63 t55 -> t60/63a t52/55a, m1=t56 t48 -> t56/59a t48/51a 5239*c0909341SAndroid Build Coastguard Worker ; m2=t32 t40 -> t32/35a t40/43a, m3=t39 t47 -> t36/39a t44/47a 5240*c0909341SAndroid Build Coastguard Worker ; end of step 3-4 5241*c0909341SAndroid Build Coastguard Worker 5242*c0909341SAndroid Build Coastguard Worker punpcklqdq m22, m2, m4 ; t32a/33 or t35a/34 5243*c0909341SAndroid Build Coastguard Worker punpcklqdq m21, m3, m6 ; t36a/37 or t39a/38 5244*c0909341SAndroid Build Coastguard Worker punpckhqdq m18, m2, m4 ; t40a/41 or t43a/42 5245*c0909341SAndroid Build Coastguard Worker punpckhqdq m17, m3, m6 ; t44a/45 or t47a/46 5246*c0909341SAndroid Build Coastguard 
Worker punpckhqdq m6, m1, m7 ; t48a/49 or t51a/50 5247*c0909341SAndroid Build Coastguard Worker punpckhqdq m19, m0, m5 ; t52a/53 or t55a/54 5248*c0909341SAndroid Build Coastguard Worker punpcklqdq m8, m1, m7 ; t56a/57 or t59a/58 5249*c0909341SAndroid Build Coastguard Worker punpcklqdq m23, m0, m5 ; t60a/61 or t63a/62 5250*c0909341SAndroid Build Coastguard Worker mova m0, m22 5251*c0909341SAndroid Build Coastguard Worker mova m7, m21 5252*c0909341SAndroid Build Coastguard Worker mova m3, m18 5253*c0909341SAndroid Build Coastguard Worker mova m16, m17 5254*c0909341SAndroid Build Coastguard Worker mova m5, m6 5255*c0909341SAndroid Build Coastguard Worker mova m4, m19 5256*c0909341SAndroid Build Coastguard Worker mova m2, m8 5257*c0909341SAndroid Build Coastguard Worker mova m1, m23 5258*c0909341SAndroid Build Coastguard Worker ; m0/22/7/21,18/3/17/16,6/5/19/4,2/8/1/23: t32-63[a] 5259*c0909341SAndroid Build Coastguard Worker 5260*c0909341SAndroid Build Coastguard Worker ; step5 5261*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_799)] 5262*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_4017)] 5263*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 1, 22, 20, 9, _, 13, 10, 11 ; t35/34a, t60/61a 5264*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 8, 7, 20, 9, _, 13, 10, 11, 2 ; t59/58a, t36/37a 5265*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_3406)] 5266*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_2276)] 5267*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 19, 3, 20, 9, _, 13, 10, 11 ; t43/42a, t52/53a 5268*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 5, 17, 20, 9, _, 13, 10, 11, 2 ; t51/50a, t44/45a 5269*c0909341SAndroid Build Coastguard Worker ; m0-1/7/21: t32-39[a], m18-19/17-16: t40-47[a] 5270*c0909341SAndroid Build Coastguard Worker ; m6-5/3-4: t48-55[a], m2/8/22-23: t56-63[a] 5271*c0909341SAndroid Build Coastguard Worker 5272*c0909341SAndroid Build Coastguard 
Worker ; step6 5273*c0909341SAndroid Build Coastguard Worker psubd m20, m0, m21 ; t39/38a 5274*c0909341SAndroid Build Coastguard Worker paddd m0, m21 ; t32/33a 5275*c0909341SAndroid Build Coastguard Worker psubd m21, m1, m7 ; t36a/37 5276*c0909341SAndroid Build Coastguard Worker paddd m1, m7 ; t35a/34 5277*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m20, m0, m21, m1 5278*c0909341SAndroid Build Coastguard Worker psubd m7, m16, m18 ; t40/41a 5279*c0909341SAndroid Build Coastguard Worker paddd m16, m18 ; t47/46a 5280*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m20, m0, m21, m1 5281*c0909341SAndroid Build Coastguard Worker psubd m18, m17, m19 ; t43a/42 5282*c0909341SAndroid Build Coastguard Worker paddd m17, m19 ; t44a/45 5283*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m7, m16, m18, m17 5284*c0909341SAndroid Build Coastguard Worker psubd m19, m6, m4 ; t55/54a 5285*c0909341SAndroid Build Coastguard Worker paddd m6, m4 ; t48/49a 5286*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m7, m16, m18, m17 5287*c0909341SAndroid Build Coastguard Worker psubd m4, m5, m3 ; t52a/53 5288*c0909341SAndroid Build Coastguard Worker paddd m5, m3 ; t51a/50 5289*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m19, m6, m4, m5 5290*c0909341SAndroid Build Coastguard Worker psubd m3, m23, m2 ; t56/57a 5291*c0909341SAndroid Build Coastguard Worker paddd m23, m2 ; t63/62a 5292*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m19, m6, m4, m5 5293*c0909341SAndroid Build Coastguard Worker psubd m2, m22, m8 ; t59a/58 5294*c0909341SAndroid Build Coastguard Worker paddd m22, m8 ; t60a/61 5295*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m3, m23, m2, m22 5296*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m3, m23, m2, m22 5297*c0909341SAndroid Build Coastguard Worker ; m0-1: t32-35[a], m17-16: t44-47[a], m6-5: t48-51[a], m22-23: t60-63[a] 5298*c0909341SAndroid Build 
Coastguard Worker ; m21-20: t36-39[a], m7/18: t40-43[a], m4/19: t52-55[a], m3-2: t56-59[a] 5299*c0909341SAndroid Build Coastguard Worker 5300*c0909341SAndroid Build Coastguard Worker ; step7 5301*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_1567)] 5302*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_3784)] 5303*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 2, 21, 8, 9, _, 13, 10, 11 ; t36/37a, t59/58a 5304*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 20, 8, 9, _, 13, 10, 11 ; t39a/38, t56a/57 5305*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 19, 7, 8, 9, _, 13, 10, 11, 2 ; t55a/54, t40a/41 5306*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 4, 18, 8, 9, _, 13, 10, 11, 2 ; t52/53a, t43/42a 5307*c0909341SAndroid Build Coastguard Worker ; m0-3: t32-39[a], m7,18-16: t40-47[a], m6-4,19: t48-55[a], m20-23: t56-63[a] 5308*c0909341SAndroid Build Coastguard Worker 5309*c0909341SAndroid Build Coastguard Worker ; step8 5310*c0909341SAndroid Build Coastguard Worker psubd m8, m0, m16 ; t47a/46 5311*c0909341SAndroid Build Coastguard Worker paddd m0, m16 ; t32a/33 5312*c0909341SAndroid Build Coastguard Worker psubd m16, m1, m17 ; t44/45a 5313*c0909341SAndroid Build Coastguard Worker paddd m1, m17 ; t35/34a 5314*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m8, m0, m16, m1 5315*c0909341SAndroid Build Coastguard Worker psubd m17, m2, m18 ; t43a/42 5316*c0909341SAndroid Build Coastguard Worker paddd m2, m18 ; t36a/37 5317*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m8, m0, m16, m1 5318*c0909341SAndroid Build Coastguard Worker psubd m18, m3, m7 ; t40/41a 5319*c0909341SAndroid Build Coastguard Worker paddd m3, m7 ; t39/38a 5320*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m17, m2, m18, m3 5321*c0909341SAndroid Build Coastguard Worker psubd m7, m23, m6 ; t48a/49 5322*c0909341SAndroid Build Coastguard Worker paddd m23, m6 ; t63a/62 5323*c0909341SAndroid 
Build Coastguard Worker REPX {pminsd x, m15}, m17, m2, m18, m3 5324*c0909341SAndroid Build Coastguard Worker psubd m6, m22, m5 ; t51/50a 5325*c0909341SAndroid Build Coastguard Worker paddd m22, m5 ; t60/61a 5326*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m7, m23, m6, m22 5327*c0909341SAndroid Build Coastguard Worker psubd m5, m21, m4 ; t52a/53 5328*c0909341SAndroid Build Coastguard Worker paddd m21, m4 ; t59a/58 5329*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m7, m23, m6, m22 5330*c0909341SAndroid Build Coastguard Worker psubd m4, m20, m19 ; t55/54a 5331*c0909341SAndroid Build Coastguard Worker paddd m20, m19 ; t56/57a 5332*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m14}, m5, m21, m4, m20 5333*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m15}, m5, m21, m4, m20 5334*c0909341SAndroid Build Coastguard Worker ; m0-3=t32-39[a], m18-16,8: t40-47[a], m7-4=t48-55[a], m20-23=t56-63[a] 5335*c0909341SAndroid Build Coastguard Worker 5336*c0909341SAndroid Build Coastguard Worker ; step9 5337*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m12}, m4, m18, m5, m17, m6, m16, m7, m8 5338*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m13}, m4, m5, m6, m7 5339*c0909341SAndroid Build Coastguard Worker paddd m19, m4, m18 ; t55a/54 5340*c0909341SAndroid Build Coastguard Worker psubd m4, m18 ; t40a/41 5341*c0909341SAndroid Build Coastguard Worker paddd m18, m5, m17 ; t52/53a 5342*c0909341SAndroid Build Coastguard Worker psubd m5, m17 ; t43/42a 5343*c0909341SAndroid Build Coastguard Worker paddd m17, m6, m16 ; t51a/50 5344*c0909341SAndroid Build Coastguard Worker psubd m6, m16 ; t44a/45 5345*c0909341SAndroid Build Coastguard Worker paddd m16, m7, m8 ; t48/49a 5346*c0909341SAndroid Build Coastguard Worker psubd m7, m8 ; t47/46a 5347*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m19, m4, m18, m5, m17, m6, m16, m7 5348*c0909341SAndroid Build Coastguard Worker ; m4-7=t40-47[a], m16-19=t48-55[a] 
5349*c0909341SAndroid Build Coastguard Worker ret 5350*c0909341SAndroid Build Coastguard Worker 5351*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_64x32_10bpc, 4, 7, 0, dst, stride, c, eob 5352*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 5353*c0909341SAndroid Build Coastguard Worker test eobd, eobd 5354*c0909341SAndroid Build Coastguard Worker jz .dconly 5355*c0909341SAndroid Build Coastguard Worker 5356*c0909341SAndroid Build Coastguard Worker PROLOGUE 4, 8, 32, -64*32, dst, stride, c, eob 5357*c0909341SAndroid Build Coastguard Worker%undef cmp 5358*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 5359*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 5360*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 5361*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 5362*c0909341SAndroid Build Coastguard Worker cmp eobd, 136 5363*c0909341SAndroid Build Coastguard Worker jl .fast 5364*c0909341SAndroid Build Coastguard Worker add cq, 64 5365*c0909341SAndroid Build Coastguard Worker cmp eobd, 543 5366*c0909341SAndroid Build Coastguard Worker jge .full 5367*c0909341SAndroid Build Coastguard Worker call .pass1_fast ; bottomright 16x16 zero 5368*c0909341SAndroid Build Coastguard Worker mov r7d, 16*12 5369*c0909341SAndroid Build Coastguard Worker jmp .lefthalf 5370*c0909341SAndroid Build Coastguard Worker.full: 5371*c0909341SAndroid Build Coastguard Worker call .pass1 5372*c0909341SAndroid Build Coastguard Worker mov r7d, 16*28 5373*c0909341SAndroid Build Coastguard Worker.lefthalf: 5374*c0909341SAndroid Build Coastguard Worker mova [cq+128* 0], m0 5375*c0909341SAndroid Build Coastguard Worker mova [cq+128* 1], m1 5376*c0909341SAndroid Build Coastguard Worker mova [cq+128* 2], m2 5377*c0909341SAndroid Build Coastguard Worker mova [cq+128* 3], m3 5378*c0909341SAndroid Build Coastguard Worker mova [cq+128* 4], m14 5379*c0909341SAndroid Build 
Coastguard Worker mova [cq+128* 5], m15 5380*c0909341SAndroid Build Coastguard Worker mova [cq+128* 6], m16 5381*c0909341SAndroid Build Coastguard Worker mova [cq+128* 7], m17 5382*c0909341SAndroid Build Coastguard Worker mova [cq+128* 8], m22 5383*c0909341SAndroid Build Coastguard Worker mova [cq+128* 9], m23 5384*c0909341SAndroid Build Coastguard Worker mova [cq+128*10], m24 5385*c0909341SAndroid Build Coastguard Worker mova [cq+128*11], m25 5386*c0909341SAndroid Build Coastguard Worker mova [cq+128*12], m26 5387*c0909341SAndroid Build Coastguard Worker mova [cq+128*13], m27 5388*c0909341SAndroid Build Coastguard Worker mova [cq+128*14], m28 5389*c0909341SAndroid Build Coastguard Worker mova [cq+128*15], m29 5390*c0909341SAndroid Build Coastguard Worker sub cq, 64 5391*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 5392*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 5393*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 5394*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 5395*c0909341SAndroid Build Coastguard Worker sub rsp, 16*64 5396*c0909341SAndroid Build Coastguard Worker call .pass1 5397*c0909341SAndroid Build Coastguard Worker add rsp, 16*64 5398*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 5399*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_start 5400*c0909341SAndroid Build Coastguard Worker mov r4, dstq 5401*c0909341SAndroid Build Coastguard Worker pxor m12, m12 5402*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_end 5403*c0909341SAndroid Build Coastguard Worker lea dstq, [r4+64] 5404*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+16*mmsize] 5405*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+17*mmsize] 5406*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+18*mmsize] 5407*c0909341SAndroid Build Coastguard Worker mova m3, 
[rsp+19*mmsize] 5408*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+20*mmsize] 5409*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+21*mmsize] 5410*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+22*mmsize] 5411*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+23*mmsize] 5412*c0909341SAndroid Build Coastguard Worker mova m16, [rsp+24*mmsize] 5413*c0909341SAndroid Build Coastguard Worker mova m17, [rsp+25*mmsize] 5414*c0909341SAndroid Build Coastguard Worker mova m18, [rsp+26*mmsize] 5415*c0909341SAndroid Build Coastguard Worker mova m19, [rsp+27*mmsize] 5416*c0909341SAndroid Build Coastguard Worker mova m20, [rsp+28*mmsize] 5417*c0909341SAndroid Build Coastguard Worker mova m21, [rsp+29*mmsize] 5418*c0909341SAndroid Build Coastguard Worker mova m22, [rsp+30*mmsize] 5419*c0909341SAndroid Build Coastguard Worker mova m23, [rsp+31*mmsize] 5420*c0909341SAndroid Build Coastguard Worker call .transpose 5421*c0909341SAndroid Build Coastguard Worker mova [cq+128* 0+64], m0 5422*c0909341SAndroid Build Coastguard Worker mova [cq+128* 1+64], m1 5423*c0909341SAndroid Build Coastguard Worker mova [cq+128* 2+64], m2 5424*c0909341SAndroid Build Coastguard Worker mova [cq+128* 3+64], m3 5425*c0909341SAndroid Build Coastguard Worker mova [cq+128* 4+64], m14 5426*c0909341SAndroid Build Coastguard Worker mova [cq+128* 5+64], m15 5427*c0909341SAndroid Build Coastguard Worker mova [cq+128* 6+64], m16 5428*c0909341SAndroid Build Coastguard Worker mova [cq+128* 7+64], m17 5429*c0909341SAndroid Build Coastguard Worker mova [cq+128* 8+64], m22 5430*c0909341SAndroid Build Coastguard Worker mova [cq+128* 9+64], m23 5431*c0909341SAndroid Build Coastguard Worker mova [cq+128*10+64], m24 5432*c0909341SAndroid Build Coastguard Worker mova [cq+128*11+64], m25 5433*c0909341SAndroid Build Coastguard Worker mova [cq+128*12+64], m26 5434*c0909341SAndroid Build Coastguard Worker mova [cq+128*13+64], m27 5435*c0909341SAndroid Build Coastguard Worker mova [cq+128*14+64], m28 
5436*c0909341SAndroid Build Coastguard Worker mova [cq+128*15+64], m29 5437*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+ 0*mmsize] 5438*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+ 1*mmsize] 5439*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+ 2*mmsize] 5440*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+ 3*mmsize] 5441*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+ 4*mmsize] 5442*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+ 5*mmsize] 5443*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+ 6*mmsize] 5444*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+ 7*mmsize] 5445*c0909341SAndroid Build Coastguard Worker mova m16, [rsp+ 8*mmsize] 5446*c0909341SAndroid Build Coastguard Worker mova m17, [rsp+ 9*mmsize] 5447*c0909341SAndroid Build Coastguard Worker mova m18, [rsp+10*mmsize] 5448*c0909341SAndroid Build Coastguard Worker mova m19, [rsp+11*mmsize] 5449*c0909341SAndroid Build Coastguard Worker mova m20, [rsp+12*mmsize] 5450*c0909341SAndroid Build Coastguard Worker mova m21, [rsp+13*mmsize] 5451*c0909341SAndroid Build Coastguard Worker mova m22, [rsp+14*mmsize] 5452*c0909341SAndroid Build Coastguard Worker mova m23, [rsp+15*mmsize] 5453*c0909341SAndroid Build Coastguard Worker call .transpose 5454*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_start 5455*c0909341SAndroid Build Coastguard Worker pxor m12, m12 5456*c0909341SAndroid Build Coastguard Worker.right_zero_loop: 5457*c0909341SAndroid Build Coastguard Worker mova [cq+r7*8+64+128*3], m12 5458*c0909341SAndroid Build Coastguard Worker mova [cq+r7*8+64+128*2], m12 5459*c0909341SAndroid Build Coastguard Worker mova [cq+r7*8+64+128*1], m12 5460*c0909341SAndroid Build Coastguard Worker mova [cq+r7*8+64+128*0], m12 5461*c0909341SAndroid Build Coastguard Worker sub r7d, 16*4 5462*c0909341SAndroid Build Coastguard Worker jge .right_zero_loop 5463*c0909341SAndroid Build Coastguard Worker mov r7d, 16*28 
5464*c0909341SAndroid Build Coastguard Worker jmp .end 5465*c0909341SAndroid Build Coastguard Worker.fast: ; topleft 16x16 nonzero 5466*c0909341SAndroid Build Coastguard Worker cmp eobd, 36 5467*c0909341SAndroid Build Coastguard Worker jl .fast2 5468*c0909341SAndroid Build Coastguard Worker call .pass1_fast 5469*c0909341SAndroid Build Coastguard Worker lea r5, [o_base_8bpc] 5470*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_fast_start 5471*c0909341SAndroid Build Coastguard Worker mov r4, dstq 5472*c0909341SAndroid Build Coastguard Worker pxor m12, m12 5473*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_end 5474*c0909341SAndroid Build Coastguard Worker lea dstq, [r4+64] 5475*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+16*mmsize] 5476*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+17*mmsize] 5477*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+18*mmsize] 5478*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+19*mmsize] 5479*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+20*mmsize] 5480*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+21*mmsize] 5481*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+22*mmsize] 5482*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+23*mmsize] 5483*c0909341SAndroid Build Coastguard Worker mova m16, [rsp+24*mmsize] 5484*c0909341SAndroid Build Coastguard Worker mova m17, [rsp+25*mmsize] 5485*c0909341SAndroid Build Coastguard Worker mova m18, [rsp+26*mmsize] 5486*c0909341SAndroid Build Coastguard Worker mova m19, [rsp+27*mmsize] 5487*c0909341SAndroid Build Coastguard Worker mova m20, [rsp+28*mmsize] 5488*c0909341SAndroid Build Coastguard Worker mova m21, [rsp+29*mmsize] 5489*c0909341SAndroid Build Coastguard Worker mova m22, [rsp+30*mmsize] 5490*c0909341SAndroid Build Coastguard Worker mova m23, [rsp+31*mmsize] 5491*c0909341SAndroid Build Coastguard Worker call .transpose 5492*c0909341SAndroid Build 
Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_fast_start 5493*c0909341SAndroid Build Coastguard Worker mov r7d, 16*12 5494*c0909341SAndroid Build Coastguard Worker pxor m12, m12 5495*c0909341SAndroid Build Coastguard Worker jmp .end 5496*c0909341SAndroid Build Coastguard Worker.fast2: ; topleft 8x8 nonzero 5497*c0909341SAndroid Build Coastguard Worker movshdup m7, [o(permB)] 5498*c0909341SAndroid Build Coastguard Worker mova ym0, [cq+128*1] 5499*c0909341SAndroid Build Coastguard Worker mova ym2, [cq+128*5] 5500*c0909341SAndroid Build Coastguard Worker mova ym3, [cq+128*3] 5501*c0909341SAndroid Build Coastguard Worker mova ym1, [cq+128*7] 5502*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m7, m2 ; 1 5 5503*c0909341SAndroid Build Coastguard Worker vpermt2q m1, m7, m3 ; 7 3 5504*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m12}, m0, m1 5505*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_oddhalf_packed_rect2 5506*c0909341SAndroid Build Coastguard Worker mova [rsp+ 0*mmsize], m0 5507*c0909341SAndroid Build Coastguard Worker mova [rsp+ 1*mmsize], m1 5508*c0909341SAndroid Build Coastguard Worker mova [rsp+ 2*mmsize], m2 5509*c0909341SAndroid Build Coastguard Worker mova [rsp+ 3*mmsize], m3 5510*c0909341SAndroid Build Coastguard Worker mova [rsp+ 4*mmsize], m4 5511*c0909341SAndroid Build Coastguard Worker mova [rsp+ 5*mmsize], m5 5512*c0909341SAndroid Build Coastguard Worker mova [rsp+ 6*mmsize], m6 5513*c0909341SAndroid Build Coastguard Worker mova [rsp+ 7*mmsize], m7 5514*c0909341SAndroid Build Coastguard Worker mova [rsp+ 8*mmsize], m16 5515*c0909341SAndroid Build Coastguard Worker mova [rsp+ 9*mmsize], m17 5516*c0909341SAndroid Build Coastguard Worker mova [rsp+10*mmsize], m18 5517*c0909341SAndroid Build Coastguard Worker mova [rsp+11*mmsize], m19 5518*c0909341SAndroid Build Coastguard Worker mova [rsp+12*mmsize], m20 5519*c0909341SAndroid Build Coastguard Worker mova [rsp+13*mmsize], m21 
5520*c0909341SAndroid Build Coastguard Worker mova [rsp+14*mmsize], m22 5521*c0909341SAndroid Build Coastguard Worker mova [rsp+15*mmsize], m23 5522*c0909341SAndroid Build Coastguard Worker 5523*c0909341SAndroid Build Coastguard Worker movshdup m7, [o(permB)] 5524*c0909341SAndroid Build Coastguard Worker pmulld ym0, ym12, [cq+128*0] 5525*c0909341SAndroid Build Coastguard Worker pmulld ym4, ym12, [cq+128*4] 5526*c0909341SAndroid Build Coastguard Worker mova ym16, [cq+128*2] 5527*c0909341SAndroid Build Coastguard Worker mova ym5, [cq+128*6] 5528*c0909341SAndroid Build Coastguard Worker REPX {paddd x, ym13}, ym0, ym4 5529*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, ym0, ym4 5530*c0909341SAndroid Build Coastguard Worker vpermt2q m16, m7, m5 ; 2 6 5531*c0909341SAndroid Build Coastguard Worker vpermq m0, m7, m0 ; 0 0 5532*c0909341SAndroid Build Coastguard Worker vpermq m4, m7, m4 ; 4 4 5533*c0909341SAndroid Build Coastguard Worker pmulld m16, m12 5534*c0909341SAndroid Build Coastguard Worker paddd m16, m13 5535*c0909341SAndroid Build Coastguard Worker psrad m16, 12 5536*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).main_fast3 5537*c0909341SAndroid Build Coastguard Worker 5538*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_1)] 5539*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_end 5540*c0909341SAndroid Build Coastguard Worker mova [rsp+16*mmsize], m24 5541*c0909341SAndroid Build Coastguard Worker mova [rsp+17*mmsize], m25 5542*c0909341SAndroid Build Coastguard Worker mova [rsp+18*mmsize], m26 5543*c0909341SAndroid Build Coastguard Worker mova [rsp+19*mmsize], m27 5544*c0909341SAndroid Build Coastguard Worker mova [rsp+20*mmsize], m28 5545*c0909341SAndroid Build Coastguard Worker mova [rsp+21*mmsize], m29 5546*c0909341SAndroid Build Coastguard Worker mova [rsp+22*mmsize], m30 5547*c0909341SAndroid Build Coastguard Worker mova [rsp+23*mmsize], m31 
5548*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 5549*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_fast2_start 5550*c0909341SAndroid Build Coastguard Worker mov r7d, 16*4 5551*c0909341SAndroid Build Coastguard Worker mov r4, dstq 5552*c0909341SAndroid Build Coastguard Worker pxor m12, m12 5553*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_end 5554*c0909341SAndroid Build Coastguard Worker lea dstq, [r4+64] 5555*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+16*mmsize] 5556*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+17*mmsize] 5557*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+18*mmsize] 5558*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+19*mmsize] 5559*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+20*mmsize] 5560*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+21*mmsize] 5561*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+22*mmsize] 5562*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+23*mmsize] 5563*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 5564*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 5565*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_fast2_start 5566*c0909341SAndroid Build Coastguard Worker pxor m12, m12 5567*c0909341SAndroid Build Coastguard Worker.end: 5568*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x32_10bpc).pass2_end 5569*c0909341SAndroid Build Coastguard Worker.zero_loop: 5570*c0909341SAndroid Build Coastguard Worker mova [cq+r7*8+128*3], m12 5571*c0909341SAndroid Build Coastguard Worker mova [cq+r7*8+128*2], m12 5572*c0909341SAndroid Build Coastguard Worker mova [cq+r7*8+128*1], m12 5573*c0909341SAndroid Build Coastguard Worker mova [cq+r7*8+128*0], m12 5574*c0909341SAndroid Build Coastguard Worker sub r7d, 16*4 5575*c0909341SAndroid Build Coastguard Worker jge 
.zero_loop 5576*c0909341SAndroid Build Coastguard Worker RET 5577*c0909341SAndroid Build Coastguard Worker.dconly: 5578*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 5579*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 5580*c0909341SAndroid Build Coastguard Worker or r3d, 32 5581*c0909341SAndroid Build Coastguard Worker add r6d, 128 5582*c0909341SAndroid Build Coastguard Worker sar r6d, 8 5583*c0909341SAndroid Build Coastguard Worker imul r6d, 181 5584*c0909341SAndroid Build Coastguard Worker add r6d, 384 5585*c0909341SAndroid Build Coastguard Worker sar r6d, 9 5586*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_64x16_10bpc).dconly2 5587*c0909341SAndroid Build Coastguard Worker.pass1_fast: 5588*c0909341SAndroid Build Coastguard Worker lea r4, [idct64_mul_16bpc] 5589*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+4*64+gprsize] 5590*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 1] 5591*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128*15] 5592*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_fast_rect2 5593*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 7] 5594*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128* 9] 5595*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_fast_rect2 5596*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 5] 5597*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128*11] 5598*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_fast_rect2 5599*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 3] 5600*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128*13] 5601*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_fast_rect2 5602*c0909341SAndroid Build Coastguard Worker call 
m(inv_txfm_add_dct_dct_64x16_10bpc).main_part2 5603*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 0] 5604*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+128* 8] 5605*c0909341SAndroid Build Coastguard Worker pmulld m16, m12, [cq+128* 4] 5606*c0909341SAndroid Build Coastguard Worker pmulld m17, m12, [cq+128*12] 5607*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_fast2_rect2 5608*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_fast2_rect2 5609*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 5610*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).main_fast2_rect2 5611*c0909341SAndroid Build Coastguard Worker jmp .pass1_end 5612*c0909341SAndroid Build Coastguard Worker.pass1: 5613*c0909341SAndroid Build Coastguard Worker lea r4, [idct64_mul_16bpc] 5614*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+4*64+gprsize] 5615*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 1] 5616*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+128*31] 5617*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+128*17] 5618*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128*15] 5619*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_rect2 5620*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 7] 5621*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+128*25] 5622*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+128*23] 5623*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128* 9] 5624*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_rect2 5625*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 5] 5626*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+128*27] 5627*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+128*21] 
5628*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128*11] 5629*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_rect2 5630*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 3] 5631*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+128*29] 5632*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+128*19] 5633*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128*13] 5634*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_rect2 5635*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part2 5636*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 0] 5637*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+128* 8] 5638*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+128*16] 5639*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128*24] 5640*c0909341SAndroid Build Coastguard Worker pmulld m16, m12, [cq+128* 4] 5641*c0909341SAndroid Build Coastguard Worker pmulld m17, m12, [cq+128*12] 5642*c0909341SAndroid Build Coastguard Worker pmulld m18, m12, [cq+128*20] 5643*c0909341SAndroid Build Coastguard Worker pmulld m19, m12, [cq+128*28] 5644*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_fast_rect2 5645*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_fast_rect2 5646*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 5647*c0909341SAndroid Build Coastguard Worker pmulld m4, m12, [cq+128*18] 5648*c0909341SAndroid Build Coastguard Worker pmulld m5, m12, [cq+128*22] 5649*c0909341SAndroid Build Coastguard Worker pmulld m6, m12, [cq+128*26] 5650*c0909341SAndroid Build Coastguard Worker pmulld m7, m12, [cq+128*30] 5651*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).main_fast_rect2 5652*c0909341SAndroid Build Coastguard Worker.pass1_end: 
5653*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_1)] 5654*c0909341SAndroid Build Coastguard Worker lea r3, [rsp+gprsize] 5655*c0909341SAndroid Build Coastguard Worker lea r4, [cq+8*128] 5656*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).idct64_main_end 5657*c0909341SAndroid Build Coastguard Worker ; transpose one half immediately, we can transpose lower half later 5658*c0909341SAndroid Build Coastguard Worker.transpose: 5659*c0909341SAndroid Build Coastguard Worker ; transpose m0-7,16-23 5660*c0909341SAndroid Build Coastguard Worker psrlq m12, [permC], 24 ; 0 2 8 10 1 3 9 11 5661*c0909341SAndroid Build Coastguard Worker psrlq m13, m12, 32 ; 4 6 12 14 5 7 13 15 5662*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).transpose_16x32 5663*c0909341SAndroid Build Coastguard Worker punpckhqdq m22, m0, m20 ; 1 5664*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m20 ; 0 5665*c0909341SAndroid Build Coastguard Worker punpckhqdq m24, m2, m1 ; 5 5666*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m2, m1 ; 4 5667*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m14, m18 ; 8 5668*c0909341SAndroid Build Coastguard Worker punpckhqdq m26, m14, m18 ; 9 5669*c0909341SAndroid Build Coastguard Worker punpcklqdq m14, m15, m4 ; 2 5670*c0909341SAndroid Build Coastguard Worker punpckhqdq m23, m15, m4 ; 3 5671*c0909341SAndroid Build Coastguard Worker punpckhqdq m25, m3, m21 ; 7 5672*c0909341SAndroid Build Coastguard Worker punpcklqdq m15, m3, m21 ; 6 5673*c0909341SAndroid Build Coastguard Worker punpckhqdq m28, m6, m17 ; 13 5674*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, m6, m17 ; 12 5675*c0909341SAndroid Build Coastguard Worker punpckhqdq m27, m5, m16 ; 11 5676*c0909341SAndroid Build Coastguard Worker punpcklqdq m16, m5, m16 ; 10 5677*c0909341SAndroid Build Coastguard Worker punpckhqdq m29, m7, m8 ; 15 5678*c0909341SAndroid Build Coastguard Worker punpcklqdq m17, 
m7, m8 ; 14 5679*c0909341SAndroid Build Coastguard Worker ret 5680*c0909341SAndroid Build Coastguard Worker.pass1_load_spill: 5681*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).idct16_sumsub 5682*c0909341SAndroid Build Coastguard Worker mova [cq+128* 0], m0 5683*c0909341SAndroid Build Coastguard Worker mova [cq+128* 1], m1 5684*c0909341SAndroid Build Coastguard Worker pmulld m0, m12, [cq+128* 2] 5685*c0909341SAndroid Build Coastguard Worker pmulld m1, m12, [cq+128* 6] 5686*c0909341SAndroid Build Coastguard Worker mova [cq+128* 2], m2 5687*c0909341SAndroid Build Coastguard Worker mova [cq+128* 3], m3 5688*c0909341SAndroid Build Coastguard Worker pmulld m2, m12, [cq+128*10] 5689*c0909341SAndroid Build Coastguard Worker pmulld m3, m12, [cq+128*14] 5690*c0909341SAndroid Build Coastguard Worker mova [cq+128* 4], m4 5691*c0909341SAndroid Build Coastguard Worker mova [cq+128* 5], m5 5692*c0909341SAndroid Build Coastguard Worker mova [cq+128* 6], m6 5693*c0909341SAndroid Build Coastguard Worker mova [cq+128* 7], m7 5694*c0909341SAndroid Build Coastguard Worker mova [cq+128* 8], m23 5695*c0909341SAndroid Build Coastguard Worker mova [cq+128* 9], m22 5696*c0909341SAndroid Build Coastguard Worker mova [cq+128*10], m21 5697*c0909341SAndroid Build Coastguard Worker mova [cq+128*11], m20 5698*c0909341SAndroid Build Coastguard Worker mova [cq+128*12], m19 5699*c0909341SAndroid Build Coastguard Worker mova [cq+128*13], m18 5700*c0909341SAndroid Build Coastguard Worker mova [cq+128*14], m17 5701*c0909341SAndroid Build Coastguard Worker mova [cq+128*15], m16 5702*c0909341SAndroid Build Coastguard Worker ret 5703*c0909341SAndroid Build Coastguard Worker 5704*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_64x64_10bpc, 4, 7, 0, dst, stride, c, eob 5705*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 5706*c0909341SAndroid Build Coastguard Worker test eobd, eobd 5707*c0909341SAndroid Build Coastguard Worker jz .dconly
; ---------------------------------------------------------------------------
; inv_txfm_add_dct_dct_64x64_10bpc
;
; Declared by the cglobal on the line above (args: dst, stride, c, eob). The
; text before that cglobal is the tail of the preceding function. Judging by
; the name this is the 64x64 DCT_DCT inverse transform + add for 10 bpc;
; NOTE(review): the C prototype is not visible here -- confirm.
;
; Entry
;   eob == 0 jumps to .dconly before the full PROLOGUE is set up. Otherwise
;   PROLOGUE re-declares the function with 9 GPRs, 32 vector registers and a
;   stack-size argument of -64*32 (NOTE(review): the meaning of the negative
;   size is defined in x86inc.asm -- confirm there), and m12..m15 are loaded
;   with pd_2896, pd_2048, clip_18b_min and clip_18b_max.
;
; Dispatch on eob (signed compares)
;   eob <  36         -> .fast2   ("topleft 8x8 nonzero")
;   eob < 136         -> .fast    ("topleft 16x16 nonzero")
;   136 <= eob < 543  -> cq += 64, .pass1_fast, r7d = 16*12, .lefthalf
;                        ("bottomright 16x16 zero")
;   eob >= 543        -> cq += 64, .full: .pass1, r7d = 16*28, .lefthalf
;
; r7d is the counter of the two clearing loops. Each iteration stores the
; zeroed m31 to four consecutive 128-byte rows starting at cq + r7*8 and then
; subtracts 16*4, so rows r7/16+3 down to 0 are cleared:
;   .right_zero_loop  clears the 64 bytes at offset +64 of each row
;   .zero_loop        clears the 64 bytes at offset  +0 of each row
;
; Local labels
;   .lefthalf   Spills the 16 registers produced by .transpose to
;               [cq+128*0..15] (cq is still offset by +64), undoes the
;               offset, reloads m12..m15 and runs .pass1 on the other half.
;               It then runs the 32x64 pass2_start/pass2_end helpers for dst,
;               sets dst = r8+64, reloads/transposes the rows kept on the
;               stack and calls pass2_start again; the final pass2_end is
;               issued from .end.
;   .fast       .pass1_fast once, then the pass2_fast_start variant for dst
;               and for dst+64; r7d = 16*12.
;   .fast2      Packs rows 1/5 and 7/3 (ymm halves) for main_oddhalf_packed,
;               rows 0, 4 and 2/6 for main_fast3, and uses
;               .pass2_fast2_start for both dst halves; r7d = 16*4.
;   .end        Zeroes m31, runs .zero_loop, calls the last pass2_end,
;               releases the remaining 8*64 bytes of stack and returns.
;   .pass2_fast2_start
;               transpose_8x32 + eight qword interleaves, loads pd_2048 into
;               m10 and tail-jumps to the 32x64 pass2_fast2_start.
;   .dconly     r6d = [cq] * 181; [cq] is overwritten with eobd (zero on this
;               path); r3d |= 64 (r3 is the 4th cglobal argument, eob); then
;               jumps to the 64x16 .dconly1 (presumably 64 is the row count
;               that routine expects -- TODO confirm).
;   .pass1_fast / .pass1
;               First pass. Feed row groups (1,15)(7,9)(5,11)(3,13), or the
;               4-row groups (1,31,17,15)(7,25,23,9)(5,27,21,11)(3,29,19,13),
;               to main_part1_fast / main_part1, then main_part2, the 8x16
;               and 16x16 idct helpers, .pass1_load_spill and the 32x16
;               helper; both end in .pass1_end. r4 = idct64_mul_16bpc and
;               r6 = rsp+4*64+gprsize on entry (gprsize presumably skips the
;               return address of the call -- confirm).
;   .pass1_end  m11 = pd_2, r3 = rsp+gprsize, r4 = cq+8*128, calls
;               idct64_main_end and falls through into .transpose.
;   .transpose  Builds m12/m13 from permC, calls the 32x16 transpose_16x32
;               and interleaves qwords; the trailing number in each comment
;               is the original author's index for that result.
;   .pass1_load_spill
;               Calls idct16_sumsub, stores m0..m7 to rows 0..7 and m23..m16
;               to rows 8..15 of cq, and on the way loads rows 2, 6, 10, 14
;               into m0..m3 (each row is read before its slot is reused).
;
; Stack balance (relative to rsp after PROLOGUE)
;   full/lefthalf : -16*64 -24*64 +32*64, then +8*64 in .end
;   .fast         : -24*64 +16*64,        then +8*64 in .end
;   .fast2        : -16*64  +8*64,        then +8*64 in .end
; ---------------------------------------------------------------------------
5708*c0909341SAndroid Build Coastguard Worker 5709*c0909341SAndroid Build Coastguard Worker PROLOGUE 4, 9, 32, -64*32, dst, stride, c, eob 5710*c0909341SAndroid Build Coastguard Worker%undef cmp 5711*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 5712*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 5713*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 5714*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 5715*c0909341SAndroid Build Coastguard Worker cmp eobd, 136 5716*c0909341SAndroid Build Coastguard Worker jl .fast 5717*c0909341SAndroid Build Coastguard Worker add cq, 64 5718*c0909341SAndroid Build Coastguard Worker cmp eobd, 543 5719*c0909341SAndroid Build Coastguard Worker jge .full 5720*c0909341SAndroid Build Coastguard Worker call .pass1_fast ; bottomright 16x16 zero 5721*c0909341SAndroid Build Coastguard Worker mov r7d, 16*12 5722*c0909341SAndroid Build Coastguard Worker jmp .lefthalf 5723*c0909341SAndroid Build Coastguard Worker.full: 5724*c0909341SAndroid Build Coastguard Worker call .pass1 5725*c0909341SAndroid Build Coastguard Worker mov r7d, 16*28 5726*c0909341SAndroid Build Coastguard Worker.lefthalf: 5727*c0909341SAndroid Build Coastguard Worker mova [cq+128* 0], m27 5728*c0909341SAndroid Build Coastguard Worker mova [cq+128* 1], m14 5729*c0909341SAndroid Build Coastguard Worker mova [cq+128* 2], m28 5730*c0909341SAndroid Build Coastguard Worker mova [cq+128* 3], m15 5731*c0909341SAndroid Build Coastguard Worker mova [cq+128* 4], m22 5732*c0909341SAndroid Build Coastguard Worker mova [cq+128* 5], m23 5733*c0909341SAndroid Build Coastguard Worker mova [cq+128* 6], m24 5734*c0909341SAndroid Build Coastguard Worker mova [cq+128* 7], m25 5735*c0909341SAndroid Build Coastguard Worker mova [cq+128* 8], m0 5736*c0909341SAndroid Build Coastguard Worker mova [cq+128* 9], m26 5737*c0909341SAndroid Build Coastguard Worker mova [cq+128*10], m20
; .lefthalf continues: finish spilling the transposed registers, undo the
; cq+64 offset, then reload m12..m15 -- .transpose overwrites m12/m13 and
; returns data in m14/m15, so the constants are gone at this point. After
; that: second .pass1 call, start of pass 2 for dst, and the first reloads
; (from rsp+56*mmsize upwards) for the dst+64 half.
5738*c0909341SAndroid Build Coastguard Worker mova [cq+128*11], m21 5739*c0909341SAndroid Build Coastguard Worker mova [cq+128*12], m18 5740*c0909341SAndroid Build Coastguard Worker mova [cq+128*13], m16 5741*c0909341SAndroid Build Coastguard Worker mova [cq+128*14], m17 5742*c0909341SAndroid Build Coastguard Worker mova [cq+128*15], m3 5743*c0909341SAndroid Build Coastguard Worker sub cq, 64 5744*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [o(pd_2896)] 5745*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [o(pd_2048)] 5746*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [o(clip_18b_min)] 5747*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [o(clip_18b_max)] 5748*c0909341SAndroid Build Coastguard Worker sub rsp, 16*64 5749*c0909341SAndroid Build Coastguard Worker call .pass1 5750*c0909341SAndroid Build Coastguard Worker sub rsp, 24*64 5751*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x64_10bpc).pass2_start 5752*c0909341SAndroid Build Coastguard Worker mov r8, dstq 5753*c0909341SAndroid Build Coastguard Worker pxor m31, m31 5754*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x64_10bpc).pass2_end 5755*c0909341SAndroid Build Coastguard Worker lea dstq, [r8+64] 5756*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+56*mmsize] 5757*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+57*mmsize] 5758*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+58*mmsize] 5759*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+59*mmsize] 5760*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+60*mmsize] 5761*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+61*mmsize] 5762*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+62*mmsize] 5763*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+63*mmsize] 5764*c0909341SAndroid Build Coastguard Worker mova m16, [rsp+64*mmsize] 5765*c0909341SAndroid Build Coastguard Worker mova m17, [rsp+65*mmsize]
; Remaining reloads (m18..m23), .transpose, and the spill of its 16 results
; to the +64 column of rows 0..15; then the reload of the second saved group
; (from rsp+40*mmsize upwards) begins.
5766*c0909341SAndroid Build Coastguard Worker mova m18, [rsp+66*mmsize] 5767*c0909341SAndroid Build Coastguard Worker mova m19, [rsp+67*mmsize] 5768*c0909341SAndroid Build Coastguard Worker mova m20, [rsp+68*mmsize] 5769*c0909341SAndroid Build Coastguard Worker mova m21, [rsp+69*mmsize] 5770*c0909341SAndroid Build Coastguard Worker mova m22, [rsp+70*mmsize] 5771*c0909341SAndroid Build Coastguard Worker mova m23, [rsp+71*mmsize] 5772*c0909341SAndroid Build Coastguard Worker call .transpose 5773*c0909341SAndroid Build Coastguard Worker mova [cq+128* 0+64], m27 5774*c0909341SAndroid Build Coastguard Worker mova [cq+128* 1+64], m14 5775*c0909341SAndroid Build Coastguard Worker mova [cq+128* 2+64], m28 5776*c0909341SAndroid Build Coastguard Worker mova [cq+128* 3+64], m15 5777*c0909341SAndroid Build Coastguard Worker mova [cq+128* 4+64], m22 5778*c0909341SAndroid Build Coastguard Worker mova [cq+128* 5+64], m23 5779*c0909341SAndroid Build Coastguard Worker mova [cq+128* 6+64], m24 5780*c0909341SAndroid Build Coastguard Worker mova [cq+128* 7+64], m25 5781*c0909341SAndroid Build Coastguard Worker mova [cq+128* 8+64], m0 5782*c0909341SAndroid Build Coastguard Worker mova [cq+128* 9+64], m26 5783*c0909341SAndroid Build Coastguard Worker mova [cq+128*10+64], m20 5784*c0909341SAndroid Build Coastguard Worker mova [cq+128*11+64], m21 5785*c0909341SAndroid Build Coastguard Worker mova [cq+128*12+64], m18 5786*c0909341SAndroid Build Coastguard Worker mova [cq+128*13+64], m16 5787*c0909341SAndroid Build Coastguard Worker mova [cq+128*14+64], m17 5788*c0909341SAndroid Build Coastguard Worker mova [cq+128*15+64], m3 5789*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+40*mmsize] 5790*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+41*mmsize] 5791*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+42*mmsize] 5792*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+43*mmsize] 5793*c0909341SAndroid Build Coastguard Worker mova m4,
[rsp+44*mmsize] 5794*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+45*mmsize] 5795*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+46*mmsize] 5796*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+47*mmsize] 5797*c0909341SAndroid Build Coastguard Worker mova m16, [rsp+48*mmsize] 5798*c0909341SAndroid Build Coastguard Worker mova m17, [rsp+49*mmsize] 5799*c0909341SAndroid Build Coastguard Worker mova m18, [rsp+50*mmsize] 5800*c0909341SAndroid Build Coastguard Worker mova m19, [rsp+51*mmsize] 5801*c0909341SAndroid Build Coastguard Worker mova m20, [rsp+52*mmsize] 5802*c0909341SAndroid Build Coastguard Worker mova m21, [rsp+53*mmsize] 5803*c0909341SAndroid Build Coastguard Worker mova m22, [rsp+54*mmsize] 5804*c0909341SAndroid Build Coastguard Worker mova m23, [rsp+55*mmsize] 5805*c0909341SAndroid Build Coastguard Worker add rsp, 32*64 5806*c0909341SAndroid Build Coastguard Worker call .transpose 5807*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 5808*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x64_10bpc).pass2_start 5809*c0909341SAndroid Build Coastguard Worker.right_zero_loop: 5810*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+r7*8+64+128*x], m31}, 0, 1, 2, 3 5811*c0909341SAndroid Build Coastguard Worker sub r7d, 16*4 5812*c0909341SAndroid Build Coastguard Worker jge .right_zero_loop 5813*c0909341SAndroid Build Coastguard Worker mov r7d, 16*28 5814*c0909341SAndroid Build Coastguard Worker jmp .end 5815*c0909341SAndroid Build Coastguard Worker.fast: ; topleft 16x16 nonzero 5816*c0909341SAndroid Build Coastguard Worker cmp eobd, 36 5817*c0909341SAndroid Build Coastguard Worker jl .fast2 5818*c0909341SAndroid Build Coastguard Worker call .pass1_fast 5819*c0909341SAndroid Build Coastguard Worker sub rsp, 24*64 5820*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_2048)] 5821*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x64_10bpc).pass2_fast_start 5822*c0909341SAndroid Build
Coastguard Worker mov r8, dstq 5823*c0909341SAndroid Build Coastguard Worker pxor m31, m31 5824*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x64_10bpc).pass2_end 5825*c0909341SAndroid Build Coastguard Worker lea dstq, [r8+64] 5826*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+40*mmsize] 5827*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+41*mmsize] 5828*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+42*mmsize] 5829*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+43*mmsize] 5830*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+44*mmsize] 5831*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+45*mmsize] 5832*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+46*mmsize] 5833*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+47*mmsize] 5834*c0909341SAndroid Build Coastguard Worker mova m16, [rsp+48*mmsize] 5835*c0909341SAndroid Build Coastguard Worker mova m17, [rsp+49*mmsize] 5836*c0909341SAndroid Build Coastguard Worker mova m18, [rsp+50*mmsize] 5837*c0909341SAndroid Build Coastguard Worker mova m19, [rsp+51*mmsize] 5838*c0909341SAndroid Build Coastguard Worker mova m20, [rsp+52*mmsize] 5839*c0909341SAndroid Build Coastguard Worker mova m21, [rsp+53*mmsize] 5840*c0909341SAndroid Build Coastguard Worker mova m22, [rsp+54*mmsize] 5841*c0909341SAndroid Build Coastguard Worker mova m23, [rsp+55*mmsize] 5842*c0909341SAndroid Build Coastguard Worker add rsp, 16*64 5843*c0909341SAndroid Build Coastguard Worker call .transpose 5844*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 5845*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_2048)] 5846*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x64_10bpc).pass2_fast_start 5847*c0909341SAndroid Build Coastguard Worker mov r7d, 16*12 5848*c0909341SAndroid Build Coastguard Worker jmp .end 5849*c0909341SAndroid Build Coastguard Worker.fast2: ; topleft 8x8 nonzero 5850*c0909341SAndroid Build Coastguard Worker
; .fast2 body: only ymm-sized loads of rows 0..7 are read. m7 = movshdup of
; permB is the index vector for the vpermt2q/vpermq packing; it is reloaded
; after the call to main_oddhalf_packed. The 16 results of
; main_oddhalf_packed are parked at rsp+0..15*mmsize before the second set of
; rows is loaded.
movshdup m7, [o(permB)] 5851*c0909341SAndroid Build Coastguard Worker mova ym0, [cq+128*1] 5852*c0909341SAndroid Build Coastguard Worker mova ym2, [cq+128*5] 5853*c0909341SAndroid Build Coastguard Worker mova ym3, [cq+128*3] 5854*c0909341SAndroid Build Coastguard Worker mova ym1, [cq+128*7] 5855*c0909341SAndroid Build Coastguard Worker vpermt2q m0, m7, m2 ; 1 5 5856*c0909341SAndroid Build Coastguard Worker vpermt2q m1, m7, m3 ; 7 3 5857*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_oddhalf_packed 5858*c0909341SAndroid Build Coastguard Worker mova [rsp+ 0*mmsize], m0 5859*c0909341SAndroid Build Coastguard Worker mova [rsp+ 1*mmsize], m1 5860*c0909341SAndroid Build Coastguard Worker mova [rsp+ 2*mmsize], m2 5861*c0909341SAndroid Build Coastguard Worker mova [rsp+ 3*mmsize], m3 5862*c0909341SAndroid Build Coastguard Worker mova [rsp+ 4*mmsize], m4 5863*c0909341SAndroid Build Coastguard Worker mova [rsp+ 5*mmsize], m5 5864*c0909341SAndroid Build Coastguard Worker mova [rsp+ 6*mmsize], m6 5865*c0909341SAndroid Build Coastguard Worker mova [rsp+ 7*mmsize], m7 5866*c0909341SAndroid Build Coastguard Worker mova [rsp+ 8*mmsize], m16 5867*c0909341SAndroid Build Coastguard Worker mova [rsp+ 9*mmsize], m17 5868*c0909341SAndroid Build Coastguard Worker mova [rsp+10*mmsize], m18 5869*c0909341SAndroid Build Coastguard Worker mova [rsp+11*mmsize], m19 5870*c0909341SAndroid Build Coastguard Worker mova [rsp+12*mmsize], m20 5871*c0909341SAndroid Build Coastguard Worker mova [rsp+13*mmsize], m21 5872*c0909341SAndroid Build Coastguard Worker mova [rsp+14*mmsize], m22 5873*c0909341SAndroid Build Coastguard Worker mova [rsp+15*mmsize], m23 5874*c0909341SAndroid Build Coastguard Worker 5875*c0909341SAndroid Build Coastguard Worker movshdup m7, [o(permB)] 5876*c0909341SAndroid Build Coastguard Worker mova ym0, [cq+128*0] 5877*c0909341SAndroid Build Coastguard Worker mova ym4, [cq+128*4] 5878*c0909341SAndroid Build Coastguard Worker mova ym16,
[cq+128*2] 5879*c0909341SAndroid Build Coastguard Worker mova ym5, [cq+128*6] 5880*c0909341SAndroid Build Coastguard Worker vpermt2q m16, m7, m5 ; 2 6 5881*c0909341SAndroid Build Coastguard Worker vpermq m0, m7, m0 ; 0 0 5882*c0909341SAndroid Build Coastguard Worker vpermq m4, m7, m4 ; 4 4 5883*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).main_fast3 5884*c0909341SAndroid Build Coastguard Worker 5885*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_2)] 5886*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_end 5887*c0909341SAndroid Build Coastguard Worker sub rsp, 16*64 5888*c0909341SAndroid Build Coastguard Worker mova [rsp+40*mmsize], m24 5889*c0909341SAndroid Build Coastguard Worker mova [rsp+41*mmsize], m25 5890*c0909341SAndroid Build Coastguard Worker mova [rsp+42*mmsize], m26 5891*c0909341SAndroid Build Coastguard Worker mova [rsp+43*mmsize], m27 5892*c0909341SAndroid Build Coastguard Worker mova [rsp+44*mmsize], m28 5893*c0909341SAndroid Build Coastguard Worker mova [rsp+45*mmsize], m29 5894*c0909341SAndroid Build Coastguard Worker mova [rsp+46*mmsize], m30 5895*c0909341SAndroid Build Coastguard Worker mova [rsp+47*mmsize], m31 5896*c0909341SAndroid Build Coastguard Worker call .pass2_fast2_start 5897*c0909341SAndroid Build Coastguard Worker mov r7d, 16*4 5898*c0909341SAndroid Build Coastguard Worker mov r8, dstq 5899*c0909341SAndroid Build Coastguard Worker pxor m31, m31 5900*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x64_10bpc).pass2_end 5901*c0909341SAndroid Build Coastguard Worker lea dstq, [r8+64] 5902*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+40*mmsize] 5903*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+41*mmsize] 5904*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+42*mmsize] 5905*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+43*mmsize] 5906*c0909341SAndroid Build Coastguard Worker mova m4,
[rsp+44*mmsize] 5907*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+45*mmsize] 5908*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+46*mmsize] 5909*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+47*mmsize] 5910*c0909341SAndroid Build Coastguard Worker add rsp, 8*64 5911*c0909341SAndroid Build Coastguard Worker lea r5, [o_base] 5912*c0909341SAndroid Build Coastguard Worker call .pass2_fast2_start 5913*c0909341SAndroid Build Coastguard Worker.end: 5914*c0909341SAndroid Build Coastguard Worker pxor m31, m31 5915*c0909341SAndroid Build Coastguard Worker.zero_loop: 5916*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+r7*8+128*x], m31}, 0, 1, 2, 3 5917*c0909341SAndroid Build Coastguard Worker sub r7d, 16*4 5918*c0909341SAndroid Build Coastguard Worker jge .zero_loop 5919*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x64_10bpc).pass2_end 5920*c0909341SAndroid Build Coastguard Worker add rsp, 8*64 ; FIXME adjust stack_size_padded instead? 5921*c0909341SAndroid Build Coastguard Worker RET 5922*c0909341SAndroid Build Coastguard Worker.pass2_fast2_start: 5923*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).transpose_8x32 5924*c0909341SAndroid Build Coastguard Worker punpcklqdq m27, m0, m2 ; 0 5925*c0909341SAndroid Build Coastguard Worker punpckhqdq m0, m2 ; 1 5926*c0909341SAndroid Build Coastguard Worker punpcklqdq m22, m3, m4 ; 2 5927*c0909341SAndroid Build Coastguard Worker punpckhqdq m26, m3, m4 ; 3 5928*c0909341SAndroid Build Coastguard Worker punpcklqdq m14, m5, m7 ; 4 5929*c0909341SAndroid Build Coastguard Worker punpckhqdq m20, m5, m7 ; 5 5930*c0909341SAndroid Build Coastguard Worker punpcklqdq m23, m6, m8 ; 6 5931*c0909341SAndroid Build Coastguard Worker punpckhqdq m21, m6, m8 ; 7 5932*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [o(pd_2048)] 5933*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x64_10bpc).pass2_fast2_start 5934*c0909341SAndroid
Build Coastguard Worker.dconly: 5935*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 5936*c0909341SAndroid Build Coastguard Worker mov [cq], eobd 5937*c0909341SAndroid Build Coastguard Worker or r3d, 64 5938*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_64x16_10bpc).dconly1 5939*c0909341SAndroid Build Coastguard Worker.pass1_fast: 5940*c0909341SAndroid Build Coastguard Worker lea r4, [idct64_mul_16bpc] 5941*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+4*64+gprsize] 5942*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 1] 5943*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*15] 5944*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_fast 5945*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 7] 5946*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128* 9] 5947*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_fast 5948*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 5] 5949*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*11] 5950*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_fast 5951*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 3] 5952*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*13] 5953*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1_fast 5954*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part2 5955*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 0] 5956*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 8] 5957*c0909341SAndroid Build Coastguard Worker mova m16, [cq+128* 4] 5958*c0909341SAndroid Build Coastguard Worker mova m17, [cq+128*12] 5959*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_fast2 5960*c0909341SAndroid Build Coastguard Worker call
m(idct_16x16_internal_10bpc).main_fast2 5961*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 5962*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).main_fast2 5963*c0909341SAndroid Build Coastguard Worker jmp .pass1_end 5964*c0909341SAndroid Build Coastguard Worker.pass1: 5965*c0909341SAndroid Build Coastguard Worker lea r4, [idct64_mul_16bpc] 5966*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+4*64+gprsize] 5967*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 1] 5968*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128*31] 5969*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*17] 5970*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*15] 5971*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1 5972*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 7] 5973*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128*25] 5974*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*23] 5975*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128* 9] 5976*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1 5977*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 5] 5978*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128*27] 5979*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*21] 5980*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*11] 5981*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1 5982*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 3] 5983*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128*29] 5984*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*19] 5985*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*13] 5986*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).main_part1 5987*c0909341SAndroid Build Coastguard Worker call
m(inv_txfm_add_dct_dct_64x16_10bpc).main_part2 5988*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 0] 5989*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 8] 5990*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*16] 5991*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*24] 5992*c0909341SAndroid Build Coastguard Worker mova m16, [cq+128* 4] 5993*c0909341SAndroid Build Coastguard Worker mova m17, [cq+128*12] 5994*c0909341SAndroid Build Coastguard Worker mova m18, [cq+128*20] 5995*c0909341SAndroid Build Coastguard Worker mova m19, [cq+128*28] 5996*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_fast 5997*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_10bpc).main_fast 5998*c0909341SAndroid Build Coastguard Worker call .pass1_load_spill 5999*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128*18] 6000*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*22] 6001*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*26] 6002*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*30] 6003*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).main_fast 6004*c0909341SAndroid Build Coastguard Worker.pass1_end: 6005*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [o(pd_2)] 6006*c0909341SAndroid Build Coastguard Worker lea r3, [rsp+gprsize] 6007*c0909341SAndroid Build Coastguard Worker lea r4, [cq+8*128] 6008*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_64x16_10bpc).idct64_main_end 6009*c0909341SAndroid Build Coastguard Worker ; transpose one half immediately, we can transpose lower half later 6010*c0909341SAndroid Build Coastguard Worker.transpose: 6011*c0909341SAndroid Build Coastguard Worker ; transpose m0-7,16-23 6012*c0909341SAndroid Build Coastguard Worker psrlq m12, [permC], 24 ; 0 2 8 10 1 3 9 11 6013*c0909341SAndroid Build Coastguard Worker psrlq m13, m12, 32 ; 4 6 12 14 5 7 13 15
; Rest of .transpose: after transpose_16x32 the qword interleaves place the
; results in the registers that .lefthalf stores (m27, m14, m28, m15, m22,
; m23, m24, m25, m0, m26, m20, m21, m18, m16, m17, m3). .pass1_load_spill
; follows the ret; it reads rows 2 and 6 before storing to rows 2/3, and rows
; 10 and 14 before the stores to rows 4..15, so no input is overwritten
; before it has been loaded.
6014*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).transpose_16x32 6015*c0909341SAndroid Build Coastguard Worker punpcklqdq m27, m0, m20 ; 0 6016*c0909341SAndroid Build Coastguard Worker punpckhqdq m0, m20 ; 1 6017*c0909341SAndroid Build Coastguard Worker punpcklqdq m24, m5, m16 ; 10 6018*c0909341SAndroid Build Coastguard Worker punpckhqdq m16, m5, m16 ; 11 6019*c0909341SAndroid Build Coastguard Worker punpcklqdq m23, m3, m21 ; 6 6020*c0909341SAndroid Build Coastguard Worker punpckhqdq m21, m3, m21 ; 7 6021*c0909341SAndroid Build Coastguard Worker punpcklqdq m25, m7, m8 ; 14 6022*c0909341SAndroid Build Coastguard Worker punpckhqdq m3, m7, m8 ; 15 6023*c0909341SAndroid Build Coastguard Worker punpcklqdq m22, m15, m4 ; 2 6024*c0909341SAndroid Build Coastguard Worker punpckhqdq m26, m15, m4 ; 3 6025*c0909341SAndroid Build Coastguard Worker punpcklqdq m15, m6, m17 ; 12 6026*c0909341SAndroid Build Coastguard Worker punpckhqdq m17, m6, m17 ; 13 6027*c0909341SAndroid Build Coastguard Worker punpcklqdq m28, m14, m18 ; 8 6028*c0909341SAndroid Build Coastguard Worker punpckhqdq m18, m14, m18 ; 9 6029*c0909341SAndroid Build Coastguard Worker punpcklqdq m14, m2, m1 ; 4 6030*c0909341SAndroid Build Coastguard Worker punpckhqdq m20, m2, m1 ; 5 6031*c0909341SAndroid Build Coastguard Worker ret 6032*c0909341SAndroid Build Coastguard Worker.pass1_load_spill: 6033*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).idct16_sumsub 6034*c0909341SAndroid Build Coastguard Worker mova [cq+128* 0], m0 6035*c0909341SAndroid Build Coastguard Worker mova [cq+128* 1], m1 6036*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 2] 6037*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 6] 6038*c0909341SAndroid Build Coastguard Worker mova [cq+128* 2], m2 6039*c0909341SAndroid Build Coastguard Worker mova [cq+128* 3], m3 6040*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*10] 6041*c0909341SAndroid Build
Coastguard Worker mova m3, [cq+128*14] 6042*c0909341SAndroid Build Coastguard Worker mova [cq+128* 4], m4 6043*c0909341SAndroid Build Coastguard Worker mova [cq+128* 5], m5 6044*c0909341SAndroid Build Coastguard Worker mova [cq+128* 6], m6 6045*c0909341SAndroid Build Coastguard Worker mova [cq+128* 7], m7 6046*c0909341SAndroid Build Coastguard Worker mova [cq+128* 8], m23 6047*c0909341SAndroid Build Coastguard Worker mova [cq+128* 9], m22 6048*c0909341SAndroid Build Coastguard Worker mova [cq+128*10], m21 6049*c0909341SAndroid Build Coastguard Worker mova [cq+128*11], m20 6050*c0909341SAndroid Build Coastguard Worker mova [cq+128*12], m19 6051*c0909341SAndroid Build Coastguard Worker mova [cq+128*13], m18 6052*c0909341SAndroid Build Coastguard Worker mova [cq+128*14], m17 6053*c0909341SAndroid Build Coastguard Worker mova [cq+128*15], m16 6054*c0909341SAndroid Build Coastguard Worker ret 6055*c0909341SAndroid Build Coastguard Worker 6056*c0909341SAndroid Build Coastguard Worker%endif ; ARCH_X86_64 6057