; Copyright © 2021, VideoLAN and dav1d authors
; Copyright © 2021, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%include "ext/x86/x86inc.asm"

SECTION_RODATA

; dav1d_obmc_masks[] << 9
obmc_masks:     dw     0,     0,  9728,     0, 12800,  7168,  2560,     0
                dw 14336, 11264,  8192,  5632,  3584,  1536,     0,     0
                dw 15360, 13824, 12288, 10752,  9216,  7680,  6144,  5120
                dw  4096,  3072,  2048,  1536,     0,     0,     0,     0
                dw 15872, 14848, 14336, 13312, 12288, 11776, 10752, 10240
                dw  9728,  8704,  8192,  7168,  6656,  6144,  5632,  4608
                dw  4096,  3584,  3072,  2560,  2048,  2048,  1536,  1024

; Byte-index tables (16 bytes per row); the values are byte positions within
; a 16-byte vector.
blend_shuf:     db 0,  1,  0,  1,  0,  1,  0,  1,  2,  3,  2,  3,  2,  3,  2,  3
spel_h_shufA:   db 0,  1,  2,  3,  2,  3,  4,  5,  4,  5,  6,  7,  6,  7,  8,  9
spel_h_shufB:   db 4,  5,  6,  7,  6,  7,  8,  9,  8,  9, 10, 11, 10, 11, 12, 13
spel_h_shuf2:   db 0,  1,  2,  3,  4,  5,  6,  7,  2,  3,  4,  5,  6,  7,  8,  9
spel_s_shuf2:   db 0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7
spel_s_shuf8:   db 0,  1,  8,  9,  2,  3, 10, 11,  4,  5, 12, 13,  6,  7, 14, 15
unpckw:         db 0,  1,  4,  5,  8,  9, 12, 13,  2,  3,  6,  7, 10, 11, 14, 15
rescale_mul:    dd 0,  1,  2,  3
resize_shuf:    db 0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  2,  3,  4,  5,  6,  7
                db 8,  9, 10, 11, 12, 13, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15
bdct_lb_q:      times 8 db  0
                times 8 db  4
                times 8 db  8
                times 8 db 12

; Broadcast constants. Naming: pw_ = words, pd_ = dwords, pq_ = qwords.
; Several labels cover only 8 bytes and are completed to a full 16-byte vector
; by the data that immediately follows them. Do not reorder or separate:
;   pw_16   + first half of prep_mul -> eight words of 16
;             (put_bilin .h does a 16-byte `mova m4, [base+pw_16]`)
;   pw_2048 + bidir_mul              -> eight words of 2048
;             (put_bilin .hv does a 16-byte `mova m7, [base+pw_2048]`)
pw_2:             times 8 dw 2
pw_16:            times 4 dw 16
prep_mul:         times 4 dw 16
                  times 8 dw 4
pw_64:            times 8 dw 64
pw_256:           times 8 dw 256
pw_2048:          times 4 dw 2048
bidir_mul:        times 4 dw 2048
pw_8192:          times 8 dw 8192
pw_27615:         times 8 dw 27615
pw_32766:         times 8 dw 32766
pw_m512:          times 8 dw -512
pd_63:            times 4 dd 63
pd_64:            times 4 dd 64
pd_512:           times 4 dd 512
pd_2560:          times 2 dd 2560
pd_8704:          times 2 dd 8704
pd_m524256:       times 4 dd -524256 ; (-8192 << 6) + 32
pd_0x3ff:         times 4 dd 0x3ff
pd_0x4000:        times 4 dd 0x4000
pq_0x400000:      times 2 dq 0x400000
pq_0x40000000:    times 2 dq 0x40000000
pd_65538:         times 2 dd 65538

; Two-row tables. put_bilin_h_rnd consists of two 8-byte rows; put_bilin .h
; selects the row with (bitdepth_max >> 11) * 8 and broadcasts it via movddup.
; NOTE(review): the other two-row tables below look like they follow the same
; per-bitdepth layout, but their users are outside this excerpt - confirm.
put_bilin_h_rnd:  times 4 dw 8
                  times 4 dw 10
s_8tap_h_rnd:     times 2 dd 2
                  times 2 dd 8
put_s_8tap_v_rnd: times 2 dd 512
                  times 2 dd 128
s_8tap_h_sh:      dd 2, 4
put_s_8tap_v_sh:  dd 10, 8
bidir_rnd:        times 4 dw -16400
                  times 4 dw -16388
put_8tap_h_rnd:   dd 34, 34, 40, 40
prep_8tap_1d_rnd: times 2 dd     8 - (8192 <<  4)
prep_8tap_2d_rnd: times 4 dd    32 - (8192 <<  5)

warp8x8_shift:    dd 11, 13
warp8x8_rnd1:     dd 1024, 1024, 4096, 4096
warp8x8_rnd2:     times 4 dw 4096
                  times 4 dw 16384
warp8x8t_rnd:     times 2 dd 16384 - (8192 << 15)

; BIDIR_JMP_TABLE name, isa, w0, w1, ...
;
; Emits one dword per listed width holding the distance from the biased table
; symbol `<name>_<isa>_table` to the `.w<width>` local label of
; private_prefix_<name>_16bpc_<isa>.
; The table symbol is biased by 2*w0 (first listed width).
; NOTE(review): presumably so the table can be indexed with tzcnt(w)*4; the
; functions using these tables are outside this excerpt - confirm.
; The macro is declared 2-* but references %3, so at least one width is needed.
%macro BIDIR_JMP_TABLE 2-*
    %xdefine %1_%2_table (%%table - 2*%3)
    %xdefine %%base %1_%2_table
    %xdefine %%prefix mangle(private_prefix %+ _%1_16bpc_%2)
    %%table:
    %rep %0 - 2
        dd %%prefix %+ .w%3 - %%base
        %rotate 1 ; bring the next width into %3
    %endrep
%endmacro

BIDIR_JMP_TABLE avg,        ssse3,    4, 8, 16, 32, 64, 128
BIDIR_JMP_TABLE w_avg,      ssse3,    4, 8, 16, 32, 64, 128
BIDIR_JMP_TABLE mask,       ssse3,    4, 8, 16, 32, 64, 128
BIDIR_JMP_TABLE w_mask_420, ssse3,    4, 8, 16, 32, 64, 128
BIDIR_JMP_TABLE w_mask_422, ssse3,    4, 8, 16, 32, 64, 128
BIDIR_JMP_TABLE w_mask_444, ssse3,    4, 8, 16, 32, 64, 128
BIDIR_JMP_TABLE blend,      ssse3,    4, 8, 16, 32
BIDIR_JMP_TABLE blend_v,    ssse3, 2, 4, 8, 16, 32
BIDIR_JMP_TABLE blend_h,    ssse3, 2, 4, 8, 16, 32, 64, 128

; BASE_JMP_TABLE name, isa, w0, w1, ...
;
; Emits one word per listed width holding `<name>_<isa>_w<width> - <name>_<isa>`.
; `<name>_<isa>` must already be %xdefine'd to the base label (see put_ssse3 /
; prep_ssse3 below), which makes `<name>_<isa>_w<N>` expand to `<base>_w<N>`.
; The table symbol is biased by w0 so that `table + tzcnt(w)*2` lands on the
; entry for w; this is how the `.put` dispatch in put_bilin_16bpc indexes it
; before adding the base address back to the loaded offset.
%macro BASE_JMP_TABLE 3-*
    %xdefine %1_%2_table (%%table - %3)
    %xdefine %%base %1_%2
    %%table:
    %rep %0 - 2
        dw %%base %+ _w%3 - %%base
        %rotate 1 ; bring the next width into %3
    %endrep
%endmacro

%xdefine put_ssse3  mangle(private_prefix %+ _put_bilin_16bpc_ssse3.put)
%xdefine prep_ssse3 mangle(private_prefix %+ _prep_bilin_16bpc_ssse3.prep)

BASE_JMP_TABLE put,  ssse3, 2, 4, 8, 16, 32, 64, 128
BASE_JMP_TABLE prep, ssse3,    4, 8, 16, 32, 64, 128

; SCALED_JMP_TABLE name, isa, w0, w1, ...
;
; Emits three consecutive word tables for private_prefix_<name>_16bpc_<isa>,
; each entry being a local label minus the function's entry label:
;   <name>_<isa>_table     -> .w<width>
;   <name>_<isa>_dy1_table -> .dy1_w<width>
;   <name>_<isa>_dy2_table -> .dy2_w<width>
; Every table symbol is biased by w0, like BASE_JMP_TABLE.
; Each %rep rotates the argument list (%0 - 2) times; the following `%rotate 2`
; completes a full turn so that %3 is w0 again for the next table.
; The macro is declared 2-* but references %3, so at least one width is needed.
%macro SCALED_JMP_TABLE 2-*
    %xdefine %1_%2_table (%%table - %3)
    %xdefine %%base mangle(private_prefix %+ _%1_16bpc_%2)
%%table:
    %rep %0 - 2
        dw %%base %+ .w%3 - %%base
        %rotate 1
    %endrep
    %rotate 2
%%dy_1024:
    %xdefine %1_%2_dy1_table (%%dy_1024 - %3)
    %rep %0 - 2
        dw %%base %+ .dy1_w%3 - %%base
        %rotate 1
    %endrep
    %rotate 2
%%dy_2048:
    %xdefine %1_%2_dy2_table (%%dy_2048 - %3)
    %rep %0 - 2
        dw %%base %+ .dy2_w%3 - %%base
        %rotate 1
    %endrep
%endmacro

SCALED_JMP_TABLE put_8tap_scaled,  ssse3, 2, 4, 8, 16, 32, 64, 128
SCALED_JMP_TABLE prep_8tap_scaled, ssse3,    4, 8, 16, 32, 64, 128

cextern mc_subpel_filters
%define subpel_filters (mangle(private_prefix %+ _mc_subpel_filters)-8)

cextern mc_warp_filter
cextern resize_filter

SECTION .text

%if UNIX64
DECLARE_REG_TMP 7
%else
DECLARE_REG_TMP 5
%endif

INIT_XMM ssse3
; NOTE(review): put_bilin_16bpc starts here and runs past the end of the text
; available for this restoration. Its text is left exactly as found, with the
; code-browser gutter residue ("<n>*c0909341S... Worker") still fused in and
; newlines collapsed. It needs the same one-statement-per-line restoration
; before this file can be assembled.
178*c0909341SAndroid Build Coastguard Workercglobal put_bilin_16bpc, 4, 7, 0, dst, ds, src, ss, w, h, mxy 179*c0909341SAndroid Build Coastguard Worker%define base t0-put_ssse3 180*c0909341SAndroid Build Coastguard Worker mov mxyd, r6m ; mx 181*c0909341SAndroid Build Coastguard Worker LEA t0, put_ssse3 182*c0909341SAndroid Build
Coastguard Worker movifnidn wd, wm 183*c0909341SAndroid Build Coastguard Worker test mxyd, mxyd 184*c0909341SAndroid Build Coastguard Worker jnz .h 185*c0909341SAndroid Build Coastguard Worker mov mxyd, r7m ; my 186*c0909341SAndroid Build Coastguard Worker test mxyd, mxyd 187*c0909341SAndroid Build Coastguard Worker jnz .v 188*c0909341SAndroid Build Coastguard Worker.put: 189*c0909341SAndroid Build Coastguard Worker tzcnt wd, wd 190*c0909341SAndroid Build Coastguard Worker movzx wd, word [base+put_ssse3_table+wq*2] 191*c0909341SAndroid Build Coastguard Worker add wq, t0 192*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 193*c0909341SAndroid Build Coastguard Worker jmp wq 194*c0909341SAndroid Build Coastguard Worker.put_w2: 195*c0909341SAndroid Build Coastguard Worker mov r4d, [srcq+ssq*0] 196*c0909341SAndroid Build Coastguard Worker mov r6d, [srcq+ssq*1] 197*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 198*c0909341SAndroid Build Coastguard Worker mov [dstq+dsq*0], r4d 199*c0909341SAndroid Build Coastguard Worker mov [dstq+dsq*1], r6d 200*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 201*c0909341SAndroid Build Coastguard Worker sub hd, 2 202*c0909341SAndroid Build Coastguard Worker jg .put_w2 203*c0909341SAndroid Build Coastguard Worker RET 204*c0909341SAndroid Build Coastguard Worker.put_w4: 205*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 206*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+ssq*1] 207*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 208*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*0], m0 209*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*1], m1 210*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 211*c0909341SAndroid Build Coastguard Worker sub hd, 2 212*c0909341SAndroid Build Coastguard Worker jg .put_w4 213*c0909341SAndroid Build Coastguard Worker RET 214*c0909341SAndroid Build Coastguard Worker.put_w8: 215*c0909341SAndroid 
Build Coastguard Worker movu m0, [srcq+ssq*0] 216*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*1] 217*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 218*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*0], m0 219*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*1], m1 220*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 221*c0909341SAndroid Build Coastguard Worker sub hd, 2 222*c0909341SAndroid Build Coastguard Worker jg .put_w8 223*c0909341SAndroid Build Coastguard Worker RET 224*c0909341SAndroid Build Coastguard Worker.put_w16: 225*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0+16*0] 226*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0+16*1] 227*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*1+16*0] 228*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*1+16*1] 229*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 230*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*0+16*0], m0 231*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*0+16*1], m1 232*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*1+16*0], m2 233*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*1+16*1], m3 234*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 235*c0909341SAndroid Build Coastguard Worker sub hd, 2 236*c0909341SAndroid Build Coastguard Worker jg .put_w16 237*c0909341SAndroid Build Coastguard Worker RET 238*c0909341SAndroid Build Coastguard Worker.put_w32: 239*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16*0] 240*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16*1] 241*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*2] 242*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16*3] 243*c0909341SAndroid Build Coastguard Worker add srcq, ssq 244*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 245*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 246*c0909341SAndroid 
Build Coastguard Worker mova [dstq+16*2], m2 247*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m3 248*c0909341SAndroid Build Coastguard Worker add dstq, dsq 249*c0909341SAndroid Build Coastguard Worker dec hd 250*c0909341SAndroid Build Coastguard Worker jg .put_w32 251*c0909341SAndroid Build Coastguard Worker RET 252*c0909341SAndroid Build Coastguard Worker.put_w64: 253*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16*0] 254*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16*1] 255*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*2] 256*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16*3] 257*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 258*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 259*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m2 260*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m3 261*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16*4] 262*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16*5] 263*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*6] 264*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16*7] 265*c0909341SAndroid Build Coastguard Worker add srcq, ssq 266*c0909341SAndroid Build Coastguard Worker mova [dstq+16*4], m0 267*c0909341SAndroid Build Coastguard Worker mova [dstq+16*5], m1 268*c0909341SAndroid Build Coastguard Worker mova [dstq+16*6], m2 269*c0909341SAndroid Build Coastguard Worker mova [dstq+16*7], m3 270*c0909341SAndroid Build Coastguard Worker add dstq, dsq 271*c0909341SAndroid Build Coastguard Worker dec hd 272*c0909341SAndroid Build Coastguard Worker jg .put_w64 273*c0909341SAndroid Build Coastguard Worker RET 274*c0909341SAndroid Build Coastguard Worker.put_w128: 275*c0909341SAndroid Build Coastguard Worker add srcq, 16*8 276*c0909341SAndroid Build Coastguard Worker add dstq, 16*8 277*c0909341SAndroid Build Coastguard Worker.put_w128_loop: 278*c0909341SAndroid Build Coastguard Worker movu 
m0, [srcq-16*8] 279*c0909341SAndroid Build Coastguard Worker movu m1, [srcq-16*7] 280*c0909341SAndroid Build Coastguard Worker movu m2, [srcq-16*6] 281*c0909341SAndroid Build Coastguard Worker movu m3, [srcq-16*5] 282*c0909341SAndroid Build Coastguard Worker mova [dstq-16*8], m0 283*c0909341SAndroid Build Coastguard Worker mova [dstq-16*7], m1 284*c0909341SAndroid Build Coastguard Worker mova [dstq-16*6], m2 285*c0909341SAndroid Build Coastguard Worker mova [dstq-16*5], m3 286*c0909341SAndroid Build Coastguard Worker movu m0, [srcq-16*4] 287*c0909341SAndroid Build Coastguard Worker movu m1, [srcq-16*3] 288*c0909341SAndroid Build Coastguard Worker movu m2, [srcq-16*2] 289*c0909341SAndroid Build Coastguard Worker movu m3, [srcq-16*1] 290*c0909341SAndroid Build Coastguard Worker mova [dstq-16*4], m0 291*c0909341SAndroid Build Coastguard Worker mova [dstq-16*3], m1 292*c0909341SAndroid Build Coastguard Worker mova [dstq-16*2], m2 293*c0909341SAndroid Build Coastguard Worker mova [dstq-16*1], m3 294*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16*0] 295*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16*1] 296*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*2] 297*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16*3] 298*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 299*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 300*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m2 301*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m3 302*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16*4] 303*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16*5] 304*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*6] 305*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16*7] 306*c0909341SAndroid Build Coastguard Worker add srcq, ssq 307*c0909341SAndroid Build Coastguard Worker mova [dstq+16*4], m0 308*c0909341SAndroid Build Coastguard Worker mova [dstq+16*5], m1 
309*c0909341SAndroid Build Coastguard Worker mova [dstq+16*6], m2 310*c0909341SAndroid Build Coastguard Worker mova [dstq+16*7], m3 311*c0909341SAndroid Build Coastguard Worker add dstq, dsq 312*c0909341SAndroid Build Coastguard Worker dec hd 313*c0909341SAndroid Build Coastguard Worker jg .put_w128_loop 314*c0909341SAndroid Build Coastguard Worker RET 315*c0909341SAndroid Build Coastguard Worker.h: 316*c0909341SAndroid Build Coastguard Worker movd m5, mxyd 317*c0909341SAndroid Build Coastguard Worker mov mxyd, r7m ; my 318*c0909341SAndroid Build Coastguard Worker mova m4, [base+pw_16] 319*c0909341SAndroid Build Coastguard Worker pshufb m5, [base+pw_256] 320*c0909341SAndroid Build Coastguard Worker psubw m4, m5 321*c0909341SAndroid Build Coastguard Worker test mxyd, mxyd 322*c0909341SAndroid Build Coastguard Worker jnz .hv 323*c0909341SAndroid Build Coastguard Worker ; 12-bit is rounded twice so we can't use the same pmulhrsw approach as .v 324*c0909341SAndroid Build Coastguard Worker mov r6d, r8m ; bitdepth_max 325*c0909341SAndroid Build Coastguard Worker shr r6d, 11 326*c0909341SAndroid Build Coastguard Worker movddup m3, [base+put_bilin_h_rnd+r6*8] 327*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 328*c0909341SAndroid Build Coastguard Worker sub wd, 8 329*c0909341SAndroid Build Coastguard Worker jg .h_w16 330*c0909341SAndroid Build Coastguard Worker je .h_w8 331*c0909341SAndroid Build Coastguard Worker cmp wd, -4 332*c0909341SAndroid Build Coastguard Worker je .h_w4 333*c0909341SAndroid Build Coastguard Worker.h_w2: 334*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+ssq*0] 335*c0909341SAndroid Build Coastguard Worker movhps m1, [srcq+ssq*1] 336*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 337*c0909341SAndroid Build Coastguard Worker pmullw m0, m4, m1 338*c0909341SAndroid Build Coastguard Worker psrlq m1, 16 339*c0909341SAndroid Build Coastguard Worker pmullw m1, m5 340*c0909341SAndroid Build Coastguard Worker paddw m0, m3 
341*c0909341SAndroid Build Coastguard Worker paddw m0, m1 342*c0909341SAndroid Build Coastguard Worker psrlw m0, 4 343*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*0], m0 344*c0909341SAndroid Build Coastguard Worker punpckhqdq m0, m0 345*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*1], m0 346*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 347*c0909341SAndroid Build Coastguard Worker sub hd, 2 348*c0909341SAndroid Build Coastguard Worker jg .h_w2 349*c0909341SAndroid Build Coastguard Worker RET 350*c0909341SAndroid Build Coastguard Worker.h_w4: 351*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 352*c0909341SAndroid Build Coastguard Worker movhps m0, [srcq+ssq*1] 353*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+ssq*0+2] 354*c0909341SAndroid Build Coastguard Worker movhps m1, [srcq+ssq*1+2] 355*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 356*c0909341SAndroid Build Coastguard Worker pmullw m0, m4 357*c0909341SAndroid Build Coastguard Worker pmullw m1, m5 358*c0909341SAndroid Build Coastguard Worker paddw m0, m3 359*c0909341SAndroid Build Coastguard Worker paddw m0, m1 360*c0909341SAndroid Build Coastguard Worker psrlw m0, 4 361*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*0], m0 362*c0909341SAndroid Build Coastguard Worker movhps [dstq+dsq*1], m0 363*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 364*c0909341SAndroid Build Coastguard Worker sub hd, 2 365*c0909341SAndroid Build Coastguard Worker jg .h_w4 366*c0909341SAndroid Build Coastguard Worker RET 367*c0909341SAndroid Build Coastguard Worker.h_w8: 368*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0] 369*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0+2] 370*c0909341SAndroid Build Coastguard Worker pmullw m0, m4 371*c0909341SAndroid Build Coastguard Worker pmullw m1, m5 372*c0909341SAndroid Build Coastguard Worker paddw m0, m3 373*c0909341SAndroid Build Coastguard Worker paddw 
m0, m1 374*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*1] 375*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*1+2] 376*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 377*c0909341SAndroid Build Coastguard Worker pmullw m1, m4 378*c0909341SAndroid Build Coastguard Worker pmullw m2, m5 379*c0909341SAndroid Build Coastguard Worker paddw m1, m3 380*c0909341SAndroid Build Coastguard Worker paddw m1, m2 381*c0909341SAndroid Build Coastguard Worker psrlw m0, 4 382*c0909341SAndroid Build Coastguard Worker psrlw m1, 4 383*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*0], m0 384*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*1], m1 385*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 386*c0909341SAndroid Build Coastguard Worker sub hd, 2 387*c0909341SAndroid Build Coastguard Worker jg .h_w8 388*c0909341SAndroid Build Coastguard Worker RET 389*c0909341SAndroid Build Coastguard Worker.h_w16: 390*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+wq*2] 391*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+wq*2] 392*c0909341SAndroid Build Coastguard Worker neg wq 393*c0909341SAndroid Build Coastguard Worker.h_w16_loop0: 394*c0909341SAndroid Build Coastguard Worker mov r6, wq 395*c0909341SAndroid Build Coastguard Worker.h_w16_loop: 396*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+r6*2+ 0] 397*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+r6*2+ 2] 398*c0909341SAndroid Build Coastguard Worker pmullw m0, m4 399*c0909341SAndroid Build Coastguard Worker pmullw m1, m5 400*c0909341SAndroid Build Coastguard Worker paddw m0, m3 401*c0909341SAndroid Build Coastguard Worker paddw m0, m1 402*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+r6*2+16] 403*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+r6*2+18] 404*c0909341SAndroid Build Coastguard Worker pmullw m1, m4 405*c0909341SAndroid Build Coastguard Worker pmullw m2, m5 406*c0909341SAndroid Build Coastguard 
Worker paddw m1, m3 407*c0909341SAndroid Build Coastguard Worker paddw m1, m2 408*c0909341SAndroid Build Coastguard Worker psrlw m0, 4 409*c0909341SAndroid Build Coastguard Worker psrlw m1, 4 410*c0909341SAndroid Build Coastguard Worker mova [dstq+r6*2+16*0], m0 411*c0909341SAndroid Build Coastguard Worker mova [dstq+r6*2+16*1], m1 412*c0909341SAndroid Build Coastguard Worker add r6, 16 413*c0909341SAndroid Build Coastguard Worker jl .h_w16_loop 414*c0909341SAndroid Build Coastguard Worker add srcq, ssq 415*c0909341SAndroid Build Coastguard Worker add dstq, dsq 416*c0909341SAndroid Build Coastguard Worker dec hd 417*c0909341SAndroid Build Coastguard Worker jg .h_w16_loop0 418*c0909341SAndroid Build Coastguard Worker RET 419*c0909341SAndroid Build Coastguard Worker.v: 420*c0909341SAndroid Build Coastguard Worker shl mxyd, 11 421*c0909341SAndroid Build Coastguard Worker movd m5, mxyd 422*c0909341SAndroid Build Coastguard Worker pshufb m5, [base+pw_256] 423*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 424*c0909341SAndroid Build Coastguard Worker cmp wd, 4 425*c0909341SAndroid Build Coastguard Worker jg .v_w8 426*c0909341SAndroid Build Coastguard Worker je .v_w4 427*c0909341SAndroid Build Coastguard Worker.v_w2: 428*c0909341SAndroid Build Coastguard Worker movd m0, [srcq+ssq*0] 429*c0909341SAndroid Build Coastguard Worker.v_w2_loop: 430*c0909341SAndroid Build Coastguard Worker movd m1, [srcq+ssq*1] 431*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 432*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m0, m1 433*c0909341SAndroid Build Coastguard Worker movd m0, [srcq+ssq*0] 434*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m0 435*c0909341SAndroid Build Coastguard Worker psubw m1, m2 436*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m5 437*c0909341SAndroid Build Coastguard Worker paddw m1, m2 438*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*0], m1 439*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, 
m1 440*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*1], m1 441*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 442*c0909341SAndroid Build Coastguard Worker sub hd, 2 443*c0909341SAndroid Build Coastguard Worker jg .v_w2_loop 444*c0909341SAndroid Build Coastguard Worker RET 445*c0909341SAndroid Build Coastguard Worker.v_w4: 446*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 447*c0909341SAndroid Build Coastguard Worker.v_w4_loop: 448*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+ssq*1] 449*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 450*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m0, m1 451*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 452*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m0 453*c0909341SAndroid Build Coastguard Worker psubw m1, m2 454*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m5 455*c0909341SAndroid Build Coastguard Worker paddw m1, m2 456*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*0], m1 457*c0909341SAndroid Build Coastguard Worker movhps [dstq+dsq*1], m1 458*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 459*c0909341SAndroid Build Coastguard Worker sub hd, 2 460*c0909341SAndroid Build Coastguard Worker jg .v_w4_loop 461*c0909341SAndroid Build Coastguard Worker RET 462*c0909341SAndroid Build Coastguard Worker.v_w8: 463*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 464*c0909341SAndroid Build Coastguard Worker%if WIN64 465*c0909341SAndroid Build Coastguard Worker push r7 466*c0909341SAndroid Build Coastguard Worker%endif 467*c0909341SAndroid Build Coastguard Worker shl wd, 5 468*c0909341SAndroid Build Coastguard Worker mov r7, srcq 469*c0909341SAndroid Build Coastguard Worker lea r6d, [wq+hq-256] 470*c0909341SAndroid Build Coastguard Worker mov r4, dstq 471*c0909341SAndroid Build Coastguard Worker%else 472*c0909341SAndroid Build Coastguard Worker mov r6, srcq 473*c0909341SAndroid Build 
Coastguard Worker%endif 474*c0909341SAndroid Build Coastguard Worker.v_w8_loop0: 475*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0] 476*c0909341SAndroid Build Coastguard Worker.v_w8_loop: 477*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*1] 478*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 479*c0909341SAndroid Build Coastguard Worker psubw m1, m3, m0 480*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m5 481*c0909341SAndroid Build Coastguard Worker paddw m1, m0 482*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0] 483*c0909341SAndroid Build Coastguard Worker psubw m2, m0, m3 484*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m5 485*c0909341SAndroid Build Coastguard Worker paddw m2, m3 486*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*0], m1 487*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*1], m2 488*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 489*c0909341SAndroid Build Coastguard Worker sub hd, 2 490*c0909341SAndroid Build Coastguard Worker jg .v_w8_loop 491*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 492*c0909341SAndroid Build Coastguard Worker add r7, 16 493*c0909341SAndroid Build Coastguard Worker add r4, 16 494*c0909341SAndroid Build Coastguard Worker movzx hd, r6b 495*c0909341SAndroid Build Coastguard Worker mov srcq, r7 496*c0909341SAndroid Build Coastguard Worker mov dstq, r4 497*c0909341SAndroid Build Coastguard Worker sub r6d, 1<<8 498*c0909341SAndroid Build Coastguard Worker%else 499*c0909341SAndroid Build Coastguard Worker mov dstq, dstmp 500*c0909341SAndroid Build Coastguard Worker add r6, 16 501*c0909341SAndroid Build Coastguard Worker mov hd, hm 502*c0909341SAndroid Build Coastguard Worker add dstq, 16 503*c0909341SAndroid Build Coastguard Worker mov srcq, r6 504*c0909341SAndroid Build Coastguard Worker mov dstmp, dstq 505*c0909341SAndroid Build Coastguard Worker sub wd, 8 506*c0909341SAndroid Build Coastguard Worker%endif 
507*c0909341SAndroid Build Coastguard Worker jg .v_w8_loop0 508*c0909341SAndroid Build Coastguard Worker%if WIN64 509*c0909341SAndroid Build Coastguard Worker pop r7 510*c0909341SAndroid Build Coastguard Worker%endif 511*c0909341SAndroid Build Coastguard Worker RET 512*c0909341SAndroid Build Coastguard Worker.hv: 513*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 8 514*c0909341SAndroid Build Coastguard Worker shl mxyd, 11 515*c0909341SAndroid Build Coastguard Worker mova m3, [base+pw_2] 516*c0909341SAndroid Build Coastguard Worker movd m6, mxyd 517*c0909341SAndroid Build Coastguard Worker mova m7, [base+pw_8192] 518*c0909341SAndroid Build Coastguard Worker pshufb m6, [base+pw_256] 519*c0909341SAndroid Build Coastguard Worker test dword r8m, 0x800 520*c0909341SAndroid Build Coastguard Worker jnz .hv_12bpc 521*c0909341SAndroid Build Coastguard Worker psllw m4, 2 522*c0909341SAndroid Build Coastguard Worker psllw m5, 2 523*c0909341SAndroid Build Coastguard Worker mova m7, [base+pw_2048] 524*c0909341SAndroid Build Coastguard Worker.hv_12bpc: 525*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 526*c0909341SAndroid Build Coastguard Worker cmp wd, 4 527*c0909341SAndroid Build Coastguard Worker jg .hv_w8 528*c0909341SAndroid Build Coastguard Worker je .hv_w4 529*c0909341SAndroid Build Coastguard Worker.hv_w2: 530*c0909341SAndroid Build Coastguard Worker movddup m0, [srcq+ssq*0] 531*c0909341SAndroid Build Coastguard Worker pshufhw m1, m0, q0321 532*c0909341SAndroid Build Coastguard Worker pmullw m0, m4 533*c0909341SAndroid Build Coastguard Worker pmullw m1, m5 534*c0909341SAndroid Build Coastguard Worker paddw m0, m3 535*c0909341SAndroid Build Coastguard Worker paddw m0, m1 536*c0909341SAndroid Build Coastguard Worker psrlw m0, 2 537*c0909341SAndroid Build Coastguard Worker.hv_w2_loop: 538*c0909341SAndroid Build Coastguard Worker movq m2, [srcq+ssq*1] 539*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 540*c0909341SAndroid Build 
Coastguard Worker movhps m2, [srcq+ssq*0] 541*c0909341SAndroid Build Coastguard Worker pmullw m1, m4, m2 542*c0909341SAndroid Build Coastguard Worker psrlq m2, 16 543*c0909341SAndroid Build Coastguard Worker pmullw m2, m5 544*c0909341SAndroid Build Coastguard Worker paddw m1, m3 545*c0909341SAndroid Build Coastguard Worker paddw m1, m2 546*c0909341SAndroid Build Coastguard Worker psrlw m1, 2 ; 1 _ 2 _ 547*c0909341SAndroid Build Coastguard Worker shufpd m2, m0, m1, 0x01 ; 0 _ 1 _ 548*c0909341SAndroid Build Coastguard Worker mova m0, m1 549*c0909341SAndroid Build Coastguard Worker psubw m1, m2 550*c0909341SAndroid Build Coastguard Worker paddw m1, m1 551*c0909341SAndroid Build Coastguard Worker pmulhw m1, m6 552*c0909341SAndroid Build Coastguard Worker paddw m1, m2 553*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7 554*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*0], m1 555*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m1 556*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*1], m1 557*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 558*c0909341SAndroid Build Coastguard Worker sub hd, 2 559*c0909341SAndroid Build Coastguard Worker jg .hv_w2_loop 560*c0909341SAndroid Build Coastguard Worker RET 561*c0909341SAndroid Build Coastguard Worker.hv_w4: 562*c0909341SAndroid Build Coastguard Worker movddup m0, [srcq+ssq*0] 563*c0909341SAndroid Build Coastguard Worker movddup m1, [srcq+ssq*0+2] 564*c0909341SAndroid Build Coastguard Worker pmullw m0, m4 565*c0909341SAndroid Build Coastguard Worker pmullw m1, m5 566*c0909341SAndroid Build Coastguard Worker paddw m0, m3 567*c0909341SAndroid Build Coastguard Worker paddw m0, m1 568*c0909341SAndroid Build Coastguard Worker psrlw m0, 2 569*c0909341SAndroid Build Coastguard Worker.hv_w4_loop: 570*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+ssq*1] 571*c0909341SAndroid Build Coastguard Worker movq m2, [srcq+ssq*1+2] 572*c0909341SAndroid Build Coastguard Worker lea srcq, 
[srcq+ssq*2] 573*c0909341SAndroid Build Coastguard Worker movhps m1, [srcq+ssq*0] 574*c0909341SAndroid Build Coastguard Worker movhps m2, [srcq+ssq*0+2] 575*c0909341SAndroid Build Coastguard Worker pmullw m1, m4 576*c0909341SAndroid Build Coastguard Worker pmullw m2, m5 577*c0909341SAndroid Build Coastguard Worker paddw m1, m3 578*c0909341SAndroid Build Coastguard Worker paddw m1, m2 579*c0909341SAndroid Build Coastguard Worker psrlw m1, 2 ; 1 2 580*c0909341SAndroid Build Coastguard Worker shufpd m2, m0, m1, 0x01 ; 0 1 581*c0909341SAndroid Build Coastguard Worker mova m0, m1 582*c0909341SAndroid Build Coastguard Worker psubw m1, m2 583*c0909341SAndroid Build Coastguard Worker paddw m1, m1 584*c0909341SAndroid Build Coastguard Worker pmulhw m1, m6 585*c0909341SAndroid Build Coastguard Worker paddw m1, m2 586*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m7 587*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*0], m1 588*c0909341SAndroid Build Coastguard Worker movhps [dstq+dsq*1], m1 589*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 590*c0909341SAndroid Build Coastguard Worker sub hd, 2 591*c0909341SAndroid Build Coastguard Worker jg .hv_w4_loop 592*c0909341SAndroid Build Coastguard Worker RET 593*c0909341SAndroid Build Coastguard Worker.hv_w8: 594*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 595*c0909341SAndroid Build Coastguard Worker%if WIN64 596*c0909341SAndroid Build Coastguard Worker push r7 597*c0909341SAndroid Build Coastguard Worker%endif 598*c0909341SAndroid Build Coastguard Worker shl wd, 5 599*c0909341SAndroid Build Coastguard Worker lea r6d, [wq+hq-256] 600*c0909341SAndroid Build Coastguard Worker mov r4, srcq 601*c0909341SAndroid Build Coastguard Worker mov r7, dstq 602*c0909341SAndroid Build Coastguard Worker%else 603*c0909341SAndroid Build Coastguard Worker mov r6, srcq 604*c0909341SAndroid Build Coastguard Worker%endif 605*c0909341SAndroid Build Coastguard Worker.hv_w8_loop0: 606*c0909341SAndroid Build 
Coastguard Worker movu m0, [srcq+ssq*0] 607*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0+2] 608*c0909341SAndroid Build Coastguard Worker pmullw m0, m4 609*c0909341SAndroid Build Coastguard Worker pmullw m1, m5 610*c0909341SAndroid Build Coastguard Worker paddw m0, m3 611*c0909341SAndroid Build Coastguard Worker paddw m0, m1 612*c0909341SAndroid Build Coastguard Worker psrlw m0, 2 613*c0909341SAndroid Build Coastguard Worker.hv_w8_loop: 614*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*1] 615*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*1+2] 616*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 617*c0909341SAndroid Build Coastguard Worker pmullw m1, m4 618*c0909341SAndroid Build Coastguard Worker pmullw m2, m5 619*c0909341SAndroid Build Coastguard Worker paddw m1, m3 620*c0909341SAndroid Build Coastguard Worker paddw m1, m2 621*c0909341SAndroid Build Coastguard Worker psrlw m1, 2 622*c0909341SAndroid Build Coastguard Worker psubw m2, m1, m0 623*c0909341SAndroid Build Coastguard Worker paddw m2, m2 624*c0909341SAndroid Build Coastguard Worker pmulhw m2, m6 625*c0909341SAndroid Build Coastguard Worker paddw m2, m0 626*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m7 627*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*0], m2 628*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0] 629*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*0+2] 630*c0909341SAndroid Build Coastguard Worker pmullw m0, m4 631*c0909341SAndroid Build Coastguard Worker pmullw m2, m5 632*c0909341SAndroid Build Coastguard Worker paddw m0, m3 633*c0909341SAndroid Build Coastguard Worker paddw m0, m2 634*c0909341SAndroid Build Coastguard Worker psrlw m0, 2 635*c0909341SAndroid Build Coastguard Worker psubw m2, m0, m1 636*c0909341SAndroid Build Coastguard Worker paddw m2, m2 637*c0909341SAndroid Build Coastguard Worker pmulhw m2, m6 638*c0909341SAndroid Build Coastguard Worker paddw m2, m1 
639*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m7 640*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*1], m2 641*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 642*c0909341SAndroid Build Coastguard Worker sub hd, 2 643*c0909341SAndroid Build Coastguard Worker jg .hv_w8_loop 644*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 645*c0909341SAndroid Build Coastguard Worker add r4, 16 646*c0909341SAndroid Build Coastguard Worker add r7, 16 647*c0909341SAndroid Build Coastguard Worker movzx hd, r6b 648*c0909341SAndroid Build Coastguard Worker mov srcq, r4 649*c0909341SAndroid Build Coastguard Worker mov dstq, r7 650*c0909341SAndroid Build Coastguard Worker sub r6d, 1<<8 651*c0909341SAndroid Build Coastguard Worker%else 652*c0909341SAndroid Build Coastguard Worker mov dstq, dstmp 653*c0909341SAndroid Build Coastguard Worker add r6, 16 654*c0909341SAndroid Build Coastguard Worker mov hd, hm 655*c0909341SAndroid Build Coastguard Worker add dstq, 16 656*c0909341SAndroid Build Coastguard Worker mov srcq, r6 657*c0909341SAndroid Build Coastguard Worker mov dstmp, dstq 658*c0909341SAndroid Build Coastguard Worker sub wd, 8 659*c0909341SAndroid Build Coastguard Worker%endif 660*c0909341SAndroid Build Coastguard Worker jg .hv_w8_loop0 661*c0909341SAndroid Build Coastguard Worker%if WIN64 662*c0909341SAndroid Build Coastguard Worker pop r7 663*c0909341SAndroid Build Coastguard Worker%endif 664*c0909341SAndroid Build Coastguard Worker RET 665*c0909341SAndroid Build Coastguard Worker 666*c0909341SAndroid Build Coastguard Workercglobal prep_bilin_16bpc, 4, 7, 0, tmp, src, stride, w, h, mxy, stride3 667*c0909341SAndroid Build Coastguard Worker%define base r6-prep_ssse3 668*c0909341SAndroid Build Coastguard Worker movifnidn mxyd, r5m ; mx 669*c0909341SAndroid Build Coastguard Worker LEA r6, prep_ssse3 670*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 671*c0909341SAndroid Build Coastguard Worker test mxyd, mxyd 672*c0909341SAndroid 
Build Coastguard Worker jnz .h 673*c0909341SAndroid Build Coastguard Worker mov mxyd, r6m ; my 674*c0909341SAndroid Build Coastguard Worker test mxyd, mxyd 675*c0909341SAndroid Build Coastguard Worker jnz .v 676*c0909341SAndroid Build Coastguard Worker.prep: 677*c0909341SAndroid Build Coastguard Worker tzcnt wd, wd 678*c0909341SAndroid Build Coastguard Worker movzx wd, word [base+prep_ssse3_table+wq*2] 679*c0909341SAndroid Build Coastguard Worker mov r5d, r7m ; bitdepth_max 680*c0909341SAndroid Build Coastguard Worker mova m5, [base+pw_8192] 681*c0909341SAndroid Build Coastguard Worker add wq, r6 682*c0909341SAndroid Build Coastguard Worker shr r5d, 11 683*c0909341SAndroid Build Coastguard Worker movddup m4, [base+prep_mul+r5*8] 684*c0909341SAndroid Build Coastguard Worker lea stride3q, [strideq*3] 685*c0909341SAndroid Build Coastguard Worker jmp wq 686*c0909341SAndroid Build Coastguard Worker.prep_w4: 687*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+strideq*0] 688*c0909341SAndroid Build Coastguard Worker movhps m0, [srcq+strideq*1] 689*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+strideq*2] 690*c0909341SAndroid Build Coastguard Worker movhps m1, [srcq+stride3q ] 691*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+strideq*4] 692*c0909341SAndroid Build Coastguard Worker pmullw m0, m4 693*c0909341SAndroid Build Coastguard Worker pmullw m1, m4 694*c0909341SAndroid Build Coastguard Worker psubw m0, m5 695*c0909341SAndroid Build Coastguard Worker psubw m1, m5 696*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*0], m0 697*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*1], m1 698*c0909341SAndroid Build Coastguard Worker add tmpq, 16*2 699*c0909341SAndroid Build Coastguard Worker sub hd, 4 700*c0909341SAndroid Build Coastguard Worker jg .prep_w4 701*c0909341SAndroid Build Coastguard Worker RET 702*c0909341SAndroid Build Coastguard Worker.prep_w8: 703*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+strideq*0] 
704*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+strideq*1] 705*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+strideq*2] 706*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+stride3q ] 707*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+strideq*4] 708*c0909341SAndroid Build Coastguard Worker REPX {pmullw x, m4}, m0, m1, m2, m3 709*c0909341SAndroid Build Coastguard Worker REPX {psubw x, m5}, m0, m1, m2, m3 710*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*0], m0 711*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*1], m1 712*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*2], m2 713*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*3], m3 714*c0909341SAndroid Build Coastguard Worker add tmpq, 16*4 715*c0909341SAndroid Build Coastguard Worker sub hd, 4 716*c0909341SAndroid Build Coastguard Worker jg .prep_w8 717*c0909341SAndroid Build Coastguard Worker RET 718*c0909341SAndroid Build Coastguard Worker.prep_w16: 719*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+strideq*0+16*0] 720*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+strideq*0+16*1] 721*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+strideq*1+16*0] 722*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+strideq*1+16*1] 723*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+strideq*2] 724*c0909341SAndroid Build Coastguard Worker REPX {pmullw x, m4}, m0, m1, m2, m3 725*c0909341SAndroid Build Coastguard Worker REPX {psubw x, m5}, m0, m1, m2, m3 726*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*0], m0 727*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*1], m1 728*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*2], m2 729*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*3], m3 730*c0909341SAndroid Build Coastguard Worker add tmpq, 16*4 731*c0909341SAndroid Build Coastguard Worker sub hd, 2 732*c0909341SAndroid Build Coastguard Worker jg .prep_w16 733*c0909341SAndroid Build Coastguard 
Worker RET 734*c0909341SAndroid Build Coastguard Worker.prep_w32: 735*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16*0] 736*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16*1] 737*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*2] 738*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16*3] 739*c0909341SAndroid Build Coastguard Worker add srcq, strideq 740*c0909341SAndroid Build Coastguard Worker REPX {pmullw x, m4}, m0, m1, m2, m3 741*c0909341SAndroid Build Coastguard Worker REPX {psubw x, m5}, m0, m1, m2, m3 742*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*0], m0 743*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*1], m1 744*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*2], m2 745*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*3], m3 746*c0909341SAndroid Build Coastguard Worker add tmpq, 16*4 747*c0909341SAndroid Build Coastguard Worker dec hd 748*c0909341SAndroid Build Coastguard Worker jg .prep_w32 749*c0909341SAndroid Build Coastguard Worker RET 750*c0909341SAndroid Build Coastguard Worker.prep_w64: 751*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16*0] 752*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16*1] 753*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*2] 754*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16*3] 755*c0909341SAndroid Build Coastguard Worker REPX {pmullw x, m4}, m0, m1, m2, m3 756*c0909341SAndroid Build Coastguard Worker REPX {psubw x, m5}, m0, m1, m2, m3 757*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*0], m0 758*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*1], m1 759*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*2], m2 760*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*3], m3 761*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16*4] 762*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16*5] 763*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*6] 764*c0909341SAndroid 
Build Coastguard Worker movu m3, [srcq+16*7] 765*c0909341SAndroid Build Coastguard Worker add srcq, strideq 766*c0909341SAndroid Build Coastguard Worker REPX {pmullw x, m4}, m0, m1, m2, m3 767*c0909341SAndroid Build Coastguard Worker REPX {psubw x, m5}, m0, m1, m2, m3 768*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*4], m0 769*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*5], m1 770*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*6], m2 771*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*7], m3 772*c0909341SAndroid Build Coastguard Worker add tmpq, 16*8 773*c0909341SAndroid Build Coastguard Worker dec hd 774*c0909341SAndroid Build Coastguard Worker jg .prep_w64 775*c0909341SAndroid Build Coastguard Worker RET 776*c0909341SAndroid Build Coastguard Worker.prep_w128: 777*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16* 0] 778*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16* 1] 779*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16* 2] 780*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16* 3] 781*c0909341SAndroid Build Coastguard Worker REPX {pmullw x, m4}, m0, m1, m2, m3 782*c0909341SAndroid Build Coastguard Worker REPX {psubw x, m5}, m0, m1, m2, m3 783*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*0], m0 784*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*1], m1 785*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*2], m2 786*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*3], m3 787*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16* 4] 788*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16* 5] 789*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16* 6] 790*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16* 7] 791*c0909341SAndroid Build Coastguard Worker REPX {pmullw x, m4}, m0, m1, m2, m3 792*c0909341SAndroid Build Coastguard Worker REPX {psubw x, m5}, m0, m1, m2, m3 793*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*4], m0 
794*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*5], m1 795*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*6], m2 796*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*7], m3 797*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16* 8] 798*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16* 9] 799*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*10] 800*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16*11] 801*c0909341SAndroid Build Coastguard Worker add tmpq, 16*16 802*c0909341SAndroid Build Coastguard Worker REPX {pmullw x, m4}, m0, m1, m2, m3 803*c0909341SAndroid Build Coastguard Worker REPX {psubw x, m5}, m0, m1, m2, m3 804*c0909341SAndroid Build Coastguard Worker mova [tmpq-16*8], m0 805*c0909341SAndroid Build Coastguard Worker mova [tmpq-16*7], m1 806*c0909341SAndroid Build Coastguard Worker mova [tmpq-16*6], m2 807*c0909341SAndroid Build Coastguard Worker mova [tmpq-16*5], m3 808*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+16*12] 809*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+16*13] 810*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+16*14] 811*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+16*15] 812*c0909341SAndroid Build Coastguard Worker add srcq, strideq 813*c0909341SAndroid Build Coastguard Worker REPX {pmullw x, m4}, m0, m1, m2, m3 814*c0909341SAndroid Build Coastguard Worker REPX {psubw x, m5}, m0, m1, m2, m3 815*c0909341SAndroid Build Coastguard Worker mova [tmpq-16*4], m0 816*c0909341SAndroid Build Coastguard Worker mova [tmpq-16*3], m1 817*c0909341SAndroid Build Coastguard Worker mova [tmpq-16*2], m2 818*c0909341SAndroid Build Coastguard Worker mova [tmpq-16*1], m3 819*c0909341SAndroid Build Coastguard Worker dec hd 820*c0909341SAndroid Build Coastguard Worker jg .prep_w128 821*c0909341SAndroid Build Coastguard Worker RET 822*c0909341SAndroid Build Coastguard Worker.h: 823*c0909341SAndroid Build Coastguard Worker movd m4, mxyd 824*c0909341SAndroid 
Build Coastguard Worker mov mxyd, r6m ; my 825*c0909341SAndroid Build Coastguard Worker mova m3, [base+pw_16] 826*c0909341SAndroid Build Coastguard Worker pshufb m4, [base+pw_256] 827*c0909341SAndroid Build Coastguard Worker mova m5, [base+pw_32766] 828*c0909341SAndroid Build Coastguard Worker psubw m3, m4 829*c0909341SAndroid Build Coastguard Worker test dword r7m, 0x800 830*c0909341SAndroid Build Coastguard Worker jnz .h_12bpc 831*c0909341SAndroid Build Coastguard Worker psllw m3, 2 832*c0909341SAndroid Build Coastguard Worker psllw m4, 2 833*c0909341SAndroid Build Coastguard Worker.h_12bpc: 834*c0909341SAndroid Build Coastguard Worker test mxyd, mxyd 835*c0909341SAndroid Build Coastguard Worker jnz .hv 836*c0909341SAndroid Build Coastguard Worker sub wd, 8 837*c0909341SAndroid Build Coastguard Worker je .h_w8 838*c0909341SAndroid Build Coastguard Worker jg .h_w16 839*c0909341SAndroid Build Coastguard Worker.h_w4: 840*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+strideq*0] 841*c0909341SAndroid Build Coastguard Worker movhps m0, [srcq+strideq*1] 842*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+strideq*0+2] 843*c0909341SAndroid Build Coastguard Worker movhps m1, [srcq+strideq*1+2] 844*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+strideq*2] 845*c0909341SAndroid Build Coastguard Worker pmullw m0, m3 846*c0909341SAndroid Build Coastguard Worker pmullw m1, m4 847*c0909341SAndroid Build Coastguard Worker psubw m0, m5 848*c0909341SAndroid Build Coastguard Worker paddw m0, m1 849*c0909341SAndroid Build Coastguard Worker psraw m0, 2 850*c0909341SAndroid Build Coastguard Worker mova [tmpq], m0 851*c0909341SAndroid Build Coastguard Worker add tmpq, 16 852*c0909341SAndroid Build Coastguard Worker sub hd, 2 853*c0909341SAndroid Build Coastguard Worker jg .h_w4 854*c0909341SAndroid Build Coastguard Worker RET 855*c0909341SAndroid Build Coastguard Worker.h_w8: 856*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+strideq*0] 
857*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+strideq*0+2] 858*c0909341SAndroid Build Coastguard Worker pmullw m0, m3 859*c0909341SAndroid Build Coastguard Worker pmullw m1, m4 860*c0909341SAndroid Build Coastguard Worker psubw m0, m5 861*c0909341SAndroid Build Coastguard Worker paddw m0, m1 862*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+strideq*1] 863*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+strideq*1+2] 864*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+strideq*2] 865*c0909341SAndroid Build Coastguard Worker pmullw m1, m3 866*c0909341SAndroid Build Coastguard Worker pmullw m2, m4 867*c0909341SAndroid Build Coastguard Worker psubw m1, m5 868*c0909341SAndroid Build Coastguard Worker paddw m1, m2 869*c0909341SAndroid Build Coastguard Worker psraw m0, 2 870*c0909341SAndroid Build Coastguard Worker psraw m1, 2 871*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*0], m0 872*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*1], m1 873*c0909341SAndroid Build Coastguard Worker add tmpq, 16*2 874*c0909341SAndroid Build Coastguard Worker sub hd, 2 875*c0909341SAndroid Build Coastguard Worker jg .h_w8 876*c0909341SAndroid Build Coastguard Worker RET 877*c0909341SAndroid Build Coastguard Worker.h_w16: 878*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+wq*2] 879*c0909341SAndroid Build Coastguard Worker neg wq 880*c0909341SAndroid Build Coastguard Worker.h_w16_loop0: 881*c0909341SAndroid Build Coastguard Worker mov r6, wq 882*c0909341SAndroid Build Coastguard Worker.h_w16_loop: 883*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+r6*2+ 0] 884*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+r6*2+ 2] 885*c0909341SAndroid Build Coastguard Worker pmullw m0, m3 886*c0909341SAndroid Build Coastguard Worker pmullw m1, m4 887*c0909341SAndroid Build Coastguard Worker psubw m0, m5 888*c0909341SAndroid Build Coastguard Worker paddw m0, m1 889*c0909341SAndroid Build Coastguard Worker movu m1, 
[srcq+r6*2+16] 890*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+r6*2+18] 891*c0909341SAndroid Build Coastguard Worker pmullw m1, m3 892*c0909341SAndroid Build Coastguard Worker pmullw m2, m4 893*c0909341SAndroid Build Coastguard Worker psubw m1, m5 894*c0909341SAndroid Build Coastguard Worker paddw m1, m2 895*c0909341SAndroid Build Coastguard Worker psraw m0, 2 896*c0909341SAndroid Build Coastguard Worker psraw m1, 2 897*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*0], m0 898*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*1], m1 899*c0909341SAndroid Build Coastguard Worker add tmpq, 16*2 900*c0909341SAndroid Build Coastguard Worker add r6, 16 901*c0909341SAndroid Build Coastguard Worker jl .h_w16_loop 902*c0909341SAndroid Build Coastguard Worker add srcq, strideq 903*c0909341SAndroid Build Coastguard Worker dec hd 904*c0909341SAndroid Build Coastguard Worker jg .h_w16_loop0 905*c0909341SAndroid Build Coastguard Worker RET 906*c0909341SAndroid Build Coastguard Worker.v: 907*c0909341SAndroid Build Coastguard Worker movd m4, mxyd 908*c0909341SAndroid Build Coastguard Worker mova m3, [base+pw_16] 909*c0909341SAndroid Build Coastguard Worker pshufb m4, [base+pw_256] 910*c0909341SAndroid Build Coastguard Worker mova m5, [base+pw_32766] 911*c0909341SAndroid Build Coastguard Worker psubw m3, m4 912*c0909341SAndroid Build Coastguard Worker test dword r7m, 0x800 913*c0909341SAndroid Build Coastguard Worker jnz .v_12bpc 914*c0909341SAndroid Build Coastguard Worker psllw m3, 2 915*c0909341SAndroid Build Coastguard Worker psllw m4, 2 916*c0909341SAndroid Build Coastguard Worker.v_12bpc: 917*c0909341SAndroid Build Coastguard Worker cmp wd, 8 918*c0909341SAndroid Build Coastguard Worker je .v_w8 919*c0909341SAndroid Build Coastguard Worker jg .v_w16 920*c0909341SAndroid Build Coastguard Worker.v_w4: 921*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+strideq*0] 922*c0909341SAndroid Build Coastguard Worker.v_w4_loop: 923*c0909341SAndroid Build 
Coastguard Worker movq m2, [srcq+strideq*1] 924*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+strideq*2] 925*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m0, m2 ; 0 1 926*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+strideq*0] 927*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m0 ; 1 2 928*c0909341SAndroid Build Coastguard Worker pmullw m1, m3 929*c0909341SAndroid Build Coastguard Worker pmullw m2, m4 930*c0909341SAndroid Build Coastguard Worker psubw m1, m5 931*c0909341SAndroid Build Coastguard Worker paddw m1, m2 932*c0909341SAndroid Build Coastguard Worker psraw m1, 2 933*c0909341SAndroid Build Coastguard Worker mova [tmpq], m1 934*c0909341SAndroid Build Coastguard Worker add tmpq, 16 935*c0909341SAndroid Build Coastguard Worker sub hd, 2 936*c0909341SAndroid Build Coastguard Worker jg .v_w4_loop 937*c0909341SAndroid Build Coastguard Worker RET 938*c0909341SAndroid Build Coastguard Worker.v_w8: 939*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+strideq*0] 940*c0909341SAndroid Build Coastguard Worker.v_w8_loop: 941*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+strideq*1] 942*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+strideq*2] 943*c0909341SAndroid Build Coastguard Worker pmullw m0, m3 944*c0909341SAndroid Build Coastguard Worker pmullw m1, m4, m2 945*c0909341SAndroid Build Coastguard Worker psubw m0, m5 946*c0909341SAndroid Build Coastguard Worker paddw m1, m0 947*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+strideq*0] 948*c0909341SAndroid Build Coastguard Worker psraw m1, 2 949*c0909341SAndroid Build Coastguard Worker pmullw m2, m3 950*c0909341SAndroid Build Coastguard Worker mova [tmpq+16*0], m1 951*c0909341SAndroid Build Coastguard Worker pmullw m1, m4, m0 952*c0909341SAndroid Build Coastguard Worker psubw m2, m5 953*c0909341SAndroid Build Coastguard Worker paddw m1, m2 954*c0909341SAndroid Build Coastguard Worker psraw m1, 2 955*c0909341SAndroid Build Coastguard Worker mova 
[tmpq+16*1], m1 956*c0909341SAndroid Build Coastguard Worker add tmpq, 16*2 957*c0909341SAndroid Build Coastguard Worker sub hd, 2 958*c0909341SAndroid Build Coastguard Worker jg .v_w8_loop 959*c0909341SAndroid Build Coastguard Worker RET 960*c0909341SAndroid Build Coastguard Worker.v_w16: 961*c0909341SAndroid Build Coastguard Worker%if WIN64 962*c0909341SAndroid Build Coastguard Worker push r7 963*c0909341SAndroid Build Coastguard Worker%endif 964*c0909341SAndroid Build Coastguard Worker mov r5, srcq 965*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 966*c0909341SAndroid Build Coastguard Worker lea r6d, [wq*4-32] 967*c0909341SAndroid Build Coastguard Worker mov wd, wd 968*c0909341SAndroid Build Coastguard Worker lea r6d, [hq+r6*8] 969*c0909341SAndroid Build Coastguard Worker mov r7, tmpq 970*c0909341SAndroid Build Coastguard Worker%else 971*c0909341SAndroid Build Coastguard Worker mov r6d, wd 972*c0909341SAndroid Build Coastguard Worker%endif 973*c0909341SAndroid Build Coastguard Worker.v_w16_loop0: 974*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+strideq*0] 975*c0909341SAndroid Build Coastguard Worker.v_w16_loop: 976*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+strideq*1] 977*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+strideq*2] 978*c0909341SAndroid Build Coastguard Worker pmullw m0, m3 979*c0909341SAndroid Build Coastguard Worker pmullw m1, m4, m2 980*c0909341SAndroid Build Coastguard Worker psubw m0, m5 981*c0909341SAndroid Build Coastguard Worker paddw m1, m0 982*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+strideq*0] 983*c0909341SAndroid Build Coastguard Worker psraw m1, 2 984*c0909341SAndroid Build Coastguard Worker pmullw m2, m3 985*c0909341SAndroid Build Coastguard Worker mova [tmpq+wq*0], m1 986*c0909341SAndroid Build Coastguard Worker pmullw m1, m4, m0 987*c0909341SAndroid Build Coastguard Worker psubw m2, m5 988*c0909341SAndroid Build Coastguard Worker paddw m1, m2 989*c0909341SAndroid Build 
Coastguard Worker psraw m1, 2 990*c0909341SAndroid Build Coastguard Worker mova [tmpq+wq*2], m1 991*c0909341SAndroid Build Coastguard Worker lea tmpq, [tmpq+wq*4] 992*c0909341SAndroid Build Coastguard Worker sub hd, 2 993*c0909341SAndroid Build Coastguard Worker jg .v_w16_loop 994*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 995*c0909341SAndroid Build Coastguard Worker add r5, 16 996*c0909341SAndroid Build Coastguard Worker add r7, 16 997*c0909341SAndroid Build Coastguard Worker movzx hd, r6b 998*c0909341SAndroid Build Coastguard Worker mov srcq, r5 999*c0909341SAndroid Build Coastguard Worker mov tmpq, r7 1000*c0909341SAndroid Build Coastguard Worker sub r6d, 1<<8 1001*c0909341SAndroid Build Coastguard Worker%else 1002*c0909341SAndroid Build Coastguard Worker mov tmpq, tmpmp 1003*c0909341SAndroid Build Coastguard Worker add r5, 16 1004*c0909341SAndroid Build Coastguard Worker mov hd, hm 1005*c0909341SAndroid Build Coastguard Worker add tmpq, 16 1006*c0909341SAndroid Build Coastguard Worker mov srcq, r5 1007*c0909341SAndroid Build Coastguard Worker mov tmpmp, tmpq 1008*c0909341SAndroid Build Coastguard Worker sub r6d, 8 1009*c0909341SAndroid Build Coastguard Worker%endif 1010*c0909341SAndroid Build Coastguard Worker jg .v_w16_loop0 1011*c0909341SAndroid Build Coastguard Worker%if WIN64 1012*c0909341SAndroid Build Coastguard Worker pop r7 1013*c0909341SAndroid Build Coastguard Worker%endif 1014*c0909341SAndroid Build Coastguard Worker RET 1015*c0909341SAndroid Build Coastguard Worker.hv: 1016*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 7 1017*c0909341SAndroid Build Coastguard Worker shl mxyd, 11 1018*c0909341SAndroid Build Coastguard Worker movd m6, mxyd 1019*c0909341SAndroid Build Coastguard Worker pshufb m6, [base+pw_256] 1020*c0909341SAndroid Build Coastguard Worker cmp wd, 8 1021*c0909341SAndroid Build Coastguard Worker je .hv_w8 1022*c0909341SAndroid Build Coastguard Worker jg .hv_w16 1023*c0909341SAndroid Build Coastguard Worker.hv_w4: 
; ---------------------------------------------------------------------------
; Continuation of the combined horizontal+vertical (.hv) path of a routine
; whose entry point lies above this section.
;
; Every row is first filtered horizontally as
;     row = (px[x]*m3 + px[x+1]*m4 - m5) >> 2
; (the "+2" byte offset in the second load is the next 16-bit pixel), and two
; consecutive filtered rows are then blended vertically as
;     out = prev + pmulhrsw(cur - prev, m6)
; NOTE(review): m3/m4/m5/m6 are initialised above this section; they look like
; the horizontal weights, a bias and the vertical weight - confirm upstream.
; ---------------------------------------------------------------------------
    ; width 4: filter row 0 once; movddup places it in both register halves
    movddup              m0, [srcq+strideq*0]
    movddup              m1, [srcq+strideq*0+2]
    pmullw               m0, m3
    pmullw               m1, m4
    psubw                m0, m5
    paddw                m0, m1
    psraw                m0, 2
.hv_w4_loop:
    ; two 4-pixel rows per iteration: low half = next row, high half = row after
    movq                 m1, [srcq+strideq*1]
    movq                 m2, [srcq+strideq*1+2]
    lea                srcq, [srcq+strideq*2]
    movhps               m1, [srcq+strideq*0]
    movhps               m2, [srcq+strideq*0+2]
    pmullw               m1, m3
    pmullw               m2, m4
    psubw                m1, m5
    paddw                m1, m2
    psraw                m1, 2            ; 1 2
    shufpd               m2, m0, m1, 0x01 ; 0 1
    mova                 m0, m1           ; keep the newest row for the next iteration
    psubw                m1, m2
    pmulhrsw             m1, m6
    paddw                m1, m2
    mova             [tmpq], m1           ; 16 bytes = two 4-pixel output rows
    add                tmpq, 16
    sub                  hd, 2
    jg .hv_w4_loop
    RET
.hv_w8:
    ; width 8: one full register per row; m0 holds the previous filtered row
    movu                 m0, [srcq+strideq*0]
    movu                 m1, [srcq+strideq*0+2]
    pmullw               m0, m3
    pmullw               m1, m4
    psubw                m0, m5
    paddw                m0, m1
    psraw                m0, 2
.hv_w8_loop:
    movu                 m1, [srcq+strideq*1]
    movu                 m2, [srcq+strideq*1+2]
    lea                srcq, [srcq+strideq*2]
    pmullw               m1, m3
    pmullw               m2, m4
    psubw                m1, m5
    paddw                m1, m2
    psraw                m1, 2
    psubw                m2, m1, m0
    pmulhrsw             m2, m6
    paddw                m2, m0
    mova        [tmpq+16*0], m2
    ; second row of the pair; m0 is reused as "previous" on the next iteration
    movu                 m0, [srcq+strideq*0]
    movu                 m2, [srcq+strideq*0+2]
    pmullw               m0, m3
    pmullw               m2, m4
    psubw                m0, m5
    paddw                m0, m2
    psraw                m0, 2
    psubw                m2, m0, m1
    pmulhrsw             m2, m6
    paddw                m2, m1
    mova        [tmpq+16*1], m2
    add                tmpq, 16*2
    sub                  hd, 2
    jg .hv_w8_loop
    RET
.hv_w16:
    ; width >= 16: processed as 8-pixel wide columns, each over the full height
%if WIN64
    push                 r7
%endif
    mov                  r5, srcq         ; r5 = source pointer of the current column
%if ARCH_X86_64
    ; r6d = h + 32*w - 256: low byte keeps h, bits 8 and up hold (w/8 - 1)
    lea                 r6d, [wq*4-32]
    mov                  wd, wd           ; zero-extend w so wq can be used for addressing
    lea                 r6d, [hq+r6*8]
    mov                  r7, tmpq         ; r7 = output pointer of the current column
%else
    mov                 r6d, wd           ; x86-32: r6d counts remaining pixels (8 per column)
%endif
.hv_w16_loop0:
    movu                 m0, [srcq+strideq*0]
    movu                 m1, [srcq+strideq*0+2]
    pmullw               m0, m3
    pmullw               m1, m4
    psubw                m0, m5
    paddw                m0, m1
    psraw                m0, 2
.hv_w16_loop:
    movu                 m1, [srcq+strideq*1]
    movu                 m2, [srcq+strideq*1+2]
    lea                srcq, [srcq+strideq*2]
    pmullw               m1, m3
    pmullw               m2, m4
    psubw                m1, m5
    paddw                m1, m2
    psraw                m1, 2
    psubw                m2, m1, m0
    pmulhrsw             m2, m6
    paddw                m2, m0
    mova        [tmpq+wq*0], m2
    movu                 m0, [srcq+strideq*0]
    movu                 m2, [srcq+strideq*0+2]
    pmullw               m0, m3
    pmullw               m2, m4
    psubw                m0, m5
    paddw                m0, m2
    psraw                m0, 2
    psubw                m2, m0, m1
    pmulhrsw             m2, m6
    paddw                m2, m1
    mova        [tmpq+wq*2], m2           ; output rows are w*2 bytes apart
    lea                tmpq, [tmpq+wq*4]
    sub                  hd, 2
    jg .hv_w16_loop
    ; advance to the next 8-pixel column (16 bytes) and restore the row count
%if ARCH_X86_64
    add                  r5, 16
    add                  r7, 16
    movzx                hd, r6b          ; h was kept in the low byte of r6d
    mov                srcq, r5
    mov                tmpq, r7
    sub                 r6d, 1<<8         ; one column done
%else
    mov                tmpq, tmpmp
    add                  r5, 16
    mov                  hd, hm
    add                tmpq, 16
    mov                srcq, r5
    mov               tmpmp, tmpq
    sub                 r6d, 8
%endif
    jg .hv_w16_loop0
; End of the preceding routine: on WIN64 restore r7 before returning.
%if WIN64
    pop                  r7
%endif
    RET

; int8_t subpel_filters[5][15][8]
;
; Each FILTER_* constant packs two offsets into subpel_filters, both expressed
; in units of 15 entries: bits 16 and up hold one offset and the low byte the
; other. Per the "6tap, mx, 4tap" annotations in put_6tap_16bpc below, the
; upper one is used on the 6-tap path and the lower one on the 4-tap path.
%assign FILTER_REGULAR (0*15 << 16) | 3*15
%assign FILTER_SMOOTH  (1*15 << 16) | 4*15
%assign FILTER_SHARP   (2*15 << 16) | 3*15

; FN prefix, type, type_h, type_v [, jmp_to]
;
; Emits the entry point <prefix>_<type>_16bpc. It loads FILTER_<type_h> into
; t0d and FILTER_<type_v> into t1d (copying t0d when both types are the same)
; and, when a fifth argument is given, jumps to the shared implementation
; <jmp_to>. Without the fifth argument no jump is emitted, so execution falls
; through into whatever code is assembled directly after the macro invocation.
%macro FN 4-5 ; prefix, type, type_h, type_v, jmp_to
cglobal %1_%2_16bpc
    mov                 t0d, FILTER_%3
%ifidn %3, %4
    mov                 t1d, t0d
%else
    mov                 t1d, FILTER_%4
%endif
%if %0 == 5 ; skip the jump in the last filter
    jmp mangle(private_prefix %+ _%5 %+ SUFFIX)
%endif
%endmacro

; Per-ABI choice of the temporaries t0/t1/t2 used by FN and put_6tap_16bpc.
; NOTE(review): DECLARE_REG_TMP is defined elsewhere; presumably it maps
; t0, t1, t2 to the listed register numbers - confirm.
%if ARCH_X86_32
DECLARE_REG_TMP 1, 2, 6
%elif WIN64
DECLARE_REG_TMP 4, 5, 8
%else
DECLARE_REG_TMP 7, 8, 8
%endif

; Entry points that share the put_6tap_16bpc body. The last one ("regular")
; passes no jump target and therefore falls through into put_6tap_16bpc, which
; must stay immediately after it.
%define PUT_8TAP_FN FN put_8tap,
PUT_8TAP_FN smooth,         SMOOTH,  SMOOTH,  put_6tap_16bpc
PUT_8TAP_FN smooth_regular, SMOOTH,  REGULAR, put_6tap_16bpc
PUT_8TAP_FN regular_smooth, REGULAR, SMOOTH,  put_6tap_16bpc
PUT_8TAP_FN regular,        REGULAR, REGULAR

; put_6tap_16bpc(dst, ds, src, ss, w, h, mx, my, <r8m>)
; Expects t0d/t1d to hold the FILTER_* constants for the horizontal/vertical
; direction (set by the FN entry points above).
; NOTE(review): the ninth argument (r8m) is not named here; it is used as the
; upper clamp value further down - presumably bitdepth_max, confirm.
cglobal put_6tap_16bpc, 0, 9, 0, dst, ds, src, ss, w, h, mx, my
    %define            base  t2-put_ssse3
%if ARCH_X86_32
    ; x86-32: mx/my live in r0/r1
    %define             mxb  r0b
    %define             mxd  r0
    %define             mxq  r0
    %define             myb  r1b
    %define             myd  r1
    %define             myq  r1
%endif
    ; replicate the fraction into three bytes, then add the packed filter
    ; offsets so each field becomes a ready-made table index
    imul                mxd, mxm, 0x010101
    add                 mxd, t0d ; 6tap_h, mx, 4tap_h
    imul                myd, mym, 0x010101
    add                 myd, t1d ; 6tap_v, my, 4tap_v
    LEA                  t2, put_ssse3
    movifnidn            wd, wm
    movifnidn          srcq, srcmp
    movifnidn           ssq, ssmp
    movifnidn            hd, hm
    ; bits 8-11 hold the raw fraction: non-zero means that direction is filtered
    test                mxd, 0xf00
    jnz .h
    test                myd, 0xf00
    jnz .v
.put:
    ; No sub-pixel filtering in either direction: tail-jump to a width-specific
    ; routine looked up in put_ssse3_table, indexed by log2(w).
    tzcnt                wd, wd
    movzx                wd, word [base+put_ssse3_table+wq*2]
    movifnidn          dstq, dstmp
    movifnidn           dsq, dsmp
    add                  wq, t2
%if WIN64
    ; NOTE(review): presumably balances pushes made by the cglobal prologue
    ; before leaving through the tail jump - confirm.
    pop                  r8
    pop                  r7
%endif
    jmp                  wq
.h_w2:
    ; width 2, horizontal only (entered from .h_w4 with m3 = sign-extended taps)
    mova                 m2, [base+spel_h_shuf2]
    pshufd               m3, m3, q2121
.h_w2_loop:
    movu                 m0, [srcq+ssq*0]
    movu                 m1, [srcq+ssq*1]
    lea                srcq, [srcq+ssq*2]
    pshufb               m0, m2
    pshufb               m1, m2
    pmaddwd              m0, m3
    pmaddwd              m1, m3
    phaddd               m0, m1
    paddd                m0, m4           ; + rounding constant
    psrad                m0, 6
    packssdw             m0, m0
    pxor                 m1, m1
    pminsw               m0, m5           ; clamp to [0, m5]
    pmaxsw               m0, m1
    movd       [dstq+dsq*0], m0
    pshuflw              m0, m0, q3232
    movd       [dstq+dsq*1], m0
    lea                dstq, [dstq+dsq*2]
    sub                  hd, 2
    jg .h_w2_loop
    RET
.h_w4:
    ; width <= 4, horizontal only. Reached via "jle" right after "sub wd, 4";
    ; nothing up to the "jl" below modifies the flags (note lea instead of sub),
    ; so "jl" still tests that subtraction and selects width 2.
    movzx               mxd, mxb          ; low byte = 4-tap filter index
    lea                srcq, [srcq-2]
    movq                 m3, [base+subpel_filters+mxq*8]
    movifnidn          dstq, dstmp
    punpcklbw            m3, m3
    psraw                m3, 8 ; sign-extend
    jl .h_w2
    WIN64_SPILL_XMM       9
    mova                 m7, [base+spel_h_shufA]
%if ARCH_X86_32
    %define              m8  [base+spel_h_shufB]
%else
    mova                 m8, [base+spel_h_shufB]
%endif
    pshufd               m2, m3, q1111    ; tap words 2,3 broadcast
    pshufd               m3, m3, q2222    ; tap words 4,5 broadcast
.h_w4_loop:
    movu                 m0, [srcq+ssq*0]
    movu                 m1, [srcq+ssq*1]
    lea                srcq, [srcq+ssq*2]
    pshufb               m6, m0, m7 ; 0 1 1 2 2 3 3 4
    pmaddwd              m6, m2
    pshufb               m0, m8     ; 2 3 3 4 4 5 5 6
    pmaddwd              m0, m3
    paddd                m0, m6
    pshufb               m6, m1, m7
    pmaddwd              m6, m2
    pshufb               m1, m8
    pmaddwd              m1, m3
    paddd                m0, m4
    paddd                m6, m4
    paddd                m1, m6
    psrad                m0, 6
    psrad                m1, 6
    packssdw             m0, m1
    pxor                 m1, m1
    pminsw               m0, m5
    pmaxsw               m0, m1
    movq       [dstq+dsq*0], m0
    movhps     [dstq+dsq*1], m0
    lea                dstq, [dstq+dsq*2]
    sub                  hd, 2
    jg .h_w4_loop
    RET
.h:
    ; Horizontal filtering requested; hand over to .hv if vertical is needed too.
    RESET_STACK_STATE
    test                myd, 0xf00
    jnz .hv
    ; m5 = word 0 of r8m broadcast to all lanes (upper clamp for the output);
    ; r8m >> 11 selects the entry of put_8tap_h_rnd loaded into m4.
    ; NOTE(review): r8m is presumably bitdepth_max (index 0 = 10-bit, 1 = 12-bit).
    mov                 myd, r8m
    movd                 m5, r8m
    shr                 myd, 11
    movddup              m4, [base+put_8tap_h_rnd+myq*8]
    movifnidn           dsq, dsmp
    pshufb               m5, [base+pw_256]
    sub                  wd, 4            ; flags consumed here and again in .h_w4
    jle .h_w4
    WIN64_SPILL_XMM      11
    shr                 mxd, 16           ; upper field = 6-tap filter index
    ; +1 skips the first byte of the 8-byte entry, so words 0..5 are taps 1..6
    movq                 m2, [base+subpel_filters+1+mxq*8]
    movifnidn          dstq, dstmp
    mova                 m6, [base+spel_h_shufA]
    mova                 m7, [base+spel_h_shufB]
    ; point src/dst at column w-4 and walk a negative index r6 up towards zero
    lea                srcq, [srcq+wq*2]
    punpcklbw            m2, m2
    lea                dstq, [dstq+wq*2]
    psraw                m2, 8
    neg                  wq
%if ARCH_X86_32
    ; only 8 XMM registers: keep the three tap pairs on the stack as m8..m10
    ALLOC_STACK       -16*3
    %define              m8  [rsp+16*0]
    %define              m9  [rsp+16*1]
    %define             m10  [rsp+16*2]
    pshufd               m0, m2, q0000
    pshufd               m1, m2, q1111
    pshufd               m2, m2, q2222
    mova                 m8, m0
    mova                 m9, m1
    mova                m10, m2
%else
    pshufd               m8, m2, q0000
    pshufd               m9, m2, q1111
    pshufd              m10, m2, q2222
%endif
.h_w8_loop0:
    mov                  r6, wq
.h_w8_loop:
    ; 8 output pixels per iteration: "abcd" = first four, "efgh" = last four
    movu                 m3, [srcq+r6*2-4]
    movu                 m2, [srcq+r6*2+8]
    pshufb               m0, m3, m6  ; 01 12 23 34
    pmaddwd              m0, m8      ; abcd0
    pshufb               m3, m7      ; 23 34 45 56
    pmaddwd              m1, m9, m3  ; abcd1
    paddd                m0, m1
    pshufb               m1, m2, m6  ; 67 78 89 9a
    shufpd               m3, m1, 0x01 ; 45 56 67 78
    pmaddwd              m1, m9      ; efgh1
    pshufb               m2, m7      ; 89 9a ab bc
    pmaddwd              m2, m10     ; efgh2
    paddd                m1, m2
    pmaddwd              m2, m10, m3 ; abcd2
    pmaddwd              m3, m8      ; efgh0
    paddd                m0, m4
    paddd                m1, m4
    paddd                m0, m2
    paddd                m1, m3
    psrad                m0, 6
    psrad                m1, 6
    packssdw             m0, m1
    pxor                 m1, m1
    pminsw               m0, m5
    pmaxsw               m0, m1
    mova        [dstq+r6*2], m0
    add                  r6, 8
    jl .h_w8_loop
    add                srcq, ssq
    add                dstq, dsq
    dec                  hd
    jg .h_w8_loop0
    RET
.v:
    ; Vertical-only filtering: keep the low-byte (4-tap) index in mxd as the
    ; fallback used for short blocks.
    movzx               mxd, myb
; put_6tap_16bpc, vertical-only path (continues directly after the .v label):
; pick the filter, broadcast its three tap pairs into m8/m9/m10, then branch on
; the width. Every loop produces two output rows per iteration as
;     out = min(avg(max(sum >> 5, 0), 0), m5)
; where pavgw against zero performs the final rounding shift by one.
    shr                 myd, 16           ; upper field = 6-tap filter index
    cmp                  hd, 6
    cmovb               myd, mxd          ; h < 6: use the 4-tap index saved in mxd
    ; +1 skips the first byte of the 8-byte entry, so words 0..5 are taps 1..6
    movq                 m2, [base+subpel_filters+1+myq*8]
    WIN64_SPILL_XMM      11, 16
    movd                 m5, r8m
    movifnidn          dstq, dstmp
    movifnidn           dsq, dsmp
    punpcklbw            m2, m2
    pshufb               m5, [base+pw_256] ; broadcast word 0 of r8m (upper clamp)
    psraw                m2, 8 ; sign-extend
%if ARCH_X86_32
    ; NOTE(review): m8..m10 are written as memory operands here; their %define
    ; is not part of this section (presumably stack slots set up earlier in
    ; the function) - confirm.
    ALLOC_STACK       -16*4
    pshufd               m0, m2, q0000
    mov                  r6, ssq
    pshufd               m1, m2, q1111
    neg                  r6               ; r6 = -stride
    pshufd               m2, m2, q2222
    mova                 m8, m0
    mova                 m9, m1
    mova                m10, m2
    cmp                  wd, 2
    jne .v_w4                             ; x86-32 handles every width > 2 in .v_w4
%else
    mov                  r6, ssq
    pshufd               m8, m2, q0000
    neg                  r6               ; r6 = -stride
    cmp                  wd, 4
    jg .v_w8
    pshufd               m9, m2, q1111    ; pshufd leaves the flags from cmp intact
    pshufd              m10, m2, q2222
    je .v_w4
%endif
.v_w2:
    ; width 2: two pixels per row, two rows packed per register
    movd                 m1, [srcq+r6 *2]
    movd                 m3, [srcq+r6 *1]
    movd                 m2, [srcq+ssq*0]
    movd                 m4, [srcq+ssq*1]
    lea                srcq, [srcq+ssq*2]
    movd                 m0, [srcq+ssq*0]
    punpckldq            m1, m3      ; 0 1
    punpckldq            m3, m2      ; 1 2
    punpckldq            m2, m4      ; 2 3
    punpckldq            m4, m0      ; 3 4
    punpcklwd            m1, m3      ; 01 12
    punpcklwd            m2, m4      ; 23 34
    pxor                 m6, m6
.v_w2_loop:
    movd                 m3, [srcq+ssq*1]
    lea                srcq, [srcq+ssq*2]
    pmaddwd              m4, m8, m1  ; a0 b0
    mova                 m1, m2
    pmaddwd              m2, m9      ; a1 b1
    paddd                m4, m2
    punpckldq            m2, m0, m3  ; 4 5
    movd                 m0, [srcq+ssq*0]
    punpckldq            m3, m0      ; 5 6
    punpcklwd            m2, m3      ; 45 56
    pmaddwd              m3, m10, m2 ; a2 b2
    paddd                m4, m3
    psrad                m4, 5
    packssdw             m4, m4
    pmaxsw               m4, m6
    pavgw                m4, m6
    pminsw               m4, m5
    movd       [dstq+dsq*0], m4
    pshuflw              m4, m4, q3232
    movd       [dstq+dsq*1], m4
    lea                dstq, [dstq+dsq*2]
    sub                  hd, 2
    jg .v_w2_loop
    RET
.v_w4:
%if ARCH_X86_32
    ; x86-32: 4-pixel wide columns. wd = h + (w/4 - 1) << 16, so the low word
    ; keeps h and the high word counts the remaining columns.
    shl                  wd, 14
    lea                srcq, [srcq+r6*2]  ; start two rows above the current one
    lea                  wd, [wq+hq-(1<<16)]
%if STACK_ALIGNMENT < 16
    %define           dstmp  [esp+16*3]
%endif
.v_w4_loop0:
    mov               dstmp, dstq         ; remember the column's first output row
    movq                 m1, [srcq+ssq*0]
    movq                 m2, [srcq+ssq*1]
    lea                  r6, [srcq+ssq*2]
    movq                 m3, [r6  +ssq*0]
    movq                 m4, [r6  +ssq*1]
    lea                  r6, [r6  +ssq*2]
%else
    movq                 m1, [srcq+r6 *2]
    movq                 m2, [srcq+r6 *1]
    lea                  r6, [srcq+ssq*2]
    movq                 m3, [srcq+ssq*0]
    movq                 m4, [srcq+ssq*1]
%endif
    movq                 m0, [r6  +ssq*0]
    punpcklwd            m1, m2      ; 01
    punpcklwd            m2, m3      ; 12
    punpcklwd            m3, m4      ; 23
    punpcklwd            m4, m0      ; 34
.v_w4_loop:
    pmaddwd              m6, m8, m1  ; a0
    pmaddwd              m7, m8, m2  ; b0
    mova                 m1, m3
    pmaddwd              m3, m9      ; a1
    mova                 m2, m4
    pmaddwd              m4, m9      ; b1
    paddd                m6, m3
    movq                 m3, [r6+ssq*0]
    paddd                m7, m4
    movq                 m4, [r6+ssq*1]
    lea                  r6, [r6+ssq*2]
    movq                 m0, [r6+ssq*0]
    punpcklwd            m3, m4      ; 45
    punpcklwd            m4, m0      ; 56
    pmaddwd              m0, m10, m3 ; a2
    paddd                m6, m0
    pmaddwd              m0, m10, m4 ; b2
    paddd                m7, m0
    psrad                m6, 5
    psrad                m7, 5
    packssdw             m6, m7
    pxor                 m7, m7
    pmaxsw               m6, m7
    pavgw                m6, m7
    pminsw               m6, m5
    movq       [dstq+dsq*0], m6
    movhps     [dstq+dsq*1], m6
    lea                dstq, [dstq+dsq*2]
    sub                  hd, 2
    jg .v_w4_loop
%if ARCH_X86_32
    ; next 4-pixel column (8 bytes); restore h from the low word of wd
    mov                dstq, dstmp
    add                srcq, 8
    movzx                hd, ww
    add                dstq, 8
    sub                  wd, 1<<16
    jg .v_w4_loop0
    RET
%else
    RET
.v_w8:
    ; x86-64, width >= 8: 8-pixel wide columns. wd = h + (w/8 - 1) << 8, so the
    ; low byte keeps h and the upper bits count the remaining columns.
    mova                r6m, m8           ; spill tap pair 0; m8..m15 hold row pairs below
    shl                  wd, 5
    pshufd               m6, m2, q1111
    lea                  wd, [wq+hq-(1<<8)]
    pshufd               m7, m2, q2222
    WIN64_PUSH_XMM       16
.v_w8_loop0:
    movu                 m9, [srcq+ r6*2]
    movu                m11, [srcq+ r6*1]
    lea                  r7, [srcq+ssq*2]
    movu                m13, [srcq+ssq*0]
    movu                m15, [srcq+ssq*1]
    mov                  r8, dstq
    movu                 m4, [r7  +ssq*0]
    punpcklwd            m8, m9, m11  ; 01
    punpckhwd            m9, m11
    punpcklwd           m10, m11, m13 ; 12
    punpckhwd           m11, m13
    punpcklwd           m12, m13, m15 ; 23
    punpckhwd           m13, m15
    punpcklwd           m14, m15, m4  ; 34
    punpckhwd           m15, m4
.v_w8_loop:
    ; unprimed = low four pixels, primed = high four pixels of each row
    mova                 m3, r6m
    pmaddwd              m0, m8, m3   ; a0
    pmaddwd              m2, m9, m3   ; a0'
    pmaddwd              m1, m10, m3  ; b0
    pmaddwd              m3, m11      ; b0'
    mova                 m8, m12
    pmaddwd             m12, m6       ; a1
    mova                 m9, m13
    pmaddwd             m13, m6       ; a1'
    mova                m10, m14
    pmaddwd             m14, m6       ; b1
    mova                m11, m15
    pmaddwd             m15, m6       ; b1'
    paddd                m0, m12
    paddd                m2, m13
    movu                m13, [r7+ssq*0]
    paddd                m1, m14
    paddd                m3, m15
    movu                m15, [r7+ssq*1]
    lea                  r7, [r7+ssq*2]
    movu                 m4, [r7+ssq*0]
    punpcklwd           m12, m13, m15 ; 45
    punpckhwd           m13, m15
    punpcklwd           m14, m15, m4  ; 56
    punpckhwd           m15, m4
    pmaddwd              m4, m7, m12  ; a2
    paddd                m0, m4
    pmaddwd              m4, m7, m13  ; a2'
    paddd                m2, m4
    pmaddwd              m4, m7, m14  ; b2
    paddd                m1, m4
    pmaddwd              m4, m7, m15  ; b2'
    paddd                m3, m4
    REPX       {psrad x, 5}, m0, m2, m1, m3
    packssdw             m0, m2
    packssdw             m1, m3
    pxor                 m2, m2
    pmaxsw               m0, m2
    pmaxsw               m1, m2
    pavgw                m0, m2
    pavgw                m1, m2
    pminsw               m0, m5
    pminsw               m1, m5
    mova         [r8+dsq*0], m0
    mova         [r8+dsq*1], m1
    lea                  r8, [r8+dsq*2]
    sub                  hd, 2
    jg .v_w8_loop
    ; next 8-pixel column (16 bytes); restore h from the low byte of wd
    add                srcq, 16
    add                dstq, 16
    movzx                hd, wb
    sub                  wd, 1<<8
    jg .v_w8_loop0
    RET
%endif
; -----------------------------------------------------------------------------
; .hv  (w <= 4)
;
; Combined horizontal + vertical subpel filtering for block widths 2 and 4.
; Wider blocks branch to .hv_w8 immediately.
;
; Register roles set up below (as established by the code in this section):
;   m6        horizontal coefficients, placed in the high byte of each word
;             (punpcklbw with a zeroed destination)
;   m7        rounding constant added to the horizontal sums
;             (pd_8704, or pd_2560 when bit 11 of r8m is set)
;   m8-m10    vertical coefficient pairs (dwords 0, 1, 2 of the sign-extended
;             vertical filter); on x86-32 these names are expected to be
;             stack-slot aliases defined outside this section -- TODO confirm
;   m11       r8m broadcast to all words, used as the upper clamp (pminsw);
;             presumably bitdepth_max -- confirm against the C prototype
;
; Two output rows are produced per loop iteration; hd is decremented by 2.
; -----------------------------------------------------------------------------
.hv:
    cmp         wd, 4
    jg .hv_w8
    WIN64_SPILL_XMM 12, 16
%if ARCH_X86_32
    ; only 8 XMM registers: keep the broadcast clamp value in m3 until the
    ; stack slot aliased as m11 has been allocated further down
    movd        m3, r8m
    pshufb      m3, [base+pw_256]
%else
    movd        m11, r8m
    pshufb      m11, [base+pw_256]
%endif
    ; horizontal filter row is indexed by the low byte of mx
    movzx       mxd, mxb
    movq        m0, [base+subpel_filters+mxq*8]
    ; vertical filter index: bits 16+ of my, or its low byte when h < 6
    movzx       mxd, myb
    shr         myd, 16
    cmp         hd, 6
    cmovb       myd, mxd
    movq        m2, [base+subpel_filters+1+myq*8]
    movddup     m7, [base+pd_8704]
    sub         srcq, 2
    pshuflw     m0, m0, q2121           ; keep words 1 and 2 of the filter row
    pxor        m6, m6
    punpcklbw   m6, m0                  ; coefficient byte -> high byte of word
    punpcklbw   m2, m2
    psraw       m2, 8 ; sign-extend
    ; bit 11 of r8m clear -> skip the rescaling below (the label name marks
    ; this as the 10 bpc case)
    test        dword r8m, 0x800
    jz .hv_w2_10bpc
    movddup     m7, [base+pd_2560]
    psraw       m6, 2
    psllw       m2, 2
.hv_w2_10bpc:
%if ARCH_X86_32
    ; NOTE(review): regs_used is forced to 2 around ALLOC_STACK and restored to
    ; 7 afterwards -- presumably to control which GPRs x86inc treats as live
    ; while it realigns the stack; confirm against x86inc.asm.
%assign regs_used 2
    ALLOC_STACK -16*7
%assign regs_used 7
    mov         dstq, r0mp
    mov         dsq, r1mp
    %define m11 [esp+16*4]
    pshufd      m0, m2, q0000
    pshufd      m1, m2, q1111
    pshufd      m2, m2, q2222
    mova        m8, m0
    mova        m9, m1
    mova        m10, m2
    mova        m11, m3
    ; no spare GPR for -stride here: negate ssq in place around the two loads
    ; of the rows above srcq, then negate it back
    neg         ssq
    movu        m3, [srcq+ssq*2]
    movu        m4, [srcq+ssq*1]
    neg         ssq
%else
    pshufd      m8, m2, q0000
    mov         r6, ssq
    pshufd      m9, m2, q1111
    neg         r6
    pshufd      m10, m2, q2222
    movu        m3, [srcq+r6 *2]
    movu        m4, [srcq+r6 *1]
%endif
    ; rows 0..4 of the vertical window end up in m3, m4, m1, m0, m2
    movu        m1, [srcq+ssq*0]
    movu        m0, [srcq+ssq*1]
    lea         srcq, [srcq+ssq*2]
    movu        m2, [srcq+ssq*0]
    cmp         wd, 4
    je .hv_w4
    ; ---- w == 2 --------------------------------------------------------------
    ; Horizontal pass on the five priming rows, then interleave them into the
    ; row pairs consumed by the vertical pmaddwd steps.
    mova        m5, [base+spel_h_shuf2]
    REPX {pshufb  x, m5}, m3, m4, m0, m1, m2
    REPX {pmaddwd x, m6}, m3, m0, m4, m1, m2
    phaddd      m3, m0 ; 0 3
    phaddd      m4, m1 ; 1 2
    phaddd      m0, m2 ; 3 4
    REPX {paddd x, m7}, m3, m4, m0
    REPX {psrad x, 10}, m3, m4, m0
    packssdw    m3, m4 ; 0 3 1 2
    packssdw    m4, m0 ; 1 2 3 4
    pshufd      m2, m3, q1320 ; 0 1 2 3
    punpcklwd   m1, m2, m4 ; 01 12
    punpckhwd   m2, m4 ; 23 34
.hv_w2_loop:
    ; m0 carries the two most recent horizontally filtered rows (as dwords)
    ; from one iteration to the next
    movu        m3, [srcq+ssq*1]
    lea         srcq, [srcq+ssq*2]
    movu        m4, [srcq+ssq*0]
    pshufb      m3, m5
    pshufb      m4, m5
    pmaddwd     m3, m6
    pmaddwd     m4, m6
    phaddd      m3, m4
    pmaddwd     m4, m8, m1 ; a0 b0
    mova        m1, m2
    pmaddwd     m2, m9 ; a1 b1
    paddd       m4, m2
    paddd       m3, m7
    psrad       m3, 10 ; 5 6
    packssdw    m0, m3
    pshufd      m2, m0, q2103
    punpckhwd   m2, m0 ; 45 56
    mova        m0, m3
    pmaddwd     m3, m10, m2 ; a2 b2
    paddd       m4, m3
    psrad       m4, 10
    packssdw    m4, m4
    pxor        m3, m3
    pminsw      m4, m11                 ; clamp to [0, m11]
    pmaxsw      m4, m3
    movd        [dstq+dsq*0], m4
    pshuflw     m4, m4, q1032           ; move the second row's dword down
    movd        [dstq+dsq*1], m4
    lea         dstq, [dstq+dsq*2]
    sub         hd, 2
    jg .hv_w2_loop
    RET
    ; ---- w == 4 --------------------------------------------------------------
.hv_w4:
%if ARCH_X86_32
    ; m12/m13 live in stack slots, m14/m15 are read straight from the shuffle
    ; tables
    %define m12 [esp+16*5]
    %define m13 [esp+16*6]
    %define m14 [base+spel_h_shufA]
    %define m15 [base+spel_h_shufB]
    pshufd      m5, m6, q0000
    pshufd      m6, m6, q1111
    mova        m12, m5
    mova        m13, m6
%else
    WIN64_PUSH_XMM 16
    mova        m14, [base+spel_h_shufA]
    mova        m15, [base+spel_h_shufB]
    pshufd      m12, m6, q0000
    pshufd      m13, m6, q1111
%endif
; Horizontal filter for one 4-pixel row.
;   %1 dst   receives the rounded (m7 added) but not yet shifted dword sums
;   %2 src   source row, clobbered
;   %3 tmp   scratch register, clobbered
;   %4 shufB second shuffle mask (defaults to m15)
; Uses m14 as the first shuffle mask and m12/m13 as the two coefficient pairs.
%macro HV_H_W4_6TAP 3-4 m15 ; dst, src, tmp, shufB
    pshufb      %3, %2, m14
    pmaddwd     %3, m12
    pshufb      %2, %4
    pmaddwd     %2, m13
    paddd       %3, m7
    paddd       %1, %2, %3
%endmacro
    HV_H_W4_6TAP m3, m3, m5
    HV_H_W4_6TAP m4, m4, m5
    HV_H_W4_6TAP m5, m1, m5
    HV_H_W4_6TAP m0, m0, m1
    HV_H_W4_6TAP m2, m2, m1
    REPX {psrad x, 10}, m3, m5, m4, m0, m2
    packssdw    m3, m5 ; 0 2
    packssdw    m4, m0 ; 1 3
    packssdw    m5, m2 ; 2 4
    punpcklwd   m1, m3, m4 ; 01
    punpckhwd   m3, m4 ; 23
    punpcklwd   m2, m4, m5 ; 12
    punpckhwd   m4, m5 ; 34
.hv_w4_loop:
    ; the vertical multiplies for the rows already held are interleaved with
    ; the loads of the next two source rows
    movu        m0, [srcq+ssq*1]
    pmaddwd     m5, m8, m1 ; a0
    lea         srcq, [srcq+ssq*2]
    pmaddwd     m6, m8, m2 ; b0
    mova        m1, m3
    pmaddwd     m3, m9 ; a1
    mova        m2, m4
    pmaddwd     m4, m9 ; b1
    paddd       m5, m3
    movu        m3, [srcq+ssq*0]
    paddd       m6, m4
    HV_H_W4_6TAP m0, m0, m4
    HV_H_W4_6TAP m3, m3, m4
    psrad       m4, m2, 16              ; high word of each "34" pair = row 4
    psrad       m0, 10
    psrad       m3, 10
    packssdw    m4, m0 ; 4 5
    packssdw    m0, m3 ; 5 6
    punpcklwd   m3, m4, m0 ; 45
    punpckhwd   m4, m0 ; 56
    pmaddwd     m0, m10, m3 ; a2
    paddd       m5, m0
    pmaddwd     m0, m10, m4 ; b2
    paddd       m6, m0
    psrad       m5, 10
    psrad       m6, 10
    packssdw    m5, m6
    pxor        m6, m6
    pminsw      m5, m11                 ; clamp to [0, m11]
    pmaxsw      m5, m6
    movq        [dstq+dsq*0], m5
    movhps      [dstq+dsq*1], m5
    lea         dstq, [dstq+dsq*2]
    sub         hd, 2
    jg .hv_w4_loop
    RET
; -----------------------------------------------------------------------------
; .hv_w8  (w >= 8)
;
; Combined horizontal + vertical subpel filtering for wide blocks, processed
; in vertical strips: 4 pixels (8 bytes) wide on x86-32, 8 pixels (16 bytes)
; wide on x86-64. Two output rows are produced per inner-loop iteration.
;
; Stack layout (as written by the code below):
;   [rsp+16*0..2]  horizontal coefficient pairs (dwords 0, 1, 2 of m0)
;   [rsp+16*3..5]  vertical coefficient pairs (dwords 0, 1, 2 of m1)
;   [rsp+16*6]     r8m broadcast to all words, used as the upper clamp;
;                  presumably bitdepth_max -- confirm against the C prototype
;   [rsp+16*7]     x86-32 only: rounding constant (kept in m4 on x86-64)
;   [esp+16*8]     x86-32 with STACK_ALIGNMENT < 16 only: srcmp/dstmp slots
; -----------------------------------------------------------------------------
.hv_w8:
    RESET_STACK_STATE
    ; horizontal filter index: bits 16+ of mx
    shr         mxd, 16
    movq        m2, [base+subpel_filters+1+mxq*8]
    ; vertical filter index: bits 16+ of my, or its low byte when h < 6
    movzx       mxd, myb
    shr         myd, 16
    cmp         hd, 6
    cmovb       myd, mxd
    movq        m1, [base+subpel_filters+1+myq*8]
    movd        m3, r8m
    movddup     m4, [base+pd_8704]
    pshufb      m3, [base+pw_256]
    pxor        m0, m0
    punpcklbw   m0, m2                  ; coefficient byte -> high byte of word
    punpcklbw   m1, m1
    sub         srcq, 4
    psraw       m1, 8 ; sign-extend
    ; bit 11 of r8m clear -> skip the rescaling below (the label name marks
    ; this as the 10 bpc case)
    test        dword r8m, 0x800
    jz .hv_w8_10bpc
    movddup     m4, [base+pd_2560]
    psraw       m0, 2
    psllw       m1, 2
.hv_w8_10bpc:
%if ARCH_X86_32
    ; NOTE(review): regs_used is forced to 2 around ALLOC_STACK and restored to
    ; 7 afterwards -- presumably to control which GPRs x86inc treats as live
    ; while it realigns the stack; confirm against x86inc.asm.
%assign regs_used 2
    ALLOC_STACK -16*9
%assign regs_used 7
    mov         dstq, r0mp
    mov         dsq, r1mp
    mova        [rsp+16*7], m4
%else
    ALLOC_STACK 16*7, 16
%endif
    mova        [rsp+16*6], m3
    pshufd      m2, m0, q0000
    mova        [rsp+16*0], m2
    pshufd      m2, m0, q1111
    mova        [rsp+16*1], m2
    pshufd      m0, m0, q2222
    mova        [rsp+16*2], m0
    pshufd      m2, m1, q0000
    mova        [rsp+16*3], m2
    pshufd      m2, m1, q1111
    mova        [rsp+16*4], m2
    pshufd      m1, m1, q2222
    mova        [rsp+16*5], m1
    mov         r6, ssq
    neg         r6                      ; r6 = -stride, for the rows above srcq
%if ARCH_X86_32
    ; r4d packs the loop state: low 16 bits = h, upper bits = strip counter
    ; (w << 14, i.e. one unit of 1<<16 per 4-pixel strip, minus one strip)
    shl         wd, 14
    lea         r4d, [wq+hq-(1<<16)]
%if STACK_ALIGNMENT < 16
    %define srcmp [esp+16*8+4*0]
    %define dstmp [esp+16*8+4*1]
%endif
; Horizontal filter for 4 pixels (x86-32 variant).
;   %1      dst: receives the unrounded dword sums
;   %2, %3  the source row loaded at byte offsets +0 and +2; both clobbered
;   %4-%6   coefficient pairs (default to the stack copies)
; Rounding and shifting are left to the caller.
%macro HV_H_6TAP 3-6 [rsp+16*0], [rsp+16*1], [rsp+16*2] ; dst, src[1-2], mul[1-3]
    punpcklwd   %1, %2, %3   ; 01 12 23 34
    punpckhwd   %2, %3       ; 45 56 67 78
    pmaddwd     %3, %4, %1   ; a0
    shufpd      %1, %2, 0x01 ; 23 34 45 56
    pmaddwd     %2, %6       ; a2
    pmaddwd     %1, %5       ; a1
    paddd       %2, %3
    paddd       %1, %2
%endmacro
.hv_w8_loop0:
    ; per-strip setup: remember the strip's base pointers, then filter the
    ; five priming rows horizontally
    mov         srcmp, srcq
    mov         dstmp, dstq
    movu        m5, [srcq+r6*2+0]
    movu        m6, [srcq+r6*2+2]
    mova        m7, [rsp+16*0]
    mova        m1, [rsp+16*1]
    mova        m0, [rsp+16*2]
    HV_H_6TAP   m2, m5, m6, m7, m1, m0
    movu        m5, [srcq+r6*1+0]
    movu        m6, [srcq+r6*1+2]
    HV_H_6TAP   m3, m5, m6, m7, m1, m0
    movu        m5, [srcq+ssq*0+0]
    movu        m6, [srcq+ssq*0+2]
    HV_H_6TAP   m4, m5, m6, m7, m1, m0
    movu        m5, [srcq+ssq*1+0]
    movu        m6, [srcq+ssq*1+2]
    lea         srcq, [srcq+ssq*2]
    ; m0 becomes a destination here, so the third coefficient pair falls back
    ; to its stack default; likewise m1 in the call after that
    HV_H_6TAP   m0, m5, m6, m7, m1
    movu        m5, [srcq+ssq*0+0]
    movu        m6, [srcq+ssq*0+2]
    HV_H_6TAP   m1, m5, m6, m7
    mova        m5, [rsp+16*7]
    REPX {paddd x, m5}, m2, m3, m4, m0, m1
    REPX {psrad x, 10}, m2, m4, m3, m0, m1
    packssdw    m2, m4 ; 0 2
    packssdw    m3, m0 ; 1 3
    packssdw    m4, m1 ; 2 4
    punpcklwd   m0, m2, m3 ; 01
    punpckhwd   m2, m3 ; 23
    punpcklwd   m1, m3, m4 ; 12
    punpckhwd   m3, m4 ; 34
.hv_w8_loop:
    mova        m5, [rsp+16*3]
    mova        m6, [rsp+16*4]
    pmaddwd     m4, m0, m5 ; a0
    pmaddwd     m5, m1 ; b0
    mova        m0, m2
    pmaddwd     m2, m6 ; a1
    mova        m1, m3
    pmaddwd     m3, m6 ; b1
    paddd       m4, m2
    movu        m2, [srcq+ssq*1+0]
    paddd       m5, m3
    movu        m3, [srcq+ssq*1+2]
    lea         srcq, [srcq+ssq*2]
    HV_H_6TAP   m6, m2, m3
    movu        m2, [srcq+ssq*0+0]
    movu        m3, [srcq+ssq*0+2]
    HV_H_6TAP   m7, m2, m3
    mova        m2, [rsp+16*7]
    psrad       m3, m1, 16              ; high word of each "34" pair = row 4
    paddd       m6, m2
    paddd       m7, m2
    psrad       m6, 10
    psrad       m7, 10
    packssdw    m3, m6 ; 4 5
    packssdw    m6, m7 ; 5 6
    mova        m7, [rsp+16*5]
    punpcklwd   m2, m3, m6 ; 45
    punpckhwd   m3, m6 ; 56
    pmaddwd     m6, m2, m7 ; a2
    pmaddwd     m7, m3 ; b2
    paddd       m4, m6
    paddd       m5, m7
    psrad       m4, 10
    psrad       m5, 10
    packssdw    m4, m5
    pxor        m5, m5
    pminsw      m4, [rsp+16*6]          ; clamp to [0, stored maximum]
    pmaxsw      m4, m5
    movq        [dstq+dsq*0], m4
    movhps      [dstq+dsq*1], m4
    lea         dstq, [dstq+dsq*2]
    sub         hd, 2
    jg .hv_w8_loop
    ; next strip: restore the base pointers, advance by 4 pixels, reload h
    ; from the low word of r4d and decrement the strip counter (sets the flags
    ; consumed by the jg after %endif)
    mov         srcq, srcmp
    mov         dstq, dstmp
    movzx       hd, r4w
    add         srcq, 8
    add         dstq, 8
    sub         r4d, 1<<16
%else
    ; r8d packs the loop state: low 8 bits = h, upper bits = strip counter
    ; (w << 5, i.e. one unit of 256 per 8-pixel strip, minus one strip)
    shl         wd, 5
    lea         r8d, [wq+hq-256]
; Horizontal filter for 8 pixels (x86-64 variant).
;   %1      dst: receives the 8 rounded, shifted and packed word results
;   %2-%4   the source row loaded at byte offsets +0, +8 and +16; clobbered
;   %5      right-shift amount
;   %6      shuffle mask: a register, or a memory operand (the default), in
;           which case %1 is used to hold it temporarily
;   %7-%9   coefficient pairs (default to the stack copies)
; m4 must hold the rounding constant.
%macro HV_H_6TAP 5-9 [spel_h_shufA], [rsp+16*0], [rsp+16*1], [rsp+16*2] ; dst, src[1-3], shift, shuf, mul[1-3]
%ifid %6
    REPX {pshufb x, %6}, %2, %3, %4
%else
    mova        %1, %6
    pshufb      %2, %1       ; 01 12 23 34
    pshufb      %3, %1       ; 45 56 67 78
    pshufb      %4, %1       ; 89 9a ab bc
%endif
    pmaddwd     %1, %7, %2
    shufpd      %2, %3, 0x01 ; 23 34 45 56
    pmaddwd     %2, %8
    paddd       %1, %2
    pmaddwd     %2, %9, %3
    paddd       %1, %2
    pmaddwd     %2, %7, %3
    shufpd      %3, %4, 0x01 ; 67 78 89 9a
    pmaddwd     %4, %9
    pmaddwd     %3, %8
    paddd       %1, m4
    paddd       %2, m4
    paddd       %3, %4
    paddd       %2, %3
    psrad       %1, %5
    psrad       %2, %5
    packssdw    %1, %2
%endmacro
.hv_w8_loop0:
    ; per-strip setup: r4 walks the source rows and r7 the destination rows,
    ; so srcq/dstq keep pointing at the top of the strip
    mova        m5, [spel_h_shufA]
    movu        m0, [srcq+r6*2+ 0]
    mova        m6, [rsp+16*0]
    movu        m1, [srcq+r6*2+ 8]
    mova        m7, [rsp+16*1]
    movu        m2, [srcq+r6*2+16]
    mova        m8, [rsp+16*2]
    HV_H_6TAP   m9, m0, m1, m2, 10, m5, m6, m7, m8
    movu        m0, [srcq+r6*1+ 0]
    movu        m1, [srcq+r6*1+ 8]
    movu        m2, [srcq+r6*1+16]
    lea         r4, [srcq+ssq*2]
    HV_H_6TAP   m11, m0, m1, m2, 10, m5, m6, m7, m8
    movu        m0, [srcq+ssq*0+ 0]
    movu        m1, [srcq+ssq*0+ 8]
    movu        m2, [srcq+ssq*0+16]
    mov         r7, dstq
    HV_H_6TAP   m13, m0, m1, m2, 10, m5, m6, m7, m8
    movu        m0, [srcq+ssq*1+ 0]
    movu        m1, [srcq+ssq*1+ 8]
    movu        m2, [srcq+ssq*1+16]
    HV_H_6TAP   m15, m0, m1, m2, 10, m5, m6, m7, m8
    movu        m0, [r4+ssq*0+ 0]
    movu        m1, [r4+ssq*0+ 8]
    movu        m2, [r4+ssq*0+16]
    ; m5 is both the shuffle mask and the destination here; the mask is
    ; consumed by the pshufbs before the result overwrites it
    HV_H_6TAP   m5, m0, m1, m2, 10, m5, m6, m7, m8
    punpcklwd   m8, m9, m11 ; 01
    punpckhwd   m9, m11
    punpcklwd   m10, m11, m13 ; 12
    punpckhwd   m11, m13
    punpcklwd   m12, m13, m15 ; 23
    punpckhwd   m13, m15
    punpcklwd   m14, m15, m5 ; 34
    punpckhwd   m15, m5
.hv_w8_loop:
    ; m5 holds the newest filtered row across iterations; the primed names
    ; (a0', b0', ...) are the high halves of the corresponding row pairs
    mova        m3, [rsp+16*3]
    mova        m7, [rsp+16*4]
    pmaddwd     m0, m8, m3 ; a0
    mova        m8, m12
    pmaddwd     m2, m9, m3 ; a0'
    mova        m9, m13
    pmaddwd     m1, m10, m3 ; b0
    mova        m10, m14
    pmaddwd     m3, m11 ; b0'
    mova        m11, m15
    REPX {pmaddwd x, m7}, m12, m13, m14, m15
    movu        m6, [r4+ssq*1+ 0]
    paddd       m0, m12
    movu        m7, [r4+ssq*1+ 8]
    paddd       m2, m13
    movu        m12, [r4+ssq*1+16]
    paddd       m1, m14
    lea         r4, [r4+ssq*2]
    paddd       m3, m15
    HV_H_6TAP   m15, m6, m7, m12, 10
    movu        m6, [r4+ssq*0+ 0]
    movu        m7, [r4+ssq*0+ 8]
    movu        m14, [r4+ssq*0+16]
    punpcklwd   m12, m5, m15 ; 45
    punpckhwd   m13, m5, m15
    HV_H_6TAP   m5, m6, m7, m14, 10
    mova        m7, [rsp+16*5]
    punpcklwd   m14, m15, m5 ; 56
    punpckhwd   m15, m5
    pmaddwd     m6, m12, m7 ; a2
    paddd       m0, m6
    pmaddwd     m6, m13, m7 ; a2'
    paddd       m2, m6
    pmaddwd     m6, m14, m7 ; b2
    pmaddwd     m7, m15 ; b2'
    paddd       m1, m6
    mova        m6, [rsp+16*6]
    paddd       m3, m7
    REPX {psrad x, 10}, m0, m2, m1, m3
    packssdw    m0, m2
    packssdw    m1, m3
    pxor        m2, m2
    pminsw      m0, m6                  ; clamp to [0, stored maximum]
    pminsw      m1, m6
    pmaxsw      m0, m2
    pmaxsw      m1, m2
    mova        [r7+dsq*0], m0
    mova        [r7+dsq*1], m1
    lea         r7, [r7+dsq*2]
    sub         hd, 2
    jg .hv_w8_loop
    ; next strip: advance by 8 pixels, reload h from the low byte of r8d and
    ; decrement the strip counter (sets the flags consumed by the jg below)
    add         srcq, 16
    add         dstq, 16
    movzx       hd, r8b
    sub         r8d, 1<<8
%endif
    jg .hv_w8_loop0
Build Coastguard Worker RET 2047*c0909341SAndroid Build Coastguard Worker 2048*c0909341SAndroid Build Coastguard WorkerPUT_8TAP_FN smooth_sharp, SMOOTH, SHARP, put_8tap_16bpc 2049*c0909341SAndroid Build Coastguard WorkerPUT_8TAP_FN sharp_smooth, SHARP, SMOOTH, put_8tap_16bpc 2050*c0909341SAndroid Build Coastguard WorkerPUT_8TAP_FN regular_sharp, REGULAR, SHARP, put_8tap_16bpc 2051*c0909341SAndroid Build Coastguard WorkerPUT_8TAP_FN sharp_regular, SHARP, REGULAR, put_8tap_16bpc 2052*c0909341SAndroid Build Coastguard WorkerPUT_8TAP_FN sharp, SHARP, SHARP 2053*c0909341SAndroid Build Coastguard Worker 2054*c0909341SAndroid Build Coastguard Workercglobal put_8tap_16bpc, 0, 9, 0, dst, ds, src, ss, w, h, mx, my 2055*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2056*c0909341SAndroid Build Coastguard Worker %define mxb r0b 2057*c0909341SAndroid Build Coastguard Worker %define mxd r0 2058*c0909341SAndroid Build Coastguard Worker %define mxq r0 2059*c0909341SAndroid Build Coastguard Worker %define myb r1b 2060*c0909341SAndroid Build Coastguard Worker %define myd r1 2061*c0909341SAndroid Build Coastguard Worker %define myq r1 2062*c0909341SAndroid Build Coastguard Worker %define m8 [esp+16*0] 2063*c0909341SAndroid Build Coastguard Worker %define m9 [esp+16*1] 2064*c0909341SAndroid Build Coastguard Worker %define m10 [esp+16*2] 2065*c0909341SAndroid Build Coastguard Worker %define m11 [esp+16*3] 2066*c0909341SAndroid Build Coastguard Worker %define m12 [esp+16*4] 2067*c0909341SAndroid Build Coastguard Worker %define m13 [esp+16*5] 2068*c0909341SAndroid Build Coastguard Worker %define m14 [esp+16*6] 2069*c0909341SAndroid Build Coastguard Worker %define m15 [esp+16*7] 2070*c0909341SAndroid Build Coastguard Worker%endif 2071*c0909341SAndroid Build Coastguard Worker imul mxd, mxm, 0x010101 2072*c0909341SAndroid Build Coastguard Worker add mxd, t0d ; 8tap_h, mx, 4tap_h 2073*c0909341SAndroid Build Coastguard Worker imul myd, mym, 0x010101 2074*c0909341SAndroid Build 
Coastguard Worker add myd, t1d ; 8tap_v, my, 4tap_v 2075*c0909341SAndroid Build Coastguard Worker LEA t2, put_ssse3 2076*c0909341SAndroid Build Coastguard Worker movifnidn wd, wm 2077*c0909341SAndroid Build Coastguard Worker movifnidn srcq, srcmp 2078*c0909341SAndroid Build Coastguard Worker movifnidn ssq, ssmp 2079*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 2080*c0909341SAndroid Build Coastguard Worker test mxd, 0xf00 2081*c0909341SAndroid Build Coastguard Worker jnz .h 2082*c0909341SAndroid Build Coastguard Worker test myd, 0xf00 2083*c0909341SAndroid Build Coastguard Worker jz mangle(private_prefix %+ _put_6tap_16bpc_ssse3).put 2084*c0909341SAndroid Build Coastguard Worker.v: 2085*c0909341SAndroid Build Coastguard Worker movzx mxd, myb 2086*c0909341SAndroid Build Coastguard Worker shr myd, 16 2087*c0909341SAndroid Build Coastguard Worker cmp hd, 6 2088*c0909341SAndroid Build Coastguard Worker cmovb myd, mxd 2089*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+myq*8] 2090*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 15 2091*c0909341SAndroid Build Coastguard Worker movd m7, r8m 2092*c0909341SAndroid Build Coastguard Worker movifnidn dstq, dstmp 2093*c0909341SAndroid Build Coastguard Worker movifnidn dsq, dsmp 2094*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m3 2095*c0909341SAndroid Build Coastguard Worker pshufb m7, [base+pw_256] 2096*c0909341SAndroid Build Coastguard Worker psraw m3, 8 ; sign-extend 2097*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2098*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*7 2099*c0909341SAndroid Build Coastguard Worker pshufd m0, m3, q0000 2100*c0909341SAndroid Build Coastguard Worker pshufd m1, m3, q1111 2101*c0909341SAndroid Build Coastguard Worker pshufd m2, m3, q2222 2102*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q3333 2103*c0909341SAndroid Build Coastguard Worker mova m8, m0 2104*c0909341SAndroid Build Coastguard Worker mova m9, m1 
2105*c0909341SAndroid Build Coastguard Worker mova m10, m2 2106*c0909341SAndroid Build Coastguard Worker mova m11, m3 2107*c0909341SAndroid Build Coastguard Worker%else 2108*c0909341SAndroid Build Coastguard Worker pshufd m8, m3, q0000 2109*c0909341SAndroid Build Coastguard Worker pshufd m9, m3, q1111 2110*c0909341SAndroid Build Coastguard Worker pshufd m10, m3, q2222 2111*c0909341SAndroid Build Coastguard Worker pshufd m11, m3, q3333 2112*c0909341SAndroid Build Coastguard Worker%endif 2113*c0909341SAndroid Build Coastguard Worker lea r6, [ssq*3] 2114*c0909341SAndroid Build Coastguard Worker sub srcq, r6 2115*c0909341SAndroid Build Coastguard Worker cmp wd, 2 2116*c0909341SAndroid Build Coastguard Worker jne .v_w4 2117*c0909341SAndroid Build Coastguard Worker.v_w2: 2118*c0909341SAndroid Build Coastguard Worker movd m1, [srcq+ssq*0] 2119*c0909341SAndroid Build Coastguard Worker movd m4, [srcq+ssq*1] 2120*c0909341SAndroid Build Coastguard Worker movd m2, [srcq+ssq*2] 2121*c0909341SAndroid Build Coastguard Worker add srcq, r6 2122*c0909341SAndroid Build Coastguard Worker movd m5, [srcq+ssq*0] 2123*c0909341SAndroid Build Coastguard Worker movd m3, [srcq+ssq*1] 2124*c0909341SAndroid Build Coastguard Worker movd m6, [srcq+ssq*2] 2125*c0909341SAndroid Build Coastguard Worker add srcq, r6 2126*c0909341SAndroid Build Coastguard Worker movd m0, [srcq+ssq*0] 2127*c0909341SAndroid Build Coastguard Worker punpckldq m1, m4 ; 0 1 2128*c0909341SAndroid Build Coastguard Worker punpckldq m4, m2 ; 1 2 2129*c0909341SAndroid Build Coastguard Worker punpckldq m2, m5 ; 2 3 2130*c0909341SAndroid Build Coastguard Worker punpckldq m5, m3 ; 3 4 2131*c0909341SAndroid Build Coastguard Worker punpckldq m3, m6 ; 4 5 2132*c0909341SAndroid Build Coastguard Worker punpckldq m6, m0 ; 5 6 2133*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m4 ; 01 12 2134*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m5 ; 23 34 2135*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m6 ; 45 56 
2136*c0909341SAndroid Build Coastguard Worker pxor m6, m6 2137*c0909341SAndroid Build Coastguard Worker.v_w2_loop: 2138*c0909341SAndroid Build Coastguard Worker movd m4, [srcq+ssq*1] 2139*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 2140*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m8, m1 ; a0 b0 2141*c0909341SAndroid Build Coastguard Worker mova m1, m2 2142*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m9 ; a1 b1 2143*c0909341SAndroid Build Coastguard Worker paddd m5, m2 2144*c0909341SAndroid Build Coastguard Worker mova m2, m3 2145*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m10 ; a2 b2 2146*c0909341SAndroid Build Coastguard Worker paddd m5, m3 2147*c0909341SAndroid Build Coastguard Worker punpckldq m3, m0, m4 ; 6 7 2148*c0909341SAndroid Build Coastguard Worker movd m0, [srcq+ssq*0] 2149*c0909341SAndroid Build Coastguard Worker punpckldq m4, m0 ; 7 8 2150*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m4 ; 67 78 2151*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m11, m3 ; a3 b3 2152*c0909341SAndroid Build Coastguard Worker paddd m5, m4 2153*c0909341SAndroid Build Coastguard Worker psrad m5, 5 2154*c0909341SAndroid Build Coastguard Worker packssdw m5, m5 2155*c0909341SAndroid Build Coastguard Worker pmaxsw m5, m6 2156*c0909341SAndroid Build Coastguard Worker pavgw m5, m6 2157*c0909341SAndroid Build Coastguard Worker pminsw m5, m7 2158*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*0], m5 2159*c0909341SAndroid Build Coastguard Worker pshuflw m5, m5, q3232 2160*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*1], m5 2161*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 2162*c0909341SAndroid Build Coastguard Worker sub hd, 2 2163*c0909341SAndroid Build Coastguard Worker jg .v_w2_loop 2164*c0909341SAndroid Build Coastguard Worker RET 2165*c0909341SAndroid Build Coastguard Worker.v_w4: 2166*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2167*c0909341SAndroid Build 
Coastguard Worker shl wd, 14 2168*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 2169*c0909341SAndroid Build Coastguard Worker mov [esp+4*29], srcq 2170*c0909341SAndroid Build Coastguard Worker mov [esp+4*30], dstq 2171*c0909341SAndroid Build Coastguard Worker%else 2172*c0909341SAndroid Build Coastguard Worker mov srcmp, srcq 2173*c0909341SAndroid Build Coastguard Worker%endif 2174*c0909341SAndroid Build Coastguard Worker lea wd, [wq+hq-(1<<16)] 2175*c0909341SAndroid Build Coastguard Worker%else 2176*c0909341SAndroid Build Coastguard Worker shl wd, 6 2177*c0909341SAndroid Build Coastguard Worker mov r7, srcq 2178*c0909341SAndroid Build Coastguard Worker mov r8, dstq 2179*c0909341SAndroid Build Coastguard Worker lea wd, [wq+hq-(1<<8)] 2180*c0909341SAndroid Build Coastguard Worker%endif 2181*c0909341SAndroid Build Coastguard Worker.v_w4_loop0: 2182*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+ssq*0] 2183*c0909341SAndroid Build Coastguard Worker movq m2, [srcq+ssq*1] 2184*c0909341SAndroid Build Coastguard Worker movq m3, [srcq+ssq*2] 2185*c0909341SAndroid Build Coastguard Worker add srcq, r6 2186*c0909341SAndroid Build Coastguard Worker movq m4, [srcq+ssq*0] 2187*c0909341SAndroid Build Coastguard Worker movq m5, [srcq+ssq*1] 2188*c0909341SAndroid Build Coastguard Worker movq m6, [srcq+ssq*2] 2189*c0909341SAndroid Build Coastguard Worker add srcq, r6 2190*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 2191*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m2 ; 01 2192*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3 ; 12 2193*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m4 ; 23 2194*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5 ; 34 2195*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m6 ; 45 2196*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m0 ; 56 2197*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2198*c0909341SAndroid Build Coastguard Worker jmp 
.v_w4_loop_start 2199*c0909341SAndroid Build Coastguard Worker.v_w4_loop: 2200*c0909341SAndroid Build Coastguard Worker mova m1, m12 2201*c0909341SAndroid Build Coastguard Worker mova m2, m13 2202*c0909341SAndroid Build Coastguard Worker mova m3, m14 2203*c0909341SAndroid Build Coastguard Worker.v_w4_loop_start: 2204*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m8 ; a0 2205*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m8 ; b0 2206*c0909341SAndroid Build Coastguard Worker mova m12, m3 2207*c0909341SAndroid Build Coastguard Worker mova m13, m4 2208*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m9 ; a1 2209*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m9 ; b1 2210*c0909341SAndroid Build Coastguard Worker paddd m1, m3 2211*c0909341SAndroid Build Coastguard Worker paddd m2, m4 2212*c0909341SAndroid Build Coastguard Worker mova m14, m5 2213*c0909341SAndroid Build Coastguard Worker mova m4, m6 2214*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m10 ; a2 2215*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m10 ; b2 2216*c0909341SAndroid Build Coastguard Worker paddd m1, m5 2217*c0909341SAndroid Build Coastguard Worker paddd m2, m6 2218*c0909341SAndroid Build Coastguard Worker movq m6, [srcq+ssq*1] 2219*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 2220*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m0, m6 ; 67 2221*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 2222*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m11, m5 ; a3 2223*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m0 ; 78 2224*c0909341SAndroid Build Coastguard Worker paddd m1, m3 2225*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m11, m6 ; b3 2226*c0909341SAndroid Build Coastguard Worker paddd m2, m3 2227*c0909341SAndroid Build Coastguard Worker psrad m1, 5 2228*c0909341SAndroid Build Coastguard Worker psrad m2, 5 2229*c0909341SAndroid Build Coastguard Worker packssdw m1, m2 2230*c0909341SAndroid Build 
Coastguard Worker pxor m2, m2 2231*c0909341SAndroid Build Coastguard Worker pmaxsw m1, m2 2232*c0909341SAndroid Build Coastguard Worker pavgw m1, m2 2233*c0909341SAndroid Build Coastguard Worker pminsw m1, m7 2234*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*0], m1 2235*c0909341SAndroid Build Coastguard Worker movhps [dstq+dsq*1], m1 2236*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 2237*c0909341SAndroid Build Coastguard Worker sub hd, 2 2238*c0909341SAndroid Build Coastguard Worker jg .v_w4_loop 2239*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 2240*c0909341SAndroid Build Coastguard Worker mov srcq, [esp+4*29] 2241*c0909341SAndroid Build Coastguard Worker mov dstq, [esp+4*30] 2242*c0909341SAndroid Build Coastguard Worker movzx hd, ww 2243*c0909341SAndroid Build Coastguard Worker add srcq, 8 2244*c0909341SAndroid Build Coastguard Worker add dstq, 8 2245*c0909341SAndroid Build Coastguard Worker mov [esp+4*29], srcq 2246*c0909341SAndroid Build Coastguard Worker mov [esp+4*30], dstq 2247*c0909341SAndroid Build Coastguard Worker%else 2248*c0909341SAndroid Build Coastguard Worker mov srcq, srcmp 2249*c0909341SAndroid Build Coastguard Worker mov dstq, dstmp 2250*c0909341SAndroid Build Coastguard Worker movzx hd, ww 2251*c0909341SAndroid Build Coastguard Worker add srcq, 8 2252*c0909341SAndroid Build Coastguard Worker add dstq, 8 2253*c0909341SAndroid Build Coastguard Worker mov srcmp, srcq 2254*c0909341SAndroid Build Coastguard Worker mov dstmp, dstq 2255*c0909341SAndroid Build Coastguard Worker%endif 2256*c0909341SAndroid Build Coastguard Worker sub wd, 1<<16 2257*c0909341SAndroid Build Coastguard Worker%else 2258*c0909341SAndroid Build Coastguard Worker.v_w4_loop: 2259*c0909341SAndroid Build Coastguard Worker pmaddwd m12, m8, m1 ; a0 2260*c0909341SAndroid Build Coastguard Worker pmaddwd m13, m8, m2 ; b0 2261*c0909341SAndroid Build Coastguard Worker mova m1, m3 2262*c0909341SAndroid Build Coastguard Worker mova m2, m4 
2263*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m9 ; a1 2264*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m9 ; b1 2265*c0909341SAndroid Build Coastguard Worker paddd m12, m3 2266*c0909341SAndroid Build Coastguard Worker paddd m13, m4 2267*c0909341SAndroid Build Coastguard Worker mova m3, m5 2268*c0909341SAndroid Build Coastguard Worker mova m4, m6 2269*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m10 ; a2 2270*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m10 ; b2 2271*c0909341SAndroid Build Coastguard Worker paddd m12, m5 2272*c0909341SAndroid Build Coastguard Worker paddd m13, m6 2273*c0909341SAndroid Build Coastguard Worker movq m6, [srcq+ssq*1] 2274*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 2275*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m0, m6 ; 67 2276*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 2277*c0909341SAndroid Build Coastguard Worker pmaddwd m14, m11, m5 ; a3 2278*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m0 ; 78 2279*c0909341SAndroid Build Coastguard Worker paddd m12, m14 2280*c0909341SAndroid Build Coastguard Worker pmaddwd m14, m11, m6 ; b3 2281*c0909341SAndroid Build Coastguard Worker paddd m13, m14 2282*c0909341SAndroid Build Coastguard Worker psrad m12, 5 2283*c0909341SAndroid Build Coastguard Worker psrad m13, 5 2284*c0909341SAndroid Build Coastguard Worker packssdw m12, m13 2285*c0909341SAndroid Build Coastguard Worker pxor m13, m13 2286*c0909341SAndroid Build Coastguard Worker pmaxsw m12, m13 2287*c0909341SAndroid Build Coastguard Worker pavgw m12, m13 2288*c0909341SAndroid Build Coastguard Worker pminsw m12, m7 2289*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*0], m12 2290*c0909341SAndroid Build Coastguard Worker movhps [dstq+dsq*1], m12 2291*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 2292*c0909341SAndroid Build Coastguard Worker sub hd, 2 2293*c0909341SAndroid Build Coastguard Worker jg .v_w4_loop 
2294*c0909341SAndroid Build Coastguard Worker add r7, 8 2295*c0909341SAndroid Build Coastguard Worker add r8, 8 2296*c0909341SAndroid Build Coastguard Worker movzx hd, wb 2297*c0909341SAndroid Build Coastguard Worker mov srcq, r7 2298*c0909341SAndroid Build Coastguard Worker mov dstq, r8 2299*c0909341SAndroid Build Coastguard Worker sub wd, 1<<8 2300*c0909341SAndroid Build Coastguard Worker%endif 2301*c0909341SAndroid Build Coastguard Worker jg .v_w4_loop0 2302*c0909341SAndroid Build Coastguard Worker RET 2303*c0909341SAndroid Build Coastguard Worker.h: 2304*c0909341SAndroid Build Coastguard Worker RESET_STACK_STATE 2305*c0909341SAndroid Build Coastguard Worker test myd, 0xf00 2306*c0909341SAndroid Build Coastguard Worker jnz .hv 2307*c0909341SAndroid Build Coastguard Worker mov myd, r8m 2308*c0909341SAndroid Build Coastguard Worker movd m5, r8m 2309*c0909341SAndroid Build Coastguard Worker shr myd, 11 2310*c0909341SAndroid Build Coastguard Worker movddup m4, [base+put_8tap_h_rnd+myq*8] 2311*c0909341SAndroid Build Coastguard Worker movifnidn dsq, dsmp 2312*c0909341SAndroid Build Coastguard Worker pshufb m5, [base+pw_256] 2313*c0909341SAndroid Build Coastguard Worker cmp wd, 4 2314*c0909341SAndroid Build Coastguard Worker jle mangle(private_prefix %+ _put_6tap_16bpc_ssse3).h_w4 2315*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 12 2316*c0909341SAndroid Build Coastguard Worker shr mxd, 16 2317*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+mxq*8] 2318*c0909341SAndroid Build Coastguard Worker movifnidn dstq, dstmp 2319*c0909341SAndroid Build Coastguard Worker mova m6, [base+spel_h_shufA] 2320*c0909341SAndroid Build Coastguard Worker mova m7, [base+spel_h_shufB] 2321*c0909341SAndroid Build Coastguard Worker%if UNIX64 2322*c0909341SAndroid Build Coastguard Worker mov wd, wd 2323*c0909341SAndroid Build Coastguard Worker%endif 2324*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+wq*2] 2325*c0909341SAndroid Build Coastguard 
Worker punpcklbw m3, m3 2326*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+wq*2] 2327*c0909341SAndroid Build Coastguard Worker psraw m3, 8 2328*c0909341SAndroid Build Coastguard Worker neg wq 2329*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2330*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*4 2331*c0909341SAndroid Build Coastguard Worker pshufd m0, m3, q0000 2332*c0909341SAndroid Build Coastguard Worker pshufd m1, m3, q1111 2333*c0909341SAndroid Build Coastguard Worker pshufd m2, m3, q2222 2334*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q3333 2335*c0909341SAndroid Build Coastguard Worker mova m8, m0 2336*c0909341SAndroid Build Coastguard Worker mova m9, m1 2337*c0909341SAndroid Build Coastguard Worker mova m10, m2 2338*c0909341SAndroid Build Coastguard Worker mova m11, m3 2339*c0909341SAndroid Build Coastguard Worker%else 2340*c0909341SAndroid Build Coastguard Worker pshufd m8, m3, q0000 2341*c0909341SAndroid Build Coastguard Worker pshufd m9, m3, q1111 2342*c0909341SAndroid Build Coastguard Worker pshufd m10, m3, q2222 2343*c0909341SAndroid Build Coastguard Worker pshufd m11, m3, q3333 2344*c0909341SAndroid Build Coastguard Worker%endif 2345*c0909341SAndroid Build Coastguard Worker.h_w8_loop0: 2346*c0909341SAndroid Build Coastguard Worker mov r6, wq 2347*c0909341SAndroid Build Coastguard Worker.h_w8_loop: 2348*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+r6*2- 6] 2349*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+r6*2+ 2] 2350*c0909341SAndroid Build Coastguard Worker pshufb m2, m0, m6 ; 0 1 1 2 2 3 3 4 2351*c0909341SAndroid Build Coastguard Worker pshufb m0, m7 ; 2 3 3 4 4 5 5 6 2352*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m8 ; abcd0 2353*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m9 ; abcd1 2354*c0909341SAndroid Build Coastguard Worker pshufb m3, m1, m6 ; 4 5 5 6 6 7 7 8 2355*c0909341SAndroid Build Coastguard Worker pshufb m1, m7 ; 6 7 7 8 8 9 9 a 2356*c0909341SAndroid 
Build Coastguard Worker paddd m2, m4 2357*c0909341SAndroid Build Coastguard Worker paddd m0, m2 2358*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m10, m3 ; abcd2 2359*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m8 ; efgh0 2360*c0909341SAndroid Build Coastguard Worker paddd m0, m2 2361*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m11, m1 ; abcd3 2362*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m9 ; efgh1 2363*c0909341SAndroid Build Coastguard Worker paddd m0, m2 2364*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+r6*2+10] 2365*c0909341SAndroid Build Coastguard Worker paddd m3, m4 2366*c0909341SAndroid Build Coastguard Worker paddd m1, m3 2367*c0909341SAndroid Build Coastguard Worker pshufb m3, m2, m6 ; 8 9 9 a a b b c 2368*c0909341SAndroid Build Coastguard Worker pshufb m2, m7 ; a b b c c d d e 2369*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m10 ; efgh2 2370*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m11 ; efgh3 2371*c0909341SAndroid Build Coastguard Worker paddd m1, m3 2372*c0909341SAndroid Build Coastguard Worker paddd m1, m2 2373*c0909341SAndroid Build Coastguard Worker psrad m0, 6 2374*c0909341SAndroid Build Coastguard Worker psrad m1, 6 2375*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 2376*c0909341SAndroid Build Coastguard Worker pxor m1, m1 2377*c0909341SAndroid Build Coastguard Worker pminsw m0, m5 2378*c0909341SAndroid Build Coastguard Worker pmaxsw m0, m1 2379*c0909341SAndroid Build Coastguard Worker mova [dstq+r6*2], m0 2380*c0909341SAndroid Build Coastguard Worker add r6, 8 2381*c0909341SAndroid Build Coastguard Worker jl .h_w8_loop 2382*c0909341SAndroid Build Coastguard Worker add srcq, ssq 2383*c0909341SAndroid Build Coastguard Worker add dstq, dsq 2384*c0909341SAndroid Build Coastguard Worker dec hd 2385*c0909341SAndroid Build Coastguard Worker jg .h_w8_loop0 2386*c0909341SAndroid Build Coastguard Worker RET 2387*c0909341SAndroid Build Coastguard Worker.hv: 
2388*c0909341SAndroid Build Coastguard Worker RESET_STACK_STATE 2389*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2390*c0909341SAndroid Build Coastguard Worker movd m4, r8m 2391*c0909341SAndroid Build Coastguard Worker pshufb m4, [base+pw_256] 2392*c0909341SAndroid Build Coastguard Worker%else 2393*c0909341SAndroid Build Coastguard Worker%if WIN64 2394*c0909341SAndroid Build Coastguard Worker ALLOC_STACK 16*6, 16 2395*c0909341SAndroid Build Coastguard Worker%endif 2396*c0909341SAndroid Build Coastguard Worker movd m15, r8m 2397*c0909341SAndroid Build Coastguard Worker pshufb m15, [base+pw_256] 2398*c0909341SAndroid Build Coastguard Worker%endif 2399*c0909341SAndroid Build Coastguard Worker cmp wd, 4 2400*c0909341SAndroid Build Coastguard Worker jg .hv_w8 2401*c0909341SAndroid Build Coastguard Worker movzx mxd, mxb 2402*c0909341SAndroid Build Coastguard Worker je .hv_w4 2403*c0909341SAndroid Build Coastguard Worker movq m0, [base+subpel_filters+mxq*8] 2404*c0909341SAndroid Build Coastguard Worker movzx mxd, myb 2405*c0909341SAndroid Build Coastguard Worker shr myd, 16 2406*c0909341SAndroid Build Coastguard Worker cmp hd, 6 2407*c0909341SAndroid Build Coastguard Worker cmovb myd, mxd 2408*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+myq*8] 2409*c0909341SAndroid Build Coastguard Worker movddup m6, [base+pd_8704] 2410*c0909341SAndroid Build Coastguard Worker pshuflw m0, m0, q2121 2411*c0909341SAndroid Build Coastguard Worker pxor m7, m7 2412*c0909341SAndroid Build Coastguard Worker punpcklbw m7, m0 2413*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m3 2414*c0909341SAndroid Build Coastguard Worker psraw m3, 8 ; sign-extend 2415*c0909341SAndroid Build Coastguard Worker test dword r8m, 0x800 2416*c0909341SAndroid Build Coastguard Worker jz .hv_w2_10bpc 2417*c0909341SAndroid Build Coastguard Worker movddup m6, [base+pd_2560] 2418*c0909341SAndroid Build Coastguard Worker psraw m7, 2 2419*c0909341SAndroid Build Coastguard Worker 
psllw m3, 2 2420*c0909341SAndroid Build Coastguard Worker.hv_w2_10bpc: 2421*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2422*c0909341SAndroid Build Coastguard Worker mov dstq, dstmp 2423*c0909341SAndroid Build Coastguard Worker mov dsq, dsmp 2424*c0909341SAndroid Build Coastguard Worker mova m5, [base+spel_h_shuf2] 2425*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*8 2426*c0909341SAndroid Build Coastguard Worker pshufd m0, m3, q0000 2427*c0909341SAndroid Build Coastguard Worker pshufd m1, m3, q1111 2428*c0909341SAndroid Build Coastguard Worker pshufd m2, m3, q2222 2429*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q3333 2430*c0909341SAndroid Build Coastguard Worker mova m9, m5 2431*c0909341SAndroid Build Coastguard Worker mova m11, m0 2432*c0909341SAndroid Build Coastguard Worker mova m12, m1 2433*c0909341SAndroid Build Coastguard Worker mova m13, m2 2434*c0909341SAndroid Build Coastguard Worker mova m14, m3 2435*c0909341SAndroid Build Coastguard Worker mova m15, m4 2436*c0909341SAndroid Build Coastguard Worker%else 2437*c0909341SAndroid Build Coastguard Worker mova m9, [base+spel_h_shuf2] 2438*c0909341SAndroid Build Coastguard Worker pshufd m11, m3, q0000 2439*c0909341SAndroid Build Coastguard Worker pshufd m12, m3, q1111 2440*c0909341SAndroid Build Coastguard Worker pshufd m13, m3, q2222 2441*c0909341SAndroid Build Coastguard Worker pshufd m14, m3, q3333 2442*c0909341SAndroid Build Coastguard Worker%endif 2443*c0909341SAndroid Build Coastguard Worker lea r6, [ssq*3] 2444*c0909341SAndroid Build Coastguard Worker sub srcq, 2 2445*c0909341SAndroid Build Coastguard Worker sub srcq, r6 2446*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*0] 2447*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*1] 2448*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*2] 2449*c0909341SAndroid Build Coastguard Worker add srcq, r6 2450*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ssq*0] 2451*c0909341SAndroid 
Build Coastguard Worker%if ARCH_X86_32 2452*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m5}, m2, m3, m1, m4 2453*c0909341SAndroid Build Coastguard Worker%else 2454*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m9}, m2, m3, m1, m4 2455*c0909341SAndroid Build Coastguard Worker%endif 2456*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m7}, m2, m3, m1, m4 2457*c0909341SAndroid Build Coastguard Worker phaddd m2, m3 ; 0 1 2458*c0909341SAndroid Build Coastguard Worker phaddd m1, m4 ; 2 3 2459*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*1] 2460*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ssq*2] 2461*c0909341SAndroid Build Coastguard Worker add srcq, r6 2462*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0] 2463*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2464*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m5}, m3, m4, m0 2465*c0909341SAndroid Build Coastguard Worker%else 2466*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m9}, m3, m4, m0 2467*c0909341SAndroid Build Coastguard Worker%endif 2468*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m7}, m3, m4, m0 2469*c0909341SAndroid Build Coastguard Worker phaddd m3, m4 ; 4 5 2470*c0909341SAndroid Build Coastguard Worker phaddd m0, m0 ; 6 6 2471*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m6}, m2, m1, m3, m0 2472*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 10}, m2, m1, m3, m0 2473*c0909341SAndroid Build Coastguard Worker packssdw m2, m1 ; 0 1 2 3 2474*c0909341SAndroid Build Coastguard Worker packssdw m3, m0 ; 4 5 6 _ 2475*c0909341SAndroid Build Coastguard Worker palignr m4, m3, m2, 4 ; 1 2 3 4 2476*c0909341SAndroid Build Coastguard Worker pshufd m5, m3, q0321 ; 5 6 _ _ 2477*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m2, m4 ; 01 12 2478*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m4 ; 23 34 2479*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m5 ; 45 56 
2480*c0909341SAndroid Build Coastguard Worker.hv_w2_loop: 2481*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ssq*1] 2482*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 2483*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*0] 2484*c0909341SAndroid Build Coastguard Worker pshufb m4, m9 2485*c0909341SAndroid Build Coastguard Worker pshufb m5, m9 2486*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m7 2487*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m7 2488*c0909341SAndroid Build Coastguard Worker phaddd m4, m5 2489*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m11, m1 ; a0 b0 2490*c0909341SAndroid Build Coastguard Worker mova m1, m2 2491*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m12 ; a1 b1 2492*c0909341SAndroid Build Coastguard Worker paddd m5, m2 2493*c0909341SAndroid Build Coastguard Worker mova m2, m3 2494*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m13 ; a2 b2 2495*c0909341SAndroid Build Coastguard Worker paddd m5, m3 2496*c0909341SAndroid Build Coastguard Worker paddd m4, m6 2497*c0909341SAndroid Build Coastguard Worker psrad m4, 10 ; 7 8 2498*c0909341SAndroid Build Coastguard Worker packssdw m0, m4 2499*c0909341SAndroid Build Coastguard Worker pshufd m3, m0, q2103 2500*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m0 ; 67 78 2501*c0909341SAndroid Build Coastguard Worker mova m0, m4 2502*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m14, m3 ; a3 b3 2503*c0909341SAndroid Build Coastguard Worker paddd m5, m4 2504*c0909341SAndroid Build Coastguard Worker psrad m5, 10 2505*c0909341SAndroid Build Coastguard Worker packssdw m5, m5 2506*c0909341SAndroid Build Coastguard Worker pxor m4, m4 2507*c0909341SAndroid Build Coastguard Worker pminsw m5, m15 2508*c0909341SAndroid Build Coastguard Worker pmaxsw m5, m4 2509*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*0], m5 2510*c0909341SAndroid Build Coastguard Worker pshuflw m5, m5, q3232 2511*c0909341SAndroid Build 
Coastguard Worker movd [dstq+dsq*1], m5 2512*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 2513*c0909341SAndroid Build Coastguard Worker sub hd, 2 2514*c0909341SAndroid Build Coastguard Worker jg .hv_w2_loop 2515*c0909341SAndroid Build Coastguard Worker RET 2516*c0909341SAndroid Build Coastguard Worker.hv_w8: 2517*c0909341SAndroid Build Coastguard Worker shr mxd, 16 2518*c0909341SAndroid Build Coastguard Worker.hv_w4: 2519*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+mxq*8] 2520*c0909341SAndroid Build Coastguard Worker movzx mxd, myb 2521*c0909341SAndroid Build Coastguard Worker shr myd, 16 2522*c0909341SAndroid Build Coastguard Worker cmp hd, 6 2523*c0909341SAndroid Build Coastguard Worker cmovb myd, mxd 2524*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+myq*8] 2525*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2526*c0909341SAndroid Build Coastguard Worker RESET_STACK_STATE 2527*c0909341SAndroid Build Coastguard Worker mov dstq, dstmp 2528*c0909341SAndroid Build Coastguard Worker mov dsq, dsmp 2529*c0909341SAndroid Build Coastguard Worker mova m0, [base+spel_h_shufA] 2530*c0909341SAndroid Build Coastguard Worker mova m1, [base+spel_h_shufB] 2531*c0909341SAndroid Build Coastguard Worker mova m6, [base+pd_512] 2532*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*15 2533*c0909341SAndroid Build Coastguard Worker mova m8, m0 2534*c0909341SAndroid Build Coastguard Worker mova m9, m1 2535*c0909341SAndroid Build Coastguard Worker mova m14, m6 2536*c0909341SAndroid Build Coastguard Worker%else 2537*c0909341SAndroid Build Coastguard Worker mova m8, [base+spel_h_shufA] 2538*c0909341SAndroid Build Coastguard Worker mova m9, [base+spel_h_shufB] 2539*c0909341SAndroid Build Coastguard Worker%endif 2540*c0909341SAndroid Build Coastguard Worker pxor m0, m0 2541*c0909341SAndroid Build Coastguard Worker punpcklbw m0, m2 2542*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m3 
2543*c0909341SAndroid Build Coastguard Worker psraw m3, 8 2544*c0909341SAndroid Build Coastguard Worker test dword r8m, 0x800 2545*c0909341SAndroid Build Coastguard Worker jz .hv_w4_10bpc 2546*c0909341SAndroid Build Coastguard Worker psraw m0, 2 2547*c0909341SAndroid Build Coastguard Worker psllw m3, 2 2548*c0909341SAndroid Build Coastguard Worker.hv_w4_10bpc: 2549*c0909341SAndroid Build Coastguard Worker lea r6, [ssq*3] 2550*c0909341SAndroid Build Coastguard Worker sub srcq, 6 2551*c0909341SAndroid Build Coastguard Worker sub srcq, r6 2552*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2553*c0909341SAndroid Build Coastguard Worker %define tmp esp+16*8 2554*c0909341SAndroid Build Coastguard Worker shl wd, 14 2555*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 2556*c0909341SAndroid Build Coastguard Worker mov [esp+4*61], srcq 2557*c0909341SAndroid Build Coastguard Worker mov [esp+4*62], dstq 2558*c0909341SAndroid Build Coastguard Worker%else 2559*c0909341SAndroid Build Coastguard Worker mov srcmp, srcq 2560*c0909341SAndroid Build Coastguard Worker%endif 2561*c0909341SAndroid Build Coastguard Worker mova [tmp+16*5], m4 2562*c0909341SAndroid Build Coastguard Worker lea wd, [wq+hq-(1<<16)] 2563*c0909341SAndroid Build Coastguard Worker pshufd m1, m0, q0000 2564*c0909341SAndroid Build Coastguard Worker pshufd m2, m0, q1111 2565*c0909341SAndroid Build Coastguard Worker pshufd m5, m0, q2222 2566*c0909341SAndroid Build Coastguard Worker pshufd m0, m0, q3333 2567*c0909341SAndroid Build Coastguard Worker mova m10, m1 2568*c0909341SAndroid Build Coastguard Worker mova m11, m2 2569*c0909341SAndroid Build Coastguard Worker mova m12, m5 2570*c0909341SAndroid Build Coastguard Worker mova m13, m0 2571*c0909341SAndroid Build Coastguard Worker%else 2572*c0909341SAndroid Build Coastguard Worker%if WIN64 2573*c0909341SAndroid Build Coastguard Worker %define tmp rsp 2574*c0909341SAndroid Build Coastguard Worker%else 2575*c0909341SAndroid Build Coastguard 
Worker %define tmp rsp-104 ; red zone 2576*c0909341SAndroid Build Coastguard Worker%endif 2577*c0909341SAndroid Build Coastguard Worker shl wd, 6 2578*c0909341SAndroid Build Coastguard Worker mov r7, srcq 2579*c0909341SAndroid Build Coastguard Worker mov r8, dstq 2580*c0909341SAndroid Build Coastguard Worker lea wd, [wq+hq-(1<<8)] 2581*c0909341SAndroid Build Coastguard Worker pshufd m10, m0, q0000 2582*c0909341SAndroid Build Coastguard Worker pshufd m11, m0, q1111 2583*c0909341SAndroid Build Coastguard Worker pshufd m12, m0, q2222 2584*c0909341SAndroid Build Coastguard Worker pshufd m13, m0, q3333 2585*c0909341SAndroid Build Coastguard Worker mova [tmp+16*5], m15 2586*c0909341SAndroid Build Coastguard Worker%endif 2587*c0909341SAndroid Build Coastguard Worker pshufd m0, m3, q0000 2588*c0909341SAndroid Build Coastguard Worker pshufd m1, m3, q1111 2589*c0909341SAndroid Build Coastguard Worker pshufd m2, m3, q2222 2590*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q3333 2591*c0909341SAndroid Build Coastguard Worker mova [tmp+16*1], m0 2592*c0909341SAndroid Build Coastguard Worker mova [tmp+16*2], m1 2593*c0909341SAndroid Build Coastguard Worker mova [tmp+16*3], m2 2594*c0909341SAndroid Build Coastguard Worker mova [tmp+16*4], m3 2595*c0909341SAndroid Build Coastguard Worker%macro PUT_8TAP_HV_H 4-5 m14 ; dst/src+0, src+8, tmp, shift, [pd_512] 2596*c0909341SAndroid Build Coastguard Worker pshufb m%3, m%1, m8 ; 0 1 1 2 2 3 3 4 2597*c0909341SAndroid Build Coastguard Worker pshufb m%1, m9 ; 2 3 3 4 4 5 5 6 2598*c0909341SAndroid Build Coastguard Worker pmaddwd m%3, m10 2599*c0909341SAndroid Build Coastguard Worker pmaddwd m%1, m11 2600*c0909341SAndroid Build Coastguard Worker paddd m%3, %5 2601*c0909341SAndroid Build Coastguard Worker paddd m%1, m%3 2602*c0909341SAndroid Build Coastguard Worker pshufb m%3, m%2, m8 ; 4 5 5 6 6 7 7 8 2603*c0909341SAndroid Build Coastguard Worker pshufb m%2, m9 ; 6 7 7 8 8 9 9 a 2604*c0909341SAndroid Build Coastguard Worker pmaddwd 
m%3, m12 2605*c0909341SAndroid Build Coastguard Worker pmaddwd m%2, m13 2606*c0909341SAndroid Build Coastguard Worker paddd m%1, m%3 2607*c0909341SAndroid Build Coastguard Worker paddd m%1, m%2 2608*c0909341SAndroid Build Coastguard Worker psrad m%1, %4 2609*c0909341SAndroid Build Coastguard Worker%endmacro 2610*c0909341SAndroid Build Coastguard Worker.hv_w4_loop0: 2611*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 2612*c0909341SAndroid Build Coastguard Worker mova m14, [pd_512] 2613*c0909341SAndroid Build Coastguard Worker%endif 2614*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ssq*0+0] 2615*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0+8] 2616*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*1+0] 2617*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*1+8] 2618*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*2+0] 2619*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*2+8] 2620*c0909341SAndroid Build Coastguard Worker add srcq, r6 2621*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 4, 1, 0, 10 2622*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 5, 2, 0, 10 2623*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 6, 3, 0, 10 2624*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ssq*0+0] 2625*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*0+8] 2626*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*1+0] 2627*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*1+8] 2628*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 7, 2, 0, 10 2629*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 1, 3, 0, 10 2630*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*2+0] 2631*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*2+8] 2632*c0909341SAndroid Build Coastguard Worker add srcq, r6 2633*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 2, 3, 0, 10 2634*c0909341SAndroid Build Coastguard Worker packssdw m4, m7 ; 
0 3 2635*c0909341SAndroid Build Coastguard Worker packssdw m5, m1 ; 1 4 2636*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0+0] 2637*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0+8] 2638*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 0, 1, 3, 10 2639*c0909341SAndroid Build Coastguard Worker packssdw m6, m2 ; 2 5 2640*c0909341SAndroid Build Coastguard Worker packssdw m7, m0 ; 3 6 2641*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m4, m5 ; 01 2642*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m5 ; 34 2643*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m5, m6 ; 12 2644*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m6 ; 45 2645*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m6, m7 ; 23 2646*c0909341SAndroid Build Coastguard Worker punpckhwd m6, m7 ; 56 2647*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2648*c0909341SAndroid Build Coastguard Worker jmp .hv_w4_loop_start 2649*c0909341SAndroid Build Coastguard Worker.hv_w4_loop: 2650*c0909341SAndroid Build Coastguard Worker mova m1, [tmp+16*6] 2651*c0909341SAndroid Build Coastguard Worker mova m2, m15 2652*c0909341SAndroid Build Coastguard Worker.hv_w4_loop_start: 2653*c0909341SAndroid Build Coastguard Worker mova m7, [tmp+16*1] 2654*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m7 ; a0 2655*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m7 ; b0 2656*c0909341SAndroid Build Coastguard Worker mova m7, [tmp+16*2] 2657*c0909341SAndroid Build Coastguard Worker mova [tmp+16*6], m3 2658*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m7 ; a1 2659*c0909341SAndroid Build Coastguard Worker mova m15, m4 2660*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m7 ; b1 2661*c0909341SAndroid Build Coastguard Worker mova m7, [tmp+16*3] 2662*c0909341SAndroid Build Coastguard Worker paddd m1, m3 2663*c0909341SAndroid Build Coastguard Worker paddd m2, m4 2664*c0909341SAndroid Build Coastguard Worker mova m3, m5 
2665*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m7 ; a2 2666*c0909341SAndroid Build Coastguard Worker mova m4, m6 2667*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m7 ; b2 2668*c0909341SAndroid Build Coastguard Worker paddd m1, m5 2669*c0909341SAndroid Build Coastguard Worker paddd m2, m6 2670*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ssq*1+0] 2671*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*1+8] 2672*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 2673*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 7, 5, 6, 10 2674*c0909341SAndroid Build Coastguard Worker packssdw m0, m7 ; 6 7 2675*c0909341SAndroid Build Coastguard Worker mova [tmp+16*0], m0 2676*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0+0] 2677*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*0+8] 2678*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 0, 5, 6, 10 2679*c0909341SAndroid Build Coastguard Worker mova m6, [tmp+16*0] 2680*c0909341SAndroid Build Coastguard Worker packssdw m7, m0 ; 7 8 2681*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m6, m7 ; 67 2682*c0909341SAndroid Build Coastguard Worker punpckhwd m6, m7 ; 78 2683*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m5, [tmp+16*4] 2684*c0909341SAndroid Build Coastguard Worker paddd m1, m7 ; a3 2685*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m6, [tmp+16*4] 2686*c0909341SAndroid Build Coastguard Worker paddd m2, m7 ; b3 2687*c0909341SAndroid Build Coastguard Worker psrad m1, 9 2688*c0909341SAndroid Build Coastguard Worker psrad m2, 9 2689*c0909341SAndroid Build Coastguard Worker packssdw m1, m2 2690*c0909341SAndroid Build Coastguard Worker pxor m7, m7 2691*c0909341SAndroid Build Coastguard Worker pmaxsw m1, m7 2692*c0909341SAndroid Build Coastguard Worker pavgw m7, m1 2693*c0909341SAndroid Build Coastguard Worker pminsw m7, [tmp+16*5] 2694*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*0], m7 2695*c0909341SAndroid 
Build Coastguard Worker movhps [dstq+dsq*1], m7 2696*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 2697*c0909341SAndroid Build Coastguard Worker sub hd, 2 2698*c0909341SAndroid Build Coastguard Worker jg .hv_w4_loop 2699*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 2700*c0909341SAndroid Build Coastguard Worker mov srcq, [esp+4*61] 2701*c0909341SAndroid Build Coastguard Worker mov dstq, [esp+4*62] 2702*c0909341SAndroid Build Coastguard Worker add srcq, 8 2703*c0909341SAndroid Build Coastguard Worker add dstq, 8 2704*c0909341SAndroid Build Coastguard Worker mov [esp+4*61], srcq 2705*c0909341SAndroid Build Coastguard Worker mov [esp+4*62], dstq 2706*c0909341SAndroid Build Coastguard Worker%else 2707*c0909341SAndroid Build Coastguard Worker mov srcq, srcmp 2708*c0909341SAndroid Build Coastguard Worker mov dstq, dstmp 2709*c0909341SAndroid Build Coastguard Worker add srcq, 8 2710*c0909341SAndroid Build Coastguard Worker add dstq, 8 2711*c0909341SAndroid Build Coastguard Worker mov srcmp, srcq 2712*c0909341SAndroid Build Coastguard Worker mov dstmp, dstq 2713*c0909341SAndroid Build Coastguard Worker%endif 2714*c0909341SAndroid Build Coastguard Worker movzx hd, ww 2715*c0909341SAndroid Build Coastguard Worker sub wd, 1<<16 2716*c0909341SAndroid Build Coastguard Worker%else 2717*c0909341SAndroid Build Coastguard Worker.hv_w4_loop: 2718*c0909341SAndroid Build Coastguard Worker mova m15, [tmp+16*1] 2719*c0909341SAndroid Build Coastguard Worker pmaddwd m14, m15, m1 ; a0 2720*c0909341SAndroid Build Coastguard Worker pmaddwd m15, m2 ; b0 2721*c0909341SAndroid Build Coastguard Worker mova m7, [tmp+16*2] 2722*c0909341SAndroid Build Coastguard Worker mova m1, m3 2723*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m7 ; a1 2724*c0909341SAndroid Build Coastguard Worker mova m2, m4 2725*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m7 ; b1 2726*c0909341SAndroid Build Coastguard Worker mova m7, [tmp+16*3] 2727*c0909341SAndroid Build 
Coastguard Worker paddd m14, m3 2728*c0909341SAndroid Build Coastguard Worker paddd m15, m4 2729*c0909341SAndroid Build Coastguard Worker mova m3, m5 2730*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m7 ; a2 2731*c0909341SAndroid Build Coastguard Worker mova m4, m6 2732*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m7 ; b2 2733*c0909341SAndroid Build Coastguard Worker paddd m14, m5 2734*c0909341SAndroid Build Coastguard Worker paddd m15, m6 2735*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ssq*1+0] 2736*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*1+8] 2737*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 2738*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 7, 5, 6, 10, [pd_512] 2739*c0909341SAndroid Build Coastguard Worker packssdw m0, m7 ; 6 7 2740*c0909341SAndroid Build Coastguard Worker mova [tmp+16*0], m0 2741*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0+0] 2742*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*0+8] 2743*c0909341SAndroid Build Coastguard Worker PUT_8TAP_HV_H 0, 5, 6, 10, [pd_512] 2744*c0909341SAndroid Build Coastguard Worker mova m6, [tmp+16*0] 2745*c0909341SAndroid Build Coastguard Worker packssdw m7, m0 ; 7 8 2746*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m6, m7 ; 67 2747*c0909341SAndroid Build Coastguard Worker punpckhwd m6, m7 ; 78 2748*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m5, [tmp+16*4] 2749*c0909341SAndroid Build Coastguard Worker paddd m14, m7 ; a3 2750*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m6, [tmp+16*4] 2751*c0909341SAndroid Build Coastguard Worker paddd m15, m7 ; b3 2752*c0909341SAndroid Build Coastguard Worker psrad m14, 9 2753*c0909341SAndroid Build Coastguard Worker psrad m15, 9 2754*c0909341SAndroid Build Coastguard Worker packssdw m14, m15 2755*c0909341SAndroid Build Coastguard Worker pxor m7, m7 2756*c0909341SAndroid Build Coastguard Worker pmaxsw m14, m7 2757*c0909341SAndroid Build Coastguard 
Worker pavgw m7, m14 2758*c0909341SAndroid Build Coastguard Worker pminsw m7, [tmp+16*5] 2759*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*0], m7 2760*c0909341SAndroid Build Coastguard Worker movhps [dstq+dsq*1], m7 2761*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 2762*c0909341SAndroid Build Coastguard Worker sub hd, 2 2763*c0909341SAndroid Build Coastguard Worker jg .hv_w4_loop 2764*c0909341SAndroid Build Coastguard Worker add r7, 8 2765*c0909341SAndroid Build Coastguard Worker add r8, 8 2766*c0909341SAndroid Build Coastguard Worker movzx hd, wb 2767*c0909341SAndroid Build Coastguard Worker mov srcq, r7 2768*c0909341SAndroid Build Coastguard Worker mov dstq, r8 2769*c0909341SAndroid Build Coastguard Worker sub wd, 1<<8 2770*c0909341SAndroid Build Coastguard Worker%endif 2771*c0909341SAndroid Build Coastguard Worker jg .hv_w4_loop0 2772*c0909341SAndroid Build Coastguard Worker RET 2773*c0909341SAndroid Build Coastguard Worker%undef tmp 2774*c0909341SAndroid Build Coastguard Worker 2775*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2776*c0909341SAndroid Build Coastguard WorkerDECLARE_REG_TMP 2, 1, 6, 4 2777*c0909341SAndroid Build Coastguard Worker%elif WIN64 2778*c0909341SAndroid Build Coastguard WorkerDECLARE_REG_TMP 6, 4, 7, 4 2779*c0909341SAndroid Build Coastguard Worker%else 2780*c0909341SAndroid Build Coastguard WorkerDECLARE_REG_TMP 6, 7, 7, 8 2781*c0909341SAndroid Build Coastguard Worker%endif 2782*c0909341SAndroid Build Coastguard Worker 2783*c0909341SAndroid Build Coastguard Worker%define PREP_8TAP_FN FN prep_8tap, 2784*c0909341SAndroid Build Coastguard WorkerPREP_8TAP_FN smooth, SMOOTH, SMOOTH, prep_6tap_16bpc 2785*c0909341SAndroid Build Coastguard WorkerPREP_8TAP_FN smooth_regular, SMOOTH, REGULAR, prep_6tap_16bpc 2786*c0909341SAndroid Build Coastguard WorkerPREP_8TAP_FN regular_smooth, REGULAR, SMOOTH, prep_6tap_16bpc 2787*c0909341SAndroid Build Coastguard WorkerPREP_8TAP_FN regular, REGULAR, REGULAR 
2788*c0909341SAndroid Build Coastguard Worker 2789*c0909341SAndroid Build Coastguard Workercglobal prep_6tap_16bpc, 0, 8, 0, tmp, src, ss, w, h, mx, my 2790*c0909341SAndroid Build Coastguard Worker %define base t2-prep_ssse3 2791*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2792*c0909341SAndroid Build Coastguard Worker %define mxb r0b 2793*c0909341SAndroid Build Coastguard Worker %define mxd r0 2794*c0909341SAndroid Build Coastguard Worker %define mxq r0 2795*c0909341SAndroid Build Coastguard Worker %define myb r2b 2796*c0909341SAndroid Build Coastguard Worker %define myd r2 2797*c0909341SAndroid Build Coastguard Worker %define myq r2 2798*c0909341SAndroid Build Coastguard Worker%endif 2799*c0909341SAndroid Build Coastguard Worker imul mxd, mxm, 0x010101 2800*c0909341SAndroid Build Coastguard Worker add mxd, t0d ; 6tap_h, mx, 4tap_h 2801*c0909341SAndroid Build Coastguard Worker imul myd, mym, 0x010101 2802*c0909341SAndroid Build Coastguard Worker add myd, t1d ; 6tap_v, my, 4tap_v 2803*c0909341SAndroid Build Coastguard Worker LEA t2, prep_ssse3 2804*c0909341SAndroid Build Coastguard Worker movifnidn wd, wm 2805*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 2806*c0909341SAndroid Build Coastguard Worker movifnidn srcq, srcmp 2807*c0909341SAndroid Build Coastguard Worker test mxd, 0xf00 2808*c0909341SAndroid Build Coastguard Worker jnz .h 2809*c0909341SAndroid Build Coastguard Worker test myd, 0xf00 2810*c0909341SAndroid Build Coastguard Worker jnz .v 2811*c0909341SAndroid Build Coastguard Worker.prep: 2812*c0909341SAndroid Build Coastguard Worker tzcnt wd, wd 2813*c0909341SAndroid Build Coastguard Worker mov myd, r7m ; bitdepth_max 2814*c0909341SAndroid Build Coastguard Worker movzx wd, word [base+prep_ssse3_table+wq*2] 2815*c0909341SAndroid Build Coastguard Worker mova m5, [base+pw_8192] 2816*c0909341SAndroid Build Coastguard Worker shr myd, 11 2817*c0909341SAndroid Build Coastguard Worker add wq, t2 2818*c0909341SAndroid Build Coastguard 
Worker movddup m4, [base+prep_mul+myq*8] 2819*c0909341SAndroid Build Coastguard Worker movifnidn ssq, ssmp 2820*c0909341SAndroid Build Coastguard Worker movifnidn tmpq, tmpmp 2821*c0909341SAndroid Build Coastguard Worker lea r6, [ssq*3] 2822*c0909341SAndroid Build Coastguard Worker%if WIN64 2823*c0909341SAndroid Build Coastguard Worker pop r7 2824*c0909341SAndroid Build Coastguard Worker%endif 2825*c0909341SAndroid Build Coastguard Worker jmp wq 2826*c0909341SAndroid Build Coastguard Worker.h: 2827*c0909341SAndroid Build Coastguard Worker RESET_STACK_STATE 2828*c0909341SAndroid Build Coastguard Worker test myd, 0xf00 2829*c0909341SAndroid Build Coastguard Worker jnz .hv 2830*c0909341SAndroid Build Coastguard Worker movifnidn ssq, r2mp 2831*c0909341SAndroid Build Coastguard Worker movddup m5, [base+prep_8tap_1d_rnd] 2832*c0909341SAndroid Build Coastguard Worker cmp wd, 4 2833*c0909341SAndroid Build Coastguard Worker je mangle(private_prefix %+ _prep_8tap_16bpc_ssse3).h_w4 2834*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 10 2835*c0909341SAndroid Build Coastguard Worker shr mxd, 16 2836*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+1+mxq*8] 2837*c0909341SAndroid Build Coastguard Worker movifnidn tmpq, r0mp 2838*c0909341SAndroid Build Coastguard Worker mova m4, [base+spel_h_shufA] 2839*c0909341SAndroid Build Coastguard Worker add wd, wd 2840*c0909341SAndroid Build Coastguard Worker mova m6, [base+spel_h_shufB] 2841*c0909341SAndroid Build Coastguard Worker add srcq, wq 2842*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m2 2843*c0909341SAndroid Build Coastguard Worker add tmpq, wq 2844*c0909341SAndroid Build Coastguard Worker psraw m2, 8 2845*c0909341SAndroid Build Coastguard Worker neg wq 2846*c0909341SAndroid Build Coastguard Worker test dword r7m, 0x800 2847*c0909341SAndroid Build Coastguard Worker jnz .h_w8_12bpc 2848*c0909341SAndroid Build Coastguard Worker psllw m2, 2 2849*c0909341SAndroid Build Coastguard 
Worker.h_w8_12bpc: 2850*c0909341SAndroid Build Coastguard Worker pshufd m7, m2, q0000 2851*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2852*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*2 2853*c0909341SAndroid Build Coastguard Worker %define m8 [rsp+16*0] 2854*c0909341SAndroid Build Coastguard Worker %define m9 [rsp+16*1] 2855*c0909341SAndroid Build Coastguard Worker pshufd m0, m2, q1111 2856*c0909341SAndroid Build Coastguard Worker pshufd m1, m2, q2222 2857*c0909341SAndroid Build Coastguard Worker mova m8, m0 2858*c0909341SAndroid Build Coastguard Worker mova m9, m1 2859*c0909341SAndroid Build Coastguard Worker%else 2860*c0909341SAndroid Build Coastguard Worker pshufd m8, m2, q1111 2861*c0909341SAndroid Build Coastguard Worker pshufd m9, m2, q2222 2862*c0909341SAndroid Build Coastguard Worker%endif 2863*c0909341SAndroid Build Coastguard Worker.h_w8_loop0: 2864*c0909341SAndroid Build Coastguard Worker mov r6, wq 2865*c0909341SAndroid Build Coastguard Worker.h_w8_loop: 2866*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+r6-4] 2867*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+r6+8] 2868*c0909341SAndroid Build Coastguard Worker pshufb m0, m3, m4 ; 01 12 23 34 2869*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m7 ; abcd0 2870*c0909341SAndroid Build Coastguard Worker pshufb m3, m6 ; 23 34 45 56 2871*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m8, m3 ; abcd1 2872*c0909341SAndroid Build Coastguard Worker paddd m0, m1 2873*c0909341SAndroid Build Coastguard Worker pshufb m1, m2, m4 ; 67 78 89 9a 2874*c0909341SAndroid Build Coastguard Worker shufpd m3, m1, 0x01; 45 56 67 78 2875*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m8 ; efgh1 2876*c0909341SAndroid Build Coastguard Worker pshufb m2, m6 ; 89 9a ab bc 2877*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m9 ; efgh2 2878*c0909341SAndroid Build Coastguard Worker paddd m1, m2 2879*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m9 , m3 ; abcd2 
2880*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m7 ; efgh0 2881*c0909341SAndroid Build Coastguard Worker paddd m0, m5 2882*c0909341SAndroid Build Coastguard Worker paddd m1, m5 2883*c0909341SAndroid Build Coastguard Worker paddd m0, m2 2884*c0909341SAndroid Build Coastguard Worker paddd m1, m3 2885*c0909341SAndroid Build Coastguard Worker psrad m0, 4 2886*c0909341SAndroid Build Coastguard Worker psrad m1, 4 2887*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 2888*c0909341SAndroid Build Coastguard Worker mova [tmpq+r6], m0 2889*c0909341SAndroid Build Coastguard Worker add r6, 16 2890*c0909341SAndroid Build Coastguard Worker jl .h_w8_loop 2891*c0909341SAndroid Build Coastguard Worker add srcq, ssq 2892*c0909341SAndroid Build Coastguard Worker sub tmpq, wq 2893*c0909341SAndroid Build Coastguard Worker dec hd 2894*c0909341SAndroid Build Coastguard Worker jg .h_w8_loop0 2895*c0909341SAndroid Build Coastguard Worker RET 2896*c0909341SAndroid Build Coastguard Worker.v: 2897*c0909341SAndroid Build Coastguard Worker movzx mxd, myb 2898*c0909341SAndroid Build Coastguard Worker shr myd, 16 2899*c0909341SAndroid Build Coastguard Worker cmp hd, 6 2900*c0909341SAndroid Build Coastguard Worker cmovb myd, mxd 2901*c0909341SAndroid Build Coastguard Worker movddup m5, [base+prep_8tap_1d_rnd] 2902*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+1+myq*8] 2903*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 11, 16 2904*c0909341SAndroid Build Coastguard Worker movifnidn ssq, r2mp 2905*c0909341SAndroid Build Coastguard Worker movifnidn tmpq, r0mp 2906*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m2 2907*c0909341SAndroid Build Coastguard Worker sub srcq, ssq 2908*c0909341SAndroid Build Coastguard Worker psraw m2, 8 ; sign-extend 2909*c0909341SAndroid Build Coastguard Worker test dword r7m, 0x800 2910*c0909341SAndroid Build Coastguard Worker jnz .v_12bpc 2911*c0909341SAndroid Build Coastguard Worker psllw m2, 2 
2912*c0909341SAndroid Build Coastguard Worker.v_12bpc: 2913*c0909341SAndroid Build Coastguard Worker sub srcq, ssq 2914*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 2915*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*4 2916*c0909341SAndroid Build Coastguard Worker pshufd m0, m2, q0000 2917*c0909341SAndroid Build Coastguard Worker mov r6d, wd 2918*c0909341SAndroid Build Coastguard Worker pshufd m1, m2, q1111 2919*c0909341SAndroid Build Coastguard Worker shl r6d, 14 2920*c0909341SAndroid Build Coastguard Worker pshufd m2, m2, q2222 2921*c0909341SAndroid Build Coastguard Worker lea r6d, [r6+hq-(1<<16)] 2922*c0909341SAndroid Build Coastguard Worker mova m8, m0 2923*c0909341SAndroid Build Coastguard Worker mova m9, m1 2924*c0909341SAndroid Build Coastguard Worker mova m10, m2 2925*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 2926*c0909341SAndroid Build Coastguard Worker %define srcmp [esp+16*3+4*0] 2927*c0909341SAndroid Build Coastguard Worker %define tmpmp [esp+16*3+4*1] 2928*c0909341SAndroid Build Coastguard Worker%endif 2929*c0909341SAndroid Build Coastguard Worker.v_w4_loop0: 2930*c0909341SAndroid Build Coastguard Worker mov srcmp, srcq 2931*c0909341SAndroid Build Coastguard Worker mov tmpmp, tmpq 2932*c0909341SAndroid Build Coastguard Worker%else 2933*c0909341SAndroid Build Coastguard Worker pshufd m8, m2, q0000 2934*c0909341SAndroid Build Coastguard Worker and wd, -8 2935*c0909341SAndroid Build Coastguard Worker jnz .v_w8 2936*c0909341SAndroid Build Coastguard Worker pshufd m9, m2, q1111 2937*c0909341SAndroid Build Coastguard Worker pshufd m10, m2, q2222 2938*c0909341SAndroid Build Coastguard Worker%endif 2939*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+ssq*0] 2940*c0909341SAndroid Build Coastguard Worker movq m2, [srcq+ssq*1] 2941*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 2942*c0909341SAndroid Build Coastguard Worker movq m3, [srcq+ssq*0] 2943*c0909341SAndroid Build Coastguard Worker movq 
m4, [srcq+ssq*1] 2944*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 2945*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 2946*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m2 ; 01 2947*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3 ; 12 2948*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m4 ; 23 2949*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m0 ; 34 2950*c0909341SAndroid Build Coastguard Worker.v_w4_loop: 2951*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m8, m1 ; a0 2952*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m8, m2 ; b0 2953*c0909341SAndroid Build Coastguard Worker mova m1, m3 2954*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m9 ; a1 2955*c0909341SAndroid Build Coastguard Worker mova m2, m4 2956*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m9 ; b1 2957*c0909341SAndroid Build Coastguard Worker paddd m6, m3 2958*c0909341SAndroid Build Coastguard Worker movq m3, [srcq+ssq*0] 2959*c0909341SAndroid Build Coastguard Worker paddd m7, m4 2960*c0909341SAndroid Build Coastguard Worker movq m4, [srcq+ssq*1] 2961*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 2962*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 2963*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m4 ; 45 2964*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m0 ; 56 2965*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m10, m3 ; a2 2966*c0909341SAndroid Build Coastguard Worker paddd m6, m5 2967*c0909341SAndroid Build Coastguard Worker paddd m6, m0 2968*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m10, m4 ; b2 2969*c0909341SAndroid Build Coastguard Worker paddd m7, m5 2970*c0909341SAndroid Build Coastguard Worker paddd m7, m0 2971*c0909341SAndroid Build Coastguard Worker psrad m6, 4 2972*c0909341SAndroid Build Coastguard Worker psrad m7, 4 2973*c0909341SAndroid Build Coastguard Worker packssdw m6, m7 2974*c0909341SAndroid Build Coastguard 
Worker%if ARCH_X86_32 2975*c0909341SAndroid Build Coastguard Worker movq [tmpq+wq*0], m6 2976*c0909341SAndroid Build Coastguard Worker movhps [tmpq+wq*2], m6 2977*c0909341SAndroid Build Coastguard Worker lea tmpq, [tmpq+wq*4] 2978*c0909341SAndroid Build Coastguard Worker sub hd, 2 2979*c0909341SAndroid Build Coastguard Worker jg .v_w4_loop 2980*c0909341SAndroid Build Coastguard Worker mov srcq, srcmp 2981*c0909341SAndroid Build Coastguard Worker mov tmpq, tmpmp 2982*c0909341SAndroid Build Coastguard Worker movzx hd, r6w 2983*c0909341SAndroid Build Coastguard Worker add srcq, 8 2984*c0909341SAndroid Build Coastguard Worker add tmpq, 8 2985*c0909341SAndroid Build Coastguard Worker sub r6d, 1<<16 2986*c0909341SAndroid Build Coastguard Worker jg .v_w4_loop0 2987*c0909341SAndroid Build Coastguard Worker RET 2988*c0909341SAndroid Build Coastguard Worker%else 2989*c0909341SAndroid Build Coastguard Worker mova [tmpq], m6 2990*c0909341SAndroid Build Coastguard Worker add tmpq, 16 2991*c0909341SAndroid Build Coastguard Worker sub hd, 2 2992*c0909341SAndroid Build Coastguard Worker jg .v_w4_loop 2993*c0909341SAndroid Build Coastguard Worker RET 2994*c0909341SAndroid Build Coastguard Worker.v_w8: 2995*c0909341SAndroid Build Coastguard Worker mova r6m, m8 2996*c0909341SAndroid Build Coastguard Worker lea r6d, [wq*4-(1<<5)] 2997*c0909341SAndroid Build Coastguard Worker pshufd m6, m2, q1111 2998*c0909341SAndroid Build Coastguard Worker lea r6d, [hq+r6*8] 2999*c0909341SAndroid Build Coastguard Worker pshufd m7, m2, q2222 3000*c0909341SAndroid Build Coastguard Worker WIN64_PUSH_XMM 16 3001*c0909341SAndroid Build Coastguard Worker.v_w8_loop0: 3002*c0909341SAndroid Build Coastguard Worker movu m9, [srcq+ssq*0] 3003*c0909341SAndroid Build Coastguard Worker lea r5, [srcq+ssq*2] 3004*c0909341SAndroid Build Coastguard Worker movu m11, [srcq+ssq*1] 3005*c0909341SAndroid Build Coastguard Worker mov r7, tmpq 3006*c0909341SAndroid Build Coastguard Worker movu m13, [r5+ssq*0] 
3007*c0909341SAndroid Build Coastguard Worker movu m15, [r5+ssq*1] 3008*c0909341SAndroid Build Coastguard Worker lea r5, [r5+ssq*2] 3009*c0909341SAndroid Build Coastguard Worker movu m4, [r5+ssq*0] 3010*c0909341SAndroid Build Coastguard Worker punpcklwd m8, m9, m11 ; 01 3011*c0909341SAndroid Build Coastguard Worker punpckhwd m9, m11 3012*c0909341SAndroid Build Coastguard Worker punpcklwd m10, m11, m13 ; 12 3013*c0909341SAndroid Build Coastguard Worker punpckhwd m11, m13 3014*c0909341SAndroid Build Coastguard Worker punpcklwd m12, m13, m15 ; 23 3015*c0909341SAndroid Build Coastguard Worker punpckhwd m13, m15 3016*c0909341SAndroid Build Coastguard Worker punpcklwd m14, m15, m4 ; 34 3017*c0909341SAndroid Build Coastguard Worker punpckhwd m15, m4 3018*c0909341SAndroid Build Coastguard Worker.v_w8_loop: 3019*c0909341SAndroid Build Coastguard Worker mova m3, r6m 3020*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m8, m3 ; a0 3021*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m9, m3 ; a0' 3022*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m10, m3 ; b0 3023*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m11 ; b0' 3024*c0909341SAndroid Build Coastguard Worker mova m8, m12 3025*c0909341SAndroid Build Coastguard Worker pmaddwd m12, m6 ; a1 3026*c0909341SAndroid Build Coastguard Worker mova m9, m13 3027*c0909341SAndroid Build Coastguard Worker pmaddwd m13, m6 ; a1' 3028*c0909341SAndroid Build Coastguard Worker mova m10, m14 3029*c0909341SAndroid Build Coastguard Worker pmaddwd m14, m6 ; b1 3030*c0909341SAndroid Build Coastguard Worker mova m11, m15 3031*c0909341SAndroid Build Coastguard Worker pmaddwd m15, m6 ; b1' 3032*c0909341SAndroid Build Coastguard Worker paddd m0, m12 3033*c0909341SAndroid Build Coastguard Worker paddd m2, m13 3034*c0909341SAndroid Build Coastguard Worker movu m13, [r5+ssq*0] 3035*c0909341SAndroid Build Coastguard Worker paddd m1, m14 3036*c0909341SAndroid Build Coastguard Worker paddd m3, m15 3037*c0909341SAndroid Build 
Coastguard Worker movu m15, [r5+ssq*1] 3038*c0909341SAndroid Build Coastguard Worker lea r5, [r5+ssq*2] 3039*c0909341SAndroid Build Coastguard Worker movu m4, [r5+ssq*0] 3040*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m5}, m0, m2, m1, m3 3041*c0909341SAndroid Build Coastguard Worker punpcklwd m12, m13, m15 ; 45 3042*c0909341SAndroid Build Coastguard Worker punpckhwd m13, m15 3043*c0909341SAndroid Build Coastguard Worker punpcklwd m14, m15, m4 ; 56 3044*c0909341SAndroid Build Coastguard Worker punpckhwd m15, m4 3045*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m7, m12 ; a2 3046*c0909341SAndroid Build Coastguard Worker paddd m0, m4 3047*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m7, m13 ; a2' 3048*c0909341SAndroid Build Coastguard Worker paddd m2, m4 3049*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m7, m14 ; b2 3050*c0909341SAndroid Build Coastguard Worker paddd m1, m4 3051*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m7, m15 ; b2' 3052*c0909341SAndroid Build Coastguard Worker paddd m3, m4 3053*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 4}, m0, m2, m1, m3 3054*c0909341SAndroid Build Coastguard Worker packssdw m0, m2 3055*c0909341SAndroid Build Coastguard Worker packssdw m1, m3 3056*c0909341SAndroid Build Coastguard Worker mova [r7+wq*0], m0 3057*c0909341SAndroid Build Coastguard Worker mova [r7+wq*2], m1 3058*c0909341SAndroid Build Coastguard Worker lea r7, [r7+wq*4] 3059*c0909341SAndroid Build Coastguard Worker sub hd, 2 3060*c0909341SAndroid Build Coastguard Worker jg .v_w8_loop 3061*c0909341SAndroid Build Coastguard Worker add srcq, 16 3062*c0909341SAndroid Build Coastguard Worker add tmpq, 16 3063*c0909341SAndroid Build Coastguard Worker movzx hd, r6b 3064*c0909341SAndroid Build Coastguard Worker sub r6d, 1<<8 3065*c0909341SAndroid Build Coastguard Worker jg .v_w8_loop0 3066*c0909341SAndroid Build Coastguard Worker RET 3067*c0909341SAndroid Build Coastguard Worker%endif 3068*c0909341SAndroid Build 
Coastguard Worker.hv: 3069*c0909341SAndroid Build Coastguard Worker and wd, -8 3070*c0909341SAndroid Build Coastguard Worker jnz .hv_w8 3071*c0909341SAndroid Build Coastguard Worker movzx mxd, mxb 3072*c0909341SAndroid Build Coastguard Worker movq m0, [base+subpel_filters+mxq*8] 3073*c0909341SAndroid Build Coastguard Worker movzx mxd, myb 3074*c0909341SAndroid Build Coastguard Worker shr myd, 16 3075*c0909341SAndroid Build Coastguard Worker cmp hd, 6 3076*c0909341SAndroid Build Coastguard Worker cmovb myd, mxd 3077*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+1+myq*8] 3078*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 15 3079*c0909341SAndroid Build Coastguard Worker movifnidn ssq, r2mp 3080*c0909341SAndroid Build Coastguard Worker movifnidn tmpq, r0mp 3081*c0909341SAndroid Build Coastguard Worker mova m7, [base+prep_8tap_2d_rnd] 3082*c0909341SAndroid Build Coastguard Worker sub srcq, 2 3083*c0909341SAndroid Build Coastguard Worker pshuflw m0, m0, q2121 3084*c0909341SAndroid Build Coastguard Worker pxor m6, m6 3085*c0909341SAndroid Build Coastguard Worker punpcklbw m6, m0 3086*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m2 3087*c0909341SAndroid Build Coastguard Worker psraw m6, 4 3088*c0909341SAndroid Build Coastguard Worker psraw m2, 8 3089*c0909341SAndroid Build Coastguard Worker test dword r7m, 0x800 3090*c0909341SAndroid Build Coastguard Worker jz .hv_w4_10bpc 3091*c0909341SAndroid Build Coastguard Worker psraw m6, 2 3092*c0909341SAndroid Build Coastguard Worker.hv_w4_10bpc: 3093*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 3094*c0909341SAndroid Build Coastguard Worker%assign regs_used 4 3095*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*7 3096*c0909341SAndroid Build Coastguard Worker%assign regs_used 7 3097*c0909341SAndroid Build Coastguard Worker %define m10 [esp+16*3] 3098*c0909341SAndroid Build Coastguard Worker %define m12 [esp+16*5] 3099*c0909341SAndroid Build Coastguard Worker %define 
m13 [esp+16*6] 3100*c0909341SAndroid Build Coastguard Worker %define m14 [base+spel_h_shufA] 3101*c0909341SAndroid Build Coastguard Worker %define m11 [base+spel_h_shufB] 3102*c0909341SAndroid Build Coastguard Worker pshufd m0, m2, q0000 3103*c0909341SAndroid Build Coastguard Worker pshufd m1, m2, q1111 3104*c0909341SAndroid Build Coastguard Worker pshufd m2, m2, q2222 3105*c0909341SAndroid Build Coastguard Worker pshufd m5, m6, q0000 3106*c0909341SAndroid Build Coastguard Worker pshufd m6, m6, q1111 3107*c0909341SAndroid Build Coastguard Worker mova m8, m0 3108*c0909341SAndroid Build Coastguard Worker mova m9, m1 3109*c0909341SAndroid Build Coastguard Worker mova m10, m2 3110*c0909341SAndroid Build Coastguard Worker mova m12, m5 3111*c0909341SAndroid Build Coastguard Worker mova m13, m6 3112*c0909341SAndroid Build Coastguard Worker neg ssq 3113*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*2] 3114*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ssq*1] 3115*c0909341SAndroid Build Coastguard Worker neg ssq 3116*c0909341SAndroid Build Coastguard Worker%else 3117*c0909341SAndroid Build Coastguard Worker mov r6, ssq 3118*c0909341SAndroid Build Coastguard Worker pshufd m8, m2, q0000 3119*c0909341SAndroid Build Coastguard Worker neg r6 3120*c0909341SAndroid Build Coastguard Worker pshufd m9, m2, q1111 3121*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+r6 *2] 3122*c0909341SAndroid Build Coastguard Worker pshufd m10, m2, q2222 3123*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+r6 *1] 3124*c0909341SAndroid Build Coastguard Worker pshufd m12, m6, q0000 3125*c0909341SAndroid Build Coastguard Worker mova m14, [base+spel_h_shufA] 3126*c0909341SAndroid Build Coastguard Worker pshufd m13, m6, q1111 3127*c0909341SAndroid Build Coastguard Worker mova m11, [base+spel_h_shufB] 3128*c0909341SAndroid Build Coastguard Worker%endif 3129*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0] 3130*c0909341SAndroid Build Coastguard Worker 
movu m0, [srcq+ssq*1] 3131*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 3132*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*0] 3133*c0909341SAndroid Build Coastguard Worker HV_H_W4_6TAP m3, m3, m5, m11 3134*c0909341SAndroid Build Coastguard Worker HV_H_W4_6TAP m4, m4, m5, m11 3135*c0909341SAndroid Build Coastguard Worker HV_H_W4_6TAP m5, m1, m5, m11 3136*c0909341SAndroid Build Coastguard Worker HV_H_W4_6TAP m0, m0, m1, m11 3137*c0909341SAndroid Build Coastguard Worker HV_H_W4_6TAP m2, m2, m1, m11 3138*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 6}, m3, m5, m4, m0, m2 3139*c0909341SAndroid Build Coastguard Worker packssdw m3, m5 ; 0 2 3140*c0909341SAndroid Build Coastguard Worker packssdw m4, m0 ; 1 3 3141*c0909341SAndroid Build Coastguard Worker packssdw m5, m2 ; 2 4 3142*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m3, m4 ; 01 3143*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 ; 23 3144*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m4, m5 ; 12 3145*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m5 ; 34 3146*c0909341SAndroid Build Coastguard Worker.hv_w4_loop: 3147*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*1] 3148*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m8, m1 ; a0 3149*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 3150*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m8, m2 ; b0 3151*c0909341SAndroid Build Coastguard Worker mova m1, m3 3152*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m9 ; a1 3153*c0909341SAndroid Build Coastguard Worker mova m2, m4 3154*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m9 ; b1 3155*c0909341SAndroid Build Coastguard Worker paddd m5, m3 3156*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*0] 3157*c0909341SAndroid Build Coastguard Worker paddd m6, m4 3158*c0909341SAndroid Build Coastguard Worker HV_H_W4_6TAP m0, m0, m4, m11 3159*c0909341SAndroid Build Coastguard Worker 
HV_H_W4_6TAP m3, m3, m4, m11 3160*c0909341SAndroid Build Coastguard Worker psrad m4, m2, 16 3161*c0909341SAndroid Build Coastguard Worker psrad m0, 6 3162*c0909341SAndroid Build Coastguard Worker psrad m3, 6 3163*c0909341SAndroid Build Coastguard Worker packssdw m4, m0 ; 4 5 3164*c0909341SAndroid Build Coastguard Worker packssdw m0, m3 ; 5 6 3165*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m4, m0 ; 45 3166*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m0 ; 56 3167*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m10, m3 ; a2 3168*c0909341SAndroid Build Coastguard Worker paddd m5, m7 3169*c0909341SAndroid Build Coastguard Worker paddd m5, m0 3170*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m10, m4 ; b2 3171*c0909341SAndroid Build Coastguard Worker paddd m6, m7 3172*c0909341SAndroid Build Coastguard Worker paddd m6, m0 3173*c0909341SAndroid Build Coastguard Worker psrad m5, 6 3174*c0909341SAndroid Build Coastguard Worker psrad m6, 6 3175*c0909341SAndroid Build Coastguard Worker packssdw m5, m6 3176*c0909341SAndroid Build Coastguard Worker mova [tmpq], m5 3177*c0909341SAndroid Build Coastguard Worker add tmpq, 16 3178*c0909341SAndroid Build Coastguard Worker sub hd, 2 3179*c0909341SAndroid Build Coastguard Worker jg .hv_w4_loop 3180*c0909341SAndroid Build Coastguard Worker RET 3181*c0909341SAndroid Build Coastguard Worker.hv_w8: 3182*c0909341SAndroid Build Coastguard Worker RESET_STACK_STATE 3183*c0909341SAndroid Build Coastguard Worker shr mxd, 16 3184*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+1+mxq*8] 3185*c0909341SAndroid Build Coastguard Worker movzx mxd, myb 3186*c0909341SAndroid Build Coastguard Worker shr myd, 16 3187*c0909341SAndroid Build Coastguard Worker cmp hd, 6 3188*c0909341SAndroid Build Coastguard Worker cmovb myd, mxd 3189*c0909341SAndroid Build Coastguard Worker movq m1, [base+subpel_filters+1+myq*8] 3190*c0909341SAndroid Build Coastguard Worker movifnidn ssq, r2mp 3191*c0909341SAndroid 
Build Coastguard Worker mova m4, [base+prep_8tap_2d_rnd] 3192*c0909341SAndroid Build Coastguard Worker pxor m0, m0 3193*c0909341SAndroid Build Coastguard Worker punpcklbw m0, m2 3194*c0909341SAndroid Build Coastguard Worker punpcklbw m1, m1 3195*c0909341SAndroid Build Coastguard Worker sub srcq, 4 3196*c0909341SAndroid Build Coastguard Worker psraw m0, 4 3197*c0909341SAndroid Build Coastguard Worker psraw m1, 8 3198*c0909341SAndroid Build Coastguard Worker test dword r7m, 0x800 3199*c0909341SAndroid Build Coastguard Worker jz .hv_w8_10bpc 3200*c0909341SAndroid Build Coastguard Worker psraw m0, 2 3201*c0909341SAndroid Build Coastguard Worker.hv_w8_10bpc: 3202*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 3203*c0909341SAndroid Build Coastguard Worker%assign regs_used 1 3204*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*9 3205*c0909341SAndroid Build Coastguard Worker%assign regs_used 7 3206*c0909341SAndroid Build Coastguard Worker mov tmpq, r0mp 3207*c0909341SAndroid Build Coastguard Worker mova [rsp+16*7], m4 3208*c0909341SAndroid Build Coastguard Worker%else 3209*c0909341SAndroid Build Coastguard Worker%if WIN64 3210*c0909341SAndroid Build Coastguard Worker PUSH r8 3211*c0909341SAndroid Build Coastguard Worker%assign regs_used 9 3212*c0909341SAndroid Build Coastguard Worker%endif 3213*c0909341SAndroid Build Coastguard Worker ALLOC_STACK 16*6, 16 3214*c0909341SAndroid Build Coastguard Worker%endif 3215*c0909341SAndroid Build Coastguard Worker pshufd m2, m0, q0000 3216*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], m2 3217*c0909341SAndroid Build Coastguard Worker pshufd m2, m0, q1111 3218*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m2 3219*c0909341SAndroid Build Coastguard Worker pshufd m0, m0, q2222 3220*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m0 3221*c0909341SAndroid Build Coastguard Worker pshufd m2, m1, q0000 3222*c0909341SAndroid Build Coastguard Worker mova [rsp+16*3], m2 3223*c0909341SAndroid Build 
Coastguard Worker pshufd m2, m1, q1111 3224*c0909341SAndroid Build Coastguard Worker mova [rsp+16*4], m2 3225*c0909341SAndroid Build Coastguard Worker pshufd m1, m1, q2222 3226*c0909341SAndroid Build Coastguard Worker mova [rsp+16*5], m1 3227*c0909341SAndroid Build Coastguard Worker mov r6, ssq 3228*c0909341SAndroid Build Coastguard Worker neg r6 3229*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 3230*c0909341SAndroid Build Coastguard Worker mov r5d, wd 3231*c0909341SAndroid Build Coastguard Worker shl r5d, 14 3232*c0909341SAndroid Build Coastguard Worker lea r5d, [r5+hq-(1<<16)] 3233*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 3234*c0909341SAndroid Build Coastguard Worker %define srcmp [esp+16*8+4*0] 3235*c0909341SAndroid Build Coastguard Worker %define tmpmp [esp+16*8+4*1] 3236*c0909341SAndroid Build Coastguard Worker%endif 3237*c0909341SAndroid Build Coastguard Worker.hv_w8_loop0: 3238*c0909341SAndroid Build Coastguard Worker mov srcmp, srcq 3239*c0909341SAndroid Build Coastguard Worker mov tmpmp, tmpq 3240*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+r6*2+0] 3241*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+r6*2+2] 3242*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+16*0] 3243*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+16*1] 3244*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+16*2] 3245*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m2, m5, m6, m7, m1, m0 3246*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+r6*1+0] 3247*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+r6*1+2] 3248*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m3, m5, m6, m7, m1, m0 3249*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*0+0] 3250*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*0+2] 3251*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m4, m5, m6, m7, m1, m0 3252*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*1+0] 3253*c0909341SAndroid 
Build Coastguard Worker movu m6, [srcq+ssq*1+2] 3254*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 3255*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m0, m5, m6, m7, m1 3256*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*0+0] 3257*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*0+2] 3258*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m1, m5, m6, m7 3259*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+16*7] 3260*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m5}, m2, m3, m4, m0, m1 3261*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 6 }, m2, m4, m3, m0, m1 3262*c0909341SAndroid Build Coastguard Worker packssdw m2, m4 ; 0 2 3263*c0909341SAndroid Build Coastguard Worker packssdw m3, m0 ; 1 3 3264*c0909341SAndroid Build Coastguard Worker packssdw m4, m1 ; 2 4 3265*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m2, m3 ; 01 3266*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m3 ; 23 3267*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m3, m4 ; 12 3268*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 ; 34 3269*c0909341SAndroid Build Coastguard Worker.hv_w8_loop: 3270*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+16*3] 3271*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+16*4] 3272*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m0, m5 ; a0 3273*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m1 ; b0 3274*c0909341SAndroid Build Coastguard Worker mova m0, m2 3275*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m6 ; a1 3276*c0909341SAndroid Build Coastguard Worker mova m1, m3 3277*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m6 ; b1 3278*c0909341SAndroid Build Coastguard Worker paddd m4, m2 3279*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*1+0] 3280*c0909341SAndroid Build Coastguard Worker paddd m5, m3 3281*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*1+2] 3282*c0909341SAndroid Build Coastguard 
Worker lea srcq, [srcq+ssq*2] 3283*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m6, m2, m3 3284*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*0+0] 3285*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*0+2] 3286*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m7, m2, m3 3287*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+16*7] 3288*c0909341SAndroid Build Coastguard Worker psrad m3, m1, 16 3289*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m2}, m6, m7, m4, m5 3290*c0909341SAndroid Build Coastguard Worker psrad m6, 6 3291*c0909341SAndroid Build Coastguard Worker psrad m7, 6 3292*c0909341SAndroid Build Coastguard Worker packssdw m3, m6 ; 4 5 3293*c0909341SAndroid Build Coastguard Worker packssdw m6, m7 ; 5 6 3294*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+16*5] 3295*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3, m6 ; 45 3296*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m6 ; 56 3297*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m2, m7 ; a2 3298*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m3 ; b2 3299*c0909341SAndroid Build Coastguard Worker paddd m4, m6 3300*c0909341SAndroid Build Coastguard Worker paddd m5, m7 3301*c0909341SAndroid Build Coastguard Worker psrad m4, 6 3302*c0909341SAndroid Build Coastguard Worker psrad m5, 6 3303*c0909341SAndroid Build Coastguard Worker packssdw m4, m5 3304*c0909341SAndroid Build Coastguard Worker movq [tmpq+wq*0], m4 3305*c0909341SAndroid Build Coastguard Worker movhps [tmpq+wq*2], m4 3306*c0909341SAndroid Build Coastguard Worker lea tmpq, [tmpq+wq*4] 3307*c0909341SAndroid Build Coastguard Worker sub hd, 2 3308*c0909341SAndroid Build Coastguard Worker jg .hv_w8_loop 3309*c0909341SAndroid Build Coastguard Worker mov srcq, srcmp 3310*c0909341SAndroid Build Coastguard Worker mov tmpq, tmpmp 3311*c0909341SAndroid Build Coastguard Worker movzx hd, r5w 3312*c0909341SAndroid Build Coastguard Worker add srcq, 8 3313*c0909341SAndroid Build 
Coastguard Worker add tmpq, 8 3314*c0909341SAndroid Build Coastguard Worker sub r5d, 1<<16 3315*c0909341SAndroid Build Coastguard Worker%else 3316*c0909341SAndroid Build Coastguard Worker lea r8d, [wq*4-(1<<5)] 3317*c0909341SAndroid Build Coastguard Worker lea r8d, [hq+r8*8] 3318*c0909341SAndroid Build Coastguard Worker.hv_w8_loop0: 3319*c0909341SAndroid Build Coastguard Worker mova m5, [spel_h_shufA] 3320*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+r6*2+ 0] 3321*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+16*0] 3322*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+r6*2+ 8] 3323*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+16*1] 3324*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+r6*2+16] 3325*c0909341SAndroid Build Coastguard Worker mova m8, [rsp+16*2] 3326*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m9, m0, m1, m2, 6, m5, m6, m7, m8 3327*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+r6*1+ 0] 3328*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+r6*1+ 8] 3329*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+r6*1+16] 3330*c0909341SAndroid Build Coastguard Worker lea r5, [srcq+ssq*2] 3331*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m11, m0, m1, m2, 6, m5, m6, m7, m8 3332*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0+ 0] 3333*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0+ 8] 3334*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*0+16] 3335*c0909341SAndroid Build Coastguard Worker mov r7, tmpq 3336*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m13, m0, m1, m2, 6, m5, m6, m7, m8 3337*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*1+ 0] 3338*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*1+ 8] 3339*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*1+16] 3340*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m15, m0, m1, m2, 6, m5, m6, m7, m8 3341*c0909341SAndroid Build Coastguard Worker movu m0, 
[r5+ssq*0+ 0] 3342*c0909341SAndroid Build Coastguard Worker movu m1, [r5+ssq*0+ 8] 3343*c0909341SAndroid Build Coastguard Worker movu m2, [r5+ssq*0+16] 3344*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m5, m0, m1, m2, 6, m5, m6, m7, m8 3345*c0909341SAndroid Build Coastguard Worker punpcklwd m8, m9, m11 ; 01 3346*c0909341SAndroid Build Coastguard Worker punpckhwd m9, m11 3347*c0909341SAndroid Build Coastguard Worker punpcklwd m10, m11, m13 ; 12 3348*c0909341SAndroid Build Coastguard Worker punpckhwd m11, m13 3349*c0909341SAndroid Build Coastguard Worker punpcklwd m12, m13, m15 ; 23 3350*c0909341SAndroid Build Coastguard Worker punpckhwd m13, m15 3351*c0909341SAndroid Build Coastguard Worker punpcklwd m14, m15, m5 ; 34 3352*c0909341SAndroid Build Coastguard Worker punpckhwd m15, m5 3353*c0909341SAndroid Build Coastguard Worker.hv_w8_loop: 3354*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+16*3] 3355*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+16*4] 3356*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m8, m3 ; a0 3357*c0909341SAndroid Build Coastguard Worker mova m8, m12 3358*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m9, m3 ; a0' 3359*c0909341SAndroid Build Coastguard Worker mova m9, m13 3360*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m10, m3 ; b0 3361*c0909341SAndroid Build Coastguard Worker mova m10, m14 3362*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m11 ; b0' 3363*c0909341SAndroid Build Coastguard Worker mova m11, m15 3364*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m7}, m12, m13, m14, m15 3365*c0909341SAndroid Build Coastguard Worker movu m6, [r5+ssq*1+ 0] 3366*c0909341SAndroid Build Coastguard Worker paddd m0, m12 3367*c0909341SAndroid Build Coastguard Worker movu m7, [r5+ssq*1+ 8] 3368*c0909341SAndroid Build Coastguard Worker paddd m2, m13 3369*c0909341SAndroid Build Coastguard Worker movu m12, [r5+ssq*1+16] 3370*c0909341SAndroid Build Coastguard Worker paddd m1, m14 
3371*c0909341SAndroid Build Coastguard Worker lea r5, [r5+ssq*2] 3372*c0909341SAndroid Build Coastguard Worker paddd m3, m15 3373*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m15, m6, m7, m12, 6 3374*c0909341SAndroid Build Coastguard Worker movu m6, [r5+ssq*0+ 0] 3375*c0909341SAndroid Build Coastguard Worker movu m7, [r5+ssq*0+ 8] 3376*c0909341SAndroid Build Coastguard Worker movu m14, [r5+ssq*0+16] 3377*c0909341SAndroid Build Coastguard Worker punpcklwd m12, m5, m15 ; 45 3378*c0909341SAndroid Build Coastguard Worker punpckhwd m13, m5, m15 3379*c0909341SAndroid Build Coastguard Worker HV_H_6TAP m5, m6, m7, m14, 6 3380*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+16*5] 3381*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m4}, m0, m2, m1, m3 3382*c0909341SAndroid Build Coastguard Worker punpcklwd m14, m15, m5 ; 56 3383*c0909341SAndroid Build Coastguard Worker punpckhwd m15, m5 3384*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m12, m7 ; a2 3385*c0909341SAndroid Build Coastguard Worker paddd m0, m6 3386*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m13, m7 ; a2' 3387*c0909341SAndroid Build Coastguard Worker paddd m2, m6 3388*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m14, m7 ; b2 3389*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m15 ; b2' 3390*c0909341SAndroid Build Coastguard Worker paddd m1, m6 3391*c0909341SAndroid Build Coastguard Worker paddd m3, m7 3392*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 6}, m0, m2, m1, m3 3393*c0909341SAndroid Build Coastguard Worker packssdw m0, m2 3394*c0909341SAndroid Build Coastguard Worker packssdw m1, m3 3395*c0909341SAndroid Build Coastguard Worker mova [r7+wq*0], m0 3396*c0909341SAndroid Build Coastguard Worker mova [r7+wq*2], m1 3397*c0909341SAndroid Build Coastguard Worker lea r7, [r7+wq*4] 3398*c0909341SAndroid Build Coastguard Worker sub hd, 2 3399*c0909341SAndroid Build Coastguard Worker jg .hv_w8_loop 3400*c0909341SAndroid Build Coastguard Worker add 
srcq, 16 3401*c0909341SAndroid Build Coastguard Worker add tmpq, 16 3402*c0909341SAndroid Build Coastguard Worker movzx hd, r8b 3403*c0909341SAndroid Build Coastguard Worker sub r8d, 1<<8 3404*c0909341SAndroid Build Coastguard Worker%endif 3405*c0909341SAndroid Build Coastguard Worker jg .hv_w8_loop0 3406*c0909341SAndroid Build Coastguard Worker RET 3407*c0909341SAndroid Build Coastguard Worker 3408*c0909341SAndroid Build Coastguard WorkerPREP_8TAP_FN smooth_sharp, SMOOTH, SHARP, prep_8tap_16bpc 3409*c0909341SAndroid Build Coastguard WorkerPREP_8TAP_FN sharp_smooth, SHARP, SMOOTH, prep_8tap_16bpc 3410*c0909341SAndroid Build Coastguard WorkerPREP_8TAP_FN regular_sharp, REGULAR, SHARP, prep_8tap_16bpc 3411*c0909341SAndroid Build Coastguard WorkerPREP_8TAP_FN sharp_regular, SHARP, REGULAR, prep_8tap_16bpc 3412*c0909341SAndroid Build Coastguard WorkerPREP_8TAP_FN sharp, SHARP, SHARP 3413*c0909341SAndroid Build Coastguard Worker 3414*c0909341SAndroid Build Coastguard Workercglobal prep_8tap_16bpc, 0, 8, 0, tmp, src, ss, w, h, mx, my 3415*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 3416*c0909341SAndroid Build Coastguard Worker %define mxb r0b 3417*c0909341SAndroid Build Coastguard Worker %define mxd r0 3418*c0909341SAndroid Build Coastguard Worker %define mxq r0 3419*c0909341SAndroid Build Coastguard Worker %define myb r2b 3420*c0909341SAndroid Build Coastguard Worker %define myd r2 3421*c0909341SAndroid Build Coastguard Worker %define myq r2 3422*c0909341SAndroid Build Coastguard Worker %define m8 [esp+16*0] 3423*c0909341SAndroid Build Coastguard Worker %define m9 [esp+16*1] 3424*c0909341SAndroid Build Coastguard Worker %define m10 [esp+16*2] 3425*c0909341SAndroid Build Coastguard Worker %define m11 [esp+16*3] 3426*c0909341SAndroid Build Coastguard Worker %define m12 [esp+16*4] 3427*c0909341SAndroid Build Coastguard Worker %define m13 [esp+16*5] 3428*c0909341SAndroid Build Coastguard Worker %define m14 [esp+16*6] 3429*c0909341SAndroid Build Coastguard Worker 
%define m15 [esp+16*7] 3430*c0909341SAndroid Build Coastguard Worker%endif 3431*c0909341SAndroid Build Coastguard Worker imul mxd, mxm, 0x010101 3432*c0909341SAndroid Build Coastguard Worker add mxd, t0d ; 8tap_h, mx, 4tap_h 3433*c0909341SAndroid Build Coastguard Worker imul myd, mym, 0x010101 3434*c0909341SAndroid Build Coastguard Worker add myd, t1d ; 8tap_v, my, 4tap_v 3435*c0909341SAndroid Build Coastguard Worker LEA t2, prep_ssse3 3436*c0909341SAndroid Build Coastguard Worker movifnidn wd, wm 3437*c0909341SAndroid Build Coastguard Worker movifnidn srcq, srcmp 3438*c0909341SAndroid Build Coastguard Worker test mxd, 0xf00 3439*c0909341SAndroid Build Coastguard Worker jnz .h 3440*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 3441*c0909341SAndroid Build Coastguard Worker test myd, 0xf00 3442*c0909341SAndroid Build Coastguard Worker jz mangle(private_prefix %+ _prep_6tap_16bpc_ssse3).prep 3443*c0909341SAndroid Build Coastguard Worker.v: 3444*c0909341SAndroid Build Coastguard Worker movzx mxd, myb 3445*c0909341SAndroid Build Coastguard Worker shr myd, 16 3446*c0909341SAndroid Build Coastguard Worker cmp hd, 4 3447*c0909341SAndroid Build Coastguard Worker cmove myd, mxd 3448*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+myq*8] 3449*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 15 3450*c0909341SAndroid Build Coastguard Worker movddup m7, [base+prep_8tap_1d_rnd] 3451*c0909341SAndroid Build Coastguard Worker movifnidn ssq, r2mp 3452*c0909341SAndroid Build Coastguard Worker movifnidn tmpq, r0mp 3453*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m3 3454*c0909341SAndroid Build Coastguard Worker psraw m3, 8 ; sign-extend 3455*c0909341SAndroid Build Coastguard Worker test dword r7m, 0x800 3456*c0909341SAndroid Build Coastguard Worker jnz .v_12bpc 3457*c0909341SAndroid Build Coastguard Worker psllw m3, 2 3458*c0909341SAndroid Build Coastguard Worker.v_12bpc: 3459*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 
3460*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*7 3461*c0909341SAndroid Build Coastguard Worker pshufd m0, m3, q0000 3462*c0909341SAndroid Build Coastguard Worker pshufd m1, m3, q1111 3463*c0909341SAndroid Build Coastguard Worker pshufd m2, m3, q2222 3464*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q3333 3465*c0909341SAndroid Build Coastguard Worker mova m8, m0 3466*c0909341SAndroid Build Coastguard Worker mova m9, m1 3467*c0909341SAndroid Build Coastguard Worker mova m10, m2 3468*c0909341SAndroid Build Coastguard Worker mova m11, m3 3469*c0909341SAndroid Build Coastguard Worker%else 3470*c0909341SAndroid Build Coastguard Worker pshufd m8, m3, q0000 3471*c0909341SAndroid Build Coastguard Worker pshufd m9, m3, q1111 3472*c0909341SAndroid Build Coastguard Worker pshufd m10, m3, q2222 3473*c0909341SAndroid Build Coastguard Worker pshufd m11, m3, q3333 3474*c0909341SAndroid Build Coastguard Worker%endif 3475*c0909341SAndroid Build Coastguard Worker lea r6, [ssq*3] 3476*c0909341SAndroid Build Coastguard Worker sub srcq, r6 3477*c0909341SAndroid Build Coastguard Worker mov r6d, wd 3478*c0909341SAndroid Build Coastguard Worker shl wd, 6 3479*c0909341SAndroid Build Coastguard Worker mov r5, srcq 3480*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 3481*c0909341SAndroid Build Coastguard Worker mov r7, tmpq 3482*c0909341SAndroid Build Coastguard Worker%elif STACK_ALIGNMENT < 16 3483*c0909341SAndroid Build Coastguard Worker mov [esp+4*29], tmpq 3484*c0909341SAndroid Build Coastguard Worker%endif 3485*c0909341SAndroid Build Coastguard Worker lea wd, [wq+hq-(1<<8)] 3486*c0909341SAndroid Build Coastguard Worker.v_loop0: 3487*c0909341SAndroid Build Coastguard Worker movq m1, [srcq+ssq*0] 3488*c0909341SAndroid Build Coastguard Worker movq m2, [srcq+ssq*1] 3489*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 3490*c0909341SAndroid Build Coastguard Worker movq m3, [srcq+ssq*0] 3491*c0909341SAndroid Build Coastguard Worker movq m4, 
[srcq+ssq*1] 3492*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 3493*c0909341SAndroid Build Coastguard Worker movq m5, [srcq+ssq*0] 3494*c0909341SAndroid Build Coastguard Worker movq m6, [srcq+ssq*1] 3495*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 3496*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 3497*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m2 ; 01 3498*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3 ; 12 3499*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m4 ; 23 3500*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5 ; 34 3501*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m6 ; 45 3502*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m0 ; 56 3503*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 3504*c0909341SAndroid Build Coastguard Worker jmp .v_loop_start 3505*c0909341SAndroid Build Coastguard Worker.v_loop: 3506*c0909341SAndroid Build Coastguard Worker mova m1, m12 3507*c0909341SAndroid Build Coastguard Worker mova m2, m13 3508*c0909341SAndroid Build Coastguard Worker mova m3, m14 3509*c0909341SAndroid Build Coastguard Worker.v_loop_start: 3510*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m8 ; a0 3511*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m8 ; b0 3512*c0909341SAndroid Build Coastguard Worker mova m12, m3 3513*c0909341SAndroid Build Coastguard Worker mova m13, m4 3514*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m9 ; a1 3515*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m9 ; b1 3516*c0909341SAndroid Build Coastguard Worker paddd m1, m3 3517*c0909341SAndroid Build Coastguard Worker paddd m2, m4 3518*c0909341SAndroid Build Coastguard Worker mova m14, m5 3519*c0909341SAndroid Build Coastguard Worker mova m4, m6 3520*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m10 ; a2 3521*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m10 ; b2 3522*c0909341SAndroid Build Coastguard Worker paddd m1, m5 
3523*c0909341SAndroid Build Coastguard Worker paddd m2, m6 3524*c0909341SAndroid Build Coastguard Worker movq m6, [srcq+ssq*1] 3525*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 3526*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m0, m6 ; 67 3527*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 3528*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m11, m5 ; a3 3529*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m0 ; 78 3530*c0909341SAndroid Build Coastguard Worker paddd m1, m7 3531*c0909341SAndroid Build Coastguard Worker paddd m1, m3 3532*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m11, m6 ; b3 3533*c0909341SAndroid Build Coastguard Worker paddd m2, m7 3534*c0909341SAndroid Build Coastguard Worker paddd m2, m3 3535*c0909341SAndroid Build Coastguard Worker psrad m1, 4 3536*c0909341SAndroid Build Coastguard Worker psrad m2, 4 3537*c0909341SAndroid Build Coastguard Worker packssdw m1, m2 3538*c0909341SAndroid Build Coastguard Worker movq [tmpq+r6*0], m1 3539*c0909341SAndroid Build Coastguard Worker movhps [tmpq+r6*2], m1 3540*c0909341SAndroid Build Coastguard Worker lea tmpq, [tmpq+r6*4] 3541*c0909341SAndroid Build Coastguard Worker sub hd, 2 3542*c0909341SAndroid Build Coastguard Worker jg .v_loop 3543*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 3544*c0909341SAndroid Build Coastguard Worker mov tmpq, [esp+4*29] 3545*c0909341SAndroid Build Coastguard Worker add r5, 8 3546*c0909341SAndroid Build Coastguard Worker add tmpq, 8 3547*c0909341SAndroid Build Coastguard Worker mov srcq, r5 3548*c0909341SAndroid Build Coastguard Worker mov [esp+4*29], tmpq 3549*c0909341SAndroid Build Coastguard Worker%else 3550*c0909341SAndroid Build Coastguard Worker mov tmpq, tmpmp 3551*c0909341SAndroid Build Coastguard Worker add r5, 8 3552*c0909341SAndroid Build Coastguard Worker add tmpq, 8 3553*c0909341SAndroid Build Coastguard Worker mov srcq, r5 3554*c0909341SAndroid Build Coastguard Worker mov tmpmp, 
tmpq 3555*c0909341SAndroid Build Coastguard Worker%endif 3556*c0909341SAndroid Build Coastguard Worker%else 3557*c0909341SAndroid Build Coastguard Worker.v_loop: 3558*c0909341SAndroid Build Coastguard Worker pmaddwd m12, m8, m1 ; a0 3559*c0909341SAndroid Build Coastguard Worker pmaddwd m13, m8, m2 ; b0 3560*c0909341SAndroid Build Coastguard Worker mova m1, m3 3561*c0909341SAndroid Build Coastguard Worker mova m2, m4 3562*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m9 ; a1 3563*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m9 ; b1 3564*c0909341SAndroid Build Coastguard Worker paddd m12, m3 3565*c0909341SAndroid Build Coastguard Worker paddd m13, m4 3566*c0909341SAndroid Build Coastguard Worker mova m3, m5 3567*c0909341SAndroid Build Coastguard Worker mova m4, m6 3568*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m10 ; a2 3569*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m10 ; b2 3570*c0909341SAndroid Build Coastguard Worker paddd m12, m5 3571*c0909341SAndroid Build Coastguard Worker paddd m13, m6 3572*c0909341SAndroid Build Coastguard Worker movq m6, [srcq+ssq*1] 3573*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 3574*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m0, m6 ; 67 3575*c0909341SAndroid Build Coastguard Worker movq m0, [srcq+ssq*0] 3576*c0909341SAndroid Build Coastguard Worker pmaddwd m14, m11, m5 ; a3 3577*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m0 ; 78 3578*c0909341SAndroid Build Coastguard Worker paddd m12, m7 3579*c0909341SAndroid Build Coastguard Worker paddd m12, m14 3580*c0909341SAndroid Build Coastguard Worker pmaddwd m14, m11, m6 ; b3 3581*c0909341SAndroid Build Coastguard Worker paddd m13, m7 3582*c0909341SAndroid Build Coastguard Worker paddd m13, m14 3583*c0909341SAndroid Build Coastguard Worker psrad m12, 4 3584*c0909341SAndroid Build Coastguard Worker psrad m13, 4 3585*c0909341SAndroid Build Coastguard Worker packssdw m12, m13 3586*c0909341SAndroid Build Coastguard 
Worker movq [tmpq+r6*0], m12 3587*c0909341SAndroid Build Coastguard Worker movhps [tmpq+r6*2], m12 3588*c0909341SAndroid Build Coastguard Worker lea tmpq, [tmpq+r6*4] 3589*c0909341SAndroid Build Coastguard Worker sub hd, 2 3590*c0909341SAndroid Build Coastguard Worker jg .v_loop 3591*c0909341SAndroid Build Coastguard Worker add r5, 8 3592*c0909341SAndroid Build Coastguard Worker add r7, 8 3593*c0909341SAndroid Build Coastguard Worker mov srcq, r5 3594*c0909341SAndroid Build Coastguard Worker mov tmpq, r7 3595*c0909341SAndroid Build Coastguard Worker%endif 3596*c0909341SAndroid Build Coastguard Worker movzx hd, wb 3597*c0909341SAndroid Build Coastguard Worker sub wd, 1<<8 3598*c0909341SAndroid Build Coastguard Worker jg .v_loop0 3599*c0909341SAndroid Build Coastguard Worker RET 3600*c0909341SAndroid Build Coastguard Worker.h: 3601*c0909341SAndroid Build Coastguard Worker RESET_STACK_STATE 3602*c0909341SAndroid Build Coastguard Worker test myd, 0xf00 3603*c0909341SAndroid Build Coastguard Worker jnz .hv 3604*c0909341SAndroid Build Coastguard Worker movifnidn ssq, r2mp 3605*c0909341SAndroid Build Coastguard Worker movifnidn hd, r4m 3606*c0909341SAndroid Build Coastguard Worker movddup m5, [base+prep_8tap_1d_rnd] 3607*c0909341SAndroid Build Coastguard Worker cmp wd, 4 3608*c0909341SAndroid Build Coastguard Worker jne .h_w8 3609*c0909341SAndroid Build Coastguard Worker.h_w4: 3610*c0909341SAndroid Build Coastguard Worker movzx mxd, mxb 3611*c0909341SAndroid Build Coastguard Worker movq m0, [base+subpel_filters+mxq*8] 3612*c0909341SAndroid Build Coastguard Worker mova m3, [base+spel_h_shufA] 3613*c0909341SAndroid Build Coastguard Worker mova m4, [base+spel_h_shufB] 3614*c0909341SAndroid Build Coastguard Worker movifnidn tmpq, tmpmp 3615*c0909341SAndroid Build Coastguard Worker sub srcq, 2 3616*c0909341SAndroid Build Coastguard Worker WIN64_SPILL_XMM 8 3617*c0909341SAndroid Build Coastguard Worker punpcklbw m0, m0 3618*c0909341SAndroid Build Coastguard Worker psraw m0, 8 
; -----------------------------------------------------------------------------
; prep_8tap_16bpc, continued.  The function entry and the setup of the .h_w4
; path are above this point; m0 (filter taps), m3/m4 (shuffle masks) and m5
; (constant added before the shift) are initialised there.
; -----------------------------------------------------------------------------
    ; Bit 11 of r7m set -> skip the extra scaling of the taps.
    ; NOTE(review): r7m is presumably bitdepth_max (0x3ff vs 0xfff) - confirm.
    test        dword r7m, 0x800
    jnz .h_w4_12bpc
    psllw       m0, 2               ; taps *= 4 on the non-12bpc path
.h_w4_12bpc:
    pshufd      m6, m0, q1111       ; broadcast dword 1 of the taps
    pshufd      m7, m0, q2222       ; broadcast dword 2 of the taps
.h_w4_loop:
    ; Two rows per iteration, four output words per row.
    movu        m1, [srcq+ssq*0]
    movu        m2, [srcq+ssq*1]
    lea         srcq, [srcq+ssq*2]
    pshufb      m0, m1, m3          ; 0 1 1 2 2 3 3 4
    pshufb      m1, m4              ; 2 3 3 4 4 5 5 6
    pmaddwd     m0, m6
    pmaddwd     m1, m7
    paddd       m0, m5
    paddd       m0, m1
    pshufb      m1, m2, m3          ; same two shuffles for the second row
    pshufb      m2, m4
    pmaddwd     m1, m6
    pmaddwd     m2, m7
    paddd       m1, m5
    paddd       m1, m2
    psrad       m0, 4
    psrad       m1, 4
    packssdw    m0, m1              ; row 0 in the low half, row 1 in the high half
    mova        [tmpq], m0
    add         tmpq, 16
    sub         hd, 2
    jg .h_w4_loop
    RET

; -----------------------------------------------------------------------------
; .h_w8: horizontal-only 8-tap filter, eight output words per inner iteration.
; -----------------------------------------------------------------------------
.h_w8:
    WIN64_SPILL_XMM 11
    shr         mxd, 16
    movq        m2, [base+subpel_filters+mxq*8]
    mova        m4, [base+spel_h_shufA]
    mova        m6, [base+spel_h_shufB]
    movifnidn   tmpq, r0mp
    add         wd, wd              ; w is now the row width in bytes
    punpcklbw   m2, m2
    add         srcq, wq            ; point src and tmp at the end of the row ...
    psraw       m2, 8               ; (punpcklbw + psraw 8 = sign-extend bytes to words)
    add         tmpq, wq
    neg         wq                  ; ... and index them with a negative offset
    test        dword r7m, 0x800
    jnz .h_w8_12bpc
    psllw       m2, 2               ; taps *= 4 on the non-12bpc path
.h_w8_12bpc:
    pshufd      m7, m2, q0000       ; taps 0,1
%if ARCH_X86_32
    ; NOTE(review): m8-m10 are not hardware registers on x86-32; presumably
    ; they are mapped onto the stack space allocated here - confirm.
    ALLOC_STACK -16*3
    pshufd      m0, m2, q1111
    pshufd      m1, m2, q2222
    pshufd      m2, m2, q3333
    mova        m8, m0              ; taps 2,3
    mova        m9, m1              ; taps 4,5
    mova        m10, m2             ; taps 6,7
%else
    pshufd      m8, m2, q1111       ; taps 2,3
    pshufd      m9, m2, q2222       ; taps 4,5
    pshufd      m10, m2, q3333      ; taps 6,7
%endif
.h_w8_loop0:
    mov         r6, wq              ; r6 runs from -row_bytes up to 0
.h_w8_loop:
    movu        m0, [srcq+r6- 6]
    movu        m1, [srcq+r6+ 2]
    pshufb      m2, m0, m4          ; 0 1 1 2 2 3 3 4
    pshufb      m0, m6              ; 2 3 3 4 4 5 5 6
    pmaddwd     m2, m7              ; abcd0
    pmaddwd     m0, m8              ; abcd1
    pshufb      m3, m1, m4          ; 4 5 5 6 6 7 7 8
    pshufb      m1, m6              ; 6 7 7 8 8 9 9 a
    paddd       m2, m5
    paddd       m0, m2
    pmaddwd     m2, m9, m3          ; abcd2
    pmaddwd     m3, m7              ; efgh0
    paddd       m0, m2
    pmaddwd     m2, m10, m1         ; abcd3
    pmaddwd     m1, m8              ; efgh1
    paddd       m0, m2
    movu        m2, [srcq+r6+10]
    paddd       m3, m5
    paddd       m1, m3
    ; m4 is the shuffle that yields the "n n+1 n+1 n+2 ..." pattern and m6 the
    ; one offset by two pixels, exactly as for the two loads above.
    pshufb      m3, m2, m4          ; 8 9 9 a a b b c
    pshufb      m2, m6              ; a b b c c d d e
    pmaddwd     m3, m9              ; efgh2
    pmaddwd     m2, m10             ; efgh3
    paddd       m1, m3
    paddd       m1, m2
    psrad       m0, 4
    psrad       m1, 4
    packssdw    m0, m1
    mova        [tmpq+r6], m0
    add         r6, 16
    jl .h_w8_loop
    add         srcq, ssq
    sub         tmpq, wq            ; wq is negative: advances tmp by one row
    dec         hd
    jg .h_w8_loop0
    RET

; -----------------------------------------------------------------------------
; .hv: horizontal + vertical 8-tap filter, processed in columns of four pixels
; and two output rows per inner iteration.
; -----------------------------------------------------------------------------
.hv:
    RESET_STACK_STATE
    ; Horizontal filter index: low byte of mx when w == 4, else mx >> 16.
    movzx       t3d, mxb
    shr         mxd, 16
    cmp         wd, 4
    cmove       mxd, t3d
    movifnidn   hd, r4m
    movq        m2, [base+subpel_filters+mxq*8]
    ; Vertical filter index: low byte of my when h == 4, else my >> 16.
    movzx       mxd, myb
    shr         myd, 16
    cmp         hd, 4
    cmove       myd, mxd
    movq        m3, [base+subpel_filters+myq*8]
%if ARCH_X86_32
    mov         ssq, r2mp
    mov         tmpq, r0mp
    mova        m0, [base+spel_h_shufA]
    mova        m1, [base+spel_h_shufB]
    mova        m4, [base+prep_8tap_2d_rnd]
    ALLOC_STACK -16*14
    mova        m8, m0
    mova        m9, m1
    mova        m14, m4
%else
%if WIN64
    ALLOC_STACK 16*6, 16
%endif
    mova        m8, [base+spel_h_shufA]
    mova        m9, [base+spel_h_shufB]
%endif
    ; Horizontal taps: interleaving zero (low byte) with the tap (high byte)
    ; gives tap << 8; the arithmetic shift by 4 leaves tap << 4.
    pxor        m0, m0
    punpcklbw   m0, m2
    ; Vertical taps: plain sign extension of bytes to words.
    punpcklbw   m3, m3
    psraw       m0, 4
    psraw       m3, 8
    test        dword r7m, 0x800
    jz .hv_10bpc
    psraw       m0, 2               ; bit 11 of r7m set: horizontal taps become tap << 2
.hv_10bpc:
    lea         r6, [ssq*3]
    sub         srcq, 6             ; three 16-bit pixels to the left
    sub         srcq, r6            ; three rows up
    mov         r6d, wd             ; r6 = w, used below as the tmp row stride (r6*2 bytes)
    shl         wd, 6
    mov         r5, srcq            ; r5 = src base of the current column block
%if ARCH_X86_32
    %define tmp esp+16*8
%if STACK_ALIGNMENT < 16
    mov         [esp+4*61], tmpq    ; keep the tmp column base in a stack slot
%endif
    pshufd      m1, m0, q0000
    pshufd      m2, m0, q1111
    pshufd      m5, m0, q2222
    pshufd      m0, m0, q3333
    mova        m10, m1
    mova        m11, m2
    mova        m12, m5
    mova        m13, m0
%else
%if WIN64
    %define tmp rsp
%else
    %define tmp rsp-88 ; red zone
%endif
    mov         r7, tmpq            ; r7 = tmp base of the current column block
    pshufd      m10, m0, q0000
    pshufd      m11, m0, q1111
    pshufd      m12, m0, q2222
    pshufd      m13, m0, q3333
%endif
    ; wd = (w << 6) + h - 256: the low byte carries h, the bits above it count
    ; the remaining 4-pixel column blocks (assumes h < 256 - TODO confirm).
    lea         wd, [wq+hq-(1<<8)]
    ; Broadcast the four vertical tap pairs and park them at tmp+16*1..4.
    pshufd      m0, m3, q0000
    pshufd      m1, m3, q1111
    pshufd      m2, m3, q2222
    pshufd      m3, m3, q3333
    mova        [tmp+16*1], m0
    mova        [tmp+16*2], m1
    mova        [tmp+16*3], m2
    mova        [tmp+16*4], m3
.hv_loop0:
%if ARCH_X86_64
    mova        m14, [prep_8tap_2d_rnd]
%endif
    ; Prime the vertical window with rows 0-6.  PUT_8TAP_HV_H is defined
    ; outside this chunk; it is invoked once per source row here.
    movu        m4, [srcq+ssq*0+0]
    movu        m1, [srcq+ssq*0+8]
    movu        m5, [srcq+ssq*1+0]
    movu        m2, [srcq+ssq*1+8]
    lea         srcq, [srcq+ssq*2]
    movu        m6, [srcq+ssq*0+0]
    movu        m3, [srcq+ssq*0+8]
    PUT_8TAP_HV_H 4, 1, 0, 6
    PUT_8TAP_HV_H 5, 2, 0, 6
    PUT_8TAP_HV_H 6, 3, 0, 6
    movu        m7, [srcq+ssq*1+0]
    movu        m2, [srcq+ssq*1+8]
    lea         srcq, [srcq+ssq*2]
    movu        m1, [srcq+ssq*0+0]
    movu        m3, [srcq+ssq*0+8]
    PUT_8TAP_HV_H 7, 2, 0, 6
    PUT_8TAP_HV_H 1, 3, 0, 6
    movu        m2, [srcq+ssq*1+0]
    movu        m3, [srcq+ssq*1+8]
    lea         srcq, [srcq+ssq*2]
    PUT_8TAP_HV_H 2, 3, 0, 6
    packssdw    m4, m7              ; 0 3
    packssdw    m5, m1              ; 1 4
    movu        m0, [srcq+ssq*0+0]
    movu        m1, [srcq+ssq*0+8]
    PUT_8TAP_HV_H 0, 1, 3, 6
    packssdw    m6, m2              ; 2 5
    packssdw    m7, m0              ; 3 6
    punpcklwd   m1, m4, m5          ; 01
    punpckhwd   m4, m5              ; 34
    punpcklwd   m2, m5, m6          ; 12
    punpckhwd   m5, m6              ; 45
    punpcklwd   m3, m6, m7          ; 23
    punpckhwd   m6, m7              ; 56
%if ARCH_X86_32
    jmp .hv_loop_start
.hv_loop:
    ; Reload the row pairs that slid into the "01"/"12" positions.
    mova        m1, [tmp+16*5]
    mova        m2, m15
.hv_loop_start:
    mova        m7, [tmp+16*1]
    pmaddwd     m1, m7              ; a0
    pmaddwd     m2, m7              ; b0
    mova        m7, [tmp+16*2]
    mova        [tmp+16*5], m3      ; 23 becomes 01 of the next iteration
    pmaddwd     m3, m7              ; a1
    mova        m15, m4             ; 34 becomes 12 of the next iteration
    pmaddwd     m4, m7              ; b1
    mova        m7, [tmp+16*3]
    paddd       m1, m14
    paddd       m2, m14
    paddd       m1, m3
    paddd       m2, m4
    mova        m3, m5              ; 45 becomes 23
    pmaddwd     m5, m7              ; a2
    mova        m4, m6              ; 56 becomes 34
    pmaddwd     m6, m7              ; b2
    paddd       m1, m5
    paddd       m2, m6
    movu        m7, [srcq+ssq*1+0]
    movu        m5, [srcq+ssq*1+8]
    lea         srcq, [srcq+ssq*2]
    PUT_8TAP_HV_H 7, 5, 6, 6
    packssdw    m0, m7              ; 6 7
    mova        [tmp+16*0], m0
    movu        m0, [srcq+ssq*0+0]
    movu        m5, [srcq+ssq*0+8]
    PUT_8TAP_HV_H 0, 5, 6, 6
    mova        m6, [tmp+16*0]
    packssdw    m7, m0              ; 7 8
    punpcklwd   m5, m6, m7          ; 67
    punpckhwd   m6, m7              ; 78
    pmaddwd     m7, m5, [tmp+16*4]
    paddd       m1, m7              ; a3
    pmaddwd     m7, m6, [tmp+16*4]
    paddd       m2, m7              ; b3
    psrad       m1, 6
    psrad       m2, 6
    packssdw    m1, m2
    movq        [tmpq+r6*0], m1     ; output row a (four words)
    movhps      [tmpq+r6*2], m1     ; output row b, one row (w words) further
    lea         tmpq, [tmpq+r6*4]
    sub         hd, 2
    jg .hv_loop
    ; Next column block: four pixels (8 bytes) to the right in src and tmp.
%if STACK_ALIGNMENT < 16
    mov         tmpq, [esp+4*61]
    add         r5, 8
    add         tmpq, 8
    mov         srcq, r5
    mov         [esp+4*61], tmpq
%else
    mov         tmpq, tmpmp
    add         r5, 8
    add         tmpq, 8
    mov         srcq, r5
    mov         tmpmp, tmpq
%endif
%else
.hv_loop:
    mova        m15, [tmp+16*1]
    mova        m7, [prep_8tap_2d_rnd]
    pmaddwd     m14, m15, m1        ; a0
    pmaddwd     m15, m2             ; b0
    paddd       m14, m7
    paddd       m15, m7
    mova        m7, [tmp+16*2]
    mova        m1, m3              ; 23 becomes 01 of the next iteration
    pmaddwd     m3, m7              ; a1
    mova        m2, m4              ; 34 becomes 12
    pmaddwd     m4, m7              ; b1
    mova        m7, [tmp+16*3]
    paddd       m14, m3
    paddd       m15, m4
    mova        m3, m5              ; 45 becomes 23
    pmaddwd     m5, m7              ; a2
    mova        m4, m6              ; 56 becomes 34
    pmaddwd     m6, m7              ; b2
    paddd       m14, m5
    paddd       m15, m6
    movu        m7, [srcq+ssq*1+0]
    movu        m5, [srcq+ssq*1+8]
    lea         srcq, [srcq+ssq*2]
    PUT_8TAP_HV_H 7, 5, 6, 6, [prep_8tap_2d_rnd]
    packssdw    m0, m7              ; 6 7
    mova        [tmp+16*0], m0
    movu        m0, [srcq+ssq*0+0]
    movu        m5, [srcq+ssq*0+8]
    PUT_8TAP_HV_H 0, 5, 6, 6, [prep_8tap_2d_rnd]
    mova        m6, [tmp+16*0]
    packssdw    m7, m0              ; 7 8
    punpcklwd   m5, m6, m7          ; 67
    punpckhwd   m6, m7              ; 78
    pmaddwd     m7, m5, [tmp+16*4]
    paddd       m14, m7             ; a3
    pmaddwd     m7, m6, [tmp+16*4]
    paddd       m15, m7             ; b3
    psrad       m14, 6
    psrad       m15, 6
    packssdw    m14, m15
    movq        [tmpq+r6*0], m14    ; output row a (four words)
    movhps      [tmpq+r6*2], m14    ; output row b, one row (w words) further
    lea         tmpq, [tmpq+r6*4]
    sub         hd, 2
    jg .hv_loop
    ; Next column block: four pixels (8 bytes) to the right in src and tmp.
    add         r5, 8
    add         r7, 8
    mov         srcq, r5
    mov         tmpq, r7
%endif
    movzx       hd, wb              ; reload h from the low byte of the packed counter
    sub         wd, 1<<8            ; one column block done
    jg .hv_loop0
    RET
%undef tmp

; movifprep dst, src: emits "mov dst, src" only when assembling the prep
; variant (isprep != 0); expands to nothing otherwise.
%macro movifprep 2
   %if isprep
    mov         %1, %2
   %endif
%endmacro

; SAVE_REG n: remember the current meaning of the register aliases rN, rNq and
; rNd (and, on x86-32, define rNm_save as the stack-argument slot expression)
; so that LOAD_REG can restore them after the aliases have been remapped.
%macro SAVE_REG 1
    %xdefine r%1_save  r%1
    %xdefine r%1q_save r%1q
    %xdefine r%1d_save r%1d
    %if ARCH_X86_32
        %define r%1m_save [rstk+stack_offset+(%1+1)*4]
    %endif
%endmacro

; LOAD_REG n: restore the aliases captured by SAVE_REG n and drop the
; temporary *_save names.
%macro LOAD_REG 1
    %xdefine r%1  r%1_save
    %xdefine r%1q r%1q_save
    %xdefine r%1d r%1d_save
    %if ARCH_X86_32
        %define r%1m r%1m_save
    %endif
    %undef r%1d_save
    %undef r%1q_save
    %undef r%1_save
%endmacro

; REMAP_REG dst, src[, reset_m]: make the aliases r<dst>, r<dst>q and r<dst>d
; expand to what r<src>* currently expand to.
; The third argument is only evaluated on x86-32:
;   0        -> r<dst>m also follows r<src>m
;   non-zero -> r<dst>m is defined as the stack-argument slot of index <dst>
%macro REMAP_REG 2-3
    %xdefine r%1  r%2
    %xdefine r%1q r%2q
    %xdefine r%1d r%2d
    %if ARCH_X86_32
        %if %3 == 0
            %xdefine r%1m r%2m
        %else
            %define r%1m [rstk+stack_offset+(%1+1)*4]
        %endif
    %endif
%endmacro

; For the prep variant only: shift the register aliases up by one, so that rN
; names what r(N-1) named before (r14..r1 on x86-64, r5..r1 on x86-32).  The
; top alias is saved first; r0 is left untouched.
; NOTE(review): presumably this lets code written against put's argument
; numbering be shared with prep, which has one leading argument fewer - confirm.
%macro MCT_8TAP_SCALED_REMAP_REGS_TO_PREV 0
    %if isprep
        %if ARCH_X86_64
            SAVE_REG 14
            %assign %%i 14
            %rep 14
                %assign %%j %%i-1
                REMAP_REG %%i, %%j
                %assign %%i %%i-1
            %endrep
        %else
            SAVE_REG 5
            %assign %%i 5
            %rep 5
                %assign %%j %%i-1
                REMAP_REG %%i, %%j, 0
                %assign %%i %%i-1
            %endrep
        %endif
    %endif
%endmacro

; Inverse of MCT_8TAP_SCALED_REMAP_REGS_TO_PREV: shift the aliases back down
; by one (rN takes the current meaning of r(N+1)) and restore the saved top
; alias with LOAD_REG.
%macro MCT_8TAP_SCALED_REMAP_REGS_TO_DEFAULT 0
    %if isprep
        %assign %%i 1
        %if ARCH_X86_64
            %rep 13
                %assign %%j %%i+1
                REMAP_REG %%i, %%j
                %assign %%i %%i+1
            %endrep
            LOAD_REG 14
        %else
            %rep 4
                %assign %%j %%i+1
                REMAP_REG %%i, %%j, 1
                %assign %%i %%i+1
            %endrep
            LOAD_REG 5
        %endif
    %endif
%endmacro

; Emit RET under the default register mapping.  When the argument is non-zero
; (the default), the shifted mapping is re-applied afterwards so that code
; assembled after this point still sees the remapped aliases.
%macro MC_8TAP_SCALED_RET 0-1 1 ; leave_mapping_unchanged
    MCT_8TAP_SCALED_REMAP_REGS_TO_DEFAULT
    RET
    %if %1
        MCT_8TAP_SCALED_REMAP_REGS_TO_PREV
    %endif
%endmacro

%if ARCH_X86_32
    ; MC_4TAP_SCALED_H dst_mem
    ; Loads two rows at srcq and two rows at r4, shuffles them with m12 / m14
    ; and multiplies with m13 / m15, adds the pairs horizontally, adds the
    ; constant at [esp+0x00], shifts right by the count at [esp+0x10], packs
    ; to words and stores the result at [stk+dst_mem].
    ; Advances srcq and r4 by two rows; clobbers m2, m5, m6 and m7.
    %macro MC_4TAP_SCALED_H 1 ; dst_mem
        movu        m7, [srcq+ssq*0]
        movu        m2, [srcq+ssq*1]
        movu        m5, [r4  +ssq*0]
        movu        m6, [r4  +ssq*1]
        lea         srcq, [srcq+ssq*2]
        lea         r4,   [r4  +ssq*2]
        REPX {pshufb  x, m12}, m7, m2
        REPX {pmaddwd x, m13}, m7, m2
        REPX {pshufb  x, m14}, m5, m6
        REPX {pmaddwd x, m15}, m5, m6
        phaddd      m7, m5
        phaddd      m2, m6
        mova        m5, [esp+0x00]  ; value added before the shift
        movd        m6, [esp+0x10]  ; shift count
        paddd       m7, m5
        paddd       m2, m5
        psrad       m7, m6
        psrad       m2, m6
        packssdw    m7, m2
        mova        [stk+%1], m7
    %endmacro
%endif

%if ARCH_X86_64
    ; MC_8TAP_SCALED_H dst, tmp0..tmp6   (all arguments are xmm register numbers)
    ; Filters one source row into eight output words in m<dst>:
    ;  - eight unaligned 16-byte loads at srcq + {r4,r6,r7,r9,r10,r11,r13,rX}*2
    ;  - each load is multiplied by the coefficients at [stk+0x10 .. stk+0x80]
    ;  - a phaddd tree reduces every load to a single dword
    ;  - hround is added, the sums are shifted right by m12 and packed to words
    ; srcq is advanced by one row (ssq).  tmp0..tmp6 are clobbered.
    ; hround, rX and stk are defined outside this macro.
    %macro MC_8TAP_SCALED_H 8 ; dst, tmp[0-6]
        movu        m%1, [srcq+ r4*2]
        movu        m%2, [srcq+ r6*2]
        movu        m%3, [srcq+ r7*2]
        movu        m%4, [srcq+ r9*2]
        movu        m%5, [srcq+r10*2]
        movu        m%6, [srcq+r11*2]
        movu        m%7, [srcq+r13*2]
        movu        m%8, [srcq+ rX*2]
        add         srcq, ssq
        pmaddwd     m%1, [stk+0x10]
        pmaddwd     m%2, [stk+0x20]
        pmaddwd     m%3, [stk+0x30]
        pmaddwd     m%4, [stk+0x40]
        pmaddwd     m%5, [stk+0x50]
        pmaddwd     m%6, [stk+0x60]
        pmaddwd     m%7, [stk+0x70]
        pmaddwd     m%8, [stk+0x80]
        phaddd      m%1, m%2
        phaddd      m%3, m%4
        phaddd      m%5, m%6
        phaddd      m%7, m%8
        phaddd      m%1, m%3        ; sums for the first four loads
        phaddd      m%5, m%7        ; sums for the last four loads
        paddd       m%1, hround
        paddd       m%5, hround
        psrad       m%1, m12
        psrad       m%5, m12
        packssdw    m%1, m%5
    %endmacro
%else
    ; MC_8TAP_SCALED_H weights_mem_start, h_mem[, load_fh_offsets = 1]
    ; x86-32 counterpart of the macro above; the result is left in m0.
    ;  - the eight source offsets live at [stk+0 .. stk+28]; the first four are
    ;    (re)loaded into r0, rX, r4, r5 only when load_fh_offsets == 1, the
    ;    last four are always loaded, so those registers hold them on exit
    ;  - coefficients are read from [stk+weights_mem_start+0x00..0x30] and
    ;    [stk+weights_mem_start+0xa0..0xd0]
    ;  - when h_mem != 0 the packed result is also stored at [stk+h_mem]
    ; srcq is advanced by one row (ssq).  m0-m7 are clobbered.
    %macro MC_8TAP_SCALED_H 2-3 1 ; weights_mem_start, h_mem, load_fh_offsets
        %if %3 == 1
            mov         r0, [stk+ 0]
            mov         rX, [stk+ 4]
            mov         r4, [stk+ 8]
            mov         r5, [stk+12]
        %endif
        movu        m0, [srcq+r0*2]
        movu        m1, [srcq+rX*2]
        movu        m2, [srcq+r4*2]
        movu        m3, [srcq+r5*2]
        mov         r0, [stk+16]
        mov         rX, [stk+20]
        mov         r4, [stk+24]
        mov         r5, [stk+28]
        pmaddwd     m0, [stk+%1+0x00]
        pmaddwd     m1, [stk+%1+0x10]
        pmaddwd     m2, [stk+%1+0x20]
        pmaddwd     m3, [stk+%1+0x30]
        phaddd      m0, m1
        phaddd      m2, m3
        movu        m4, [srcq+r0*2]
        movu        m5, [srcq+rX*2]
        movu        m6, [srcq+r4*2]
        movu        m7, [srcq+r5*2]
        add         srcq, ssq
        pmaddwd     m4, [stk+%1+0xa0]
        pmaddwd     m5, [stk+%1+0xb0]
        pmaddwd     m6, [stk+%1+0xc0]
        pmaddwd     m7, [stk+%1+0xd0]
        phaddd      m4, m5
        phaddd      m6, m7
        phaddd      m0, m2          ; sums for the first four loads
        phaddd      m4, m6          ; sums for the last four loads
        paddd       m0, hround
        paddd       m4, hround
        psrad       m0, m12
        psrad       m4, m12
        packssdw    m0, m4
        %if %2 != 0
            mova        [stk+%2], m0
        %endif
    %endmacro
%endif

%macro MC_8TAP_SCALED 1
4151*c0909341SAndroid Build Coastguard Worker%ifidn %1, put 4152*c0909341SAndroid Build Coastguard Worker %assign isput 1 4153*c0909341SAndroid Build Coastguard Worker %assign isprep 0 4154*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4155*c0909341SAndroid Build Coastguard Worker %if required_stack_alignment <= STACK_ALIGNMENT 4156*c0909341SAndroid Build Coastguard Workercglobal put_8tap_scaled_16bpc, 2, 15, 16, 0x1c0, dst, ds, src, ss, w, h, mx, my, dx, dy, pxmax 4157*c0909341SAndroid Build Coastguard Worker %else 4158*c0909341SAndroid Build Coastguard Workercglobal put_8tap_scaled_16bpc, 2, 14, 16, 0x1c0, dst, ds, src, ss, w, h, mx, my, dx, dy, pxmax 4159*c0909341SAndroid Build Coastguard Worker %endif 4160*c0909341SAndroid Build Coastguard Worker %else ; ARCH_X86_32 4161*c0909341SAndroid Build Coastguard Worker %if required_stack_alignment <= STACK_ALIGNMENT 4162*c0909341SAndroid Build Coastguard Workercglobal put_8tap_scaled_16bpc, 0, 7, 8, 0x200, dst, ds, src, ss, w, h, mx, my, dx, dy, pxmax 4163*c0909341SAndroid Build Coastguard Worker %else 4164*c0909341SAndroid Build Coastguard Workercglobal put_8tap_scaled_16bpc, 0, 7, 8, -0x200-0x30, dst, ds, src, ss, w, h, mx, my, dx, dy, pxmax 4165*c0909341SAndroid Build Coastguard Worker %endif 4166*c0909341SAndroid Build Coastguard Worker %endif 4167*c0909341SAndroid Build Coastguard Worker %xdefine base_reg r12 4168*c0909341SAndroid Build Coastguard Worker%else ; prep 4169*c0909341SAndroid Build Coastguard Worker %assign isput 0 4170*c0909341SAndroid Build Coastguard Worker %assign isprep 1 4171*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4172*c0909341SAndroid Build Coastguard Worker %if required_stack_alignment <= STACK_ALIGNMENT 4173*c0909341SAndroid Build Coastguard Workercglobal prep_8tap_scaled_16bpc, 2, 15, 16, 0x1c0, tmp, src, ss, w, h, mx, my, dx, dy, pxmax 4174*c0909341SAndroid Build Coastguard Worker %xdefine tmp_stridem r14q 4175*c0909341SAndroid Build Coastguard Worker %else 
4176*c0909341SAndroid Build Coastguard Workercglobal prep_8tap_scaled_16bpc, 2, 14, 16, 0x1c0, tmp, src, ss, w, h, mx, my, dx, dy, pxmax 4177*c0909341SAndroid Build Coastguard Worker %define tmp_stridem qword [stk+0x138] 4178*c0909341SAndroid Build Coastguard Worker %endif 4179*c0909341SAndroid Build Coastguard Worker %xdefine base_reg r11 4180*c0909341SAndroid Build Coastguard Worker %else ; ARCH_X86_32 4181*c0909341SAndroid Build Coastguard Worker %if required_stack_alignment <= STACK_ALIGNMENT 4182*c0909341SAndroid Build Coastguard Workercglobal prep_8tap_scaled_16bpc, 0, 7, 8, 0x200, tmp, src, ss, w, h, mx, my, dx, dy, pxmax 4183*c0909341SAndroid Build Coastguard Worker %else 4184*c0909341SAndroid Build Coastguard Workercglobal prep_8tap_scaled_16bpc, 0, 6, 8, 0x200, tmp, src, ss, w, h, mx, my, dx, dy, pxmax 4185*c0909341SAndroid Build Coastguard Worker %endif 4186*c0909341SAndroid Build Coastguard Worker %define tmp_stridem dword [stk+0x138] 4187*c0909341SAndroid Build Coastguard Worker %endif 4188*c0909341SAndroid Build Coastguard Worker%endif 4189*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 4190*c0909341SAndroid Build Coastguard Worker mov [esp+0x1f0], t0d 4191*c0909341SAndroid Build Coastguard Worker mov [esp+0x1f4], t1d 4192*c0909341SAndroid Build Coastguard Worker %if isput && required_stack_alignment > STACK_ALIGNMENT 4193*c0909341SAndroid Build Coastguard Worker mov dstd, dstm 4194*c0909341SAndroid Build Coastguard Worker mov dsd, dsm 4195*c0909341SAndroid Build Coastguard Worker mov srcd, srcm 4196*c0909341SAndroid Build Coastguard Worker mov ssd, ssm 4197*c0909341SAndroid Build Coastguard Worker mov hd, hm 4198*c0909341SAndroid Build Coastguard Worker mov r4, mxm 4199*c0909341SAndroid Build Coastguard Worker %define r0m [esp+0x200] 4200*c0909341SAndroid Build Coastguard Worker %define dsm [esp+0x204] 4201*c0909341SAndroid Build Coastguard Worker %define dsmp dsm 4202*c0909341SAndroid Build Coastguard Worker %define r1m dsm 
4203*c0909341SAndroid Build Coastguard Worker %define r2m [esp+0x208] 4204*c0909341SAndroid Build Coastguard Worker %define ssm [esp+0x20c] 4205*c0909341SAndroid Build Coastguard Worker %define r3m ssm 4206*c0909341SAndroid Build Coastguard Worker %define hm [esp+0x210] 4207*c0909341SAndroid Build Coastguard Worker %define mxm [esp+0x214] 4208*c0909341SAndroid Build Coastguard Worker mov r0m, dstd 4209*c0909341SAndroid Build Coastguard Worker mov dsm, dsd 4210*c0909341SAndroid Build Coastguard Worker mov r2m, srcd 4211*c0909341SAndroid Build Coastguard Worker mov ssm, ssd 4212*c0909341SAndroid Build Coastguard Worker mov hm, hd 4213*c0909341SAndroid Build Coastguard Worker mov r0, mym 4214*c0909341SAndroid Build Coastguard Worker mov r1, dxm 4215*c0909341SAndroid Build Coastguard Worker mov r2, dym 4216*c0909341SAndroid Build Coastguard Worker %define mym [esp+0x218] 4217*c0909341SAndroid Build Coastguard Worker %define dxm [esp+0x21c] 4218*c0909341SAndroid Build Coastguard Worker %define dym [esp+0x220] 4219*c0909341SAndroid Build Coastguard Worker mov mxm, r4 4220*c0909341SAndroid Build Coastguard Worker mov mym, r0 4221*c0909341SAndroid Build Coastguard Worker mov dxm, r1 4222*c0909341SAndroid Build Coastguard Worker mov dym, r2 4223*c0909341SAndroid Build Coastguard Worker tzcnt wd, wm 4224*c0909341SAndroid Build Coastguard Worker %endif 4225*c0909341SAndroid Build Coastguard Worker %if isput 4226*c0909341SAndroid Build Coastguard Worker mov r3, pxmaxm 4227*c0909341SAndroid Build Coastguard Worker %define pxmaxm r3 4228*c0909341SAndroid Build Coastguard Worker %else 4229*c0909341SAndroid Build Coastguard Worker mov r2, pxmaxm 4230*c0909341SAndroid Build Coastguard Worker %endif 4231*c0909341SAndroid Build Coastguard Worker %if isprep && required_stack_alignment > STACK_ALIGNMENT 4232*c0909341SAndroid Build Coastguard Worker %xdefine base_reg r5 4233*c0909341SAndroid Build Coastguard Worker %else 4234*c0909341SAndroid Build Coastguard Worker %xdefine base_reg r6 
4235*c0909341SAndroid Build Coastguard Worker %endif 4236*c0909341SAndroid Build Coastguard Worker%endif 4237*c0909341SAndroid Build Coastguard Worker LEA base_reg, %1_8tap_scaled_16bpc_ssse3 4238*c0909341SAndroid Build Coastguard Worker%xdefine base base_reg-%1_8tap_scaled_16bpc_ssse3 4239*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 || isprep || required_stack_alignment <= STACK_ALIGNMENT 4240*c0909341SAndroid Build Coastguard Worker tzcnt wd, wm 4241*c0909341SAndroid Build Coastguard Worker%endif 4242*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 4243*c0909341SAndroid Build Coastguard Worker %if isput 4244*c0909341SAndroid Build Coastguard Worker mov r7d, pxmaxm 4245*c0909341SAndroid Build Coastguard Worker %endif 4246*c0909341SAndroid Build Coastguard Worker%else 4247*c0909341SAndroid Build Coastguard Worker %define m8 m0 4248*c0909341SAndroid Build Coastguard Worker %define m9 m1 4249*c0909341SAndroid Build Coastguard Worker %define m14 m4 4250*c0909341SAndroid Build Coastguard Worker %define m15 m3 4251*c0909341SAndroid Build Coastguard Worker%endif 4252*c0909341SAndroid Build Coastguard Worker movd m8, dxm 4253*c0909341SAndroid Build Coastguard Worker movd m14, mxm 4254*c0909341SAndroid Build Coastguard Worker%if isput 4255*c0909341SAndroid Build Coastguard Worker movd m15, pxmaxm 4256*c0909341SAndroid Build Coastguard Worker%endif 4257*c0909341SAndroid Build Coastguard Worker pshufd m8, m8, q0000 4258*c0909341SAndroid Build Coastguard Worker pshufd m14, m14, q0000 4259*c0909341SAndroid Build Coastguard Worker%if isput 4260*c0909341SAndroid Build Coastguard Worker pshuflw m15, m15, q0000 4261*c0909341SAndroid Build Coastguard Worker punpcklqdq m15, m15 4262*c0909341SAndroid Build Coastguard Worker%endif 4263*c0909341SAndroid Build Coastguard Worker%if isprep 4264*c0909341SAndroid Build Coastguard Worker %if UNIX64 4265*c0909341SAndroid Build Coastguard Worker mov r5d, t0d 4266*c0909341SAndroid Build Coastguard Worker DECLARE_REG_TMP 5, 7 
4267*c0909341SAndroid Build Coastguard Worker %endif 4268*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4269*c0909341SAndroid Build Coastguard Worker mov r6d, pxmaxm 4270*c0909341SAndroid Build Coastguard Worker %endif 4271*c0909341SAndroid Build Coastguard Worker%endif 4272*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 4273*c0909341SAndroid Build Coastguard Worker mov dyd, dym 4274*c0909341SAndroid Build Coastguard Worker%endif 4275*c0909341SAndroid Build Coastguard Worker%if isput 4276*c0909341SAndroid Build Coastguard Worker %if WIN64 4277*c0909341SAndroid Build Coastguard Worker mov r8d, hm 4278*c0909341SAndroid Build Coastguard Worker DEFINE_ARGS dst, ds, src, ss, w, _, _, my, h, dy, ss3 4279*c0909341SAndroid Build Coastguard Worker %define hm r5m 4280*c0909341SAndroid Build Coastguard Worker %define dxm r8m 4281*c0909341SAndroid Build Coastguard Worker %elif ARCH_X86_64 4282*c0909341SAndroid Build Coastguard Worker DEFINE_ARGS dst, ds, src, ss, w, h, _, my, dx, dy, ss3 4283*c0909341SAndroid Build Coastguard Worker %define hm r6m 4284*c0909341SAndroid Build Coastguard Worker %else 4285*c0909341SAndroid Build Coastguard Worker %endif 4286*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4287*c0909341SAndroid Build Coastguard Worker %if required_stack_alignment > STACK_ALIGNMENT 4288*c0909341SAndroid Build Coastguard Worker %define dsm [rsp+0x138] 4289*c0909341SAndroid Build Coastguard Worker %define rX r1 4290*c0909341SAndroid Build Coastguard Worker %define rXd r1d 4291*c0909341SAndroid Build Coastguard Worker %else 4292*c0909341SAndroid Build Coastguard Worker %define dsm dsq 4293*c0909341SAndroid Build Coastguard Worker %define rX r14 4294*c0909341SAndroid Build Coastguard Worker %define rXd r14d 4295*c0909341SAndroid Build Coastguard Worker %endif 4296*c0909341SAndroid Build Coastguard Worker %else 4297*c0909341SAndroid Build Coastguard Worker %define rX r1 4298*c0909341SAndroid Build Coastguard Worker %endif 
4299*c0909341SAndroid Build Coastguard Worker%else ; prep 4300*c0909341SAndroid Build Coastguard Worker %if WIN64 4301*c0909341SAndroid Build Coastguard Worker mov r7d, hm 4302*c0909341SAndroid Build Coastguard Worker DEFINE_ARGS tmp, src, ss, w, _, _, my, h, dy, ss3 4303*c0909341SAndroid Build Coastguard Worker %define hm r4m 4304*c0909341SAndroid Build Coastguard Worker %define dxm r7m 4305*c0909341SAndroid Build Coastguard Worker %elif ARCH_X86_64 4306*c0909341SAndroid Build Coastguard Worker DEFINE_ARGS tmp, src, ss, w, h, _, my, dx, dy, ss3 4307*c0909341SAndroid Build Coastguard Worker %xdefine hm r7m 4308*c0909341SAndroid Build Coastguard Worker %endif 4309*c0909341SAndroid Build Coastguard Worker MCT_8TAP_SCALED_REMAP_REGS_TO_PREV 4310*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4311*c0909341SAndroid Build Coastguard Worker %define rX r14 4312*c0909341SAndroid Build Coastguard Worker %define rXd r14d 4313*c0909341SAndroid Build Coastguard Worker %else 4314*c0909341SAndroid Build Coastguard Worker %define rX r3 4315*c0909341SAndroid Build Coastguard Worker %endif 4316*c0909341SAndroid Build Coastguard Worker%endif 4317*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 4318*c0909341SAndroid Build Coastguard Worker shr r7d, 11 4319*c0909341SAndroid Build Coastguard Worker mova m10, [base+pd_0x3ff] 4320*c0909341SAndroid Build Coastguard Worker movddup m11, [base+s_8tap_h_rnd+r7*8] 4321*c0909341SAndroid Build Coastguard Worker movd m12, [base+s_8tap_h_sh+r7*4] 4322*c0909341SAndroid Build Coastguard Worker %if isput 4323*c0909341SAndroid Build Coastguard Worker movddup m13, [base+put_s_8tap_v_rnd+r7*8] 4324*c0909341SAndroid Build Coastguard Worker movd m7, [base+put_s_8tap_v_sh+r7*4] 4325*c0909341SAndroid Build Coastguard Worker %define pxmaxm [rsp] 4326*c0909341SAndroid Build Coastguard Worker mova pxmaxm, m15 4327*c0909341SAndroid Build Coastguard Worker punpcklqdq m12, m7 4328*c0909341SAndroid Build Coastguard Worker %endif 
4329*c0909341SAndroid Build Coastguard Worker lea ss3q, [ssq*3] 4330*c0909341SAndroid Build Coastguard Worker movzx r7d, t1b 4331*c0909341SAndroid Build Coastguard Worker shr t1d, 16 4332*c0909341SAndroid Build Coastguard Worker cmp hd, 6 4333*c0909341SAndroid Build Coastguard Worker cmovs t1d, r7d 4334*c0909341SAndroid Build Coastguard Worker sub srcq, ss3q 4335*c0909341SAndroid Build Coastguard Worker%else 4336*c0909341SAndroid Build Coastguard Worker %define m10 [base+pd_0x3ff] 4337*c0909341SAndroid Build Coastguard Worker %define m11 [esp+0x00] 4338*c0909341SAndroid Build Coastguard Worker %define m12 [esp+0x10] 4339*c0909341SAndroid Build Coastguard Worker shr r3, 11 4340*c0909341SAndroid Build Coastguard Worker movddup m1, [base+s_8tap_h_rnd+r3*8] 4341*c0909341SAndroid Build Coastguard Worker movd m2, [base+s_8tap_h_sh+r3*4] 4342*c0909341SAndroid Build Coastguard Worker %if isput 4343*c0909341SAndroid Build Coastguard Worker %define m13 [esp+0x20] 4344*c0909341SAndroid Build Coastguard Worker %define pxmaxm [esp+0x30] 4345*c0909341SAndroid Build Coastguard Worker %define stk esp+0x40 4346*c0909341SAndroid Build Coastguard Worker movddup m5, [base+put_s_8tap_v_rnd+r3*8] 4347*c0909341SAndroid Build Coastguard Worker movd m6, [base+put_s_8tap_v_sh+r3*4] 4348*c0909341SAndroid Build Coastguard Worker mova pxmaxm, m15 4349*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m6 4350*c0909341SAndroid Build Coastguard Worker mova m13, m5 4351*c0909341SAndroid Build Coastguard Worker %else 4352*c0909341SAndroid Build Coastguard Worker %define m13 [base+pd_m524256] 4353*c0909341SAndroid Build Coastguard Worker %endif 4354*c0909341SAndroid Build Coastguard Worker mov ssd, ssm 4355*c0909341SAndroid Build Coastguard Worker mova m11, m1 4356*c0909341SAndroid Build Coastguard Worker mova m12, m2 4357*c0909341SAndroid Build Coastguard Worker MCT_8TAP_SCALED_REMAP_REGS_TO_DEFAULT 4358*c0909341SAndroid Build Coastguard Worker mov r1, [esp+0x1f4] 4359*c0909341SAndroid Build 
Coastguard Worker lea r0, [ssd*3] 4360*c0909341SAndroid Build Coastguard Worker movzx r2, r1b 4361*c0909341SAndroid Build Coastguard Worker shr r1, 16 4362*c0909341SAndroid Build Coastguard Worker cmp dword hm, 6 4363*c0909341SAndroid Build Coastguard Worker cmovs r1, r2 4364*c0909341SAndroid Build Coastguard Worker mov [esp+0x1f4], r1 4365*c0909341SAndroid Build Coastguard Worker %if isprep 4366*c0909341SAndroid Build Coastguard Worker mov r1, r1m 4367*c0909341SAndroid Build Coastguard Worker %endif 4368*c0909341SAndroid Build Coastguard Worker mov r2, r2m 4369*c0909341SAndroid Build Coastguard Worker sub srcq, r0 4370*c0909341SAndroid Build Coastguard Worker MCT_8TAP_SCALED_REMAP_REGS_TO_PREV 4371*c0909341SAndroid Build Coastguard Worker %define ss3q r0 4372*c0909341SAndroid Build Coastguard Worker %define myd r4 4373*c0909341SAndroid Build Coastguard Worker %define dyd dword dym 4374*c0909341SAndroid Build Coastguard Worker %define hd dword hm 4375*c0909341SAndroid Build Coastguard Worker%endif 4376*c0909341SAndroid Build Coastguard Worker cmp dyd, 1024 4377*c0909341SAndroid Build Coastguard Worker je .dy1 4378*c0909341SAndroid Build Coastguard Worker cmp dyd, 2048 4379*c0909341SAndroid Build Coastguard Worker je .dy2 4380*c0909341SAndroid Build Coastguard Worker movzx wd, word [base+%1_8tap_scaled_ssse3_table+wq*2] 4381*c0909341SAndroid Build Coastguard Worker add wq, base_reg 4382*c0909341SAndroid Build Coastguard Worker jmp wq 4383*c0909341SAndroid Build Coastguard Worker%if isput 4384*c0909341SAndroid Build Coastguard Worker.w2: 4385*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4386*c0909341SAndroid Build Coastguard Worker mov myd, mym 4387*c0909341SAndroid Build Coastguard Worker movzx t0d, t0b 4388*c0909341SAndroid Build Coastguard Worker sub srcq, 2 4389*c0909341SAndroid Build Coastguard Worker movd m15, t0d 4390*c0909341SAndroid Build Coastguard Worker %else 4391*c0909341SAndroid Build Coastguard Worker movzx r4, byte [esp+0x1f0] 
4392*c0909341SAndroid Build Coastguard Worker sub srcq, 2 4393*c0909341SAndroid Build Coastguard Worker movd m15, r4 4394*c0909341SAndroid Build Coastguard Worker %endif 4395*c0909341SAndroid Build Coastguard Worker pxor m9, m9 4396*c0909341SAndroid Build Coastguard Worker punpckldq m9, m8 4397*c0909341SAndroid Build Coastguard Worker paddd m14, m9 ; mx+dx*[0-1] 4398*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4399*c0909341SAndroid Build Coastguard Worker mova m9, [base+pd_0x4000] 4400*c0909341SAndroid Build Coastguard Worker %endif 4401*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0000 4402*c0909341SAndroid Build Coastguard Worker pand m8, m14, m10 4403*c0909341SAndroid Build Coastguard Worker psrld m8, 6 4404*c0909341SAndroid Build Coastguard Worker paddd m15, m8 4405*c0909341SAndroid Build Coastguard Worker movd r4d, m15 4406*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 4407*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4408*c0909341SAndroid Build Coastguard Worker movd r6d, m15 4409*c0909341SAndroid Build Coastguard Worker %else 4410*c0909341SAndroid Build Coastguard Worker movd r3d, m15 4411*c0909341SAndroid Build Coastguard Worker %endif 4412*c0909341SAndroid Build Coastguard Worker mova m5, [base+bdct_lb_q] 4413*c0909341SAndroid Build Coastguard Worker mova m6, [base+spel_s_shuf2] 4414*c0909341SAndroid Build Coastguard Worker movd m15, [base+subpel_filters+r4*8+2] 4415*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4416*c0909341SAndroid Build Coastguard Worker movd m7, [base+subpel_filters+r6*8+2] 4417*c0909341SAndroid Build Coastguard Worker %else 4418*c0909341SAndroid Build Coastguard Worker movd m7, [base+subpel_filters+r3*8+2] 4419*c0909341SAndroid Build Coastguard Worker %endif 4420*c0909341SAndroid Build Coastguard Worker pxor m2, m2 4421*c0909341SAndroid Build Coastguard Worker pcmpeqd m8, m2 4422*c0909341SAndroid Build Coastguard Worker psrld m14, 10 4423*c0909341SAndroid Build 
Coastguard Worker paddd m14, m14 4424*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 4425*c0909341SAndroid Build Coastguard Worker mov r3, r3m 4426*c0909341SAndroid Build Coastguard Worker pshufb m14, m5 4427*c0909341SAndroid Build Coastguard Worker paddb m14, m6 4428*c0909341SAndroid Build Coastguard Worker mova [stk], m14 4429*c0909341SAndroid Build Coastguard Worker SWAP m5, m0 4430*c0909341SAndroid Build Coastguard Worker SWAP m6, m3 4431*c0909341SAndroid Build Coastguard Worker %define m15 m6 4432*c0909341SAndroid Build Coastguard Worker %endif 4433*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0] 4434*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*1] 4435*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*2] 4436*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ss3q ] 4437*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*4] 4438*c0909341SAndroid Build Coastguard Worker punpckldq m15, m7 4439*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4440*c0909341SAndroid Build Coastguard Worker pshufb m14, m5 4441*c0909341SAndroid Build Coastguard Worker paddb m14, m6 4442*c0909341SAndroid Build Coastguard Worker pand m9, m8 4443*c0909341SAndroid Build Coastguard Worker pandn m8, m15 4444*c0909341SAndroid Build Coastguard Worker SWAP m15, m8 4445*c0909341SAndroid Build Coastguard Worker por m15, m9 4446*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ssq*0] 4447*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*1] 4448*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*2] 4449*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ss3q ] 4450*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*4] 4451*c0909341SAndroid Build Coastguard Worker %else 4452*c0909341SAndroid Build Coastguard Worker pand m7, m5, [base+pd_0x4000] 4453*c0909341SAndroid Build Coastguard Worker pandn m5, m15 4454*c0909341SAndroid Build Coastguard Worker por m5, m7 4455*c0909341SAndroid 
Build Coastguard Worker %define m15 m5 4456*c0909341SAndroid Build Coastguard Worker %endif 4457*c0909341SAndroid Build Coastguard Worker punpcklbw m15, m15 4458*c0909341SAndroid Build Coastguard Worker psraw m15, 8 4459*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m0, m1, m2, m3 4460*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m0, m1, m2, m3 4461*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4462*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m4, m5, m6, m7 4463*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m4, m5, m6, m7 4464*c0909341SAndroid Build Coastguard Worker phaddd m0, m1 4465*c0909341SAndroid Build Coastguard Worker phaddd m2, m3 4466*c0909341SAndroid Build Coastguard Worker phaddd m4, m5 4467*c0909341SAndroid Build Coastguard Worker phaddd m6, m7 4468*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m2, m4, m6 4469*c0909341SAndroid Build Coastguard Worker REPX {psrad x, m12}, m0, m2, m4, m6 4470*c0909341SAndroid Build Coastguard Worker packssdw m0, m2 ; 0 1 2 3 4471*c0909341SAndroid Build Coastguard Worker packssdw m4, m6 ; 4 5 6 7 4472*c0909341SAndroid Build Coastguard Worker SWAP m1, m4 4473*c0909341SAndroid Build Coastguard Worker %else 4474*c0909341SAndroid Build Coastguard Worker mova [stk+0x10], m15 4475*c0909341SAndroid Build Coastguard Worker phaddd m0, m1 4476*c0909341SAndroid Build Coastguard Worker phaddd m2, m3 4477*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0] 4478*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ssq*1] 4479*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*2] 4480*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ss3q ] 4481*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*4] 4482*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m1, m7, m6, m3 4483*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m1, m7, m6, m3 4484*c0909341SAndroid Build 
Coastguard Worker phaddd m1, m7 4485*c0909341SAndroid Build Coastguard Worker phaddd m6, m3 4486*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m2, m1, m6 4487*c0909341SAndroid Build Coastguard Worker REPX {psrad x, m12}, m0, m2, m1, m6 4488*c0909341SAndroid Build Coastguard Worker packssdw m0, m2 4489*c0909341SAndroid Build Coastguard Worker packssdw m1, m6 4490*c0909341SAndroid Build Coastguard Worker %define m14 [stk+0x00] 4491*c0909341SAndroid Build Coastguard Worker %define m15 [stk+0x10] 4492*c0909341SAndroid Build Coastguard Worker %endif 4493*c0909341SAndroid Build Coastguard Worker palignr m2, m1, m0, 4 ; 1 2 3 4 4494*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m0, m2 ; 01 12 4495*c0909341SAndroid Build Coastguard Worker punpckhwd m0, m2 ; 23 34 4496*c0909341SAndroid Build Coastguard Worker pshufd m5, m1, q0321 ; 5 6 7 _ 4497*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m1, m5 ; 45 56 4498*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m1, m5 ; 67 __ 4499*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 4500*c0909341SAndroid Build Coastguard Worker mov myd, mym 4501*c0909341SAndroid Build Coastguard Worker mov r0, r0m 4502*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m3 4503*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m0 4504*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 4505*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m4 4506*c0909341SAndroid Build Coastguard Worker %endif 4507*c0909341SAndroid Build Coastguard Worker.w2_loop: 4508*c0909341SAndroid Build Coastguard Worker and myd, 0x3ff 4509*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4510*c0909341SAndroid Build Coastguard Worker mov r6d, 64 << 24 4511*c0909341SAndroid Build Coastguard Worker mov r4d, myd 4512*c0909341SAndroid Build Coastguard Worker shr r4d, 6 4513*c0909341SAndroid Build Coastguard Worker lea r4d, [t1+r4] 4514*c0909341SAndroid Build Coastguard Worker cmovnz r6q, 
[base+subpel_filters+r4*8] 4515*c0909341SAndroid Build Coastguard Worker movq m10, r6q 4516*c0909341SAndroid Build Coastguard Worker punpcklbw m10, m10 4517*c0909341SAndroid Build Coastguard Worker psraw m10, 8 4518*c0909341SAndroid Build Coastguard Worker pshufd m7, m10, q0000 4519*c0909341SAndroid Build Coastguard Worker pshufd m8, m10, q1111 4520*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m3, m7 4521*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m0, m8 4522*c0909341SAndroid Build Coastguard Worker pshufd m9, m10, q2222 4523*c0909341SAndroid Build Coastguard Worker pshufd m10, m10, q3333 4524*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m2, m9 4525*c0909341SAndroid Build Coastguard Worker pmaddwd m8, m4, m10 4526*c0909341SAndroid Build Coastguard Worker paddd m5, m6 4527*c0909341SAndroid Build Coastguard Worker paddd m7, m8 4528*c0909341SAndroid Build Coastguard Worker %else 4529*c0909341SAndroid Build Coastguard Worker mov r1, [esp+0x1f4] 4530*c0909341SAndroid Build Coastguard Worker xor r3, r3 4531*c0909341SAndroid Build Coastguard Worker mov r5, myd 4532*c0909341SAndroid Build Coastguard Worker shr r5, 6 4533*c0909341SAndroid Build Coastguard Worker lea r1, [r1+r5] 4534*c0909341SAndroid Build Coastguard Worker mov r5, 64 << 24 4535*c0909341SAndroid Build Coastguard Worker cmovnz r3, [base+subpel_filters+r1*8+4] 4536*c0909341SAndroid Build Coastguard Worker cmovnz r5, [base+subpel_filters+r1*8+0] 4537*c0909341SAndroid Build Coastguard Worker movd m6, r3 4538*c0909341SAndroid Build Coastguard Worker movd m7, r5 4539*c0909341SAndroid Build Coastguard Worker punpckldq m7, m6 4540*c0909341SAndroid Build Coastguard Worker punpcklbw m7, m7 4541*c0909341SAndroid Build Coastguard Worker psraw m7, 8 4542*c0909341SAndroid Build Coastguard Worker pshufd m5, m7, q0000 4543*c0909341SAndroid Build Coastguard Worker pshufd m6, m7, q1111 4544*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m5 4545*c0909341SAndroid Build Coastguard Worker pmaddwd 
m0, m6 4546*c0909341SAndroid Build Coastguard Worker pshufd m5, m7, q2222 4547*c0909341SAndroid Build Coastguard Worker pshufd m7, m7, q3333 4548*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m5 4549*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m7 4550*c0909341SAndroid Build Coastguard Worker paddd m3, m0 4551*c0909341SAndroid Build Coastguard Worker paddd m2, m4 4552*c0909341SAndroid Build Coastguard Worker SWAP m5, m3 4553*c0909341SAndroid Build Coastguard Worker SWAP m7, m2 4554*c0909341SAndroid Build Coastguard Worker %define m8 m3 4555*c0909341SAndroid Build Coastguard Worker %endif 4556*c0909341SAndroid Build Coastguard Worker paddd m5, m13 4557*c0909341SAndroid Build Coastguard Worker pshufd m6, m12, q1032 4558*c0909341SAndroid Build Coastguard Worker pxor m8, m8 4559*c0909341SAndroid Build Coastguard Worker paddd m5, m7 4560*c0909341SAndroid Build Coastguard Worker psrad m5, m6 4561*c0909341SAndroid Build Coastguard Worker packssdw m5, m5 4562*c0909341SAndroid Build Coastguard Worker pmaxsw m5, m8 4563*c0909341SAndroid Build Coastguard Worker pminsw m5, pxmaxm 4564*c0909341SAndroid Build Coastguard Worker movd [dstq], m5 4565*c0909341SAndroid Build Coastguard Worker add dstq, dsmp 4566*c0909341SAndroid Build Coastguard Worker dec hd 4567*c0909341SAndroid Build Coastguard Worker jz .ret 4568*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 4569*c0909341SAndroid Build Coastguard Worker add myd, dyd 4570*c0909341SAndroid Build Coastguard Worker %else 4571*c0909341SAndroid Build Coastguard Worker add myd, dym 4572*c0909341SAndroid Build Coastguard Worker %endif 4573*c0909341SAndroid Build Coastguard Worker test myd, ~0x3ff 4574*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 4575*c0909341SAndroid Build Coastguard Worker SWAP m3, m5 4576*c0909341SAndroid Build Coastguard Worker SWAP m2, m7 4577*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0x20] 4578*c0909341SAndroid Build Coastguard Worker mova m0, [stk+0x30] 
4579*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0x40] 4580*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x50] 4581*c0909341SAndroid Build Coastguard Worker %endif 4582*c0909341SAndroid Build Coastguard Worker jz .w2_loop 4583*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 4584*c0909341SAndroid Build Coastguard Worker mov r3, r3m 4585*c0909341SAndroid Build Coastguard Worker %endif 4586*c0909341SAndroid Build Coastguard Worker movu m5, [srcq] 4587*c0909341SAndroid Build Coastguard Worker test myd, 0x400 4588*c0909341SAndroid Build Coastguard Worker jz .w2_skip_line 4589*c0909341SAndroid Build Coastguard Worker add srcq, ssq 4590*c0909341SAndroid Build Coastguard Worker shufps m3, m0, q1032 ; 01 12 4591*c0909341SAndroid Build Coastguard Worker shufps m0, m2, q1032 ; 23 34 4592*c0909341SAndroid Build Coastguard Worker shufps m2, m4, q1032 ; 45 56 4593*c0909341SAndroid Build Coastguard Worker pshufb m5, m14 4594*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m15 4595*c0909341SAndroid Build Coastguard Worker phaddd m5, m5 4596*c0909341SAndroid Build Coastguard Worker paddd m5, m11 4597*c0909341SAndroid Build Coastguard Worker psrad m5, m12 4598*c0909341SAndroid Build Coastguard Worker packssdw m5, m5 4599*c0909341SAndroid Build Coastguard Worker palignr m4, m5, m1, 12 4600*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m4, m4 ; 6 7 6 7 4601*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m1, m5 ; 67 __ 4602*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 4603*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m3 4604*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m0 4605*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 4606*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m4 4607*c0909341SAndroid Build Coastguard Worker %endif 4608*c0909341SAndroid Build Coastguard Worker jmp .w2_loop 4609*c0909341SAndroid Build Coastguard Worker.w2_skip_line: 4610*c0909341SAndroid 
Build Coastguard Worker movu m6, [srcq+ssq*1] 4611*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 4612*c0909341SAndroid Build Coastguard Worker mova m3, m0 ; 01 12 4613*c0909341SAndroid Build Coastguard Worker mova m0, m2 ; 23 34 4614*c0909341SAndroid Build Coastguard Worker pshufb m5, m14 4615*c0909341SAndroid Build Coastguard Worker pshufb m6, m14 4616*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m15 4617*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m15 4618*c0909341SAndroid Build Coastguard Worker phaddd m5, m6 4619*c0909341SAndroid Build Coastguard Worker paddd m5, m11 4620*c0909341SAndroid Build Coastguard Worker psrad m5, m12 4621*c0909341SAndroid Build Coastguard Worker packssdw m5, m5 ; 6 7 6 7 4622*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m5 ; 4 5 6 7 4623*c0909341SAndroid Build Coastguard Worker pshufd m5, m1, q0321 ; 5 6 7 _ 4624*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m1, m5 ; 45 56 4625*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m1, m5 ; 67 __ 4626*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 4627*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m3 4628*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m0 4629*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 4630*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m4 4631*c0909341SAndroid Build Coastguard Worker %endif 4632*c0909341SAndroid Build Coastguard Worker jmp .w2_loop 4633*c0909341SAndroid Build Coastguard Worker%endif 4634*c0909341SAndroid Build Coastguard WorkerINIT_XMM ssse3 4635*c0909341SAndroid Build Coastguard Worker.w4: 4636*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 4637*c0909341SAndroid Build Coastguard Worker mov myd, mym 4638*c0909341SAndroid Build Coastguard Worker mova [rsp+0x10], m11 4639*c0909341SAndroid Build Coastguard Worker mova [rsp+0x20], m12 4640*c0909341SAndroid Build Coastguard Worker %if isput 4641*c0909341SAndroid Build Coastguard 
Worker mova [rsp+0x30], m13 4642*c0909341SAndroid Build Coastguard Worker %endif 4643*c0909341SAndroid Build Coastguard Worker movzx t0d, t0b 4644*c0909341SAndroid Build Coastguard Worker sub srcq, 2 4645*c0909341SAndroid Build Coastguard Worker movd m15, t0d 4646*c0909341SAndroid Build Coastguard Worker%else 4647*c0909341SAndroid Build Coastguard Worker %define m8 m0 4648*c0909341SAndroid Build Coastguard Worker %xdefine m14 m4 4649*c0909341SAndroid Build Coastguard Worker %define m15 m3 4650*c0909341SAndroid Build Coastguard Worker movzx r4, byte [esp+0x1f0] 4651*c0909341SAndroid Build Coastguard Worker sub srcq, 2 4652*c0909341SAndroid Build Coastguard Worker movd m15, r4 4653*c0909341SAndroid Build Coastguard Worker%endif 4654*c0909341SAndroid Build Coastguard Worker pmaddwd m8, [base+rescale_mul] 4655*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 4656*c0909341SAndroid Build Coastguard Worker mova m9, [base+pd_0x4000] 4657*c0909341SAndroid Build Coastguard Worker%else 4658*c0909341SAndroid Build Coastguard Worker %define m9 [base+pd_0x4000] 4659*c0909341SAndroid Build Coastguard Worker%endif 4660*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0000 4661*c0909341SAndroid Build Coastguard Worker paddd m14, m8 ; mx+dx*[0-3] 4662*c0909341SAndroid Build Coastguard Worker pand m0, m14, m10 4663*c0909341SAndroid Build Coastguard Worker psrld m0, 6 4664*c0909341SAndroid Build Coastguard Worker paddd m15, m0 4665*c0909341SAndroid Build Coastguard Worker pshufd m7, m15, q1032 4666*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 4667*c0909341SAndroid Build Coastguard Worker movd r4d, m15 4668*c0909341SAndroid Build Coastguard Worker movd r11d, m7 4669*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 4670*c0909341SAndroid Build Coastguard Worker pshufd m7, m7, q0321 4671*c0909341SAndroid Build Coastguard Worker movd r6d, m15 4672*c0909341SAndroid Build Coastguard Worker movd r13d, m7 4673*c0909341SAndroid Build Coastguard Worker 
mova m10, [base+bdct_lb_q+ 0] 4674*c0909341SAndroid Build Coastguard Worker mova m11, [base+bdct_lb_q+16] 4675*c0909341SAndroid Build Coastguard Worker movd m13, [base+subpel_filters+ r4*8+2] 4676*c0909341SAndroid Build Coastguard Worker movd m2, [base+subpel_filters+ r6*8+2] 4677*c0909341SAndroid Build Coastguard Worker movd m15, [base+subpel_filters+r11*8+2] 4678*c0909341SAndroid Build Coastguard Worker movd m4, [base+subpel_filters+r13*8+2] 4679*c0909341SAndroid Build Coastguard Worker%else 4680*c0909341SAndroid Build Coastguard Worker movd r0, m15 4681*c0909341SAndroid Build Coastguard Worker movd r4, m7 4682*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 4683*c0909341SAndroid Build Coastguard Worker pshufd m7, m7, q0321 4684*c0909341SAndroid Build Coastguard Worker movd rX, m15 4685*c0909341SAndroid Build Coastguard Worker movd r5, m7 4686*c0909341SAndroid Build Coastguard Worker mova m5, [base+bdct_lb_q+ 0] 4687*c0909341SAndroid Build Coastguard Worker mova m6, [base+bdct_lb_q+16] 4688*c0909341SAndroid Build Coastguard Worker movd m1, [base+subpel_filters+r0*8+2] 4689*c0909341SAndroid Build Coastguard Worker movd m2, [base+subpel_filters+rX*8+2] 4690*c0909341SAndroid Build Coastguard Worker movd m3, [base+subpel_filters+r4*8+2] 4691*c0909341SAndroid Build Coastguard Worker movd m7, [base+subpel_filters+r5*8+2] 4692*c0909341SAndroid Build Coastguard Worker movifprep r3, r3m 4693*c0909341SAndroid Build Coastguard Worker SWAP m4, m7 4694*c0909341SAndroid Build Coastguard Worker %define m10 m5 4695*c0909341SAndroid Build Coastguard Worker %define m11 m6 4696*c0909341SAndroid Build Coastguard Worker %define m12 m1 4697*c0909341SAndroid Build Coastguard Worker %define m13 m1 4698*c0909341SAndroid Build Coastguard Worker%endif 4699*c0909341SAndroid Build Coastguard Worker psrld m14, 10 4700*c0909341SAndroid Build Coastguard Worker paddd m14, m14 4701*c0909341SAndroid Build Coastguard Worker punpckldq m13, m2 4702*c0909341SAndroid Build Coastguard 
; -----------------------------------------------------------------------------
; 4-column path: end of the setup code, then the per-output-row loop.
;
; The beginning of this setup lies above this point.  The code below finishes
; the two horizontal coefficient vectors (m13, m15) and the two pshufb gather
; masks (m12, m14), filters the first eight source rows horizontally, and
; interleaves them into row pairs (01, 12, 23, ... 67) for the vertical filter.
; -----------------------------------------------------------------------------
    punpckldq   m15, m4
    punpcklqdq  m13, m15
    pxor        m2, m2
    pcmpeqd     m0, m2                  ; all-ones in every dword of m0 that was 0
%if ARCH_X86_64
    pand        m9, m0
%else
    pand        m2, m9, m0
 %define m9 m2
    SWAP        m7, m4
%endif
    pandn       m0, m13                 ; blend: m9 where the mask is set,
%if ARCH_X86_64
    SWAP        m13, m0
%else
 %define m13 m0
%endif
    por         m13, m9                 ; ...m13 everywhere else
    punpckhbw   m15, m13, m13           ; duplicate each byte, then arithmetic
    punpcklbw   m13, m13                ; shift by 8: bytes become sign-extended
    psraw       m15, 8                  ; words (high half -> m15, low -> m13)
    psraw       m13, 8
    pshufb      m12, m14, m10
    pshufb      m14, m11
    mova        m10, [base+spel_s_shuf2]
    movd        r4d, m14
    shr         r4d, 24                 ; r4 = byte 3 of m14; used below as the
                                        ; byte offset of the second load position
%if ARCH_X86_32
    mova        [stk+0x20], m13
    mova        [stk+0x30], m15
    pxor        m2, m2                  ; m2 was overwritten (as m9) above
%endif
    pshufb      m7, m14, m2             ; broadcast byte 0 of m14
    psubb       m14, m7                 ; make m14 relative to its first byte
    paddb       m12, m10
    paddb       m14, m10
%if ARCH_X86_64
    ; Row offsets for the second load position (srcq+r4).
    lea         r6, [r4+ssq*1]
    lea         r11, [r4+ssq*2]
    lea         r13, [r4+ss3q]
    movu        m7, [srcq+ssq*0]
    movu        m9, [srcq+ssq*1]
    movu        m8, [srcq+ssq*2]
    movu        m10, [srcq+ss3q]
    movu        m1, [srcq+r4]
    movu        m3, [srcq+r6]
    movu        m2, [srcq+r11]
    movu        m4, [srcq+r13]
    lea         srcq, [srcq+ssq*4]
    REPX {pshufb  x, m12}, m7, m9, m8, m10
    REPX {pmaddwd x, m13}, m7, m9, m8, m10
    REPX {pshufb  x, m14}, m1, m2, m3, m4
    REPX {pmaddwd x, m15}, m1, m2, m3, m4
    ; Value added before, and count used for, the right shift of the
    ; horizontal sums (both read back from the stack).
    mova        m5, [rsp+0x10]
    movd        xm6, [rsp+0x20]
    phaddd      m7, m1
    phaddd      m9, m3
    phaddd      m8, m2
    phaddd      m10, m4
    movu        m1, [srcq+ssq*0]
    movu        m2, [srcq+ssq*1]
    movu        m3, [srcq+ssq*2]
    movu        m4, [srcq+ss3q]
    REPX {paddd x, m5}, m7, m9, m8, m10
    REPX {psrad x, xm6}, m7, m9, m8, m10
    packssdw    m7, m9                  ; 0 1
    packssdw    m8, m10                 ; 2 3
    movu        m0, [srcq+r4]
    movu        m9, [srcq+r6]
    movu        m10, [srcq+r11]
    movu        m11, [srcq+r13]
    lea         srcq, [srcq+ssq*4]
    REPX {pshufb  x, m12}, m1, m2, m3, m4
    REPX {pmaddwd x, m13}, m1, m2, m3, m4
    REPX {pshufb  x, m14}, m0, m9, m10, m11
    REPX {pmaddwd x, m15}, m0, m9, m10, m11
    phaddd      m1, m0
    phaddd      m2, m9
    phaddd      m3, m10
    phaddd      m4, m11
    REPX {paddd x, m5}, m1, m2, m3, m4
    REPX {psrad x, xm6}, m1, m2, m3, m4
    packssdw    m1, m2                  ; 4 5
    packssdw    m3, m4                  ; 6 7
    SWAP        m9, m1
    shufps      m4, m7, m8, q1032       ; 1 2
    shufps      m5, m8, m9, q1032       ; 3 4
    shufps      m6, m9, m3, q1032       ; 5 6
    pshufd      m10, m3, q1032          ; 7 _
    punpcklwd   m0, m7, m4              ; 01
    punpckhwd   m7, m4                  ; 12
    punpcklwd   m1, m8, m5              ; 23
    punpckhwd   m8, m5                  ; 34
    punpcklwd   m2, m9, m6              ; 45
    punpckhwd   m9, m6                  ; 56
    punpcklwd   m3, m10                 ; 67
    ; Even pairs stay in m0-m3; odd pairs are parked on the stack and m10
    ; keeps row 7 so the next pair can be formed when a new row arrives.
    mova        [rsp+0x40], m7
    mova        [rsp+0x50], m8
    mova        [rsp+0x60], m9
%else
    mova        [stk+0x00], m12
    mova        [stk+0x10], m14
    add         r4, srcq                ; r4 = absolute address of the second
                                        ; load position
    ; NOTE(review): MC_4TAP_SCALED_H is defined elsewhere; judging by the
    ; loads that follow, each call leaves two filtered rows at stk+<arg>.
    MC_4TAP_SCALED_H 0x40               ; 0 1
    MC_4TAP_SCALED_H 0x50               ; 2 3
    MC_4TAP_SCALED_H 0x60               ; 4 5
    MC_4TAP_SCALED_H 0x70               ; 6 7
    mova        m4, [stk+0x40]
    mova        m5, [stk+0x50]
    mova        m6, [stk+0x60]
    mova        m7, [stk+0x70]
    mov         [stk+0xc0], r4
    shufps      m1, m4, m5, q1032       ; 1 2
    shufps      m2, m5, m6, q1032       ; 3 4
    shufps      m3, m6, m7, q1032       ; 5 6
    pshufd      m0, m7, q1032           ; 7 _
    mova        [stk+0xb0], m0
    punpcklwd   m0, m4, m1              ; 01
    punpckhwd   m4, m1                  ; 12
    punpcklwd   m1, m5, m2              ; 23
    punpckhwd   m5, m2                  ; 34
    punpcklwd   m2, m6, m3              ; 45
    punpckhwd   m6, m3                  ; 56
    punpcklwd   m3, m7, [stk+0xb0]      ; 67
    mov         myd, mym
    mov         r0, r0m
    mova        [stk+0x40], m0          ; 01
    mova        [stk+0x50], m1          ; 23
    mova        [stk+0x60], m2          ; 45
    mova        [stk+0x70], m3          ; 67
    mova        [stk+0x80], m4          ; 12
    mova        [stk+0x90], m5          ; 34
    mova        [stk+0xa0], m6          ; 56
    ; Only eight xmm registers here: the high "registers" become stack slots.
 %define m12 [stk+0x00]
 %define m14 [stk+0x10]
 %define m13 [stk+0x20]
 %define m15 [stk+0x30]
 %define hrnd_mem [esp+0x00]
 %define hsh_mem [esp+0x10]
 %if isput
  %define vrnd_mem [esp+0x20]
 %else
  %define vrnd_mem [base+pd_m524256]
 %endif
%endif

; -----------------------------------------------------------------------------
; One iteration = one output row of 4 pixels.
;   1. Pick the vertical filter from bits 6..9 of my (plus the t1 / [esp+0x1f4]
;      table offset); if those bits are zero, use the constant 64 << 24, i.e.
;      a single coefficient of 64 in byte 3 of the 8-byte filter.
;   2. Apply it to the four row pairs, round, shift, pack and store.
;   3. Add dy to my and fetch zero, one or two new source rows depending on
;      the bits of my above bit 9.
; -----------------------------------------------------------------------------
.w4_loop:
    and         myd, 0x3ff
%if ARCH_X86_64
    mov         r11d, 64 << 24
    mov         r13d, myd
    shr         r13d, 6                 ; sets ZF; lea below leaves flags alone
    lea         r13d, [t1+r13]
    cmovnz      r11q, [base+subpel_filters+r13*8]
    movq        m9, r11q
    punpcklbw   m9, m9
    psraw       m9, 8                   ; 8 signed bytes -> 8 signed words
    pshufd      m7, m9, q0000
    pshufd      m8, m9, q1111
    pmaddwd     m4, m0, m7
    pmaddwd     m5, m1, m8
    pshufd      m7, m9, q2222
    pshufd      m9, m9, q3333
    pmaddwd     m6, m2, m7
    pmaddwd     m8, m3, m9
 %if isput
    movd        m9, [rsp+0x28]          ; shift count for the psrad below
  %define vrnd_mem [rsp+0x30]
 %else
  %define vrnd_mem [base+pd_m524256]
 %endif
    paddd       m4, m5
    paddd       m6, m8
    paddd       m4, m6
    paddd       m4, vrnd_mem
%else
    mov         mym, myd
    mov         r5, [esp+0x1f4]
    xor         r3, r3
    ; NOTE(review): r4 appears to be the register behind myd here - confirm.
    shr         r4, 6                   ; sets ZF; lea/mov below leave flags alone
    lea         r5, [r5+r4]
    mov         r4, 64 << 24
    cmovnz      r4, [base+subpel_filters+r5*8+0]
    cmovnz      r3, [base+subpel_filters+r5*8+4]
    movd        m7, r4
    movd        m6, r3
    punpckldq   m7, m6
    punpcklbw   m7, m7
    psraw       m7, 8                   ; 8 signed bytes -> 8 signed words
    pshufd      m4, m7, q0000
    pshufd      m5, m7, q1111
    pshufd      m6, m7, q2222
    pshufd      m7, m7, q3333
    pmaddwd     m0, m4                  ; in place: m0-m3 are reloaded or rebuilt
    pmaddwd     m1, m5                  ; before the next iteration
    pmaddwd     m2, m6
    pmaddwd     m3, m7
 %if isput
    movd        m4, [esp+0x18]          ; shift count for the psrad below
 %endif
    paddd       m0, m1
    paddd       m2, m3
    paddd       m0, vrnd_mem
    paddd       m0, m2
    SWAP        m4, m0                  ; from here m4 names the sum and m0
 %define m9 m0                          ; (alias m9) the shift count
%endif
%if isput
    pxor        m5, m5
    psrad       m4, m9
    packssdw    m4, m4
    pmaxsw      m4, m5                  ; clamp to [0, pxmaxm]
    pminsw      m4, pxmaxm
    movq        [dstq], m4              ; 4 words = 8 bytes
    add         dstq, dsmp
%else
    psrad       m4, 6
    packssdw    m4, m4
    movq        [tmpq], m4
    add         tmpq, 8
%endif
    dec         hd
    jz          .ret
%if ARCH_X86_64
    add         myd, dyd
    test        myd, ~0x3ff
    jz          .w4_loop                ; no bits above bit 9: same rows again
    mova        m8, [rsp+0x10]
    movd        m9, [rsp+0x20]
    movu        m4, [srcq]
    movu        m5, [srcq+r4]
    test        myd, 0x400
    jz          .w4_skip_line           ; bit 10 clear: take the two-row path
    ; One new row: the pairs parked on the stack (12 34 56) become the live
    ; ones and the old live pairs (23 45 67) take their place.
    mova        m0, [rsp+0x40]
    mova        [rsp+0x40], m1
    mova        m1, [rsp+0x50]
    mova        [rsp+0x50], m2
    mova        m2, [rsp+0x60]
    mova        [rsp+0x60], m3
    pshufb      m4, m12
    pshufb      m5, m14
    pmaddwd     m4, m13
    pmaddwd     m5, m15
    phaddd      m4, m5
    paddd       m4, m8
    psrad       m4, m9
    packssdw    m4, m4
    punpcklwd   m3, m10, m4             ; pair (previous last row, new row)
    mova        m10, m4                 ; remember the new last row
    add         srcq, ssq
    jmp         .w4_loop
.w4_skip_line:
    ; Two new rows: every pair moves down by one slot.
    movu        m6, [srcq+ssq*1]
    movu        m7, [srcq+r6]
    mova        m0, [rsp+0x50]
    mova        m11, [rsp+0x60]
    pshufb      m4, m12
    pshufb      m6, m12
    pshufb      m5, m14
    pshufb      m7, m14
    pmaddwd     m4, m13
    pmaddwd     m6, m13
    pmaddwd     m5, m15
    pmaddwd     m7, m15
    mova        [rsp+0x40], m0
    mova        [rsp+0x50], m11
    phaddd      m4, m5
    phaddd      m6, m7
    paddd       m4, m8
    paddd       m6, m8
    psrad       m4, m9
    psrad       m6, m9
    packssdw    m4, m6                  ; low half: first new row, high: second
    punpcklwd   m9, m10, m4             ; pair (previous last row, first new row)
    mova        [rsp+0x60], m9
    pshufd      m10, m4, q1032          ; second new row into the low half
    mova        m0, m1
    mova        m1, m2
    mova        m2, m3
    punpcklwd   m3, m4, m10             ; pair (first new row, second new row)
    lea         srcq, [srcq+ssq*2]
    jmp         .w4_loop
%else
    SWAP        m0, m4                  ; undo the rename done before the store
    mov         myd, mym
    mov         r3, r3m
    add         myd, dym
    test        myd, ~0x3ff
    jnz         .w4_next_line
    ; Same rows again: reload the pairs that pmaddwd overwrote in place.
    mova        m0, [stk+0x40]
    mova        m1, [stk+0x50]
    mova        m2, [stk+0x60]
    mova        m3, [stk+0x70]
    jmp         .w4_loop
.w4_next_line:
    mov         r5, [stk+0xc0]
    movu        m4, [srcq]
    movu        m5, [r5]
    test        myd, 0x400
    jz          .w4_skip_line           ; bit 10 clear: take the two-row path
    add         [stk+0xc0], ssq
    ; One new row: swap the two sets of pairs kept at stk+0x40.. (live) and
    ; stk+0x80.. (parked).
    mova        m0, [stk+0x80]
    mova        m3, [stk+0x50]
    mova        [stk+0x40], m0
    mova        [stk+0x80], m3
    mova        m1, [stk+0x90]
    mova        m6, [stk+0x60]
    mova        [stk+0x50], m1
    mova        [stk+0x90], m6
    mova        m2, [stk+0xa0]
    mova        m7, [stk+0x70]
    mova        [stk+0x60], m2
    mova        [stk+0xa0], m7
    pshufb      m4, m12
    pshufb      m5, m14
    pmaddwd     m4, m13
    pmaddwd     m5, m15
    phaddd      m4, m5
    paddd       m4, hrnd_mem
    psrad       m4, hsh_mem
    packssdw    m4, m4
    punpcklwd   m3, [stk+0xb0], m4      ; pair (previous last row, new row)
    mova        [stk+0xb0], m4          ; remember the new last row
    mova        [stk+0x70], m3
    add         srcq, ssq
    jmp         .w4_loop
.w4_skip_line:
    ; Two new rows: every pair moves down by one slot.
    movu        m6, [srcq+ssq*1]
    movu        m7, [r5+ssq*1]
    lea         r5, [r5+ssq*2]
    mov         [stk+0xc0], r5
    mova        m0, [stk+0x50]
    mova        m1, [stk+0x60]
    mova        m2, [stk+0x70]
    mova        m3, [stk+0x90]
    pshufb      m4, m12
    pshufb      m6, m12
    pshufb      m5, m14
    pshufb      m7, m14
    pmaddwd     m4, m13
    pmaddwd     m6, m13
    pmaddwd     m5, m15
    pmaddwd     m7, m15
    mova        [stk+0x40], m0
    mova        [stk+0x50], m1
    mova        [stk+0x60], m2
    mova        [stk+0x80], m3
    phaddd      m4, m5
    phaddd      m6, m7
    mova        m5, [stk+0xa0]
    mova        m7, [stk+0xb0]
    paddd       m4, hrnd_mem
    paddd       m6, hrnd_mem
    psrad       m4, hsh_mem
    psrad       m6, hsh_mem
    packssdw    m4, m6                  ; low half: first new row, high: second
    punpcklwd   m7, m4                  ; pair (previous last row, first new row)
    pshufd      m6, m4, q1032           ; second new row into the low half
    mova        [stk+0x90], m5
    mova        [stk+0xa0], m7
    mova        [stk+0xb0], m6
    punpcklwd   m3, m4, m6              ; pair (first new row, second new row)
    mova        [stk+0x70], m3
    lea         srcq, [srcq+ssq*2]
    jmp         .w4_loop
%endif
INIT_XMM ssse3
%if ARCH_X86_64
 %define stk rsp+0x20
%endif
; Width entry point.  The value written to [stk+0xf0] is the counter that
; .hloop_prep decrements once per pass.
; NOTE(review): movifprep is defined elsewhere - presumably it emits the mov
; only in the prep variant; confirm.
.w8:
    mov         dword [stk+0xf0], 1
    movifprep   tmp_stridem, 16
; -----------------------------------------------------------------------------
; Width entry points (the jmp directly below is the last instruction of the
; entry point that precedes .w16) and the shared start-up code.
;
; Each entry stores a pass count in [stk+0xf0] - 2, 4, 8, 16 for the labels
; w16, w32, w64, w128, i.e. the number in the label divided by 8 - which
; .hloop_prep decrements once per pass.  The value handed to movifprep is
; twice the number in the label.
; NOTE(review): movifprep is defined elsewhere - presumably it emits the mov
; only in the prep variant; confirm.
; -----------------------------------------------------------------------------
    jmp         .w_start
.w16:
    mov         dword [stk+0xf0], 2
    movifprep   tmp_stridem, 32
    jmp         .w_start
.w32:
    mov         dword [stk+0xf0], 4
    movifprep   tmp_stridem, 64
    jmp         .w_start
.w64:
    mov         dword [stk+0xf0], 8
    movifprep   tmp_stridem, 128
    jmp         .w_start
.w128:
    mov         dword [stk+0xf0], 16
    movifprep   tmp_stridem, 256
    ; falls through
.w_start:
%if ARCH_X86_64
 %ifidn %1, put
    movifnidn   dsm, dsq
 %endif
    mova        [rsp+0x10], m11         ; keep a copy; m11 is reloaded from here
 %define hround m11                     ; in .hloop_prep
    shr         t0d, 16
    movd        m15, t0d
 %if isprep
    mova        m13, [base+pd_m524256]
 %endif
%else
    ; Only eight xmm registers: map the high register names onto memory
    ; operands or onto low registers.
 %define hround [esp+0x00]
 %define m12 [esp+0x10]
 %define m10 [base+pd_0x3ff]
 %define m8 m0
 %xdefine m14 m4
 %define m15 m3
 %if isprep
  %define ssq ssm
 %endif
    mov         r4, [esp+0x1f0]
    shr         r4, 16
    movd        m15, r4
    mov         r0, r0m
    mov         myd, mym
%endif
    ; NOTE(review): 6 bytes is presumably 3 pixels of 2 bytes each (the left
    ; taps of the horizontal filter) - confirm against the pixel size in use.
    sub         srcq, 6
    pslld       m7, m8, 2               ; dx*4
    pmaddwd     m8, [base+rescale_mul]  ; dx*[0-3]
    pshufd      m15, m15, q0000
    paddd       m14, m8                 ; mx+dx*[0-3]
    ; State that .hloop_prep restores at the start of every further pass.
    mova        [stk+0x100], m7
    mova        [stk+0x120], m15
    mov         [stk+0x0f8], srcq
    mov         [stk+0x130], r0q        ; dstq / tmpq
%if ARCH_X86_64 && UNIX64
    mov         hm, hd
%elif ARCH_X86_32
    mov         r5, hm
    mov         [stk+0x0f4], myd
    mov         [stk+0x134], r5
%endif
    jmp         .hloop
; Start of the next pass: count down the value stored by the width entry
; point.  The conditional jump that consumes the flags follows directly.
.hloop_prep:
    dec         dword [stk+0x0f0]
; -----------------------------------------------------------------------------
; Remainder of .hloop_prep (the flags tested by the jz below come from the
; "dec dword [stk+0x0f0]" immediately before it): finish when the pass counter
; reaches zero, otherwise move the destination 16 bytes to the right and
; restore the state saved before the first pass.
; -----------------------------------------------------------------------------
    jz          .ret
%if ARCH_X86_64
    add         qword [stk+0x130], 16
    mov         hd, hm
%else
    add         dword [stk+0x130], 16
    mov         myd, [stk+0x0f4]
    mov         r5, [stk+0x134]
    mov         r0, [stk+0x130]
%endif
    mova        m7, [stk+0x100]         ; dx*4
    mova        m14, [stk+0x110]        ; mx for columns 4-7 of the last pass
%if ARCH_X86_64
    mova        m10, [base+pd_0x3ff]
    mova        m11, [rsp+0x10]
%endif
    mova        m15, [stk+0x120]
    mov         srcq, [stk+0x0f8]
%if ARCH_X86_64
    mov         r0q, [stk+0x130]        ; dstq / tmpq
%else
    mov         mym, myd
    mov         hm, r5
    mov         r0m, r0
    mov         r3, r3m
%endif
    paddd       m14, m7                 ; advance mx by another dx*4

; -----------------------------------------------------------------------------
; One pass = 8 output columns.  For each column:
;   integer source offset = mx >> 10
;   filter index          = m15 + ((mx & 0x3ff) >> 6)
; Columns whose fractional index is zero get the constant at pq_0x40000000
; instead of a table entry.
; NOTE(review): going by its name, that constant is 0x40000000 per qword, i.e.
; a single coefficient of 64 in byte 3 of the 8-byte filter - confirm.
; -----------------------------------------------------------------------------
.hloop:
%if ARCH_X86_64
    mova        m9, [base+pq_0x40000000]
%else
 %define m9 [base+pq_0x40000000]
%endif
    pxor        m1, m1
    psrld       m2, m14, 10
    mova        [stk], m2               ; integer offsets of columns 0-3
    pand        m6, m14, m10
    psrld       m6, 6
    paddd       m5, m15, m6             ; filter indices of columns 0-3
    pcmpeqd     m6, m1                  ; mask: fractional index == 0
    pshufd      m2, m5, q1032
%if ARCH_X86_64
    movd        r4d, m5
    movd        r6d, m2
    pshufd      m5, m5, q0321
    pshufd      m2, m2, q0321
    movd        r7d, m5
    movd        r9d, m2
    movq        m0, [base+subpel_filters+r4*8]
    movq        m1, [base+subpel_filters+r6*8]
    movhps      m0, [base+subpel_filters+r7*8]
    movhps      m1, [base+subpel_filters+r9*8]
%else
    ; NOTE(review): rX is a register alias defined elsewhere.
    movd        r0, m5
    movd        rX, m2
    pshufd      m5, m5, q0321
    pshufd      m2, m2, q0321
    movd        r4, m5
    movd        r5, m2
    movq        m0, [base+subpel_filters+r0*8]
    movq        m1, [base+subpel_filters+rX*8]
    movhps      m0, [base+subpel_filters+r4*8]
    movhps      m1, [base+subpel_filters+r5*8]
%endif
    paddd       m14, m7                 ; mx+dx*[4-7]
    pand        m5, m14, m10
    psrld       m5, 6
    paddd       m15, m5                 ; filter indices of columns 4-7
    pxor        m2, m2
    pcmpeqd     m5, m2                  ; mask: fractional index == 0
    mova        [stk+0x110], m14
    pshufd      m4, m15, q1032
; NOTE(review): this %if is not closed within the lines shown here; its
; %else / %endif lies further down.
%if ARCH_X86_64
    movd        r10d, m15
    movd        r11d, m4
    pshufd      m15, m15, q0321
    pshufd      m4, m4, q0321
    movd        r13d, m15
    movd        rXd, m4
    movq        m2, [base+subpel_filters+r10*8]
    movq        m3, [base+subpel_filters+r11*8]
    movhps      m2, [base+subpel_filters+r13*8]
    movhps      m3, [base+subpel_filters+rX*8]
    ; Integer offsets: columns 4-7 straight from m14 into r10, r11, r13, rX;
    ; columns 0-3 from the copy stored at [stk] into r4, r6, r7, r9.
    psrld       m14, 10
    movq        r11, m14
    punpckhqdq  m14, m14
    movq        rX, m14
    mov         r10d, r11d
    shr         r11, 32
    mov         r13d, rXd
    shr         rX, 32
    mov         r4d, [stk+0]
    mov         r6d, [stk+4]
    mov         r7d, [stk+8]
    mov         r9d, [stk+12]
    ; Widen each per-column dword mask to the 8 bytes of that column's filter.
    pshufd      m4, m6, q1100
    pshufd      m6, m6, q3322
    pshufd      m14, m5, q1100
    pshufd      m5, m5, q3322
    ; Blend: m9 where the mask is set, the table entry elsewhere.
    pand        m7, m9, m4
    pand        m8, m9, m6
    pand        m15, m9, m14
    pand        m9, m9, m5
    pandn       m4, m0
    pandn       m6, m1
    pandn       m14, m2
    pandn       m5, m3
    por         m7, m4
    por         m8, m6
    por         m15, m14
    por         m9, m5
    ; Duplicate each byte and shift right arithmetically by 8: the signed
    ; byte coefficients become signed words, one vector per column.
    punpcklbw   m0, m7, m7
    punpckhbw   m7, m7
    punpcklbw   m1, m8, m8
    punpckhbw   m8, m8
    psraw       m0, 8
    psraw       m7, 8
    psraw       m1, 8
    psraw       m8, 8
    punpcklbw   m2, m15, m15
    punpckhbw   m15, m15
    punpcklbw   m3, m9, m9
    punpckhbw   m9, m9
    psraw       m2, 8
    psraw       m15, 8
    psraw       m3, 8
    psraw       m9, 8
    mova        [stk+0x10], m0
    mova        [stk+0x20], m7
    mova        [stk+0x30], m1
    mova        [stk+0x40], m8
    mova        [stk+0x50], m2
    mova        [stk+0x60], m15
    mova        [stk+0x70], m3
    mova        [stk+0x80], m9
    ; NOTE(review): MC_8TAP_SCALED_H is defined elsewhere; the trailing
    ; comments give the source row each call produces.  Rows 0-4 are spilled
    ; because the argument lists of later calls name those registers again.
    MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 9, 10 ; 0
    mova        [stk+0x90], m1
    MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 1, 9, 10 ; 1
    mova        [stk+0xa0], m2
    MC_8TAP_SCALED_H 3, 4, 5, 6, 1, 2, 9, 10 ; 2
    mova        [stk+0xb0], m3
    MC_8TAP_SCALED_H 4, 5, 6, 1, 2, 3, 9, 10 ; 3
    mova        [stk+0xc0], m4
    MC_8TAP_SCALED_H 5, 6, 1, 2, 3, 4, 9, 10 ; 4
    mova        [stk+0xd0], m5
    MC_8TAP_SCALED_H 6, 1, 2, 3, 4, 5, 9, 10 ; 5
    MC_8TAP_SCALED_H 7, 1, 2, 3, 4, 5, 9, 10 ; 6
    MC_8TAP_SCALED_H 8, 1, 2, 3, 4, 5, 9, 10 ; 7
    mova        m5, [stk+0xd0]
    mova        m1, [stk+0x90]
    mova        m2, [stk+0xa0]
    mova        m3, [stk+0xb0]
    mova        m9, [stk+0xc0]
    mov         myd, mym
    mov         dyd, dym
    ; Interleave rows into pairs; "a" / "b" are the low / high word halves.
    punpcklwd   m4, m5, m6              ; 45a
    punpckhwd   m5, m6                  ; 45b
    punpcklwd   m6, m7, m8              ; 67a
    punpckhwd   m7, m8                  ; 67b
    punpcklwd   m0, m1, m2              ; 01a
    punpckhwd   m1, m2                  ; 01b
    punpcklwd   m2, m3, m9              ; 23a
    punpckhwd   m3, m9                  ; 23b
    mova        [stk+0x90], m4
    mova        [stk+0xa0], m5
    mova        [stk+0xb0], m6
    mova        [stk+0xc0], m7
 %define hround [rsp+0x10]              ; m11 is reused below, so use the copy
; Vertical loop (continues past this point).  Filter selection: bits 6..9 of
; my plus t1 index subpel_filters; if those bits are zero the constant
; 64 << 24 is used instead.
.vloop:
    and         myd, 0x3ff
    mov         r6d, 64 << 24
    mov         r4d, myd
    shr         r4d, 6                  ; sets ZF; lea below leaves flags alone
    lea         r4d, [t1+r4]
    cmovnz      r6q, [base+subpel_filters+r4*8]
    movq        m11, r6q
    punpcklbw   m11, m11
    psraw       m11, 8                  ; 8 signed bytes -> 8 signed words
    pshufd      m5, m11, q0000
    pshufd      m7, m11, q1111
    pshufd      m10, m11, q2222
    pshufd      m11, m11, q3333
Worker pmaddwd m4, m5, m0 5320*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m5, m1 5321*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m7, m2 5322*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m7, m3 5323*c0909341SAndroid Build Coastguard Worker paddd m4, m13 5324*c0909341SAndroid Build Coastguard Worker paddd m5, m13 5325*c0909341SAndroid Build Coastguard Worker paddd m4, m6 5326*c0909341SAndroid Build Coastguard Worker paddd m5, m7 5327*c0909341SAndroid Build Coastguard Worker pmaddwd m6, [stk+0x90], m10 5328*c0909341SAndroid Build Coastguard Worker pmaddwd m7, [stk+0xa0], m10 5329*c0909341SAndroid Build Coastguard Worker pmaddwd m8, [stk+0xb0], m11 5330*c0909341SAndroid Build Coastguard Worker pmaddwd m9, [stk+0xc0], m11 5331*c0909341SAndroid Build Coastguard Worker paddd m4, m6 5332*c0909341SAndroid Build Coastguard Worker paddd m5, m7 5333*c0909341SAndroid Build Coastguard Worker %if isput 5334*c0909341SAndroid Build Coastguard Worker pshufd m6, m12, q1032 5335*c0909341SAndroid Build Coastguard Worker %endif 5336*c0909341SAndroid Build Coastguard Worker paddd m4, m8 5337*c0909341SAndroid Build Coastguard Worker paddd m5, m9 5338*c0909341SAndroid Build Coastguard Worker%else 5339*c0909341SAndroid Build Coastguard Worker movd r0, m15 5340*c0909341SAndroid Build Coastguard Worker movd rX, m4 5341*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 5342*c0909341SAndroid Build Coastguard Worker pshufd m4, m4, q0321 5343*c0909341SAndroid Build Coastguard Worker movd r4, m15 5344*c0909341SAndroid Build Coastguard Worker movd r5, m4 5345*c0909341SAndroid Build Coastguard Worker mova m14, [stk+0x110] 5346*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+r0*8] 5347*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+rX*8] 5348*c0909341SAndroid Build Coastguard Worker movhps m2, [base+subpel_filters+r4*8] 5349*c0909341SAndroid Build Coastguard Worker movhps m3, [base+subpel_filters+r5*8] 
5350*c0909341SAndroid Build Coastguard Worker psrld m14, 10 5351*c0909341SAndroid Build Coastguard Worker mova [stk+16], m14 5352*c0909341SAndroid Build Coastguard Worker mov r0, [stk+ 0] 5353*c0909341SAndroid Build Coastguard Worker mov rX, [stk+ 4] 5354*c0909341SAndroid Build Coastguard Worker mov r4, [stk+ 8] 5355*c0909341SAndroid Build Coastguard Worker mov r5, [stk+12] 5356*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m0 5357*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 5358*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 5359*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 5360*c0909341SAndroid Build Coastguard Worker pshufd m4, m6, q1100 5361*c0909341SAndroid Build Coastguard Worker pshufd m6, m6, q3322 5362*c0909341SAndroid Build Coastguard Worker pshufd m7, m5, q1100 5363*c0909341SAndroid Build Coastguard Worker pshufd m5, m5, q3322 5364*c0909341SAndroid Build Coastguard Worker pand m0, m9, m4 5365*c0909341SAndroid Build Coastguard Worker pand m1, m9, m6 5366*c0909341SAndroid Build Coastguard Worker pand m2, m9, m7 5367*c0909341SAndroid Build Coastguard Worker pand m3, m9, m5 5368*c0909341SAndroid Build Coastguard Worker pandn m4, [stk+0x20] 5369*c0909341SAndroid Build Coastguard Worker pandn m6, [stk+0x30] 5370*c0909341SAndroid Build Coastguard Worker pandn m7, [stk+0x40] 5371*c0909341SAndroid Build Coastguard Worker pandn m5, [stk+0x50] 5372*c0909341SAndroid Build Coastguard Worker por m0, m4 5373*c0909341SAndroid Build Coastguard Worker por m1, m6 5374*c0909341SAndroid Build Coastguard Worker por m2, m7 5375*c0909341SAndroid Build Coastguard Worker por m3, m5 5376*c0909341SAndroid Build Coastguard Worker punpcklbw m4, m0, m0 5377*c0909341SAndroid Build Coastguard Worker punpckhbw m0, m0 5378*c0909341SAndroid Build Coastguard Worker punpcklbw m5, m1, m1 5379*c0909341SAndroid Build Coastguard Worker punpckhbw m1, m1 5380*c0909341SAndroid Build Coastguard Worker psraw m4, 8 5381*c0909341SAndroid Build 
Coastguard Worker psraw m0, 8 5382*c0909341SAndroid Build Coastguard Worker psraw m5, 8 5383*c0909341SAndroid Build Coastguard Worker psraw m1, 8 5384*c0909341SAndroid Build Coastguard Worker punpcklbw m6, m2, m2 5385*c0909341SAndroid Build Coastguard Worker punpckhbw m2, m2 5386*c0909341SAndroid Build Coastguard Worker punpcklbw m7, m3, m3 5387*c0909341SAndroid Build Coastguard Worker punpckhbw m3, m3 5388*c0909341SAndroid Build Coastguard Worker psraw m6, 8 5389*c0909341SAndroid Build Coastguard Worker psraw m2, 8 5390*c0909341SAndroid Build Coastguard Worker psraw m7, 8 5391*c0909341SAndroid Build Coastguard Worker psraw m3, 8 5392*c0909341SAndroid Build Coastguard Worker mova [stk+0x0a0], m4 5393*c0909341SAndroid Build Coastguard Worker mova [stk+0x0b0], m0 5394*c0909341SAndroid Build Coastguard Worker mova [stk+0x0c0], m5 5395*c0909341SAndroid Build Coastguard Worker mova [stk+0x0d0], m1 5396*c0909341SAndroid Build Coastguard Worker mova [stk+0x140], m6 5397*c0909341SAndroid Build Coastguard Worker mova [stk+0x150], m2 5398*c0909341SAndroid Build Coastguard Worker mova [stk+0x160], m7 5399*c0909341SAndroid Build Coastguard Worker mova [stk+0x170], m3 5400*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x20, 0 ; 0 5401*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x30 ; 1 5402*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x40 ; 2 5403*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x50 ; 3 5404*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x60 ; 4 5405*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x70 ; 5 5406*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x80 ; 6 5407*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x90 ; 7 5408*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0x60] 5409*c0909341SAndroid Build Coastguard Worker mova m6, [stk+0x70] 5410*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0x80] 
5411*c0909341SAndroid Build Coastguard Worker mova m0, [stk+0x90] 5412*c0909341SAndroid Build Coastguard Worker mov myd, mym 5413*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5, m6 ; 45a 5414*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m6 ; 45b 5415*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7, m0 ; 67a 5416*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m0 ; 67b 5417*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m4 5418*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m5 5419*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m6 5420*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m7 5421*c0909341SAndroid Build Coastguard Worker mova m1, [stk+0x20] 5422*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0x30] 5423*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0x40] 5424*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x50] 5425*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1, m2 ; 01a 5426*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m2 ; 01b 5427*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3, m4 ; 23a 5428*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 ; 23b 5429*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m0 5430*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 5431*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 5432*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 5433*c0909341SAndroid Build Coastguard Worker.vloop: 5434*c0909341SAndroid Build Coastguard Worker mov r0, r0m 5435*c0909341SAndroid Build Coastguard Worker mov r5, [esp+0x1f4] 5436*c0909341SAndroid Build Coastguard Worker and myd, 0x3ff 5437*c0909341SAndroid Build Coastguard Worker mov mym, myd 5438*c0909341SAndroid Build Coastguard Worker xor r3, r3 5439*c0909341SAndroid Build Coastguard Worker shr r4, 6 5440*c0909341SAndroid Build Coastguard Worker lea r5, [r5+r4] 5441*c0909341SAndroid Build Coastguard 
Worker mov r4, 64 << 24 5442*c0909341SAndroid Build Coastguard Worker cmovnz r4, [base+subpel_filters+r5*8+0] 5443*c0909341SAndroid Build Coastguard Worker cmovnz r3, [base+subpel_filters+r5*8+4] 5444*c0909341SAndroid Build Coastguard Worker movd m7, r4 5445*c0909341SAndroid Build Coastguard Worker movd m6, r3 5446*c0909341SAndroid Build Coastguard Worker punpckldq m7, m6 5447*c0909341SAndroid Build Coastguard Worker punpcklbw m7, m7 5448*c0909341SAndroid Build Coastguard Worker psraw m7, 8 5449*c0909341SAndroid Build Coastguard Worker pshufd m4, m7, q0000 5450*c0909341SAndroid Build Coastguard Worker pshufd m5, m7, q1111 5451*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m4 5452*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m4 5453*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m5 5454*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m5 5455*c0909341SAndroid Build Coastguard Worker pshufd m6, m7, q2222 5456*c0909341SAndroid Build Coastguard Worker pshufd m7, m7, q3333 5457*c0909341SAndroid Build Coastguard Worker paddd m0, m2 5458*c0909341SAndroid Build Coastguard Worker paddd m1, m3 5459*c0909341SAndroid Build Coastguard Worker pmaddwd m2, [stk+0x60], m6 5460*c0909341SAndroid Build Coastguard Worker pmaddwd m3, [stk+0x70], m6 5461*c0909341SAndroid Build Coastguard Worker pmaddwd m4, [stk+0x80], m7 5462*c0909341SAndroid Build Coastguard Worker pmaddwd m5, [stk+0x90], m7 5463*c0909341SAndroid Build Coastguard Worker %if isput 5464*c0909341SAndroid Build Coastguard Worker movd m6, [esp+0x18] 5465*c0909341SAndroid Build Coastguard Worker %endif 5466*c0909341SAndroid Build Coastguard Worker paddd m0, m2 5467*c0909341SAndroid Build Coastguard Worker paddd m1, m3 5468*c0909341SAndroid Build Coastguard Worker paddd m0, vrnd_mem 5469*c0909341SAndroid Build Coastguard Worker paddd m1, vrnd_mem 5470*c0909341SAndroid Build Coastguard Worker paddd m4, m0 5471*c0909341SAndroid Build Coastguard Worker paddd m5, m1 5472*c0909341SAndroid Build Coastguard 
Worker%endif 5473*c0909341SAndroid Build Coastguard Worker%ifidn %1, put 5474*c0909341SAndroid Build Coastguard Worker psrad m4, m6 5475*c0909341SAndroid Build Coastguard Worker psrad m5, m6 5476*c0909341SAndroid Build Coastguard Worker packssdw m4, m5 5477*c0909341SAndroid Build Coastguard Worker pxor m7, m7 5478*c0909341SAndroid Build Coastguard Worker pmaxsw m4, m7 5479*c0909341SAndroid Build Coastguard Worker pminsw m4, pxmaxm 5480*c0909341SAndroid Build Coastguard Worker mova [dstq], m4 5481*c0909341SAndroid Build Coastguard Worker add dstq, dsm 5482*c0909341SAndroid Build Coastguard Worker%else 5483*c0909341SAndroid Build Coastguard Worker psrad m4, 6 5484*c0909341SAndroid Build Coastguard Worker psrad m5, 6 5485*c0909341SAndroid Build Coastguard Worker packssdw m4, m5 5486*c0909341SAndroid Build Coastguard Worker mova [tmpq], m4 5487*c0909341SAndroid Build Coastguard Worker add tmpq, tmp_stridem 5488*c0909341SAndroid Build Coastguard Worker%endif 5489*c0909341SAndroid Build Coastguard Worker dec hd 5490*c0909341SAndroid Build Coastguard Worker jz .hloop_prep 5491*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 5492*c0909341SAndroid Build Coastguard Worker add myd, dyd 5493*c0909341SAndroid Build Coastguard Worker test myd, ~0x3ff 5494*c0909341SAndroid Build Coastguard Worker jz .vloop 5495*c0909341SAndroid Build Coastguard Worker test myd, 0x400 5496*c0909341SAndroid Build Coastguard Worker mov [stk+0x140], myd 5497*c0909341SAndroid Build Coastguard Worker mov r4d, [stk+ 0] 5498*c0909341SAndroid Build Coastguard Worker mov r6d, [stk+ 4] 5499*c0909341SAndroid Build Coastguard Worker mov r7d, [stk+ 8] 5500*c0909341SAndroid Build Coastguard Worker mov r9d, [stk+12] 5501*c0909341SAndroid Build Coastguard Worker jz .skip_line 5502*c0909341SAndroid Build Coastguard Worker mova m14, [base+unpckw] 5503*c0909341SAndroid Build Coastguard Worker movu m8, [srcq+r10*2] 5504*c0909341SAndroid Build Coastguard Worker movu m9, [srcq+r11*2] 5505*c0909341SAndroid Build 
Coastguard Worker movu m10, [srcq+r13*2] 5506*c0909341SAndroid Build Coastguard Worker movu m11, [srcq+ rX*2] 5507*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ r4*2] 5508*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ r6*2] 5509*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ r7*2] 5510*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ r9*2] 5511*c0909341SAndroid Build Coastguard Worker add srcq, ssq 5512*c0909341SAndroid Build Coastguard Worker mov myd, [stk+0x140] 5513*c0909341SAndroid Build Coastguard Worker mov dyd, dym 5514*c0909341SAndroid Build Coastguard Worker pshufd m15, m14, q1032 5515*c0909341SAndroid Build Coastguard Worker pshufb m0, m14 ; 0a 1a 5516*c0909341SAndroid Build Coastguard Worker pshufb m1, m14 ; 0b 1b 5517*c0909341SAndroid Build Coastguard Worker pshufb m2, m15 ; 3a 2a 5518*c0909341SAndroid Build Coastguard Worker pshufb m3, m15 ; 3b 2b 5519*c0909341SAndroid Build Coastguard Worker pmaddwd m8, [stk+0x50] 5520*c0909341SAndroid Build Coastguard Worker pmaddwd m9, [stk+0x60] 5521*c0909341SAndroid Build Coastguard Worker pmaddwd m10, [stk+0x70] 5522*c0909341SAndroid Build Coastguard Worker pmaddwd m11, [stk+0x80] 5523*c0909341SAndroid Build Coastguard Worker pmaddwd m4, [stk+0x10] 5524*c0909341SAndroid Build Coastguard Worker pmaddwd m5, [stk+0x20] 5525*c0909341SAndroid Build Coastguard Worker pmaddwd m6, [stk+0x30] 5526*c0909341SAndroid Build Coastguard Worker pmaddwd m7, [stk+0x40] 5527*c0909341SAndroid Build Coastguard Worker phaddd m8, m9 5528*c0909341SAndroid Build Coastguard Worker phaddd m10, m11 5529*c0909341SAndroid Build Coastguard Worker mova m11, hround 5530*c0909341SAndroid Build Coastguard Worker phaddd m4, m5 5531*c0909341SAndroid Build Coastguard Worker phaddd m6, m7 5532*c0909341SAndroid Build Coastguard Worker phaddd m8, m10 5533*c0909341SAndroid Build Coastguard Worker phaddd m4, m6 5534*c0909341SAndroid Build Coastguard Worker paddd m4, m11 5535*c0909341SAndroid Build Coastguard Worker 
paddd m8, m11 5536*c0909341SAndroid Build Coastguard Worker psrad m4, m12 5537*c0909341SAndroid Build Coastguard Worker psrad m8, m12 5538*c0909341SAndroid Build Coastguard Worker packssdw m4, m8 5539*c0909341SAndroid Build Coastguard Worker pshufb m5, [stk+0x90], m14 ; 4a 5a 5540*c0909341SAndroid Build Coastguard Worker pshufb m6, [stk+0xa0], m14 ; 4b 5b 5541*c0909341SAndroid Build Coastguard Worker pshufb m7, [stk+0xb0], m15 ; 7a 6a 5542*c0909341SAndroid Build Coastguard Worker pshufb m8, [stk+0xc0], m15 ; 7b 6b 5543*c0909341SAndroid Build Coastguard Worker punpckhwd m0, m2 ; 12a 5544*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m3 ; 12b 5545*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m5 ; 34a 5546*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m6 ; 34b 5547*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m7 ; 56a 5548*c0909341SAndroid Build Coastguard Worker punpckhwd m6, m8 ; 56b 5549*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m4 ; 78a 5550*c0909341SAndroid Build Coastguard Worker punpckhqdq m4, m4 5551*c0909341SAndroid Build Coastguard Worker punpcklwd m8, m4 ; 78b 5552*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m5 5553*c0909341SAndroid Build Coastguard Worker mova [stk+0xa0], m6 5554*c0909341SAndroid Build Coastguard Worker mova [stk+0xb0], m7 5555*c0909341SAndroid Build Coastguard Worker mova [stk+0xc0], m8 5556*c0909341SAndroid Build Coastguard Worker jmp .vloop 5557*c0909341SAndroid Build Coastguard Worker.skip_line: 5558*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 4, 8, 5, 6, 7, 9, 10, 11 5559*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 8, 5, 6, 7, 9, 0, 10, 11 5560*c0909341SAndroid Build Coastguard Worker mov myd, [stk+0x140] 5561*c0909341SAndroid Build Coastguard Worker mov dyd, dym 5562*c0909341SAndroid Build Coastguard Worker mova m0, m2 ; 01a 5563*c0909341SAndroid Build Coastguard Worker mova m1, m3 ; 01b 5564*c0909341SAndroid Build Coastguard Worker mova m2, 
[stk+0x90] ; 23a 5565*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0xa0] ; 23b 5566*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0xb0] ; 45a 5567*c0909341SAndroid Build Coastguard Worker mova m6, [stk+0xc0] ; 45b 5568*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m4, m8 ; 67a 5569*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m8 ; 67b 5570*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m5 5571*c0909341SAndroid Build Coastguard Worker mova [stk+0xa0], m6 5572*c0909341SAndroid Build Coastguard Worker mova [stk+0xb0], m7 5573*c0909341SAndroid Build Coastguard Worker mova [stk+0xc0], m4 5574*c0909341SAndroid Build Coastguard Worker%else 5575*c0909341SAndroid Build Coastguard Worker mov r0m, r0 5576*c0909341SAndroid Build Coastguard Worker mov myd, mym 5577*c0909341SAndroid Build Coastguard Worker mov r3, r3m 5578*c0909341SAndroid Build Coastguard Worker add myd, dym 5579*c0909341SAndroid Build Coastguard Worker test myd, ~0x3ff 5580*c0909341SAndroid Build Coastguard Worker mov mym, myd 5581*c0909341SAndroid Build Coastguard Worker jnz .next_line 5582*c0909341SAndroid Build Coastguard Worker mova m0, [stk+0x20] 5583*c0909341SAndroid Build Coastguard Worker mova m1, [stk+0x30] 5584*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0x40] 5585*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0x50] 5586*c0909341SAndroid Build Coastguard Worker jmp .vloop 5587*c0909341SAndroid Build Coastguard Worker.next_line: 5588*c0909341SAndroid Build Coastguard Worker test myd, 0x400 5589*c0909341SAndroid Build Coastguard Worker mov r0, [stk+ 0] 5590*c0909341SAndroid Build Coastguard Worker mov rX, [stk+ 4] 5591*c0909341SAndroid Build Coastguard Worker mov r4, [stk+ 8] 5592*c0909341SAndroid Build Coastguard Worker mov r5, [stk+12] 5593*c0909341SAndroid Build Coastguard Worker jz .skip_line 5594*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0xe0, 0 ; 8 5595*c0909341SAndroid Build Coastguard Worker mova m7, 
[base+unpckw] 5596*c0909341SAndroid Build Coastguard Worker pshufd m4, m7, q1032 5597*c0909341SAndroid Build Coastguard Worker pshufb m0, [stk+0x20], m7 ; 0a 1a 5598*c0909341SAndroid Build Coastguard Worker pshufb m1, [stk+0x30], m7 ; 0b 1b 5599*c0909341SAndroid Build Coastguard Worker pshufb m2, [stk+0x40], m4 ; 3a 2a 5600*c0909341SAndroid Build Coastguard Worker pshufb m3, [stk+0x50], m4 ; 3b 2b 5601*c0909341SAndroid Build Coastguard Worker pshufb m5, [stk+0x60], m7 ; 4a 5a 5602*c0909341SAndroid Build Coastguard Worker pshufb m6, [stk+0x70], m7 ; 4b 5b 5603*c0909341SAndroid Build Coastguard Worker pshufb m7, [stk+0x80], m4 ; 7a 6a 5604*c0909341SAndroid Build Coastguard Worker punpckhwd m0, m2 ; 12a 5605*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m3 ; 12b 5606*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m5 ; 34a 5607*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m6 ; 34b 5608*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m0 5609*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 5610*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 5611*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 5612*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m7 ; 56a 5613*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m5 5614*c0909341SAndroid Build Coastguard Worker pshufb m5, [stk+0x90], m4 ; 7b 6b 5615*c0909341SAndroid Build Coastguard Worker punpcklwd m7, [stk+0xe0] ; 78a 5616*c0909341SAndroid Build Coastguard Worker punpckhwd m6, m5 ; 56b 5617*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m6 5618*c0909341SAndroid Build Coastguard Worker movq m6, [stk+0xe8] 5619*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m7 5620*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m6 5621*c0909341SAndroid Build Coastguard Worker mov myd, mym 5622*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m5 5623*c0909341SAndroid Build Coastguard Worker jmp .vloop 
5624*c0909341SAndroid Build Coastguard Worker.skip_line: 5625*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0xe0, 0 ; 8 5626*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0 ; 9 5627*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0xe0] 5628*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0x60] ; 23a 5629*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0x70] ; 23b 5630*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x80] ; 45a 5631*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0x90] ; 45b 5632*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7, m0 ; 67a 5633*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m0 ; 67b 5634*c0909341SAndroid Build Coastguard Worker mova m0, [stk+0x40] ; 01a 5635*c0909341SAndroid Build Coastguard Worker mova m1, [stk+0x50] ; 01b 5636*c0909341SAndroid Build Coastguard Worker mov myd, mym 5637*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 5638*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 5639*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m4 5640*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m5 5641*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m6 5642*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m7 5643*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m0 5644*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 5645*c0909341SAndroid Build Coastguard Worker%endif 5646*c0909341SAndroid Build Coastguard Worker jmp .vloop 5647*c0909341SAndroid Build Coastguard WorkerINIT_XMM ssse3 5648*c0909341SAndroid Build Coastguard Worker.dy1: 5649*c0909341SAndroid Build Coastguard Worker movzx wd, word [base+%1_8tap_scaled_ssse3_dy1_table+wq*2] 5650*c0909341SAndroid Build Coastguard Worker add wq, base_reg 5651*c0909341SAndroid Build Coastguard Worker jmp wq 5652*c0909341SAndroid Build Coastguard Worker%if isput 5653*c0909341SAndroid Build Coastguard Worker.dy1_w2: 
5654*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 5655*c0909341SAndroid Build Coastguard Worker mov myd, mym 5656*c0909341SAndroid Build Coastguard Worker movzx t0d, t0b 5657*c0909341SAndroid Build Coastguard Worker sub srcq, 2 5658*c0909341SAndroid Build Coastguard Worker movd m15, t0d 5659*c0909341SAndroid Build Coastguard Worker %else 5660*c0909341SAndroid Build Coastguard Worker %define m8 m0 5661*c0909341SAndroid Build Coastguard Worker %define m9 m1 5662*c0909341SAndroid Build Coastguard Worker %define m14 m4 5663*c0909341SAndroid Build Coastguard Worker %define m15 m3 5664*c0909341SAndroid Build Coastguard Worker %define m11 [esp+0x00] 5665*c0909341SAndroid Build Coastguard Worker %define m12 [esp+0x10] 5666*c0909341SAndroid Build Coastguard Worker %define m13 [esp+0x20] 5667*c0909341SAndroid Build Coastguard Worker movzx r5, byte [esp+0x1f0] 5668*c0909341SAndroid Build Coastguard Worker sub srcq, 2 5669*c0909341SAndroid Build Coastguard Worker movd m15, r5 5670*c0909341SAndroid Build Coastguard Worker mov r1, r1m 5671*c0909341SAndroid Build Coastguard Worker %endif 5672*c0909341SAndroid Build Coastguard Worker pxor m9, m9 5673*c0909341SAndroid Build Coastguard Worker punpckldq m9, m8 5674*c0909341SAndroid Build Coastguard Worker paddd m14, m9 ; mx+dx*[0-1] 5675*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 5676*c0909341SAndroid Build Coastguard Worker mova m9, [base+pd_0x4000] 5677*c0909341SAndroid Build Coastguard Worker %endif 5678*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0000 5679*c0909341SAndroid Build Coastguard Worker pand m8, m14, m10 5680*c0909341SAndroid Build Coastguard Worker psrld m8, 6 5681*c0909341SAndroid Build Coastguard Worker paddd m15, m8 5682*c0909341SAndroid Build Coastguard Worker movd r4d, m15 5683*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 5684*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 5685*c0909341SAndroid Build Coastguard Worker movd r6d, m15 
5686*c0909341SAndroid Build Coastguard Worker %else 5687*c0909341SAndroid Build Coastguard Worker movd r3d, m15 5688*c0909341SAndroid Build Coastguard Worker %endif 5689*c0909341SAndroid Build Coastguard Worker mova m5, [base+bdct_lb_q] 5690*c0909341SAndroid Build Coastguard Worker mova m6, [base+spel_s_shuf2] 5691*c0909341SAndroid Build Coastguard Worker movd m15, [base+subpel_filters+r4*8+2] 5692*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 5693*c0909341SAndroid Build Coastguard Worker movd m7, [base+subpel_filters+r6*8+2] 5694*c0909341SAndroid Build Coastguard Worker %else 5695*c0909341SAndroid Build Coastguard Worker movd m7, [base+subpel_filters+r3*8+2] 5696*c0909341SAndroid Build Coastguard Worker %endif 5697*c0909341SAndroid Build Coastguard Worker pxor m2, m2 5698*c0909341SAndroid Build Coastguard Worker pcmpeqd m8, m2 5699*c0909341SAndroid Build Coastguard Worker psrld m14, 10 5700*c0909341SAndroid Build Coastguard Worker paddd m14, m14 5701*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 5702*c0909341SAndroid Build Coastguard Worker mov r3, r3m 5703*c0909341SAndroid Build Coastguard Worker pshufb m14, m5 5704*c0909341SAndroid Build Coastguard Worker paddb m14, m6 5705*c0909341SAndroid Build Coastguard Worker mova [stk], m14 5706*c0909341SAndroid Build Coastguard Worker SWAP m5, m0 5707*c0909341SAndroid Build Coastguard Worker SWAP m6, m3 5708*c0909341SAndroid Build Coastguard Worker %define m15 m6 5709*c0909341SAndroid Build Coastguard Worker %endif 5710*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0] 5711*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*1] 5712*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*2] 5713*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ss3q ] 5714*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*4] 5715*c0909341SAndroid Build Coastguard Worker punpckldq m15, m7 5716*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 5717*c0909341SAndroid Build 
Coastguard Worker pshufb m14, m5 5718*c0909341SAndroid Build Coastguard Worker paddb m14, m6 5719*c0909341SAndroid Build Coastguard Worker pand m9, m8 5720*c0909341SAndroid Build Coastguard Worker pandn m8, m15 5721*c0909341SAndroid Build Coastguard Worker SWAP m15, m8 5722*c0909341SAndroid Build Coastguard Worker por m15, m9 5723*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ssq*0] 5724*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*1] 5725*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*2] 5726*c0909341SAndroid Build Coastguard Worker add srcq, ss3q 5727*c0909341SAndroid Build Coastguard Worker shr myd, 6 5728*c0909341SAndroid Build Coastguard Worker mov r4d, 64 << 24 5729*c0909341SAndroid Build Coastguard Worker lea myd, [t1+myq] 5730*c0909341SAndroid Build Coastguard Worker cmovnz r4q, [base+subpel_filters+myq*8] 5731*c0909341SAndroid Build Coastguard Worker %else 5732*c0909341SAndroid Build Coastguard Worker pand m7, m5, [base+pd_0x4000] 5733*c0909341SAndroid Build Coastguard Worker pandn m5, m15 5734*c0909341SAndroid Build Coastguard Worker por m5, m7 5735*c0909341SAndroid Build Coastguard Worker %define m15 m5 5736*c0909341SAndroid Build Coastguard Worker mov myd, mym 5737*c0909341SAndroid Build Coastguard Worker mov r5, [esp+0x1f4] 5738*c0909341SAndroid Build Coastguard Worker xor r3, r3 5739*c0909341SAndroid Build Coastguard Worker shr myd, 6 5740*c0909341SAndroid Build Coastguard Worker lea r5, [r5+myd] 5741*c0909341SAndroid Build Coastguard Worker mov r4, 64 << 24 5742*c0909341SAndroid Build Coastguard Worker cmovnz r4, [base+subpel_filters+r5*8+0] 5743*c0909341SAndroid Build Coastguard Worker cmovnz r3, [base+subpel_filters+r5*8+4] 5744*c0909341SAndroid Build Coastguard Worker mov [stk+0x20], r3 5745*c0909341SAndroid Build Coastguard Worker mov r3, r3m 5746*c0909341SAndroid Build Coastguard Worker %endif 5747*c0909341SAndroid Build Coastguard Worker punpcklbw m15, m15 5748*c0909341SAndroid Build Coastguard Worker psraw 
m15, 8 5749*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m0, m1, m2, m3 5750*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m0, m1, m2, m3 5751*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 5752*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m4, m5, m6 5753*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m4, m5, m6 5754*c0909341SAndroid Build Coastguard Worker phaddd m0, m1 5755*c0909341SAndroid Build Coastguard Worker phaddd m2, m3 5756*c0909341SAndroid Build Coastguard Worker phaddd m4, m5 5757*c0909341SAndroid Build Coastguard Worker phaddd m6, m6 5758*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m2, m4, m6 5759*c0909341SAndroid Build Coastguard Worker REPX {psrad x, m12}, m0, m2, m4, m6 5760*c0909341SAndroid Build Coastguard Worker packssdw m0, m2 ; 0 1 2 3 5761*c0909341SAndroid Build Coastguard Worker packssdw m4, m6 ; 4 5 6 5762*c0909341SAndroid Build Coastguard Worker SWAP m1, m4 5763*c0909341SAndroid Build Coastguard Worker movq m10, r4 5764*c0909341SAndroid Build Coastguard Worker %else 5765*c0909341SAndroid Build Coastguard Worker mova [stk+0x10], m15 5766*c0909341SAndroid Build Coastguard Worker phaddd m0, m1 5767*c0909341SAndroid Build Coastguard Worker phaddd m2, m3 5768*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0] 5769*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ssq*1] 5770*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*2] 5771*c0909341SAndroid Build Coastguard Worker add srcq, ss3q 5772*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m1, m7, m6 5773*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m1, m7, m6 5774*c0909341SAndroid Build Coastguard Worker %define m14 [stk+0x00] 5775*c0909341SAndroid Build Coastguard Worker %define m15 [stk+0x10] 5776*c0909341SAndroid Build Coastguard Worker phaddd m1, m7 5777*c0909341SAndroid Build Coastguard Worker phaddd m6, m6 
5778*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m2, m1, m6 5779*c0909341SAndroid Build Coastguard Worker REPX {psrad x, m12}, m0, m2, m1, m6 5780*c0909341SAndroid Build Coastguard Worker packssdw m0, m2 5781*c0909341SAndroid Build Coastguard Worker packssdw m1, m6 5782*c0909341SAndroid Build Coastguard Worker %define m8 m6 5783*c0909341SAndroid Build Coastguard Worker %define m9 m4 5784*c0909341SAndroid Build Coastguard Worker %define m10 m5 5785*c0909341SAndroid Build Coastguard Worker movd m10, r4 5786*c0909341SAndroid Build Coastguard Worker movd m9, [stk+0x20] 5787*c0909341SAndroid Build Coastguard Worker punpckldq m10, m9 5788*c0909341SAndroid Build Coastguard Worker %endif 5789*c0909341SAndroid Build Coastguard Worker punpcklbw m10, m10 5790*c0909341SAndroid Build Coastguard Worker psraw m10, 8 5791*c0909341SAndroid Build Coastguard Worker pshufd m7, m10, q0000 5792*c0909341SAndroid Build Coastguard Worker pshufd m8, m10, q1111 5793*c0909341SAndroid Build Coastguard Worker pshufd m9, m10, q2222 5794*c0909341SAndroid Build Coastguard Worker pshufd m10, m10, q3333 5795*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 5796*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m7 5797*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m8 5798*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m9 5799*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m10 5800*c0909341SAndroid Build Coastguard Worker %define m7 [stk+0x50] 5801*c0909341SAndroid Build Coastguard Worker %define m8 [stk+0x60] 5802*c0909341SAndroid Build Coastguard Worker %define m9 [stk+0x70] 5803*c0909341SAndroid Build Coastguard Worker %define m10 [stk+0x80] 5804*c0909341SAndroid Build Coastguard Worker %endif 5805*c0909341SAndroid Build Coastguard Worker palignr m2, m1, m0, 4 ; 1 2 3 4 5806*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m0, m2 ; 01 12 5807*c0909341SAndroid Build Coastguard Worker punpckhwd m0, m2 ; 23 34 
; ---------------------------------------------------------------------------
; End of the .dy1_w2 setup, the .dy1_w2_loop row loop, and the whole .dy1_w4
; path (setup + .dy1_w4_loop).
;
; This span was restored to one statement per line: the blame/line-number
; residue fused in front of every statement was removed and instructions that
; had been wrapped across physical lines were re-joined. The instruction
; stream and its order are unchanged.
;
; Shared conventions visible in this span:
;   * row-pair registers hold interleaved 16-bit rows ("01" = row0|row1, ...),
;     as annotated by the trailing comments on the punpck* instructions;
;   * the vertical filter is fetched once per call and kept as four broadcast
;     coefficient pairs (taps 01, 23, 45, 67);
;   * each loop iteration consumes two new source rows and emits two rows;
;   * x86-32 aliases the m8..m15 names onto m0..m7 or onto memory operands via
;     %define/%xdefine, so a "register" name may really be a stack slot there.
; NOTE(review): the "dy1" prefix presumably denotes the unit-vertical-step
; specialisation; the dispatch that selects it is outside this span - confirm.
; ---------------------------------------------------------------------------
    pshufd               m4, m1, q2121 ; 5 6 5 6
    punpcklwd            m2, m1, m4    ; 45 56
 %if ARCH_X86_32
    mov                  r0, r0m
 %endif
.dy1_w2_loop:
    ; Entry state: m3 = 01 12, m0 = 23 34, m2 = 45 56, m4 holds the most
    ; recent filtered rows; m7..m10 = vertical coefficient pairs.
    movu                 m1, [srcq+ssq*0]
    movu                 m6, [srcq+ssq*1]
    lea                srcq, [srcq+ssq*2]
    pmaddwd              m5, m3, m7     ; taps 0-1
    mova                 m3, m0         ; slide window: 23 34 becomes 01 12
    pmaddwd              m0, m8         ; taps 2-3
    ; horizontal filter of the two new rows: shuffle (m14), multiply (m15),
    ; horizontal add, round (m11), shift (m12), pack to words
    pshufb               m1, m14
    pshufb               m6, m14
    pmaddwd              m1, m15
    pmaddwd              m6, m15
    phaddd               m1, m6
    paddd                m1, m11
    psrad                m1, m12
    packssdw             m1, m1
    paddd                m5, m0
    mova                 m0, m2         ; slide window: 45 56 becomes 23 34
    pmaddwd              m2, m9         ; taps 4-5
    paddd                m5, m2
    palignr              m2, m1, m4, 12
    punpcklwd            m2, m1 ; 67 78
    pmaddwd              m4, m2, m10    ; taps 6-7
    paddd                m5, m13        ; vertical rounding term
    paddd                m5, m4
    pxor                 m6, m6
    mova                 m4, m1         ; remember newest rows for next palignr
    ; psrad takes its count from the low qword, so swapping the halves of m12
    ; selects the value stored in its upper qword as the vertical shift
    pshufd               m1, m12, q1032
    psrad                m5, m1
    packssdw             m5, m5
    ; clamp to [0, pxmaxm]
    pmaxsw               m5, m6
    pminsw               m5, pxmaxm
    ; 4 bytes (two 16-bit pixels) per output row
    movd       [dstq+dsq*0], m5
    pshuflw              m5, m5, q1032
    movd       [dstq+dsq*1], m5
    lea                dstq, [dstq+dsq*2]
    sub                  hd, 2
    jg .dy1_w2_loop
    RET
%endif
INIT_XMM ssse3
.dy1_w4:
    ; --- save rounding/shift constants and pick the stack layout -----------
%if ARCH_X86_64
    mov                 myd, mym
    mova         [rsp+0x10], m11
    mova         [rsp+0x20], m12
 %if isput
    mova         [rsp+0x30], m13
  %define vrnd_mem [rsp+0x30]
  %define stk rsp+0x40
 %else
  %define vrnd_mem [base+pd_m524256]
  %define stk rsp+0x30
 %endif
    movzx               t0d, t0b        ; keep only the low byte of t0
    sub                srcq, 2
    movd                m15, t0d
%else
 %define m10 [base+pd_0x3ff]
 %define m9 [base+pd_0x4000]
 %define m8 m0
 %xdefine m14 m4
 %define m15 m3
 %if isprep
  %define ssq r3
 %endif
    movzx                r5, byte [esp+0x1f0]
    sub                srcq, 2
    movd                m15, r5
%endif
    ; --- per-column horizontal filter selection -----------------------------
    pmaddwd              m8, [base+rescale_mul]
%if ARCH_X86_64
    mova                 m9, [base+pd_0x4000]
%endif
    pshufd              m15, m15, q0000
    paddd               m14, m8 ; mx+dx*[0-3]
    ; filter index per column = broadcast base byte + ((mx & m10) >> 6)
    ; NOTE(review): on x86-64 m10 is expected to hold pd_0x3ff on entry (it is
    ; only %define'd to that constant on x86-32 here) - confirm at the caller.
    pand                 m0, m14, m10
    psrld                m0, 6
    paddd               m15, m0
    pshufd               m7, m15, q1032
    ; move the four indices to GPRs and fetch 4 bytes at offset +2 of each
    ; 8-byte subpel_filters entry (the middle four taps)
%if ARCH_X86_64
    movd                r4d, m15
    movd               r11d, m7
    pshufd              m15, m15, q0321
    pshufd               m7, m7, q0321
    movd                r6d, m15
    movd               r13d, m7
    mova                m10, [base+bdct_lb_q+ 0]
    mova                m11, [base+bdct_lb_q+16]
    movd                m13, [base+subpel_filters+ r4*8+2]
    movd                 m2, [base+subpel_filters+ r6*8+2]
    movd                m15, [base+subpel_filters+r11*8+2]
    movd                 m4, [base+subpel_filters+r13*8+2]
%else
    movd                 r0, m15
    movd                 r4, m7
    pshufd              m15, m15, q0321
    pshufd               m7, m7, q0321
    movd                 rX, m15
    movd                 r5, m7
    mova                 m5, [base+bdct_lb_q+ 0]
    mova                 m6, [base+bdct_lb_q+16]
    movd                 m1, [base+subpel_filters+r0*8+2]
    movd                 m2, [base+subpel_filters+rX*8+2]
    movd                 m3, [base+subpel_filters+r4*8+2]
    movd                 m7, [base+subpel_filters+r5*8+2]
    SWAP                 m4, m7
 %if isprep
    mov                  r3, r3m
 %endif
 %define m10 m5
 %define m11 m6
 %define m12 m1
 %define m13 m1
%endif
    ; (mx >> 10) * 2 = byte offset of each column's first 16-bit sample
    psrld               m14, 10
    paddd               m14, m14
    ; gather the four 4-tap filters into one register
    punpckldq           m13, m2
    punpckldq           m15, m4
    punpcklqdq          m13, m15
    ; columns whose index fraction (m0) is zero use the constant m9
    ; (pd_0x4000) instead of the table entry
    pxor                 m2, m2
    pcmpeqd              m0, m2
%if ARCH_X86_64
    pand                 m9, m0
%else
    pand                 m2, m9, m0
 %define m9 m2
    SWAP                 m7, m4
%endif
    pandn                m0, m13
%if ARCH_X86_64
    SWAP                m13, m0
%else
 %define m13 m0
%endif
    por                 m13, m9
    ; sign-extend the byte taps to words: m13 = low half, m15 = high half
    punpckhbw           m15, m13, m13
    punpcklbw           m13, m13
    psraw               m15, 8
    psraw               m13, 8
    ; build the two pshufb masks (m12, m14) from the byte offsets
    ; NOTE(review): bdct_lb_q / spel_s_shuf2 contents are not visible here.
    pshufb              m12, m14, m10
    pshufb              m14, m11
    mova                m10, [base+spel_s_shuf2]
    ; r4 = byte 3 of the second mask; used below as the source byte offset
    ; for the second load of every row
    movd                r4d, m14
    shr                 r4d, 24
%if ARCH_X86_32
    mova         [stk+0x40], m13
    mova         [stk+0x50], m15
    pxor                 m2, m2
%endif
    ; make the second mask relative to its own byte 0
    pshufb               m7, m14, m2
    psubb               m14, m7
    paddb               m12, m10
    paddb               m14, m10
    ; --- horizontally filter the first seven rows (0..6) -------------------
%if ARCH_X86_64
    lea                  r6, [r4+ssq*1]
    lea                 r11, [r4+ssq*2]
    lea                 r13, [r4+ss3q ]
    movu                 m7, [srcq+ssq*0]
    movu                 m9, [srcq+ssq*1]
    movu                 m8, [srcq+ssq*2]
    movu                m10, [srcq+ss3q ]
    movu                 m1, [srcq+r4 ]
    movu                 m3, [srcq+r6 ]
    movu                 m2, [srcq+r11 ]
    movu                 m4, [srcq+r13 ]
    lea                srcq, [srcq+ssq*4]
    REPX {pshufb x, m12}, m7, m9, m8, m10
    REPX {pmaddwd x, m13}, m7, m9, m8, m10
    REPX {pshufb x, m14}, m1, m3, m2, m4
    REPX {pmaddwd x, m15}, m1, m3, m2, m4
    ; m11/m12 were repurposed above; reload the horizontal rounding constant
    ; and shift count saved at function entry
    mova                 m5, [rsp+0x10]
    movd                xm6, [rsp+0x20]
    phaddd               m7, m1
    phaddd               m9, m3
    phaddd               m8, m2
    phaddd              m10, m4
    movu                 m1, [srcq+ssq*0]
    movu                 m2, [srcq+ssq*1]
    movu                 m3, [srcq+ssq*2]
    REPX {paddd x, m5}, m7, m9, m8, m10
    REPX {psrad x, xm6}, m7, m9, m8, m10
    packssdw             m7, m9 ; 0 1
    packssdw             m8, m10 ; 2 3
    movu                 m0, [srcq+r4 ]
    movu                 m9, [srcq+r6 ]
    movu                m10, [srcq+r11 ]
    add                srcq, ss3q
    REPX {pshufb x, m12}, m1, m2, m3
    REPX {pmaddwd x, m13}, m1, m2, m3
    REPX {pshufb x, m14}, m0, m9, m10
    REPX {pmaddwd x, m15}, m0, m9, m10
    phaddd               m1, m0
    phaddd               m2, m9
    phaddd               m3, m10
    ; Vertical filter lookup. The flags tested by cmovnz come from the shr
    ; (mov and lea do not modify them): when my >> 6 is zero the default
    ; 64 << 24 is kept, i.e. a single tap of 64 in byte 3 of the 8-byte filter.
    shr                 myd, 6
    mov                r13d, 64 << 24
    lea                 myd, [t1+myq]
    cmovnz             r13q, [base+subpel_filters+myq*8]
    REPX {paddd x, m5}, m1, m2, m3
    REPX {psrad x, xm6}, m1, m2, m3
    packssdw             m1, m2 ; 4 5
    packssdw             m3, m3 ; 6 6
    SWAP                 m9, m1
    shufps               m4, m7, m8, q1032 ; 1 2
    shufps               m5, m8, m9, q1032 ; 3 4
    shufps               m6, m9, m3, q1032 ; 5 6
    punpcklwd            m0, m7, m4 ; 01
    punpckhwd            m7, m4 ; 12
    punpcklwd            m1, m8, m5 ; 23
    punpckhwd            m8, m5 ; 34
    punpcklwd            m2, m9, m6 ; 45
    punpckhwd            m9, m6 ; 56
    movq                m10, r13
    ; spill the sliding row-pair window; the loop reloads it from here
    mova         [stk+0x00], m1
    mova         [stk+0x10], m8
    mova         [stk+0x20], m2
    mova         [stk+0x30], m9
    mova         [stk+0x40], m3
 %define hrnd_mem [rsp+0x10]
 %define hsh_mem [rsp+0x20]
 %define vsh_mem [rsp+0x28]
 %if isput
  %define vrnd_mem [rsp+0x30]
 %else
  %define vrnd_mem [base+pd_m524256]
 %endif
%else
    mova         [stk+0x20], m12
    mova         [stk+0x30], m14
    add                  r4, srcq       ; r4 = pointer for the second load
    MC_4TAP_SCALED_H 0x60 ; 0 1
    MC_4TAP_SCALED_H 0x70 ; 2 3
    MC_4TAP_SCALED_H 0x80 ; 4 5
    movu                 m7, [srcq]
    movu                 m2, [r4]
    add                srcq, ssq
    add                  r4, ssq
    mov          [stk+0xb0], r4
    pshufb               m7, m12
    pshufb               m2, m14
    pmaddwd              m7, m13
    pmaddwd              m2, m15
    phaddd               m7, m2
    paddd                m7, [esp+0x00]
    psrad                m7, [esp+0x10]
    packssdw             m7, m7 ; 6 6
    mova                 m4, [stk+0x60]
    mova                 m5, [stk+0x70]
    mova                 m6, [stk+0x80]
    ; Vertical filter lookup, split into two 32-bit halves (r4 = taps 0-3,
    ; r5 = taps 4-7). xor precedes shr so that the flags read by cmovnz are
    ; the ones produced by the shr; defaults are 64 << 24 and 0.
    mov                 myd, mym
    mov                  rX, [esp+0x1f4]
    xor                  r5, r5
    shr                 myd, 6
    lea                  rX, [rX+myd]
    mov                  r4, 64 << 24
    cmovnz               r4, [base+subpel_filters+rX*8+0]
    cmovnz               r5, [base+subpel_filters+rX*8+4]
    mov                  r3, r3m
    shufps               m1, m4, m5, q1032 ; 1 2
    shufps               m2, m5, m6, q1032 ; 3 4
    shufps               m3, m6, m7, q1032 ; 5 6
    mova         [stk+0xa0], m7
    punpcklwd            m0, m4, m1 ; 01
    punpckhwd            m4, m1 ; 12
    punpcklwd            m1, m5, m2 ; 23
    punpckhwd            m5, m2 ; 34
    punpcklwd            m2, m6, m3 ; 45
    punpckhwd            m6, m3 ; 56
    movd                 m7, r4
    movd                 m3, r5
    mov                  r0, r0m
 %if isput
    mov                  r1, r1m
 %endif
    mov                  r4, [stk+0xb0]
    mova         [stk+0xc0], m4 ; 12
    mova         [stk+0x60], m1 ; 23
    mova         [stk+0x70], m2 ; 45
    mova         [stk+0x80], m5 ; 34
    mova         [stk+0x90], m6 ; 56
    ; from here on the horizontal masks/filters live in stack slots
 %define m12 [stk+0x20]
 %define m14 [stk+0x30]
 %define m13 [stk+0x40]
 %define m15 [stk+0x50]
 %define hrnd_mem [esp+0x00]
 %define hsh_mem [esp+0x10]
 %define vsh_mem [esp+0x18]
 %if isput
  %define vrnd_mem [esp+0x20]
 %else
  %define vrnd_mem [base+pd_m524256]
 %endif
 %define m10 m7
    punpckldq           m10, m3
%endif
    ; sign-extend the 8 vertical taps to words and broadcast each tap pair:
    ; m3 = taps 01, m4 = taps 23, m5 = taps 45, m10 = taps 67
    punpcklbw           m10, m10
    psraw               m10, 8
    pshufd               m3, m10, q0000
    pshufd               m4, m10, q1111
    pshufd               m5, m10, q2222
    pshufd              m10, m10, q3333
%if ARCH_X86_32
    ; %xdefine expands its right-hand side now, so m8/m9/m11/m6 keep naming
    ; the current registers even though m3/m4/m5/m10 are redirected to stack
    ; slots by the %define lines that follow
 %xdefine m8 m3
 %xdefine m9 m6
 %xdefine m11 m5
 %xdefine m6 m4
    mova        [stk+0x100], m3
    mova        [stk+0x110], m4
    mova        [stk+0x120], m5
    mova        [stk+0x130], m10
 %define m3 [stk+0x100]
 %define m4 [stk+0x110]
 %define m5 [stk+0x120]
 %define m10 [stk+0x130]
    mova                 m7, [stk+0xc0]
    mova                 m8, [stk+0x80]
%endif
.dy1_w4_loop:
    ; Entry state: m0/m7 = 01/12, m1/m8 = 23/34, m2/m9 = 45/56.
    ; m1 accumulates the first output row, m8 the second.
    movu                m11, [srcq+ssq*0]
    movu                 m6, [srcq+ssq*1]
    pmaddwd              m0, m3
    pmaddwd              m7, m3
    pmaddwd              m1, m4
    pmaddwd              m8, m4
    pmaddwd              m2, m5
    pmaddwd              m9, m5
    paddd                m1, m0
    paddd                m8, m7
    ; second load of the two new rows (offset r4 from the first)
%if ARCH_X86_64
    movu                 m0, [srcq+r4]
    movu                 m7, [srcq+r6]
%else
    movu                 m0, [r4+ssq*0]
    movu                 m7, [r4+ssq*1]
    lea                  r4, [r4+ssq*2]
%endif
    lea                srcq, [srcq+ssq*2]
    paddd                m1, m2
    paddd                m8, m9
    ; horizontal filter of the two new rows
    pshufb              m11, m12
    pshufb               m6, m12
    pmaddwd             m11, m13
    pmaddwd              m6, m13
    pshufb               m0, m14
    pshufb               m7, m14
    pmaddwd              m0, m15
    pmaddwd              m7, m15
    phaddd              m11, m0
    phaddd               m6, m7
    paddd               m11, hrnd_mem
    paddd                m6, hrnd_mem
    psrad               m11, hsh_mem
    psrad                m6, hsh_mem
    packssdw            m11, m6 ; 7 8
    ; combine with the previously saved newest row, then save the new ones
%if ARCH_X86_64
    shufps               m9, [stk+0x40], m11, q1032 ; 6 7
    mova                 m0, [stk+0x00]
    mova         [stk+0x40], m11
%else
    shufps               m9, [stk+0xa0], m11, q1032 ; 6 7
    mova                 m0, [stk+0x60]
    mova         [stk+0xa0], m11
%endif
    punpcklwd            m2, m9, m11 ; 67
    punpckhwd            m9, m11 ; 78
    pmaddwd              m6, m2, m10
    pmaddwd              m7, m9, m10
%if isput
    movd                m11, vsh_mem
%endif
    paddd                m1, vrnd_mem
    paddd                m8, vrnd_mem
    paddd                m1, m6
    paddd                m8, m7
%if ARCH_X86_64
    mova                 m7, [stk+0x10]
%else
    mova                 m7, [stk+0x80]
%endif
    ; put: shift by the runtime count in vsh_mem; prep: fixed shift of 6
%if isput
    psrad                m1, m11
    psrad                m8, m11
%else
    psrad                m1, 6
    psrad                m8, 6
%endif
    packssdw             m1, m8
%if ARCH_X86_64
    mova                 m8, [stk+0x30]
%else
    mova                 m8, [stk+0x90]
%endif
%if isput
    ; clamp to [0, pxmaxm] and store 8 bytes (four 16-bit pixels) per row
    pxor                 m6, m6
    pmaxsw               m1, m6
    pminsw               m1, pxmaxm
    movq       [dstq+dsq*0], m1
    movhps     [dstq+dsq*1], m1
    lea                dstq, [dstq+dsq*2]
%else
    ; prep: both rows are written contiguously to the intermediate buffer
    mova             [tmpq], m1
    add                tmpq, 16
%endif
    ; slide the row-pair window down by two rows for the next iteration
%if ARCH_X86_64
    mova                 m1, [stk+0x20]
    mova         [stk+0x10], m8
    mova         [stk+0x00], m1
    mova         [stk+0x20], m2
    mova         [stk+0x30], m9
%else
    mova                 m1, [stk+0x70]
    mova         [stk+0x80], m8
    mova         [stk+0x60], m1
    mova         [stk+0x70], m2
    mova         [stk+0x90], m9
%endif
    sub                  hd, 2
    jg .dy1_w4_loop
    MC_8TAP_SCALED_RET ; why not jz .ret?
6232*c0909341SAndroid Build Coastguard WorkerINIT_XMM ssse3 6233*c0909341SAndroid Build Coastguard Worker.dy1_w8: 6234*c0909341SAndroid Build Coastguard Worker mov dword [stk+0xf0], 1 6235*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 16 6236*c0909341SAndroid Build Coastguard Worker jmp .dy1_w_start 6237*c0909341SAndroid Build Coastguard Worker.dy1_w16: 6238*c0909341SAndroid Build Coastguard Worker mov dword [stk+0xf0], 2 6239*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 32 6240*c0909341SAndroid Build Coastguard Worker jmp .dy1_w_start 6241*c0909341SAndroid Build Coastguard Worker.dy1_w32: 6242*c0909341SAndroid Build Coastguard Worker mov dword [stk+0xf0], 4 6243*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 64 6244*c0909341SAndroid Build Coastguard Worker jmp .dy1_w_start 6245*c0909341SAndroid Build Coastguard Worker.dy1_w64: 6246*c0909341SAndroid Build Coastguard Worker mov dword [stk+0xf0], 8 6247*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 128 6248*c0909341SAndroid Build Coastguard Worker jmp .dy1_w_start 6249*c0909341SAndroid Build Coastguard Worker.dy1_w128: 6250*c0909341SAndroid Build Coastguard Worker mov dword [stk+0xf0], 16 6251*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 256 6252*c0909341SAndroid Build Coastguard Worker.dy1_w_start: 6253*c0909341SAndroid Build Coastguard Worker mov myd, mym 6254*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6255*c0909341SAndroid Build Coastguard Worker %ifidn %1, put 6256*c0909341SAndroid Build Coastguard Worker movifnidn dsm, dsq 6257*c0909341SAndroid Build Coastguard Worker %endif 6258*c0909341SAndroid Build Coastguard Worker mova [rsp+0x10], m11 6259*c0909341SAndroid Build Coastguard Worker mova [rsp+0x20], m12 6260*c0909341SAndroid Build Coastguard Worker %define hround m11 6261*c0909341SAndroid Build Coastguard Worker %if isput 6262*c0909341SAndroid Build Coastguard Worker mova [rsp+0x30], m13 
6263*c0909341SAndroid Build Coastguard Worker %else 6264*c0909341SAndroid Build Coastguard Worker mova m13, [base+pd_m524256] 6265*c0909341SAndroid Build Coastguard Worker %endif 6266*c0909341SAndroid Build Coastguard Worker shr t0d, 16 6267*c0909341SAndroid Build Coastguard Worker shr myd, 6 6268*c0909341SAndroid Build Coastguard Worker mov r4d, 64 << 24 6269*c0909341SAndroid Build Coastguard Worker lea myd, [t1+myq] 6270*c0909341SAndroid Build Coastguard Worker cmovnz r4q, [base+subpel_filters+myq*8] 6271*c0909341SAndroid Build Coastguard Worker movd m15, t0d 6272*c0909341SAndroid Build Coastguard Worker%else 6273*c0909341SAndroid Build Coastguard Worker %define hround [esp+0x00] 6274*c0909341SAndroid Build Coastguard Worker %define m12 [esp+0x10] 6275*c0909341SAndroid Build Coastguard Worker %define m10 [base+pd_0x3ff] 6276*c0909341SAndroid Build Coastguard Worker %define m8 m0 6277*c0909341SAndroid Build Coastguard Worker %xdefine m14 m4 6278*c0909341SAndroid Build Coastguard Worker %xdefine m15 m3 6279*c0909341SAndroid Build Coastguard Worker %if isprep 6280*c0909341SAndroid Build Coastguard Worker %define ssq ssm 6281*c0909341SAndroid Build Coastguard Worker %endif 6282*c0909341SAndroid Build Coastguard Worker mov r5, [esp+0x1f0] 6283*c0909341SAndroid Build Coastguard Worker mov r3, [esp+0x1f4] 6284*c0909341SAndroid Build Coastguard Worker shr r5, 16 6285*c0909341SAndroid Build Coastguard Worker movd m15, r5 6286*c0909341SAndroid Build Coastguard Worker xor r5, r5 6287*c0909341SAndroid Build Coastguard Worker shr myd, 6 6288*c0909341SAndroid Build Coastguard Worker lea r3, [r3+myd] 6289*c0909341SAndroid Build Coastguard Worker mov r4, 64 << 24 6290*c0909341SAndroid Build Coastguard Worker cmovnz r4, [base+subpel_filters+r3*8+0] 6291*c0909341SAndroid Build Coastguard Worker cmovnz r5, [base+subpel_filters+r3*8+4] 6292*c0909341SAndroid Build Coastguard Worker mov r0, r0m 6293*c0909341SAndroid Build Coastguard Worker mov r3, r3m 6294*c0909341SAndroid Build 
Coastguard Worker%endif 6295*c0909341SAndroid Build Coastguard Worker sub srcq, 6 6296*c0909341SAndroid Build Coastguard Worker pslld m7, m8, 2 ; dx*4 6297*c0909341SAndroid Build Coastguard Worker pmaddwd m8, [base+rescale_mul] ; dx*[0-3] 6298*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0000 6299*c0909341SAndroid Build Coastguard Worker paddd m14, m8 ; mx+dx*[0-3] 6300*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6301*c0909341SAndroid Build Coastguard Worker movq m3, r4q 6302*c0909341SAndroid Build Coastguard Worker%else 6303*c0909341SAndroid Build Coastguard Worker movd m5, r4 6304*c0909341SAndroid Build Coastguard Worker movd m6, r5 6305*c0909341SAndroid Build Coastguard Worker punpckldq m5, m6 6306*c0909341SAndroid Build Coastguard Worker SWAP m3, m5 6307*c0909341SAndroid Build Coastguard Worker%endif 6308*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m3 6309*c0909341SAndroid Build Coastguard Worker psraw m3, 8 6310*c0909341SAndroid Build Coastguard Worker mova [stk+0x100], m7 6311*c0909341SAndroid Build Coastguard Worker mova [stk+0x120], m15 6312*c0909341SAndroid Build Coastguard Worker mov [stk+0x0f8], srcq 6313*c0909341SAndroid Build Coastguard Worker mov [stk+0x130], r0q ; dstq / tmpq 6314*c0909341SAndroid Build Coastguard Worker pshufd m0, m3, q0000 6315*c0909341SAndroid Build Coastguard Worker pshufd m1, m3, q1111 6316*c0909341SAndroid Build Coastguard Worker pshufd m2, m3, q2222 6317*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q3333 6318*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6319*c0909341SAndroid Build Coastguard Worker mova [stk+0x140], m0 6320*c0909341SAndroid Build Coastguard Worker mova [stk+0x150], m1 6321*c0909341SAndroid Build Coastguard Worker mova [stk+0x160], m2 6322*c0909341SAndroid Build Coastguard Worker mova [stk+0x170], m3 6323*c0909341SAndroid Build Coastguard Worker %if UNIX64 6324*c0909341SAndroid Build Coastguard Worker mov hm, hd 6325*c0909341SAndroid Build Coastguard 
Worker %endif 6326*c0909341SAndroid Build Coastguard Worker%else 6327*c0909341SAndroid Build Coastguard Worker mova [stk+0x180], m0 6328*c0909341SAndroid Build Coastguard Worker mova [stk+0x190], m1 6329*c0909341SAndroid Build Coastguard Worker mova [stk+0x1a0], m2 6330*c0909341SAndroid Build Coastguard Worker mova [stk+0x1b0], m3 6331*c0909341SAndroid Build Coastguard Worker SWAP m5, m3 6332*c0909341SAndroid Build Coastguard Worker mov r5, hm 6333*c0909341SAndroid Build Coastguard Worker mov [stk+0x134], r5 6334*c0909341SAndroid Build Coastguard Worker%endif 6335*c0909341SAndroid Build Coastguard Worker jmp .dy1_hloop 6336*c0909341SAndroid Build Coastguard Worker.dy1_hloop_prep: 6337*c0909341SAndroid Build Coastguard Worker dec dword [stk+0x0f0] 6338*c0909341SAndroid Build Coastguard Worker jz .ret 6339*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6340*c0909341SAndroid Build Coastguard Worker add qword [stk+0x130], 16 6341*c0909341SAndroid Build Coastguard Worker mov hd, hm 6342*c0909341SAndroid Build Coastguard Worker%else 6343*c0909341SAndroid Build Coastguard Worker add dword [stk+0x130], 16 6344*c0909341SAndroid Build Coastguard Worker mov r5, [stk+0x134] 6345*c0909341SAndroid Build Coastguard Worker mov r0, [stk+0x130] 6346*c0909341SAndroid Build Coastguard Worker%endif 6347*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0x100] 6348*c0909341SAndroid Build Coastguard Worker mova m14, [stk+0x110] 6349*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6350*c0909341SAndroid Build Coastguard Worker mova m10, [base+pd_0x3ff] 6351*c0909341SAndroid Build Coastguard Worker mova m11, [rsp+0x10] 6352*c0909341SAndroid Build Coastguard Worker%endif 6353*c0909341SAndroid Build Coastguard Worker mova m15, [stk+0x120] 6354*c0909341SAndroid Build Coastguard Worker mov srcq, [stk+0x0f8] 6355*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6356*c0909341SAndroid Build Coastguard Worker mov r0q, [stk+0x130] ; dstq / tmpq 6357*c0909341SAndroid 
Build Coastguard Worker%else 6358*c0909341SAndroid Build Coastguard Worker mov hm, r5 6359*c0909341SAndroid Build Coastguard Worker mov r0m, r0 6360*c0909341SAndroid Build Coastguard Worker mov r3, r3m 6361*c0909341SAndroid Build Coastguard Worker%endif 6362*c0909341SAndroid Build Coastguard Worker paddd m14, m7 6363*c0909341SAndroid Build Coastguard Worker.dy1_hloop: 6364*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6365*c0909341SAndroid Build Coastguard Worker mova m9, [base+pq_0x40000000] 6366*c0909341SAndroid Build Coastguard Worker%else 6367*c0909341SAndroid Build Coastguard Worker %define m9 [base+pq_0x40000000] 6368*c0909341SAndroid Build Coastguard Worker%endif 6369*c0909341SAndroid Build Coastguard Worker pxor m1, m1 6370*c0909341SAndroid Build Coastguard Worker psrld m2, m14, 10 6371*c0909341SAndroid Build Coastguard Worker mova [stk], m2 6372*c0909341SAndroid Build Coastguard Worker pand m6, m14, m10 6373*c0909341SAndroid Build Coastguard Worker psrld m6, 6 6374*c0909341SAndroid Build Coastguard Worker paddd m5, m15, m6 6375*c0909341SAndroid Build Coastguard Worker pcmpeqd m6, m1 6376*c0909341SAndroid Build Coastguard Worker pshufd m2, m5, q1032 6377*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6378*c0909341SAndroid Build Coastguard Worker movd r4d, m5 6379*c0909341SAndroid Build Coastguard Worker movd r6d, m2 6380*c0909341SAndroid Build Coastguard Worker pshufd m5, m5, q0321 6381*c0909341SAndroid Build Coastguard Worker pshufd m2, m2, q0321 6382*c0909341SAndroid Build Coastguard Worker movd r7d, m5 6383*c0909341SAndroid Build Coastguard Worker movd r9d, m2 6384*c0909341SAndroid Build Coastguard Worker movq m0, [base+subpel_filters+r4*8] 6385*c0909341SAndroid Build Coastguard Worker movq m1, [base+subpel_filters+r6*8] 6386*c0909341SAndroid Build Coastguard Worker movhps m0, [base+subpel_filters+r7*8] 6387*c0909341SAndroid Build Coastguard Worker movhps m1, [base+subpel_filters+r9*8] 6388*c0909341SAndroid Build Coastguard Worker%else 
6389*c0909341SAndroid Build Coastguard Worker movd r0, m5 6390*c0909341SAndroid Build Coastguard Worker movd rX, m2 6391*c0909341SAndroid Build Coastguard Worker pshufd m5, m5, q0321 6392*c0909341SAndroid Build Coastguard Worker pshufd m2, m2, q0321 6393*c0909341SAndroid Build Coastguard Worker movd r4, m5 6394*c0909341SAndroid Build Coastguard Worker movd r5, m2 6395*c0909341SAndroid Build Coastguard Worker movq m0, [base+subpel_filters+r0*8] 6396*c0909341SAndroid Build Coastguard Worker movq m1, [base+subpel_filters+rX*8] 6397*c0909341SAndroid Build Coastguard Worker movhps m0, [base+subpel_filters+r4*8] 6398*c0909341SAndroid Build Coastguard Worker movhps m1, [base+subpel_filters+r5*8] 6399*c0909341SAndroid Build Coastguard Worker%endif 6400*c0909341SAndroid Build Coastguard Worker paddd m14, m7 ; mx+dx*[4-7] 6401*c0909341SAndroid Build Coastguard Worker pand m5, m14, m10 6402*c0909341SAndroid Build Coastguard Worker psrld m5, 6 6403*c0909341SAndroid Build Coastguard Worker paddd m15, m5 6404*c0909341SAndroid Build Coastguard Worker pxor m2, m2 6405*c0909341SAndroid Build Coastguard Worker pcmpeqd m5, m2 6406*c0909341SAndroid Build Coastguard Worker mova [stk+0x110], m14 6407*c0909341SAndroid Build Coastguard Worker pshufd m4, m15, q1032 6408*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6409*c0909341SAndroid Build Coastguard Worker movd r10d, m15 6410*c0909341SAndroid Build Coastguard Worker movd r11d, m4 6411*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 6412*c0909341SAndroid Build Coastguard Worker pshufd m4, m4, q0321 6413*c0909341SAndroid Build Coastguard Worker movd r13d, m15 6414*c0909341SAndroid Build Coastguard Worker movd rXd, m4 6415*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+r10*8] 6416*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+r11*8] 6417*c0909341SAndroid Build Coastguard Worker movhps m2, [base+subpel_filters+r13*8] 6418*c0909341SAndroid Build Coastguard Worker 
movhps m3, [base+subpel_filters+ rX*8] 6419*c0909341SAndroid Build Coastguard Worker psrld m14, 10 6420*c0909341SAndroid Build Coastguard Worker movq r11, m14 6421*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m14 6422*c0909341SAndroid Build Coastguard Worker movq rX, m14 6423*c0909341SAndroid Build Coastguard Worker mov r10d, r11d 6424*c0909341SAndroid Build Coastguard Worker shr r11, 32 6425*c0909341SAndroid Build Coastguard Worker mov r13d, rXd 6426*c0909341SAndroid Build Coastguard Worker shr rX, 32 6427*c0909341SAndroid Build Coastguard Worker mov r4d, [stk+ 0] 6428*c0909341SAndroid Build Coastguard Worker mov r6d, [stk+ 4] 6429*c0909341SAndroid Build Coastguard Worker mov r7d, [stk+ 8] 6430*c0909341SAndroid Build Coastguard Worker mov r9d, [stk+12] 6431*c0909341SAndroid Build Coastguard Worker pshufd m4, m6, q1100 6432*c0909341SAndroid Build Coastguard Worker pshufd m6, m6, q3322 6433*c0909341SAndroid Build Coastguard Worker pshufd m14, m5, q1100 6434*c0909341SAndroid Build Coastguard Worker pshufd m5, m5, q3322 6435*c0909341SAndroid Build Coastguard Worker pand m7, m9, m4 6436*c0909341SAndroid Build Coastguard Worker pand m8, m9, m6 6437*c0909341SAndroid Build Coastguard Worker pand m15, m9, m14 6438*c0909341SAndroid Build Coastguard Worker pand m9, m9, m5 6439*c0909341SAndroid Build Coastguard Worker pandn m4, m0 6440*c0909341SAndroid Build Coastguard Worker pandn m6, m1 6441*c0909341SAndroid Build Coastguard Worker pandn m14, m2 6442*c0909341SAndroid Build Coastguard Worker pandn m5, m3 6443*c0909341SAndroid Build Coastguard Worker por m7, m4 6444*c0909341SAndroid Build Coastguard Worker por m8, m6 6445*c0909341SAndroid Build Coastguard Worker por m15, m14 6446*c0909341SAndroid Build Coastguard Worker por m9, m5 6447*c0909341SAndroid Build Coastguard Worker punpcklbw m0, m7, m7 6448*c0909341SAndroid Build Coastguard Worker punpckhbw m7, m7 6449*c0909341SAndroid Build Coastguard Worker punpcklbw m1, m8, m8 6450*c0909341SAndroid Build Coastguard 
Worker punpckhbw m8, m8 6451*c0909341SAndroid Build Coastguard Worker psraw m0, 8 6452*c0909341SAndroid Build Coastguard Worker psraw m7, 8 6453*c0909341SAndroid Build Coastguard Worker psraw m1, 8 6454*c0909341SAndroid Build Coastguard Worker psraw m8, 8 6455*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m15, m15 6456*c0909341SAndroid Build Coastguard Worker punpckhbw m15, m15 6457*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m9, m9 6458*c0909341SAndroid Build Coastguard Worker punpckhbw m9, m9 6459*c0909341SAndroid Build Coastguard Worker psraw m2, 8 6460*c0909341SAndroid Build Coastguard Worker psraw m15, 8 6461*c0909341SAndroid Build Coastguard Worker psraw m3, 8 6462*c0909341SAndroid Build Coastguard Worker psraw m9, 8 6463*c0909341SAndroid Build Coastguard Worker mova [stk+0x10], m0 6464*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m7 6465*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 6466*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m8 6467*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m2 6468*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m15 6469*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m3 6470*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m9 6471*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 9, 10 ; 0 6472*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m1 6473*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 1, 9, 10 ; 1 6474*c0909341SAndroid Build Coastguard Worker mova [stk+0xa0], m2 6475*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 3, 4, 5, 6, 1, 2, 9, 10 ; 2 6476*c0909341SAndroid Build Coastguard Worker mova [stk+0xb0], m3 6477*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 4, 5, 6, 1, 2, 3, 9, 10 ; 3 6478*c0909341SAndroid Build Coastguard Worker mova [stk+0xc0], m4 6479*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 5, 6, 1, 2, 3, 4, 9, 10 ; 
4 6480*c0909341SAndroid Build Coastguard Worker mova [stk+0xd0], m5 6481*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 6, 1, 2, 3, 4, 5, 9, 10 ; 5 6482*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 7, 1, 2, 3, 4, 5, 9, 10 ; 6 6483*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 8, 1, 2, 3, 4, 5, 9, 10 ; 7 6484*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0xd0] 6485*c0909341SAndroid Build Coastguard Worker mova m1, [stk+0x90] 6486*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0xa0] 6487*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0xb0] 6488*c0909341SAndroid Build Coastguard Worker mova m9, [stk+0xc0] 6489*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5, m6 ; 45a 6490*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m6 ; 45b 6491*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7, m8 ; 67a 6492*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m8 ; 67b 6493*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1, m2 ; 01a 6494*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m2 ; 01b 6495*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3, m9 ; 23a 6496*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m9 ; 23b 6497*c0909341SAndroid Build Coastguard Worker mova m10, [stk+0x140] 6498*c0909341SAndroid Build Coastguard Worker mova m11, [stk+0x150] 6499*c0909341SAndroid Build Coastguard Worker mova m14, [stk+0x160] 6500*c0909341SAndroid Build Coastguard Worker mova m15, [stk+0x170] 6501*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m4 6502*c0909341SAndroid Build Coastguard Worker mova [stk+0xa0], m5 6503*c0909341SAndroid Build Coastguard Worker mova [stk+0xb0], m6 6504*c0909341SAndroid Build Coastguard Worker mova [stk+0xc0], m7 6505*c0909341SAndroid Build Coastguard Worker %define hround [rsp+0x10] 6506*c0909341SAndroid Build Coastguard Worker %define shift [rsp+0x20] 6507*c0909341SAndroid Build Coastguard Worker %if isput 6508*c0909341SAndroid 
Build Coastguard Worker %define vround [rsp+0x30] 6509*c0909341SAndroid Build Coastguard Worker %else 6510*c0909341SAndroid Build Coastguard Worker %define vround [base+pd_m524256] 6511*c0909341SAndroid Build Coastguard Worker %endif 6512*c0909341SAndroid Build Coastguard Worker.dy1_vloop: 6513*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m0, m10 6514*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m1, m10 6515*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m2, m11 6516*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m3, m11 6517*c0909341SAndroid Build Coastguard Worker paddd m4, m13 6518*c0909341SAndroid Build Coastguard Worker paddd m5, m13 6519*c0909341SAndroid Build Coastguard Worker paddd m4, m6 6520*c0909341SAndroid Build Coastguard Worker paddd m5, m7 6521*c0909341SAndroid Build Coastguard Worker pmaddwd m6, [stk+0x90], m14 6522*c0909341SAndroid Build Coastguard Worker pmaddwd m7, [stk+0xa0], m14 6523*c0909341SAndroid Build Coastguard Worker pmaddwd m8, [stk+0xb0], m15 6524*c0909341SAndroid Build Coastguard Worker pmaddwd m9, [stk+0xc0], m15 6525*c0909341SAndroid Build Coastguard Worker paddd m4, m6 6526*c0909341SAndroid Build Coastguard Worker paddd m5, m7 6527*c0909341SAndroid Build Coastguard Worker %if isput 6528*c0909341SAndroid Build Coastguard Worker pshufd m6, m12, q1032 6529*c0909341SAndroid Build Coastguard Worker %endif 6530*c0909341SAndroid Build Coastguard Worker paddd m4, m8 6531*c0909341SAndroid Build Coastguard Worker paddd m5, m9 6532*c0909341SAndroid Build Coastguard Worker%else 6533*c0909341SAndroid Build Coastguard Worker movd r0, m15 6534*c0909341SAndroid Build Coastguard Worker movd rX, m4 6535*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 6536*c0909341SAndroid Build Coastguard Worker pshufd m4, m4, q0321 6537*c0909341SAndroid Build Coastguard Worker movd r4, m15 6538*c0909341SAndroid Build Coastguard Worker movd r5, m4 6539*c0909341SAndroid Build Coastguard Worker mova m14, [stk+0x110] 
6540*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+r0*8] 6541*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+rX*8] 6542*c0909341SAndroid Build Coastguard Worker movhps m2, [base+subpel_filters+r4*8] 6543*c0909341SAndroid Build Coastguard Worker movhps m3, [base+subpel_filters+r5*8] 6544*c0909341SAndroid Build Coastguard Worker psrld m14, 10 6545*c0909341SAndroid Build Coastguard Worker mova [stk+16], m14 6546*c0909341SAndroid Build Coastguard Worker mov r0, [stk+ 0] 6547*c0909341SAndroid Build Coastguard Worker mov rX, [stk+ 4] 6548*c0909341SAndroid Build Coastguard Worker mov r4, [stk+ 8] 6549*c0909341SAndroid Build Coastguard Worker mov r5, [stk+12] 6550*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m0 6551*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 6552*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 6553*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 6554*c0909341SAndroid Build Coastguard Worker pshufd m4, m6, q1100 6555*c0909341SAndroid Build Coastguard Worker pshufd m6, m6, q3322 6556*c0909341SAndroid Build Coastguard Worker pshufd m7, m5, q1100 6557*c0909341SAndroid Build Coastguard Worker pshufd m5, m5, q3322 6558*c0909341SAndroid Build Coastguard Worker pand m0, m9, m4 6559*c0909341SAndroid Build Coastguard Worker pand m1, m9, m6 6560*c0909341SAndroid Build Coastguard Worker pand m2, m9, m7 6561*c0909341SAndroid Build Coastguard Worker pand m3, m9, m5 6562*c0909341SAndroid Build Coastguard Worker pandn m4, [stk+0x20] 6563*c0909341SAndroid Build Coastguard Worker pandn m6, [stk+0x30] 6564*c0909341SAndroid Build Coastguard Worker pandn m7, [stk+0x40] 6565*c0909341SAndroid Build Coastguard Worker pandn m5, [stk+0x50] 6566*c0909341SAndroid Build Coastguard Worker por m0, m4 6567*c0909341SAndroid Build Coastguard Worker por m1, m6 6568*c0909341SAndroid Build Coastguard Worker por m2, m7 6569*c0909341SAndroid Build Coastguard Worker por m3, m5 
6570*c0909341SAndroid Build Coastguard Worker punpcklbw m4, m0, m0 6571*c0909341SAndroid Build Coastguard Worker punpckhbw m0, m0 6572*c0909341SAndroid Build Coastguard Worker punpcklbw m5, m1, m1 6573*c0909341SAndroid Build Coastguard Worker punpckhbw m1, m1 6574*c0909341SAndroid Build Coastguard Worker psraw m4, 8 6575*c0909341SAndroid Build Coastguard Worker psraw m0, 8 6576*c0909341SAndroid Build Coastguard Worker psraw m5, 8 6577*c0909341SAndroid Build Coastguard Worker psraw m1, 8 6578*c0909341SAndroid Build Coastguard Worker punpcklbw m6, m2, m2 6579*c0909341SAndroid Build Coastguard Worker punpckhbw m2, m2 6580*c0909341SAndroid Build Coastguard Worker punpcklbw m7, m3, m3 6581*c0909341SAndroid Build Coastguard Worker punpckhbw m3, m3 6582*c0909341SAndroid Build Coastguard Worker psraw m6, 8 6583*c0909341SAndroid Build Coastguard Worker psraw m2, 8 6584*c0909341SAndroid Build Coastguard Worker psraw m7, 8 6585*c0909341SAndroid Build Coastguard Worker psraw m3, 8 6586*c0909341SAndroid Build Coastguard Worker mova [stk+0x0a0], m4 6587*c0909341SAndroid Build Coastguard Worker mova [stk+0x0b0], m0 6588*c0909341SAndroid Build Coastguard Worker mova [stk+0x0c0], m5 6589*c0909341SAndroid Build Coastguard Worker mova [stk+0x0d0], m1 6590*c0909341SAndroid Build Coastguard Worker mova [stk+0x140], m6 6591*c0909341SAndroid Build Coastguard Worker mova [stk+0x150], m2 6592*c0909341SAndroid Build Coastguard Worker mova [stk+0x160], m7 6593*c0909341SAndroid Build Coastguard Worker mova [stk+0x170], m3 6594*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x20, 0 ; 0 6595*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x30 ; 1 6596*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x40 ; 2 6597*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x50 ; 3 6598*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x60 ; 4 6599*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x70 ; 5 
6600*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x80 ; 6 6601*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x90 ; 7 6602*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0x60] 6603*c0909341SAndroid Build Coastguard Worker mova m6, [stk+0x70] 6604*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0x80] 6605*c0909341SAndroid Build Coastguard Worker mova m0, [stk+0x90] 6606*c0909341SAndroid Build Coastguard Worker mov r0, r0m 6607*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5, m6 ; 45a 6608*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m6 ; 45b 6609*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7, m0 ; 67a 6610*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m0 ; 67b 6611*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m4 6612*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m5 6613*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m6 6614*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m7 6615*c0909341SAndroid Build Coastguard Worker mova m1, [stk+0x20] 6616*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0x30] 6617*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0x40] 6618*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x50] 6619*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1, m2 ; 01a 6620*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m2 ; 01b 6621*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3, m4 ; 23a 6622*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 ; 23b 6623*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x180] 6624*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0x190] 6625*c0909341SAndroid Build Coastguard Worker mova m6, [stk+0x1a0] 6626*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0x1b0] 6627*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m0 6628*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 6629*c0909341SAndroid 
Build Coastguard Worker mova [stk+0x40], m2 6630*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 6631*c0909341SAndroid Build Coastguard Worker.dy1_vloop: 6632*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m4 6633*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m4 6634*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m5 6635*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m5 6636*c0909341SAndroid Build Coastguard Worker paddd m0, m2 6637*c0909341SAndroid Build Coastguard Worker paddd m1, m3 6638*c0909341SAndroid Build Coastguard Worker pmaddwd m2, [stk+0x60], m6 6639*c0909341SAndroid Build Coastguard Worker pmaddwd m3, [stk+0x70], m6 6640*c0909341SAndroid Build Coastguard Worker pmaddwd m4, [stk+0x80], m7 6641*c0909341SAndroid Build Coastguard Worker pmaddwd m5, [stk+0x90], m7 6642*c0909341SAndroid Build Coastguard Worker %if isput 6643*c0909341SAndroid Build Coastguard Worker movd m6, [esp+0x18] 6644*c0909341SAndroid Build Coastguard Worker %endif 6645*c0909341SAndroid Build Coastguard Worker paddd m0, m2 6646*c0909341SAndroid Build Coastguard Worker paddd m1, m3 6647*c0909341SAndroid Build Coastguard Worker paddd m0, vrnd_mem 6648*c0909341SAndroid Build Coastguard Worker paddd m1, vrnd_mem 6649*c0909341SAndroid Build Coastguard Worker paddd m4, m0 6650*c0909341SAndroid Build Coastguard Worker paddd m5, m1 6651*c0909341SAndroid Build Coastguard Worker%endif 6652*c0909341SAndroid Build Coastguard Worker%ifidn %1, put 6653*c0909341SAndroid Build Coastguard Worker psrad m4, m6 6654*c0909341SAndroid Build Coastguard Worker psrad m5, m6 6655*c0909341SAndroid Build Coastguard Worker packssdw m4, m5 6656*c0909341SAndroid Build Coastguard Worker pxor m7, m7 6657*c0909341SAndroid Build Coastguard Worker pmaxsw m4, m7 6658*c0909341SAndroid Build Coastguard Worker pminsw m4, pxmaxm 6659*c0909341SAndroid Build Coastguard Worker mova [dstq], m4 6660*c0909341SAndroid Build Coastguard Worker add dstq, dsm 6661*c0909341SAndroid Build Coastguard 
Worker%else 6662*c0909341SAndroid Build Coastguard Worker psrad m4, 6 6663*c0909341SAndroid Build Coastguard Worker psrad m5, 6 6664*c0909341SAndroid Build Coastguard Worker packssdw m4, m5 6665*c0909341SAndroid Build Coastguard Worker mova [tmpq], m4 6666*c0909341SAndroid Build Coastguard Worker add tmpq, tmp_stridem 6667*c0909341SAndroid Build Coastguard Worker%endif 6668*c0909341SAndroid Build Coastguard Worker dec hd 6669*c0909341SAndroid Build Coastguard Worker jz .dy1_hloop_prep 6670*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6671*c0909341SAndroid Build Coastguard Worker movu m8, [srcq+r10*2] 6672*c0909341SAndroid Build Coastguard Worker movu m9, [srcq+r11*2] 6673*c0909341SAndroid Build Coastguard Worker movu m12, [srcq+r13*2] 6674*c0909341SAndroid Build Coastguard Worker movu m13, [srcq+ rX*2] 6675*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ r4*2] 6676*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ r6*2] 6677*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ r7*2] 6678*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ r9*2] 6679*c0909341SAndroid Build Coastguard Worker add srcq, ssq 6680*c0909341SAndroid Build Coastguard Worker pmaddwd m8, [stk+0x50] 6681*c0909341SAndroid Build Coastguard Worker pmaddwd m9, [stk+0x60] 6682*c0909341SAndroid Build Coastguard Worker pmaddwd m12, [stk+0x70] 6683*c0909341SAndroid Build Coastguard Worker pmaddwd m13, [stk+0x80] 6684*c0909341SAndroid Build Coastguard Worker pmaddwd m4, [stk+0x10] 6685*c0909341SAndroid Build Coastguard Worker pmaddwd m5, [stk+0x20] 6686*c0909341SAndroid Build Coastguard Worker pmaddwd m6, [stk+0x30] 6687*c0909341SAndroid Build Coastguard Worker pmaddwd m7, [stk+0x40] 6688*c0909341SAndroid Build Coastguard Worker phaddd m8, m9 6689*c0909341SAndroid Build Coastguard Worker phaddd m12, m13 6690*c0909341SAndroid Build Coastguard Worker mova m9, [base+unpckw] 6691*c0909341SAndroid Build Coastguard Worker mova m13, hround 6692*c0909341SAndroid Build 
Coastguard Worker phaddd m4, m5 6693*c0909341SAndroid Build Coastguard Worker phaddd m6, m7 6694*c0909341SAndroid Build Coastguard Worker phaddd m8, m12 6695*c0909341SAndroid Build Coastguard Worker phaddd m4, m6 6696*c0909341SAndroid Build Coastguard Worker pshufd m5, m9, q1032 6697*c0909341SAndroid Build Coastguard Worker pshufb m0, m9 ; 0a 1a 6698*c0909341SAndroid Build Coastguard Worker pshufb m1, m9 ; 0b 1b 6699*c0909341SAndroid Build Coastguard Worker pshufb m2, m5 ; 3a 2a 6700*c0909341SAndroid Build Coastguard Worker pshufb m3, m5 ; 3b 2b 6701*c0909341SAndroid Build Coastguard Worker mova m12, shift 6702*c0909341SAndroid Build Coastguard Worker paddd m4, m13 6703*c0909341SAndroid Build Coastguard Worker paddd m8, m13 6704*c0909341SAndroid Build Coastguard Worker psrad m4, m12 6705*c0909341SAndroid Build Coastguard Worker psrad m8, m12 6706*c0909341SAndroid Build Coastguard Worker packssdw m4, m8 6707*c0909341SAndroid Build Coastguard Worker pshufb m6, [stk+0x90], m9 ; 4a 5a 6708*c0909341SAndroid Build Coastguard Worker pshufb m7, [stk+0xa0], m9 ; 4b 5b 6709*c0909341SAndroid Build Coastguard Worker pshufb m8, [stk+0xb0], m5 ; 7a 6a 6710*c0909341SAndroid Build Coastguard Worker pshufb m13, [stk+0xc0], m5 ; 7b 6b 6711*c0909341SAndroid Build Coastguard Worker punpckhwd m0, m2 ; 12a 6712*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m3 ; 12b 6713*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m6 ; 34a 6714*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m7 ; 34b 6715*c0909341SAndroid Build Coastguard Worker punpckhwd m6, m8 ; 56a 6716*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m13 ; 56b 6717*c0909341SAndroid Build Coastguard Worker punpcklwd m8, m4 ; 78a 6718*c0909341SAndroid Build Coastguard Worker punpckhqdq m4, m4 6719*c0909341SAndroid Build Coastguard Worker punpcklwd m13, m4 ; 78b 6720*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m6 6721*c0909341SAndroid Build Coastguard Worker mova [stk+0xa0], m7 
6722*c0909341SAndroid Build Coastguard Worker mova [stk+0xb0], m8 6723*c0909341SAndroid Build Coastguard Worker mova [stk+0xc0], m13 6724*c0909341SAndroid Build Coastguard Worker mova m13, vround 6725*c0909341SAndroid Build Coastguard Worker%else 6726*c0909341SAndroid Build Coastguard Worker mov r0m, r0 6727*c0909341SAndroid Build Coastguard Worker mov r3, r3m 6728*c0909341SAndroid Build Coastguard Worker mov r0, [stk+ 0] 6729*c0909341SAndroid Build Coastguard Worker mov rX, [stk+ 4] 6730*c0909341SAndroid Build Coastguard Worker mov r4, [stk+ 8] 6731*c0909341SAndroid Build Coastguard Worker mov r5, [stk+12] 6732*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0xe0, 0 ; 8 6733*c0909341SAndroid Build Coastguard Worker mova m7, [base+unpckw] 6734*c0909341SAndroid Build Coastguard Worker pshufd m4, m7, q1032 6735*c0909341SAndroid Build Coastguard Worker pshufb m0, [stk+0x20], m7 ; 0a 1a 6736*c0909341SAndroid Build Coastguard Worker pshufb m1, [stk+0x30], m7 ; 0b 1b 6737*c0909341SAndroid Build Coastguard Worker pshufb m2, [stk+0x40], m4 ; 3a 2a 6738*c0909341SAndroid Build Coastguard Worker pshufb m3, [stk+0x50], m4 ; 3b 2b 6739*c0909341SAndroid Build Coastguard Worker pshufb m5, [stk+0x60], m7 ; 4a 5a 6740*c0909341SAndroid Build Coastguard Worker pshufb m6, [stk+0x70], m7 ; 4b 5b 6741*c0909341SAndroid Build Coastguard Worker pshufb m7, [stk+0x80], m4 ; 7a 6a 6742*c0909341SAndroid Build Coastguard Worker punpckhwd m0, m2 ; 12a 6743*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m3 ; 12b 6744*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m5 ; 34a 6745*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m6 ; 34b 6746*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m0 6747*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 6748*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 6749*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 6750*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m7 ; 
56a 6751*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m5 6752*c0909341SAndroid Build Coastguard Worker pshufb m5, [stk+0x90], m4 ; 7b 6b 6753*c0909341SAndroid Build Coastguard Worker punpcklwd m7, [stk+0xe0] ; 78a 6754*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x180] 6755*c0909341SAndroid Build Coastguard Worker punpckhwd m6, m5 ; 56b 6756*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m6 6757*c0909341SAndroid Build Coastguard Worker movq m6, [stk+0xe8] 6758*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m7 6759*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0x1b0] 6760*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m6 6761*c0909341SAndroid Build Coastguard Worker mova m6, [stk+0x1a0] 6762*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m5 6763*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0x190] 6764*c0909341SAndroid Build Coastguard Worker mov r0, r0m 6765*c0909341SAndroid Build Coastguard Worker%endif 6766*c0909341SAndroid Build Coastguard Worker jmp .dy1_vloop 6767*c0909341SAndroid Build Coastguard WorkerINIT_XMM ssse3 6768*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6769*c0909341SAndroid Build Coastguard Worker %define stk rsp+0x20 6770*c0909341SAndroid Build Coastguard Worker%endif 6771*c0909341SAndroid Build Coastguard Worker.dy2: 6772*c0909341SAndroid Build Coastguard Worker movzx wd, word [base+%1_8tap_scaled_ssse3_dy2_table+wq*2] 6773*c0909341SAndroid Build Coastguard Worker add wq, base_reg 6774*c0909341SAndroid Build Coastguard Worker jmp wq 6775*c0909341SAndroid Build Coastguard Worker%if isput 6776*c0909341SAndroid Build Coastguard Worker.dy2_w2: 6777*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 6778*c0909341SAndroid Build Coastguard Worker mov myd, mym 6779*c0909341SAndroid Build Coastguard Worker mova [rsp+0x10], m13 6780*c0909341SAndroid Build Coastguard Worker %define vrnd_mem [rsp+0x10] 6781*c0909341SAndroid Build Coastguard Worker movzx 
t0d, t0b 6782*c0909341SAndroid Build Coastguard Worker sub srcq, 2 6783*c0909341SAndroid Build Coastguard Worker movd m15, t0d 6784*c0909341SAndroid Build Coastguard Worker %else 6785*c0909341SAndroid Build Coastguard Worker %define m8 m0 6786*c0909341SAndroid Build Coastguard Worker %define m9 m1 6787*c0909341SAndroid Build Coastguard Worker %define m14 m4 6788*c0909341SAndroid Build Coastguard Worker %define m15 m3 6789*c0909341SAndroid Build Coastguard Worker %define m11 [esp+0x00] 6790*c0909341SAndroid Build Coastguard Worker %define m12 [esp+0x10] 6791*c0909341SAndroid Build Coastguard Worker %define vrnd_mem [esp+0x20] 6792*c0909341SAndroid Build Coastguard Worker mov r1, r1m 6793*c0909341SAndroid Build Coastguard Worker movzx r5, byte [esp+0x1f0] 6794*c0909341SAndroid Build Coastguard Worker sub srcq, 2 6795*c0909341SAndroid Build Coastguard Worker movd m15, r5 6796*c0909341SAndroid Build Coastguard Worker %endif 6797*c0909341SAndroid Build Coastguard Worker pxor m9, m9 6798*c0909341SAndroid Build Coastguard Worker punpckldq m9, m8 6799*c0909341SAndroid Build Coastguard Worker paddd m14, m9 ; mx+dx*[0-1] 6800*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 6801*c0909341SAndroid Build Coastguard Worker mova m9, [base+pd_0x4000] 6802*c0909341SAndroid Build Coastguard Worker %endif 6803*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0000 6804*c0909341SAndroid Build Coastguard Worker pand m8, m14, m10 6805*c0909341SAndroid Build Coastguard Worker psrld m8, 6 6806*c0909341SAndroid Build Coastguard Worker paddd m15, m8 6807*c0909341SAndroid Build Coastguard Worker movd r4d, m15 6808*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 6809*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 6810*c0909341SAndroid Build Coastguard Worker movd r6d, m15 6811*c0909341SAndroid Build Coastguard Worker %else 6812*c0909341SAndroid Build Coastguard Worker movd r3d, m15 6813*c0909341SAndroid Build Coastguard Worker %endif 
6814*c0909341SAndroid Build Coastguard Worker mova m5, [base+bdct_lb_q] 6815*c0909341SAndroid Build Coastguard Worker mova m6, [base+spel_s_shuf2] 6816*c0909341SAndroid Build Coastguard Worker movd m15, [base+subpel_filters+r4*8+2] 6817*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 6818*c0909341SAndroid Build Coastguard Worker movd m7, [base+subpel_filters+r6*8+2] 6819*c0909341SAndroid Build Coastguard Worker %else 6820*c0909341SAndroid Build Coastguard Worker movd m7, [base+subpel_filters+r3*8+2] 6821*c0909341SAndroid Build Coastguard Worker %endif 6822*c0909341SAndroid Build Coastguard Worker pxor m2, m2 6823*c0909341SAndroid Build Coastguard Worker pcmpeqd m8, m2 6824*c0909341SAndroid Build Coastguard Worker psrld m14, 10 6825*c0909341SAndroid Build Coastguard Worker paddd m14, m14 6826*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 6827*c0909341SAndroid Build Coastguard Worker mov r3, r3m 6828*c0909341SAndroid Build Coastguard Worker pshufb m14, m5 6829*c0909341SAndroid Build Coastguard Worker paddb m14, m6 6830*c0909341SAndroid Build Coastguard Worker mova [stk], m14 6831*c0909341SAndroid Build Coastguard Worker SWAP m5, m0 6832*c0909341SAndroid Build Coastguard Worker SWAP m6, m3 6833*c0909341SAndroid Build Coastguard Worker %define m15 m6 6834*c0909341SAndroid Build Coastguard Worker %endif 6835*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+ssq*0] 6836*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*2] 6837*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*4] 6838*c0909341SAndroid Build Coastguard Worker punpckldq m15, m7 6839*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 6840*c0909341SAndroid Build Coastguard Worker pshufb m14, m5 6841*c0909341SAndroid Build Coastguard Worker paddb m14, m6 6842*c0909341SAndroid Build Coastguard Worker pand m9, m8 6843*c0909341SAndroid Build Coastguard Worker pandn m8, m15 6844*c0909341SAndroid Build Coastguard Worker SWAP m15, m8 6845*c0909341SAndroid Build 
Coastguard Worker por m15, m9 6846*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ssq*1] 6847*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ss3q ] 6848*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*4] 6849*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*1] 6850*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 6851*c0909341SAndroid Build Coastguard Worker shr myd, 6 6852*c0909341SAndroid Build Coastguard Worker mov r4d, 64 << 24 6853*c0909341SAndroid Build Coastguard Worker lea myd, [t1+myq] 6854*c0909341SAndroid Build Coastguard Worker cmovnz r4q, [base+subpel_filters+myq*8] 6855*c0909341SAndroid Build Coastguard Worker %else 6856*c0909341SAndroid Build Coastguard Worker pand m7, m5, [base+pd_0x4000] 6857*c0909341SAndroid Build Coastguard Worker pandn m5, m15 6858*c0909341SAndroid Build Coastguard Worker por m5, m7 6859*c0909341SAndroid Build Coastguard Worker %define m15 m5 6860*c0909341SAndroid Build Coastguard Worker mov myd, mym 6861*c0909341SAndroid Build Coastguard Worker mov r5, [esp+0x1f4] 6862*c0909341SAndroid Build Coastguard Worker xor r3, r3 6863*c0909341SAndroid Build Coastguard Worker shr myd, 6 6864*c0909341SAndroid Build Coastguard Worker lea r5, [r5+myd] 6865*c0909341SAndroid Build Coastguard Worker mov r4, 64 << 24 6866*c0909341SAndroid Build Coastguard Worker cmovnz r4, [base+subpel_filters+r5*8+0] 6867*c0909341SAndroid Build Coastguard Worker cmovnz r3, [base+subpel_filters+r5*8+4] 6868*c0909341SAndroid Build Coastguard Worker mov [stk+0x20], r3 6869*c0909341SAndroid Build Coastguard Worker mov r3, r3m 6870*c0909341SAndroid Build Coastguard Worker %endif 6871*c0909341SAndroid Build Coastguard Worker punpcklbw m15, m15 6872*c0909341SAndroid Build Coastguard Worker psraw m15, 8 6873*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m0, m1, m2 6874*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m0, m1, m2 6875*c0909341SAndroid Build Coastguard Worker %if 
ARCH_X86_64 6876*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m4, m5, m6 6877*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m4, m5, m6 6878*c0909341SAndroid Build Coastguard Worker phaddd m0, m1 6879*c0909341SAndroid Build Coastguard Worker phaddd m1, m2 6880*c0909341SAndroid Build Coastguard Worker phaddd m4, m5 6881*c0909341SAndroid Build Coastguard Worker phaddd m5, m6 6882*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m1, m4, m5 6883*c0909341SAndroid Build Coastguard Worker REPX {psrad x, m12}, m0, m1, m4, m5 6884*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 ; 0 2 2 4 6885*c0909341SAndroid Build Coastguard Worker packssdw m4, m5 ; 1 3 3 5 6886*c0909341SAndroid Build Coastguard Worker SWAP m2, m4 6887*c0909341SAndroid Build Coastguard Worker movq m10, r4 6888*c0909341SAndroid Build Coastguard Worker %else 6889*c0909341SAndroid Build Coastguard Worker mova [stk+0x10], m15 6890*c0909341SAndroid Build Coastguard Worker phaddd m0, m1 6891*c0909341SAndroid Build Coastguard Worker phaddd m1, m2 6892*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+ssq*1] 6893*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ss3q ] 6894*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*4] 6895*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*1] 6896*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 6897*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m2, m7, m6 6898*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m2, m7, m6 6899*c0909341SAndroid Build Coastguard Worker %define m14 [stk+0x00] 6900*c0909341SAndroid Build Coastguard Worker %define m15 [stk+0x10] 6901*c0909341SAndroid Build Coastguard Worker phaddd m2, m7 6902*c0909341SAndroid Build Coastguard Worker phaddd m7, m6 6903*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m1, m2, m7 6904*c0909341SAndroid Build Coastguard Worker REPX {psrad x, m12}, m0, m1, m2, m7 
6905*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 6906*c0909341SAndroid Build Coastguard Worker packssdw m2, m7 6907*c0909341SAndroid Build Coastguard Worker %define m8 m6 6908*c0909341SAndroid Build Coastguard Worker %define m9 m4 6909*c0909341SAndroid Build Coastguard Worker %define m10 m5 6910*c0909341SAndroid Build Coastguard Worker movd m10, r4 6911*c0909341SAndroid Build Coastguard Worker movd m9, [stk+0x20] 6912*c0909341SAndroid Build Coastguard Worker punpckldq m10, m9 6913*c0909341SAndroid Build Coastguard Worker %endif 6914*c0909341SAndroid Build Coastguard Worker punpcklbw m10, m10 6915*c0909341SAndroid Build Coastguard Worker psraw m10, 8 6916*c0909341SAndroid Build Coastguard Worker pshufd m7, m10, q0000 6917*c0909341SAndroid Build Coastguard Worker pshufd m8, m10, q1111 6918*c0909341SAndroid Build Coastguard Worker pshufd m9, m10, q2222 6919*c0909341SAndroid Build Coastguard Worker pshufd m10, m10, q3333 6920*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 6921*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m7 6922*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m8 6923*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m9 6924*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m10 6925*c0909341SAndroid Build Coastguard Worker %xdefine m13 m7 6926*c0909341SAndroid Build Coastguard Worker %define m7 [stk+0x50] 6927*c0909341SAndroid Build Coastguard Worker %define m8 [stk+0x60] 6928*c0909341SAndroid Build Coastguard Worker %define m9 [stk+0x70] 6929*c0909341SAndroid Build Coastguard Worker %define m10 [stk+0x80] 6930*c0909341SAndroid Build Coastguard Worker %endif 6931*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m0, m2 ; 01 23 6932*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m0, m2 ; 23 45 6933*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 6934*c0909341SAndroid Build Coastguard Worker mov r4, r0m 6935*c0909341SAndroid Build Coastguard Worker %define dstq r4 
6936*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m3 6937*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m0 6938*c0909341SAndroid Build Coastguard Worker %endif 6939*c0909341SAndroid Build Coastguard Worker.dy2_w2_loop: 6940*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+ssq*0] 6941*c0909341SAndroid Build Coastguard Worker movu m5, [srcq+ssq*1] 6942*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*2] 6943*c0909341SAndroid Build Coastguard Worker movu m13, [srcq+ss3q ] 6944*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*4] 6945*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m8 6946*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m4, m5, m6, m13 6947*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m4, m5, m6, m13 6948*c0909341SAndroid Build Coastguard Worker phaddd m4, m5 6949*c0909341SAndroid Build Coastguard Worker phaddd m6, m13 6950*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m1, m7 6951*c0909341SAndroid Build Coastguard Worker paddd m4, m11 6952*c0909341SAndroid Build Coastguard Worker paddd m6, m11 6953*c0909341SAndroid Build Coastguard Worker psrad m4, m12 6954*c0909341SAndroid Build Coastguard Worker psrad m6, m12 6955*c0909341SAndroid Build Coastguard Worker packssdw m4, m6 ; 6 7 8 9 6956*c0909341SAndroid Build Coastguard Worker paddd m5, m3 6957*c0909341SAndroid Build Coastguard Worker pshufd m3, m4, q2200 6958*c0909341SAndroid Build Coastguard Worker pshufd m4, m4, q3311 6959*c0909341SAndroid Build Coastguard Worker palignr m3, m0, 12 ; 4 6 6 8 6960*c0909341SAndroid Build Coastguard Worker palignr m4, m2, 12 ; 5 7 7 9 6961*c0909341SAndroid Build Coastguard Worker mova m0, m3 6962*c0909341SAndroid Build Coastguard Worker mova m2, m4 6963*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m3, m4 6964*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 6965*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m1, m9 6966*c0909341SAndroid Build 
Coastguard Worker pmaddwd m4, m3, m10 6967*c0909341SAndroid Build Coastguard Worker paddd m5, vrnd_mem 6968*c0909341SAndroid Build Coastguard Worker paddd m6, m4 6969*c0909341SAndroid Build Coastguard Worker paddd m5, m6 6970*c0909341SAndroid Build Coastguard Worker pshufd m4, m12, q1032 6971*c0909341SAndroid Build Coastguard Worker pxor m6, m6 6972*c0909341SAndroid Build Coastguard Worker psrad m5, m4 6973*c0909341SAndroid Build Coastguard Worker packssdw m5, m5 6974*c0909341SAndroid Build Coastguard Worker pmaxsw m5, m6 6975*c0909341SAndroid Build Coastguard Worker pminsw m5, pxmaxm 6976*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*0], m5 6977*c0909341SAndroid Build Coastguard Worker pshuflw m5, m5, q1032 6978*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*1], m5 6979*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 6980*c0909341SAndroid Build Coastguard Worker sub hd, 2 6981*c0909341SAndroid Build Coastguard Worker jg .dy2_w2_loop 6982*c0909341SAndroid Build Coastguard Worker RET 6983*c0909341SAndroid Build Coastguard Worker%endif 6984*c0909341SAndroid Build Coastguard WorkerINIT_XMM ssse3 6985*c0909341SAndroid Build Coastguard Worker.dy2_w4: 6986*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 6987*c0909341SAndroid Build Coastguard Worker mov myd, mym 6988*c0909341SAndroid Build Coastguard Worker mova [rsp+0x10], m11 6989*c0909341SAndroid Build Coastguard Worker mova [rsp+0x20], m12 6990*c0909341SAndroid Build Coastguard Worker %if isput 6991*c0909341SAndroid Build Coastguard Worker mova [rsp+0x30], m13 6992*c0909341SAndroid Build Coastguard Worker %define vrnd_mem [rsp+0x30] 6993*c0909341SAndroid Build Coastguard Worker %define stk rsp+0x40 6994*c0909341SAndroid Build Coastguard Worker %else 6995*c0909341SAndroid Build Coastguard Worker %define vrnd_mem [base+pd_m524256] 6996*c0909341SAndroid Build Coastguard Worker %define stk rsp+0x30 6997*c0909341SAndroid Build Coastguard Worker %endif 6998*c0909341SAndroid Build 
Coastguard Worker movzx t0d, t0b 6999*c0909341SAndroid Build Coastguard Worker sub srcq, 2 7000*c0909341SAndroid Build Coastguard Worker movd m15, t0d 7001*c0909341SAndroid Build Coastguard Worker%else 7002*c0909341SAndroid Build Coastguard Worker %define m10 [base+pd_0x3ff] 7003*c0909341SAndroid Build Coastguard Worker %define m9 [base+pd_0x4000] 7004*c0909341SAndroid Build Coastguard Worker %define m8 m0 7005*c0909341SAndroid Build Coastguard Worker %xdefine m14 m4 7006*c0909341SAndroid Build Coastguard Worker %define m15 m3 7007*c0909341SAndroid Build Coastguard Worker %if isprep 7008*c0909341SAndroid Build Coastguard Worker %define ssq r3 7009*c0909341SAndroid Build Coastguard Worker %endif 7010*c0909341SAndroid Build Coastguard Worker movzx r5, byte [esp+0x1f0] 7011*c0909341SAndroid Build Coastguard Worker sub srcq, 2 7012*c0909341SAndroid Build Coastguard Worker movd m15, r5 7013*c0909341SAndroid Build Coastguard Worker%endif 7014*c0909341SAndroid Build Coastguard Worker pmaddwd m8, [base+rescale_mul] 7015*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7016*c0909341SAndroid Build Coastguard Worker mova m9, [base+pd_0x4000] 7017*c0909341SAndroid Build Coastguard Worker%endif 7018*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0000 7019*c0909341SAndroid Build Coastguard Worker paddd m14, m8 ; mx+dx*[0-3] 7020*c0909341SAndroid Build Coastguard Worker pand m0, m14, m10 7021*c0909341SAndroid Build Coastguard Worker psrld m0, 6 7022*c0909341SAndroid Build Coastguard Worker paddd m15, m0 7023*c0909341SAndroid Build Coastguard Worker pshufd m7, m15, q1032 7024*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7025*c0909341SAndroid Build Coastguard Worker movd r4d, m15 7026*c0909341SAndroid Build Coastguard Worker movd r11d, m7 7027*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 7028*c0909341SAndroid Build Coastguard Worker pshufd m7, m7, q0321 7029*c0909341SAndroid Build Coastguard Worker movd r6d, m15 7030*c0909341SAndroid 
Build Coastguard Worker movd r13d, m7 7031*c0909341SAndroid Build Coastguard Worker mova m10, [base+bdct_lb_q+ 0] 7032*c0909341SAndroid Build Coastguard Worker mova m11, [base+bdct_lb_q+16] 7033*c0909341SAndroid Build Coastguard Worker movd m13, [base+subpel_filters+ r4*8+2] 7034*c0909341SAndroid Build Coastguard Worker movd m2, [base+subpel_filters+ r6*8+2] 7035*c0909341SAndroid Build Coastguard Worker movd m15, [base+subpel_filters+r11*8+2] 7036*c0909341SAndroid Build Coastguard Worker movd m4, [base+subpel_filters+r13*8+2] 7037*c0909341SAndroid Build Coastguard Worker%else 7038*c0909341SAndroid Build Coastguard Worker movd r1, m15 7039*c0909341SAndroid Build Coastguard Worker movd r4, m7 7040*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 7041*c0909341SAndroid Build Coastguard Worker pshufd m7, m7, q0321 7042*c0909341SAndroid Build Coastguard Worker movd r3, m15 7043*c0909341SAndroid Build Coastguard Worker movd r5, m7 7044*c0909341SAndroid Build Coastguard Worker mova m5, [base+bdct_lb_q+ 0] 7045*c0909341SAndroid Build Coastguard Worker mova m6, [base+bdct_lb_q+16] 7046*c0909341SAndroid Build Coastguard Worker movd m1, [base+subpel_filters+r1*8+2] 7047*c0909341SAndroid Build Coastguard Worker movd m2, [base+subpel_filters+r3*8+2] 7048*c0909341SAndroid Build Coastguard Worker movd m3, [base+subpel_filters+r4*8+2] 7049*c0909341SAndroid Build Coastguard Worker movd m7, [base+subpel_filters+r5*8+2] 7050*c0909341SAndroid Build Coastguard Worker SWAP m4, m7 7051*c0909341SAndroid Build Coastguard Worker mov r3, r3m 7052*c0909341SAndroid Build Coastguard Worker %if isprep 7053*c0909341SAndroid Build Coastguard Worker lea ss3q, [ssq*3] 7054*c0909341SAndroid Build Coastguard Worker %endif 7055*c0909341SAndroid Build Coastguard Worker %define m10 m5 7056*c0909341SAndroid Build Coastguard Worker %define m11 m6 7057*c0909341SAndroid Build Coastguard Worker %define m12 m1 7058*c0909341SAndroid Build Coastguard Worker %define m13 m1 7059*c0909341SAndroid 
Build Coastguard Worker%endif 7060*c0909341SAndroid Build Coastguard Worker psrld m14, 10 7061*c0909341SAndroid Build Coastguard Worker paddd m14, m14 7062*c0909341SAndroid Build Coastguard Worker punpckldq m13, m2 7063*c0909341SAndroid Build Coastguard Worker punpckldq m15, m4 7064*c0909341SAndroid Build Coastguard Worker punpcklqdq m13, m15 7065*c0909341SAndroid Build Coastguard Worker pxor m2, m2 7066*c0909341SAndroid Build Coastguard Worker pcmpeqd m0, m2 7067*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7068*c0909341SAndroid Build Coastguard Worker pand m9, m0 7069*c0909341SAndroid Build Coastguard Worker%else 7070*c0909341SAndroid Build Coastguard Worker pand m2, m9, m0 7071*c0909341SAndroid Build Coastguard Worker %define m9 m2 7072*c0909341SAndroid Build Coastguard Worker SWAP m7, m4 7073*c0909341SAndroid Build Coastguard Worker%endif 7074*c0909341SAndroid Build Coastguard Worker pandn m0, m13 7075*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7076*c0909341SAndroid Build Coastguard Worker SWAP m13, m0 7077*c0909341SAndroid Build Coastguard Worker%else 7078*c0909341SAndroid Build Coastguard Worker %define m13 m0 7079*c0909341SAndroid Build Coastguard Worker%endif 7080*c0909341SAndroid Build Coastguard Worker por m13, m9 7081*c0909341SAndroid Build Coastguard Worker punpckhbw m15, m13, m13 7082*c0909341SAndroid Build Coastguard Worker punpcklbw m13, m13 7083*c0909341SAndroid Build Coastguard Worker psraw m15, 8 7084*c0909341SAndroid Build Coastguard Worker psraw m13, 8 7085*c0909341SAndroid Build Coastguard Worker pshufb m12, m14, m10 7086*c0909341SAndroid Build Coastguard Worker pshufb m14, m11 7087*c0909341SAndroid Build Coastguard Worker mova m10, [base+spel_s_shuf2] 7088*c0909341SAndroid Build Coastguard Worker movd r4d, m14 7089*c0909341SAndroid Build Coastguard Worker shr r4d, 24 7090*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 7091*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m13 7092*c0909341SAndroid Build 
Coastguard Worker mova [stk+0x50], m15 7093*c0909341SAndroid Build Coastguard Worker pxor m2, m2 7094*c0909341SAndroid Build Coastguard Worker%endif 7095*c0909341SAndroid Build Coastguard Worker pshufb m7, m14, m2 7096*c0909341SAndroid Build Coastguard Worker psubb m14, m7 7097*c0909341SAndroid Build Coastguard Worker paddb m12, m10 7098*c0909341SAndroid Build Coastguard Worker paddb m14, m10 7099*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7100*c0909341SAndroid Build Coastguard Worker lea r6, [r4+ssq*1] 7101*c0909341SAndroid Build Coastguard Worker lea r11, [r4+ssq*2] 7102*c0909341SAndroid Build Coastguard Worker lea r13, [r4+ss3q ] 7103*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*0] 7104*c0909341SAndroid Build Coastguard Worker movu m8, [srcq+ssq*2] 7105*c0909341SAndroid Build Coastguard Worker movu m9, [srcq+ssq*1] 7106*c0909341SAndroid Build Coastguard Worker movu m10, [srcq+ss3q ] 7107*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+r4 ] 7108*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+r11 ] 7109*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+r6 ] 7110*c0909341SAndroid Build Coastguard Worker movu m4, [srcq+r13 ] 7111*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*4] 7112*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m12}, m1, m9, m8, m10 7113*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m13}, m1, m9, m8, m10 7114*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m7, m3, m2, m4 7115*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m7, m3, m2, m4 7116*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+0x10] 7117*c0909341SAndroid Build Coastguard Worker movd xm6, [rsp+0x20] 7118*c0909341SAndroid Build Coastguard Worker phaddd m1, m7 7119*c0909341SAndroid Build Coastguard Worker phaddd m8, m2 7120*c0909341SAndroid Build Coastguard Worker phaddd m9, m3 7121*c0909341SAndroid Build Coastguard Worker phaddd m10, m4 7122*c0909341SAndroid Build 
Coastguard Worker movu m2, [srcq+ssq*0] 7123*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+ssq*1] 7124*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m5}, m1, m9, m8, m10 7125*c0909341SAndroid Build Coastguard Worker REPX {psrad x, xm6}, m1, m9, m8, m10 7126*c0909341SAndroid Build Coastguard Worker packssdw m1, m8 ; 0 2 7127*c0909341SAndroid Build Coastguard Worker packssdw m9, m10 ; 1 3 7128*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+r4 ] 7129*c0909341SAndroid Build Coastguard Worker movu m8, [srcq+r6 ] 7130*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*2] 7131*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m12}, m2, m3 7132*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m13}, m2, m3 7133*c0909341SAndroid Build Coastguard Worker REPX {pshufb x, m14}, m0, m8 7134*c0909341SAndroid Build Coastguard Worker REPX {pmaddwd x, m15}, m0, m8 7135*c0909341SAndroid Build Coastguard Worker phaddd m2, m0 7136*c0909341SAndroid Build Coastguard Worker phaddd m3, m8 7137*c0909341SAndroid Build Coastguard Worker shr myd, 6 7138*c0909341SAndroid Build Coastguard Worker mov r9d, 64 << 24 7139*c0909341SAndroid Build Coastguard Worker lea myd, [t1+myq] 7140*c0909341SAndroid Build Coastguard Worker cmovnz r9q, [base+subpel_filters+myq*8] 7141*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m5}, m2, m3 7142*c0909341SAndroid Build Coastguard Worker REPX {psrad x, xm6}, m2, m3 7143*c0909341SAndroid Build Coastguard Worker packssdw m2, m3 ; 4 5 7144*c0909341SAndroid Build Coastguard Worker pshufd m3, m2, q1032 ; 5 _ 7145*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1, m9 ; 01 7146*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m9 ; 23 7147*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3 ; 45 7148*c0909341SAndroid Build Coastguard Worker movq m10, r9 7149*c0909341SAndroid Build Coastguard Worker %define hrnd_mem [rsp+0x10] 7150*c0909341SAndroid Build Coastguard Worker %define hsh_mem 
[rsp+0x20] 7151*c0909341SAndroid Build Coastguard Worker %define vsh_mem [rsp+0x28] 7152*c0909341SAndroid Build Coastguard Worker %if isput 7153*c0909341SAndroid Build Coastguard Worker %define vrnd_mem [rsp+0x30] 7154*c0909341SAndroid Build Coastguard Worker %else 7155*c0909341SAndroid Build Coastguard Worker %define vrnd_mem [base+pd_m524256] 7156*c0909341SAndroid Build Coastguard Worker %endif 7157*c0909341SAndroid Build Coastguard Worker%else 7158*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m12 7159*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m14 7160*c0909341SAndroid Build Coastguard Worker add r4, srcq 7161*c0909341SAndroid Build Coastguard Worker MC_4TAP_SCALED_H 0x60 ; 0 1 7162*c0909341SAndroid Build Coastguard Worker MC_4TAP_SCALED_H 0x70 ; 2 3 7163*c0909341SAndroid Build Coastguard Worker MC_4TAP_SCALED_H 0x80 ; 4 5 7164*c0909341SAndroid Build Coastguard Worker mov [stk+0xe0], r4 7165*c0909341SAndroid Build Coastguard Worker mova m3, [base+spel_s_shuf8] 7166*c0909341SAndroid Build Coastguard Worker mova m0, [stk+0x60] 7167*c0909341SAndroid Build Coastguard Worker mova m1, [stk+0x70] 7168*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0x80] 7169*c0909341SAndroid Build Coastguard Worker mov myd, mym 7170*c0909341SAndroid Build Coastguard Worker mov rX, [esp+0x1f4] 7171*c0909341SAndroid Build Coastguard Worker xor r5, r5 7172*c0909341SAndroid Build Coastguard Worker shr myd, 6 7173*c0909341SAndroid Build Coastguard Worker lea rX, [rX+myd] 7174*c0909341SAndroid Build Coastguard Worker mov r4, 64 << 24 7175*c0909341SAndroid Build Coastguard Worker cmovnz r4, [base+subpel_filters+rX*8+0] 7176*c0909341SAndroid Build Coastguard Worker cmovnz r5, [base+subpel_filters+rX*8+4] 7177*c0909341SAndroid Build Coastguard Worker mov r3, r3m 7178*c0909341SAndroid Build Coastguard Worker pshufb m0, m3 ; 01 7179*c0909341SAndroid Build Coastguard Worker pshufb m1, m3 ; 23 7180*c0909341SAndroid Build Coastguard Worker pshufb m2, m3 ; 45 
7181*c0909341SAndroid Build Coastguard Worker movd m7, r4 7182*c0909341SAndroid Build Coastguard Worker movd m4, r5 7183*c0909341SAndroid Build Coastguard Worker mov r5, r0m 7184*c0909341SAndroid Build Coastguard Worker %if isput 7185*c0909341SAndroid Build Coastguard Worker mov r1, r1m 7186*c0909341SAndroid Build Coastguard Worker %endif 7187*c0909341SAndroid Build Coastguard Worker mov r4, [stk+0xe0] 7188*c0909341SAndroid Build Coastguard Worker %define dstq r5 7189*c0909341SAndroid Build Coastguard Worker %define tmpq r5 7190*c0909341SAndroid Build Coastguard Worker %define m12 [stk+0x20] 7191*c0909341SAndroid Build Coastguard Worker %define m14 [stk+0x30] 7192*c0909341SAndroid Build Coastguard Worker %define m13 [stk+0x40] 7193*c0909341SAndroid Build Coastguard Worker %define m15 [stk+0x50] 7194*c0909341SAndroid Build Coastguard Worker %define hrnd_mem [esp+0x00] 7195*c0909341SAndroid Build Coastguard Worker %define hsh_mem [esp+0x10] 7196*c0909341SAndroid Build Coastguard Worker %define vsh_mem [esp+0x18] 7197*c0909341SAndroid Build Coastguard Worker %if isput 7198*c0909341SAndroid Build Coastguard Worker %define vrnd_mem [esp+0x20] 7199*c0909341SAndroid Build Coastguard Worker %else 7200*c0909341SAndroid Build Coastguard Worker %define vrnd_mem [base+pd_m524256] 7201*c0909341SAndroid Build Coastguard Worker %endif 7202*c0909341SAndroid Build Coastguard Worker %define m10 m7 7203*c0909341SAndroid Build Coastguard Worker punpckldq m10, m4 7204*c0909341SAndroid Build Coastguard Worker%endif 7205*c0909341SAndroid Build Coastguard Worker punpcklbw m10, m10 7206*c0909341SAndroid Build Coastguard Worker psraw m10, 8 7207*c0909341SAndroid Build Coastguard Worker pshufd m3, m10, q0000 7208*c0909341SAndroid Build Coastguard Worker pshufd m4, m10, q1111 7209*c0909341SAndroid Build Coastguard Worker pshufd m5, m10, q2222 7210*c0909341SAndroid Build Coastguard Worker pshufd m10, m10, q3333 7211*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 7212*c0909341SAndroid 
Build Coastguard Worker %xdefine m8 m3 7213*c0909341SAndroid Build Coastguard Worker %xdefine m9 m6 7214*c0909341SAndroid Build Coastguard Worker %xdefine m11 m5 7215*c0909341SAndroid Build Coastguard Worker %xdefine m6 m4 7216*c0909341SAndroid Build Coastguard Worker mova [stk+0x100], m3 7217*c0909341SAndroid Build Coastguard Worker mova [stk+0x110], m4 7218*c0909341SAndroid Build Coastguard Worker mova [stk+0x120], m5 7219*c0909341SAndroid Build Coastguard Worker mova [stk+0x130], m10 7220*c0909341SAndroid Build Coastguard Worker %define m3 [stk+0x100] 7221*c0909341SAndroid Build Coastguard Worker %define m4 [stk+0x110] 7222*c0909341SAndroid Build Coastguard Worker %define m5 [stk+0x120] 7223*c0909341SAndroid Build Coastguard Worker %define m10 [stk+0x130] 7224*c0909341SAndroid Build Coastguard Worker%endif 7225*c0909341SAndroid Build Coastguard Worker.dy2_w4_loop: 7226*c0909341SAndroid Build Coastguard Worker pmaddwd m8, m0, m3 7227*c0909341SAndroid Build Coastguard Worker pmaddwd m9, m1, m3 7228*c0909341SAndroid Build Coastguard Worker mova m0, m2 7229*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m4 7230*c0909341SAndroid Build Coastguard Worker pmaddwd m11, m2, m4 7231*c0909341SAndroid Build Coastguard Worker paddd m8, vrnd_mem 7232*c0909341SAndroid Build Coastguard Worker paddd m9, vrnd_mem 7233*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m5 7234*c0909341SAndroid Build Coastguard Worker paddd m8, m1 7235*c0909341SAndroid Build Coastguard Worker paddd m9, m11 7236*c0909341SAndroid Build Coastguard Worker paddd m8, m2 7237*c0909341SAndroid Build Coastguard Worker movu m6, [srcq+ssq*0] 7238*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+ssq*2] 7239*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7240*c0909341SAndroid Build Coastguard Worker movu m11, [srcq+r4 ] 7241*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+r11] 7242*c0909341SAndroid Build Coastguard Worker%else 7243*c0909341SAndroid Build Coastguard Worker movu 
m11, [r4+ssq*0] 7244*c0909341SAndroid Build Coastguard Worker movu m2, [r4+ssq*2] 7245*c0909341SAndroid Build Coastguard Worker%endif 7246*c0909341SAndroid Build Coastguard Worker pshufb m6, m12 7247*c0909341SAndroid Build Coastguard Worker pshufb m1, m12 7248*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m13 7249*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m13 7250*c0909341SAndroid Build Coastguard Worker pshufb m11, m14 7251*c0909341SAndroid Build Coastguard Worker pshufb m2, m14 7252*c0909341SAndroid Build Coastguard Worker pmaddwd m11, m15 7253*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m15 7254*c0909341SAndroid Build Coastguard Worker phaddd m6, m11 7255*c0909341SAndroid Build Coastguard Worker phaddd m1, m2 7256*c0909341SAndroid Build Coastguard Worker paddd m6, hrnd_mem 7257*c0909341SAndroid Build Coastguard Worker paddd m1, hrnd_mem 7258*c0909341SAndroid Build Coastguard Worker psrad m6, hsh_mem 7259*c0909341SAndroid Build Coastguard Worker psrad m1, hsh_mem 7260*c0909341SAndroid Build Coastguard Worker movu m7, [srcq+ssq*1] 7261*c0909341SAndroid Build Coastguard Worker movu m11, [srcq+ss3q ] 7262*c0909341SAndroid Build Coastguard Worker packssdw m6, m1 ; 6 8 7263*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7264*c0909341SAndroid Build Coastguard Worker movu m2, [srcq+r6 ] 7265*c0909341SAndroid Build Coastguard Worker movu m1, [srcq+r13] 7266*c0909341SAndroid Build Coastguard Worker%else 7267*c0909341SAndroid Build Coastguard Worker movu m2, [r4+ssq*1] 7268*c0909341SAndroid Build Coastguard Worker movu m1, [r4+ss3q ] 7269*c0909341SAndroid Build Coastguard Worker%endif 7270*c0909341SAndroid Build Coastguard Worker pshufb m7, m12 7271*c0909341SAndroid Build Coastguard Worker pshufb m11, m12 7272*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m13 7273*c0909341SAndroid Build Coastguard Worker pmaddwd m11, m13 7274*c0909341SAndroid Build Coastguard Worker pshufb m2, m14 7275*c0909341SAndroid Build Coastguard Worker 
pshufb m1, m14 7276*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m15 7277*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m15 7278*c0909341SAndroid Build Coastguard Worker phaddd m7, m2 7279*c0909341SAndroid Build Coastguard Worker phaddd m11, m1 7280*c0909341SAndroid Build Coastguard Worker paddd m7, hrnd_mem 7281*c0909341SAndroid Build Coastguard Worker paddd m11, hrnd_mem 7282*c0909341SAndroid Build Coastguard Worker psrad m7, hsh_mem 7283*c0909341SAndroid Build Coastguard Worker psrad m11, hsh_mem 7284*c0909341SAndroid Build Coastguard Worker packssdw m7, m11 ; 7 9 7285*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 7286*c0909341SAndroid Build Coastguard Worker lea r4, [r4+ssq*4] 7287*c0909341SAndroid Build Coastguard Worker%endif 7288*c0909341SAndroid Build Coastguard Worker lea srcq, [srcq+ssq*4] 7289*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m6, m7 ; 67 7290*c0909341SAndroid Build Coastguard Worker punpckhwd m6, m7 ; 89 7291*c0909341SAndroid Build Coastguard Worker mova m2, m6 7292*c0909341SAndroid Build Coastguard Worker pmaddwd m11, m1, m5 7293*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m1, m10 7294*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m10 7295*c0909341SAndroid Build Coastguard Worker paddd m9, m11 7296*c0909341SAndroid Build Coastguard Worker%if isput 7297*c0909341SAndroid Build Coastguard Worker movd m11, vsh_mem 7298*c0909341SAndroid Build Coastguard Worker%endif 7299*c0909341SAndroid Build Coastguard Worker paddd m8, m7 7300*c0909341SAndroid Build Coastguard Worker paddd m9, m6 7301*c0909341SAndroid Build Coastguard Worker%if isput 7302*c0909341SAndroid Build Coastguard Worker psrad m8, m11 7303*c0909341SAndroid Build Coastguard Worker psrad m9, m11 7304*c0909341SAndroid Build Coastguard Worker packssdw m8, m9 7305*c0909341SAndroid Build Coastguard Worker pxor m7, m7 7306*c0909341SAndroid Build Coastguard Worker pmaxsw m8, m7 7307*c0909341SAndroid Build Coastguard Worker pminsw m8, pxmaxm 
7308*c0909341SAndroid Build Coastguard Worker movq [dstq+dsq*0], m8 7309*c0909341SAndroid Build Coastguard Worker movhps [dstq+dsq*1], m8 7310*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 7311*c0909341SAndroid Build Coastguard Worker%else 7312*c0909341SAndroid Build Coastguard Worker psrad m8, 6 7313*c0909341SAndroid Build Coastguard Worker psrad m9, 6 7314*c0909341SAndroid Build Coastguard Worker packssdw m8, m9 7315*c0909341SAndroid Build Coastguard Worker mova [tmpq], m8 7316*c0909341SAndroid Build Coastguard Worker add tmpq, 16 7317*c0909341SAndroid Build Coastguard Worker%endif 7318*c0909341SAndroid Build Coastguard Worker sub hd, 2 7319*c0909341SAndroid Build Coastguard Worker jg .dy2_w4_loop 7320*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_RET ; why not jz .ret? 7321*c0909341SAndroid Build Coastguard WorkerINIT_XMM ssse3 7322*c0909341SAndroid Build Coastguard Worker.dy2_w8: 7323*c0909341SAndroid Build Coastguard Worker mov dword [stk+0xf0], 1 7324*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 16 7325*c0909341SAndroid Build Coastguard Worker jmp .dy2_w_start 7326*c0909341SAndroid Build Coastguard Worker.dy2_w16: 7327*c0909341SAndroid Build Coastguard Worker mov dword [stk+0xf0], 2 7328*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 32 7329*c0909341SAndroid Build Coastguard Worker jmp .dy2_w_start 7330*c0909341SAndroid Build Coastguard Worker.dy2_w32: 7331*c0909341SAndroid Build Coastguard Worker mov dword [stk+0xf0], 4 7332*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 64 7333*c0909341SAndroid Build Coastguard Worker jmp .dy2_w_start 7334*c0909341SAndroid Build Coastguard Worker.dy2_w64: 7335*c0909341SAndroid Build Coastguard Worker mov dword [stk+0xf0], 8 7336*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 128 7337*c0909341SAndroid Build Coastguard Worker jmp .dy2_w_start 7338*c0909341SAndroid Build Coastguard Worker.dy2_w128: 7339*c0909341SAndroid Build 
Coastguard Worker mov dword [stk+0xf0], 16 7340*c0909341SAndroid Build Coastguard Worker movifprep tmp_stridem, 256 7341*c0909341SAndroid Build Coastguard Worker.dy2_w_start: 7342*c0909341SAndroid Build Coastguard Worker mov myd, mym 7343*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7344*c0909341SAndroid Build Coastguard Worker %ifidn %1, put 7345*c0909341SAndroid Build Coastguard Worker movifnidn dsm, dsq 7346*c0909341SAndroid Build Coastguard Worker %endif 7347*c0909341SAndroid Build Coastguard Worker mova [rsp+0x10], m11 7348*c0909341SAndroid Build Coastguard Worker mova [rsp+0x20], m12 7349*c0909341SAndroid Build Coastguard Worker %define hround m11 7350*c0909341SAndroid Build Coastguard Worker %if isput 7351*c0909341SAndroid Build Coastguard Worker mova [rsp+0x30], m13 7352*c0909341SAndroid Build Coastguard Worker %else 7353*c0909341SAndroid Build Coastguard Worker mova m13, [base+pd_m524256] 7354*c0909341SAndroid Build Coastguard Worker %endif 7355*c0909341SAndroid Build Coastguard Worker shr t0d, 16 7356*c0909341SAndroid Build Coastguard Worker shr myd, 6 7357*c0909341SAndroid Build Coastguard Worker mov r4d, 64 << 24 7358*c0909341SAndroid Build Coastguard Worker lea myd, [t1+myq] 7359*c0909341SAndroid Build Coastguard Worker cmovnz r4q, [base+subpel_filters+myq*8] 7360*c0909341SAndroid Build Coastguard Worker movd m15, t0d 7361*c0909341SAndroid Build Coastguard Worker%else 7362*c0909341SAndroid Build Coastguard Worker %define hround [esp+0x00] 7363*c0909341SAndroid Build Coastguard Worker %define m12 [esp+0x10] 7364*c0909341SAndroid Build Coastguard Worker %define m10 [base+pd_0x3ff] 7365*c0909341SAndroid Build Coastguard Worker %define m8 m0 7366*c0909341SAndroid Build Coastguard Worker %xdefine m14 m4 7367*c0909341SAndroid Build Coastguard Worker %xdefine m15 m3 7368*c0909341SAndroid Build Coastguard Worker %if isput 7369*c0909341SAndroid Build Coastguard Worker %define dstq r0 7370*c0909341SAndroid Build Coastguard Worker %else 
7371*c0909341SAndroid Build Coastguard Worker %define tmpq r0 7372*c0909341SAndroid Build Coastguard Worker %define ssq ssm 7373*c0909341SAndroid Build Coastguard Worker %endif 7374*c0909341SAndroid Build Coastguard Worker mov r5, [esp+0x1f0] 7375*c0909341SAndroid Build Coastguard Worker mov r3, [esp+0x1f4] 7376*c0909341SAndroid Build Coastguard Worker shr r5, 16 7377*c0909341SAndroid Build Coastguard Worker movd m15, r5 7378*c0909341SAndroid Build Coastguard Worker xor r5, r5 7379*c0909341SAndroid Build Coastguard Worker shr myd, 6 7380*c0909341SAndroid Build Coastguard Worker lea r3, [r3+myd] 7381*c0909341SAndroid Build Coastguard Worker mov r4, 64 << 24 7382*c0909341SAndroid Build Coastguard Worker cmovnz r4, [base+subpel_filters+r3*8+0] 7383*c0909341SAndroid Build Coastguard Worker cmovnz r5, [base+subpel_filters+r3*8+4] 7384*c0909341SAndroid Build Coastguard Worker mov r0, r0m 7385*c0909341SAndroid Build Coastguard Worker mov r3, r3m 7386*c0909341SAndroid Build Coastguard Worker%endif 7387*c0909341SAndroid Build Coastguard Worker sub srcq, 6 7388*c0909341SAndroid Build Coastguard Worker pslld m7, m8, 2 ; dx*4 7389*c0909341SAndroid Build Coastguard Worker pmaddwd m8, [base+rescale_mul] ; dx*[0-3] 7390*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0000 7391*c0909341SAndroid Build Coastguard Worker paddd m14, m8 ; mx+dx*[0-3] 7392*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7393*c0909341SAndroid Build Coastguard Worker movq m3, r4q 7394*c0909341SAndroid Build Coastguard Worker%else 7395*c0909341SAndroid Build Coastguard Worker movd m5, r4 7396*c0909341SAndroid Build Coastguard Worker movd m6, r5 7397*c0909341SAndroid Build Coastguard Worker punpckldq m5, m6 7398*c0909341SAndroid Build Coastguard Worker SWAP m3, m5 7399*c0909341SAndroid Build Coastguard Worker%endif 7400*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m3 7401*c0909341SAndroid Build Coastguard Worker psraw m3, 8 7402*c0909341SAndroid Build Coastguard Worker mova 
[stk+0x100], m7 7403*c0909341SAndroid Build Coastguard Worker mova [stk+0x120], m15 7404*c0909341SAndroid Build Coastguard Worker mov [stk+0x0f8], srcq 7405*c0909341SAndroid Build Coastguard Worker mov [stk+0x130], r0q ; dstq / tmpq 7406*c0909341SAndroid Build Coastguard Worker pshufd m0, m3, q0000 7407*c0909341SAndroid Build Coastguard Worker pshufd m1, m3, q1111 7408*c0909341SAndroid Build Coastguard Worker pshufd m2, m3, q2222 7409*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q3333 7410*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7411*c0909341SAndroid Build Coastguard Worker mova [stk+0x140], m0 7412*c0909341SAndroid Build Coastguard Worker mova [stk+0x150], m1 7413*c0909341SAndroid Build Coastguard Worker mova [stk+0x160], m2 7414*c0909341SAndroid Build Coastguard Worker mova [stk+0x170], m3 7415*c0909341SAndroid Build Coastguard Worker %if UNIX64 7416*c0909341SAndroid Build Coastguard Worker mov hm, hd 7417*c0909341SAndroid Build Coastguard Worker %endif 7418*c0909341SAndroid Build Coastguard Worker%else 7419*c0909341SAndroid Build Coastguard Worker mova [stk+0x180], m0 7420*c0909341SAndroid Build Coastguard Worker mova [stk+0x190], m1 7421*c0909341SAndroid Build Coastguard Worker mova [stk+0x1a0], m2 7422*c0909341SAndroid Build Coastguard Worker mova [stk+0x1b0], m3 7423*c0909341SAndroid Build Coastguard Worker SWAP m5, m3 7424*c0909341SAndroid Build Coastguard Worker mov r5, hm 7425*c0909341SAndroid Build Coastguard Worker mov [stk+0x134], r5 7426*c0909341SAndroid Build Coastguard Worker%endif 7427*c0909341SAndroid Build Coastguard Worker jmp .dy2_hloop 7428*c0909341SAndroid Build Coastguard Worker.dy2_hloop_prep: 7429*c0909341SAndroid Build Coastguard Worker dec dword [stk+0x0f0] 7430*c0909341SAndroid Build Coastguard Worker jz .ret 7431*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7432*c0909341SAndroid Build Coastguard Worker add qword [stk+0x130], 16 7433*c0909341SAndroid Build Coastguard Worker mov hd, hm 
7434*c0909341SAndroid Build Coastguard Worker%else 7435*c0909341SAndroid Build Coastguard Worker add dword [stk+0x130], 16 7436*c0909341SAndroid Build Coastguard Worker mov r5, [stk+0x134] 7437*c0909341SAndroid Build Coastguard Worker mov r0, [stk+0x130] 7438*c0909341SAndroid Build Coastguard Worker%endif 7439*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0x100] 7440*c0909341SAndroid Build Coastguard Worker mova m14, [stk+0x110] 7441*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7442*c0909341SAndroid Build Coastguard Worker mova m10, [base+pd_0x3ff] 7443*c0909341SAndroid Build Coastguard Worker mova m11, [rsp+0x10] 7444*c0909341SAndroid Build Coastguard Worker%endif 7445*c0909341SAndroid Build Coastguard Worker mova m15, [stk+0x120] 7446*c0909341SAndroid Build Coastguard Worker mov srcq, [stk+0x0f8] 7447*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7448*c0909341SAndroid Build Coastguard Worker mov r0q, [stk+0x130] ; dstq / tmpq 7449*c0909341SAndroid Build Coastguard Worker%else 7450*c0909341SAndroid Build Coastguard Worker mov hm, r5 7451*c0909341SAndroid Build Coastguard Worker mov r0m, r0 7452*c0909341SAndroid Build Coastguard Worker mov r3, r3m 7453*c0909341SAndroid Build Coastguard Worker%endif 7454*c0909341SAndroid Build Coastguard Worker paddd m14, m7 7455*c0909341SAndroid Build Coastguard Worker.dy2_hloop: 7456*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7457*c0909341SAndroid Build Coastguard Worker mova m9, [base+pq_0x40000000] 7458*c0909341SAndroid Build Coastguard Worker%else 7459*c0909341SAndroid Build Coastguard Worker %define m9 [base+pq_0x40000000] 7460*c0909341SAndroid Build Coastguard Worker%endif 7461*c0909341SAndroid Build Coastguard Worker pxor m1, m1 7462*c0909341SAndroid Build Coastguard Worker psrld m2, m14, 10 7463*c0909341SAndroid Build Coastguard Worker mova [stk], m2 7464*c0909341SAndroid Build Coastguard Worker pand m6, m14, m10 7465*c0909341SAndroid Build Coastguard Worker psrld m6, 6 
7466*c0909341SAndroid Build Coastguard Worker paddd m5, m15, m6 7467*c0909341SAndroid Build Coastguard Worker pcmpeqd m6, m1 7468*c0909341SAndroid Build Coastguard Worker pshufd m2, m5, q1032 7469*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7470*c0909341SAndroid Build Coastguard Worker movd r4d, m5 7471*c0909341SAndroid Build Coastguard Worker movd r6d, m2 7472*c0909341SAndroid Build Coastguard Worker pshufd m5, m5, q0321 7473*c0909341SAndroid Build Coastguard Worker pshufd m2, m2, q0321 7474*c0909341SAndroid Build Coastguard Worker movd r7d, m5 7475*c0909341SAndroid Build Coastguard Worker movd r9d, m2 7476*c0909341SAndroid Build Coastguard Worker movq m0, [base+subpel_filters+r4*8] 7477*c0909341SAndroid Build Coastguard Worker movq m1, [base+subpel_filters+r6*8] 7478*c0909341SAndroid Build Coastguard Worker movhps m0, [base+subpel_filters+r7*8] 7479*c0909341SAndroid Build Coastguard Worker movhps m1, [base+subpel_filters+r9*8] 7480*c0909341SAndroid Build Coastguard Worker%else 7481*c0909341SAndroid Build Coastguard Worker movd r0, m5 7482*c0909341SAndroid Build Coastguard Worker movd rX, m2 7483*c0909341SAndroid Build Coastguard Worker pshufd m5, m5, q0321 7484*c0909341SAndroid Build Coastguard Worker pshufd m2, m2, q0321 7485*c0909341SAndroid Build Coastguard Worker movd r4, m5 7486*c0909341SAndroid Build Coastguard Worker movd r5, m2 7487*c0909341SAndroid Build Coastguard Worker movq m0, [base+subpel_filters+r0*8] 7488*c0909341SAndroid Build Coastguard Worker movq m1, [base+subpel_filters+rX*8] 7489*c0909341SAndroid Build Coastguard Worker movhps m0, [base+subpel_filters+r4*8] 7490*c0909341SAndroid Build Coastguard Worker movhps m1, [base+subpel_filters+r5*8] 7491*c0909341SAndroid Build Coastguard Worker%endif 7492*c0909341SAndroid Build Coastguard Worker paddd m14, m7 ; mx+dx*[4-7] 7493*c0909341SAndroid Build Coastguard Worker pand m5, m14, m10 7494*c0909341SAndroid Build Coastguard Worker psrld m5, 6 7495*c0909341SAndroid Build Coastguard Worker 
paddd m15, m5 7496*c0909341SAndroid Build Coastguard Worker pxor m2, m2 7497*c0909341SAndroid Build Coastguard Worker pcmpeqd m5, m2 7498*c0909341SAndroid Build Coastguard Worker mova [stk+0x110], m14 7499*c0909341SAndroid Build Coastguard Worker pshufd m4, m15, q1032 7500*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7501*c0909341SAndroid Build Coastguard Worker movd r10d, m15 7502*c0909341SAndroid Build Coastguard Worker movd r11d, m4 7503*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 7504*c0909341SAndroid Build Coastguard Worker pshufd m4, m4, q0321 7505*c0909341SAndroid Build Coastguard Worker movd r13d, m15 7506*c0909341SAndroid Build Coastguard Worker movd rXd, m4 7507*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+r10*8] 7508*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+r11*8] 7509*c0909341SAndroid Build Coastguard Worker movhps m2, [base+subpel_filters+r13*8] 7510*c0909341SAndroid Build Coastguard Worker movhps m3, [base+subpel_filters+ rX*8] 7511*c0909341SAndroid Build Coastguard Worker psrld m14, 10 7512*c0909341SAndroid Build Coastguard Worker movq r11, m14 7513*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m14 7514*c0909341SAndroid Build Coastguard Worker movq rX, m14 7515*c0909341SAndroid Build Coastguard Worker mov r10d, r11d 7516*c0909341SAndroid Build Coastguard Worker shr r11, 32 7517*c0909341SAndroid Build Coastguard Worker mov r13d, rXd 7518*c0909341SAndroid Build Coastguard Worker shr rX, 32 7519*c0909341SAndroid Build Coastguard Worker mov r4d, [stk+ 0] 7520*c0909341SAndroid Build Coastguard Worker mov r6d, [stk+ 4] 7521*c0909341SAndroid Build Coastguard Worker mov r7d, [stk+ 8] 7522*c0909341SAndroid Build Coastguard Worker mov r9d, [stk+12] 7523*c0909341SAndroid Build Coastguard Worker pshufd m4, m6, q1100 7524*c0909341SAndroid Build Coastguard Worker pshufd m6, m6, q3322 7525*c0909341SAndroid Build Coastguard Worker pshufd m14, m5, q1100 
7526*c0909341SAndroid Build Coastguard Worker pshufd m5, m5, q3322 7527*c0909341SAndroid Build Coastguard Worker pand m7, m9, m4 7528*c0909341SAndroid Build Coastguard Worker pand m8, m9, m6 7529*c0909341SAndroid Build Coastguard Worker pand m15, m9, m14 7530*c0909341SAndroid Build Coastguard Worker pand m9, m9, m5 7531*c0909341SAndroid Build Coastguard Worker pandn m4, m0 7532*c0909341SAndroid Build Coastguard Worker pandn m6, m1 7533*c0909341SAndroid Build Coastguard Worker pandn m14, m2 7534*c0909341SAndroid Build Coastguard Worker pandn m5, m3 7535*c0909341SAndroid Build Coastguard Worker por m7, m4 7536*c0909341SAndroid Build Coastguard Worker por m8, m6 7537*c0909341SAndroid Build Coastguard Worker por m15, m14 7538*c0909341SAndroid Build Coastguard Worker por m9, m5 7539*c0909341SAndroid Build Coastguard Worker punpcklbw m0, m7, m7 7540*c0909341SAndroid Build Coastguard Worker punpckhbw m7, m7 7541*c0909341SAndroid Build Coastguard Worker punpcklbw m1, m8, m8 7542*c0909341SAndroid Build Coastguard Worker punpckhbw m8, m8 7543*c0909341SAndroid Build Coastguard Worker psraw m0, 8 7544*c0909341SAndroid Build Coastguard Worker psraw m7, 8 7545*c0909341SAndroid Build Coastguard Worker psraw m1, 8 7546*c0909341SAndroid Build Coastguard Worker psraw m8, 8 7547*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m15, m15 7548*c0909341SAndroid Build Coastguard Worker punpckhbw m15, m15 7549*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m9, m9 7550*c0909341SAndroid Build Coastguard Worker punpckhbw m9, m9 7551*c0909341SAndroid Build Coastguard Worker psraw m2, 8 7552*c0909341SAndroid Build Coastguard Worker psraw m15, 8 7553*c0909341SAndroid Build Coastguard Worker psraw m3, 8 7554*c0909341SAndroid Build Coastguard Worker psraw m9, 8 7555*c0909341SAndroid Build Coastguard Worker mova [stk+0x10], m0 7556*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m7 7557*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 7558*c0909341SAndroid Build 
Coastguard Worker mova [stk+0x40], m8 7559*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m2 7560*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m15 7561*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m3 7562*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m9 7563*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 1, 2, 3, 4, 5, 6, 9, 10 ; 0 7564*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m1 7565*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 2, 3, 4, 5, 6, 1, 9, 10 ; 1 7566*c0909341SAndroid Build Coastguard Worker mova [stk+0xa0], m2 7567*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 3, 4, 5, 6, 1, 2, 9, 10 ; 2 7568*c0909341SAndroid Build Coastguard Worker mova [stk+0xb0], m3 7569*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 4, 5, 6, 1, 2, 3, 9, 10 ; 3 7570*c0909341SAndroid Build Coastguard Worker mova [stk+0xc0], m4 7571*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 5, 6, 1, 2, 3, 4, 9, 10 ; 4 7572*c0909341SAndroid Build Coastguard Worker mova [stk+0xd0], m5 7573*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 6, 1, 2, 3, 4, 5, 9, 10 ; 5 7574*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 7, 1, 2, 3, 4, 5, 9, 10 ; 6 7575*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 8, 1, 2, 3, 4, 5, 9, 10 ; 7 7576*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0xd0] 7577*c0909341SAndroid Build Coastguard Worker mova m1, [stk+0x90] 7578*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0xa0] 7579*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0xb0] 7580*c0909341SAndroid Build Coastguard Worker mova m9, [stk+0xc0] 7581*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5, m6 ; 45a 7582*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m6 ; 45b 7583*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7, m8 ; 67a 7584*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m8 ; 67b 7585*c0909341SAndroid 
Build Coastguard Worker punpcklwd m0, m1, m2 ; 01a 7586*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m2 ; 01b 7587*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3, m9 ; 23a 7588*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m9 ; 23b 7589*c0909341SAndroid Build Coastguard Worker mova m10, [stk+0x140] 7590*c0909341SAndroid Build Coastguard Worker mova m11, [stk+0x150] 7591*c0909341SAndroid Build Coastguard Worker mova m14, [stk+0x160] 7592*c0909341SAndroid Build Coastguard Worker mova m15, [stk+0x170] 7593*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m4 7594*c0909341SAndroid Build Coastguard Worker mova [stk+0xa0], m5 7595*c0909341SAndroid Build Coastguard Worker mova [stk+0xb0], m6 7596*c0909341SAndroid Build Coastguard Worker mova [stk+0xc0], m7 7597*c0909341SAndroid Build Coastguard Worker %define hround [rsp+0x10] 7598*c0909341SAndroid Build Coastguard Worker %define shift [rsp+0x20] 7599*c0909341SAndroid Build Coastguard Worker %if isput 7600*c0909341SAndroid Build Coastguard Worker %define vround [rsp+0x30] 7601*c0909341SAndroid Build Coastguard Worker %else 7602*c0909341SAndroid Build Coastguard Worker %define vround [base+pd_m524256] 7603*c0909341SAndroid Build Coastguard Worker %endif 7604*c0909341SAndroid Build Coastguard Worker.dy2_vloop: 7605*c0909341SAndroid Build Coastguard Worker pmaddwd m4, m0, m10 7606*c0909341SAndroid Build Coastguard Worker pmaddwd m5, m1, m10 7607*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m2, m11 7608*c0909341SAndroid Build Coastguard Worker pmaddwd m7, m3, m11 7609*c0909341SAndroid Build Coastguard Worker paddd m4, m13 7610*c0909341SAndroid Build Coastguard Worker paddd m5, m13 7611*c0909341SAndroid Build Coastguard Worker paddd m4, m6 7612*c0909341SAndroid Build Coastguard Worker paddd m5, m7 7613*c0909341SAndroid Build Coastguard Worker pmaddwd m6, [stk+0x90], m14 7614*c0909341SAndroid Build Coastguard Worker pmaddwd m7, [stk+0xa0], m14 7615*c0909341SAndroid Build Coastguard 
Worker pmaddwd m8, [stk+0xb0], m15 7616*c0909341SAndroid Build Coastguard Worker pmaddwd m9, [stk+0xc0], m15 7617*c0909341SAndroid Build Coastguard Worker paddd m4, m6 7618*c0909341SAndroid Build Coastguard Worker paddd m5, m7 7619*c0909341SAndroid Build Coastguard Worker %if isput 7620*c0909341SAndroid Build Coastguard Worker pshufd m6, m12, q1032 7621*c0909341SAndroid Build Coastguard Worker %endif 7622*c0909341SAndroid Build Coastguard Worker paddd m4, m8 7623*c0909341SAndroid Build Coastguard Worker paddd m5, m9 7624*c0909341SAndroid Build Coastguard Worker%else 7625*c0909341SAndroid Build Coastguard Worker movd r0, m15 7626*c0909341SAndroid Build Coastguard Worker movd rX, m4 7627*c0909341SAndroid Build Coastguard Worker pshufd m15, m15, q0321 7628*c0909341SAndroid Build Coastguard Worker pshufd m4, m4, q0321 7629*c0909341SAndroid Build Coastguard Worker movd r4, m15 7630*c0909341SAndroid Build Coastguard Worker movd r5, m4 7631*c0909341SAndroid Build Coastguard Worker mova m14, [stk+0x110] 7632*c0909341SAndroid Build Coastguard Worker movq m2, [base+subpel_filters+r0*8] 7633*c0909341SAndroid Build Coastguard Worker movq m3, [base+subpel_filters+rX*8] 7634*c0909341SAndroid Build Coastguard Worker movhps m2, [base+subpel_filters+r4*8] 7635*c0909341SAndroid Build Coastguard Worker movhps m3, [base+subpel_filters+r5*8] 7636*c0909341SAndroid Build Coastguard Worker psrld m14, 10 7637*c0909341SAndroid Build Coastguard Worker mova [stk+16], m14 7638*c0909341SAndroid Build Coastguard Worker mov r0, [stk+ 0] 7639*c0909341SAndroid Build Coastguard Worker mov rX, [stk+ 4] 7640*c0909341SAndroid Build Coastguard Worker mov r4, [stk+ 8] 7641*c0909341SAndroid Build Coastguard Worker mov r5, [stk+12] 7642*c0909341SAndroid Build Coastguard Worker mova [stk+0x20], m0 7643*c0909341SAndroid Build Coastguard Worker mova [stk+0x30], m1 7644*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 7645*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 
7646*c0909341SAndroid Build Coastguard Worker pshufd m4, m6, q1100 7647*c0909341SAndroid Build Coastguard Worker pshufd m6, m6, q3322 7648*c0909341SAndroid Build Coastguard Worker pshufd m7, m5, q1100 7649*c0909341SAndroid Build Coastguard Worker pshufd m5, m5, q3322 7650*c0909341SAndroid Build Coastguard Worker pand m0, m9, m4 7651*c0909341SAndroid Build Coastguard Worker pand m1, m9, m6 7652*c0909341SAndroid Build Coastguard Worker pand m2, m9, m7 7653*c0909341SAndroid Build Coastguard Worker pand m3, m9, m5 7654*c0909341SAndroid Build Coastguard Worker pandn m4, [stk+0x20] 7655*c0909341SAndroid Build Coastguard Worker pandn m6, [stk+0x30] 7656*c0909341SAndroid Build Coastguard Worker pandn m7, [stk+0x40] 7657*c0909341SAndroid Build Coastguard Worker pandn m5, [stk+0x50] 7658*c0909341SAndroid Build Coastguard Worker por m0, m4 7659*c0909341SAndroid Build Coastguard Worker por m1, m6 7660*c0909341SAndroid Build Coastguard Worker por m2, m7 7661*c0909341SAndroid Build Coastguard Worker por m3, m5 7662*c0909341SAndroid Build Coastguard Worker punpcklbw m4, m0, m0 7663*c0909341SAndroid Build Coastguard Worker punpckhbw m0, m0 7664*c0909341SAndroid Build Coastguard Worker punpcklbw m5, m1, m1 7665*c0909341SAndroid Build Coastguard Worker punpckhbw m1, m1 7666*c0909341SAndroid Build Coastguard Worker psraw m4, 8 7667*c0909341SAndroid Build Coastguard Worker psraw m0, 8 7668*c0909341SAndroid Build Coastguard Worker psraw m5, 8 7669*c0909341SAndroid Build Coastguard Worker psraw m1, 8 7670*c0909341SAndroid Build Coastguard Worker punpcklbw m6, m2, m2 7671*c0909341SAndroid Build Coastguard Worker punpckhbw m2, m2 7672*c0909341SAndroid Build Coastguard Worker punpcklbw m7, m3, m3 7673*c0909341SAndroid Build Coastguard Worker punpckhbw m3, m3 7674*c0909341SAndroid Build Coastguard Worker psraw m6, 8 7675*c0909341SAndroid Build Coastguard Worker psraw m2, 8 7676*c0909341SAndroid Build Coastguard Worker psraw m7, 8 7677*c0909341SAndroid Build Coastguard Worker psraw m3, 8 
7678*c0909341SAndroid Build Coastguard Worker mova [stk+0x0a0], m4 7679*c0909341SAndroid Build Coastguard Worker mova [stk+0x0b0], m0 7680*c0909341SAndroid Build Coastguard Worker mova [stk+0x0c0], m5 7681*c0909341SAndroid Build Coastguard Worker mova [stk+0x0d0], m1 7682*c0909341SAndroid Build Coastguard Worker mova [stk+0x140], m6 7683*c0909341SAndroid Build Coastguard Worker mova [stk+0x150], m2 7684*c0909341SAndroid Build Coastguard Worker mova [stk+0x160], m7 7685*c0909341SAndroid Build Coastguard Worker mova [stk+0x170], m3 7686*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x20, 0 ; 0 7687*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x30 ; 1 7688*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x40 ; 2 7689*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x50 ; 3 7690*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x60 ; 4 7691*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x70 ; 5 7692*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x80 ; 6 7693*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0x90 ; 7 7694*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0x60] 7695*c0909341SAndroid Build Coastguard Worker mova m6, [stk+0x70] 7696*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0x80] 7697*c0909341SAndroid Build Coastguard Worker mova m0, [stk+0x90] 7698*c0909341SAndroid Build Coastguard Worker mov r0, r0m 7699*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5, m6 ; 45a 7700*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m6 ; 45b 7701*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7, m0 ; 67a 7702*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m0 ; 67b 7703*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m4 7704*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m5 7705*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m6 7706*c0909341SAndroid Build 
Coastguard Worker mova [stk+0x90], m7 7707*c0909341SAndroid Build Coastguard Worker mova m1, [stk+0x20] 7708*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0x30] 7709*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0x40] 7710*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x50] 7711*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1, m2 ; 01a 7712*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m2 ; 01b 7713*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3, m4 ; 23a 7714*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m4 ; 23b 7715*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x180] 7716*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0x190] 7717*c0909341SAndroid Build Coastguard Worker mova m6, [stk+0x1a0] 7718*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0x1b0] 7719*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 7720*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 7721*c0909341SAndroid Build Coastguard Worker.dy2_vloop: 7722*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m4 7723*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m4 7724*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m5 7725*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m5 7726*c0909341SAndroid Build Coastguard Worker paddd m0, m2 7727*c0909341SAndroid Build Coastguard Worker paddd m1, m3 7728*c0909341SAndroid Build Coastguard Worker pmaddwd m2, [stk+0x60], m6 7729*c0909341SAndroid Build Coastguard Worker pmaddwd m3, [stk+0x70], m6 7730*c0909341SAndroid Build Coastguard Worker pmaddwd m4, [stk+0x80], m7 7731*c0909341SAndroid Build Coastguard Worker pmaddwd m5, [stk+0x90], m7 7732*c0909341SAndroid Build Coastguard Worker %if isput 7733*c0909341SAndroid Build Coastguard Worker movd m6, [esp+0x18] 7734*c0909341SAndroid Build Coastguard Worker %endif 7735*c0909341SAndroid Build Coastguard Worker paddd m0, m2 7736*c0909341SAndroid Build Coastguard Worker paddd m1, m3 
7737*c0909341SAndroid Build Coastguard Worker paddd m0, vrnd_mem 7738*c0909341SAndroid Build Coastguard Worker paddd m1, vrnd_mem 7739*c0909341SAndroid Build Coastguard Worker paddd m4, m0 7740*c0909341SAndroid Build Coastguard Worker paddd m5, m1 7741*c0909341SAndroid Build Coastguard Worker%endif 7742*c0909341SAndroid Build Coastguard Worker%ifidn %1, put 7743*c0909341SAndroid Build Coastguard Worker psrad m4, m6 7744*c0909341SAndroid Build Coastguard Worker psrad m5, m6 7745*c0909341SAndroid Build Coastguard Worker packssdw m4, m5 7746*c0909341SAndroid Build Coastguard Worker pxor m7, m7 7747*c0909341SAndroid Build Coastguard Worker pmaxsw m4, m7 7748*c0909341SAndroid Build Coastguard Worker pminsw m4, pxmaxm 7749*c0909341SAndroid Build Coastguard Worker mova [dstq], m4 7750*c0909341SAndroid Build Coastguard Worker add dstq, dsm 7751*c0909341SAndroid Build Coastguard Worker%else 7752*c0909341SAndroid Build Coastguard Worker psrad m4, 6 7753*c0909341SAndroid Build Coastguard Worker psrad m5, 6 7754*c0909341SAndroid Build Coastguard Worker packssdw m4, m5 7755*c0909341SAndroid Build Coastguard Worker mova [tmpq], m4 7756*c0909341SAndroid Build Coastguard Worker add tmpq, tmp_stridem 7757*c0909341SAndroid Build Coastguard Worker%endif 7758*c0909341SAndroid Build Coastguard Worker dec hd 7759*c0909341SAndroid Build Coastguard Worker jz .dy2_hloop_prep 7760*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7761*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 4, 8, 5, 6, 7, 9, 0, 1 7762*c0909341SAndroid Build Coastguard Worker mova [stk+0xd0], m4 7763*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 8, 5, 6, 7, 9, 4, 0, 1 7764*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0xd0] 7765*c0909341SAndroid Build Coastguard Worker mova m0, m2 ; 01a 7766*c0909341SAndroid Build Coastguard Worker mova m1, m3 ; 01b 7767*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0x90] ; 23a 7768*c0909341SAndroid Build Coastguard Worker mova m3, 
[stk+0xa0] ; 23b 7769*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0xb0] ; 45a 7770*c0909341SAndroid Build Coastguard Worker mova m6, [stk+0xc0] ; 45b 7771*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m4, m8 ; 67a 7772*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m8 ; 67b 7773*c0909341SAndroid Build Coastguard Worker mova [stk+0x90], m5 7774*c0909341SAndroid Build Coastguard Worker mova [stk+0xa0], m6 7775*c0909341SAndroid Build Coastguard Worker mova [stk+0xb0], m7 7776*c0909341SAndroid Build Coastguard Worker mova [stk+0xc0], m4 7777*c0909341SAndroid Build Coastguard Worker%else 7778*c0909341SAndroid Build Coastguard Worker mov r0m, r0 7779*c0909341SAndroid Build Coastguard Worker mov r3, r3m 7780*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0xe0 ; 8 7781*c0909341SAndroid Build Coastguard Worker MC_8TAP_SCALED_H 0xa0, 0 ; 9 7782*c0909341SAndroid Build Coastguard Worker mova m7, [stk+0xe0] 7783*c0909341SAndroid Build Coastguard Worker mova m2, [stk+0x60] ; 23a 7784*c0909341SAndroid Build Coastguard Worker mova m3, [stk+0x70] ; 23b 7785*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x80] ; 45a 7786*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0x90] ; 45b 7787*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7, m0 ; 67a 7788*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m0 ; 67b 7789*c0909341SAndroid Build Coastguard Worker mova m0, [stk+0x40] ; 01a 7790*c0909341SAndroid Build Coastguard Worker mova m1, [stk+0x50] ; 01b 7791*c0909341SAndroid Build Coastguard Worker mova [stk+0x40], m2 7792*c0909341SAndroid Build Coastguard Worker mova [stk+0x50], m3 7793*c0909341SAndroid Build Coastguard Worker mova [stk+0x60], m4 7794*c0909341SAndroid Build Coastguard Worker mova [stk+0x70], m5 7795*c0909341SAndroid Build Coastguard Worker mova m4, [stk+0x180] 7796*c0909341SAndroid Build Coastguard Worker mova m5, [stk+0x190] 7797*c0909341SAndroid Build Coastguard Worker mova [stk+0x80], m6 
; ---------------------------------------------------------------------------
; Tail of the MC_8TAP_SCALED macro body (the macro header and the code that
; leads up to this point are earlier in the file). These first lines finish
; the x86-32 branch that advances the .dy2 vertical loop to the next output
; row.
; ---------------------------------------------------------------------------
    mova         [stk+0x90], m7          ; spill the "67b" row pair
    mova                 m6, [stk+0x1a0] ; reload vertical coefficient pair stored
    mova                 m7, [stk+0x1b0] ; at .dy2_w_start (q2222 / q3333 splats)
    mov                  r0, r0m         ; restore r0 (dstq/tmpq on x86-32) saved above
%endif
    jmp .dy2_vloop
; NOTE(review): INIT_XMM here presumably resets the x86inc register-name
; mapping (the x86-32 paths above use SWAP) before the shared exit code --
; confirm against x86inc.asm.
INIT_XMM ssse3
.ret:
    ; Common exit of the scaled functions; MC_8TAP_SCALED_RET is defined
    ; elsewhere in this file.
    MC_8TAP_SCALED_RET 0
%if ARCH_X86_32 && !isprep && required_stack_alignment > STACK_ALIGNMENT
 ; Point r0m-r3m at the caller's argument slots relative to rstk.
 ; NOTE(review): presumably this undoes an earlier redefinition made inside
 ; this macro for the 32-bit put path with a manually realigned stack; that
 ; redefinition is not visible in this excerpt -- confirm.
 %define r0m [rstk+stack_offset+ 4]
 %define r1m [rstk+stack_offset+ 8]
 %define r2m [rstk+stack_offset+12]
 %define r3m [rstk+stack_offset+16]
%endif
; Drop the put/prep selector flags so the next expansion starts clean.
%undef isput
%undef isprep
%endmacro

; BILIN_SCALED_FN <put|prep>
;
; Emits the entry point %1_bilin_scaled_16bpc. It loads the same packed
; constant, (5*15 << 16) | 5*15, into both t0d and t1d and then tail-jumps
; into %1_8tap_scaled_16bpc, so the bilinear case shares the 8-tap scaled
; implementation.
;
; In the 8-tap scaled body, t0d >> 16 becomes the row offset added to the
; per-pixel horizontal indices into subpel_filters, and t1 is added to
; (my >> 6) to pick the vertical filter row (see .dy2_w_start).
; NOTE(review): 5*15 presumably means "filter set 5, 15 rows per set", i.e.
; the bilinear rows of subpel_filters -- confirm against the table layout.
%macro BILIN_SCALED_FN 1
cglobal %1_bilin_scaled_16bpc
    mov                 t0d, (5*15 << 16) | 5*15
    mov                 t1d, (5*15 << 16) | 5*15
    jmp mangle(private_prefix %+ _%1_8tap_scaled_16bpc %+ SUFFIX)
%endmacro

; Temporary-register assignment (t0, t1) for the put_*_scaled functions.
; The x86-64 non-Windows and x86-32 arms of this chain follow directly.
%if WIN64
DECLARE_REG_TMP 6, 5
%elif ARCH_X86_64
; Remaining arms of the %if WIN64 / %elif ARCH_X86_64 chain opened just above:
; t0/t1 assignment for put_*_scaled on x86-64 (non-Windows) and on x86-32.
DECLARE_REG_TMP 6, 8
%else
DECLARE_REG_TMP 1, 2
%endif

; ---------------------------------------------------------------------------
; put_*_scaled_16bpc entry points.
;
; BILIN_SCALED_FN emits put_bilin_scaled_16bpc. Each PUT_8TAP_SCALED_FN line
; expands to "FN put_8tap_scaled, <suffix>, <TYPE>, <TYPE>[, <target>]"; FN is
; defined elsewhere in this file.
; NOTE(review): the final "regular" line names no jump target, so presumably
; that entry point falls straight through into the body emitted by
; "MC_8TAP_SCALED put" below, which means the order of these lines matters --
; confirm against the FN macro.
; ---------------------------------------------------------------------------
%define PUT_8TAP_SCALED_FN FN put_8tap_scaled,
BILIN_SCALED_FN put
PUT_8TAP_SCALED_FN sharp,          SHARP,   SHARP,   put_8tap_scaled_16bpc
PUT_8TAP_SCALED_FN sharp_smooth,   SHARP,   SMOOTH,  put_8tap_scaled_16bpc
PUT_8TAP_SCALED_FN smooth_sharp,   SMOOTH,  SHARP,   put_8tap_scaled_16bpc
PUT_8TAP_SCALED_FN smooth,         SMOOTH,  SMOOTH,  put_8tap_scaled_16bpc
PUT_8TAP_SCALED_FN sharp_regular,  SHARP,   REGULAR, put_8tap_scaled_16bpc
PUT_8TAP_SCALED_FN regular_sharp,  REGULAR, SHARP,   put_8tap_scaled_16bpc
PUT_8TAP_SCALED_FN smooth_regular, SMOOTH,  REGULAR, put_8tap_scaled_16bpc
PUT_8TAP_SCALED_FN regular_smooth, REGULAR, SMOOTH,  put_8tap_scaled_16bpc
PUT_8TAP_SCALED_FN regular,        REGULAR, REGULAR
MC_8TAP_SCALED put

; Temporary-register assignment (t0, t1) for the prep_*_scaled functions.
; It differs from the put assignment on WIN64 and on x86-64.
%if WIN64
DECLARE_REG_TMP 5, 4
%elif ARCH_X86_64
DECLARE_REG_TMP 6, 7
%else
DECLARE_REG_TMP 1, 2
%endif

; ---------------------------------------------------------------------------
; prep_*_scaled_16bpc entry points; same layout as the put table above, with
; "MC_8TAP_SCALED prep" emitting the shared body.
; ---------------------------------------------------------------------------
%define PREP_8TAP_SCALED_FN FN prep_8tap_scaled,
BILIN_SCALED_FN prep
PREP_8TAP_SCALED_FN sharp,          SHARP,   SHARP,   prep_8tap_scaled_16bpc
PREP_8TAP_SCALED_FN sharp_smooth,   SHARP,   SMOOTH,  prep_8tap_scaled_16bpc
PREP_8TAP_SCALED_FN smooth_sharp,   SMOOTH,  SHARP,   prep_8tap_scaled_16bpc
PREP_8TAP_SCALED_FN smooth,         SMOOTH,  SMOOTH,  prep_8tap_scaled_16bpc
PREP_8TAP_SCALED_FN sharp_regular,  SHARP,   REGULAR, prep_8tap_scaled_16bpc
PREP_8TAP_SCALED_FN regular_sharp,  REGULAR, SHARP,   prep_8tap_scaled_16bpc
PREP_8TAP_SCALED_FN smooth_regular, SMOOTH,  REGULAR, prep_8tap_scaled_16bpc
PREP_8TAP_SCALED_FN regular_smooth, REGULAR, SMOOTH,  prep_8tap_scaled_16bpc
PREP_8TAP_SCALED_FN regular,        REGULAR, REGULAR
MC_8TAP_SCALED prep

; Single temporary register (t0) for the warp_affine functions that follow.
%if ARCH_X86_64
DECLARE_REG_TMP 6
%else
DECLARE_REG_TMP 2
%endif

%if ARCH_X86_64
; warp8x8t spills one fewer xmm register than warp8x8 on WIN64; compensate for
; that by allocating 16 more bytes of stack space so that the stack offsets of
; the two functions match up.
7875*c0909341SAndroid Build Coastguard Worker%if WIN64 && STACK_ALIGNMENT == 16 7876*c0909341SAndroid Build Coastguard Worker%assign stksz 16*14 7877*c0909341SAndroid Build Coastguard Worker%else 7878*c0909341SAndroid Build Coastguard Worker%assign stksz 16*13 7879*c0909341SAndroid Build Coastguard Worker%endif 7880*c0909341SAndroid Build Coastguard Workercglobal warp_affine_8x8t_16bpc, 4, 13, 9, stksz, dst, ds, src, ss, delta, \ 7881*c0909341SAndroid Build Coastguard Worker mx, tmp, alpha, beta, \ 7882*c0909341SAndroid Build Coastguard Worker filter, my, gamma, cnt 7883*c0909341SAndroid Build Coastguard Worker%assign stack_size_padded_8x8t stack_size_padded 7884*c0909341SAndroid Build Coastguard Worker%else 7885*c0909341SAndroid Build Coastguard Workercglobal warp_affine_8x8t_16bpc, 0, 7, 8, -16*17, alpha, gamma, src, tmp, \ 7886*c0909341SAndroid Build Coastguard Worker filter, mx, my 7887*c0909341SAndroid Build Coastguard Worker%define m8 [esp+16*13] 7888*c0909341SAndroid Build Coastguard Worker%define m9 [esp+16*14] 7889*c0909341SAndroid Build Coastguard Worker%define cntd dword [esp+4*63] 7890*c0909341SAndroid Build Coastguard Worker%define dstq tmpq 7891*c0909341SAndroid Build Coastguard Worker%define dsq 0 7892*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 7893*c0909341SAndroid Build Coastguard Worker%define dstm [esp+4*65] 7894*c0909341SAndroid Build Coastguard Worker%define dsm [esp+4*66] 7895*c0909341SAndroid Build Coastguard Worker%else 7896*c0909341SAndroid Build Coastguard Worker%define dstm r0m 7897*c0909341SAndroid Build Coastguard Worker%define dsm r1m 7898*c0909341SAndroid Build Coastguard Worker%endif 7899*c0909341SAndroid Build Coastguard Worker%endif 7900*c0909341SAndroid Build Coastguard Worker%define base filterq-$$ 7901*c0909341SAndroid Build Coastguard Worker mov t0d, r7m 7902*c0909341SAndroid Build Coastguard Worker LEA filterq, $$ 7903*c0909341SAndroid Build Coastguard Worker shr t0d, 11 7904*c0909341SAndroid Build 
Coastguard Worker%if ARCH_X86_64 7905*c0909341SAndroid Build Coastguard Worker movddup m8, [base+warp8x8t_rnd] 7906*c0909341SAndroid Build Coastguard Worker%else 7907*c0909341SAndroid Build Coastguard Worker movddup m1, [base+warp8x8t_rnd] 7908*c0909341SAndroid Build Coastguard Worker mov r1, r1m 7909*c0909341SAndroid Build Coastguard Worker add r1, r1 7910*c0909341SAndroid Build Coastguard Worker mova m8, m1 7911*c0909341SAndroid Build Coastguard Worker mov r1m, r1 ; ds *= 2 7912*c0909341SAndroid Build Coastguard Worker%endif 7913*c0909341SAndroid Build Coastguard Worker call mangle(private_prefix %+ _warp_affine_8x8_16bpc_ssse3).main 7914*c0909341SAndroid Build Coastguard Worker jmp .start 7915*c0909341SAndroid Build Coastguard Worker.loop: 7916*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7917*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*4] 7918*c0909341SAndroid Build Coastguard Worker%else 7919*c0909341SAndroid Build Coastguard Worker add dstq, dsm 7920*c0909341SAndroid Build Coastguard Worker mov dstm, dstq 7921*c0909341SAndroid Build Coastguard Worker%endif 7922*c0909341SAndroid Build Coastguard Worker call mangle(private_prefix %+ _warp_affine_8x8_16bpc_ssse3).main2 7923*c0909341SAndroid Build Coastguard Worker.start: 7924*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 7925*c0909341SAndroid Build Coastguard Worker mov dstq, dstm 7926*c0909341SAndroid Build Coastguard Worker%endif 7927*c0909341SAndroid Build Coastguard Worker paddd m1, m8 7928*c0909341SAndroid Build Coastguard Worker paddd m2, m8 7929*c0909341SAndroid Build Coastguard Worker psrad m1, 15 7930*c0909341SAndroid Build Coastguard Worker psrad m2, 15 7931*c0909341SAndroid Build Coastguard Worker packssdw m1, m2 7932*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*0], m1 7933*c0909341SAndroid Build Coastguard Worker call mangle(private_prefix %+ _warp_affine_8x8_16bpc_ssse3).main3 7934*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 
7935*c0909341SAndroid Build Coastguard Worker mov dstq, dstm 7936*c0909341SAndroid Build Coastguard Worker add dstq, dsm 7937*c0909341SAndroid Build Coastguard Worker%endif 7938*c0909341SAndroid Build Coastguard Worker paddd m1, m8 7939*c0909341SAndroid Build Coastguard Worker paddd m2, m8 7940*c0909341SAndroid Build Coastguard Worker psrad m1, 15 7941*c0909341SAndroid Build Coastguard Worker psrad m2, 15 7942*c0909341SAndroid Build Coastguard Worker packssdw m1, m2 7943*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*2], m1 7944*c0909341SAndroid Build Coastguard Worker dec cntd 7945*c0909341SAndroid Build Coastguard Worker jg .loop 7946*c0909341SAndroid Build Coastguard Worker RET 7947*c0909341SAndroid Build Coastguard Worker 7948*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7949*c0909341SAndroid Build Coastguard Workercglobal warp_affine_8x8_16bpc, 4, 13, 10, 16*13, dst, ds, src, ss, delta, \ 7950*c0909341SAndroid Build Coastguard Worker mx, tmp, alpha, beta, \ 7951*c0909341SAndroid Build Coastguard Worker filter, my, gamma, cnt 7952*c0909341SAndroid Build Coastguard WorkerASSERT stack_size_padded == stack_size_padded_8x8t 7953*c0909341SAndroid Build Coastguard Worker%else 7954*c0909341SAndroid Build Coastguard Workercglobal warp_affine_8x8_16bpc, 0, 7, 8, -16*17, alpha, gamma, src, tmp, \ 7955*c0909341SAndroid Build Coastguard Worker filter, mx, my 7956*c0909341SAndroid Build Coastguard Worker%endif 7957*c0909341SAndroid Build Coastguard Worker mov t0d, r7m 7958*c0909341SAndroid Build Coastguard Worker LEA filterq, $$ 7959*c0909341SAndroid Build Coastguard Worker shr t0d, 11 7960*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7961*c0909341SAndroid Build Coastguard Worker movddup m8, [base+warp8x8_rnd2+t0*8] 7962*c0909341SAndroid Build Coastguard Worker movd m9, r7m ; pixel_max 7963*c0909341SAndroid Build Coastguard Worker pshufb m9, [base+pw_256] 7964*c0909341SAndroid Build Coastguard Worker%else 7965*c0909341SAndroid Build Coastguard 
Worker movddup m1, [base+warp8x8_rnd2+t0*8] 7966*c0909341SAndroid Build Coastguard Worker movd m2, r7m ; pixel_max 7967*c0909341SAndroid Build Coastguard Worker pshufb m2, [base+pw_256] 7968*c0909341SAndroid Build Coastguard Worker mova m8, m1 7969*c0909341SAndroid Build Coastguard Worker mova m9, m2 7970*c0909341SAndroid Build Coastguard Worker%endif 7971*c0909341SAndroid Build Coastguard Worker call .main 7972*c0909341SAndroid Build Coastguard Worker jmp .start 7973*c0909341SAndroid Build Coastguard Worker.loop: 7974*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 7975*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 7976*c0909341SAndroid Build Coastguard Worker%else 7977*c0909341SAndroid Build Coastguard Worker add dstq, dsm 7978*c0909341SAndroid Build Coastguard Worker mov dstm, dstq 7979*c0909341SAndroid Build Coastguard Worker%endif 7980*c0909341SAndroid Build Coastguard Worker call .main2 7981*c0909341SAndroid Build Coastguard Worker.start: 7982*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 7983*c0909341SAndroid Build Coastguard Worker mov dstq, dstm 7984*c0909341SAndroid Build Coastguard Worker%endif 7985*c0909341SAndroid Build Coastguard Worker psrad m1, 16 7986*c0909341SAndroid Build Coastguard Worker psrad m2, 16 7987*c0909341SAndroid Build Coastguard Worker packssdw m1, m2 7988*c0909341SAndroid Build Coastguard Worker pmaxsw m1, m6 7989*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m8 7990*c0909341SAndroid Build Coastguard Worker pminsw m1, m9 7991*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*0], m1 7992*c0909341SAndroid Build Coastguard Worker call .main3 7993*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 7994*c0909341SAndroid Build Coastguard Worker mov dstq, dstm 7995*c0909341SAndroid Build Coastguard Worker add dstq, dsm 7996*c0909341SAndroid Build Coastguard Worker%endif 7997*c0909341SAndroid Build Coastguard Worker psrad m1, 16 7998*c0909341SAndroid Build Coastguard Worker psrad m2, 16 
7999*c0909341SAndroid Build Coastguard Worker packssdw m1, m2 8000*c0909341SAndroid Build Coastguard Worker pmaxsw m1, m6 8001*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m8 8002*c0909341SAndroid Build Coastguard Worker pminsw m1, m9 8003*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*1], m1 8004*c0909341SAndroid Build Coastguard Worker dec cntd 8005*c0909341SAndroid Build Coastguard Worker jg .loop 8006*c0909341SAndroid Build Coastguard Worker RET 8007*c0909341SAndroid Build Coastguard WorkerALIGN function_align 8008*c0909341SAndroid Build Coastguard Worker.main: 8009*c0909341SAndroid Build Coastguard Worker ; Stack args offset by one (r4m -> r5m etc.) due to call 8010*c0909341SAndroid Build Coastguard Worker%if WIN64 8011*c0909341SAndroid Build Coastguard Worker mov deltaq, r5m 8012*c0909341SAndroid Build Coastguard Worker mov mxd, r6m 8013*c0909341SAndroid Build Coastguard Worker%endif 8014*c0909341SAndroid Build Coastguard Worker movd m0, [base+warp8x8_shift+t0*4] 8015*c0909341SAndroid Build Coastguard Worker movddup m7, [base+warp8x8_rnd1+t0*8] 8016*c0909341SAndroid Build Coastguard Worker add filterq, mc_warp_filter-$$ 8017*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 8018*c0909341SAndroid Build Coastguard Worker movsx alphad, word [deltaq+2*0] 8019*c0909341SAndroid Build Coastguard Worker movsx betad, word [deltaq+2*1] 8020*c0909341SAndroid Build Coastguard Worker movsx gammad, word [deltaq+2*2] 8021*c0909341SAndroid Build Coastguard Worker movsx deltad, word [deltaq+2*3] 8022*c0909341SAndroid Build Coastguard Worker lea tmpq, [ssq*3] 8023*c0909341SAndroid Build Coastguard Worker add mxd, 512+(64<<10) 8024*c0909341SAndroid Build Coastguard Worker sub srcq, tmpq ; src -= ss*3 8025*c0909341SAndroid Build Coastguard Worker imul tmpd, alphad, -7 8026*c0909341SAndroid Build Coastguard Worker mov myd, r7m 8027*c0909341SAndroid Build Coastguard Worker add betad, tmpd ; beta -= alpha*7 8028*c0909341SAndroid Build Coastguard Worker imul 
tmpd, gammad, -7 8029*c0909341SAndroid Build Coastguard Worker add myd, 512+(64<<10) 8030*c0909341SAndroid Build Coastguard Worker mov cntd, 4 8031*c0909341SAndroid Build Coastguard Worker add deltad, tmpd ; delta -= gamma*7 8032*c0909341SAndroid Build Coastguard Worker%else 8033*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 8034*c0909341SAndroid Build Coastguard Worker %assign stack_offset stack_offset - gprsize 8035*c0909341SAndroid Build Coastguard Worker%endif 8036*c0909341SAndroid Build Coastguard Worker mov r3d, r5m ; abcd 8037*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 8038*c0909341SAndroid Build Coastguard Worker mov r0, r1m ; dst 8039*c0909341SAndroid Build Coastguard Worker mov r1, r2m ; ds 8040*c0909341SAndroid Build Coastguard Worker mov [esp+gprsize+4*65], r0 8041*c0909341SAndroid Build Coastguard Worker mov [esp+gprsize+4*66], r1 8042*c0909341SAndroid Build Coastguard Worker%endif 8043*c0909341SAndroid Build Coastguard Worker movsx alphad, word [r3+2*0] 8044*c0909341SAndroid Build Coastguard Worker movsx r2d, word [r3+2*1] 8045*c0909341SAndroid Build Coastguard Worker movsx gammad, word [r3+2*2] 8046*c0909341SAndroid Build Coastguard Worker movsx r3d, word [r3+2*3] 8047*c0909341SAndroid Build Coastguard Worker imul r5d, alphad, -7 8048*c0909341SAndroid Build Coastguard Worker add r2d, r5d ; beta -= alpha*7 8049*c0909341SAndroid Build Coastguard Worker imul r5d, gammad, -7 8050*c0909341SAndroid Build Coastguard Worker mov [esp+gprsize+4*60], r2d 8051*c0909341SAndroid Build Coastguard Worker add r3d, r5d ; delta -= gamma*7 8052*c0909341SAndroid Build Coastguard Worker mov [esp+gprsize+4*61], r3d 8053*c0909341SAndroid Build Coastguard Worker mov r3d, r4m ; ss 8054*c0909341SAndroid Build Coastguard Worker mov srcq, r3m 8055*c0909341SAndroid Build Coastguard Worker mov mxd, r6m 8056*c0909341SAndroid Build Coastguard Worker mov myd, r7m 8057*c0909341SAndroid Build Coastguard Worker mov dword [esp+gprsize+4*63], 4 ; 
cnt 8058*c0909341SAndroid Build Coastguard Worker mov [esp+gprsize+4*62], r3 8059*c0909341SAndroid Build Coastguard Worker lea r3, [r3*3] 8060*c0909341SAndroid Build Coastguard Worker add mxd, 512+(64<<10) 8061*c0909341SAndroid Build Coastguard Worker add myd, 512+(64<<10) 8062*c0909341SAndroid Build Coastguard Worker sub srcq, r3 ; src -= ss*3 8063*c0909341SAndroid Build Coastguard Worker%if STACK_ALIGNMENT < 16 8064*c0909341SAndroid Build Coastguard Worker %assign stack_offset stack_offset + gprsize 8065*c0909341SAndroid Build Coastguard Worker%endif 8066*c0909341SAndroid Build Coastguard Worker%endif 8067*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize], m0 8068*c0909341SAndroid Build Coastguard Worker pxor m6, m6 8069*c0909341SAndroid Build Coastguard Worker call .h 8070*c0909341SAndroid Build Coastguard Worker mova m5, m0 8071*c0909341SAndroid Build Coastguard Worker call .h 8072*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m5, m0 ; 01 8073*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m0 8074*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16* 1], m1 8075*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16* 4], m5 8076*c0909341SAndroid Build Coastguard Worker mova m5, m0 8077*c0909341SAndroid Build Coastguard Worker call .h 8078*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m5, m0 ; 12 8079*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m0 8080*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16* 7], m1 8081*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*10], m5 8082*c0909341SAndroid Build Coastguard Worker mova m5, m0 8083*c0909341SAndroid Build Coastguard Worker call .h 8084*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m5, m0 ; 23 8085*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m0 8086*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16* 2], m1 8087*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16* 5], m5 
8088*c0909341SAndroid Build Coastguard Worker mova m5, m0 8089*c0909341SAndroid Build Coastguard Worker call .h 8090*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m5, m0 ; 34 8091*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m0 8092*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16* 8], m1 8093*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*11], m5 8094*c0909341SAndroid Build Coastguard Worker mova m5, m0 8095*c0909341SAndroid Build Coastguard Worker call .h 8096*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m5, m0 ; 45 8097*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m0 8098*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16* 3], m1 8099*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16* 6], m5 8100*c0909341SAndroid Build Coastguard Worker mova m5, m0 8101*c0909341SAndroid Build Coastguard Worker call .h 8102*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m5, m0 ; 56 8103*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m0 8104*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16* 9], m1 8105*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*12], m5 8106*c0909341SAndroid Build Coastguard Worker mova m5, m0 8107*c0909341SAndroid Build Coastguard Worker.main2: 8108*c0909341SAndroid Build Coastguard Worker call .h 8109*c0909341SAndroid Build Coastguard Worker%macro WARP_V 6 ; 01l, 23l, 45l, 01h, 23h, 45h 8110*c0909341SAndroid Build Coastguard Worker lea tmpd, [myq+gammaq] 8111*c0909341SAndroid Build Coastguard Worker shr myd, 10 8112*c0909341SAndroid Build Coastguard Worker movq m4, [filterq+myq*8] ; a 8113*c0909341SAndroid Build Coastguard Worker lea myd, [tmpq+gammaq] 8114*c0909341SAndroid Build Coastguard Worker shr tmpd, 10 8115*c0909341SAndroid Build Coastguard Worker movq m2, [filterq+tmpq*8] ; b 8116*c0909341SAndroid Build Coastguard Worker lea tmpd, [myq+gammaq] 8117*c0909341SAndroid Build Coastguard Worker shr myd, 10 
8118*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+myq*8] ; c 8119*c0909341SAndroid Build Coastguard Worker lea myd, [tmpq+gammaq] 8120*c0909341SAndroid Build Coastguard Worker shr tmpd, 10 8121*c0909341SAndroid Build Coastguard Worker movq m1, [filterq+tmpq*8] ; d 8122*c0909341SAndroid Build Coastguard Worker lea tmpd, [myq+gammaq] 8123*c0909341SAndroid Build Coastguard Worker shr myd, 10 8124*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m2 8125*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m1 8126*c0909341SAndroid Build Coastguard Worker punpckldq m2, m4, m3 8127*c0909341SAndroid Build Coastguard Worker punpckhdq m4, m3 8128*c0909341SAndroid Build Coastguard Worker punpcklbw m1, m6, m2 ; a0 a1 b0 b1 c0 c1 d0 d1 << 8 8129*c0909341SAndroid Build Coastguard Worker pmaddwd m1, [rsp+gprsize+16*%1] 8130*c0909341SAndroid Build Coastguard Worker punpckhbw m3, m6, m2 ; a2 a3 b2 b3 c2 c3 d2 d3 << 8 8131*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize+16*%2] 8132*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m2 8133*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*%1], m2 8134*c0909341SAndroid Build Coastguard Worker paddd m1, m3 8135*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m6, m4 ; a4 a5 b4 b5 c4 c5 d4 d5 << 8 8136*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+gprsize+16*%3] 8137*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m2 8138*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*%2], m2 8139*c0909341SAndroid Build Coastguard Worker paddd m1, m3 8140*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m5, m0 ; 67 8141*c0909341SAndroid Build Coastguard Worker punpckhbw m2, m6, m4 ; a6 a7 b6 b7 c6 c7 d6 d7 << 8 8142*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m3 8143*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*%3], m3 8144*c0909341SAndroid Build Coastguard Worker paddd m1, m2 8145*c0909341SAndroid Build Coastguard Worker movq m4, 
[filterq+myq*8] ; e 8146*c0909341SAndroid Build Coastguard Worker lea myd, [tmpq+gammaq] 8147*c0909341SAndroid Build Coastguard Worker shr tmpd, 10 8148*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+tmpq*8] ; f 8149*c0909341SAndroid Build Coastguard Worker lea tmpd, [myq+gammaq] 8150*c0909341SAndroid Build Coastguard Worker shr myd, 10 8151*c0909341SAndroid Build Coastguard Worker movq m2, [filterq+myq*8] ; g 8152*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 8153*c0909341SAndroid Build Coastguard Worker lea myd, [tmpq+deltaq] ; my += delta 8154*c0909341SAndroid Build Coastguard Worker%else 8155*c0909341SAndroid Build Coastguard Worker mov myd, [esp+gprsize+4*61] 8156*c0909341SAndroid Build Coastguard Worker add myd, tmpd 8157*c0909341SAndroid Build Coastguard Worker%endif 8158*c0909341SAndroid Build Coastguard Worker shr tmpd, 10 8159*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m3 8160*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+tmpq*8] ; h 8161*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3 8162*c0909341SAndroid Build Coastguard Worker punpckldq m3, m4, m2 8163*c0909341SAndroid Build Coastguard Worker punpckhdq m4, m2 8164*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m6, m3 ; e0 e1 f0 f1 g0 g1 h0 h1 << 8 8165*c0909341SAndroid Build Coastguard Worker pmaddwd m2, [rsp+gprsize+16*%4] 8166*c0909341SAndroid Build Coastguard Worker punpckhbw m6, m3 ; e2 e3 f2 f3 g2 g3 h2 h3 << 8 8167*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+gprsize+16*%5] 8168*c0909341SAndroid Build Coastguard Worker pmaddwd m6, m3 8169*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*%4], m3 8170*c0909341SAndroid Build Coastguard Worker pxor m3, m3 8171*c0909341SAndroid Build Coastguard Worker paddd m2, m6 8172*c0909341SAndroid Build Coastguard Worker punpcklbw m3, m4 ; e4 e5 f4 f5 g4 g5 h4 h5 << 8 8173*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+gprsize+16*%6] 8174*c0909341SAndroid Build 
Coastguard Worker pmaddwd m3, m6 8175*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*%5], m6 8176*c0909341SAndroid Build Coastguard Worker punpckhwd m5, m0 8177*c0909341SAndroid Build Coastguard Worker pxor m6, m6 8178*c0909341SAndroid Build Coastguard Worker paddd m2, m3 8179*c0909341SAndroid Build Coastguard Worker punpckhbw m3, m6, m4 ; e6 e7 f6 f7 g6 g7 h6 h7 << 8 8180*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m5 8181*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+16*%6], m5 8182*c0909341SAndroid Build Coastguard Worker mova m5, m0 8183*c0909341SAndroid Build Coastguard Worker paddd m2, m3 8184*c0909341SAndroid Build Coastguard Worker%endmacro 8185*c0909341SAndroid Build Coastguard Worker WARP_V 1, 2, 3, 4, 5, 6 8186*c0909341SAndroid Build Coastguard Worker ret 8187*c0909341SAndroid Build Coastguard Worker.main3: 8188*c0909341SAndroid Build Coastguard Worker call .h 8189*c0909341SAndroid Build Coastguard Worker WARP_V 7, 8, 9, 10, 11, 12 8190*c0909341SAndroid Build Coastguard Worker ret 8191*c0909341SAndroid Build Coastguard WorkerALIGN function_align 8192*c0909341SAndroid Build Coastguard Worker.h: 8193*c0909341SAndroid Build Coastguard Worker lea tmpd, [mxq+alphaq] 8194*c0909341SAndroid Build Coastguard Worker shr mxd, 10 8195*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+mxq*8] 8196*c0909341SAndroid Build Coastguard Worker punpcklbw m0, m6, m3 8197*c0909341SAndroid Build Coastguard Worker movu m3, [srcq-6] 8198*c0909341SAndroid Build Coastguard Worker pmaddwd m0, m3 ; 0 8199*c0909341SAndroid Build Coastguard Worker lea mxd, [tmpq+alphaq] 8200*c0909341SAndroid Build Coastguard Worker shr tmpd, 10 8201*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+tmpq*8] 8202*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m6, m3 8203*c0909341SAndroid Build Coastguard Worker movu m3, [srcq-4] 8204*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m3 ; 1 8205*c0909341SAndroid Build Coastguard Worker lea 
tmpd, [mxq+alphaq] 8206*c0909341SAndroid Build Coastguard Worker shr mxd, 10 8207*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+mxq*8] 8208*c0909341SAndroid Build Coastguard Worker phaddd m0, m2 ; 0 1 8209*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m6, m3 8210*c0909341SAndroid Build Coastguard Worker movu m3, [srcq-2] 8211*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m3 ; 2 8212*c0909341SAndroid Build Coastguard Worker lea mxd, [tmpq+alphaq] 8213*c0909341SAndroid Build Coastguard Worker shr tmpd, 10 8214*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+tmpq*8] 8215*c0909341SAndroid Build Coastguard Worker punpcklbw m1, m6, m3 8216*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+0] 8217*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m3 ; 3 8218*c0909341SAndroid Build Coastguard Worker lea tmpd, [mxq+alphaq] 8219*c0909341SAndroid Build Coastguard Worker shr mxd, 10 8220*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+mxq*8] 8221*c0909341SAndroid Build Coastguard Worker phaddd m2, m1 ; 2 3 8222*c0909341SAndroid Build Coastguard Worker punpcklbw m1, m6, m3 8223*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+2] 8224*c0909341SAndroid Build Coastguard Worker pmaddwd m1, m3 ; 4 8225*c0909341SAndroid Build Coastguard Worker lea mxd, [tmpq+alphaq] 8226*c0909341SAndroid Build Coastguard Worker shr tmpd, 10 8227*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+tmpq*8] 8228*c0909341SAndroid Build Coastguard Worker phaddd m0, m2 ; 0 1 2 3 8229*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m6, m3 8230*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+4] 8231*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m3 ; 5 8232*c0909341SAndroid Build Coastguard Worker lea tmpd, [mxq+alphaq] 8233*c0909341SAndroid Build Coastguard Worker shr mxd, 10 8234*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+mxq*8] 8235*c0909341SAndroid Build Coastguard Worker phaddd m1, m2 ; 4 5 
8236*c0909341SAndroid Build Coastguard Worker punpcklbw m2, m6, m3 8237*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+6] 8238*c0909341SAndroid Build Coastguard Worker pmaddwd m2, m3 ; 6 8239*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 8240*c0909341SAndroid Build Coastguard Worker lea mxd, [tmpq+betaq] ; mx += beta 8241*c0909341SAndroid Build Coastguard Worker%else 8242*c0909341SAndroid Build Coastguard Worker mov mxd, [esp+gprsize*2+4*60] 8243*c0909341SAndroid Build Coastguard Worker add mxd, tmpd 8244*c0909341SAndroid Build Coastguard Worker%endif 8245*c0909341SAndroid Build Coastguard Worker shr tmpd, 10 8246*c0909341SAndroid Build Coastguard Worker movq m3, [filterq+tmpq*8] 8247*c0909341SAndroid Build Coastguard Worker punpcklbw m4, m6, m3 8248*c0909341SAndroid Build Coastguard Worker movu m3, [srcq+8] 8249*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 8250*c0909341SAndroid Build Coastguard Worker add srcq, ssq 8251*c0909341SAndroid Build Coastguard Worker%else 8252*c0909341SAndroid Build Coastguard Worker add srcq, [esp+gprsize*2+4*62] 8253*c0909341SAndroid Build Coastguard Worker%endif 8254*c0909341SAndroid Build Coastguard Worker pmaddwd m3, m4 ; 7 8255*c0909341SAndroid Build Coastguard Worker phaddd m2, m3 ; 6 7 8256*c0909341SAndroid Build Coastguard Worker phaddd m1, m2 ; 4 5 6 7 8257*c0909341SAndroid Build Coastguard Worker paddd m0, m7 8258*c0909341SAndroid Build Coastguard Worker paddd m1, m7 8259*c0909341SAndroid Build Coastguard Worker psrad m0, [rsp+gprsize*2] 8260*c0909341SAndroid Build Coastguard Worker psrad m1, [rsp+gprsize*2] 8261*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 8262*c0909341SAndroid Build Coastguard Worker ret 8263*c0909341SAndroid Build Coastguard Worker 8264*c0909341SAndroid Build Coastguard Worker%macro BIDIR_FN 0 8265*c0909341SAndroid Build Coastguard Worker call .main 8266*c0909341SAndroid Build Coastguard Worker jmp wq 8267*c0909341SAndroid Build Coastguard Worker.w4_loop: 
8268*c0909341SAndroid Build Coastguard Worker call .main 8269*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 8270*c0909341SAndroid Build Coastguard Worker.w4: 8271*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], m0 8272*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], m0 8273*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 8274*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], m1 8275*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], m1 8276*c0909341SAndroid Build Coastguard Worker sub hd, 4 8277*c0909341SAndroid Build Coastguard Worker jg .w4_loop 8278*c0909341SAndroid Build Coastguard Worker.ret: 8279*c0909341SAndroid Build Coastguard Worker RET 8280*c0909341SAndroid Build Coastguard Worker.w8_loop: 8281*c0909341SAndroid Build Coastguard Worker call .main 8282*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 8283*c0909341SAndroid Build Coastguard Worker.w8: 8284*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], m0 8285*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1], m1 8286*c0909341SAndroid Build Coastguard Worker sub hd, 2 8287*c0909341SAndroid Build Coastguard Worker jne .w8_loop 8288*c0909341SAndroid Build Coastguard Worker RET 8289*c0909341SAndroid Build Coastguard Worker.w16_loop: 8290*c0909341SAndroid Build Coastguard Worker call .main 8291*c0909341SAndroid Build Coastguard Worker add dstq, strideq 8292*c0909341SAndroid Build Coastguard Worker.w16: 8293*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 8294*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 8295*c0909341SAndroid Build Coastguard Worker dec hd 8296*c0909341SAndroid Build Coastguard Worker jg .w16_loop 8297*c0909341SAndroid Build Coastguard Worker RET 8298*c0909341SAndroid Build Coastguard Worker.w32_loop: 8299*c0909341SAndroid Build Coastguard Worker call .main 8300*c0909341SAndroid Build Coastguard Worker add dstq, 
strideq 8301*c0909341SAndroid Build Coastguard Worker.w32: 8302*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 8303*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 8304*c0909341SAndroid Build Coastguard Worker call .main 8305*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m0 8306*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m1 8307*c0909341SAndroid Build Coastguard Worker dec hd 8308*c0909341SAndroid Build Coastguard Worker jg .w32_loop 8309*c0909341SAndroid Build Coastguard Worker RET 8310*c0909341SAndroid Build Coastguard Worker.w64_loop: 8311*c0909341SAndroid Build Coastguard Worker call .main 8312*c0909341SAndroid Build Coastguard Worker add dstq, strideq 8313*c0909341SAndroid Build Coastguard Worker.w64: 8314*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 8315*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 8316*c0909341SAndroid Build Coastguard Worker call .main 8317*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m0 8318*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m1 8319*c0909341SAndroid Build Coastguard Worker call .main 8320*c0909341SAndroid Build Coastguard Worker mova [dstq+16*4], m0 8321*c0909341SAndroid Build Coastguard Worker mova [dstq+16*5], m1 8322*c0909341SAndroid Build Coastguard Worker call .main 8323*c0909341SAndroid Build Coastguard Worker mova [dstq+16*6], m0 8324*c0909341SAndroid Build Coastguard Worker mova [dstq+16*7], m1 8325*c0909341SAndroid Build Coastguard Worker dec hd 8326*c0909341SAndroid Build Coastguard Worker jg .w64_loop 8327*c0909341SAndroid Build Coastguard Worker RET 8328*c0909341SAndroid Build Coastguard Worker.w128_loop: 8329*c0909341SAndroid Build Coastguard Worker call .main 8330*c0909341SAndroid Build Coastguard Worker add dstq, strideq 8331*c0909341SAndroid Build Coastguard Worker.w128: 8332*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 0], m0 8333*c0909341SAndroid Build Coastguard Worker mova 
[dstq+16* 1], m1 8334*c0909341SAndroid Build Coastguard Worker call .main 8335*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 2], m0 8336*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 3], m1 8337*c0909341SAndroid Build Coastguard Worker call .main 8338*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 4], m0 8339*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 5], m1 8340*c0909341SAndroid Build Coastguard Worker call .main 8341*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 6], m0 8342*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 7], m1 8343*c0909341SAndroid Build Coastguard Worker call .main 8344*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 8], m0 8345*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 9], m1 8346*c0909341SAndroid Build Coastguard Worker call .main 8347*c0909341SAndroid Build Coastguard Worker mova [dstq+16*10], m0 8348*c0909341SAndroid Build Coastguard Worker mova [dstq+16*11], m1 8349*c0909341SAndroid Build Coastguard Worker call .main 8350*c0909341SAndroid Build Coastguard Worker mova [dstq+16*12], m0 8351*c0909341SAndroid Build Coastguard Worker mova [dstq+16*13], m1 8352*c0909341SAndroid Build Coastguard Worker call .main 8353*c0909341SAndroid Build Coastguard Worker mova [dstq+16*14], m0 8354*c0909341SAndroid Build Coastguard Worker mova [dstq+16*15], m1 8355*c0909341SAndroid Build Coastguard Worker dec hd 8356*c0909341SAndroid Build Coastguard Worker jg .w128_loop 8357*c0909341SAndroid Build Coastguard Worker RET 8358*c0909341SAndroid Build Coastguard Worker%endmacro 8359*c0909341SAndroid Build Coastguard Worker 8360*c0909341SAndroid Build Coastguard Worker%if UNIX64 8361*c0909341SAndroid Build Coastguard WorkerDECLARE_REG_TMP 7 8362*c0909341SAndroid Build Coastguard Worker%else 8363*c0909341SAndroid Build Coastguard WorkerDECLARE_REG_TMP 5 8364*c0909341SAndroid Build Coastguard Worker%endif 8365*c0909341SAndroid Build Coastguard Worker 8366*c0909341SAndroid Build 
;------------------------------------------------------------------------------
; avg_16bpc(dst, stride, tmp1, tmp2, w, h, pixel_max)
;
; Combines the two intermediate buffers tmp1/tmp2 into dst:
;     out = ((max(tmp1 +sat tmp2, rnd) -sat rnd) * mul) >> 16
; rnd/mul come from the bidir_rnd/bidir_mul tables, indexed by pixel_max >> 11.
; tzcnt(w) indexes avg_ssse3_table; wq ends up holding the address of the
; per-width store loop that BIDIR_FN dispatches to.
; .main produces 16 output pixels in m0/m1 and advances tmp1q/tmp2q by 32 bytes.
;------------------------------------------------------------------------------
cglobal avg_16bpc, 4, 7, 4, dst, stride, tmp1, tmp2, w, h
%define base r6-avg_ssse3_table
    LEA         r6, avg_ssse3_table
    tzcnt       wd, wm
    mov         t0d, r6m ; pixel_max
    movsxd      wq, [r6+wq*4]           ; 32-bit table entry, relative to r6
    shr         t0d, 11                 ; table index derived from pixel_max
    movddup     m2, [base+bidir_rnd+t0*8]
    movddup     m3, [base+bidir_mul+t0*8]
    movifnidn   hd, hm
    add         wq, r6                  ; absolute jump target
    BIDIR_FN
ALIGN function_align
.main:
    mova        m0, [tmp1q+16*0]
    paddsw      m0, [tmp2q+16*0]        ; saturating tmp1 + tmp2
    mova        m1, [tmp1q+16*1]
    paddsw      m1, [tmp2q+16*1]
    add         tmp1q, 16*2
    add         tmp2q, 16*2
    pmaxsw      m0, m2                  ; clamp from below at rnd ...
    pmaxsw      m1, m2
    psubsw      m0, m2                  ; ... then remove the rnd offset
    psubsw      m1, m2
    pmulhw      m0, m3                  ; keep the high 16 bits of value * mul
    pmulhw      m1, m3
    ret

;------------------------------------------------------------------------------
; w_avg_16bpc(dst, stride, tmp1, tmp2, w, h, weight, pixel_max)
;
; Weighted combination of tmp1/tmp2:
;     out = clamp((tmp1 * weight + tmp2 * (16 - weight) + bias) >> 8,
;                 0, pixel_max)
; m4 holds the word pair (16-weight, weight) in every dword; both weights are
; multiplied by 4 when bit 11 of pixel_max is set (test r7m, 0x800).
; m5 is the per-dword bias built from pd_65538 and pixel_max, m6 is pixel_max
; in every word, m7 is zero.
;------------------------------------------------------------------------------
cglobal w_avg_16bpc, 4, 7, 8, dst, stride, tmp1, tmp2, w, h
%define base r6-w_avg_ssse3_table
    LEA         r6, w_avg_ssse3_table
    tzcnt       wd, wm
    mov         t0d, r6m ; weight
    movd        m6, r7m ; pixel_max
    movddup     m5, [base+pd_65538]
    movsxd      wq, [r6+wq*4]
    pshufb      m6, [base+pw_256]       ; presumably broadcasts the low word to all lanes
    add         wq, r6                  ; absolute jump target (r6 is free from here on)
    lea         r6d, [t0-16]
    shl         t0d, 16
    sub         t0d, r6d ; 16-weight, weight
    paddw       m5, m6
    mov         r6d, t0d
    shl         t0d, 2                  ; candidate: both weights * 4
    test        dword r7m, 0x800
    cmovnz      r6d, t0d                ; use the scaled weights if bit 11 of pixel_max is set
    movifnidn   hd, hm
    movd        m4, r6d
    pslld       m5, 7
    pxor        m7, m7
    pshufd      m4, m4, q0000           ; replicate the weight pair into all four dwords
    BIDIR_FN
ALIGN function_align
.main:
    mova        m2, [tmp1q+16*0]
    mova        m0, [tmp2q+16*0]
    punpckhwd   m3, m0, m2              ; interleave (tmp2, tmp1) word pairs
    punpcklwd   m0, m2
    mova        m2, [tmp1q+16*1]
    mova        m1, [tmp2q+16*1]
    add         tmp1q, 16*2
    add         tmp2q, 16*2
    pmaddwd     m3, m4                  ; tmp2 * (16-weight) + tmp1 * weight
    pmaddwd     m0, m4
    paddd       m3, m5
    paddd       m0, m5
    psrad       m3, 8
    psrad       m0, 8
    packssdw    m0, m3
    punpckhwd   m3, m1, m2
    punpcklwd   m1, m2
    pmaddwd     m3, m4
    pmaddwd     m1, m4
    paddd       m3, m5
    paddd       m1, m5
    psrad       m3, 8
    psrad       m1, 8
    packssdw    m1, m3
    pminsw      m0, m6                  ; clamp to [0, pixel_max]
    pminsw      m1, m6
    pmaxsw      m0, m7
    pmaxsw      m1, m7
    ret

;------------------------------------------------------------------------------
; mask_16bpc(dst, stride, tmp1, tmp2, w, h, mask, pixel_max)
;
; Per-pixel blend driven by an 8-bit mask buffer:
;     v   = (tmp1 * m + tmp2 * (64 - m)) >> 5
;     out = ((max(v, rnd) -sat rnd) * mul) >> 16
; On x86-64, h lives in a register and m8 holds pw_64. On x86-32, h is read
; directly from its stack slot (r5m) and m8 is a memory operand.
; .main consumes 16 mask bytes and 16 words from each of tmp1/tmp2.
;------------------------------------------------------------------------------
%if ARCH_X86_64
cglobal mask_16bpc, 4, 7, 9, dst, stride, tmp1, tmp2, w, h, mask
%else
cglobal mask_16bpc, 4, 7, 8, dst, stride, tmp1, tmp2, w, mask
%define hd dword r5m
%define m8 [base+pw_64]
%endif
%define base r6-mask_ssse3_table
    LEA         r6, mask_ssse3_table
    tzcnt       wd, wm
    mov         t0d, r7m ; pixel_max
    shr         t0d, 11
    movsxd      wq, [r6+wq*4]
    movddup     m6, [base+bidir_rnd+t0*8]
    movddup     m7, [base+bidir_mul+t0*8]
%if ARCH_X86_64
    mova        m8, [base+pw_64]
    movifnidn   hd, hm
%endif
    add         wq, r6
    mov         maskq, r6mp
    BIDIR_FN
ALIGN function_align
.main:
    movq        m3, [maskq+8*0]
    mova        m0, [tmp1q+16*0]
    mova        m4, [tmp2q+16*0]
    pxor        m5, m5
    punpcklbw   m3, m5                  ; zero-extend 8 mask bytes to words
    punpckhwd   m2, m0, m4              ; interleave (tmp1, tmp2) word pairs
    punpcklwd   m0, m4
    psubw       m1, m8, m3
    punpckhwd   m4, m3, m1 ; m, 64-m
    punpcklwd   m3, m1
    pmaddwd     m2, m4 ; tmp1 * m + tmp2 * (64-m)
    pmaddwd     m0, m3
    movq        m3, [maskq+8*1]
    mova        m1, [tmp1q+16*1]
    mova        m4, [tmp2q+16*1]
    add         maskq, 8*2
    add         tmp1q, 16*2
    add         tmp2q, 16*2
    psrad       m2, 5
    psrad       m0, 5
    packssdw    m0, m2
    punpcklbw   m3, m5                  ; m5 is still zero here; it is reused as 64-m just below
    punpckhwd   m2, m1, m4
    punpcklwd   m1, m4
    psubw       m5, m8, m3
    punpckhwd   m4, m3, m5 ; m, 64-m
    punpcklwd   m3, m5
    pmaddwd     m2, m4 ; tmp1 * m + tmp2 * (64-m)
    pmaddwd     m1, m3
    psrad       m2, 5
    psrad       m1, 5
    packssdw    m1, m2
    pmaxsw      m0, m6
    pmaxsw      m1, m6
    psubsw      m0, m6
    psubsw      m1, m6
    pmulhw      m0, m7
    pmulhw      m1, m7
    ret

;------------------------------------------------------------------------------
; w_mask_420_16bpc(dst, stride, tmp1, tmp2, w, h, mask, sign, pixel_max)
;
; Derives a blend mask from |tmp1 - tmp2|, blends tmp1/tmp2 with it, and
; writes the mask out with each 2x2 group of mask values reduced to one byte:
;     mask_out = (m00 + m01 + m10 + m11 + 2 - sign) >> 2
; Register roles after setup:
;     m7  = (2 - sign) in every word (presumably via the pw_256 broadcast)
;     m8  = pw_27615, m9 = pw_64, m10 = rnd, m11 = mul
; On x86-32, m8-m11 are spilled to the stack. They are only referenced inside
; .main, which is entered via `call`, hence the extra +gprsize in their
; addresses.
; .main returns 16 blended pixels in m0/m1 and the matching word masks in
; m2/m3. It is called once before the width dispatch, so every .wN label starts
; with a batch of results already in registers.
; For w >= 16, row 1 of dst doubles as scratch space for row-0 mask sums until
; the row-1 pixels are ready to overwrite it.
;------------------------------------------------------------------------------
cglobal w_mask_420_16bpc, 4, 7, 12, dst, stride, tmp1, tmp2, w, h, mask
%define base t0-w_mask_420_ssse3_table
    LEA         t0, w_mask_420_ssse3_table
    tzcnt       wd, wm
    mov         r6d, r8m ; pixel_max
    movd        m0, r7m ; sign
    shr         r6d, 11
    movsxd      wq, [t0+wq*4]
%if ARCH_X86_64
    mova        m8, [base+pw_27615] ; ((64 - 38) << 10) + 1023 - 32
    mova        m9, [base+pw_64]
    movddup     m10, [base+bidir_rnd+r6*8]
    movddup     m11, [base+bidir_mul+r6*8]
%else
    mova        m1, [base+pw_27615] ; ((64 - 38) << 10) + 1023 - 32
    mova        m2, [base+pw_64]
    movddup     m3, [base+bidir_rnd+r6*8]
    movddup     m4, [base+bidir_mul+r6*8]
    ALLOC_STACK -16*4
    mova        [rsp+16*0], m1
    mova        [rsp+16*1], m2
    mova        [rsp+16*2], m3
    mova        [rsp+16*3], m4
    %define     m8 [rsp+gprsize+16*0]
    %define     m9 [rsp+gprsize+16*1]
    %define     m10 [rsp+gprsize+16*2]
    %define     m11 [rsp+gprsize+16*3]
%endif
    movd        m7, [base+pw_2]
    psubw       m7, m0                  ; 2 - sign
    pshufb      m7, [base+pw_256]
    add         wq, t0
    movifnidn   hd, r5m
    mov         maskq, r6mp
    call .main
    jmp         wq
.w4_loop:
    call .main
    lea         dstq, [dstq+strideq*2]
    add         maskq, 4
.w4:
    ; m0 = rows 0-1, m1 = rows 2-3 (4 pixels per row)
    movq        [dstq+strideq*0], m0
    phaddw      m2, m3                  ; sum horizontal mask pairs
    movhps      [dstq+strideq*1], m0
    phaddd      m2, m2                  ; add the two rows of each row pair
    lea         dstq, [dstq+strideq*2]
    paddw       m2, m7
    movq        [dstq+strideq*0], m1
    psrlw       m2, 2
    movhps      [dstq+strideq*1], m1
    packuswb    m2, m2
    movd        [maskq], m2
    sub         hd, 4
    jg .w4_loop
    RET
.w8_loop:
    call .main
    lea         dstq, [dstq+strideq*2]
    add         maskq, 4
.w8:
    ; m0 = row 0, m1 = row 1
    mova        [dstq+strideq*0], m0
    paddw       m2, m3                  ; vertical mask sum
    phaddw      m2, m2                  ; horizontal mask sum
    mova        [dstq+strideq*1], m1
    paddw       m2, m7
    psrlw       m2, 2
    packuswb    m2, m2
    movd        [maskq], m2
    sub         hd, 2
    jg .w8_loop
    RET
.w16_loop:
    call .main
    lea         dstq, [dstq+strideq*2]
    add         maskq, 8
.w16:
    mova        [dstq+strideq*1+16*0], m2 ; stash row-0 masks in row 1 of dst
    mova        [dstq+strideq*0+16*0], m0
    mova        [dstq+strideq*1+16*1], m3
    mova        [dstq+strideq*0+16*1], m1
    call .main
    paddw       m2, [dstq+strideq*1+16*0]
    paddw       m3, [dstq+strideq*1+16*1]
    mova        [dstq+strideq*1+16*0], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16*1], m1
    paddw       m2, m7
    psrlw       m2, 2
    packuswb    m2, m2
    movq        [maskq], m2
    sub         hd, 2
    jg .w16_loop
    RET
.w32_loop:
    call .main
    lea         dstq, [dstq+strideq*2]
    add         maskq, 16
.w32:
    mova        [dstq+strideq*1+16*0], m2
    mova        [dstq+strideq*0+16*0], m0
    mova        [dstq+strideq*1+16*1], m3
    mova        [dstq+strideq*0+16*1], m1
    call .main
    mova        [dstq+strideq*0+16*2], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16*3], m2
    mova        [dstq+strideq*0+16*3], m1
    call .main
    paddw       m2, [dstq+strideq*1+16*0]
    paddw       m3, [dstq+strideq*1+16*1]
    mova        [dstq+strideq*1+16*0], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16*2], m2
    mova        [dstq+strideq*1+16*1], m1
    call .main
    phaddw      m2, m3
    paddw       m3, m7, [dstq+strideq*1+16*2]
    paddw       m2, [dstq+strideq*1+16*3]
    mova        [dstq+strideq*1+16*2], m0
    paddw       m2, m7
    psrlw       m3, 2
    psrlw       m2, 2
    mova        [dstq+strideq*1+16*3], m1
    packuswb    m3, m2
    mova        [maskq], m3
    sub         hd, 2
    jg .w32_loop
    RET
.w64_loop:
    call .main
    lea         dstq, [dstq+strideq*2]
    add         maskq, 16*2
.w64:
    mova        [dstq+strideq*1+16*1], m2
    mova        [dstq+strideq*0+16*0], m0
    mova        [dstq+strideq*1+16*2], m3
    mova        [dstq+strideq*0+16*1], m1
    call .main
    mova        [dstq+strideq*1+16*3], m2
    mova        [dstq+strideq*0+16*2], m0
    mova        [dstq+strideq*1+16*4], m3
    mova        [dstq+strideq*0+16*3], m1
    call .main
    mova        [dstq+strideq*1+16*5], m2
    mova        [dstq+strideq*0+16*4], m0
    mova        [dstq+strideq*1+16*6], m3
    mova        [dstq+strideq*0+16*5], m1
    call .main
    mova        [dstq+strideq*0+16*6], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16*7], m2
    mova        [dstq+strideq*0+16*7], m1
    call .main
    paddw       m2, [dstq+strideq*1+16*1]
    paddw       m3, [dstq+strideq*1+16*2]
    mova        [dstq+strideq*1+16*0], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16*2], m2
    mova        [dstq+strideq*1+16*1], m1
    call .main
    paddw       m2, [dstq+strideq*1+16*3]
    paddw       m3, [dstq+strideq*1+16*4]
    phaddw      m2, m3
    paddw       m3, m7, [dstq+strideq*1+16*2]
    mova        [dstq+strideq*1+16*2], m0
    paddw       m2, m7
    psrlw       m3, 2
    psrlw       m2, 2
    mova        [dstq+strideq*1+16*3], m1
    packuswb    m3, m2
    mova        [maskq+16*0], m3
    call .main
    paddw       m2, [dstq+strideq*1+16*5]
    paddw       m3, [dstq+strideq*1+16*6]
    mova        [dstq+strideq*1+16*4], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16*6], m2
    mova        [dstq+strideq*1+16*5], m1
    call .main
    phaddw      m2, m3
    paddw       m3, m7, [dstq+strideq*1+16*6]
    paddw       m2, [dstq+strideq*1+16*7]
    mova        [dstq+strideq*1+16*6], m0
    paddw       m2, m7
    psrlw       m3, 2
    psrlw       m2, 2
    mova        [dstq+strideq*1+16*7], m1
    packuswb    m3, m2
    mova        [maskq+16*1], m3
    sub         hd, 2
    jg .w64_loop
    RET
.w128_loop:
    call .main
    lea         dstq, [dstq+strideq*2]
    add         maskq, 16*4
.w128:
    mova        [dstq+strideq*1+16* 1], m2
    mova        [dstq+strideq*0+16* 0], m0
    mova        [dstq+strideq*1+16* 2], m3
    mova        [dstq+strideq*0+16* 1], m1
    call .main
    mova        [dstq+strideq*1+16* 3], m2
    mova        [dstq+strideq*0+16* 2], m0
    mova        [dstq+strideq*1+16* 4], m3
    mova        [dstq+strideq*0+16* 3], m1
    call .main
    mova        [dstq+strideq*1+16* 5], m2
    mova        [dstq+strideq*0+16* 4], m0
    mova        [dstq+strideq*1+16* 6], m3
    mova        [dstq+strideq*0+16* 5], m1
    call .main
    mova        [dstq+strideq*1+16* 7], m2
    mova        [dstq+strideq*0+16* 6], m0
    mova        [dstq+strideq*1+16* 8], m3
    mova        [dstq+strideq*0+16* 7], m1
    call .main
    mova        [dstq+strideq*1+16* 9], m2
    mova        [dstq+strideq*0+16* 8], m0
    mova        [dstq+strideq*1+16*10], m3
    mova        [dstq+strideq*0+16* 9], m1
    call .main
    mova        [dstq+strideq*1+16*11], m2
    mova        [dstq+strideq*0+16*10], m0
    mova        [dstq+strideq*1+16*12], m3
    mova        [dstq+strideq*0+16*11], m1
    call .main
    mova        [dstq+strideq*1+16*13], m2
    mova        [dstq+strideq*0+16*12], m0
    mova        [dstq+strideq*1+16*14], m3
    mova        [dstq+strideq*0+16*13], m1
    call .main
    mova        [dstq+strideq*0+16*14], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16*15], m2
    mova        [dstq+strideq*0+16*15], m1
    call .main
    paddw       m2, [dstq+strideq*1+16* 1]
    paddw       m3, [dstq+strideq*1+16* 2]
    mova        [dstq+strideq*1+16* 0], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16* 2], m2
    mova        [dstq+strideq*1+16* 1], m1
    call .main
    paddw       m2, [dstq+strideq*1+16* 3]
    paddw       m3, [dstq+strideq*1+16* 4]
    phaddw      m2, m3
    paddw       m3, m7, [dstq+strideq*1+16* 2]
    mova        [dstq+strideq*1+16* 2], m0
    paddw       m2, m7
    psrlw       m3, 2
    psrlw       m2, 2
    mova        [dstq+strideq*1+16* 3], m1
    packuswb    m3, m2
    mova        [maskq+16*0], m3
    call .main
    paddw       m2, [dstq+strideq*1+16* 5]
    paddw       m3, [dstq+strideq*1+16* 6]
    mova        [dstq+strideq*1+16* 4], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16* 6], m2
    mova        [dstq+strideq*1+16* 5], m1
    call .main
    paddw       m2, [dstq+strideq*1+16* 7]
    paddw       m3, [dstq+strideq*1+16* 8]
    phaddw      m2, m3
    paddw       m3, m7, [dstq+strideq*1+16* 6]
    mova        [dstq+strideq*1+16* 6], m0
    paddw       m2, m7
    psrlw       m3, 2
    psrlw       m2, 2
    mova        [dstq+strideq*1+16* 7], m1
    packuswb    m3, m2
    mova        [maskq+16*1], m3
    call .main
    paddw       m2, [dstq+strideq*1+16* 9]
    paddw       m3, [dstq+strideq*1+16*10]
    mova        [dstq+strideq*1+16* 8], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16*10], m2
    mova        [dstq+strideq*1+16* 9], m1
    call .main
    paddw       m2, [dstq+strideq*1+16*11]
    paddw       m3, [dstq+strideq*1+16*12]
    phaddw      m2, m3
    paddw       m3, m7, [dstq+strideq*1+16*10]
    mova        [dstq+strideq*1+16*10], m0
    paddw       m2, m7
    psrlw       m3, 2
    psrlw       m2, 2
    mova        [dstq+strideq*1+16*11], m1
    packuswb    m3, m2
    mova        [maskq+16*2], m3
    call .main
    paddw       m2, [dstq+strideq*1+16*13]
    paddw       m3, [dstq+strideq*1+16*14]
    mova        [dstq+strideq*1+16*12], m0
    phaddw      m2, m3
    mova        [dstq+strideq*1+16*14], m2
    mova        [dstq+strideq*1+16*13], m1
    call .main
    phaddw      m2, m3
    paddw       m3, m7, [dstq+strideq*1+16*14]
    paddw       m2, [dstq+strideq*1+16*15]
    mova        [dstq+strideq*1+16*14], m0
    paddw       m2, m7
    psrlw       m3, 2
    psrlw       m2, 2
    mova        [dstq+strideq*1+16*15], m1
    packuswb    m3, m2
    mova        [maskq+16*3], m3
    sub         hd, 2
    jg .w128_loop
    RET
ALIGN function_align
.main:
; W_MASK dst/tmp_offset, mask
;   Loads vector %1 of tmp1/tmp2 (8 words each), derives the word mask m from
;   |tmp1 - tmp2|, and leaves the blended pixels in m%1 and the mask in m%2.
;   Clobbers m4, m5, m6; reads the constants in m8-m11.
%macro W_MASK 2 ; dst/tmp_offset, mask
    mova        m%1, [tmp1q+16*%1]
    mova        m%2, [tmp2q+16*%1]
    punpcklwd   m4, m%2, m%1            ; interleave (tmp2, tmp1) word pairs
    punpckhwd   m5, m%2, m%1
    psubsw      m%1, m%2
    pabsw       m%1, m%1                ; |tmp1 - tmp2|
    psubusw     m6, m8, m%1             ; unsigned saturating: never goes below 0
    psrlw       m6, 10 ; 64-m
    psubw       m%2, m9, m6 ; m
    punpcklwd   m%1, m6, m%2            ; interleave (64-m, m) word pairs
    punpckhwd   m6, m%2
    pmaddwd     m%1, m4                 ; tmp2 * (64-m) + tmp1 * m
    pmaddwd     m6, m5
    psrad       m%1, 5
    psrad       m6, 5
    packssdw    m%1, m6
    pmaxsw      m%1, m10
    psubsw      m%1, m10
    pmulhw      m%1, m11
%endmacro
    W_MASK      0, 2
    W_MASK      1, 3
    add         tmp1q, 16*2
    add         tmp2q, 16*2
    ret

; w_mask_422_16bpc: entry and constant setup; the rest of the function follows.
cglobal w_mask_422_16bpc, 4, 7, 12, dst, stride, tmp1, tmp2, w, h, mask
%define base t0-w_mask_422_ssse3_table
    LEA         t0, w_mask_422_ssse3_table
    tzcnt       wd, wm
    mov         r6d, r8m ; pixel_max
    movd        m7, r7m ; sign
    shr         r6d, 11
    movsxd      wq, [t0+wq*4]
%if ARCH_X86_64
    mova        m8, [base+pw_27615]
    mova        m9, [base+pw_64]
    movddup     m10, [base+bidir_rnd+r6*8]
    movddup     m11, [base+bidir_mul+r6*8]
%else
    mova        m1, [base+pw_27615]
    mova        m2, [base+pw_64]
8873*c0909341SAndroid Build Coastguard Worker movddup m3, [base+bidir_rnd+r6*8] 8874*c0909341SAndroid Build Coastguard Worker movddup m4, [base+bidir_mul+r6*8] 8875*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*4 8876*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], m1 8877*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m2 8878*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m3 8879*c0909341SAndroid Build Coastguard Worker mova [rsp+16*3], m4 8880*c0909341SAndroid Build Coastguard Worker%endif 8881*c0909341SAndroid Build Coastguard Worker pxor m0, m0 8882*c0909341SAndroid Build Coastguard Worker add wq, t0 8883*c0909341SAndroid Build Coastguard Worker pshufb m7, m0 8884*c0909341SAndroid Build Coastguard Worker movifnidn hd, r5m 8885*c0909341SAndroid Build Coastguard Worker mov maskq, r6mp 8886*c0909341SAndroid Build Coastguard Worker call .main 8887*c0909341SAndroid Build Coastguard Worker jmp wq 8888*c0909341SAndroid Build Coastguard Worker.w4_loop: 8889*c0909341SAndroid Build Coastguard Worker call .main 8890*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 8891*c0909341SAndroid Build Coastguard Worker.w4: 8892*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], m0 8893*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], m0 8894*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 8895*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], m1 8896*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], m1 8897*c0909341SAndroid Build Coastguard Worker sub hd, 4 8898*c0909341SAndroid Build Coastguard Worker jg .w4_loop 8899*c0909341SAndroid Build Coastguard Worker.end: 8900*c0909341SAndroid Build Coastguard Worker RET 8901*c0909341SAndroid Build Coastguard Worker.w8_loop: 8902*c0909341SAndroid Build Coastguard Worker call .main 8903*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 8904*c0909341SAndroid Build Coastguard 
Worker.w8: 8905*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], m0 8906*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1], m1 8907*c0909341SAndroid Build Coastguard Worker sub hd, 2 8908*c0909341SAndroid Build Coastguard Worker jg .w8_loop 8909*c0909341SAndroid Build Coastguard Worker.w8_end: 8910*c0909341SAndroid Build Coastguard Worker RET 8911*c0909341SAndroid Build Coastguard Worker.w16_loop: 8912*c0909341SAndroid Build Coastguard Worker call .main 8913*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 8914*c0909341SAndroid Build Coastguard Worker.w16: 8915*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0+16*0], m0 8916*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0+16*1], m1 8917*c0909341SAndroid Build Coastguard Worker call .main 8918*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1+16*0], m0 8919*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1+16*1], m1 8920*c0909341SAndroid Build Coastguard Worker sub hd, 2 8921*c0909341SAndroid Build Coastguard Worker jg .w16_loop 8922*c0909341SAndroid Build Coastguard Worker RET 8923*c0909341SAndroid Build Coastguard Worker.w32_loop: 8924*c0909341SAndroid Build Coastguard Worker call .main 8925*c0909341SAndroid Build Coastguard Worker add dstq, strideq 8926*c0909341SAndroid Build Coastguard Worker.w32: 8927*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 8928*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 8929*c0909341SAndroid Build Coastguard Worker call .main 8930*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m0 8931*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m1 8932*c0909341SAndroid Build Coastguard Worker dec hd 8933*c0909341SAndroid Build Coastguard Worker jg .w32_loop 8934*c0909341SAndroid Build Coastguard Worker RET 8935*c0909341SAndroid Build Coastguard Worker.w64_loop: 8936*c0909341SAndroid Build Coastguard Worker call .main 8937*c0909341SAndroid Build 
Coastguard Worker add dstq, strideq 8938*c0909341SAndroid Build Coastguard Worker.w64: 8939*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 8940*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 8941*c0909341SAndroid Build Coastguard Worker call .main 8942*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m0 8943*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m1 8944*c0909341SAndroid Build Coastguard Worker call .main 8945*c0909341SAndroid Build Coastguard Worker mova [dstq+16*4], m0 8946*c0909341SAndroid Build Coastguard Worker mova [dstq+16*5], m1 8947*c0909341SAndroid Build Coastguard Worker call .main 8948*c0909341SAndroid Build Coastguard Worker mova [dstq+16*6], m0 8949*c0909341SAndroid Build Coastguard Worker mova [dstq+16*7], m1 8950*c0909341SAndroid Build Coastguard Worker dec hd 8951*c0909341SAndroid Build Coastguard Worker jg .w64_loop 8952*c0909341SAndroid Build Coastguard Worker RET 8953*c0909341SAndroid Build Coastguard Worker.w128_loop: 8954*c0909341SAndroid Build Coastguard Worker call .main 8955*c0909341SAndroid Build Coastguard Worker add dstq, strideq 8956*c0909341SAndroid Build Coastguard Worker.w128: 8957*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 0], m0 8958*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 1], m1 8959*c0909341SAndroid Build Coastguard Worker call .main 8960*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 2], m0 8961*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 3], m1 8962*c0909341SAndroid Build Coastguard Worker call .main 8963*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 4], m0 8964*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 5], m1 8965*c0909341SAndroid Build Coastguard Worker call .main 8966*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 6], m0 8967*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 7], m1 8968*c0909341SAndroid Build Coastguard Worker call .main 8969*c0909341SAndroid Build Coastguard 
Worker mova [dstq+16* 8], m0 8970*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 9], m1 8971*c0909341SAndroid Build Coastguard Worker call .main 8972*c0909341SAndroid Build Coastguard Worker mova [dstq+16*10], m0 8973*c0909341SAndroid Build Coastguard Worker mova [dstq+16*11], m1 8974*c0909341SAndroid Build Coastguard Worker call .main 8975*c0909341SAndroid Build Coastguard Worker mova [dstq+16*12], m0 8976*c0909341SAndroid Build Coastguard Worker mova [dstq+16*13], m1 8977*c0909341SAndroid Build Coastguard Worker call .main 8978*c0909341SAndroid Build Coastguard Worker mova [dstq+16*14], m0 8979*c0909341SAndroid Build Coastguard Worker mova [dstq+16*15], m1 8980*c0909341SAndroid Build Coastguard Worker dec hd 8981*c0909341SAndroid Build Coastguard Worker jg .w128_loop 8982*c0909341SAndroid Build Coastguard Worker RET 8983*c0909341SAndroid Build Coastguard WorkerALIGN function_align 8984*c0909341SAndroid Build Coastguard Worker.main: 8985*c0909341SAndroid Build Coastguard Worker W_MASK 0, 2 8986*c0909341SAndroid Build Coastguard Worker W_MASK 1, 3 8987*c0909341SAndroid Build Coastguard Worker phaddw m2, m3 8988*c0909341SAndroid Build Coastguard Worker add tmp1q, 16*2 8989*c0909341SAndroid Build Coastguard Worker add tmp2q, 16*2 8990*c0909341SAndroid Build Coastguard Worker packuswb m2, m2 8991*c0909341SAndroid Build Coastguard Worker pxor m3, m3 8992*c0909341SAndroid Build Coastguard Worker psubb m2, m7 8993*c0909341SAndroid Build Coastguard Worker pavgb m2, m3 8994*c0909341SAndroid Build Coastguard Worker movq [maskq], m2 8995*c0909341SAndroid Build Coastguard Worker add maskq, 8 8996*c0909341SAndroid Build Coastguard Worker ret 8997*c0909341SAndroid Build Coastguard Worker 8998*c0909341SAndroid Build Coastguard Workercglobal w_mask_444_16bpc, 4, 7, 12, dst, stride, tmp1, tmp2, w, h, mask 8999*c0909341SAndroid Build Coastguard Worker%define base t0-w_mask_444_ssse3_table 9000*c0909341SAndroid Build Coastguard Worker LEA t0, w_mask_444_ssse3_table 
9001*c0909341SAndroid Build Coastguard Worker tzcnt wd, wm 9002*c0909341SAndroid Build Coastguard Worker mov r6d, r8m ; pixel_max 9003*c0909341SAndroid Build Coastguard Worker shr r6d, 11 9004*c0909341SAndroid Build Coastguard Worker movsxd wq, [t0+wq*4] 9005*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 9006*c0909341SAndroid Build Coastguard Worker mova m8, [base+pw_27615] 9007*c0909341SAndroid Build Coastguard Worker mova m9, [base+pw_64] 9008*c0909341SAndroid Build Coastguard Worker movddup m10, [base+bidir_rnd+r6*8] 9009*c0909341SAndroid Build Coastguard Worker movddup m11, [base+bidir_mul+r6*8] 9010*c0909341SAndroid Build Coastguard Worker%else 9011*c0909341SAndroid Build Coastguard Worker mova m1, [base+pw_27615] 9012*c0909341SAndroid Build Coastguard Worker mova m2, [base+pw_64] 9013*c0909341SAndroid Build Coastguard Worker movddup m3, [base+bidir_rnd+r6*8] 9014*c0909341SAndroid Build Coastguard Worker movddup m7, [base+bidir_mul+r6*8] 9015*c0909341SAndroid Build Coastguard Worker ALLOC_STACK -16*3 9016*c0909341SAndroid Build Coastguard Worker mova [rsp+16*0], m1 9017*c0909341SAndroid Build Coastguard Worker mova [rsp+16*1], m2 9018*c0909341SAndroid Build Coastguard Worker mova [rsp+16*2], m3 9019*c0909341SAndroid Build Coastguard Worker %define m11 m7 9020*c0909341SAndroid Build Coastguard Worker%endif 9021*c0909341SAndroid Build Coastguard Worker add wq, t0 9022*c0909341SAndroid Build Coastguard Worker movifnidn hd, r5m 9023*c0909341SAndroid Build Coastguard Worker mov maskq, r6mp 9024*c0909341SAndroid Build Coastguard Worker call .main 9025*c0909341SAndroid Build Coastguard Worker jmp wq 9026*c0909341SAndroid Build Coastguard Worker.w4_loop: 9027*c0909341SAndroid Build Coastguard Worker call .main 9028*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 9029*c0909341SAndroid Build Coastguard Worker.w4: 9030*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], m0 9031*c0909341SAndroid Build Coastguard Worker movhps 
[dstq+strideq*1], m0 9032*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 9033*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], m1 9034*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], m1 9035*c0909341SAndroid Build Coastguard Worker sub hd, 4 9036*c0909341SAndroid Build Coastguard Worker jg .w4_loop 9037*c0909341SAndroid Build Coastguard Worker.end: 9038*c0909341SAndroid Build Coastguard Worker RET 9039*c0909341SAndroid Build Coastguard Worker.w8_loop: 9040*c0909341SAndroid Build Coastguard Worker call .main 9041*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 9042*c0909341SAndroid Build Coastguard Worker.w8: 9043*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], m0 9044*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1], m1 9045*c0909341SAndroid Build Coastguard Worker sub hd, 2 9046*c0909341SAndroid Build Coastguard Worker jg .w8_loop 9047*c0909341SAndroid Build Coastguard Worker.w8_end: 9048*c0909341SAndroid Build Coastguard Worker RET 9049*c0909341SAndroid Build Coastguard Worker.w16_loop: 9050*c0909341SAndroid Build Coastguard Worker call .main 9051*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 9052*c0909341SAndroid Build Coastguard Worker.w16: 9053*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0+16*0], m0 9054*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0+16*1], m1 9055*c0909341SAndroid Build Coastguard Worker call .main 9056*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1+16*0], m0 9057*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1+16*1], m1 9058*c0909341SAndroid Build Coastguard Worker sub hd, 2 9059*c0909341SAndroid Build Coastguard Worker jg .w16_loop 9060*c0909341SAndroid Build Coastguard Worker RET 9061*c0909341SAndroid Build Coastguard Worker.w32_loop: 9062*c0909341SAndroid Build Coastguard Worker call .main 9063*c0909341SAndroid Build Coastguard Worker add dstq, strideq 
9064*c0909341SAndroid Build Coastguard Worker.w32: 9065*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 9066*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 9067*c0909341SAndroid Build Coastguard Worker call .main 9068*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m0 9069*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m1 9070*c0909341SAndroid Build Coastguard Worker dec hd 9071*c0909341SAndroid Build Coastguard Worker jg .w32_loop 9072*c0909341SAndroid Build Coastguard Worker RET 9073*c0909341SAndroid Build Coastguard Worker.w64_loop: 9074*c0909341SAndroid Build Coastguard Worker call .main 9075*c0909341SAndroid Build Coastguard Worker add dstq, strideq 9076*c0909341SAndroid Build Coastguard Worker.w64: 9077*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 9078*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 9079*c0909341SAndroid Build Coastguard Worker call .main 9080*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m0 9081*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m1 9082*c0909341SAndroid Build Coastguard Worker call .main 9083*c0909341SAndroid Build Coastguard Worker mova [dstq+16*4], m0 9084*c0909341SAndroid Build Coastguard Worker mova [dstq+16*5], m1 9085*c0909341SAndroid Build Coastguard Worker call .main 9086*c0909341SAndroid Build Coastguard Worker mova [dstq+16*6], m0 9087*c0909341SAndroid Build Coastguard Worker mova [dstq+16*7], m1 9088*c0909341SAndroid Build Coastguard Worker dec hd 9089*c0909341SAndroid Build Coastguard Worker jg .w64_loop 9090*c0909341SAndroid Build Coastguard Worker RET 9091*c0909341SAndroid Build Coastguard Worker.w128_loop: 9092*c0909341SAndroid Build Coastguard Worker call .main 9093*c0909341SAndroid Build Coastguard Worker add dstq, strideq 9094*c0909341SAndroid Build Coastguard Worker.w128: 9095*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 0], m0 9096*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 1], 
m1 9097*c0909341SAndroid Build Coastguard Worker call .main 9098*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 2], m0 9099*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 3], m1 9100*c0909341SAndroid Build Coastguard Worker call .main 9101*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 4], m0 9102*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 5], m1 9103*c0909341SAndroid Build Coastguard Worker call .main 9104*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 6], m0 9105*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 7], m1 9106*c0909341SAndroid Build Coastguard Worker call .main 9107*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 8], m0 9108*c0909341SAndroid Build Coastguard Worker mova [dstq+16* 9], m1 9109*c0909341SAndroid Build Coastguard Worker call .main 9110*c0909341SAndroid Build Coastguard Worker mova [dstq+16*10], m0 9111*c0909341SAndroid Build Coastguard Worker mova [dstq+16*11], m1 9112*c0909341SAndroid Build Coastguard Worker call .main 9113*c0909341SAndroid Build Coastguard Worker mova [dstq+16*12], m0 9114*c0909341SAndroid Build Coastguard Worker mova [dstq+16*13], m1 9115*c0909341SAndroid Build Coastguard Worker call .main 9116*c0909341SAndroid Build Coastguard Worker mova [dstq+16*14], m0 9117*c0909341SAndroid Build Coastguard Worker mova [dstq+16*15], m1 9118*c0909341SAndroid Build Coastguard Worker dec hd 9119*c0909341SAndroid Build Coastguard Worker jg .w128_loop 9120*c0909341SAndroid Build Coastguard Worker RET 9121*c0909341SAndroid Build Coastguard WorkerALIGN function_align 9122*c0909341SAndroid Build Coastguard Worker.main: 9123*c0909341SAndroid Build Coastguard Worker W_MASK 0, 2 9124*c0909341SAndroid Build Coastguard Worker W_MASK 1, 3 9125*c0909341SAndroid Build Coastguard Worker packuswb m2, m3 9126*c0909341SAndroid Build Coastguard Worker add tmp1q, 16*2 9127*c0909341SAndroid Build Coastguard Worker add tmp2q, 16*2 9128*c0909341SAndroid Build Coastguard Worker mova [maskq], m2 
9129*c0909341SAndroid Build Coastguard Worker add maskq, 16 9130*c0909341SAndroid Build Coastguard Worker ret 9131*c0909341SAndroid Build Coastguard Worker 9132*c0909341SAndroid Build Coastguard Worker; (a * (64 - m) + b * m + 32) >> 6 9133*c0909341SAndroid Build Coastguard Worker; = (((b - a) * m + 32) >> 6) + a 9134*c0909341SAndroid Build Coastguard Worker; = (((b - a) * (m << 9) + 16384) >> 15) + a 9135*c0909341SAndroid Build Coastguard Worker; except m << 9 overflows int16_t when m == 64 (which is possible), 9136*c0909341SAndroid Build Coastguard Worker; but if we negate m it works out (-64 << 9 == -32768). 9137*c0909341SAndroid Build Coastguard Worker; = (((a - b) * (m * -512) + 16384) >> 15) + a 9138*c0909341SAndroid Build Coastguard Workercglobal blend_16bpc, 3, 7, 8, dst, stride, tmp, w, h, mask, stride3 9139*c0909341SAndroid Build Coastguard Worker%define base r6-blend_ssse3_table 9140*c0909341SAndroid Build Coastguard Worker LEA r6, blend_ssse3_table 9141*c0909341SAndroid Build Coastguard Worker tzcnt wd, wm 9142*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 9143*c0909341SAndroid Build Coastguard Worker movsxd wq, [r6+wq*4] 9144*c0909341SAndroid Build Coastguard Worker movifnidn maskq, maskmp 9145*c0909341SAndroid Build Coastguard Worker mova m7, [base+pw_m512] 9146*c0909341SAndroid Build Coastguard Worker add wq, r6 9147*c0909341SAndroid Build Coastguard Worker lea stride3q, [strideq*3] 9148*c0909341SAndroid Build Coastguard Worker pxor m6, m6 9149*c0909341SAndroid Build Coastguard Worker jmp wq 9150*c0909341SAndroid Build Coastguard Worker.w4: 9151*c0909341SAndroid Build Coastguard Worker mova m5, [maskq] 9152*c0909341SAndroid Build Coastguard Worker movq m0, [dstq+strideq*0] 9153*c0909341SAndroid Build Coastguard Worker movhps m0, [dstq+strideq*1] 9154*c0909341SAndroid Build Coastguard Worker movq m1, [dstq+strideq*2] 9155*c0909341SAndroid Build Coastguard Worker movhps m1, [dstq+stride3q ] 9156*c0909341SAndroid Build Coastguard Worker 
psubw m2, m0, [tmpq+16*0] 9157*c0909341SAndroid Build Coastguard Worker psubw m3, m1, [tmpq+16*1] 9158*c0909341SAndroid Build Coastguard Worker add maskq, 16 9159*c0909341SAndroid Build Coastguard Worker add tmpq, 32 9160*c0909341SAndroid Build Coastguard Worker punpcklbw m4, m5, m6 9161*c0909341SAndroid Build Coastguard Worker punpckhbw m5, m6 9162*c0909341SAndroid Build Coastguard Worker pmullw m4, m7 9163*c0909341SAndroid Build Coastguard Worker pmullw m5, m7 9164*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 9165*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m5 9166*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9167*c0909341SAndroid Build Coastguard Worker paddw m1, m3 9168*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], m0 9169*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], m0 9170*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*2], m1 9171*c0909341SAndroid Build Coastguard Worker movhps [dstq+stride3q ], m1 9172*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 9173*c0909341SAndroid Build Coastguard Worker sub hd, 4 9174*c0909341SAndroid Build Coastguard Worker jg .w4 9175*c0909341SAndroid Build Coastguard Worker RET 9176*c0909341SAndroid Build Coastguard Worker.w8: 9177*c0909341SAndroid Build Coastguard Worker mova m5, [maskq] 9178*c0909341SAndroid Build Coastguard Worker mova m0, [dstq+strideq*0] 9179*c0909341SAndroid Build Coastguard Worker mova m1, [dstq+strideq*1] 9180*c0909341SAndroid Build Coastguard Worker psubw m2, m0, [tmpq+16*0] 9181*c0909341SAndroid Build Coastguard Worker psubw m3, m1, [tmpq+16*1] 9182*c0909341SAndroid Build Coastguard Worker add maskq, 16 9183*c0909341SAndroid Build Coastguard Worker add tmpq, 32 9184*c0909341SAndroid Build Coastguard Worker punpcklbw m4, m5, m6 9185*c0909341SAndroid Build Coastguard Worker punpckhbw m5, m6 9186*c0909341SAndroid Build Coastguard Worker pmullw m4, m7 9187*c0909341SAndroid Build Coastguard Worker pmullw m5, 
m7 9188*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 9189*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m5 9190*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9191*c0909341SAndroid Build Coastguard Worker paddw m1, m3 9192*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], m0 9193*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1], m1 9194*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 9195*c0909341SAndroid Build Coastguard Worker sub hd, 2 9196*c0909341SAndroid Build Coastguard Worker jg .w8 9197*c0909341SAndroid Build Coastguard Worker RET 9198*c0909341SAndroid Build Coastguard Worker.w16: 9199*c0909341SAndroid Build Coastguard Worker mova m5, [maskq] 9200*c0909341SAndroid Build Coastguard Worker mova m0, [dstq+16*0] 9201*c0909341SAndroid Build Coastguard Worker mova m1, [dstq+16*1] 9202*c0909341SAndroid Build Coastguard Worker psubw m2, m0, [tmpq+16*0] 9203*c0909341SAndroid Build Coastguard Worker psubw m3, m1, [tmpq+16*1] 9204*c0909341SAndroid Build Coastguard Worker add maskq, 16 9205*c0909341SAndroid Build Coastguard Worker add tmpq, 32 9206*c0909341SAndroid Build Coastguard Worker punpcklbw m4, m5, m6 9207*c0909341SAndroid Build Coastguard Worker punpckhbw m5, m6 9208*c0909341SAndroid Build Coastguard Worker pmullw m4, m7 9209*c0909341SAndroid Build Coastguard Worker pmullw m5, m7 9210*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 9211*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m5 9212*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9213*c0909341SAndroid Build Coastguard Worker paddw m1, m3 9214*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 9215*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 9216*c0909341SAndroid Build Coastguard Worker add dstq, strideq 9217*c0909341SAndroid Build Coastguard Worker dec hd 9218*c0909341SAndroid Build Coastguard Worker jg .w16 9219*c0909341SAndroid Build Coastguard Worker RET 
9220*c0909341SAndroid Build Coastguard Worker.w32: 9221*c0909341SAndroid Build Coastguard Worker mova m5, [maskq+16*0] 9222*c0909341SAndroid Build Coastguard Worker mova m0, [dstq+16*0] 9223*c0909341SAndroid Build Coastguard Worker mova m1, [dstq+16*1] 9224*c0909341SAndroid Build Coastguard Worker psubw m2, m0, [tmpq+16*0] 9225*c0909341SAndroid Build Coastguard Worker psubw m3, m1, [tmpq+16*1] 9226*c0909341SAndroid Build Coastguard Worker punpcklbw m4, m5, m6 9227*c0909341SAndroid Build Coastguard Worker punpckhbw m5, m6 9228*c0909341SAndroid Build Coastguard Worker pmullw m4, m7 9229*c0909341SAndroid Build Coastguard Worker pmullw m5, m7 9230*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 9231*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m5 9232*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9233*c0909341SAndroid Build Coastguard Worker paddw m1, m3 9234*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 9235*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 9236*c0909341SAndroid Build Coastguard Worker mova m5, [maskq+16*1] 9237*c0909341SAndroid Build Coastguard Worker mova m0, [dstq+16*2] 9238*c0909341SAndroid Build Coastguard Worker mova m1, [dstq+16*3] 9239*c0909341SAndroid Build Coastguard Worker psubw m2, m0, [tmpq+16*2] 9240*c0909341SAndroid Build Coastguard Worker psubw m3, m1, [tmpq+16*3] 9241*c0909341SAndroid Build Coastguard Worker add maskq, 32 9242*c0909341SAndroid Build Coastguard Worker add tmpq, 64 9243*c0909341SAndroid Build Coastguard Worker punpcklbw m4, m5, m6 9244*c0909341SAndroid Build Coastguard Worker punpckhbw m5, m6 9245*c0909341SAndroid Build Coastguard Worker pmullw m4, m7 9246*c0909341SAndroid Build Coastguard Worker pmullw m5, m7 9247*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 9248*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m5 9249*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9250*c0909341SAndroid Build Coastguard Worker paddw m1, m3 
9251*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m0 9252*c0909341SAndroid Build Coastguard Worker mova [dstq+16*3], m1 9253*c0909341SAndroid Build Coastguard Worker add dstq, strideq 9254*c0909341SAndroid Build Coastguard Worker dec hd 9255*c0909341SAndroid Build Coastguard Worker jg .w32 9256*c0909341SAndroid Build Coastguard Worker RET 9257*c0909341SAndroid Build Coastguard Worker 9258*c0909341SAndroid Build Coastguard Workercglobal blend_v_16bpc, 3, 6, 6, dst, stride, tmp, w, h 9259*c0909341SAndroid Build Coastguard Worker%define base r5-blend_v_ssse3_table 9260*c0909341SAndroid Build Coastguard Worker LEA r5, blend_v_ssse3_table 9261*c0909341SAndroid Build Coastguard Worker tzcnt wd, wm 9262*c0909341SAndroid Build Coastguard Worker movifnidn hd, hm 9263*c0909341SAndroid Build Coastguard Worker movsxd wq, [r5+wq*4] 9264*c0909341SAndroid Build Coastguard Worker add wq, r5 9265*c0909341SAndroid Build Coastguard Worker jmp wq 9266*c0909341SAndroid Build Coastguard Worker.w2: 9267*c0909341SAndroid Build Coastguard Worker movd m4, [base+obmc_masks+2*2] 9268*c0909341SAndroid Build Coastguard Worker.w2_loop: 9269*c0909341SAndroid Build Coastguard Worker movd m0, [dstq+strideq*0] 9270*c0909341SAndroid Build Coastguard Worker movd m2, [tmpq+4*0] 9271*c0909341SAndroid Build Coastguard Worker movd m1, [dstq+strideq*1] 9272*c0909341SAndroid Build Coastguard Worker movd m3, [tmpq+4*1] 9273*c0909341SAndroid Build Coastguard Worker add tmpq, 4*2 9274*c0909341SAndroid Build Coastguard Worker psubw m2, m0 9275*c0909341SAndroid Build Coastguard Worker psubw m3, m1 9276*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 9277*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m4 9278*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9279*c0909341SAndroid Build Coastguard Worker paddw m1, m3 9280*c0909341SAndroid Build Coastguard Worker movd [dstq+strideq*0], m0 9281*c0909341SAndroid Build Coastguard Worker movd [dstq+strideq*1], m1 9282*c0909341SAndroid 
Build Coastguard Worker lea dstq, [dstq+strideq*2] 9283*c0909341SAndroid Build Coastguard Worker sub hd, 2 9284*c0909341SAndroid Build Coastguard Worker jg .w2_loop 9285*c0909341SAndroid Build Coastguard Worker RET 9286*c0909341SAndroid Build Coastguard Worker.w4: 9287*c0909341SAndroid Build Coastguard Worker movddup m2, [base+obmc_masks+4*2] 9288*c0909341SAndroid Build Coastguard Worker.w4_loop: 9289*c0909341SAndroid Build Coastguard Worker movq m0, [dstq+strideq*0] 9290*c0909341SAndroid Build Coastguard Worker movhps m0, [dstq+strideq*1] 9291*c0909341SAndroid Build Coastguard Worker mova m1, [tmpq] 9292*c0909341SAndroid Build Coastguard Worker add tmpq, 8*2 9293*c0909341SAndroid Build Coastguard Worker psubw m1, m0 9294*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m2 9295*c0909341SAndroid Build Coastguard Worker paddw m0, m1 9296*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], m0 9297*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], m0 9298*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 9299*c0909341SAndroid Build Coastguard Worker sub hd, 2 9300*c0909341SAndroid Build Coastguard Worker jg .w4_loop 9301*c0909341SAndroid Build Coastguard Worker RET 9302*c0909341SAndroid Build Coastguard Worker.w8: 9303*c0909341SAndroid Build Coastguard Worker mova m4, [base+obmc_masks+8*2] 9304*c0909341SAndroid Build Coastguard Worker.w8_loop: 9305*c0909341SAndroid Build Coastguard Worker mova m0, [dstq+strideq*0] 9306*c0909341SAndroid Build Coastguard Worker mova m2, [tmpq+16*0] 9307*c0909341SAndroid Build Coastguard Worker mova m1, [dstq+strideq*1] 9308*c0909341SAndroid Build Coastguard Worker mova m3, [tmpq+16*1] 9309*c0909341SAndroid Build Coastguard Worker add tmpq, 16*2 9310*c0909341SAndroid Build Coastguard Worker psubw m2, m0 9311*c0909341SAndroid Build Coastguard Worker psubw m3, m1 9312*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 9313*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m4 
9314*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9315*c0909341SAndroid Build Coastguard Worker paddw m1, m3 9316*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], m0 9317*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*1], m1 9318*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 9319*c0909341SAndroid Build Coastguard Worker sub hd, 2 9320*c0909341SAndroid Build Coastguard Worker jg .w8_loop 9321*c0909341SAndroid Build Coastguard Worker RET 9322*c0909341SAndroid Build Coastguard Worker.w16: 9323*c0909341SAndroid Build Coastguard Worker mova m4, [base+obmc_masks+16*2] 9324*c0909341SAndroid Build Coastguard Worker movq m5, [base+obmc_masks+16*3] 9325*c0909341SAndroid Build Coastguard Worker.w16_loop: 9326*c0909341SAndroid Build Coastguard Worker mova m0, [dstq+16*0] 9327*c0909341SAndroid Build Coastguard Worker mova m2, [tmpq+16*0] 9328*c0909341SAndroid Build Coastguard Worker mova m1, [dstq+16*1] 9329*c0909341SAndroid Build Coastguard Worker mova m3, [tmpq+16*1] 9330*c0909341SAndroid Build Coastguard Worker add tmpq, 16*2 9331*c0909341SAndroid Build Coastguard Worker psubw m2, m0 9332*c0909341SAndroid Build Coastguard Worker psubw m3, m1 9333*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 9334*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m5 9335*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9336*c0909341SAndroid Build Coastguard Worker paddw m1, m3 9337*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 9338*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 9339*c0909341SAndroid Build Coastguard Worker add dstq, strideq 9340*c0909341SAndroid Build Coastguard Worker dec hd 9341*c0909341SAndroid Build Coastguard Worker jg .w16_loop 9342*c0909341SAndroid Build Coastguard Worker RET 9343*c0909341SAndroid Build Coastguard Worker.w32: 9344*c0909341SAndroid Build Coastguard Worker%if WIN64 9345*c0909341SAndroid Build Coastguard Worker movaps [rsp+8], m6 
9346*c0909341SAndroid Build Coastguard Worker%endif 9347*c0909341SAndroid Build Coastguard Worker mova m4, [base+obmc_masks+16*4] 9348*c0909341SAndroid Build Coastguard Worker mova m5, [base+obmc_masks+16*5] 9349*c0909341SAndroid Build Coastguard Worker mova m6, [base+obmc_masks+16*6] 9350*c0909341SAndroid Build Coastguard Worker.w32_loop: 9351*c0909341SAndroid Build Coastguard Worker mova m0, [dstq+16*0] 9352*c0909341SAndroid Build Coastguard Worker mova m2, [tmpq+16*0] 9353*c0909341SAndroid Build Coastguard Worker mova m1, [dstq+16*1] 9354*c0909341SAndroid Build Coastguard Worker mova m3, [tmpq+16*1] 9355*c0909341SAndroid Build Coastguard Worker psubw m2, m0 9356*c0909341SAndroid Build Coastguard Worker psubw m3, m1 9357*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 9358*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m5 9359*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9360*c0909341SAndroid Build Coastguard Worker mova m2, [dstq+16*2] 9361*c0909341SAndroid Build Coastguard Worker paddw m1, m3 9362*c0909341SAndroid Build Coastguard Worker mova m3, [tmpq+16*2] 9363*c0909341SAndroid Build Coastguard Worker add tmpq, 16*4 9364*c0909341SAndroid Build Coastguard Worker psubw m3, m2 9365*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m6 9366*c0909341SAndroid Build Coastguard Worker paddw m2, m3 9367*c0909341SAndroid Build Coastguard Worker mova [dstq+16*0], m0 9368*c0909341SAndroid Build Coastguard Worker mova [dstq+16*1], m1 9369*c0909341SAndroid Build Coastguard Worker mova [dstq+16*2], m2 9370*c0909341SAndroid Build Coastguard Worker add dstq, strideq 9371*c0909341SAndroid Build Coastguard Worker dec hd 9372*c0909341SAndroid Build Coastguard Worker jg .w32_loop 9373*c0909341SAndroid Build Coastguard Worker%if WIN64 9374*c0909341SAndroid Build Coastguard Worker movaps m6, [rsp+8] 9375*c0909341SAndroid Build Coastguard Worker%endif 9376*c0909341SAndroid Build Coastguard Worker RET 9377*c0909341SAndroid Build Coastguard Worker 
; BLEND_H_ROW dst_off, tmp_off[, inc_tmp]
;
; Blends 32 bytes (two 16-byte registers) of one row:
;   dst[16*dst_off ..] += pmulhrsw(tmp[16*tmp_off ..] - dst[16*dst_off ..], m5)
; Offsets are in units of 16 bytes. If inc_tmp is non-zero, tmpq is advanced
; by 16*inc_tmp bytes after the tmp loads (default 0: tmpq is left alone).
; m5 must already hold the blend weights (it is not set here); m0-m3 are
; clobbered.
%macro BLEND_H_ROW 2-3 0; dst_off, tmp_off, inc_tmp
    mova        m0, [dstq+16*(%1+0)]
    mova        m2, [tmpq+16*(%2+0)]
    mova        m1, [dstq+16*(%1+1)]
    mova        m3, [tmpq+16*(%2+1)]
%if %3
    add         tmpq, 16*%3
%endif
    psubw       m2, m0                      ; tmp - dst
    psubw       m3, m1
    pmulhrsw    m2, m5
    pmulhrsw    m3, m5
    paddw       m0, m2
    paddw       m1, m3
    mova        [dstq+16*(%1+0)], m0
    mova        [dstq+16*(%1+1)], m1
%endmacro
; NOTE(review): the line below is the unmodified start of blend_h_16bpc, which
; continues past this block; it is kept byte-for-byte.
9395*c0909341SAndroid Build Coastguard Worker 9396*c0909341SAndroid Build Coastguard Workercglobal blend_h_16bpc, 3, 7, 6, dst, ds, tmp, w, h, mask 9397*c0909341SAndroid Build Coastguard Worker%define base r6-blend_h_ssse3_table 9398*c0909341SAndroid Build Coastguard Worker LEA r6, blend_h_ssse3_table 9399*c0909341SAndroid Build Coastguard Worker tzcnt wd, wm 9400*c0909341SAndroid Build Coastguard Worker mov hd, hm 9401*c0909341SAndroid Build Coastguard Worker movsxd wq, [r6+wq*4] 9402*c0909341SAndroid Build Coastguard Worker movddup m4, [base+blend_shuf] 9403*c0909341SAndroid Build Coastguard Worker lea maskq, [base+obmc_masks+hq*2] 9404*c0909341SAndroid Build Coastguard Worker lea hd, [hq*3] 9405*c0909341SAndroid Build Coastguard Worker add wq, r6 9406*c0909341SAndroid Build Coastguard Worker shr hd, 2 ; h * 3/4 9407*c0909341SAndroid Build Coastguard Worker lea maskq, 
[maskq+hq*2] 9408*c0909341SAndroid Build Coastguard Worker neg hq 9409*c0909341SAndroid Build Coastguard Worker jmp wq 9410*c0909341SAndroid Build Coastguard Worker.w2: 9411*c0909341SAndroid Build Coastguard Worker movd m0, [dstq+dsq*0] 9412*c0909341SAndroid Build Coastguard Worker movd m2, [dstq+dsq*1] 9413*c0909341SAndroid Build Coastguard Worker movd m3, [maskq+hq*2] 9414*c0909341SAndroid Build Coastguard Worker movq m1, [tmpq] 9415*c0909341SAndroid Build Coastguard Worker add tmpq, 4*2 9416*c0909341SAndroid Build Coastguard Worker punpckldq m0, m2 9417*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m3 9418*c0909341SAndroid Build Coastguard Worker psubw m1, m0 9419*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m3 9420*c0909341SAndroid Build Coastguard Worker paddw m0, m1 9421*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*0], m0 9422*c0909341SAndroid Build Coastguard Worker psrlq m0, 32 9423*c0909341SAndroid Build Coastguard Worker movd [dstq+dsq*1], m0 9424*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 9425*c0909341SAndroid Build Coastguard Worker add hq, 2 9426*c0909341SAndroid Build Coastguard Worker jl .w2 9427*c0909341SAndroid Build Coastguard Worker RET 9428*c0909341SAndroid Build Coastguard Worker.w4: 9429*c0909341SAndroid Build Coastguard Worker mova m3, [base+blend_shuf] 9430*c0909341SAndroid Build Coastguard Worker.w4_loop: 9431*c0909341SAndroid Build Coastguard Worker movq m0, [dstq+dsq*0] 9432*c0909341SAndroid Build Coastguard Worker movhps m0, [dstq+dsq*1] 9433*c0909341SAndroid Build Coastguard Worker movd m2, [maskq+hq*2] 9434*c0909341SAndroid Build Coastguard Worker mova m1, [tmpq] 9435*c0909341SAndroid Build Coastguard Worker add tmpq, 8*2 9436*c0909341SAndroid Build Coastguard Worker psubw m1, m0 9437*c0909341SAndroid Build Coastguard Worker pshufb m2, m3 9438*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m2 9439*c0909341SAndroid Build Coastguard Worker paddw m0, m1 9440*c0909341SAndroid Build 
Coastguard Worker movq [dstq+dsq*0], m0 9441*c0909341SAndroid Build Coastguard Worker movhps [dstq+dsq*1], m0 9442*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 9443*c0909341SAndroid Build Coastguard Worker add hq, 2 9444*c0909341SAndroid Build Coastguard Worker jl .w4_loop 9445*c0909341SAndroid Build Coastguard Worker RET 9446*c0909341SAndroid Build Coastguard Worker.w8: 9447*c0909341SAndroid Build Coastguard Worker movddup m5, [base+blend_shuf+8] 9448*c0909341SAndroid Build Coastguard Worker%if WIN64 9449*c0909341SAndroid Build Coastguard Worker movaps [rsp+ 8], m6 9450*c0909341SAndroid Build Coastguard Worker movaps [rsp+24], m7 9451*c0909341SAndroid Build Coastguard Worker%endif 9452*c0909341SAndroid Build Coastguard Worker.w8_loop: 9453*c0909341SAndroid Build Coastguard Worker movd m7, [maskq+hq*2] 9454*c0909341SAndroid Build Coastguard Worker mova m0, [dstq+dsq*0] 9455*c0909341SAndroid Build Coastguard Worker mova m2, [tmpq+16*0] 9456*c0909341SAndroid Build Coastguard Worker mova m1, [dstq+dsq*1] 9457*c0909341SAndroid Build Coastguard Worker mova m3, [tmpq+16*1] 9458*c0909341SAndroid Build Coastguard Worker add tmpq, 16*2 9459*c0909341SAndroid Build Coastguard Worker pshufb m6, m7, m4 9460*c0909341SAndroid Build Coastguard Worker psubw m2, m0 9461*c0909341SAndroid Build Coastguard Worker pshufb m7, m5 9462*c0909341SAndroid Build Coastguard Worker psubw m3, m1 9463*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m6 9464*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7 9465*c0909341SAndroid Build Coastguard Worker paddw m0, m2 9466*c0909341SAndroid Build Coastguard Worker paddw m1, m3 9467*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*0], m0 9468*c0909341SAndroid Build Coastguard Worker mova [dstq+dsq*1], m1 9469*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+dsq*2] 9470*c0909341SAndroid Build Coastguard Worker add hq, 2 9471*c0909341SAndroid Build Coastguard Worker jl .w8_loop 9472*c0909341SAndroid Build 
Coastguard Worker%if WIN64 9473*c0909341SAndroid Build Coastguard Worker movaps m6, [rsp+ 8] 9474*c0909341SAndroid Build Coastguard Worker movaps m7, [rsp+24] 9475*c0909341SAndroid Build Coastguard Worker%endif 9476*c0909341SAndroid Build Coastguard Worker RET 9477*c0909341SAndroid Build Coastguard Worker.w16: 9478*c0909341SAndroid Build Coastguard Worker movd m5, [maskq+hq*2] 9479*c0909341SAndroid Build Coastguard Worker pshufb m5, m4 9480*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 0, 0, 2 9481*c0909341SAndroid Build Coastguard Worker add dstq, dsq 9482*c0909341SAndroid Build Coastguard Worker inc hq 9483*c0909341SAndroid Build Coastguard Worker jl .w16 9484*c0909341SAndroid Build Coastguard Worker RET 9485*c0909341SAndroid Build Coastguard Worker.w32: 9486*c0909341SAndroid Build Coastguard Worker movd m5, [maskq+hq*2] 9487*c0909341SAndroid Build Coastguard Worker pshufb m5, m4 9488*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 0, 0 9489*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 2, 2, 4 9490*c0909341SAndroid Build Coastguard Worker add dstq, dsq 9491*c0909341SAndroid Build Coastguard Worker inc hq 9492*c0909341SAndroid Build Coastguard Worker jl .w32 9493*c0909341SAndroid Build Coastguard Worker RET 9494*c0909341SAndroid Build Coastguard Worker.w64: 9495*c0909341SAndroid Build Coastguard Worker movd m5, [maskq+hq*2] 9496*c0909341SAndroid Build Coastguard Worker pshufb m5, m4 9497*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 0, 0 9498*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 2, 2 9499*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 4, 4 9500*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 6, 6, 8 9501*c0909341SAndroid Build Coastguard Worker add dstq, dsq 9502*c0909341SAndroid Build Coastguard Worker inc hq 9503*c0909341SAndroid Build Coastguard Worker jl .w64 9504*c0909341SAndroid Build Coastguard Worker RET 9505*c0909341SAndroid Build Coastguard Worker.w128: 9506*c0909341SAndroid Build Coastguard Worker 
movd m5, [maskq+hq*2] 9507*c0909341SAndroid Build Coastguard Worker pshufb m5, m4 9508*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 0, 0 9509*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 2, 2 9510*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 4, 4 9511*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 6, 6, 16 9512*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 8, -8 9513*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 10, -6 9514*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 12, -4 9515*c0909341SAndroid Build Coastguard Worker BLEND_H_ROW 14, -2 9516*c0909341SAndroid Build Coastguard Worker add dstq, dsq 9517*c0909341SAndroid Build Coastguard Worker inc hq 9518*c0909341SAndroid Build Coastguard Worker jl .w128 9519*c0909341SAndroid Build Coastguard Worker RET 9520*c0909341SAndroid Build Coastguard Worker 9521*c0909341SAndroid Build Coastguard Worker; emu_edge args: 9522*c0909341SAndroid Build Coastguard Worker; const intptr_t bw, const intptr_t bh, const intptr_t iw, const intptr_t ih, 9523*c0909341SAndroid Build Coastguard Worker; const intptr_t x, const intptr_t y, pixel *dst, const ptrdiff_t dst_stride, 9524*c0909341SAndroid Build Coastguard Worker; const pixel *ref, const ptrdiff_t ref_stride 9525*c0909341SAndroid Build Coastguard Worker; 9526*c0909341SAndroid Build Coastguard Worker; bw, bh total filled size 9527*c0909341SAndroid Build Coastguard Worker; iw, ih, copied block -> fill bottom, right 9528*c0909341SAndroid Build Coastguard Worker; x, y, offset in bw/bh -> fill top, left 9529*c0909341SAndroid Build Coastguard Workercglobal emu_edge_16bpc, 10, 13, 1, bw, bh, iw, ih, x, \ 9530*c0909341SAndroid Build Coastguard Worker y, dst, dstride, src, sstride, \ 9531*c0909341SAndroid Build Coastguard Worker bottomext, rightext, blk 9532*c0909341SAndroid Build Coastguard Worker ; we assume that the buffer (stride) is larger than width, so we can 9533*c0909341SAndroid Build Coastguard Worker ; safely overwrite by a few 
bytes 9534*c0909341SAndroid Build Coastguard Worker 9535*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 9536*c0909341SAndroid Build Coastguard Worker %define reg_zero r12q 9537*c0909341SAndroid Build Coastguard Worker %define reg_tmp r10 9538*c0909341SAndroid Build Coastguard Worker %define reg_src srcq 9539*c0909341SAndroid Build Coastguard Worker %define reg_bottomext bottomextq 9540*c0909341SAndroid Build Coastguard Worker %define reg_rightext rightextq 9541*c0909341SAndroid Build Coastguard Worker %define reg_blkm r9m 9542*c0909341SAndroid Build Coastguard Worker%else 9543*c0909341SAndroid Build Coastguard Worker %define reg_zero r6 9544*c0909341SAndroid Build Coastguard Worker %define reg_tmp r0 9545*c0909341SAndroid Build Coastguard Worker %define reg_src r1 9546*c0909341SAndroid Build Coastguard Worker %define reg_bottomext r0 9547*c0909341SAndroid Build Coastguard Worker %define reg_rightext r1 9548*c0909341SAndroid Build Coastguard Worker %define reg_blkm r2m 9549*c0909341SAndroid Build Coastguard Worker%endif 9550*c0909341SAndroid Build Coastguard Worker ; 9551*c0909341SAndroid Build Coastguard Worker ; ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride) 9552*c0909341SAndroid Build Coastguard Worker xor reg_zero, reg_zero 9553*c0909341SAndroid Build Coastguard Worker lea reg_tmp, [ihq-1] 9554*c0909341SAndroid Build Coastguard Worker cmp yq, ihq 9555*c0909341SAndroid Build Coastguard Worker cmovs reg_tmp, yq 9556*c0909341SAndroid Build Coastguard Worker test yq, yq 9557*c0909341SAndroid Build Coastguard Worker cmovs reg_tmp, reg_zero 9558*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 9559*c0909341SAndroid Build Coastguard Worker imul reg_tmp, sstrideq 9560*c0909341SAndroid Build Coastguard Worker add srcq, reg_tmp 9561*c0909341SAndroid Build Coastguard Worker%else 9562*c0909341SAndroid Build Coastguard Worker imul reg_tmp, sstridem 9563*c0909341SAndroid Build Coastguard Worker mov reg_src, srcm 9564*c0909341SAndroid Build Coastguard Worker 
add reg_src, reg_tmp 9565*c0909341SAndroid Build Coastguard Worker%endif 9566*c0909341SAndroid Build Coastguard Worker ; 9567*c0909341SAndroid Build Coastguard Worker ; ref += iclip(x, 0, iw - 1) 9568*c0909341SAndroid Build Coastguard Worker lea reg_tmp, [iwq-1] 9569*c0909341SAndroid Build Coastguard Worker cmp xq, iwq 9570*c0909341SAndroid Build Coastguard Worker cmovs reg_tmp, xq 9571*c0909341SAndroid Build Coastguard Worker test xq, xq 9572*c0909341SAndroid Build Coastguard Worker cmovs reg_tmp, reg_zero 9573*c0909341SAndroid Build Coastguard Worker lea reg_src, [reg_src+reg_tmp*2] 9574*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 9575*c0909341SAndroid Build Coastguard Worker mov srcm, reg_src 9576*c0909341SAndroid Build Coastguard Worker%endif 9577*c0909341SAndroid Build Coastguard Worker ; 9578*c0909341SAndroid Build Coastguard Worker ; bottom_ext = iclip(y + bh - ih, 0, bh - 1) 9579*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 9580*c0909341SAndroid Build Coastguard Worker mov r1, r1m ; restore bh 9581*c0909341SAndroid Build Coastguard Worker%endif 9582*c0909341SAndroid Build Coastguard Worker lea reg_bottomext, [yq+bhq] 9583*c0909341SAndroid Build Coastguard Worker sub reg_bottomext, ihq 9584*c0909341SAndroid Build Coastguard Worker lea r3, [bhq-1] 9585*c0909341SAndroid Build Coastguard Worker cmovs reg_bottomext, reg_zero 9586*c0909341SAndroid Build Coastguard Worker ; 9587*c0909341SAndroid Build Coastguard Worker 9588*c0909341SAndroid Build Coastguard Worker DEFINE_ARGS bw, bh, iw, ih, x, \ 9589*c0909341SAndroid Build Coastguard Worker topext, dst, dstride, src, sstride, \ 9590*c0909341SAndroid Build Coastguard Worker bottomext, rightext, blk 9591*c0909341SAndroid Build Coastguard Worker 9592*c0909341SAndroid Build Coastguard Worker ; top_ext = iclip(-y, 0, bh - 1) 9593*c0909341SAndroid Build Coastguard Worker neg topextq 9594*c0909341SAndroid Build Coastguard Worker cmovs topextq, reg_zero 9595*c0909341SAndroid Build Coastguard Worker 
cmp reg_bottomext, bhq 9596*c0909341SAndroid Build Coastguard Worker cmovns reg_bottomext, r3 9597*c0909341SAndroid Build Coastguard Worker cmp topextq, bhq 9598*c0909341SAndroid Build Coastguard Worker cmovg topextq, r3 9599*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 9600*c0909341SAndroid Build Coastguard Worker mov r4m, reg_bottomext 9601*c0909341SAndroid Build Coastguard Worker ; 9602*c0909341SAndroid Build Coastguard Worker ; right_ext = iclip(x + bw - iw, 0, bw - 1) 9603*c0909341SAndroid Build Coastguard Worker mov r0, r0m ; restore bw 9604*c0909341SAndroid Build Coastguard Worker %endif 9605*c0909341SAndroid Build Coastguard Worker lea reg_rightext, [xq+bwq] 9606*c0909341SAndroid Build Coastguard Worker sub reg_rightext, iwq 9607*c0909341SAndroid Build Coastguard Worker lea r2, [bwq-1] 9608*c0909341SAndroid Build Coastguard Worker cmovs reg_rightext, reg_zero 9609*c0909341SAndroid Build Coastguard Worker 9610*c0909341SAndroid Build Coastguard Worker DEFINE_ARGS bw, bh, iw, ih, leftext, \ 9611*c0909341SAndroid Build Coastguard Worker topext, dst, dstride, src, sstride, \ 9612*c0909341SAndroid Build Coastguard Worker bottomext, rightext, blk 9613*c0909341SAndroid Build Coastguard Worker 9614*c0909341SAndroid Build Coastguard Worker ; left_ext = iclip(-x, 0, bw - 1) 9615*c0909341SAndroid Build Coastguard Worker neg leftextq 9616*c0909341SAndroid Build Coastguard Worker cmovs leftextq, reg_zero 9617*c0909341SAndroid Build Coastguard Worker cmp reg_rightext, bwq 9618*c0909341SAndroid Build Coastguard Worker cmovns reg_rightext, r2 9619*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 9620*c0909341SAndroid Build Coastguard Worker mov r3m, r1 9621*c0909341SAndroid Build Coastguard Worker %endif 9622*c0909341SAndroid Build Coastguard Worker cmp leftextq, bwq 9623*c0909341SAndroid Build Coastguard Worker cmovns leftextq, r2 9624*c0909341SAndroid Build Coastguard Worker 9625*c0909341SAndroid Build Coastguard Worker%undef reg_zero 
9626*c0909341SAndroid Build Coastguard Worker%undef reg_tmp 9627*c0909341SAndroid Build Coastguard Worker%undef reg_src 9628*c0909341SAndroid Build Coastguard Worker%undef reg_bottomext 9629*c0909341SAndroid Build Coastguard Worker%undef reg_rightext 9630*c0909341SAndroid Build Coastguard Worker 9631*c0909341SAndroid Build Coastguard Worker DEFINE_ARGS bw, centerh, centerw, dummy, leftext, \ 9632*c0909341SAndroid Build Coastguard Worker topext, dst, dstride, src, sstride, \ 9633*c0909341SAndroid Build Coastguard Worker bottomext, rightext, blk 9634*c0909341SAndroid Build Coastguard Worker 9635*c0909341SAndroid Build Coastguard Worker ; center_h = bh - top_ext - bottom_ext 9636*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 9637*c0909341SAndroid Build Coastguard Worker lea r3, [bottomextq+topextq] 9638*c0909341SAndroid Build Coastguard Worker sub centerhq, r3 9639*c0909341SAndroid Build Coastguard Worker%else 9640*c0909341SAndroid Build Coastguard Worker mov r1, centerhm ; restore r1 9641*c0909341SAndroid Build Coastguard Worker sub centerhq, topextq 9642*c0909341SAndroid Build Coastguard Worker sub centerhq, r4m 9643*c0909341SAndroid Build Coastguard Worker mov r1m, centerhq 9644*c0909341SAndroid Build Coastguard Worker%endif 9645*c0909341SAndroid Build Coastguard Worker ; 9646*c0909341SAndroid Build Coastguard Worker ; blk += top_ext * PXSTRIDE(dst_stride) 9647*c0909341SAndroid Build Coastguard Worker mov r2, topextq 9648*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 9649*c0909341SAndroid Build Coastguard Worker imul r2, dstrideq 9650*c0909341SAndroid Build Coastguard Worker%else 9651*c0909341SAndroid Build Coastguard Worker mov r6, r6m ; restore dstq 9652*c0909341SAndroid Build Coastguard Worker imul r2, dstridem 9653*c0909341SAndroid Build Coastguard Worker%endif 9654*c0909341SAndroid Build Coastguard Worker add dstq, r2 9655*c0909341SAndroid Build Coastguard Worker mov reg_blkm, dstq ; save pointer for ext 9656*c0909341SAndroid Build 
Coastguard Worker ; 9657*c0909341SAndroid Build Coastguard Worker ; center_w = bw - left_ext - right_ext 9658*c0909341SAndroid Build Coastguard Worker mov centerwq, bwq 9659*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 9660*c0909341SAndroid Build Coastguard Worker lea r3, [rightextq+leftextq] 9661*c0909341SAndroid Build Coastguard Worker sub centerwq, r3 9662*c0909341SAndroid Build Coastguard Worker%else 9663*c0909341SAndroid Build Coastguard Worker sub centerwq, r3m 9664*c0909341SAndroid Build Coastguard Worker sub centerwq, leftextq 9665*c0909341SAndroid Build Coastguard Worker%endif 9666*c0909341SAndroid Build Coastguard Worker 9667*c0909341SAndroid Build Coastguard Worker; vloop Macro 9668*c0909341SAndroid Build Coastguard Worker%macro v_loop 3 ; need_left_ext, need_right_ext, suffix 9669*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9670*c0909341SAndroid Build Coastguard Worker %define reg_tmp r12 9671*c0909341SAndroid Build Coastguard Worker %else 9672*c0909341SAndroid Build Coastguard Worker %define reg_tmp r0 9673*c0909341SAndroid Build Coastguard Worker %endif 9674*c0909341SAndroid Build Coastguard Worker.v_loop_%3: 9675*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_32 9676*c0909341SAndroid Build Coastguard Worker mov r0, r0m 9677*c0909341SAndroid Build Coastguard Worker mov r1, r1m 9678*c0909341SAndroid Build Coastguard Worker %endif 9679*c0909341SAndroid Build Coastguard Worker%if %1 9680*c0909341SAndroid Build Coastguard Worker ; left extension 9681*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9682*c0909341SAndroid Build Coastguard Worker movd m0, [srcq] 9683*c0909341SAndroid Build Coastguard Worker %else 9684*c0909341SAndroid Build Coastguard Worker mov r3, srcm 9685*c0909341SAndroid Build Coastguard Worker movd m0, [r3] 9686*c0909341SAndroid Build Coastguard Worker %endif 9687*c0909341SAndroid Build Coastguard Worker pshuflw m0, m0, q0000 9688*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m0 
9689*c0909341SAndroid Build Coastguard Worker xor r3, r3 9690*c0909341SAndroid Build Coastguard Worker.left_loop_%3: 9691*c0909341SAndroid Build Coastguard Worker mova [dstq+r3*2], m0 9692*c0909341SAndroid Build Coastguard Worker add r3, mmsize/2 9693*c0909341SAndroid Build Coastguard Worker cmp r3, leftextq 9694*c0909341SAndroid Build Coastguard Worker jl .left_loop_%3 9695*c0909341SAndroid Build Coastguard Worker ; body 9696*c0909341SAndroid Build Coastguard Worker lea reg_tmp, [dstq+leftextq*2] 9697*c0909341SAndroid Build Coastguard Worker%endif 9698*c0909341SAndroid Build Coastguard Worker xor r3, r3 9699*c0909341SAndroid Build Coastguard Worker.body_loop_%3: 9700*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9701*c0909341SAndroid Build Coastguard Worker movu m0, [srcq+r3*2] 9702*c0909341SAndroid Build Coastguard Worker %else 9703*c0909341SAndroid Build Coastguard Worker mov r1, srcm 9704*c0909341SAndroid Build Coastguard Worker movu m0, [r1+r3*2] 9705*c0909341SAndroid Build Coastguard Worker %endif 9706*c0909341SAndroid Build Coastguard Worker%if %1 9707*c0909341SAndroid Build Coastguard Worker movu [reg_tmp+r3*2], m0 9708*c0909341SAndroid Build Coastguard Worker%else 9709*c0909341SAndroid Build Coastguard Worker movu [dstq+r3*2], m0 9710*c0909341SAndroid Build Coastguard Worker%endif 9711*c0909341SAndroid Build Coastguard Worker add r3, mmsize/2 9712*c0909341SAndroid Build Coastguard Worker cmp r3, centerwq 9713*c0909341SAndroid Build Coastguard Worker jl .body_loop_%3 9714*c0909341SAndroid Build Coastguard Worker%if %2 9715*c0909341SAndroid Build Coastguard Worker ; right extension 9716*c0909341SAndroid Build Coastguard Worker%if %1 9717*c0909341SAndroid Build Coastguard Worker lea reg_tmp, [reg_tmp+centerwq*2] 9718*c0909341SAndroid Build Coastguard Worker%else 9719*c0909341SAndroid Build Coastguard Worker lea reg_tmp, [dstq+centerwq*2] 9720*c0909341SAndroid Build Coastguard Worker%endif 9721*c0909341SAndroid Build Coastguard Worker %if 
ARCH_X86_64 9722*c0909341SAndroid Build Coastguard Worker movd m0, [srcq+centerwq*2-2] 9723*c0909341SAndroid Build Coastguard Worker %else 9724*c0909341SAndroid Build Coastguard Worker mov r3, srcm 9725*c0909341SAndroid Build Coastguard Worker movd m0, [r3+centerwq*2-2] 9726*c0909341SAndroid Build Coastguard Worker %endif 9727*c0909341SAndroid Build Coastguard Worker pshuflw m0, m0, q0000 9728*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m0 9729*c0909341SAndroid Build Coastguard Worker xor r3, r3 9730*c0909341SAndroid Build Coastguard Worker.right_loop_%3: 9731*c0909341SAndroid Build Coastguard Worker movu [reg_tmp+r3*2], m0 9732*c0909341SAndroid Build Coastguard Worker add r3, mmsize/2 9733*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9734*c0909341SAndroid Build Coastguard Worker cmp r3, rightextq 9735*c0909341SAndroid Build Coastguard Worker %else 9736*c0909341SAndroid Build Coastguard Worker cmp r3, r3m 9737*c0909341SAndroid Build Coastguard Worker %endif 9738*c0909341SAndroid Build Coastguard Worker jl .right_loop_%3 9739*c0909341SAndroid Build Coastguard Worker%endif 9740*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9741*c0909341SAndroid Build Coastguard Worker add dstq, dstrideq 9742*c0909341SAndroid Build Coastguard Worker add srcq, sstrideq 9743*c0909341SAndroid Build Coastguard Worker dec centerhq 9744*c0909341SAndroid Build Coastguard Worker jg .v_loop_%3 9745*c0909341SAndroid Build Coastguard Worker %else 9746*c0909341SAndroid Build Coastguard Worker add dstq, dstridem 9747*c0909341SAndroid Build Coastguard Worker mov r0, sstridem 9748*c0909341SAndroid Build Coastguard Worker add srcm, r0 9749*c0909341SAndroid Build Coastguard Worker sub dword centerhm, 1 9750*c0909341SAndroid Build Coastguard Worker jg .v_loop_%3 9751*c0909341SAndroid Build Coastguard Worker mov r0, r0m ; restore r0 9752*c0909341SAndroid Build Coastguard Worker %endif 9753*c0909341SAndroid Build Coastguard Worker%endmacro ; vloop MACRO 
9754*c0909341SAndroid Build Coastguard Worker 9755*c0909341SAndroid Build Coastguard Worker test leftextq, leftextq 9756*c0909341SAndroid Build Coastguard Worker jnz .need_left_ext 9757*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9758*c0909341SAndroid Build Coastguard Worker test rightextq, rightextq 9759*c0909341SAndroid Build Coastguard Worker jnz .need_right_ext 9760*c0909341SAndroid Build Coastguard Worker %else 9761*c0909341SAndroid Build Coastguard Worker cmp leftextq, r3m ; leftextq == 0 9762*c0909341SAndroid Build Coastguard Worker jne .need_right_ext 9763*c0909341SAndroid Build Coastguard Worker %endif 9764*c0909341SAndroid Build Coastguard Worker v_loop 0, 0, 0 9765*c0909341SAndroid Build Coastguard Worker jmp .body_done 9766*c0909341SAndroid Build Coastguard Worker 9767*c0909341SAndroid Build Coastguard Worker ;left right extensions 9768*c0909341SAndroid Build Coastguard Worker.need_left_ext: 9769*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9770*c0909341SAndroid Build Coastguard Worker test rightextq, rightextq 9771*c0909341SAndroid Build Coastguard Worker %else 9772*c0909341SAndroid Build Coastguard Worker mov r3, r3m 9773*c0909341SAndroid Build Coastguard Worker test r3, r3 9774*c0909341SAndroid Build Coastguard Worker %endif 9775*c0909341SAndroid Build Coastguard Worker jnz .need_left_right_ext 9776*c0909341SAndroid Build Coastguard Worker v_loop 1, 0, 1 9777*c0909341SAndroid Build Coastguard Worker jmp .body_done 9778*c0909341SAndroid Build Coastguard Worker 9779*c0909341SAndroid Build Coastguard Worker.need_left_right_ext: 9780*c0909341SAndroid Build Coastguard Worker v_loop 1, 1, 2 9781*c0909341SAndroid Build Coastguard Worker jmp .body_done 9782*c0909341SAndroid Build Coastguard Worker 9783*c0909341SAndroid Build Coastguard Worker.need_right_ext: 9784*c0909341SAndroid Build Coastguard Worker v_loop 0, 1, 3 9785*c0909341SAndroid Build Coastguard Worker 9786*c0909341SAndroid Build Coastguard Worker.body_done: 
9787*c0909341SAndroid Build Coastguard Worker; r0 ; bw 9788*c0909341SAndroid Build Coastguard Worker; r1 ;; x loop 9789*c0909341SAndroid Build Coastguard Worker; r4 ;; y loop 9790*c0909341SAndroid Build Coastguard Worker; r5 ; topextq 9791*c0909341SAndroid Build Coastguard Worker; r6 ;dstq 9792*c0909341SAndroid Build Coastguard Worker; r7 ;dstrideq 9793*c0909341SAndroid Build Coastguard Worker; r8 ; srcq 9794*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 9795*c0909341SAndroid Build Coastguard Worker %define reg_dstride dstrideq 9796*c0909341SAndroid Build Coastguard Worker%else 9797*c0909341SAndroid Build Coastguard Worker %define reg_dstride r2 9798*c0909341SAndroid Build Coastguard Worker%endif 9799*c0909341SAndroid Build Coastguard Worker ; 9800*c0909341SAndroid Build Coastguard Worker ; bottom edge extension 9801*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9802*c0909341SAndroid Build Coastguard Worker test bottomextq, bottomextq 9803*c0909341SAndroid Build Coastguard Worker jz .top 9804*c0909341SAndroid Build Coastguard Worker %else 9805*c0909341SAndroid Build Coastguard Worker xor r1, r1 9806*c0909341SAndroid Build Coastguard Worker cmp r1, r4m 9807*c0909341SAndroid Build Coastguard Worker je .top 9808*c0909341SAndroid Build Coastguard Worker %endif 9809*c0909341SAndroid Build Coastguard Worker ; 9810*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9811*c0909341SAndroid Build Coastguard Worker mov srcq, dstq 9812*c0909341SAndroid Build Coastguard Worker sub srcq, dstrideq 9813*c0909341SAndroid Build Coastguard Worker xor r1, r1 9814*c0909341SAndroid Build Coastguard Worker %else 9815*c0909341SAndroid Build Coastguard Worker mov r3, dstq 9816*c0909341SAndroid Build Coastguard Worker mov reg_dstride, dstridem 9817*c0909341SAndroid Build Coastguard Worker sub r3, reg_dstride 9818*c0909341SAndroid Build Coastguard Worker mov srcm, r3 9819*c0909341SAndroid Build Coastguard Worker %endif 9820*c0909341SAndroid Build Coastguard Worker ; 
9821*c0909341SAndroid Build Coastguard Worker.bottom_x_loop: 9822*c0909341SAndroid Build Coastguard Worker %if ARCH_X86_64 9823*c0909341SAndroid Build Coastguard Worker mova m0, [srcq+r1*2] 9824*c0909341SAndroid Build Coastguard Worker lea r3, [dstq+r1*2] 9825*c0909341SAndroid Build Coastguard Worker mov r4, bottomextq 9826*c0909341SAndroid Build Coastguard Worker %else 9827*c0909341SAndroid Build Coastguard Worker mov r3, srcm 9828*c0909341SAndroid Build Coastguard Worker mova m0, [r3+r1*2] 9829*c0909341SAndroid Build Coastguard Worker lea r3, [dstq+r1*2] 9830*c0909341SAndroid Build Coastguard Worker mov r4, r4m 9831*c0909341SAndroid Build Coastguard Worker %endif 9832*c0909341SAndroid Build Coastguard Worker ; 9833*c0909341SAndroid Build Coastguard Worker.bottom_y_loop: 9834*c0909341SAndroid Build Coastguard Worker mova [r3], m0 9835*c0909341SAndroid Build Coastguard Worker add r3, reg_dstride 9836*c0909341SAndroid Build Coastguard Worker dec r4 9837*c0909341SAndroid Build Coastguard Worker jg .bottom_y_loop 9838*c0909341SAndroid Build Coastguard Worker add r1, mmsize/2 9839*c0909341SAndroid Build Coastguard Worker cmp r1, bwq 9840*c0909341SAndroid Build Coastguard Worker jl .bottom_x_loop 9841*c0909341SAndroid Build Coastguard Worker 9842*c0909341SAndroid Build Coastguard Worker.top: 9843*c0909341SAndroid Build Coastguard Worker ; top edge extension 9844*c0909341SAndroid Build Coastguard Worker test topextq, topextq 9845*c0909341SAndroid Build Coastguard Worker jz .end 9846*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 9847*c0909341SAndroid Build Coastguard Worker mov srcq, reg_blkm 9848*c0909341SAndroid Build Coastguard Worker%else 9849*c0909341SAndroid Build Coastguard Worker mov r3, reg_blkm 9850*c0909341SAndroid Build Coastguard Worker mov reg_dstride, dstridem 9851*c0909341SAndroid Build Coastguard Worker%endif 9852*c0909341SAndroid Build Coastguard Worker mov dstq, dstm 9853*c0909341SAndroid Build Coastguard Worker xor r1, r1 
9854*c0909341SAndroid Build Coastguard Worker ; 9855*c0909341SAndroid Build Coastguard Worker.top_x_loop: 9856*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 9857*c0909341SAndroid Build Coastguard Worker mova m0, [srcq+r1*2] 9858*c0909341SAndroid Build Coastguard Worker%else 9859*c0909341SAndroid Build Coastguard Worker mov r3, reg_blkm 9860*c0909341SAndroid Build Coastguard Worker mova m0, [r3+r1*2] 9861*c0909341SAndroid Build Coastguard Worker%endif 9862*c0909341SAndroid Build Coastguard Worker lea r3, [dstq+r1*2] 9863*c0909341SAndroid Build Coastguard Worker mov r4, topextq 9864*c0909341SAndroid Build Coastguard Worker ; 9865*c0909341SAndroid Build Coastguard Worker.top_y_loop: 9866*c0909341SAndroid Build Coastguard Worker mova [r3], m0 9867*c0909341SAndroid Build Coastguard Worker add r3, reg_dstride 9868*c0909341SAndroid Build Coastguard Worker dec r4 9869*c0909341SAndroid Build Coastguard Worker jg .top_y_loop 9870*c0909341SAndroid Build Coastguard Worker add r1, mmsize/2 9871*c0909341SAndroid Build Coastguard Worker cmp r1, bwq 9872*c0909341SAndroid Build Coastguard Worker jl .top_x_loop 9873*c0909341SAndroid Build Coastguard Worker 9874*c0909341SAndroid Build Coastguard Worker.end: 9875*c0909341SAndroid Build Coastguard Worker RET 9876*c0909341SAndroid Build Coastguard Worker 9877*c0909341SAndroid Build Coastguard Worker%undef reg_dstride 9878*c0909341SAndroid Build Coastguard Worker%undef reg_blkm 9879*c0909341SAndroid Build Coastguard Worker%undef reg_tmp 9880*c0909341SAndroid Build Coastguard Worker 9881*c0909341SAndroid Build Coastguard Worker%macro SCRATCH 3 9882*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_32 9883*c0909341SAndroid Build Coastguard Worker mova [rsp+%3*mmsize], m%1 9884*c0909341SAndroid Build Coastguard Worker%define m%2 [rsp+%3*mmsize] 9885*c0909341SAndroid Build Coastguard Worker%else 9886*c0909341SAndroid Build Coastguard Worker SWAP %1, %2 9887*c0909341SAndroid Build Coastguard Worker%endif 9888*c0909341SAndroid 
%endmacro

; resize_16bpc(dst, dst_stride, src, src_stride, dst_w, h, src_w, dx, mx0, pxmax)
;
; Horizontal resampler for 16-bit pixels. For every row it walks a fixed-point
; source position (integer part = position >> 14, filter phase =
; (position >> 8) & 63), starting at mx0 and advancing by dx per output pixel.
; Four output pixels are produced per inner-loop iteration; each one is the
; dot product of 8 consecutive source pixels with an 8-tap row of
; resize_filter. Source positions are clamped to [0, src_w-8] and the taps
; that would fall outside that window are replaced via pshufb masks taken
; from resize_shuf. Results are clamped to [0, pxmax].
;
; cglobal numeric arguments follow the x86inc convention:
;   number of auto-loaded args, GPRs used, XMM registers used, stack bytes.
; No argument is auto-loaded (0); each one is fetched explicitly below.
%if ARCH_X86_64
cglobal resize_16bpc, 0, 12, 16, 1*16, dst, dst_stride, src, src_stride, \
                                       dst_w, h, src_w, dx, mx0, pxmax
%elif STACK_ALIGNMENT >= 16
cglobal resize_16bpc, 0, 7, 8, 6*16, dst, dst_stride, src, src_stride, \
                                     dst_w, h, src_w, dx, mx0, pxmax
%else
cglobal resize_16bpc, 0, 6, 8, 6*16, dst, dst_stride, src, src_stride, \
                                     dst_w, h, src_w, dx, mx0, pxmax
%endif
    movifnidn         dstq, dstmp
    movifnidn         srcq, srcmp
%if STACK_ALIGNMENT >= 16
    ; without an aligned stack r4 is needed as the PIC base below, so dst_w
    ; is only kept in a register in the aligned configurations
    movifnidn       dst_wd, dst_wm
%endif
%if ARCH_X86_64
    movifnidn           hd, hm
%endif
    sub         dword mx0m, 4<<14   ; mx0 -= 4 (in 14-bit fixed point)
    sub       dword src_wm, 8       ; src_w - 8: highest allowed load position
    movd                m4, pxmaxm
    movd                m7, dxm
    movd                m6, mx0m
    movd                m5, src_wm
    punpcklwd           m4, m4
    pshufd              m4, m4, q0000 ; pxmax in all 8 words
    pshufd              m7, m7, q0000 ; dx in all 4 dwords
    pshufd              m6, m6, q0000 ; mx0 in all 4 dwords
    pshufd              m5, m5, q0000 ; src_w-8 in all 4 dwords
    ; pxmax lives on the stack: offset 0 on x86-64, offset 3*16 on x86-32
    mova [rsp+16*3*ARCH_X86_32], m4
%if ARCH_X86_64
    ; src_w/dx/mx0/pxmax have been consumed; reuse the src_w slot as x
    DEFINE_ARGS dst, dst_stride, src, src_stride, dst_w, h, x
    LEA                 r7, $$
 %define base r7-$$
%else
    ; x takes over the register slot of h, so h is counted down in memory
    DEFINE_ARGS dst, dst_stride, src, src_stride, dst_w, x
 %define hd dword r5m
 %if STACK_ALIGNMENT >= 16
    LEA                 r6, $$
  %define base r6-$$
 %else
    LEA                 r4, $$
  %define base r4-$$
 %endif
%endif
%if ARCH_X86_64
    mova               m12, [base+pd_64]
    mova               m11, [base+pd_63]
%else
    ; only 8 XMM registers on x86-32: use the constants as memory operands
 %define m12 [base+pd_64]
 %define m11 [base+pd_63]
%endif
    pmaddwd             m4, m7, [base+rescale_mul] ; dx*[0,1,2,3]
    pslld               m7, 2                      ; dx*4
    pslld               m5, 14                     ; (src_w-8) in 14-bit fixed point
    paddd               m6, m4                     ; mx+[0..3]*dx
    ; NOTE(review): SCRATCH is defined elsewhere in this file; from its use
    ; here it makes m7/m6/m5 available as m15/m14/m13 (presumably spilling to
    ; stack slots 0/1/2 on x86-32) - confirm against the macro definition.
    SCRATCH              7, 15, 0
    SCRATCH              6, 14, 1
    SCRATCH              5, 13, 2
    pxor                m1, m1      ; .loop_x expects m1 == 0 on entry
.loop_y:
    xor                 xd, xd
    mova                m0, m14     ; per-line working version of mx
.loop_x:
    ; m1 = clamp(mx, 0, (src_w-8)<<14), built from compare masks
    pcmpgtd             m1, m0      ; m1 = (0 > mx) ? -1 : 0
    pandn               m1, m0      ; m1 = max(mx, 0)
    psrad               m2, m0, 8   ; filter offset (unmasked)
    pcmpgtd             m3, m13, m1 ; m3 = (limit > m1) ? -1 : 0
    pand                m1, m3
    pandn               m3, m13
    por                 m1, m3      ; m1 = min(m1, limit)
    psubd               m3, m0, m1  ; pshufb offset
    psrad               m1, 14      ; clipped src_x offset
    psrad               m3, 14      ; pshufb edge_emu offset
    pand                m2, m11     ; filter offset (masked)
    ; load source pixels
%if ARCH_X86_64
    ; extract the four clipped positions and load 8 pixels from each
    movd               r8d, m1
    pshuflw             m1, m1, q3232
    movd               r9d, m1
    punpckhqdq          m1, m1
    movd              r10d, m1
    psrlq               m1, 32
    movd              r11d, m1
    movu                m4, [srcq+r8*2]
    movu                m5, [srcq+r9*2]
    movu                m6, [srcq+r10*2]
    movu                m7, [srcq+r11*2]
    ; if no emulation is required, we don't need to shuffle or emulate edges
    packssdw            m3, m3      ; four 16-bit edge offsets in the low qword
    movq               r11, m3
    test               r11, r11
    jz .filter
    ; sign-extend each 16-bit offset; after the third sar r11 holds the fourth
    movsx               r8, r11w
    sar                r11, 16
    movsx               r9, r11w
    sar                r11, 16
    movsx              r10, r11w
    sar                r11, 16
    movu                m1, [base+resize_shuf+8+r8*2]
    movu                m3, [base+resize_shuf+8+r9*2]
    movu                m8, [base+resize_shuf+8+r10*2]
    movu                m9, [base+resize_shuf+8+r11*2]
    pshufb              m4, m1
    pshufb              m5, m3
    pshufb              m6, m8
    pshufb              m7, m9
.filter:
    ; extract the four filter phases and fetch their 8-byte coefficient rows
    movd               r8d, m2
    pshuflw             m2, m2, q3232
    movd               r9d, m2
    punpckhqdq          m2, m2
    movd              r10d, m2
    psrlq               m2, 32
    movd              r11d, m2
    movq                m8, [base+resize_filter+r8*8]
    movq                m2, [base+resize_filter+r9*8]
    ; interleave below zero bytes + arithmetic shift = sign-extend int8->int16
    pxor                m9, m9
    punpcklbw           m1, m9, m8
    punpcklbw           m3, m9, m2
    psraw               m1, 8
    psraw               m3, 8
    movq               m10, [base+resize_filter+r10*8]
    movq                m2, [base+resize_filter+r11*8]
    punpcklbw           m8, m9, m10
    punpcklbw           m9, m2
    psraw               m8, 8
    psraw               m9, 8
    pmaddwd             m4, m1
    pmaddwd             m5, m3
    pmaddwd             m6, m8
    pmaddwd             m7, m9
    phaddd              m4, m5
%else
    ; r1/r3 (the stride slots) double as scratch; strides are read via *mp
    movd                r3, m1
    pshuflw             m1, m1, q3232
    movd                r1, m1
    punpckhqdq          m1, m1
    movu                m4, [srcq+r3*2]
    movu                m5, [srcq+r1*2]
    movd                r3, m1
    psrlq               m1, 32
    movd                r1, m1
    movu                m6, [srcq+r3*2]
    movu                m7, [srcq+r1*2]
    ; if no emulation is required, we don't need to shuffle or emulate edges
    pxor                m1, m1
    pcmpeqb             m1, m3
    pmovmskb           r3d, m1
    cmp                r3d, 0xffff  ; all 16 bytes of the offsets are zero?
    je .filter
    movd                r3, m3
    movu                m1, [base+resize_shuf+8+r3*2]
    pshuflw             m3, m3, q3232
    movd                r1, m3
    pshufb              m4, m1
    movu                m1, [base+resize_shuf+8+r1*2]
    punpckhqdq          m3, m3
    movd                r3, m3
    pshufb              m5, m1
    movu                m1, [base+resize_shuf+8+r3*2]
    psrlq               m3, 32
    movd                r1, m3
    pshufb              m6, m1
    movu                m1, [base+resize_shuf+8+r1*2]
    pshufb              m7, m1
.filter:
    ; park pixels 2/3 on the stack while m6/m7 hold the first two filter rows
    mova        [esp+4*16], m6
    mova        [esp+5*16], m7
    movd                r3, m2
    pshuflw             m2, m2, q3232
    movd                r1, m2
    movq                m6, [base+resize_filter+r3*8]
    movq                m7, [base+resize_filter+r1*8]
    ; interleave below zero bytes + arithmetic shift = sign-extend int8->int16
    pxor                m3, m3
    punpcklbw           m1, m3, m6
    punpcklbw           m3, m7
    psraw               m1, 8
    psraw               m3, 8
    pmaddwd             m4, m1
    pmaddwd             m5, m3
    punpckhqdq          m2, m2
    movd                r3, m2
    psrlq               m2, 32
    movd                r1, m2
    phaddd              m4, m5
    movq                m2, [base+resize_filter+r3*8]
    movq                m5, [base+resize_filter+r1*8]
    mova                m6, [esp+4*16]
    mova                m7, [esp+5*16]
    pxor                m3, m3
    punpcklbw           m1, m3, m2
    punpcklbw           m3, m5
    psraw               m1, 8
    psraw               m3, 8
    pmaddwd             m6, m1
    pmaddwd             m7, m3
%endif
    phaddd              m6, m7
    phaddd              m4, m6      ; one 32-bit sum per output pixel
    pxor                m1, m1      ; zero for pmaxsw and for the next iteration
    ; NOTE(review): the result is (64 - sum) >> 7, which suggests the
    ; resize_filter taps are stored negated - confirm against the table.
    psubd               m2, m12, m4
    psrad               m2, 7
    packssdw            m2, m2
    pmaxsw              m2, m1                      ; clamp to >= 0
    pminsw              m2, [rsp+16*3*ARCH_X86_32]  ; clamp to <= pxmax
    movq       [dstq+xq*2], m2      ; store 4 pixels
    paddd               m0, m15     ; mx += dx*4
    add                 xd, 4
%if STACK_ALIGNMENT >= 16
    cmp                 xd, dst_wd
%else
    cmp                 xd, dst_wm
%endif
    jl .loop_x
    add               dstq, dst_stridemp
    add               srcq, src_stridemp
    dec                 hd
    jg .loop_y
    RET