; Copyright © 2021, VideoLAN and dav1d authors
; Copyright © 2021, Two Orioles, LLC
; Copyright (c) 2017-2021, The rav1e contributors
; Copyright (c) 2021, Nathan Egge
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%include "ext/x86/x86inc.asm"

SECTION_RODATA

; DUP8 a, b, ...: emit each argument as one 16-byte row of eight equal words.
%macro DUP8 1-*
    %rep %0
        times 8 dw %1
        %rotate 1
    %endrep
%endmacro

; Primary tap weights, one DUP8 row per tap: rows 0-1 hold {4, 2}, rows 2-3
; hold {3, 3}. CDEF_FILTER adds (pri & 16) * 2 bytes to this address, so it
; lands on either the first or the second pair, and the kernels then read
; [priq+16*0] (k0) and [priq+16*1] (k1).
pri_taps:  DUP8 4, 2, 3, 3

; Two signed byte offsets (k0, k1) per row, each written as y*32 + x*2. The
; kernels add them to tmpq, which they advance by 32 bytes per pixel row while
; operating on 16-bit words. CDEF_FILTER points dirq at row `dir`; the kernels
; read rows dir+0 ([dirq+0/1]), dir+2 ([dirq+4/5]) and dir+4 ([dirq+8/9]).
; Rows 8-11 repeat rows 0-3, presumably so that those reads stay inside the
; table for dir = 0..7 (the range of dir is not established here - confirm
; against the caller).
dir_table: db  1 * 32 + 0,  2 * 32 + 0
           db  1 * 32 + 0,  2 * 32 - 2
           db -1 * 32 + 2, -2 * 32 + 4
           db  0 * 32 + 2, -1 * 32 + 4
           db  0 * 32 + 2,  0 * 32 + 4
           db  0 * 32 + 2,  1 * 32 + 4
           db  1 * 32 + 2,  2 * 32 + 4
           db  1 * 32 + 0,  2 * 32 + 2
           db  1 * 32 + 0,  2 * 32 + 0
           db  1 * 32 + 0,  2 * 32 - 2
           db -1 * 32 + 2, -2 * 32 + 4
           db  0 * 32 + 2, -1 * 32 + 4

dir_shift: times 4 dw 0x4000
           times 4 dw 0x1000

pw_128:    times 4 dw 128
pw_2048:   times 8 dw 2048      ; pmulhrsw by 2048 == (x + 8) >> 4
pw_m16384: times 8 dw -16384

cextern cdef_dir_8bpc_ssse3.main
cextern cdef_dir_8bpc_sse4.main
cextern shufw_6543210x

SECTION .text

; Scratch-register numbers handed to x86inc's DECLARE_REG_TMP, chosen per ABI.
%if ARCH_X86_32
DECLARE_REG_TMP 5, 3
%elif WIN64
DECLARE_REG_TMP 8, 4
%else
DECLARE_REG_TMP 8, 6
%endif

;-----------------------------------------------------------------------------
; CDEF_FILTER w, h
;
; Dispatcher and per-8-pixel tap kernels for a w x h block of 16-bit pixels.
; The function entry, `base`, `px`, `pri_shift` and `sec_shift` are defined
; outside this macro, and m7 is consumed below as a pshufb control that
; broadcasts the strength loaded by movd (original comment: "splat").
; NOTE(review): confirm all of these at the instantiation site.
;
; Stack arguments read here:
;   r5m   primary strength   (0 -> .sec_only)
;   r6m   secondary strength (0 -> .pri_only)
;   r7m   direction, turned into dir_table + dir*2
;   r8m   damping
;   r10m  compared with 0x3ff; on a match pri is shifted left by 2 before
;         bit 4 is tested to pick the pri_taps pair
;
; Values handed to the kernels:
;   m6         strength splatted to all words (secondary strength in
;              .pri_sec/.sec, primary strength in .pri)
;   [rsp]      splatted primary strength for .pri_sec, which reads it back
;              as [rsp+gprsize] once `call` has pushed the return address
;   pri_shift  max(0, damping - bsr(pri))
;   sec_shift  damping - tzcnt(sec)
;   m8         pw_2048 (a register on x86-64, a memory operand on x86-32)
;   tmpq       address of px; dirq / priq point into the tables above
;
; The kernels are emitted only by the square instantiation (%1 == %2); every
; instantiation calls the %1x%1 copy %1*%2/8 times. Each call filters 8
; pixels: two rows of four when %1 == 4, otherwise one row of eight.
;-----------------------------------------------------------------------------
%macro CDEF_FILTER 2 ; w, h
%if ARCH_X86_64
    DEFINE_ARGS dst, stride, _, tmp, pridmp, pri, sec, dir
    mova            m8, [base+pw_2048]
%else
    DEFINE_ARGS dst, pridmp, tmp, sec, pri, _, dir
    %define         m8  [base+pw_2048]
    %define         m9  [rsp+16*1+gprsize]
    %define        m10  [rsp+16*2+gprsize]
%endif
    movifnidn     prid, r5m
    movifnidn     secd, r6m
    test          prid, prid
    jz .sec_only
    movd            m6, r5m
%if ARCH_X86_32
    ; pridmp occupies the register the kernels use as stride (second argument
    ; in both x86-32 DEFINE_ARGS lists), so park its incoming value first.
    mov       [rsp+24], pridmpd
%endif
    bsr        pridmpd, prid
    lea           tmpd, [priq*4]
    cmp     dword r10m, 0x3ff ; if (bpc == 10)
    cmove         prid, tmpd  ;     pri <<= 2
    mov           tmpd, r8m   ; damping
    mov           dird, r7m
    and           prid, 16    ; 0 or 16 -> byte offset 0 or 32 after the *2 below
    pshufb          m6, m7    ; splat
    lea           dirq, [base+dir_table+dirq*2]
    lea           priq, [base+pri_taps+priq*2]
    test          secd, secd
    jz .pri_only
    mova         [rsp], m6    ; primary strength; m6 is reused for secondary
    movd            m6, secd
    tzcnt         secd, secd
    sub        pridmpd, tmpd
    sub           tmpd, secd  ; sec_shift = damping - tzcnt(sec)
    pshufb          m6, m7
    xor           secd, secd
    neg        pridmpd        ; damping - bsr(pri), sets SF for the cmovs
    cmovs      pridmpd, secd  ; pri_shift = max(0, damping - bsr(pri))
%if ARCH_X86_32
    ; Only 32 bits are stored at +0 here; clear the upper dwords so that the
    ; psrlw memory operands see the intended count.
    mov  [pri_shift+4], secd
    mov  [sec_shift+4], secd
%endif
    mov  [pri_shift+0], pridmpq
    mov  [sec_shift+0], tmpq
    lea           tmpq, [px]
%if WIN64
    ; .pri_sec overwrites xmm9/xmm10 (callee-saved in the Win64 ABI); keep
    ; their incoming values in the r4m/r6m argument slots across the calls.
    movaps         r4m, m9
    movaps         r6m, m10
%elif ARCH_X86_32
    mov        pridmpd, [rsp+24]
%endif
%rep %1*%2/8
    call mangle(private_prefix %+ _cdef_filter_%1x%1_16bpc %+ SUFFIX).pri_sec
%endrep
%if WIN64
    movaps          m9, r4m
    movaps         m10, r6m
%endif
    jmp .end
.pri_only:
    ; secd is zero on this path (jz above), so cmovs clamps at zero.
    sub           tmpd, pridmpd
    cmovs         tmpd, secd  ; pri_shift = max(0, damping - bsr(pri))
%if ARCH_X86_32
    mov        pridmpd, [rsp+24]
    mov  [pri_shift+4], secd
%endif
    mov  [pri_shift+0], tmpq
    lea           tmpq, [px]
%rep %1*%2/8
    call mangle(private_prefix %+ _cdef_filter_%1x%1_16bpc %+ SUFFIX).pri
%endrep
.end:
    RET
.sec_only:
    mov           tmpd, r8m ; damping
    movd            m6, r6m
    tzcnt         secd, secd
    mov           dird, r7m
    pshufb          m6, m7
    sub           tmpd, secd  ; sec_shift = damping - tzcnt(sec)
    lea           dirq, [base+dir_table+dirq*2]
%if ARCH_X86_32
    mov  [sec_shift+4], prid  ; prid is zero on this path
%endif
    mov  [sec_shift+0], tmpq
    lea           tmpq, [px]
%rep %1*%2/8
    call mangle(private_prefix %+ _cdef_filter_%1x%1_16bpc %+ SUFFIX).sec
%endrep
    jmp .end
%if %1 == %2
 %if ARCH_X86_64
  DEFINE_ARGS dst, stride, _, tmp, off, pri, _, dir
 %else
  DEFINE_ARGS dst, stride, tmp, off, pri, _, dir
 %endif
;-----------------------------------------------------------------------------
; .pri - primary taps only, 8 pixels per call.
;   In:  dstq/strideq, tmpq, dirq, priq, m6 = primary strength, m8 = pw_2048
;   Out: dstq and tmpq advanced by the row(s) processed
;   Clobbers: offq, m0-m5, m7, flags
; Per tap: constrain(diff) = sign(diff) *
;                            min(|diff|, max(0, strength - (|diff| >> shift)))
; Result:  dst = px + ((8 + sum - (sum < 0)) >> 4), px being the pixels loaded
;          from dstq.
;-----------------------------------------------------------------------------
ALIGN function_align
.pri:
    movsx         offq, byte [dirq+4]    ; off_k0
%if %1 == 4
    movq            m1, [dstq+strideq*0]
    movhps          m1, [dstq+strideq*1]
    movq            m2, [tmpq+offq+32*0] ; k0p0
    movhps          m2, [tmpq+offq+32*1]
    neg           offq
    movq            m3, [tmpq+offq+32*0] ; k0p1
    movhps          m3, [tmpq+offq+32*1]
%else
    mova            m1, [dstq]
    movu            m2, [tmpq+offq]
    neg           offq
    movu            m3, [tmpq+offq]
%endif
    movsx         offq, byte [dirq+5]    ; off_k1
    psubw           m2, m1               ; diff_k0p0
    psubw           m3, m1               ; diff_k0p1
    pabsw           m4, m2               ; adiff_k0p0
    psrlw           m5, m4, [pri_shift+gprsize]
    psubusw         m0, m6, m5
    pabsw           m5, m3               ; adiff_k0p1
    pminsw          m0, m4
    psrlw           m4, m5, [pri_shift+gprsize]
    psignw          m0, m2               ; constrain(diff_k0p0)
    psubusw         m2, m6, m4
    pminsw          m2, m5
%if %1 == 4
    movq            m4, [tmpq+offq+32*0] ; k1p0
    movhps          m4, [tmpq+offq+32*1]
    neg           offq
    movq            m5, [tmpq+offq+32*0] ; k1p1
    movhps          m5, [tmpq+offq+32*1]
%else
    movu            m4, [tmpq+offq]
    neg           offq
    movu            m5, [tmpq+offq]
%endif
    psubw           m4, m1               ; diff_k1p0
    psubw           m5, m1               ; diff_k1p1
    psignw          m2, m3               ; constrain(diff_k0p1)
    pabsw           m3, m4               ; adiff_k1p0
    paddw           m0, m2               ; constrain(diff_k0)
    psrlw           m2, m3, [pri_shift+gprsize]
    psubusw         m7, m6, m2
    pabsw           m2, m5               ; adiff_k1p1
    pminsw          m7, m3
    psrlw           m3, m2, [pri_shift+gprsize]
    psignw          m7, m4               ; constrain(diff_k1p0)
    psubusw         m4, m6, m3
    pminsw          m4, m2
    psignw          m4, m5               ; constrain(diff_k1p1)
    paddw           m7, m4               ; constrain(diff_k1)
    pmullw          m0, [priq+16*0]      ; pri_tap_k0
    pmullw          m7, [priq+16*1]      ; pri_tap_k1
    paddw           m0, m7               ; sum
    psraw           m2, m0, 15           ; -1 where sum < 0
    paddw           m0, m2               ; sum -= (sum < 0)
    pmulhrsw        m0, m8               ; (sum + 8) >> 4
    paddw           m0, m1
%if %1 == 4
    add           tmpq, 32*2
    movq   [dstq+strideq*0], m0
    movhps [dstq+strideq*1], m0
    lea           dstq, [dstq+strideq*2]
%else
    add           tmpq, 32
    mova        [dstq], m0
    add           dstq, strideq
%endif
    ret
;-----------------------------------------------------------------------------
; .sec - secondary taps only, 8 pixels per call.
;   In:  dstq/strideq, tmpq, dirq, m6 = secondary strength, m8 = pw_2048
;   Out: dstq and tmpq advanced by the row(s) processed
;   Clobbers: offq, m0-m5, m7, flags
; Four k0 taps (offsets +-[dirq+8], +-[dirq+0]) are summed and doubled, then
; four k1 taps (+-[dirq+9], +-[dirq+1]) are added with weight 1. Constrain and
; rounding are the same as in .pri, using sec_shift.
;-----------------------------------------------------------------------------
ALIGN function_align
.sec:
    movsx         offq, byte [dirq+8]    ; off1_k0
%if %1 == 4
    movq            m1, [dstq+strideq*0]
    movhps          m1, [dstq+strideq*1]
    movq            m2, [tmpq+offq+32*0] ; k0s0
    movhps          m2, [tmpq+offq+32*1]
    neg           offq
    movq            m3, [tmpq+offq+32*0] ; k0s1
    movhps          m3, [tmpq+offq+32*1]
%else
    mova            m1, [dstq]
    movu            m2, [tmpq+offq]
    neg           offq
    movu            m3, [tmpq+offq]
%endif
    movsx         offq, byte [dirq+0]    ; off2_k0
    psubw           m2, m1               ; diff_k0s0
    psubw           m3, m1               ; diff_k0s1
    pabsw           m4, m2               ; adiff_k0s0
    psrlw           m5, m4, [sec_shift+gprsize]
    psubusw         m0, m6, m5
    pabsw           m5, m3               ; adiff_k0s1
    pminsw          m0, m4
    psrlw           m4, m5, [sec_shift+gprsize]
    psignw          m0, m2               ; constrain(diff_k0s0)
    psubusw         m2, m6, m4
    pminsw          m2, m5
%if %1 == 4
    movq            m4, [tmpq+offq+32*0] ; k0s2
    movhps          m4, [tmpq+offq+32*1]
    neg           offq
    movq            m5, [tmpq+offq+32*0] ; k0s3
    movhps          m5, [tmpq+offq+32*1]
%else
    movu            m4, [tmpq+offq]
    neg           offq
    movu            m5, [tmpq+offq]
%endif
    movsx         offq, byte [dirq+9]    ; off1_k1
    psubw           m4, m1               ; diff_k0s2
    psubw           m5, m1               ; diff_k0s3
    psignw          m2, m3               ; constrain(diff_k0s1)
    pabsw           m3, m4               ; adiff_k0s2
    paddw           m0, m2
    psrlw           m2, m3, [sec_shift+gprsize]
    psubusw         m7, m6, m2
    pabsw           m2, m5               ; adiff_k0s3
    pminsw          m7, m3
    psrlw           m3, m2, [sec_shift+gprsize]
    psignw          m7, m4               ; constrain(diff_k0s2)
    psubusw         m4, m6, m3
    pminsw          m4, m2
%if %1 == 4
    movq            m2, [tmpq+offq+32*0] ; k1s0
    movhps          m2, [tmpq+offq+32*1]
    neg           offq
    movq            m3, [tmpq+offq+32*0] ; k1s1
    movhps          m3, [tmpq+offq+32*1]
%else
    movu            m2, [tmpq+offq]
    neg           offq
    movu            m3, [tmpq+offq]
%endif
    movsx         offq, byte [dirq+1]    ; off2_k1
    paddw           m0, m7
    psignw          m4, m5               ; constrain(diff_k0s3)
    paddw           m0, m4               ; constrain(diff_k0)
    psubw           m2, m1               ; diff_k1s0
    psubw           m3, m1               ; diff_k1s1
    paddw           m0, m0               ; sec_tap_k0
    pabsw           m4, m2               ; adiff_k1s0
    psrlw           m5, m4, [sec_shift+gprsize]
    psubusw         m7, m6, m5
    pabsw           m5, m3               ; adiff_k1s1
    pminsw          m7, m4
    psrlw           m4, m5, [sec_shift+gprsize]
    psignw          m7, m2               ; constrain(diff_k1s0)
    psubusw         m2, m6, m4
    pminsw          m2, m5
%if %1 == 4
    movq            m4, [tmpq+offq+32*0] ; k1s2
    movhps          m4, [tmpq+offq+32*1]
    neg           offq
    movq            m5, [tmpq+offq+32*0] ; k1s3
    movhps          m5, [tmpq+offq+32*1]
%else
    movu            m4, [tmpq+offq]
    neg           offq
    movu            m5, [tmpq+offq]
%endif
    paddw           m0, m7
    psubw           m4, m1               ; diff_k1s2
    psubw           m5, m1               ; diff_k1s3
    psignw          m2, m3               ; constrain(diff_k1s1)
    pabsw           m3, m4               ; adiff_k1s2
    paddw           m0, m2
    psrlw           m2, m3, [sec_shift+gprsize]
    psubusw         m7, m6, m2
    pabsw           m2, m5               ; adiff_k1s3
    pminsw          m7, m3
    psrlw           m3, m2, [sec_shift+gprsize]
    psignw          m7, m4               ; constrain(diff_k1s2)
    psubusw         m4, m6, m3
    pminsw          m4, m2
    paddw           m0, m7
    psignw          m4, m5               ; constrain(diff_k1s3)
    paddw           m0, m4               ; sum
    psraw           m2, m0, 15           ; -1 where sum < 0
    paddw           m0, m2               ; sum -= (sum < 0)
    pmulhrsw        m0, m8               ; (sum + 8) >> 4
    paddw           m0, m1
%if %1 == 4
    add           tmpq, 32*2
    movq   [dstq+strideq*0], m0
    movhps [dstq+strideq*1], m0
    lea           dstq, [dstq+strideq*2]
%else
    add           tmpq, 32
    mova        [dstq], m0
    add           dstq, strideq
%endif
    ret
;-----------------------------------------------------------------------------
; .pri_sec - secondary taps followed by primary taps, 8 pixels per call.
;   In:  as .sec, plus priq, pri_shift and the splatted primary strength
;        at [rsp+gprsize]
; The secondary accumulation mirrors .sec. In addition, every tap pair is
; folded into two running values before its difference is taken:
;   m9  = signed maximum of the raw tap pixels
;   m10 = minimum of the absolute values (pabsw) of the tap pixels
; On x86-64 these are registers; on x86-32 they are the stack slots defined at
; the top of the macro, updated through m3/m4/m5/m7.
;-----------------------------------------------------------------------------
ALIGN function_align
.pri_sec:
    movsx         offq, byte [dirq+8]    ; off2_k0
%if %1 == 4
    movq            m1, [dstq+strideq*0]
    movhps          m1, [dstq+strideq*1]
    movq            m2, [tmpq+offq+32*0] ; k0s0
    movhps          m2, [tmpq+offq+32*1]
    neg           offq
    movq            m3, [tmpq+offq+32*0] ; k0s1
    movhps          m3, [tmpq+offq+32*1]
%else
    mova            m1, [dstq]
    movu            m2, [tmpq+offq]
    neg           offq
    movu            m3, [tmpq+offq]
%endif
    movsx         offq, byte [dirq+0]    ; off3_k0
    pabsw           m4, m2
%if ARCH_X86_64
    pabsw          m10, m3
    pmaxsw          m9, m2, m3
    pminsw         m10, m4
%else
    pabsw           m7, m3
    pmaxsw          m5, m2, m3
    pminsw          m4, m7
    mova            m9, m5
    mova           m10, m4
%endif
    psubw           m2, m1               ; diff_k0s0
    psubw           m3, m1               ; diff_k0s1
    pabsw           m4, m2               ; adiff_k0s0
    psrlw           m5, m4, [sec_shift+gprsize]
    psubusw         m0, m6, m5
    pabsw           m5, m3               ; adiff_k0s1
    pminsw          m0, m4
    psrlw           m4, m5, [sec_shift+gprsize]
    psignw          m0, m2               ; constrain(diff_k0s0)
    psubusw         m2, m6, m4
    pminsw          m2, m5
%if %1 == 4
    movq            m4, [tmpq+offq+32*0] ; k0s2
    movhps          m4, [tmpq+offq+32*1]
    neg           offq
    movq            m5, [tmpq+offq+32*0] ; k0s3
    movhps          m5, [tmpq+offq+32*1]
%else
    movu            m4, [tmpq+offq]
    neg           offq
    movu            m5, [tmpq+offq]
%endif
    movsx         offq, byte [dirq+9]    ; off2_k1
    pabsw           m7, m4
    psignw          m2, m3               ; constrain(diff_k0s1)
    pabsw           m3, m5
%if ARCH_X86_64
    pmaxsw          m9, m4
    pminsw         m10, m7
    pmaxsw          m9, m5
    pminsw         m10, m3
%else
    pminsw          m7, m10
    pminsw          m7, m3
    pmaxsw          m3, m9, m4
    pmaxsw          m3, m5
    mova           m10, m7
    mova            m9, m3
%endif
    psubw           m4, m1               ; diff_k0s2
    psubw           m5, m1               ; diff_k0s3
    paddw           m0, m2
    pabsw           m3, m4               ; adiff_k0s2
    psrlw           m2, m3, [sec_shift+gprsize]
    psubusw         m7, m6, m2
    pabsw           m2, m5               ; adiff_k0s3
    pminsw          m7, m3
    psrlw           m3, m2, [sec_shift+gprsize]
    psignw          m7, m4               ; constrain(diff_k0s2)
    psubusw         m4, m6, m3
    pminsw          m4, m2
%if %1 == 4
    movq            m2, [tmpq+offq+32*0] ; k1s0
    movhps          m2, [tmpq+offq+32*1]
    neg           offq
    movq            m3, [tmpq+offq+32*0] ; k1s1
    movhps          m3, [tmpq+offq+32*1]
%else
    movu            m2, [tmpq+offq]
    neg           offq
    movu            m3, [tmpq+offq]
%endif
    movsx         offq, byte [dirq+1]    ; off3_k1
    paddw           m0, m7
    pabsw           m7, m2
    psignw          m4, m5               ; constrain(diff_k0s3)
    pabsw           m5, m3
%if ARCH_X86_64
    pmaxsw          m9, m2
    pminsw         m10, m7
    pmaxsw          m9, m3
    pminsw         m10, m5
%else
    pminsw          m7, m10
    pminsw          m7, m5
    pmaxsw          m5, m9, m2
    pmaxsw          m5, m3
    mova           m10, m7
    mova            m9, m5
%endif
    paddw           m0, m4               ; constrain(diff_k0)
    psubw           m2, m1               ; diff_k1s0
    psubw           m3, m1               ; diff_k1s1
    paddw           m0, m0               ; sec_tap_k0
    pabsw           m4, m2               ; adiff_k1s0
    psrlw           m5, m4, [sec_shift+gprsize]
    psubusw         m7, m6, m5
    pabsw           m5, m3               ; adiff_k1s1
    pminsw          m7, m4
    psrlw           m4, m5, [sec_shift+gprsize]
    psignw          m7, m2               ; constrain(diff_k1s0)
    psubusw         m2, m6, m4
    pminsw          m2, m5
%if %1 == 4
    movq            m4, [tmpq+offq+32*0] ; k1s2
    movhps          m4, [tmpq+offq+32*1]
    neg           offq
    movq            m5, [tmpq+offq+32*0] ; k1s3
    movhps          m5, [tmpq+offq+32*1]
%else
    movu            m4, [tmpq+offq]
    neg           offq
    movu            m5, [tmpq+offq]
%endif
    movsx         offq, byte [dirq+4]    ; off1_k0
    paddw           m0, m7
    pabsw           m7, m4
    psignw          m2, m3               ; constrain(diff_k1s1)
    pabsw           m3, m5
%if ARCH_X86_64
    pmaxsw          m9, m4
    pminsw         m10, m7
    pmaxsw          m9, m5
    pminsw         m10, m3
%else
    pminsw          m7, m10
    pminsw          m7, m3
    pmaxsw          m3, m9, m4
    pmaxsw          m3, m5
    mova           m10, m7
    mova            m9, m3
%endif
    psubw           m4, m1               ; diff_k1s2
    psubw           m5, m1               ; diff_k1s3
    pabsw           m3, m4               ; adiff_k1s2
    paddw           m0, m2
    psrlw           m2, m3, [sec_shift+gprsize]
    psubusw         m7, m6, m2
    pabsw           m2, m5               ; adiff_k1s3
    pminsw          m7, m3
    psrlw           m3, m2, [sec_shift+gprsize]
    psignw          m7, m4               ; constrain(diff_k1s2)
    psubusw         m4, m6, m3
    pminsw          m4, m2
    paddw           m0, m7
%if %1 == 4
    movq            m2, [tmpq+offq+32*0] ; k0p0
    movhps          m2, [tmpq+offq+32*1]
    neg           offq
    movq            m3, [tmpq+offq+32*0] ; k0p1
    movhps          m3, [tmpq+offq+32*1]
%else
    movu            m2, [tmpq+offq]
    neg           offq
    movu            m3, [tmpq+offq]
%endif
    movsx         offq, byte [dirq+5]    ; off1_k1
    pabsw           m7, m2
    psignw          m4, m5               ; constrain(diff_k1s3)
    pabsw           m5, m3
%if ARCH_X86_64
    pmaxsw          m9, m2
    pminsw         m10, m7
    pmaxsw          m9, m3
    pminsw         m10, m5
%else
    pminsw          m7, m10
    pminsw          m7, m5
    pmaxsw          m5, m9, m2
    pmaxsw          m5, m3
    mova           m10, m7
    mova            m9, m5
%endif
    psubw           m2, m1               ; diff_k0p0
    psubw           m3, m1               ; diff_k0p1
    paddw           m0, m4
    pabsw           m4, m2               ; adiff_k0p0
    psrlw           m5, m4, [pri_shift+gprsize]
    psubusw         m7, [rsp+gprsize], m5 ; primary strength spilled by the dispatcher
    pabsw           m5, m3               ; adiff_k0p1
Coastguard Worker pminsw m7, m4 570*c0909341SAndroid Build Coastguard Worker psrlw m4, m5, [pri_shift+gprsize] 571*c0909341SAndroid Build Coastguard Worker psignw m7, m2 ; constrain(diff_k0p0) 572*c0909341SAndroid Build Coastguard Worker psubusw m2, [rsp+gprsize], m4 573*c0909341SAndroid Build Coastguard Worker pminsw m2, m5 574*c0909341SAndroid Build Coastguard Worker%if %1 == 4 575*c0909341SAndroid Build Coastguard Worker movq m4, [tmpq+offq+32*0] ; k1p0 576*c0909341SAndroid Build Coastguard Worker movhps m4, [tmpq+offq+32*1] 577*c0909341SAndroid Build Coastguard Worker neg offq 578*c0909341SAndroid Build Coastguard Worker movq m5, [tmpq+offq+32*0] ; k1p1 579*c0909341SAndroid Build Coastguard Worker movhps m5, [tmpq+offq+32*1] 580*c0909341SAndroid Build Coastguard Worker%else 581*c0909341SAndroid Build Coastguard Worker movu m4, [tmpq+offq] 582*c0909341SAndroid Build Coastguard Worker neg offq 583*c0909341SAndroid Build Coastguard Worker movu m5, [tmpq+offq] 584*c0909341SAndroid Build Coastguard Worker%endif 585*c0909341SAndroid Build Coastguard Worker psignw m2, m3 ; constrain(diff_k0p1) 586*c0909341SAndroid Build Coastguard Worker pabsw m3, m4 587*c0909341SAndroid Build Coastguard Worker paddw m7, m2 ; constrain(diff_k0) 588*c0909341SAndroid Build Coastguard Worker pabsw m2, m5 589*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 590*c0909341SAndroid Build Coastguard Worker pmaxsw m9, m4 591*c0909341SAndroid Build Coastguard Worker pminsw m10, m3 592*c0909341SAndroid Build Coastguard Worker pmaxsw m9, m5 593*c0909341SAndroid Build Coastguard Worker pminsw m10, m2 594*c0909341SAndroid Build Coastguard Worker%else 595*c0909341SAndroid Build Coastguard Worker pminsw m3, m10 596*c0909341SAndroid Build Coastguard Worker pminsw m3, m2 597*c0909341SAndroid Build Coastguard Worker pmaxsw m2, m9, m4 598*c0909341SAndroid Build Coastguard Worker pmaxsw m2, m5 599*c0909341SAndroid Build Coastguard Worker mova m10, m3 600*c0909341SAndroid Build Coastguard Worker mova 
m9, m2 601*c0909341SAndroid Build Coastguard Worker%endif 602*c0909341SAndroid Build Coastguard Worker psubw m4, m1 ; diff_k1p0 603*c0909341SAndroid Build Coastguard Worker psubw m5, m1 ; diff_k1p1 604*c0909341SAndroid Build Coastguard Worker pabsw m3, m4 ; adiff_k1p0 605*c0909341SAndroid Build Coastguard Worker pmullw m7, [priq+16*0] ; pri_tap_k0 606*c0909341SAndroid Build Coastguard Worker paddw m0, m7 607*c0909341SAndroid Build Coastguard Worker psrlw m2, m3, [pri_shift+gprsize] 608*c0909341SAndroid Build Coastguard Worker psubusw m7, [rsp+16*0+gprsize], m2 609*c0909341SAndroid Build Coastguard Worker pabsw m2, m5 ; adiff_k1p1 610*c0909341SAndroid Build Coastguard Worker pminsw m7, m3 611*c0909341SAndroid Build Coastguard Worker psrlw m3, m2, [pri_shift+gprsize] 612*c0909341SAndroid Build Coastguard Worker psignw m7, m4 ; constrain(diff_k1p0) 613*c0909341SAndroid Build Coastguard Worker psubusw m4, [rsp+16*0+gprsize], m3 614*c0909341SAndroid Build Coastguard Worker pminsw m4, m2 615*c0909341SAndroid Build Coastguard Worker psignw m4, m5 ; constrain(diff_k1p1) 616*c0909341SAndroid Build Coastguard Worker paddw m7, m4 ; constrain(diff_k1) 617*c0909341SAndroid Build Coastguard Worker pmullw m7, [priq+16*1] ; pri_tap_k1 618*c0909341SAndroid Build Coastguard Worker paddw m0, m7 ; sum 619*c0909341SAndroid Build Coastguard Worker psraw m2, m0, 15 620*c0909341SAndroid Build Coastguard Worker paddw m0, m2 621*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m8 622*c0909341SAndroid Build Coastguard Worker paddw m0, m1 623*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 624*c0909341SAndroid Build Coastguard Worker pmaxsw m9, m1 625*c0909341SAndroid Build Coastguard Worker pminsw m0, m9 626*c0909341SAndroid Build Coastguard Worker%else 627*c0909341SAndroid Build Coastguard Worker pmaxsw m2, m9, m1 628*c0909341SAndroid Build Coastguard Worker pminsw m0, m2 629*c0909341SAndroid Build Coastguard Worker%endif 630*c0909341SAndroid Build Coastguard Worker pminsw m1, 
m10                                     ; operand completing the "pminsw m1," statement split onto the previous line
    pmaxsw          m0, m1
%if %1 == 4
    ; width 4: m0 carries two rows (low/high qword), tmpq advances two rows
    add             tmpq, 32*2
    movq            [dstq+strideq*0], m0
    movhps          [dstq+strideq*1], m0
    lea             dstq, [dstq+strideq*2]
%else
    ; width 8: m0 carries one full row
    add             tmpq, 32
    mova            [dstq], m0
    add             dstq, strideq
%endif
    ret
%endif
%endmacro

;-----------------------------------------------------------------------
; cdef_filter_4x4_16bpc
; Named args (from cglobal): dst, stride, left, top, bot, pri, sec, edge
; on x86-64; dst, stride, edge, top, left on x86-32 (botq aliases topq).
; The edge flags are read from argument slot r9m.
;
; Builds a padded copy of the block in the stack scratch area `px`
; (rows 32 bytes apart): 4 block rows at px+32*0..3, two rows above at
; px-32*2/-1, two rows below at px+32*4/5, 4 bytes to the left of every
; row at offset -4 and 4 bytes to the right of the block at offset +8.
; Any part whose edge bit is clear is filled from the pw_m16384 constant
; (presumably words of -16384, per its name) instead of being loaded.
; Edge bits, per the inline comments: 1 = HAVE_LEFT, 2 = HAVE_RIGHT,
; 4 = HAVE_TOP, 8 = HAVE_BOTTOM.
; Ends by expanding CDEF_FILTER 4, 4.
;
; The px/base/pri_shift/sec_shift %defines below are not redefined by the
; 4x8 and 8x8 functions that follow, which reuse them as-is.
;-----------------------------------------------------------------------
INIT_XMM ssse3
%if ARCH_X86_64
cglobal cdef_filter_4x4_16bpc, 5, 9, 9, 32*10, dst, stride, left, top, bot, \
                                               pri, sec, edge
    %define         px rsp+32*4
%else
cglobal cdef_filter_4x4_16bpc, 2, 7, 8, -32*11, dst, stride, edge, top, left
    %define         botq topq
    %define         px rsp+32*5
%endif
    %define         base t0-dir_table
    %define         pri_shift px-16*6
    %define         sec_shift px-16*5
    mov             edged, r9m
    LEA             t0, dir_table
    ; copy the four block rows (16 bytes each, i.e. block + what lies right of it)
    movu            m0, [dstq+strideq*0]
    movu            m1, [dstq+strideq*1]
    lea             t1, [dstq+strideq*2]
    movu            m2, [t1 +strideq*0]
    movu            m3, [t1 +strideq*1]
    movddup         m7, [base+pw_m16384]    ; m7 = fill value for missing neighbours
    mova            [px+32*0+0], m0
    mova            [px+32*1+0], m1
    mova            [px+32*2+0], m2
    mova            [px+32*3+0], m3
    ; ---- two rows above the block ----
    test            edgeb, 4 ; HAVE_TOP
    jz .no_top
    movifnidn       topq, topmp
    movu            m0, [topq+strideq*0]
    movu            m1, [topq+strideq*1]
    mova            [px-32*2+0], m0
    mova            [px-32*1+0], m1
    test            edgeb, 1 ; HAVE_LEFT
    jz .top_no_left
    movd            m0, [topq+strideq*0-4]
    movd            m1, [topq+strideq*1-4]
    movd            [px-32*2-4], m0
    movd            [px-32*1-4], m1
    jmp .top_done
.no_top:
    mova            [px-32*2+0], m7
    mova            [px-32*1+0], m7
.top_no_left:                               ; also reached by fall-through from .no_top
    movd            [px-32*2-4], m7
    movd            [px-32*1-4], m7
.top_done:
    ; ---- two rows below the block ----
    test            edgeb, 8 ; HAVE_BOTTOM
    jz .no_bottom
    movifnidn       botq, r4mp
    movu            m0, [botq+strideq*0]
    movu            m1, [botq+strideq*1]
    mova            [px+32*4+0], m0
    mova            [px+32*5+0], m1
    test            edgeb, 1 ; HAVE_LEFT
    jz .bottom_no_left
    movd            m0, [botq+strideq*0-4]
    movd            m1, [botq+strideq*1-4]
    movd            [px+32*4-4], m0
    movd            [px+32*5-4], m1
    jmp .bottom_done
.no_bottom:
    mova            [px+32*4+0], m7
    mova            [px+32*5+0], m7
.bottom_no_left:                            ; also reached by fall-through from .no_bottom
    movd            [px+32*4-4], m7
    movd            [px+32*5-4], m7
.bottom_done:
    ; ---- left border of the block rows: one dword per row from the left array ----
    test            edgeb, 1 ; HAVE_LEFT
    jz .no_left
    movifnidn       leftq, r2mp
    movd            m0, [leftq+4*0]
    movd            m1, [leftq+4*1]
    movd            m2, [leftq+4*2]
    movd            m3, [leftq+4*3]
    movd            [px+32*0-4], m0
    movd            [px+32*1-4], m1
    movd            [px+32*2-4], m2
    movd            [px+32*3-4], m3
    jmp .left_done
.no_left:
    REPX {movd [px+32*x-4], m7}, 0, 1, 2, 3
.left_done:
    ; ---- right border: overwrite what the 16-byte row copies brought in ----
    test            edgeb, 2 ; HAVE_RIGHT
    jnz .padding_done
    REPX {movd [px+32*x+8], m7}, -2, -1, 0, 1, 2, 3, 4, 5
.padding_done:
    CDEF_FILTER     4, 4

;-----------------------------------------------------------------------
; cdef_filter_4x8_16bpc
; Same argument names and padding scheme as the 4x4 variant, for eight
; block rows: block at px+32*0..7, rows above at px-32*2/-1, rows below
; at px+32*8/9. Relies on the px/base %defines made by the 4x4 function.
; Ends by expanding CDEF_FILTER 4, 8.
;-----------------------------------------------------------------------
%if ARCH_X86_64
cglobal cdef_filter_4x8_16bpc, 5, 9, 9, 32*14, dst, stride, left, top, bot, \
                                               pri, sec, edge
%else
cglobal cdef_filter_4x8_16bpc, 2, 7, 8, -32*15, dst, stride, edge, top, left
%endif
    mov             edged, r9m
    LEA             t0, dir_table
    ; copy the eight block rows; t1 walks down two rows at a time
    movu            m0, [dstq+strideq*0]
    movu            m1, [dstq+strideq*1]
    lea             t1, [dstq+strideq*2]
    movu            m2, [t1 +strideq*0]
    movu            m3, [t1 +strideq*1]
    lea             t1, [t1 +strideq*2]
    movu            m4, [t1 +strideq*0]
    movu            m5, [t1 +strideq*1]
    lea             t1, [t1 +strideq*2]
    movu            m6, [t1 +strideq*0]
    movu            m7, [t1 +strideq*1]
    mova            [px+32*0+0], m0
    mova            [px+32*1+0], m1
    mova            [px+32*2+0], m2
    mova            [px+32*3+0], m3
    mova            [px+32*4+0], m4
    mova            [px+32*5+0], m5
    mova            [px+32*6+0], m6
    mova            [px+32*7+0], m7
    movddup         m7, [base+pw_m16384]    ; m7 is free again: reload it as the fill value
    ; ---- two rows above the block ----
    test            edgeb, 4 ; HAVE_TOP
    jz .no_top
    movifnidn       topq, topmp
    movu            m0, [topq+strideq*0]
    movu            m1, [topq+strideq*1]
    mova            [px-32*2+0], m0
    mova            [px-32*1+0], m1
    test            edgeb, 1 ; HAVE_LEFT
    jz .top_no_left
    movd            m0, [topq+strideq*0-4]
    movd            m1, [topq+strideq*1-4]
    movd            [px-32*2-4], m0
    movd            [px-32*1-4], m1
    jmp .top_done
.no_top:
    mova            [px-32*2+0], m7
    mova            [px-32*1+0], m7
.top_no_left:                               ; also reached by fall-through from .no_top
    movd            [px-32*2-4], m7
    movd            [px-32*1-4], m7
.top_done:
    ; ---- two rows below the block ----
    test            edgeb, 8 ; HAVE_BOTTOM
    jz .no_bottom
    movifnidn       botq, r4mp
    movu            m0, [botq+strideq*0]
    movu            m1, [botq+strideq*1]
    mova            [px+32*8+0], m0
    mova            [px+32*9+0], m1
    test            edgeb, 1 ; HAVE_LEFT
    jz .bottom_no_left
    movd            m0, [botq+strideq*0-4]
    movd            m1, [botq+strideq*1-4]
    movd            [px+32*8-4], m0
    movd            [px+32*9-4], m1
    jmp .bottom_done
.no_bottom:
    mova            [px+32*8+0], m7
    mova            [px+32*9+0], m7
.bottom_no_left:                            ; also reached by fall-through from .no_bottom
    movd            [px+32*8-4], m7
    movd            [px+32*9-4], m7
.bottom_done:
    ; ---- left border of the block rows: one dword per row from the left array ----
    test            edgeb, 1 ; HAVE_LEFT
    jz .no_left
    movifnidn       leftq, r2mp
    movd            m0, [leftq+4*0]
    movd            m1, [leftq+4*1]
    movd            m2, [leftq+4*2]
    movd            m3, [leftq+4*3]
    movd            [px+32*0-4], m0
    movd            [px+32*1-4], m1
    movd            [px+32*2-4], m2
    movd            [px+32*3-4], m3
    movd            m0, [leftq+4*4]
    movd            m1, [leftq+4*5]
    movd            m2, [leftq+4*6]
    movd            m3, [leftq+4*7]
    movd            [px+32*4-4], m0
    movd            [px+32*5-4], m1
    movd            [px+32*6-4], m2
    movd            [px+32*7-4], m3
    jmp .left_done
.no_left:
    REPX {movd [px+32*x-4], m7}, 0, 1, 2, 3, 4, 5, 6, 7
.left_done:
    ; ---- right border ----
    test            edgeb, 2 ; HAVE_RIGHT
    jnz .padding_done
    REPX {movd [px+32*x+8], m7}, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
.padding_done:
    CDEF_FILTER     4, 8

;-----------------------------------------------------------------------
; cdef_filter_8x8_16bpc
; Same argument names and padding scheme as the 4-wide variants, for rows
; of 16 bytes: each row is copied as 16 bytes at +0 plus a dword at +16
; (the right border), so the right padding lives at offset +16 here.
; Block rows are read from dst with mova, i.e. dst rows are assumed to be
; 16-byte aligned. Relies on the px/base %defines made by the 4x4 function.
; Ends by expanding CDEF_FILTER 8, 8.
;-----------------------------------------------------------------------
%if ARCH_X86_64
cglobal cdef_filter_8x8_16bpc, 5, 9, 9, 32*14, dst, stride, left, top, bot, \
                                               pri, sec, edge
%else
cglobal cdef_filter_8x8_16bpc, 2, 7, 8, -32*15, dst, stride, edge, top, left
%endif
    mov             edged, r9m
    LEA             t0, dir_table
    ; block rows 0-3
    mova            m0, [dstq+strideq*0+ 0]
    movd            m1, [dstq+strideq*0+16]
    mova            m2, [dstq+strideq*1+ 0]
    movd            m3, [dstq+strideq*1+16]
    lea             t1, [dstq+strideq*2]
    mova            m4, [t1 +strideq*0+ 0]
    movd            m5, [t1 +strideq*0+16]
    mova            m6, [t1 +strideq*1+ 0]
    movd            m7, [t1 +strideq*1+16]
    lea             t1, [t1 +strideq*2]
    mova            [px+32*0+ 0], m0
    movd            [px+32*0+16], m1
    mova            [px+32*1+ 0], m2
    movd            [px+32*1+16], m3
    mova            [px+32*2+ 0], m4
    movd            [px+32*2+16], m5
    mova            [px+32*3+ 0], m6
    movd            [px+32*3+16], m7
    ; block rows 4-7
    mova            m0, [t1 +strideq*0+ 0]
    movd            m1, [t1 +strideq*0+16]
    mova            m2, [t1 +strideq*1+ 0]
    movd            m3, [t1 +strideq*1+16]
    lea             t1, [t1 +strideq*2]
    mova            m4, [t1 +strideq*0+ 0]
    movd            m5, [t1 +strideq*0+16]
    mova            m6, [t1 +strideq*1+ 0]
    movd            m7, [t1 +strideq*1+16]
    mova            [px+32*4+ 0], m0
    movd            [px+32*4+16], m1
    mova            [px+32*5+ 0], m2
    movd            [px+32*5+16], m3
    mova            [px+32*6+ 0], m4
    movd            [px+32*6+16], m5
    mova            [px+32*7+ 0], m6
    movd            [px+32*7+16], m7
    movddup         m7, [base+pw_m16384]    ; m7 is free again: reload it as the fill value
    ; ---- two rows above the block ----
    test            edgeb, 4 ; HAVE_TOP
    jz .no_top
    movifnidn       topq, topmp
    ; NOTE(review): the +16 columns are loaded with a full-width mova here
    ; although only their low dword is stored below; the dst and bottom
    ; paths use movd for the same column. Confirm the wider (and
    ; alignment-requiring) load from the top rows is intended.
    mova            m0, [topq+strideq*0+ 0]
    mova            m1, [topq+strideq*0+16]
    mova            m2, [topq+strideq*1+ 0]
    mova            m3, [topq+strideq*1+16]
    mova            [px-32*2+ 0], m0
    movd            [px-32*2+16], m1
    mova            [px-32*1+ 0], m2
    movd            [px-32*1+16], m3
    test            edgeb, 1 ; HAVE_LEFT
    jz .top_no_left
    movd            m0, [topq+strideq*0-4]
    movd            m1, [topq+strideq*1-4]
    movd            [px-32*2-4], m0
    movd            [px-32*1-4], m1
    jmp .top_done
.no_top:
    mova            [px-32*2+ 0], m7
    movd            [px-32*2+16], m7
    mova            [px-32*1+ 0], m7
    movd            [px-32*1+16], m7
.top_no_left:                               ; also reached by fall-through from .no_top
    movd            [px-32*2- 4], m7
    movd            [px-32*1- 4], m7
.top_done:
    ; ---- two rows below the block ----
    test            edgeb, 8 ; HAVE_BOTTOM
    jz .no_bottom
    movifnidn       botq, r4mp
    mova            m0, [botq+strideq*0+ 0]
    movd            m1, [botq+strideq*0+16]
    mova            m2, [botq+strideq*1+ 0]
    movd            m3, [botq+strideq*1+16]
    mova            [px+32*8+ 0], m0
    movd            [px+32*8+16], m1
    mova            [px+32*9+ 0], m2
    movd            [px+32*9+16], m3
    test            edgeb, 1 ; HAVE_LEFT
    jz .bottom_no_left
    movd            m0, [botq+strideq*0-4]
    movd            m1, [botq+strideq*1-4]
    movd            [px+32*8- 4], m0
    movd            [px+32*9- 4], m1
    jmp .bottom_done
.no_bottom:
    mova            [px+32*8+ 0], m7
    movd            [px+32*8+16], m7
    mova            [px+32*9+ 0], m7
    movd            [px+32*9+16], m7
.bottom_no_left:                            ; also reached by fall-through from .no_bottom
    movd            [px+32*8- 4], m7
    movd            [px+32*9- 4], m7
.bottom_done:
    ; ---- left border of the block rows: one dword per row from the left array ----
    test            edgeb, 1 ; HAVE_LEFT
    jz .no_left
    movifnidn       leftq, r2mp
    movd            m0, [leftq+4*0]
    movd            m1, [leftq+4*1]
    movd            m2, [leftq+4*2]
    movd            m3, [leftq+4*3]
    movd            [px+32*0- 4], m0
    movd            [px+32*1- 4], m1
    movd            [px+32*2- 4], m2
    movd            [px+32*3- 4], m3
    movd            m0, [leftq+4*4]
    movd            m1, [leftq+4*5]
    movd            m2, [leftq+4*6]
    movd            m3, [leftq+4*7]
    movd            [px+32*4- 4], m0
    movd            [px+32*5- 4], m1
    movd            [px+32*6- 4], m2
    movd            [px+32*7- 4], m3
    jmp .left_done
.no_left:
    REPX {movd [px+32*x- 4], m7}, 0, 1, 2, 3, 4, 5, 6, 7
.left_done:
    ; ---- right border ----
    test            edgeb, 2 ; HAVE_RIGHT
    jnz .padding_done
    REPX {movd [px+32*x+16], m7}, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
.padding_done:
    CDEF_FILTER     8, 8

;-----------------------------------------------------------------------
; CDEF_DIR (no macro arguments)
; Emits cdef_dir_16bpc for the currently selected instruction set.
; Named args (from cglobal): src, stride, var, bdmax.
;
; Both paths multiply eight 16-byte rows of src by a per-bitdepth factor
; taken from dir_shift (pmulhuw, indexed by bdmax >> 11), pack the rows
; to bytes and sum them with psadbw against zero, then tail-jump into the
; .main label of the 8bpc cdef_dir function. The register/stack layout
; that .main expects is defined in that other file and is not visible
; here - treat the exact register assignments below as load-bearing.
;-----------------------------------------------------------------------
%macro CDEF_DIR 0
%if ARCH_X86_64
cglobal cdef_dir_16bpc, 4, 7, 16, src, stride, var, bdmax
    lea             r6, [dir_shift]
    shr             bdmaxd, 11 ; 0 for 10bpc, 1 for 12bpc
    movddup         m7, [r6+bdmaxq*8]       ; m7 = scale factor for this bitdepth
    lea             r6, [strideq*3]
    mova            m0, [srcq+strideq*0]
    mova            m1, [srcq+strideq*1]
    mova            m2, [srcq+strideq*2]
    mova            m3, [srcq+r6       ]
    lea             srcq, [srcq+strideq*4]
    mova            m4, [srcq+strideq*0]
    mova            m5, [srcq+strideq*1]
    mova            m6, [srcq+strideq*2]
    REPX {pmulhuw x, m7}, m0, m1, m2, m3, m4, m5, m6
    pmulhuw         m7, [srcq+r6       ]    ; row 7 is scaled in place, consuming the factor
    pxor            m8, m8
    ; byte-pack row pairs and reduce each 8-byte half to a sum
    packuswb        m9, m0, m1
    packuswb        m10, m2, m3
    packuswb        m11, m4, m5
    packuswb        m12, m6, m7
    REPX {psadbw x, m8}, m9, m10, m11, m12
    packssdw        m9, m10
    packssdw        m11, m12
    packssdw        m9, m11                 ; m9 = the eight sums as words
    jmp mangle(private_prefix %+ _cdef_dir_8bpc %+ SUFFIX).main
%else
cglobal cdef_dir_16bpc, 2, 4, 8, 96, src, stride, var, bdmax
    mov             bdmaxd, bdmaxm
    LEA             r2, dir_shift
    shr             bdmaxd, 11
    movddup         m7, [r2+bdmaxq*8]       ; m7 = scale factor for this bitdepth
    lea             r3, [strideq*3]
    pmulhuw         m3, m7, [srcq+strideq*0]
    pmulhuw         m4, m7, [srcq+strideq*1]
    pmulhuw         m5, m7, [srcq+strideq*2]
    pmulhuw         m6, m7, [srcq+r3       ]
    movddup         m1, [r2-dir_shift+pw_128]
    lea             srcq, [srcq+strideq*4]
    pxor            m0, m0
    ; rows 0-3: pack for the sums first, then bias by pw_128 and spill to the stack
    packuswb        m2, m3, m4
    psubw           m3, m1
    psubw           m4, m1
    mova            [esp+0x00], m3
    mova            [esp+0x10], m4
    packuswb        m3, m5, m6
    psadbw          m2, m0
    psadbw          m3, m0
    psubw           m5, m1
    psubw           m6, m1
    packssdw        m2, m3
    mova            [esp+0x20], m5
    ; NOTE(review): row 3 goes to 0x50 rather than 0x30; presumably the slot
    ; layout is dictated by the 8bpc .main - confirm against that file.
    mova            [esp+0x50], m6
    ; rows 4-7 stay in m4-m7
    pmulhuw         m4, m7, [srcq+strideq*0]
    pmulhuw         m5, m7, [srcq+strideq*1]
    pmulhuw         m6, m7, [srcq+strideq*2]
    pmulhuw         m7, [srcq+r3       ]
    packuswb        m3, m4, m5
    packuswb        m1, m6, m7              ; m1 (pw_128) is clobbered here ...
    psadbw          m3, m0
    psadbw          m1, m0
    packssdw        m3, m1
    movddup         m1, [r2-dir_shift+pw_128] ; ... so reload it before r2 is repointed
    LEA             r2, shufw_6543210x
    jmp mangle(private_prefix %+ _cdef_dir_8bpc %+ SUFFIX).main
%endif
%endmacro

; Instantiate cdef_dir_16bpc once per instruction set.
INIT_XMM ssse3
CDEF_DIR

INIT_XMM sse4
CDEF_DIR