; Copyright © 2021, VideoLAN and dav1d authors
; Copyright © 2021, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%include "ext/x86/x86inc.asm"

%if ARCH_X86_64

SECTION_RODATA

; DIR_TABLE stride
;
; Emits 12 rows of two signed bytes each. Every entry is written as
; "a * stride + b", with %1 substituted for the stride; the filter code adds
; these values directly to a byte address ([tmpq+offq+...]), so both the
; stride and the constant term are in bytes.
;
; Rows 8..11 repeat rows 0..3. CDEF_FILTER points dirq at row `dir`
; (2 bytes per row) and then reads rows dir+0 ([dirq+0], [dirq+1]),
; dir+2 ([dirq+4], [dirq+5]) and dir+4 ([dirq+8], [dirq+9]); the repeated
; rows let those reads run past row 7 without any wrap-around logic.
; NOTE(review): this presumes dir is in the range 0..7 - confirm with callers.
%macro DIR_TABLE 1 ; stride
    db  1 * %1 + 0,  2 * %1 + 0
    db  1 * %1 + 0,  2 * %1 - 2
    db -1 * %1 + 2, -2 * %1 + 4
    db  0 * %1 + 2, -1 * %1 + 4
    db  0 * %1 + 2,  0 * %1 + 4
    db  0 * %1 + 2,  1 * %1 + 4
    db  1 * %1 + 2,  2 * %1 + 4
    db  1 * %1 + 0,  2 * %1 + 2
    db  1 * %1 + 0,  2 * %1 + 0 ; rows 8..11: copies of rows 0..3
    db  1 * %1 + 0,  2 * %1 - 2
    db -1 * %1 + 2, -2 * %1 + 4
    db  0 * %1 + 2, -1 * %1 + 4
%endmacro

; One offset table per block width; CDEF_FILTER selects it as dir_table%1.
dir_table4: DIR_TABLE 16
dir_table8: DIR_TABLE 32

; Primary tap pairs. CDEF_FILTER masks pri to 0 or 4 and broadcasts the dwords
; at [pri_taps+pri] (tap k0) and [pri_taps+pri+8] (tap k1):
;   pri == 0 -> k0 = 4, k1 = 2
;   pri == 4 -> k0 = 3, k1 = 3
pri_taps:   dw  4, 4, 3, 3, 2, 2, 3, 3

dir_shift:  times 2 dw 0x4000
            times 2 dw 0x1000

; pmulhrsw by 2048 yields (x + 8) >> 4, the final rounding of the filter sum.
pw_2048:    times 2 dw 2048
pw_m16384:  times 2 dw -16384

cextern cdef_dir_8bpc_avx2.main

SECTION .text

%macro CDEF_FILTER 2 ; w, h
    DEFINE_ARGS dst, stride, _, dir, pridmp, pri, sec, tmp
    movifnidn     prid, r5m
    movifnidn     secd, r6m
    mov           dird, r7m
    vpbroadcastd    m8, [base+pw_2048]
    lea           dirq, [base+dir_table%1+dirq*2]
    test          prid, prid
    jz .sec_only
%if WIN64
    vpbroadcastw    m6, prim
    movaps  [rsp+16*0], xmm9
    movaps  [rsp+16*1], xmm10
Coastguard Worker%else 76*c0909341SAndroid Build Coastguard Worker movd xm6, prid 77*c0909341SAndroid Build Coastguard Worker vpbroadcastw m6, xm6 78*c0909341SAndroid Build Coastguard Worker%endif 79*c0909341SAndroid Build Coastguard Worker lzcnt pridmpd, prid 80*c0909341SAndroid Build Coastguard Worker rorx tmpd, prid, 2 81*c0909341SAndroid Build Coastguard Worker cmp dword r10m, 0xfff ; if (bpc == 12) 82*c0909341SAndroid Build Coastguard Worker cmove prid, tmpd ; pri >>= 2 83*c0909341SAndroid Build Coastguard Worker mov tmpd, r8m ; damping 84*c0909341SAndroid Build Coastguard Worker and prid, 4 85*c0909341SAndroid Build Coastguard Worker sub tmpd, 31 86*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [base+pri_taps+priq+8*0] 87*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [base+pri_taps+priq+8*1] 88*c0909341SAndroid Build Coastguard Worker test secd, secd 89*c0909341SAndroid Build Coastguard Worker jz .pri_only 90*c0909341SAndroid Build Coastguard Worker%if WIN64 91*c0909341SAndroid Build Coastguard Worker movaps r8m, xmm13 92*c0909341SAndroid Build Coastguard Worker vpbroadcastw m13, secm 93*c0909341SAndroid Build Coastguard Worker movaps r4m, xmm11 94*c0909341SAndroid Build Coastguard Worker movaps r6m, xmm12 95*c0909341SAndroid Build Coastguard Worker%else 96*c0909341SAndroid Build Coastguard Worker movd xm0, secd 97*c0909341SAndroid Build Coastguard Worker vpbroadcastw m13, xm0 98*c0909341SAndroid Build Coastguard Worker%endif 99*c0909341SAndroid Build Coastguard Worker lzcnt secd, secd 100*c0909341SAndroid Build Coastguard Worker xor prid, prid 101*c0909341SAndroid Build Coastguard Worker add pridmpd, tmpd 102*c0909341SAndroid Build Coastguard Worker cmovs pridmpd, prid 103*c0909341SAndroid Build Coastguard Worker add secd, tmpd 104*c0909341SAndroid Build Coastguard Worker lea tmpq, [px] 105*c0909341SAndroid Build Coastguard Worker mov [pri_shift], pridmpq 106*c0909341SAndroid Build Coastguard Worker mov [sec_shift], secq 
107*c0909341SAndroid Build Coastguard Worker%rep %1*%2/16 108*c0909341SAndroid Build Coastguard Worker call mangle(private_prefix %+ _cdef_filter_%1x%1_16bpc %+ SUFFIX).pri_sec 109*c0909341SAndroid Build Coastguard Worker%endrep 110*c0909341SAndroid Build Coastguard Worker%if WIN64 111*c0909341SAndroid Build Coastguard Worker movaps xmm11, r4m 112*c0909341SAndroid Build Coastguard Worker movaps xmm12, r6m 113*c0909341SAndroid Build Coastguard Worker movaps xmm13, r8m 114*c0909341SAndroid Build Coastguard Worker%endif 115*c0909341SAndroid Build Coastguard Worker jmp .pri_end 116*c0909341SAndroid Build Coastguard Worker.pri_only: 117*c0909341SAndroid Build Coastguard Worker add pridmpd, tmpd 118*c0909341SAndroid Build Coastguard Worker cmovs pridmpd, secd 119*c0909341SAndroid Build Coastguard Worker lea tmpq, [px] 120*c0909341SAndroid Build Coastguard Worker mov [pri_shift], pridmpq 121*c0909341SAndroid Build Coastguard Worker%rep %1*%2/16 122*c0909341SAndroid Build Coastguard Worker call mangle(private_prefix %+ _cdef_filter_%1x%1_16bpc %+ SUFFIX).pri 123*c0909341SAndroid Build Coastguard Worker%endrep 124*c0909341SAndroid Build Coastguard Worker.pri_end: 125*c0909341SAndroid Build Coastguard Worker%if WIN64 126*c0909341SAndroid Build Coastguard Worker movaps xmm9, [rsp+16*0] 127*c0909341SAndroid Build Coastguard Worker movaps xmm10, [rsp+16*1] 128*c0909341SAndroid Build Coastguard Worker%endif 129*c0909341SAndroid Build Coastguard Worker.end: 130*c0909341SAndroid Build Coastguard Worker RET 131*c0909341SAndroid Build Coastguard Worker.sec_only: 132*c0909341SAndroid Build Coastguard Worker mov tmpd, r8m ; damping 133*c0909341SAndroid Build Coastguard Worker%if WIN64 134*c0909341SAndroid Build Coastguard Worker vpbroadcastw m6, secm 135*c0909341SAndroid Build Coastguard Worker%else 136*c0909341SAndroid Build Coastguard Worker movd xm6, secd 137*c0909341SAndroid Build Coastguard Worker vpbroadcastw m6, xm6 138*c0909341SAndroid Build Coastguard Worker%endif 
139*c0909341SAndroid Build Coastguard Worker tzcnt secd, secd 140*c0909341SAndroid Build Coastguard Worker sub tmpd, secd 141*c0909341SAndroid Build Coastguard Worker mov [sec_shift], tmpq 142*c0909341SAndroid Build Coastguard Worker lea tmpq, [px] 143*c0909341SAndroid Build Coastguard Worker%rep %1*%2/16 144*c0909341SAndroid Build Coastguard Worker call mangle(private_prefix %+ _cdef_filter_%1x%1_16bpc %+ SUFFIX).sec 145*c0909341SAndroid Build Coastguard Worker%endrep 146*c0909341SAndroid Build Coastguard Worker jmp .end 147*c0909341SAndroid Build Coastguard Worker%if %1 == %2 148*c0909341SAndroid Build Coastguard WorkerALIGN function_align 149*c0909341SAndroid Build Coastguard Worker.pri: 150*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+4] ; off_k0 151*c0909341SAndroid Build Coastguard Worker%if %1 == 4 152*c0909341SAndroid Build Coastguard Worker mova m1, [tmpq+32*0] 153*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, [tmpq+32*1] ; 0 2 1 3 154*c0909341SAndroid Build Coastguard Worker movu m2, [tmpq+offq+32*0] 155*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, [tmpq+offq+32*1] ; k0p0 156*c0909341SAndroid Build Coastguard Worker neg offq 157*c0909341SAndroid Build Coastguard Worker movu m3, [tmpq+offq+32*0] 158*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, [tmpq+offq+32*1] ; k0p1 159*c0909341SAndroid Build Coastguard Worker%else 160*c0909341SAndroid Build Coastguard Worker mova xm1, [tmpq+32*0] 161*c0909341SAndroid Build Coastguard Worker vinserti128 m1, [tmpq+32*1], 1 162*c0909341SAndroid Build Coastguard Worker movu xm2, [tmpq+offq+32*0] 163*c0909341SAndroid Build Coastguard Worker vinserti128 m2, [tmpq+offq+32*1], 1 164*c0909341SAndroid Build Coastguard Worker neg offq 165*c0909341SAndroid Build Coastguard Worker movu xm3, [tmpq+offq+32*0] 166*c0909341SAndroid Build Coastguard Worker vinserti128 m3, [tmpq+offq+32*1], 1 167*c0909341SAndroid Build Coastguard Worker%endif 168*c0909341SAndroid Build Coastguard Worker 
movsx offq, byte [dirq+5] ; off_k1 169*c0909341SAndroid Build Coastguard Worker psubw m2, m1 ; diff_k0p0 170*c0909341SAndroid Build Coastguard Worker psubw m3, m1 ; diff_k0p1 171*c0909341SAndroid Build Coastguard Worker pabsw m4, m2 ; adiff_k0p0 172*c0909341SAndroid Build Coastguard Worker psrlw m5, m4, [pri_shift+gprsize] 173*c0909341SAndroid Build Coastguard Worker psubusw m0, m6, m5 174*c0909341SAndroid Build Coastguard Worker pabsw m5, m3 ; adiff_k0p1 175*c0909341SAndroid Build Coastguard Worker pminsw m0, m4 176*c0909341SAndroid Build Coastguard Worker psrlw m4, m5, [pri_shift+gprsize] 177*c0909341SAndroid Build Coastguard Worker psignw m0, m2 ; constrain(diff_k0p0) 178*c0909341SAndroid Build Coastguard Worker psubusw m2, m6, m4 179*c0909341SAndroid Build Coastguard Worker pminsw m2, m5 180*c0909341SAndroid Build Coastguard Worker%if %1 == 4 181*c0909341SAndroid Build Coastguard Worker movu m4, [tmpq+offq+32*0] 182*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, [tmpq+offq+32*1] ; k1p0 183*c0909341SAndroid Build Coastguard Worker neg offq 184*c0909341SAndroid Build Coastguard Worker movu m5, [tmpq+offq+32*0] 185*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, [tmpq+offq+32*1] ; k1p1 186*c0909341SAndroid Build Coastguard Worker%else 187*c0909341SAndroid Build Coastguard Worker movu xm4, [tmpq+offq+32*0] 188*c0909341SAndroid Build Coastguard Worker vinserti128 m4, [tmpq+offq+32*1], 1 189*c0909341SAndroid Build Coastguard Worker neg offq 190*c0909341SAndroid Build Coastguard Worker movu xm5, [tmpq+offq+32*0] 191*c0909341SAndroid Build Coastguard Worker vinserti128 m5, [tmpq+offq+32*1], 1 192*c0909341SAndroid Build Coastguard Worker%endif 193*c0909341SAndroid Build Coastguard Worker psubw m4, m1 ; diff_k1p0 194*c0909341SAndroid Build Coastguard Worker psubw m5, m1 ; diff_k1p1 195*c0909341SAndroid Build Coastguard Worker psignw m2, m3 ; constrain(diff_k0p1) 196*c0909341SAndroid Build Coastguard Worker pabsw m3, m4 ; adiff_k1p0 197*c0909341SAndroid 
Build Coastguard Worker paddw m0, m2 ; constrain(diff_k0) 198*c0909341SAndroid Build Coastguard Worker psrlw m2, m3, [pri_shift+gprsize] 199*c0909341SAndroid Build Coastguard Worker psubusw m7, m6, m2 200*c0909341SAndroid Build Coastguard Worker pabsw m2, m5 ; adiff_k1p1 201*c0909341SAndroid Build Coastguard Worker pminsw m7, m3 202*c0909341SAndroid Build Coastguard Worker psrlw m3, m2, [pri_shift+gprsize] 203*c0909341SAndroid Build Coastguard Worker psignw m7, m4 ; constrain(diff_k1p0) 204*c0909341SAndroid Build Coastguard Worker psubusw m4, m6, m3 205*c0909341SAndroid Build Coastguard Worker pminsw m4, m2 206*c0909341SAndroid Build Coastguard Worker psignw m4, m5 ; constrain(diff_k1p1) 207*c0909341SAndroid Build Coastguard Worker paddw m7, m4 ; constrain(diff_k1) 208*c0909341SAndroid Build Coastguard Worker pmullw m0, m9 ; pri_tap_k0 209*c0909341SAndroid Build Coastguard Worker pmullw m7, m10 ; pri_tap_k1 210*c0909341SAndroid Build Coastguard Worker paddw m0, m7 ; sum 211*c0909341SAndroid Build Coastguard Worker psraw m2, m0, 15 212*c0909341SAndroid Build Coastguard Worker paddw m0, m2 213*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m8 214*c0909341SAndroid Build Coastguard Worker add tmpq, 32*2 215*c0909341SAndroid Build Coastguard Worker paddw m0, m1 216*c0909341SAndroid Build Coastguard Worker%if %1 == 4 217*c0909341SAndroid Build Coastguard Worker vextracti128 xm1, m0, 1 218*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], xm0 219*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*1], xm1 220*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*2], xm0 221*c0909341SAndroid Build Coastguard Worker movhps [dstq+r9 ], xm1 222*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 223*c0909341SAndroid Build Coastguard Worker%else 224*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], xm0 225*c0909341SAndroid Build Coastguard Worker vextracti128 [dstq+strideq*1], m0, 1 226*c0909341SAndroid Build 
Coastguard Worker lea dstq, [dstq+strideq*2] 227*c0909341SAndroid Build Coastguard Worker%endif 228*c0909341SAndroid Build Coastguard Worker ret 229*c0909341SAndroid Build Coastguard WorkerALIGN function_align 230*c0909341SAndroid Build Coastguard Worker.sec: 231*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+8] ; off1_k0 232*c0909341SAndroid Build Coastguard Worker%if %1 == 4 233*c0909341SAndroid Build Coastguard Worker mova m1, [tmpq+32*0] 234*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, [tmpq+32*1] 235*c0909341SAndroid Build Coastguard Worker movu m2, [tmpq+offq+32*0] 236*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, [tmpq+offq+32*1] ; k0s0 237*c0909341SAndroid Build Coastguard Worker neg offq 238*c0909341SAndroid Build Coastguard Worker movu m3, [tmpq+offq+32*0] 239*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, [tmpq+offq+32*1] ; k0s1 240*c0909341SAndroid Build Coastguard Worker%else 241*c0909341SAndroid Build Coastguard Worker mova xm1, [tmpq+32*0] 242*c0909341SAndroid Build Coastguard Worker vinserti128 m1, [tmpq+32*1], 1 243*c0909341SAndroid Build Coastguard Worker movu xm2, [tmpq+offq+32*0] 244*c0909341SAndroid Build Coastguard Worker vinserti128 m2, [tmpq+offq+32*1], 1 245*c0909341SAndroid Build Coastguard Worker neg offq 246*c0909341SAndroid Build Coastguard Worker movu xm3, [tmpq+offq+32*0] 247*c0909341SAndroid Build Coastguard Worker vinserti128 m3, [tmpq+offq+32*1], 1 248*c0909341SAndroid Build Coastguard Worker%endif 249*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+0] ; off2_k0 250*c0909341SAndroid Build Coastguard Worker psubw m2, m1 ; diff_k0s0 251*c0909341SAndroid Build Coastguard Worker psubw m3, m1 ; diff_k0s1 252*c0909341SAndroid Build Coastguard Worker pabsw m4, m2 ; adiff_k0s0 253*c0909341SAndroid Build Coastguard Worker psrlw m5, m4, [sec_shift+gprsize] 254*c0909341SAndroid Build Coastguard Worker psubusw m0, m6, m5 255*c0909341SAndroid Build Coastguard Worker pabsw m5, m3 ; 
adiff_k0s1 256*c0909341SAndroid Build Coastguard Worker pminsw m0, m4 257*c0909341SAndroid Build Coastguard Worker psrlw m4, m5, [sec_shift+gprsize] 258*c0909341SAndroid Build Coastguard Worker psignw m0, m2 ; constrain(diff_k0s0) 259*c0909341SAndroid Build Coastguard Worker psubusw m2, m6, m4 260*c0909341SAndroid Build Coastguard Worker pminsw m2, m5 261*c0909341SAndroid Build Coastguard Worker%if %1 == 4 262*c0909341SAndroid Build Coastguard Worker movu m4, [tmpq+offq+32*0] 263*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, [tmpq+offq+32*1] ; k0s2 264*c0909341SAndroid Build Coastguard Worker neg offq 265*c0909341SAndroid Build Coastguard Worker movu m5, [tmpq+offq+32*0] 266*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, [tmpq+offq+32*1] ; k0s3 267*c0909341SAndroid Build Coastguard Worker%else 268*c0909341SAndroid Build Coastguard Worker movu xm4, [tmpq+offq+32*0] 269*c0909341SAndroid Build Coastguard Worker vinserti128 m4, [tmpq+offq+32*1], 1 270*c0909341SAndroid Build Coastguard Worker neg offq 271*c0909341SAndroid Build Coastguard Worker movu xm5, [tmpq+offq+32*0] 272*c0909341SAndroid Build Coastguard Worker vinserti128 m5, [tmpq+offq+32*1], 1 273*c0909341SAndroid Build Coastguard Worker%endif 274*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+9] ; off1_k1 275*c0909341SAndroid Build Coastguard Worker psubw m4, m1 ; diff_k0s2 276*c0909341SAndroid Build Coastguard Worker psubw m5, m1 ; diff_k0s3 277*c0909341SAndroid Build Coastguard Worker psignw m2, m3 ; constrain(diff_k0s1) 278*c0909341SAndroid Build Coastguard Worker pabsw m3, m4 ; adiff_k0s2 279*c0909341SAndroid Build Coastguard Worker paddw m0, m2 280*c0909341SAndroid Build Coastguard Worker psrlw m2, m3, [sec_shift+gprsize] 281*c0909341SAndroid Build Coastguard Worker psubusw m7, m6, m2 282*c0909341SAndroid Build Coastguard Worker pabsw m2, m5 ; adiff_k0s3 283*c0909341SAndroid Build Coastguard Worker pminsw m7, m3 284*c0909341SAndroid Build Coastguard Worker psrlw m3, m2, 
[sec_shift+gprsize] 285*c0909341SAndroid Build Coastguard Worker psignw m7, m4 ; constrain(diff_k0s2) 286*c0909341SAndroid Build Coastguard Worker psubusw m4, m6, m3 287*c0909341SAndroid Build Coastguard Worker pminsw m4, m2 288*c0909341SAndroid Build Coastguard Worker%if %1 == 4 289*c0909341SAndroid Build Coastguard Worker movu m2, [tmpq+offq+32*0] 290*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, [tmpq+offq+32*1] ; k1s0 291*c0909341SAndroid Build Coastguard Worker neg offq 292*c0909341SAndroid Build Coastguard Worker movu m3, [tmpq+offq+32*0] 293*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, [tmpq+offq+32*1] ; k1s1 294*c0909341SAndroid Build Coastguard Worker%else 295*c0909341SAndroid Build Coastguard Worker movu xm2, [tmpq+offq+32*0] 296*c0909341SAndroid Build Coastguard Worker vinserti128 m2, [tmpq+offq+32*1], 1 297*c0909341SAndroid Build Coastguard Worker neg offq 298*c0909341SAndroid Build Coastguard Worker movu xm3, [tmpq+offq+32*0] 299*c0909341SAndroid Build Coastguard Worker vinserti128 m3, [tmpq+offq+32*1], 1 300*c0909341SAndroid Build Coastguard Worker%endif 301*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+1] ; off2_k1 302*c0909341SAndroid Build Coastguard Worker paddw m0, m7 303*c0909341SAndroid Build Coastguard Worker psignw m4, m5 ; constrain(diff_k0s3) 304*c0909341SAndroid Build Coastguard Worker paddw m0, m4 ; constrain(diff_k0) 305*c0909341SAndroid Build Coastguard Worker psubw m2, m1 ; diff_k1s0 306*c0909341SAndroid Build Coastguard Worker psubw m3, m1 ; diff_k1s1 307*c0909341SAndroid Build Coastguard Worker paddw m0, m0 ; sec_tap_k0 308*c0909341SAndroid Build Coastguard Worker pabsw m4, m2 ; adiff_k1s0 309*c0909341SAndroid Build Coastguard Worker psrlw m5, m4, [sec_shift+gprsize] 310*c0909341SAndroid Build Coastguard Worker psubusw m7, m6, m5 311*c0909341SAndroid Build Coastguard Worker pabsw m5, m3 ; adiff_k1s1 312*c0909341SAndroid Build Coastguard Worker pminsw m7, m4 313*c0909341SAndroid Build Coastguard 
Worker psrlw m4, m5, [sec_shift+gprsize] 314*c0909341SAndroid Build Coastguard Worker psignw m7, m2 ; constrain(diff_k1s0) 315*c0909341SAndroid Build Coastguard Worker psubusw m2, m6, m4 316*c0909341SAndroid Build Coastguard Worker pminsw m2, m5 317*c0909341SAndroid Build Coastguard Worker%if %1 == 4 318*c0909341SAndroid Build Coastguard Worker movu m4, [tmpq+offq+32*0] 319*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, [tmpq+offq+32*1] ; k1s2 320*c0909341SAndroid Build Coastguard Worker neg offq 321*c0909341SAndroid Build Coastguard Worker movu m5, [tmpq+offq+32*0] 322*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, [tmpq+offq+32*1] ; k1s3 323*c0909341SAndroid Build Coastguard Worker%else 324*c0909341SAndroid Build Coastguard Worker movu xm4, [tmpq+offq+32*0] 325*c0909341SAndroid Build Coastguard Worker vinserti128 m4, [tmpq+offq+32*1], 1 326*c0909341SAndroid Build Coastguard Worker neg offq 327*c0909341SAndroid Build Coastguard Worker movu xm5, [tmpq+offq+32*0] 328*c0909341SAndroid Build Coastguard Worker vinserti128 m5, [tmpq+offq+32*1], 1 329*c0909341SAndroid Build Coastguard Worker%endif 330*c0909341SAndroid Build Coastguard Worker paddw m0, m7 331*c0909341SAndroid Build Coastguard Worker psubw m4, m1 ; diff_k1s2 332*c0909341SAndroid Build Coastguard Worker psubw m5, m1 ; diff_k1s3 333*c0909341SAndroid Build Coastguard Worker psignw m2, m3 ; constrain(diff_k1s1) 334*c0909341SAndroid Build Coastguard Worker pabsw m3, m4 ; adiff_k1s2 335*c0909341SAndroid Build Coastguard Worker paddw m0, m2 336*c0909341SAndroid Build Coastguard Worker psrlw m2, m3, [sec_shift+gprsize] 337*c0909341SAndroid Build Coastguard Worker psubusw m7, m6, m2 338*c0909341SAndroid Build Coastguard Worker pabsw m2, m5 ; adiff_k1s3 339*c0909341SAndroid Build Coastguard Worker pminsw m7, m3 340*c0909341SAndroid Build Coastguard Worker psrlw m3, m2, [sec_shift+gprsize] 341*c0909341SAndroid Build Coastguard Worker psignw m7, m4 ; constrain(diff_k1s2) 342*c0909341SAndroid Build 
Coastguard Worker psubusw m4, m6, m3 343*c0909341SAndroid Build Coastguard Worker pminsw m4, m2 344*c0909341SAndroid Build Coastguard Worker paddw m0, m7 345*c0909341SAndroid Build Coastguard Worker psignw m4, m5 ; constrain(diff_k1s3) 346*c0909341SAndroid Build Coastguard Worker paddw m0, m4 ; sum 347*c0909341SAndroid Build Coastguard Worker psraw m2, m0, 15 348*c0909341SAndroid Build Coastguard Worker paddw m0, m2 349*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m8 350*c0909341SAndroid Build Coastguard Worker add tmpq, 32*2 351*c0909341SAndroid Build Coastguard Worker paddw m0, m1 352*c0909341SAndroid Build Coastguard Worker%if %1 == 4 353*c0909341SAndroid Build Coastguard Worker vextracti128 xm1, m0, 1 354*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], xm0 355*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*1], xm1 356*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*2], xm0 357*c0909341SAndroid Build Coastguard Worker movhps [dstq+r9 ], xm1 358*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 359*c0909341SAndroid Build Coastguard Worker%else 360*c0909341SAndroid Build Coastguard Worker mova [dstq+strideq*0], xm0 361*c0909341SAndroid Build Coastguard Worker vextracti128 [dstq+strideq*1], m0, 1 362*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 363*c0909341SAndroid Build Coastguard Worker%endif 364*c0909341SAndroid Build Coastguard Worker ret 365*c0909341SAndroid Build Coastguard WorkerALIGN function_align 366*c0909341SAndroid Build Coastguard Worker.pri_sec: 367*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+8] ; off2_k0 368*c0909341SAndroid Build Coastguard Worker%if %1 == 4 369*c0909341SAndroid Build Coastguard Worker mova m1, [tmpq+32*0] 370*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, [tmpq+32*1] 371*c0909341SAndroid Build Coastguard Worker movu m2, [tmpq+offq+32*0] 372*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, [tmpq+offq+32*1] 
; k0s0 373*c0909341SAndroid Build Coastguard Worker neg offq 374*c0909341SAndroid Build Coastguard Worker movu m3, [tmpq+offq+32*0] 375*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, [tmpq+offq+32*1] ; k0s1 376*c0909341SAndroid Build Coastguard Worker%else 377*c0909341SAndroid Build Coastguard Worker mova xm1, [dstq+strideq*0] 378*c0909341SAndroid Build Coastguard Worker vinserti128 m1, [dstq+strideq*1], 1 379*c0909341SAndroid Build Coastguard Worker movu xm2, [tmpq+offq+32*0] 380*c0909341SAndroid Build Coastguard Worker vinserti128 m2, [tmpq+offq+32*1], 1 381*c0909341SAndroid Build Coastguard Worker neg offq 382*c0909341SAndroid Build Coastguard Worker movu xm3, [tmpq+offq+32*0] 383*c0909341SAndroid Build Coastguard Worker vinserti128 m3, [tmpq+offq+32*1], 1 384*c0909341SAndroid Build Coastguard Worker%endif 385*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+0] ; off3_k0 386*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m2, m3 387*c0909341SAndroid Build Coastguard Worker pminuw m12, m2, m3 388*c0909341SAndroid Build Coastguard Worker psubw m2, m1 ; diff_k0s0 389*c0909341SAndroid Build Coastguard Worker psubw m3, m1 ; diff_k0s1 390*c0909341SAndroid Build Coastguard Worker pabsw m4, m2 ; adiff_k0s0 391*c0909341SAndroid Build Coastguard Worker psrlw m5, m4, [sec_shift+gprsize] 392*c0909341SAndroid Build Coastguard Worker psubusw m0, m13, m5 393*c0909341SAndroid Build Coastguard Worker pabsw m5, m3 ; adiff_k0s1 394*c0909341SAndroid Build Coastguard Worker pminsw m0, m4 395*c0909341SAndroid Build Coastguard Worker psrlw m4, m5, [sec_shift+gprsize] 396*c0909341SAndroid Build Coastguard Worker psignw m0, m2 ; constrain(diff_k0s0) 397*c0909341SAndroid Build Coastguard Worker psubusw m2, m13, m4 398*c0909341SAndroid Build Coastguard Worker pminsw m2, m5 399*c0909341SAndroid Build Coastguard Worker%if %1 == 4 400*c0909341SAndroid Build Coastguard Worker movu m4, [tmpq+offq+32*0] 401*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, 
[tmpq+offq+32*1] ; k0s2 402*c0909341SAndroid Build Coastguard Worker neg offq 403*c0909341SAndroid Build Coastguard Worker movu m5, [tmpq+offq+32*0] 404*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, [tmpq+offq+32*1] ; k0s3 405*c0909341SAndroid Build Coastguard Worker%else 406*c0909341SAndroid Build Coastguard Worker movu xm4, [tmpq+offq+32*0] 407*c0909341SAndroid Build Coastguard Worker vinserti128 m4, [tmpq+offq+32*1], 1 408*c0909341SAndroid Build Coastguard Worker neg offq 409*c0909341SAndroid Build Coastguard Worker movu xm5, [tmpq+offq+32*0] 410*c0909341SAndroid Build Coastguard Worker vinserti128 m5, [tmpq+offq+32*1], 1 411*c0909341SAndroid Build Coastguard Worker%endif 412*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+9] ; off2_k1 413*c0909341SAndroid Build Coastguard Worker psignw m2, m3 ; constrain(diff_k0s1) 414*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m4 415*c0909341SAndroid Build Coastguard Worker pminuw m12, m4 416*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m5 417*c0909341SAndroid Build Coastguard Worker pminuw m12, m5 418*c0909341SAndroid Build Coastguard Worker psubw m4, m1 ; diff_k0s2 419*c0909341SAndroid Build Coastguard Worker psubw m5, m1 ; diff_k0s3 420*c0909341SAndroid Build Coastguard Worker paddw m0, m2 421*c0909341SAndroid Build Coastguard Worker pabsw m3, m4 ; adiff_k0s2 422*c0909341SAndroid Build Coastguard Worker psrlw m2, m3, [sec_shift+gprsize] 423*c0909341SAndroid Build Coastguard Worker psubusw m7, m13, m2 424*c0909341SAndroid Build Coastguard Worker pabsw m2, m5 ; adiff_k0s3 425*c0909341SAndroid Build Coastguard Worker pminsw m7, m3 426*c0909341SAndroid Build Coastguard Worker psrlw m3, m2, [sec_shift+gprsize] 427*c0909341SAndroid Build Coastguard Worker psignw m7, m4 ; constrain(diff_k0s2) 428*c0909341SAndroid Build Coastguard Worker psubusw m4, m13, m3 429*c0909341SAndroid Build Coastguard Worker pminsw m4, m2 430*c0909341SAndroid Build Coastguard Worker%if %1 == 4 
431*c0909341SAndroid Build Coastguard Worker movu m2, [tmpq+offq+32*0] 432*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, [tmpq+offq+32*1] ; k1s0 433*c0909341SAndroid Build Coastguard Worker neg offq 434*c0909341SAndroid Build Coastguard Worker movu m3, [tmpq+offq+32*0] 435*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, [tmpq+offq+32*1] ; k1s1 436*c0909341SAndroid Build Coastguard Worker%else 437*c0909341SAndroid Build Coastguard Worker movu xm2, [tmpq+offq+32*0] 438*c0909341SAndroid Build Coastguard Worker vinserti128 m2, [tmpq+offq+32*1], 1 439*c0909341SAndroid Build Coastguard Worker neg offq 440*c0909341SAndroid Build Coastguard Worker movu xm3, [tmpq+offq+32*0] 441*c0909341SAndroid Build Coastguard Worker vinserti128 m3, [tmpq+offq+32*1], 1 442*c0909341SAndroid Build Coastguard Worker%endif 443*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+1] ; off3_k1 444*c0909341SAndroid Build Coastguard Worker paddw m0, m7 445*c0909341SAndroid Build Coastguard Worker psignw m4, m5 ; constrain(diff_k0s3) 446*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m2 447*c0909341SAndroid Build Coastguard Worker pminuw m12, m2 448*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m3 449*c0909341SAndroid Build Coastguard Worker pminuw m12, m3 450*c0909341SAndroid Build Coastguard Worker paddw m0, m4 ; constrain(diff_k0) 451*c0909341SAndroid Build Coastguard Worker psubw m2, m1 ; diff_k1s0 452*c0909341SAndroid Build Coastguard Worker psubw m3, m1 ; diff_k1s1 453*c0909341SAndroid Build Coastguard Worker paddw m0, m0 ; sec_tap_k0 454*c0909341SAndroid Build Coastguard Worker pabsw m4, m2 ; adiff_k1s0 455*c0909341SAndroid Build Coastguard Worker psrlw m5, m4, [sec_shift+gprsize] 456*c0909341SAndroid Build Coastguard Worker psubusw m7, m13, m5 457*c0909341SAndroid Build Coastguard Worker pabsw m5, m3 ; adiff_k1s1 458*c0909341SAndroid Build Coastguard Worker pminsw m7, m4 459*c0909341SAndroid Build Coastguard Worker psrlw m4, m5, [sec_shift+gprsize] 
460*c0909341SAndroid Build Coastguard Worker psignw m7, m2 ; constrain(diff_k1s0) 461*c0909341SAndroid Build Coastguard Worker psubusw m2, m13, m4 462*c0909341SAndroid Build Coastguard Worker pminsw m2, m5 463*c0909341SAndroid Build Coastguard Worker%if %1 == 4 464*c0909341SAndroid Build Coastguard Worker movu m4, [tmpq+offq+32*0] 465*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, [tmpq+offq+32*1] ; k1s2 466*c0909341SAndroid Build Coastguard Worker neg offq 467*c0909341SAndroid Build Coastguard Worker movu m5, [tmpq+offq+32*0] 468*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, [tmpq+offq+32*1] ; k1s3 469*c0909341SAndroid Build Coastguard Worker%else 470*c0909341SAndroid Build Coastguard Worker movu xm4, [tmpq+offq+32*0] 471*c0909341SAndroid Build Coastguard Worker vinserti128 m4, [tmpq+offq+32*1], 1 472*c0909341SAndroid Build Coastguard Worker neg offq 473*c0909341SAndroid Build Coastguard Worker movu xm5, [tmpq+offq+32*0] 474*c0909341SAndroid Build Coastguard Worker vinserti128 m5, [tmpq+offq+32*1], 1 475*c0909341SAndroid Build Coastguard Worker%endif 476*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+4] ; off1_k0 477*c0909341SAndroid Build Coastguard Worker paddw m0, m7 478*c0909341SAndroid Build Coastguard Worker psignw m2, m3 ; constrain(diff_k1s1) 479*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m4 480*c0909341SAndroid Build Coastguard Worker pminuw m12, m4 481*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m5 482*c0909341SAndroid Build Coastguard Worker pminuw m12, m5 483*c0909341SAndroid Build Coastguard Worker psubw m4, m1 ; diff_k1s2 484*c0909341SAndroid Build Coastguard Worker psubw m5, m1 ; diff_k1s3 485*c0909341SAndroid Build Coastguard Worker pabsw m3, m4 ; adiff_k1s2 486*c0909341SAndroid Build Coastguard Worker paddw m0, m2 487*c0909341SAndroid Build Coastguard Worker psrlw m2, m3, [sec_shift+gprsize] 488*c0909341SAndroid Build Coastguard Worker psubusw m7, m13, m2 489*c0909341SAndroid Build Coastguard 
Worker pabsw m2, m5 ; adiff_k1s3 490*c0909341SAndroid Build Coastguard Worker pminsw m7, m3 491*c0909341SAndroid Build Coastguard Worker psrlw m3, m2, [sec_shift+gprsize] 492*c0909341SAndroid Build Coastguard Worker psignw m7, m4 ; constrain(diff_k1s2) 493*c0909341SAndroid Build Coastguard Worker psubusw m4, m13, m3 494*c0909341SAndroid Build Coastguard Worker pminsw m4, m2 495*c0909341SAndroid Build Coastguard Worker paddw m0, m7 496*c0909341SAndroid Build Coastguard Worker%if %1 == 4 497*c0909341SAndroid Build Coastguard Worker movu m2, [tmpq+offq+32*0] 498*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, [tmpq+offq+32*1] ; k0p0 499*c0909341SAndroid Build Coastguard Worker neg offq 500*c0909341SAndroid Build Coastguard Worker movu m3, [tmpq+offq+32*0] 501*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, [tmpq+offq+32*1] ; k0p1 502*c0909341SAndroid Build Coastguard Worker%else 503*c0909341SAndroid Build Coastguard Worker movu xm2, [tmpq+offq+32*0] 504*c0909341SAndroid Build Coastguard Worker vinserti128 m2, [tmpq+offq+32*1], 1 505*c0909341SAndroid Build Coastguard Worker neg offq 506*c0909341SAndroid Build Coastguard Worker movu xm3, [tmpq+offq+32*0] 507*c0909341SAndroid Build Coastguard Worker vinserti128 m3, [tmpq+offq+32*1], 1 508*c0909341SAndroid Build Coastguard Worker%endif 509*c0909341SAndroid Build Coastguard Worker movsx offq, byte [dirq+5] ; off1_k1 510*c0909341SAndroid Build Coastguard Worker psignw m4, m5 ; constrain(diff_k1s3) 511*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m2 512*c0909341SAndroid Build Coastguard Worker pminuw m12, m2 513*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m3 514*c0909341SAndroid Build Coastguard Worker pminuw m12, m3 515*c0909341SAndroid Build Coastguard Worker psubw m2, m1 ; diff_k0p0 516*c0909341SAndroid Build Coastguard Worker psubw m3, m1 ; diff_k0p1 517*c0909341SAndroid Build Coastguard Worker paddw m0, m4 518*c0909341SAndroid Build Coastguard Worker pabsw m4, m2 ; adiff_k0p0 
519*c0909341SAndroid Build Coastguard Worker psrlw m5, m4, [pri_shift+gprsize] 520*c0909341SAndroid Build Coastguard Worker psubusw m7, m6, m5 521*c0909341SAndroid Build Coastguard Worker pabsw m5, m3 ; adiff_k0p1 522*c0909341SAndroid Build Coastguard Worker pminsw m7, m4 523*c0909341SAndroid Build Coastguard Worker psrlw m4, m5, [pri_shift+gprsize] 524*c0909341SAndroid Build Coastguard Worker psignw m7, m2 ; constrain(diff_k0p0) 525*c0909341SAndroid Build Coastguard Worker psubusw m2, m6, m4 526*c0909341SAndroid Build Coastguard Worker pminsw m2, m5 527*c0909341SAndroid Build Coastguard Worker%if %1 == 4 528*c0909341SAndroid Build Coastguard Worker movu m4, [tmpq+offq+32*0] 529*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, [tmpq+offq+32*1] ; k1p0 530*c0909341SAndroid Build Coastguard Worker neg offq 531*c0909341SAndroid Build Coastguard Worker movu m5, [tmpq+offq+32*0] 532*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, [tmpq+offq+32*1] ; k1p1 533*c0909341SAndroid Build Coastguard Worker%else 534*c0909341SAndroid Build Coastguard Worker movu xm4, [tmpq+offq+32*0] 535*c0909341SAndroid Build Coastguard Worker vinserti128 m4, [tmpq+offq+32*1], 1 536*c0909341SAndroid Build Coastguard Worker neg offq 537*c0909341SAndroid Build Coastguard Worker movu xm5, [tmpq+offq+32*0] 538*c0909341SAndroid Build Coastguard Worker vinserti128 m5, [tmpq+offq+32*1], 1 539*c0909341SAndroid Build Coastguard Worker%endif 540*c0909341SAndroid Build Coastguard Worker psignw m2, m3 ; constrain(diff_k0p1) 541*c0909341SAndroid Build Coastguard Worker paddw m7, m2 ; constrain(diff_k0) 542*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m4 543*c0909341SAndroid Build Coastguard Worker pminuw m12, m4 544*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m5 545*c0909341SAndroid Build Coastguard Worker pminuw m12, m5 546*c0909341SAndroid Build Coastguard Worker psubw m4, m1 ; diff_k1p0 547*c0909341SAndroid Build Coastguard Worker psubw m5, m1 ; diff_k1p1 548*c0909341SAndroid 
Build Coastguard Worker pabsw m3, m4 ; adiff_k1p0 549*c0909341SAndroid Build Coastguard Worker pmullw m7, m9 ; pri_tap_k0 550*c0909341SAndroid Build Coastguard Worker paddw m0, m7 551*c0909341SAndroid Build Coastguard Worker psrlw m2, m3, [pri_shift+gprsize] 552*c0909341SAndroid Build Coastguard Worker psubusw m7, m6, m2 553*c0909341SAndroid Build Coastguard Worker pabsw m2, m5 ; adiff_k1p1 554*c0909341SAndroid Build Coastguard Worker pminsw m7, m3 555*c0909341SAndroid Build Coastguard Worker psrlw m3, m2, [pri_shift+gprsize] 556*c0909341SAndroid Build Coastguard Worker psignw m7, m4 ; constrain(diff_k1p0) 557*c0909341SAndroid Build Coastguard Worker psubusw m4, m6, m3 558*c0909341SAndroid Build Coastguard Worker pminsw m4, m2 559*c0909341SAndroid Build Coastguard Worker psignw m4, m5 ; constrain(diff_k1p1) 560*c0909341SAndroid Build Coastguard Worker paddw m7, m4 ; constrain(diff_k1) 561*c0909341SAndroid Build Coastguard Worker pmullw m7, m10 ; pri_tap_k1 562*c0909341SAndroid Build Coastguard Worker paddw m0, m7 ; sum 563*c0909341SAndroid Build Coastguard Worker psraw m2, m0, 15 564*c0909341SAndroid Build Coastguard Worker paddw m0, m2 565*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m8 566*c0909341SAndroid Build Coastguard Worker add tmpq, 32*2 567*c0909341SAndroid Build Coastguard Worker pmaxsw m11, m1 568*c0909341SAndroid Build Coastguard Worker pminuw m12, m1 569*c0909341SAndroid Build Coastguard Worker paddw m0, m1 570*c0909341SAndroid Build Coastguard Worker pminsw m0, m11 571*c0909341SAndroid Build Coastguard Worker pmaxsw m0, m12 572*c0909341SAndroid Build Coastguard Worker%if %1 == 4 573*c0909341SAndroid Build Coastguard Worker vextracti128 xm1, m0, 1 574*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], xm0 575*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*1], xm1 576*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*2], xm0 577*c0909341SAndroid Build Coastguard Worker movhps [dstq+r9 ], xm1 
; Tail of the %1 == 4 store path of CDEF_FILTER: four rows were just written,
; so step dst forward by four rows.
    lea           dstq, [dstq+strideq*4]
%else
    ; Otherwise the low and high 128-bit lanes of m0 are stored as two rows.
    mova [dstq+strideq*0], xm0
    vextracti128 [dstq+strideq*1], m0, 1
    lea           dstq, [dstq+strideq*2]
%endif
    ret
%endif
%endmacro

;------------------------------------------------------------------------------
; cdef_filter_{4x4,4x8,8x8}_16bpc
;
; Each entry point builds a padded copy of the block in the stack buffer `px`
; and then expands CDEF_FILTER w, h.
;
; Layout of px (row pitch: 16 bytes for the 4-wide variants, 32 bytes for 8x8):
;   rows -2, -1     two rows read from topq
;   rows 0..h-1     the block rows read from dstq
;   rows h, h+1     two rows read from botq
;   bytes -4..-1    of every row: the 4 bytes to the left of that row
;                   (from leftq+4*row for block rows, from topq/botq-4 otherwise)
;
; `edge` is loaded from argument slot r9m. According to the inline comments its
; bits are 1 = HAVE_LEFT, 2 = HAVE_RIGHT, 4 = HAVE_TOP, 8 = HAVE_BOTTOM.
; Wherever a side is missing, the matching area of px is filled from m7, which
; holds a broadcast of the dword at pw_m16384 (presumably words of -16384 used
; as an "unavailable" marker -- confirm against its definition).
; When HAVE_RIGHT is clear, 4 bytes at row offset +8 (4-wide) or +16 (8x8) are
; overwritten with that fill value in every row from -2 to h+1.
;
; px, offq, pri_shift, sec_shift and base are single-line macros. They are set
; in the 4x4 entry point; 4x8 redefines none of them and 8x8 redefines only px
; and base, so the remaining definitions carry over.
; NOTE(review): on WIN64 offq is r8, the same register that `base` is built
; on -- presumably CDEF_FILTER is done with `base` before it writes offq;
; confirm in the macro body.
;------------------------------------------------------------------------------

INIT_YMM avx2
cglobal cdef_filter_4x4_16bpc, 5, 10, 9, 16*10, dst, stride, left, top, bot, \
                                                pri, sec, edge
%if WIN64
    %define         px  rsp+16*6
    %define       offq  r8
    %define  pri_shift  rsp+16*2
    %define  sec_shift  rsp+16*3
%else
    %define         px  rsp+16*4
    %define       offq  r4
    %define  pri_shift  rsp+16*0
    %define  sec_shift  rsp+16*1
%endif
    %define       base  r8-dir_table4
    mov          edged, r9m
    lea             r8, [dir_table4]          ; anchor for [base+...] addressing
    movu           xm0, [dstq+strideq*0]
    movu           xm1, [dstq+strideq*1]
    lea             r9, [strideq*3]
    movu           xm2, [dstq+strideq*2]
    movu           xm3, [dstq+r9       ]
    vpbroadcastd    m7, [base+pw_m16384]      ; fill value for missing edges
    ; block rows 0..3
    mova   [px+16*0+0], xm0
    mova   [px+16*1+0], xm1
    mova   [px+16*2+0], xm2
    mova   [px+16*3+0], xm3
    test         edgeb, 4 ; HAVE_TOP
    jz .no_top
    movu           xm0, [topq+strideq*0]
    movu           xm1, [topq+strideq*1]
    mova   [px-16*2+0], xm0
    mova   [px-16*1+0], xm1
    test         edgeb, 1 ; HAVE_LEFT
    jz .top_no_left
    movd           xm0, [topq+strideq*0-4]
    movd           xm1, [topq+strideq*1-4]
    movd   [px-16*2-4], xm0
    movd   [px-16*1-4], xm1
    jmp .top_done
.no_top:
    ; one 32-byte store covers both 16-byte top rows
    mova   [px-16*2+0], m7
.top_no_left:
    ; reached by fall-through from .no_top as well: left 4 bytes of both rows
    movd   [px-16*2-4], xm7
    movd   [px-16*1-4], xm7
.top_done:
    test         edgeb, 8 ; HAVE_BOTTOM
    jz .no_bottom
    movu           xm0, [botq+strideq*0]
    movu           xm1, [botq+strideq*1]
    mova   [px+16*4+0], xm0
    mova   [px+16*5+0], xm1
    test         edgeb, 1 ; HAVE_LEFT
    jz .bottom_no_left
    movd           xm0, [botq+strideq*0-4]
    movd           xm1, [botq+strideq*1-4]
    movd   [px+16*4-4], xm0
    movd   [px+16*5-4], xm1
    jmp .bottom_done
.no_bottom:
    ; one 32-byte store covers both 16-byte bottom rows
    mova   [px+16*4+0], m7
.bottom_no_left:
    movd   [px+16*4-4], xm7
    movd   [px+16*5-4], xm7
.bottom_done:
    test         edgeb, 1 ; HAVE_LEFT
    jz .no_left
    ; leftq holds 4 bytes per block row
    movd           xm0, [leftq+4*0]
    movd           xm1, [leftq+4*1]
    movd           xm2, [leftq+4*2]
    movd           xm3, [leftq+4*3]
    movd   [px+16*0-4], xm0
    movd   [px+16*1-4], xm1
    movd   [px+16*2-4], xm2
    movd   [px+16*3-4], xm3
    jmp .left_done
.no_left:
    REPX {movd [px+16*x-4], xm7}, 0, 1, 2, 3
.left_done:
    test         edgeb, 2 ; HAVE_RIGHT
    jnz .padding_done
    ; no right neighbour: overwrite 4 bytes at +8 in all rows, top/bottom included
    REPX {movd [px+16*x+8], xm7}, -2, -1, 0, 1, 2, 3, 4, 5
.padding_done:
    CDEF_FILTER 4, 4

; 4x8 variant: same px row pitch (16 bytes) and the same px/offq/pri_shift/
; sec_shift/base definitions as the 4x4 entry point; eight block rows, so the
; bottom rows land at px+16*8 and px+16*9.
cglobal cdef_filter_4x8_16bpc, 5, 10, 9, 16*14, dst, stride, left, top, bot, \
                                                pri, sec, edge
    mov          edged, r9m
    movu           xm0, [dstq+strideq*0]
    movu           xm1, [dstq+strideq*1]
    lea             r9, [strideq*3]
    movu           xm2, [dstq+strideq*2]
    movu           xm3, [dstq+r9       ]
    lea             r6, [dstq+strideq*4]      ; r6 = address of block row 4
    movu           xm4, [r6  +strideq*0]
    movu           xm5, [r6  +strideq*1]
    movu           xm6, [r6  +strideq*2]
    movu           xm7, [r6  +r9       ]
    lea             r8, [dir_table4]          ; anchor for [base+...] addressing
    ; block rows 0..7
    mova   [px+16*0+0], xm0
    mova   [px+16*1+0], xm1
    mova   [px+16*2+0], xm2
    mova   [px+16*3+0], xm3
    mova   [px+16*4+0], xm4
    mova   [px+16*5+0], xm5
    mova   [px+16*6+0], xm6
    mova   [px+16*7+0], xm7
    ; m7 is free again once row 7 is stored; reload it with the fill value
    vpbroadcastd    m7, [base+pw_m16384]
    test         edgeb, 4 ; HAVE_TOP
    jz .no_top
    movu           xm0, [topq+strideq*0]
    movu           xm1, [topq+strideq*1]
    mova   [px-16*2+0], xm0
    mova   [px-16*1+0], xm1
    test         edgeb, 1 ; HAVE_LEFT
    jz .top_no_left
    movd           xm0, [topq+strideq*0-4]
    movd           xm1, [topq+strideq*1-4]
    movd   [px-16*2-4], xm0
    movd   [px-16*1-4], xm1
    jmp .top_done
.no_top:
    ; one 32-byte store covers both 16-byte top rows
    mova   [px-16*2+0], m7
.top_no_left:
    movd   [px-16*2-4], xm7
    movd   [px-16*1-4], xm7
.top_done:
    test         edgeb, 8 ; HAVE_BOTTOM
    jz .no_bottom
    movu           xm0, [botq+strideq*0]
    movu           xm1, [botq+strideq*1]
    mova   [px+16*8+0], xm0
    mova   [px+16*9+0], xm1
    test         edgeb, 1 ; HAVE_LEFT
    jz .bottom_no_left
    movd           xm0, [botq+strideq*0-4]
    movd           xm1, [botq+strideq*1-4]
    movd   [px+16*8-4], xm0
    movd   [px+16*9-4], xm1
    jmp .bottom_done
.no_bottom:
    ; one 32-byte store covers both 16-byte bottom rows
    mova   [px+16*8+0], m7
.bottom_no_left:
    movd   [px+16*8-4], xm7
    movd   [px+16*9-4], xm7
.bottom_done:
    test         edgeb, 1 ; HAVE_LEFT
    jz .no_left
    ; leftq holds 4 bytes per block row; copied in two batches of four rows
    movd           xm0, [leftq+4*0]
    movd           xm1, [leftq+4*1]
    movd           xm2, [leftq+4*2]
    movd           xm3, [leftq+4*3]
    movd   [px+16*0-4], xm0
    movd   [px+16*1-4], xm1
    movd   [px+16*2-4], xm2
    movd   [px+16*3-4], xm3
    movd           xm0, [leftq+4*4]
    movd           xm1, [leftq+4*5]
    movd           xm2, [leftq+4*6]
    movd           xm3, [leftq+4*7]
    movd   [px+16*4-4], xm0
    movd   [px+16*5-4], xm1
    movd   [px+16*6-4], xm2
    movd   [px+16*7-4], xm3
    jmp .left_done
.no_left:
    REPX {movd [px+16*x-4], xm7}, 0, 1, 2, 3, 4, 5, 6, 7
.left_done:
    test         edgeb, 2 ; HAVE_RIGHT
    jnz .padding_done
    ; no right neighbour: overwrite 4 bytes at +8 in all rows, top/bottom included
    REPX {movd [px+16*x+8], xm7}, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
.padding_done:
    CDEF_FILTER 4, 8

; 8x8 variant: px row pitch is 32 bytes and full ymm registers carry one row
; each. px and base are redefined here; offq, pri_shift and sec_shift keep the
; definitions made in the 4x4 entry point.
cglobal cdef_filter_8x8_16bpc, 5, 9, 9, 32*13, dst, stride, left, top, bot, \
                                               pri, sec, edge
%if WIN64
    %define         px  rsp+32*4
%else
    %define         px  rsp+32*3
%endif
    %define       base  r8-dir_table8
    mov          edged, r9m
    movu            m0, [dstq+strideq*0]
    movu            m1, [dstq+strideq*1]
    lea             r6, [dstq+strideq*2]      ; r6 walks down two rows at a time
    movu            m2, [r6  +strideq*0]
    movu            m3, [r6  +strideq*1]
    lea             r6, [r6  +strideq*2]
    movu            m4, [r6  +strideq*0]
    movu            m5, [r6  +strideq*1]
    lea             r6, [r6  +strideq*2]
    movu            m6, [r6  +strideq*0]
    movu            m7, [r6  +strideq*1]
    lea             r8, [dir_table8]          ; anchor for [base+...] addressing
    ; block rows 0..7
    mova   [px+32*0+0], m0
    mova   [px+32*1+0], m1
    mova   [px+32*2+0], m2
    mova   [px+32*3+0], m3
    mova   [px+32*4+0], m4
    mova   [px+32*5+0], m5
    mova   [px+32*6+0], m6
    mova   [px+32*7+0], m7
    ; m7 is free again once row 7 is stored; reload it with the fill value
    vpbroadcastd    m7, [base+pw_m16384]
    test         edgeb, 4 ; HAVE_TOP
    jz .no_top
    movu            m0, [topq+strideq*0]
    movu            m1, [topq+strideq*1]
    mova   [px-32*2+0], m0
    mova   [px-32*1+0], m1
    test         edgeb, 1 ; HAVE_LEFT
    jz .top_no_left
    movd           xm0, [topq+strideq*0-4]
    movd           xm1, [topq+strideq*1-4]
    movd   [px-32*2-4], xm0
    movd   [px-32*1-4], xm1
    jmp .top_done
.no_top:
    ; rows are 32 bytes here, so each top row needs its own store
    mova   [px-32*2+0], m7
    mova   [px-32*1+0], m7
.top_no_left:
    movd   [px-32*2-4], xm7
    movd   [px-32*1-4], xm7
.top_done:
    test         edgeb, 8 ; HAVE_BOTTOM
    jz .no_bottom
    movu            m0, [botq+strideq*0]
    movu            m1, [botq+strideq*1]
    mova   [px+32*8+0], m0
    mova   [px+32*9+0], m1
    test         edgeb, 1 ; HAVE_LEFT
    jz .bottom_no_left
    movd           xm0, [botq+strideq*0-4]
    movd           xm1, [botq+strideq*1-4]
    movd   [px+32*8-4], xm0
    movd   [px+32*9-4], xm1
    jmp .bottom_done
.no_bottom:
    mova   [px+32*8+0], m7
    mova   [px+32*9+0], m7
.bottom_no_left:
    movd   [px+32*8-4], xm7
    movd   [px+32*9-4], xm7
.bottom_done:
    test         edgeb, 1 ; HAVE_LEFT
    jz .no_left
    ; leftq holds 4 bytes per block row; copied in two batches of four rows
    movd           xm0, [leftq+4*0]
    movd           xm1, [leftq+4*1]
    movd           xm2, [leftq+4*2]
    movd           xm3, [leftq+4*3]
    movd   [px+32*0-4], xm0
    movd   [px+32*1-4], xm1
    movd   [px+32*2-4], xm2
    movd   [px+32*3-4], xm3
    movd           xm0, [leftq+4*4]
    movd           xm1, [leftq+4*5]
    movd           xm2, [leftq+4*6]
    movd           xm3, [leftq+4*7]
    movd   [px+32*4-4], xm0
    movd   [px+32*5-4], xm1
    movd   [px+32*6-4], xm2
    movd   [px+32*7-4], xm3
    jmp .left_done
.no_left:
    REPX {movd [px+32*x-4], xm7}, 0, 1, 2, 3, 4, 5, 6, 7
.left_done:
    test         edgeb, 2 ; HAVE_RIGHT
    jnz .padding_done
    ; no right neighbour: overwrite 4 bytes at +16 in all rows, top/bottom included
    REPX {movd [px+32*x+16], xm7}, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
.padding_done:
    CDEF_FILTER 8, 8

;------------------------------------------------------------------------------
; cdef_dir_16bpc(src, stride, var, bdmax)
;
; Front end for the 8bpc direction search. Loads eight 16-byte rows from src,
; multiplies every word by a factor picked from dir_shift (pmulhuw keeps the
; high 16 bits of the unsigned product), then tail-jumps to the .main label of
; the 8bpc cdef_dir function.
;
; Register contents at the jump (low lane | high lane):
;   m0 = row 0 | row 7    m1 = row 1 | row 6
;   m2 = row 2 | row 5    m3 = row 3 | row 4
; `var` is not touched here; it is left for the code at .main.
; NOTE(review): the first four rows are loaded with mova, so src rows are
; presumably expected to be 16-byte aligned -- confirm with the callers.
;------------------------------------------------------------------------------
cglobal cdef_dir_16bpc, 4, 7, 6, src, stride, var, bdmax
    lea             r6, [dir_shift]
    shr         bdmaxd, 11 ; 0 for 10bpc, 1 for 12bpc
    vpbroadcastd    m4, [r6+bdmaxq*4]         ; dword 0 or 1 of dir_shift
    lea             r6, [strideq*3]
    mova           xm0, [srcq+strideq*0]
    mova           xm1, [srcq+strideq*1]
    mova           xm2, [srcq+strideq*2]
    mova           xm3, [srcq+r6       ]
    lea           srcq, [srcq+strideq*4]      ; srcq now points at row 4
    ; rows 7, 6, 5, 4 go into the high lanes, in reverse order
    vinserti128     m0, [srcq+r6       ], 1
    vinserti128     m1, [srcq+strideq*2], 1
    vinserti128     m2, [srcq+strideq*1], 1
    vinserti128     m3, [srcq+strideq*0], 1
    REPX {pmulhuw x, m4}, m0, m1, m2, m3
    jmp mangle(private_prefix %+ _cdef_dir_8bpc %+ SUFFIX).main

%endif ; ARCH_X86_64