;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

%include "third_party/x86inc/x86inc.asm"

SECTION .text

; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end
;
; Handles two rows of a 4-pixel-wide block against four references.
;   arg 1: 1 on the first call (initialises the accumulators), 0 afterwards
;   arg 2: offset of the first row from srcq
;   arg 3: offset of the first row from ref1q..ref4q
;   arg 4: offset of the second row from srcq
;   arg 5: offset of the second row from ref1q..ref4q
;   arg 6: optional, default 0; 1 advances all five pointers by two strides
; Accumulators: m6 = ref1 (low qword) | ref2 (high qword),
;               m7 = ref3 (low qword) | ref4 (high qword).
; Scratch: m0-m5.
%macro PROCESS_4x2x4 5-6 0
  movd                  m0, [srcq +%2]
%if %1 == 1
  ; First call: build the packed reference rows directly in m6/m7 so that
  ; psadbw leaves the initial sums in the accumulators.
  movd                  m6, [ref1q+%3]
  movd                  m4, [ref2q+%3]
  movd                  m7, [ref3q+%3]
  movd                  m5, [ref4q+%3]
  movd                  m1, [srcq +%4]
  movd                  m2, [ref1q+%5]
  punpckldq             m0, m1
  punpckldq             m6, m2
  movd                  m1, [ref2q+%5]
  movd                  m2, [ref3q+%5]
  movd                  m3, [ref4q+%5]
  punpckldq             m4, m1
  punpckldq             m7, m2
  punpckldq             m5, m3
  ; Duplicate the two source rows into both qwords, then pair the refs.
  movlhps               m0, m0
  movlhps               m6, m4
  movlhps               m7, m5
  psadbw                m6, m0
  psadbw                m7, m0
%else
  ; Later calls: same packing into scratch registers, then accumulate.
  movd                  m1, [ref1q+%3]
  movd                  m5, [ref1q+%5]
  movd                  m2, [ref2q+%3]
  movd                  m4, [ref2q+%5]
  punpckldq             m1, m5
  punpckldq             m2, m4
  movd                  m3, [ref3q+%3]
  movd                  m5, [ref3q+%5]
  punpckldq             m3, m5
  movd                  m4, [ref4q+%3]
  movd                  m5, [ref4q+%5]
  punpckldq             m4, m5
  movd                  m5, [srcq +%4]
  punpckldq             m0, m5
  movlhps               m0, m0
  movlhps               m1, m2
  movlhps               m3, m4
  psadbw                m1, m0
  psadbw                m3, m0
  paddd                 m6, m1
  paddd                 m7, m3
%endif
%if %6 == 1
  lea                 srcq, [srcq +src_strideq*2]
  lea                ref1q, [ref1q+ref_strideq*2]
  lea                ref2q, [ref2q+ref_strideq*2]
  lea                ref3q, [ref3q+ref_strideq*2]
  lea                ref4q, [ref4q+ref_strideq*2]
%endif
%endmacro

; PROCESS_8x2x4 first, off_{first,second}_{src,ref}, advance_at_end
;
; Handles two rows of an 8-pixel-wide block against four references.
; Arguments are the same as for PROCESS_4x2x4.
; The first row goes in the low qword (movh) and the second row in the high
; qword (movhps) of each register, so one psadbw covers both rows.
; Accumulators: m4 = ref1, m5 = ref2, m6 = ref3, m7 = ref4.
; Scratch: m0-m3.
%macro PROCESS_8x2x4 5-6 0
  movh                  m0, [srcq +%2]
%if %1 == 1
  movh                  m4, [ref1q+%3]
  movh                  m5, [ref2q+%3]
  movh                  m6, [ref3q+%3]
  movh                  m7, [ref4q+%3]
  movhps                m0, [srcq +%4]
  movhps                m4, [ref1q+%5]
  movhps                m5, [ref2q+%5]
  movhps                m6, [ref3q+%5]
  movhps                m7, [ref4q+%5]
  psadbw                m4, m0
  psadbw                m5, m0
  psadbw                m6, m0
  psadbw                m7, m0
%else
  movh                  m1, [ref1q+%3]
  movh                  m2, [ref2q+%3]
  movh                  m3, [ref3q+%3]
  movhps                m0, [srcq +%4]
  movhps                m1, [ref1q+%5]
  movhps                m2, [ref2q+%5]
  movhps                m3, [ref3q+%5]
  psadbw                m1, m0
  psadbw                m2, m0
  psadbw                m3, m0
  paddd                 m4, m1
  ; m1 is free again once it has been added to m4; reuse it for ref4.
  movh                  m1, [ref4q+%3]
  movhps                m1, [ref4q+%5]
  paddd                 m5, m2
  paddd                 m6, m3
  psadbw                m1, m0
  paddd                 m7, m1
%endif
%if %6 == 1
  lea                 srcq, [srcq +src_strideq*2]
  lea                ref1q, [ref1q+ref_strideq*2]
  lea                ref2q, [ref2q+ref_strideq*2]
  lea                ref3q, [ref3q+ref_strideq*2]
  lea                ref4q, [ref4q+ref_strideq*2]
%endif
%endmacro

; PROCESS_16x2x4 first, off_{first,second}_{src,ref}, advance_at_end
;
; Handles two 16-byte spans against four references, one register per span.
; Arguments are the same as for PROCESS_4x2x4.
; The source is read with mova and the references with movu.
; Accumulators: m4 = ref1, m5 = ref2, m6 = ref3, m7 = ref4.
; Scratch: m0-m3.
%macro PROCESS_16x2x4 5-6 0
  ; 1st 16 px
  mova                  m0, [srcq +%2]
%if %1 == 1
  movu                  m4, [ref1q+%3]
  movu                  m5, [ref2q+%3]
  movu                  m6, [ref3q+%3]
  movu                  m7, [ref4q+%3]
  psadbw                m4, m0
  psadbw                m5, m0
  psadbw                m6, m0
  psadbw                m7, m0
%else
  movu                  m1, [ref1q+%3]
  movu                  m2, [ref2q+%3]
  movu                  m3, [ref3q+%3]
  psadbw                m1, m0
  psadbw                m2, m0
  psadbw                m3, m0
  paddd                 m4, m1
  movu                  m1, [ref4q+%3]
  paddd                 m5, m2
  paddd                 m6, m3
  psadbw                m1, m0
  paddd                 m7, m1
%endif

  ; 2nd 16 px
  mova                  m0, [srcq +%4]
  movu                  m1, [ref1q+%5]
  movu                  m2, [ref2q+%5]
  movu                  m3, [ref3q+%5]
  psadbw                m1, m0
  psadbw                m2, m0
  psadbw                m3, m0
  paddd                 m4, m1
  movu                  m1, [ref4q+%5]
  paddd                 m5, m2
  paddd                 m6, m3
%if %6 == 1
  ; All loads for this call are done (ref4 is already in m1), so the
  ; pointers can be advanced before the last psadbw/paddd pair.
  lea                 srcq, [srcq +src_strideq*2]
  lea                ref1q, [ref1q+ref_strideq*2]
  lea                ref2q, [ref2q+ref_strideq*2]
  lea                ref3q, [ref3q+ref_strideq*2]
  lea                ref4q, [ref4q+ref_strideq*2]
%endif
  psadbw                m1, m0
  paddd                 m7, m1
%endmacro

; PROCESS_32x2x4 first, off_{first,second}_{src,ref}, advance_at_end
;
; Two rows of a 32-pixel-wide block. Each PROCESS_16x2x4 call covers one row,
; with its two 16-byte spans used as the left and right halves of that row.
; Only the second call may advance the pointers.
%macro PROCESS_32x2x4 5-6 0
  PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16
  PROCESS_16x2x4  0, %4, %5, %4 + 16, %5 + 16, %6
%endmacro

; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end
;
; Two rows of a 64-pixel-wide block. Each PROCESS_32x2x4 call covers one row,
; split into two 32-byte halves. Only the second call may advance the
; pointers.
%macro PROCESS_64x2x4 5-6 0
  PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32
  PROCESS_32x2x4  0, %4, %5, %4 + 32, %5 + 32, %6
%endmacro

; void vpx_sadNxNx4d_sse2(uint8_t *src,    int src_stride,
;                         uint8_t *ref[4], int ref_stride,
;                         uint32_t res[4]);
; Instantiated below for NxN = 64x64, 64x32, 32x64, 32x32, 32x16, 16x32,
; 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4.
;
; SADNXN4D width, height, skip_rows (optional, default 0)
;
; With skip_rows == 1 the function is declared as sad_skip_<W>x<H>x4d instead.
; Both strides are doubled, so only every other row is read, and the four sums
; are doubled (pslld by 1) before being stored. Skip variants are instantiated
; for every size listed above except 8x4 and 4x4.
;
; The four sums are written to res[0..3] in the order ref[0]..ref[3].
%macro SADNXN4D 2-3 0
%if %3 == 1  ; skip rows
%if UNIX64
cglobal sad_skip_%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
                                    res, ref2, ref3, ref4
%else
cglobal sad_skip_%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
                                    ref2, ref3, ref4
%endif
%else ; normal sad
%if UNIX64
cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
                              res, ref2, ref3, ref4
%else
cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
                              ref2, ref3, ref4
%endif
%endif
%if %3 == 1
  ; Double both strides so that consecutive reads land on every other row.
  lea          src_strided, [2*src_strided]
  lea          ref_strided, [2*ref_strided]
%endif
  movsxdifnidn src_strideq, src_strided
  movsxdifnidn ref_strideq, ref_strided
  ; ref1q holds the address of the ref[4] pointer array on entry. Load
  ; entries 1..3 first and overwrite ref1q with entry 0 last.
  mov                ref2q, [ref1q+gprsize*1]
  mov                ref3q, [ref1q+gprsize*2]
  mov                ref4q, [ref1q+gprsize*3]
  mov                ref1q, [ref1q+gprsize*0]

  ; Each PROCESS call consumes two rows: one initialising call, num_rep
  ; accumulating calls, and a final call that does not advance the pointers.
  PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
%if %3 == 1  ; downsample number of rows by 2
%define num_rep (%2-8)/4
%else
%define num_rep (%2-4)/2
%endif
%rep num_rep
  PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
%endrep
%undef num_rep
  PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0

%if %1 > 4
  ; m4..m7 each hold two partial sums for one reference, one per qword.
  ; Interleave ref1/ref2 and ref3/ref4 as dwords, split the low and high
  ; halves, and add them to get four 32-bit totals in m4.
  pslldq                m5, 4
  pslldq                m7, 4
  por                   m4, m5
  por                   m6, m7
  mova                  m5, m4
  mova                  m7, m6
  punpcklqdq            m4, m6
  punpckhqdq            m5, m7
  ; Fetch the result pointer (5th argument) if it is not already in r4.
  movifnidn             r4, r4mp
  paddd                 m4, m5
%if %3 == 1
  pslld                 m4, 1
%endif
  movu                [r4], m4
  RET
%else
  ; 4-wide case: m6 = ref1 | ref2 and m7 = ref3 | ref4, one sum per qword.
  ; pshufd 0x08 moves dwords 0 and 2 into the low qword for the 8-byte stores.
  movifnidn             r4, r4mp
  pshufd            m6, m6, 0x08
  pshufd            m7, m7, 0x08
%if %3 == 1
  pslld                 m6, 1
  pslld                 m7, 1
%endif
  movq              [r4+0], m6
  movq              [r4+8], m7
  RET
%endif
%endmacro

INIT_XMM sse2
SADNXN4D 64, 64
SADNXN4D 64, 32
SADNXN4D 32, 64
SADNXN4D 32, 32
SADNXN4D 32, 16
SADNXN4D 16, 32
SADNXN4D 16, 16
SADNXN4D 16,  8
SADNXN4D  8, 16
SADNXN4D  8,  8
SADNXN4D  8,  4
SADNXN4D  4,  8
SADNXN4D  4,  4

SADNXN4D 64, 64, 1
SADNXN4D 64, 32, 1
SADNXN4D 32, 64, 1
SADNXN4D 32, 32, 1
SADNXN4D 32, 16, 1
SADNXN4D 16, 32, 1
SADNXN4D 16, 16, 1
SADNXN4D 16,  8, 1
SADNXN4D  8, 16, 1
SADNXN4D  8,  8, 1
SADNXN4D  4,  8, 1