;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

%include "third_party/x86inc/x86inc.asm"

SECTION .text

; void vpx_subtract_block(int rows, int cols,
;                         int16_t *diff, ptrdiff_t diff_stride,
;                         const uint8_t *src, ptrdiff_t src_stride,
;                         const uint8_t *pred, ptrdiff_t pred_stride)
;
; Computes diff[r][c] = src[r][c] - pred[r][c] for a rows x cols block,
; widening each 8-bit sample to 16 bits before subtracting.
;
; Dispatch is on cols: 4, 8, 16 and 32 have dedicated loops; every other
; value falls through to the 64-wide loop.
;
; Notes:
;  * diff_stride is counted in int16_t elements, so it is scaled by 2
;    wherever it is used as a byte offset.
;  * The 16-, 8- and 4-wide loops handle two rows per iteration and the
;    loop conditions are tested after the body, so every path runs at
;    least once and the two-row paths effectively expect an even row count.
;  * The 16/32/64-wide paths use mova (aligned access under INIT_XMM) for
;    src, pred and diff.

INIT_XMM sse2
; Only the first seven arguments are loaded into registers; pred_stride is
; fetched later through pred_stridemp.
cglobal subtract_block, 7, 7, 8, \
                        rows, cols, diff, diff_stride, src, src_stride, \
                        pred, pred_stride
; cols is only needed for the width dispatch below, so its register is
; reused afterwards to hold pred_stride.
%define pred_str colsq
  pxor                  m7, m7         ; dedicated zero register (xmm7)
  cmp                colsd, 4
  je .case_4
  cmp                colsd, 8
  je .case_8
  cmp                colsd, 16
  je .case_16
  cmp                colsd, 32
  je .case_32

; loop16 src_off0, src_off1, pred_off0, pred_off1, diff_off0, diff_off1
;
; Subtracts two mmsize-byte groups of pixels. Group 0 reads src+%1 and
; pred+%3 and writes 2*mmsize bytes of residual at diff+%5; group 1 reads
; src+%2 and pred+%4 and writes at diff+%6. Uses m0-m5; m7 must be zero.
%macro loop16 6
  mova                  m0, [srcq+%1]
  mova                  m4, [srcq+%2]
  mova                  m1, [predq+%3]
  mova                  m5, [predq+%4]
  punpckhbw             m2, m0, m7     ; group 0: high half of src  -> words
  punpckhbw             m3, m1, m7     ; group 0: high half of pred -> words
  punpcklbw             m0, m7         ; group 0: low half of src   -> words
  punpcklbw             m1, m7         ; group 0: low half of pred  -> words
  psubw                 m2, m3
  psubw                 m0, m1
  punpckhbw             m1, m4, m7     ; group 1: same sequence, m1/m3 reused
  punpckhbw             m3, m5, m7
  punpcklbw             m4, m7
  punpcklbw             m5, m7
  psubw                 m1, m3
  psubw                 m4, m5
  mova [diffq+mmsize*0+%5], m0
  mova [diffq+mmsize*1+%5], m2
  mova [diffq+mmsize*0+%6], m4
  mova [diffq+mmsize*1+%6], m1
%endmacro

  ; Fall-through path: 64 pixels per row, one row per iteration.
  mov               pred_str, pred_stridemp
.loop_64:
  loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize
  loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize
  lea                diffq, [diffq+diff_strideq*2]
  add                predq, pred_str
  add                 srcq, src_strideq
  dec                rowsd
  jg .loop_64
  RET

  ; 32 pixels per row, one row per iteration.
.case_32:
  mov               pred_str, pred_stridemp
.loop_32:
  loop16 0, mmsize, 0, mmsize, 0, 2*mmsize
  lea                diffq, [diffq+diff_strideq*2]
  add                predq, pred_str
  add                 srcq, src_strideq
  dec                rowsd
  jg .loop_32
  RET

  ; 16 pixels per row, two rows per iteration: the second group handled by
  ; loop16 is the next row (offset by one stride).
.case_16:
  mov               pred_str, pred_stridemp
.loop_16:
  loop16 0, src_strideq, 0, pred_str, 0, diff_strideq*2
  lea                diffq, [diffq+diff_strideq*4]
  lea                predq, [predq+pred_str*2]
  lea                 srcq, [srcq+src_strideq*2]
  sub                rowsd, 2
  jg .loop_16
  RET

; loop_h
;
; Subtracts two rows of mmsize/2 pixels each. movh loads half a register
; of bytes, which are widened against the zero register m7, and one full
; register of 16-bit residuals is stored per row. Uses m0-m3; m7 must be
; zero in the currently selected register set.
%macro loop_h 0
  movh                  m0, [srcq]
  movh                  m2, [srcq+src_strideq]
  movh                  m1, [predq]
  movh                  m3, [predq+pred_str]
  punpcklbw             m0, m7
  punpcklbw             m1, m7
  punpcklbw             m2, m7
  punpcklbw             m3, m7
  psubw                 m0, m1
  psubw                 m2, m3
  mova             [diffq], m0
  mova [diffq+diff_strideq*2], m2
%endmacro

  ; 8 pixels per row, two rows per iteration (XMM registers).
.case_8:
  mov               pred_str, pred_stridemp
.loop_8:
  loop_h
  lea                diffq, [diffq+diff_strideq*4]
  lea                 srcq, [srcq+src_strideq*2]
  lea                predq, [predq+pred_str*2]
  sub                rowsd, 2
  jg .loop_8
  RET

  ; 4 pixels per row, two rows per iteration (MMX registers).
INIT_MMX
.case_4:
  ; After INIT_MMX, m7 names mm7 rather than the xmm7 that was cleared at
  ; function entry, so the zero register has to be set up again here.
  pxor                  m7, m7
  mov               pred_str, pred_stridemp
.loop_4:
  loop_h
  lea                diffq, [diffq+diff_strideq*4]
  lea                 srcq, [srcq+src_strideq*2]
  lea                predq, [predq+pred_str*2]
  sub                rowsd, 2
  jg .loop_4
  emms                                 ; leave MMX state before returning
  RET