;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved.
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;

;

%include "third_party/x86inc/x86inc.asm"

SECTION .text

; void aom_subtract_block(int rows, int cols,
;                         int16_t *diff, ptrdiff_t diff_stride,
;                         const uint8_t *src, ptrdiff_t src_stride,
;                         const uint8_t *pred, ptrdiff_t pred_stride)
;
; Writes diff[r][c] = src[r][c] - pred[r][c] for a rows x cols block.
; The 8-bit inputs are widened to 16 bits by interleaving with m7 and the
; difference is taken with psubw, so every output element is a 16-bit word.
;
; Dispatch is on cols: 4, 8, 16, 32 and 64 each jump to their own loop; any
; other value falls through to the 128-column loop.
;
; diff_stride is scaled by 2 when advancing diffq because diff elements are
; two bytes wide; src_stride and pred_stride are applied unscaled.
;
; Memory access forms used below:
;   - src loads and diff stores use mova (the aligned move under INIT_XMM).
;   - pred loads in the 16..128 column paths use movu (unaligned move).
;   - the 4 and 8 column paths load with movh (half a register: 8 bytes
;     under INIT_XMM, 4 bytes under INIT_MMX).
;
; The 4, 8 and 16 column loops handle two rows per iteration and count rows
; down by 2; the 32, 64 and 128 column loops handle one row per iteration.

INIT_XMM sse2
cglobal subtract_block, 7, 7, 8, \
                        rows, cols, diff, diff_stride, src, src_stride, \
                        pred, pred_stride
; Only seven arguments are loaded into registers. Once cols has been compared
; it is no longer needed, so its register is reused to hold pred_stride, which
; each path fetches from the argument's original location (pred_stridemp).
%define pred_str colsq
  pxor                  m7, m7         ; dedicated zero register
  cmp                colsd, 4
  je .case_4
  cmp                colsd, 8
  je .case_8
  cmp                colsd, 16
  je .case_16
  cmp                colsd, 32
  je .case_32
  cmp                colsd, 64
  je .case_64
  ; No match above: fall through to the 128-column loop.

; loop16 src_off0, src_off1, pred_off0, pred_off1, diff_off0, diff_off1
;
; Subtracts two mmsize-byte chunks of pixels.
;   %1, %2  byte offsets from srcq of the two source chunks
;   %3, %4  byte offsets from predq of the two prediction chunks
;   %5, %6  byte offsets from diffq of the two result chunks; each result
;           chunk occupies 2*mmsize bytes (low half, then high half)
; Uses m0-m5 as scratch and expects m7 to be zero.
%macro loop16 6
  mova                  m0, [srcq+%1]
  mova                  m4, [srcq+%2]
  movu                  m1, [predq+%3]
  movu                  m5, [predq+%4]
  ; First chunk: the high bytes are widened into m2/m3 (x86inc three-operand
  ; form), then the low bytes are widened in place.
  punpckhbw             m2, m0, m7
  punpckhbw             m3, m1, m7
  punpcklbw             m0, m7
  punpcklbw             m1, m7
  psubw                 m2, m3         ; high half of chunk 0
  psubw                 m0, m1         ; low half of chunk 0
  ; Second chunk: m1/m3 are free again and are reused for the high halves.
  punpckhbw             m1, m4, m7
  punpckhbw             m3, m5, m7
  punpcklbw             m4, m7
  punpcklbw             m5, m7
  psubw                 m1, m3         ; high half of chunk 1
  psubw                 m4, m5         ; low half of chunk 1
  mova [diffq+mmsize*0+%5], m0
  mova [diffq+mmsize*1+%5], m2
  mova [diffq+mmsize*0+%6], m4
  mova [diffq+mmsize*1+%6], m1
%endmacro

  ; 128 columns: four loop16 invocations (8 chunks of mmsize pixels) per row.
  mov             pred_str, pred_stridemp
.loop_128:
  loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize,  0*mmsize,  2*mmsize
  loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize,  4*mmsize,  6*mmsize
  loop16 4*mmsize, 5*mmsize, 4*mmsize, 5*mmsize,  8*mmsize, 10*mmsize
  loop16 6*mmsize, 7*mmsize, 6*mmsize, 7*mmsize, 12*mmsize, 14*mmsize
  lea                diffq, [diffq+diff_strideq*2]
  add                predq, pred_str
  add                 srcq, src_strideq
  ; Signed "greater than zero" exit test, the same as every other loop in
  ; this function, so a non-positive row count cannot keep the loop running.
  dec                rowsd
  jg .loop_128
  RET

  ; 64 columns: two loop16 invocations per row.
.case_64:
  mov             pred_str, pred_stridemp
.loop_64:
  loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize
  loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize
  lea                diffq, [diffq+diff_strideq*2]
  add                predq, pred_str
  add                 srcq, src_strideq
  dec                rowsd
  jg .loop_64
  RET

  ; 32 columns: one loop16 invocation per row.
.case_32:
  mov             pred_str, pred_stridemp
.loop_32:
  loop16 0, mmsize, 0, mmsize, 0, 2*mmsize
  lea                diffq, [diffq+diff_strideq*2]
  add                predq, pred_str
  add                 srcq, src_strideq
  dec                rowsd
  jg .loop_32
  RET

  ; 16 columns: the two chunks of one loop16 invocation are two consecutive
  ; rows (second offsets are one stride), so pointers advance by two rows.
.case_16:
  mov             pred_str, pred_stridemp
.loop_16:
  loop16 0, src_strideq, 0, pred_str, 0, diff_strideq*2
  lea                diffq, [diffq+diff_strideq*4]
  lea                predq, [predq+pred_str*2]
  lea                 srcq, [srcq+src_strideq*2]
  sub                rowsd, 2
  jg .loop_16
  RET

; loop_h (no parameters)
;
; Subtracts two consecutive rows, each half a register of pixels wide.
; Expanded once under INIT_XMM (8 columns) and once under INIT_MMX
; (4 columns). Uses m0-m3 as scratch and interleaves with m7.
%macro loop_h 0
  movh                  m0, [srcq]
  movh                  m2, [srcq+src_strideq]
  movh                  m1, [predq]
  movh                  m3, [predq+pred_str]
  punpcklbw             m0, m7
  punpcklbw             m1, m7
  punpcklbw             m2, m7
  punpcklbw             m3, m7
  psubw                 m0, m1         ; row 0
  psubw                 m2, m3         ; row 1
  mova             [diffq], m0
  mova [diffq+diff_strideq*2], m2
%endmacro

  ; 8 columns: two rows per iteration.
.case_8:
  mov             pred_str, pred_stridemp
.loop_8:
  loop_h
  lea                diffq, [diffq+diff_strideq*4]
  lea                 srcq, [srcq+src_strideq*2]
  lea                predq, [predq+pred_str*2]
  sub                rowsd, 2
  jg .loop_8
  RET

; 4 columns: same two-row scheme using the 64-bit MMX registers.
; NOTE(review): after INIT_MMX, m7 names mm7, which the pxor at function
; entry (issued under INIT_XMM) did not clear. The output looks unaffected,
; because both operands of each psubw are interleaved with the same m7 bytes
; and those cancel in the 16-bit subtraction. Please confirm.
INIT_MMX
.case_4:
  mov             pred_str, pred_stridemp
.loop_4:
  loop_h
  lea                diffq, [diffq+diff_strideq*4]
  lea                 srcq, [srcq+src_strideq*2]
  lea                predq, [predq+pred_str*2]
  sub                rowsd, 2
  jg .loop_4
  emms                                 ; leave MMX state before returning
  RET