;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------------
; STACK_FRAME_CREATE_X3
;
; Binds the symbolic operand names used by the function body (src_ptr,
; src_stride, ref_ptr, ref_stride, end_ptr, ret_var, result_ptr, max_sad,
; height) to registers or stack slots for the ABI being assembled, and emits
; the prologue that ABI needs.
;
;   32-bit   : builds an rbp frame, saves rsi/rdi/rbx, and loads the pointer
;              and stride arguments from the stack (strides via movsxd).
;   Win64    : invokes SAVE_XMM 7, u (presumably preserving the callee-saved
;              xmm6/xmm7 that the body overwrites -- see x86_abi_support.asm),
;              then sign-extends the two int stride arguments in place.
;   SysV x64 : sign-extends the two int stride arguments in place and treats
;              height as the 32-bit register r8d.
;
; The sign extension matters because both 64-bit calling conventions leave the
; upper 32 bits of a register holding an int argument unspecified, while the
; body uses the strides as full 64-bit index registers.
;-----------------------------------------------------------------------------
%macro STACK_FRAME_CREATE_X3 0
%if ABI_IS_32BIT
  %define     src_ptr       rsi
  %define     src_stride    rax
  %define     ref_ptr       rdi
  %define     ref_stride    rdx
  %define     end_ptr       rcx
  %define     ret_var       rbx
  %define     result_ptr    arg(4)
  %define     max_sad       arg(4)
  %define     height        dword ptr arg(4)
    push        rbp
    mov         rbp,        rsp
    push        rsi
    push        rdi
    push        rbx

    mov         rsi,        arg(0)              ; src_ptr
    mov         rdi,        arg(2)              ; ref_ptr

    movsxd      rax,        dword ptr arg(1)    ; src_stride
    movsxd      rdx,        dword ptr arg(3)    ; ref_stride
%else
  %if LIBVPX_YASM_WIN64
    SAVE_XMM 7, u
    %define     src_ptr     rcx
    %define     src_stride  rdx
    %define     ref_ptr     r8
    %define     ref_stride  r9
    %define     end_ptr     r10
    %define     ret_var     r11
    %define     result_ptr  [rsp+xmm_stack_space+8+4*8]
    %define     max_sad     [rsp+xmm_stack_space+8+4*8]
    %define     height      dword ptr [rsp+xmm_stack_space+8+4*8]

    ; int arguments: only the low 32 bits are defined on entry.
    movsxd      rdx,        edx                 ; src_stride
    movsxd      r9,         r9d                 ; ref_stride
  %else
    %define     src_ptr     rdi
    %define     src_stride  rsi
    %define     ref_ptr     rdx
    %define     ref_stride  rcx
    %define     end_ptr     r9
    %define     ret_var     r10
    %define     result_ptr  r8
    %define     max_sad     r8
    ; height is an int: operate on the 32-bit view so that undefined upper
    ; bits of r8 can never influence the row counter.
    %define     height      r8d

    ; int arguments: only the low 32 bits are defined on entry.
    movsxd      rsi,        esi                 ; src_stride
    movsxd      rcx,        ecx                 ; ref_stride
  %endif
%endif

%endmacro

;-----------------------------------------------------------------------------
; STACK_FRAME_DESTROY_X3
;
; Clears the symbolic operand names set up by STACK_FRAME_CREATE_X3, undoes
; the ABI-specific prologue (register pops on 32-bit, RESTORE_XMM on Win64)
; and returns to the caller.
;-----------------------------------------------------------------------------
%macro STACK_FRAME_DESTROY_X3 0
  %define     src_ptr
  %define     src_stride
  %define     ref_ptr
  %define     ref_stride
  %define     end_ptr
  %define     ret_var
  %define     result_ptr
  %define     max_sad
  %define     height

%if ABI_IS_32BIT
    pop         rbx
    pop         rdi
    pop         rsi
    pop         rbp
%else
  %if LIBVPX_YASM_WIN64
    RESTORE_XMM
  %endif
%endif
    ret
%endmacro

SECTION .text

;-----------------------------------------------------------------------------
;void vp8_copy32xn_sse3(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *dst_ptr,
;    int  dst_stride,
;    int height);
;
; Copies `height` rows of 32 bytes each from src_ptr to dst_ptr, advancing by
; src_stride / dst_stride bytes per row.
;
; Operand names (bound by STACK_FRAME_CREATE_X3):
;   src_ptr / src_stride : current source row and its stride
;   ref_ptr / ref_stride : current destination row and its stride
;   end_ptr              : scratch pointer to "row + 2 * stride"
;   height               : rows still to copy
;
; Loads use movdqu (no alignment requirement on the source).  Stores use
; movdqa, so every destination row must be 16-byte aligned.
; Clobbers xmm0-xmm7 and flags.  A height <= 0 copies nothing.
;-----------------------------------------------------------------------------
globalsym(vp8_copy32xn_sse3)
sym(vp8_copy32xn_sse3):

    STACK_FRAME_CREATE_X3

    ; The unrolled loop always moves four rows, so only enter it when at
    ; least four rows were requested; smaller counts go straight to the
    ; row-at-a-time path.
    cmp         height,     4
    jl          .block_copy_sse3_remainder

.block_copy_sse3_loopx4:
    lea         end_ptr,    [src_ptr+src_stride*2]      ; source row 2

    movdqu      xmm0,       XMMWORD PTR [src_ptr]
    movdqu      xmm1,       XMMWORD PTR [src_ptr + 16]
    movdqu      xmm2,       XMMWORD PTR [src_ptr + src_stride]
    movdqu      xmm3,       XMMWORD PTR [src_ptr + src_stride + 16]
    movdqu      xmm4,       XMMWORD PTR [end_ptr]
    movdqu      xmm5,       XMMWORD PTR [end_ptr + 16]
    movdqu      xmm6,       XMMWORD PTR [end_ptr + src_stride]
    movdqu      xmm7,       XMMWORD PTR [end_ptr + src_stride + 16]

    lea         src_ptr,    [src_ptr+src_stride*4]      ; next group of 4 rows

    lea         end_ptr,    [ref_ptr+ref_stride*2]      ; destination row 2

    movdqa      XMMWORD PTR [ref_ptr], xmm0
    movdqa      XMMWORD PTR [ref_ptr + 16], xmm1
    movdqa      XMMWORD PTR [ref_ptr + ref_stride], xmm2
    movdqa      XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
    movdqa      XMMWORD PTR [end_ptr], xmm4
    movdqa      XMMWORD PTR [end_ptr + 16], xmm5
    movdqa      XMMWORD PTR [end_ptr + ref_stride], xmm6
    movdqa      XMMWORD PTR [end_ptr + ref_stride + 16], xmm7

    lea         ref_ptr,    [ref_ptr+ref_stride*4]      ; next group of 4 rows

    sub         height,     4
    cmp         height,     4
    jge         .block_copy_sse3_loopx4

.block_copy_sse3_remainder:
    ; Check whether any rows (fewer than four) remain to be copied.
    cmp         height,     0
    jle         .copy_is_done

.block_copy_sse3_loop:
    movdqu      xmm0,       XMMWORD PTR [src_ptr]
    movdqu      xmm1,       XMMWORD PTR [src_ptr + 16]
    lea         src_ptr,    [src_ptr+src_stride]

    movdqa      XMMWORD PTR [ref_ptr], xmm0
    movdqa      XMMWORD PTR [ref_ptr + 16], xmm1
    lea         ref_ptr,    [ref_ptr+ref_stride]

    sub         height,     1
    jne         .block_copy_sse3_loop

.copy_is_done:
    STACK_FRAME_DESTROY_X3