;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

%include "vpx_ports/x86_abi_support.asm"

; TABULATE_SSIM - fold one row of samples into the running SSIM sums.
;   In:       xmm3 = source samples as 16-bit words, xmm4 = reference samples
;             as 16-bit words
;   Updates:  xmm15 += s      (word lanes, unsigned saturating add)
;             xmm14 += r      (word lanes, unsigned saturating add)
;             xmm13 += s*s    (dword lanes, pairwise products via pmaddwd)
;             xmm12 += r*r    (dword lanes)
;             xmm11 += s*r    (dword lanes)
;   Clobbers: xmm1 (s*s), xmm2 (r*r), xmm3 (s*r); xmm4 is left untouched.
%macro TABULATE_SSIM 0
        movdqa          xmm1, xmm3      ; keep copies: xmm3 is overwritten below
        movdqa          xmm2, xmm4
        paddusw         xmm15, xmm3     ; sum_s
        paddusw         xmm14, xmm4     ; sum_r
        pmaddwd         xmm1, xmm1      ; s*s, adjacent pairs summed to dwords
        pmaddwd         xmm2, xmm2      ; r*r
        pmaddwd         xmm3, xmm4      ; s*r
        paddd           xmm13, xmm1     ; sum_sq_s
        paddd           xmm12, xmm2     ; sum_sq_r
        paddd           xmm11, xmm3     ; sum_sxr
%endmacro

; SUM_ACROSS_Q reg - horizontally add the four dword lanes of %1.
; The dwords are first widened to qwords by interleaving with xmm0, then the
; qwords are added, leaving the total in the low qword of %1.
;   Requires: xmm0 == 0; %1 must not be xmm0 or xmm2
;   Clobbers: xmm2
%macro SUM_ACROSS_Q 1
        movdqa          xmm2, %1
        punpckhdq       xmm2, xmm0      ; dwords 2,3 -> two qwords
        punpckldq       %1, xmm0        ; dwords 0,1 -> two qwords
        paddq           %1, xmm2        ; two partial qword sums
        movdqa          xmm2, %1
        punpckhqdq      xmm2, xmm0      ; high partial sum moved to low qword
        punpcklqdq      %1, xmm0        ; low partial sum kept, high qword zeroed
        paddq           %1, xmm2        ; low qword of %1 = total
%endmacro

; SUM_ACROSS_W reg - horizontally add the eight 16-bit word lanes of %1.
; The words are widened to dwords by interleaving with xmm0 and added pairwise,
; then SUM_ACROSS_Q finishes the reduction.
;   Requires: xmm0 == 0; %1 must not be xmm0, xmm1 or xmm2
;   Clobbers: xmm1, xmm2
%macro SUM_ACROSS_W 1
        movdqa          xmm1, %1
        punpckhwd       xmm1, xmm0      ; words 4..7 -> dwords
        punpcklwd       %1, xmm0        ; words 0..3 -> dwords
        paddd           %1, xmm1
        SUM_ACROSS_Q    %1
%endmacro

SECTION .text

;void vpx_ssim_parms_8x8_sse2(
;    unsigned char *s,
;    int sp,
;    unsigned char *r,
;    int rp,
;    uint32_t *sum_s,
;    uint32_t *sum_r,
;    uint32_t *sum_sq_s,
;    uint32_t *sum_sq_r,
;    uint32_t *sum_sxr);
;
; TODO: Use parm passing through structure, probably don't need the pxors
; ( calling app will initialize to 0 ) could easily fit everything in sse2
; without too much hassle, and can probably do better estimates with psadbw
; or pavgb. At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
;-----------------------------------------------------------------------
; vpx_ssim_parms_8x8_sse2
;
; Accumulates the five SSIM sums over an 8x8 block: 8 rows, 8 bytes read
; per row from each of s and r.
;
; Args (read through the arg() macro after SHADOW_ARGS_TO_STACK):
;   arg(0) s         source pixels
;   arg(1) sp        source stride in bytes (int, sign-extended here)
;   arg(2) r         reference pixels
;   arg(3) rp        reference stride in bytes (int, sign-extended here)
;   arg(4..8)        uint32_t* outputs: sum_s, sum_r, sum_sq_s, sum_sq_r,
;                    sum_sxr (each written with a 32-bit store)
;
; Register roles:
;   rsi / rdi        current row of s / r
;   rcx / rax        sp / rp
;   rdx              rows remaining
;   xmm0             all zero (unpack partner)
;   xmm3 / xmm4      current row of s / r widened to 16-bit words
;   xmm1 / xmm2      scratch used by TABULATE_SSIM and SUM_ACROSS_*
;   xmm15..xmm11     sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr accumulators
;
; NOTE(review): SAVE_XMM / RESTORE_XMM come from x86_abi_support.asm;
; presumably they preserve the callee-saved xmm registers where the ABI
; requires it - confirm against that file.
;-----------------------------------------------------------------------
globalsym(vpx_ssim_parms_8x8_sse2)
sym(vpx_ssim_parms_8x8_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 9
    SAVE_XMM 15
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)          ; s
    ; sp and rp are C ints: only the low 32 bits of their argument slots are
    ; defined, so sign-extend them instead of loading all 64 bits.
    movsxd      rcx,        dword arg(1)    ; sp
    mov         rdi,        arg(2)          ; r
    movsxd      rax,        dword arg(3)    ; rp

    pxor        xmm0,       xmm0            ; zero, used for widening unpacks
    pxor        xmm15,      xmm15           ; sum_s
    pxor        xmm14,      xmm14           ; sum_r
    pxor        xmm13,      xmm13           ; sum_sq_s
    pxor        xmm12,      xmm12           ; sum_sq_r
    pxor        xmm11,      xmm11           ; sum_sxr

    mov         rdx,        8               ; row counter
.NextRow:

    ; grab 8 source and 8 reference pixels and widen bytes to words
    movq        xmm3,       [rsi]
    movq        xmm4,       [rdi]
    punpcklbw   xmm3,       xmm0            ; low_s
    punpcklbw   xmm4,       xmm0            ; low_r

    ; Each word lane of xmm15/xmm14 receives 8 values of at most 255, so the
    ; saturating word adds inside TABULATE_SSIM cannot saturate here.
    TABULATE_SSIM

    add         rsi,        rcx             ; next s row
    add         rdi,        rax             ; next r row

    dec         rdx                         ; counter
    jnz         .NextRow

    ; reduce every accumulator to a single total in its low lane
    SUM_ACROSS_W    xmm15
    SUM_ACROSS_W    xmm14
    SUM_ACROSS_Q    xmm13
    SUM_ACROSS_Q    xmm12
    SUM_ACROSS_Q    xmm11

    ; store the low 32 bits of each total through the output pointers
    mov         rdi,        arg(4)
    movd        [rdi],      xmm15           ; *sum_s
    mov         rdi,        arg(5)
    movd        [rdi],      xmm14           ; *sum_r
    mov         rdi,        arg(6)
    movd        [rdi],      xmm13           ; *sum_sq_s
    mov         rdi,        arg(7)
    movd        [rdi],      xmm12           ; *sum_sq_r
    mov         rdi,        arg(8)
    movd        [rdi],      xmm11           ; *sum_sxr

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret