;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

%define BLOCK_HEIGHT_WIDTH 4
%define VP8_FILTER_WEIGHT 128
%define VP8_FILTER_SHIFT  7

SECTION .text

;/************************************************************************************
; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
; input pixel array has output_height rows. This function handles 8 pixels in the
; horizontal direction, calculating ONE row each iteration to take advantage of the
; 128-bit operations.
;
; The original note states that output_height is assumed to be an even number. The
; loops below process one row per iteration and are do-while style (dec/jnz), so what
; they actually require is output_height >= 1: a height of 0 would wrap the counter.
;
; This is an implementation of some of the SSE optimizations first seen in ffvp8
;
;*************************************************************************************/
;void vp8_filter_block1d8_h6_ssse3
;(
;    unsigned char  *src_ptr,
;    unsigned int    src_pixels_per_line,
;    unsigned char  *output_ptr,
;    unsigned int    output_pitch,
;    unsigned int    output_height,
;    unsigned int    vp8_filter_index
;)
;
; Register use after setup (both the 6-tap and the 4-tap path):
;   rsi = source row pointer          rax = src_pixels_per_line
;   rdi = destination row pointer     rdx = output_pitch
;   rcx = remaining rows              xmm7 = rd
; rd, k0_k5, shuf2bfrom1 and shuf3bfrom1 are tables defined outside this view.
; NOTE(review): rd is presumably the rounding constant that pairs with the
; ">> 7" (VP8_FILTER_SHIFT) below -- confirm against its definition.
globalsym(vp8_filter_block1d8_h6_ssse3)
sym(vp8_filter_block1d8_h6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ;table index
    xor         rsi, rsi                ;esi = 0, compared against the first tap below
    shl         rdx, 4                  ;16 bytes per table entry

    movdqa      xmm7, [GLOBAL(rd)]

    lea         rax, [GLOBAL(k0_k5)]
    add         rax, rdx                ;rax -> k0_k5 entry for this filter index
    mov         rdi, arg(2)             ;output_ptr

    ; If the first dword of the k0_k5 entry is zero, the outer taps are skipped
    ; and the cheaper 4-tap path is used.
    cmp         esi, DWORD PTR [rax]
    je          .vp8_filter_block1d8_h4_ssse3

    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

    mov         rsi, arg(0)             ;src_ptr
    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height

    movsxd      rdx, dword ptr arg(3)   ;output_pitch

    ; rdi is advanced by output_pitch *before* each store inside the loop,
    ; so start one pitch early.
    sub         rdi, rdx
;xmm3 free
.filter_block1d8_h6_rowloop_ssse3:
    movq        xmm0,   MMWORD PTR [rsi - 2]    ; -2 -1  0  1  2  3  4  5

    movq        xmm2,   MMWORD PTR [rsi + 3]    ;  3  4  5  6  7  8  9 10

    punpcklbw   xmm0,   xmm2                    ; -2  3 -1  4  0  5  1  6  2  7  3  8  4  9  5 10

    movdqa      xmm1,   xmm0
    pmaddubsw   xmm0,   xmm4                    ; k0_k5 products, one word per output pixel

    movdqa      xmm2,   xmm1
    pshufb      xmm1,   [GLOBAL(shuf2bfrom1)]   ; NOTE(review): presumably re-pairs bytes for k2_k4 -- table not visible here

    pshufb      xmm2,   [GLOBAL(shuf3bfrom1)]   ; NOTE(review): presumably re-pairs bytes for k1_k3 -- table not visible here
    pmaddubsw   xmm1,   xmm5                    ; k2_k4 products

    lea         rdi,    [rdi + rdx]             ; next destination row (lea leaves flags untouched)
    pmaddubsw   xmm2,   xmm6                    ; k1_k3 products

    lea         rsi,    [rsi + rax]             ; next source row
    dec         rcx                             ; sets ZF for the jnz at the bottom; the SSE ops
                                                ; and the store in between do not modify flags

    paddsw      xmm0,   xmm1
    paddsw      xmm2,   xmm7                    ; + rd

    paddsw      xmm0,   xmm2

    psraw       xmm0,   7                       ; >> VP8_FILTER_SHIFT

    packuswb    xmm0,   xmm0                    ; saturate to unsigned bytes

    movq        MMWORD Ptr [rdi], xmm0          ; store 8 output pixels
    jnz         .filter_block1d8_h6_rowloop_ssse3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

; 4-tap path: same row loop without the k0_k5 term. Entered with rax -> k0_k5
; entry, rdi = output_ptr and xmm7 = rd already set up above. Kept as a local
; label, matching the 4-tap paths of the other filters in this file.
.vp8_filter_block1d8_h4_ssse3:
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

    ; xmm3/xmm4 are free on this path, so the shuffle masks live in registers.
    movdqa      xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)]
    movdqa      xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)]

    mov         rsi, arg(0)             ;src_ptr

    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height

    movsxd      rdx, dword ptr arg(3)   ;output_pitch

    sub         rdi, rdx                ; pre-bias, see the 6-tap path

.filter_block1d8_h4_rowloop_ssse3:
    movq        xmm0,   MMWORD PTR [rsi - 2]    ; -2 -1  0  1  2  3  4  5

    movq        xmm1,   MMWORD PTR [rsi + 3]    ;  3  4  5  6  7  8  9 10

    punpcklbw   xmm0,   xmm1                    ; -2  3 -1  4  0  5  1  6  2  7  3  8  4  9  5 10

    movdqa      xmm2,   xmm0
    pshufb      xmm0,   xmm3                    ; shuf2bfrom1

    pshufb      xmm2,   xmm4                    ; shuf3bfrom1
    pmaddubsw   xmm0,   xmm5                    ; k2_k4 products

    lea         rdi,    [rdi + rdx]
    pmaddubsw   xmm2,   xmm6                    ; k1_k3 products

    lea         rsi,    [rsi + rax]
    dec         rcx                             ; flags consumed by the jnz below

    paddsw      xmm0,   xmm7                    ; + rd

    paddsw      xmm0,   xmm2

    psraw       xmm0,   7

    packuswb    xmm0,   xmm0

    movq        MMWORD Ptr [rdi], xmm0

    jnz         .filter_block1d8_h4_rowloop_ssse3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;void vp8_filter_block1d16_h6_ssse3
;(
;    unsigned char  *src_ptr,
;    unsigned int    src_pixels_per_line,
;    unsigned char  *output_ptr,
;    unsigned int    output_pitch,
;    unsigned int    output_height,
;    unsigned int    vp8_filter_index
;)
globalsym(vp8_filter_block1d16_h6_ssse3)
sym(vp8_filter_block1d16_h6_ssse3):
    ; Horizontal 6-tap filter, 16 output pixels per row, one row per iteration.
    ; Each row is computed as two 8-pixel halves (source offsets -2/+3 and
    ; +6/+11) that are packed to bytes and joined into a single 16-byte store.
    ; There is no 4-tap shortcut in this routine: all three coefficient
    ; vectors are always applied.
    ;
    ; The store uses movdqa, so every destination row address must be
    ; 16-byte aligned. The loop is do-while style (dec/jnz), so
    ; output_height must be at least 1.
    ;
    ; k0_k5, rd, shuf2bfrom1 and shuf3bfrom1 are tables defined outside this view.
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ;table index
    shl         rdx, 4                  ;16 bytes per table entry

    lea         rax, [GLOBAL(k0_k5)]
    add         rax, rdx                ;rax -> k0_k5 entry for this filter index

    mov         rdi, arg(2)             ;output_ptr

    mov         rsi, arg(0)             ;src_ptr

    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height
    movsxd      rdx, dword ptr arg(3)   ;output_pitch

.filter_block1d16_h6_rowloop_ssse3:
    ; ---- left 8 pixels (accumulated in xmm0) ----
    movq        xmm0,   MMWORD PTR [rsi - 2]    ; -2 -1  0  1  2  3  4  5

    movq        xmm3,   MMWORD PTR [rsi + 3]    ;  3  4  5  6  7  8  9 10

    punpcklbw   xmm0,   xmm3                    ; -2  3 -1  4  0  5  1  6  2  7  3  8  4  9  5 10

    movdqa      xmm1,   xmm0
    pmaddubsw   xmm0,   xmm4                    ; k0_k5 products

    movdqa      xmm2,   xmm1
    pshufb      xmm1,   [GLOBAL(shuf2bfrom1)]

    pshufb      xmm2,   [GLOBAL(shuf3bfrom1)]
    ; ---- right 8 pixels start loading here, interleaved with the left half ----
    movq        xmm3,   MMWORD PTR [rsi +  6]

    pmaddubsw   xmm1,   xmm5                    ; k2_k4 products (left)
    movq        xmm7,   MMWORD PTR [rsi + 11]   ; xmm7 is a scratch register in this routine

    pmaddubsw   xmm2,   xmm6                    ; k1_k3 products (left)
    punpcklbw   xmm3,   xmm7

    paddsw      xmm0,   xmm1
    movdqa      xmm1,   xmm3

    pmaddubsw   xmm3,   xmm4                    ; k0_k5 products (right)
    paddsw      xmm0,   xmm2

    movdqa      xmm2,   xmm1
    paddsw      xmm0,   [GLOBAL(rd)]            ; + rd (left)

    pshufb      xmm1,   [GLOBAL(shuf2bfrom1)]
    pshufb      xmm2,   [GLOBAL(shuf3bfrom1)]

    psraw       xmm0,   7                       ; >> VP8_FILTER_SHIFT (left)
    pmaddubsw   xmm1,   xmm5                    ; k2_k4 products (right)

    pmaddubsw   xmm2,   xmm6                    ; k1_k3 products (right)
    packuswb    xmm0,   xmm0                    ; left half -> 8 unsigned bytes

    lea         rsi,    [rsi + rax]             ; next source row
    paddsw      xmm3,   xmm1

    paddsw      xmm3,   xmm2

    paddsw      xmm3,   [GLOBAL(rd)]            ; + rd (right)

    psraw       xmm3,   7

    packuswb    xmm3,   xmm3                    ; right half -> 8 unsigned bytes

    punpcklqdq  xmm0,   xmm3                    ; low qword = left half, high qword = right half

    movdqa      XMMWORD Ptr [rdi], xmm0         ; aligned 16-byte store

    lea         rdi,    [rdi + rdx]             ; next destination row
    dec         rcx
    jnz         .filter_block1d16_h6_rowloop_ssse3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp8_filter_block1d4_h6_ssse3
;(
;    unsigned char  *src_ptr,
;    unsigned int    src_pixels_per_line,
;    unsigned char  *output_ptr,
;    unsigned int    output_pitch,
;    unsigned int    output_height,
;    unsigned int    vp8_filter_index
;)
;
; Horizontal filter producing 4 output pixels per row, one row per iteration.
; Each iteration does an unaligned 16-byte load starting at src - 2, i.e. it
; reads bytes src-2 .. src+13 of the row even though only 4 pixels are stored.
; The loops are do-while style (dec/jnz), so output_height must be at least 1.
;
; k0_k5, rd, shuf1b, shuf2b and shuf3b are tables defined outside this view.
; NOTE(review): the shuf*b masks presumably gather the byte pairs matching the
; k0_k5 / k2_k4 / k1_k3 coefficient pairs -- confirm against their definitions.
globalsym(vp8_filter_block1d4_h6_ssse3)
sym(vp8_filter_block1d4_h6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ;table index
    xor         rsi, rsi                ;esi = 0, compared against the first tap below
    shl         rdx, 4                  ;16 bytes per table entry

    lea         rax, [GLOBAL(k0_k5)]
    add         rax, rdx                ;rax -> k0_k5 entry for this filter index
    movdqa      xmm7, [GLOBAL(rd)]

    ; If the first dword of the k0_k5 entry is zero, take the 4-tap path.
    cmp         esi, DWORD PTR [rax]
    je          .vp8_filter_block1d4_h4_ssse3

    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

    mov         rsi, arg(0)             ;src_ptr
    mov         rdi, arg(2)             ;output_ptr
    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height

    movsxd      rdx, dword ptr arg(3)   ;output_pitch

;xmm3 free
.filter_block1d4_h6_rowloop_ssse3:
    movdqu      xmm0,   XMMWORD PTR [rsi - 2]

    movdqa      xmm1,   xmm0
    pshufb      xmm0,   [GLOBAL(shuf1b)]

    movdqa      xmm2,   xmm1
    pshufb      xmm1,   [GLOBAL(shuf2b)]
    pmaddubsw   xmm0,   xmm4                    ; k0_k5 products
    pshufb      xmm2,   [GLOBAL(shuf3b)]
    pmaddubsw   xmm1,   xmm5                    ; k2_k4 products

    pmaddubsw   xmm2,   xmm6                    ; k1_k3 products

    lea         rsi,    [rsi + rax]             ; next source row

    paddsw      xmm0,   xmm1
    paddsw      xmm0,   xmm7                    ; + rd
    paddsw      xmm0,   xmm2
    psraw       xmm0,   7                       ; >> VP8_FILTER_SHIFT
    packuswb    xmm0,   xmm0                    ; saturate to unsigned bytes

    movd        DWORD PTR [rdi], xmm0           ; store 4 output pixels

    add         rdi,    rdx                     ; next destination row
    dec         rcx
    jnz         .filter_block1d4_h6_rowloop_ssse3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

; 4-tap path: the k0_k5 term is skipped. Entered with rax -> k0_k5 entry and
; xmm7 = rd. xmm0/xmm3 are free here, so the shuffle masks are kept in registers.
.vp8_filter_block1d4_h4_ssse3:
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
    movdqa      xmm0, XMMWORD PTR [GLOBAL(shuf2b)]
    movdqa      xmm3, XMMWORD PTR [GLOBAL(shuf3b)]

    mov         rsi, arg(0)             ;src_ptr
    mov         rdi, arg(2)             ;output_ptr
    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height

    movsxd      rdx, dword ptr arg(3)   ;output_pitch

.filter_block1d4_h4_rowloop_ssse3:
    movdqu      xmm1,   XMMWORD PTR [rsi - 2]

    movdqa      xmm2,   xmm1
    pshufb      xmm1,   xmm0 ;;[GLOBAL(shuf2b)]
    pshufb      xmm2,   xmm3 ;;[GLOBAL(shuf3b)]
    pmaddubsw   xmm1,   xmm5                    ; k2_k4 products

    pmaddubsw   xmm2,   xmm6                    ; k1_k3 products

    lea         rsi,    [rsi + rax]             ; next source row

    paddsw      xmm1,   xmm7                    ; + rd
    paddsw      xmm1,   xmm2
    psraw       xmm1,   7
    packuswb    xmm1,   xmm1

    movd        DWORD PTR [rdi], xmm1           ; store 4 output pixels

    add         rdi,    rdx                     ; next destination row
    dec         rcx
    jnz         .filter_block1d4_h4_rowloop_ssse3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

Build Coastguard Worker 407*fb1b10abSAndroid Build Coastguard Worker 408*fb1b10abSAndroid Build Coastguard Worker 409*fb1b10abSAndroid Build Coastguard Worker;void vp8_filter_block1d16_v6_ssse3 410*fb1b10abSAndroid Build Coastguard Worker;( 411*fb1b10abSAndroid Build Coastguard Worker; unsigned char *src_ptr, 412*fb1b10abSAndroid Build Coastguard Worker; unsigned int src_pitch, 413*fb1b10abSAndroid Build Coastguard Worker; unsigned char *output_ptr, 414*fb1b10abSAndroid Build Coastguard Worker; unsigned int out_pitch, 415*fb1b10abSAndroid Build Coastguard Worker; unsigned int output_height, 416*fb1b10abSAndroid Build Coastguard Worker; unsigned int vp8_filter_index 417*fb1b10abSAndroid Build Coastguard Worker;) 418*fb1b10abSAndroid Build Coastguard Workerglobalsym(vp8_filter_block1d16_v6_ssse3) 419*fb1b10abSAndroid Build Coastguard Workersym(vp8_filter_block1d16_v6_ssse3): 420*fb1b10abSAndroid Build Coastguard Worker push rbp 421*fb1b10abSAndroid Build Coastguard Worker mov rbp, rsp 422*fb1b10abSAndroid Build Coastguard Worker SHADOW_ARGS_TO_STACK 6 423*fb1b10abSAndroid Build Coastguard Worker SAVE_XMM 7 424*fb1b10abSAndroid Build Coastguard Worker GET_GOT rbx 425*fb1b10abSAndroid Build Coastguard Worker push rsi 426*fb1b10abSAndroid Build Coastguard Worker push rdi 427*fb1b10abSAndroid Build Coastguard Worker ; end prolog 428*fb1b10abSAndroid Build Coastguard Worker 429*fb1b10abSAndroid Build Coastguard Worker movsxd rdx, DWORD PTR arg(5) ;table index 430*fb1b10abSAndroid Build Coastguard Worker xor rsi, rsi 431*fb1b10abSAndroid Build Coastguard Worker shl rdx, 4 ; 432*fb1b10abSAndroid Build Coastguard Worker 433*fb1b10abSAndroid Build Coastguard Worker lea rax, [GLOBAL(k0_k5)] 434*fb1b10abSAndroid Build Coastguard Worker add rax, rdx 435*fb1b10abSAndroid Build Coastguard Worker 436*fb1b10abSAndroid Build Coastguard Worker cmp esi, DWORD PTR [rax] 437*fb1b10abSAndroid Build Coastguard Worker je .vp8_filter_block1d16_v4_ssse3 438*fb1b10abSAndroid Build Coastguard 
Worker 439*fb1b10abSAndroid Build Coastguard Worker movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 440*fb1b10abSAndroid Build Coastguard Worker movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 441*fb1b10abSAndroid Build Coastguard Worker movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 442*fb1b10abSAndroid Build Coastguard Worker 443*fb1b10abSAndroid Build Coastguard Worker mov rsi, arg(0) ;src_ptr 444*fb1b10abSAndroid Build Coastguard Worker movsxd rdx, DWORD PTR arg(1) ;pixels_per_line 445*fb1b10abSAndroid Build Coastguard Worker mov rdi, arg(2) ;output_ptr 446*fb1b10abSAndroid Build Coastguard Worker 447*fb1b10abSAndroid Build Coastguard Worker%if ABI_IS_32BIT=0 448*fb1b10abSAndroid Build Coastguard Worker movsxd r8, DWORD PTR arg(3) ;out_pitch 449*fb1b10abSAndroid Build Coastguard Worker%endif 450*fb1b10abSAndroid Build Coastguard Worker mov rax, rsi 451*fb1b10abSAndroid Build Coastguard Worker movsxd rcx, DWORD PTR arg(4) ;output_height 452*fb1b10abSAndroid Build Coastguard Worker add rax, rdx 453*fb1b10abSAndroid Build Coastguard Worker 454*fb1b10abSAndroid Build Coastguard Worker 455*fb1b10abSAndroid Build Coastguard Worker.vp8_filter_block1d16_v6_ssse3_loop: 456*fb1b10abSAndroid Build Coastguard Worker movq xmm1, MMWORD PTR [rsi] ;A 457*fb1b10abSAndroid Build Coastguard Worker movq xmm2, MMWORD PTR [rsi + rdx] ;B 458*fb1b10abSAndroid Build Coastguard Worker movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C 459*fb1b10abSAndroid Build Coastguard Worker movq xmm4, MMWORD PTR [rax + rdx * 2] ;D 460*fb1b10abSAndroid Build Coastguard Worker movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E 461*fb1b10abSAndroid Build Coastguard Worker 462*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm2, xmm4 ;B D 463*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm3, xmm0 ;C E 464*fb1b10abSAndroid Build Coastguard Worker 465*fb1b10abSAndroid Build Coastguard Worker movq xmm0, MMWORD PTR [rax + rdx * 4] ;F 466*fb1b10abSAndroid Build Coastguard Worker 467*fb1b10abSAndroid Build Coastguard Worker pmaddubsw 
xmm3, xmm6 468*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm1, xmm0 ;A F 469*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm2, xmm7 470*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm1, xmm5 471*fb1b10abSAndroid Build Coastguard Worker 472*fb1b10abSAndroid Build Coastguard Worker paddsw xmm2, xmm3 473*fb1b10abSAndroid Build Coastguard Worker paddsw xmm2, xmm1 474*fb1b10abSAndroid Build Coastguard Worker paddsw xmm2, [GLOBAL(rd)] 475*fb1b10abSAndroid Build Coastguard Worker psraw xmm2, 7 476*fb1b10abSAndroid Build Coastguard Worker packuswb xmm2, xmm2 477*fb1b10abSAndroid Build Coastguard Worker 478*fb1b10abSAndroid Build Coastguard Worker movq MMWORD PTR [rdi], xmm2 ;store the results 479*fb1b10abSAndroid Build Coastguard Worker 480*fb1b10abSAndroid Build Coastguard Worker movq xmm1, MMWORD PTR [rsi + 8] ;A 481*fb1b10abSAndroid Build Coastguard Worker movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B 482*fb1b10abSAndroid Build Coastguard Worker movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C 483*fb1b10abSAndroid Build Coastguard Worker movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D 484*fb1b10abSAndroid Build Coastguard Worker movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E 485*fb1b10abSAndroid Build Coastguard Worker 486*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm2, xmm4 ;B D 487*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm3, xmm0 ;C E 488*fb1b10abSAndroid Build Coastguard Worker 489*fb1b10abSAndroid Build Coastguard Worker movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F 490*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm3, xmm6 491*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm1, xmm0 ;A F 492*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm2, xmm7 493*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm1, xmm5 494*fb1b10abSAndroid Build Coastguard Worker 495*fb1b10abSAndroid Build Coastguard Worker add rsi, rdx 496*fb1b10abSAndroid Build Coastguard Worker add rax, rdx 497*fb1b10abSAndroid Build Coastguard Worker;-- 
498*fb1b10abSAndroid Build Coastguard Worker;-- 499*fb1b10abSAndroid Build Coastguard Worker paddsw xmm2, xmm3 500*fb1b10abSAndroid Build Coastguard Worker paddsw xmm2, xmm1 501*fb1b10abSAndroid Build Coastguard Worker paddsw xmm2, [GLOBAL(rd)] 502*fb1b10abSAndroid Build Coastguard Worker psraw xmm2, 7 503*fb1b10abSAndroid Build Coastguard Worker packuswb xmm2, xmm2 504*fb1b10abSAndroid Build Coastguard Worker 505*fb1b10abSAndroid Build Coastguard Worker movq MMWORD PTR [rdi+8], xmm2 506*fb1b10abSAndroid Build Coastguard Worker 507*fb1b10abSAndroid Build Coastguard Worker%if ABI_IS_32BIT 508*fb1b10abSAndroid Build Coastguard Worker add rdi, DWORD PTR arg(3) ;out_pitch 509*fb1b10abSAndroid Build Coastguard Worker%else 510*fb1b10abSAndroid Build Coastguard Worker add rdi, r8 511*fb1b10abSAndroid Build Coastguard Worker%endif 512*fb1b10abSAndroid Build Coastguard Worker dec rcx 513*fb1b10abSAndroid Build Coastguard Worker jnz .vp8_filter_block1d16_v6_ssse3_loop 514*fb1b10abSAndroid Build Coastguard Worker 515*fb1b10abSAndroid Build Coastguard Worker ; begin epilog 516*fb1b10abSAndroid Build Coastguard Worker pop rdi 517*fb1b10abSAndroid Build Coastguard Worker pop rsi 518*fb1b10abSAndroid Build Coastguard Worker RESTORE_GOT 519*fb1b10abSAndroid Build Coastguard Worker RESTORE_XMM 520*fb1b10abSAndroid Build Coastguard Worker UNSHADOW_ARGS 521*fb1b10abSAndroid Build Coastguard Worker pop rbp 522*fb1b10abSAndroid Build Coastguard Worker ret 523*fb1b10abSAndroid Build Coastguard Worker 524*fb1b10abSAndroid Build Coastguard Worker.vp8_filter_block1d16_v4_ssse3: 525*fb1b10abSAndroid Build Coastguard Worker movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 526*fb1b10abSAndroid Build Coastguard Worker movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 527*fb1b10abSAndroid Build Coastguard Worker 528*fb1b10abSAndroid Build Coastguard Worker mov rsi, arg(0) ;src_ptr 529*fb1b10abSAndroid Build Coastguard Worker movsxd rdx, DWORD PTR arg(1) ;pixels_per_line 530*fb1b10abSAndroid Build Coastguard 
Worker mov rdi, arg(2) ;output_ptr 531*fb1b10abSAndroid Build Coastguard Worker 532*fb1b10abSAndroid Build Coastguard Worker%if ABI_IS_32BIT=0 533*fb1b10abSAndroid Build Coastguard Worker movsxd r8, DWORD PTR arg(3) ;out_pitch 534*fb1b10abSAndroid Build Coastguard Worker%endif 535*fb1b10abSAndroid Build Coastguard Worker mov rax, rsi 536*fb1b10abSAndroid Build Coastguard Worker movsxd rcx, DWORD PTR arg(4) ;output_height 537*fb1b10abSAndroid Build Coastguard Worker add rax, rdx 538*fb1b10abSAndroid Build Coastguard Worker 539*fb1b10abSAndroid Build Coastguard Worker.vp8_filter_block1d16_v4_ssse3_loop: 540*fb1b10abSAndroid Build Coastguard Worker movq xmm2, MMWORD PTR [rsi + rdx] ;B 541*fb1b10abSAndroid Build Coastguard Worker movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C 542*fb1b10abSAndroid Build Coastguard Worker movq xmm4, MMWORD PTR [rax + rdx * 2] ;D 543*fb1b10abSAndroid Build Coastguard Worker movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E 544*fb1b10abSAndroid Build Coastguard Worker 545*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm2, xmm4 ;B D 546*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm3, xmm0 ;C E 547*fb1b10abSAndroid Build Coastguard Worker 548*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm3, xmm6 549*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm2, xmm7 550*fb1b10abSAndroid Build Coastguard Worker movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B 551*fb1b10abSAndroid Build Coastguard Worker movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C 552*fb1b10abSAndroid Build Coastguard Worker movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D 553*fb1b10abSAndroid Build Coastguard Worker movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E 554*fb1b10abSAndroid Build Coastguard Worker 555*fb1b10abSAndroid Build Coastguard Worker paddsw xmm2, [GLOBAL(rd)] 556*fb1b10abSAndroid Build Coastguard Worker paddsw xmm2, xmm3 557*fb1b10abSAndroid Build Coastguard Worker psraw xmm2, 7 558*fb1b10abSAndroid Build Coastguard Worker packuswb xmm2, xmm2 559*fb1b10abSAndroid Build 
Coastguard Worker 560*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm5, xmm4 ;B D 561*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm1, xmm0 ;C E 562*fb1b10abSAndroid Build Coastguard Worker 563*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm1, xmm6 564*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm5, xmm7 565*fb1b10abSAndroid Build Coastguard Worker 566*fb1b10abSAndroid Build Coastguard Worker movdqa xmm4, [GLOBAL(rd)] 567*fb1b10abSAndroid Build Coastguard Worker add rsi, rdx 568*fb1b10abSAndroid Build Coastguard Worker add rax, rdx 569*fb1b10abSAndroid Build Coastguard Worker;-- 570*fb1b10abSAndroid Build Coastguard Worker;-- 571*fb1b10abSAndroid Build Coastguard Worker paddsw xmm5, xmm1 572*fb1b10abSAndroid Build Coastguard Worker paddsw xmm5, xmm4 573*fb1b10abSAndroid Build Coastguard Worker psraw xmm5, 7 574*fb1b10abSAndroid Build Coastguard Worker packuswb xmm5, xmm5 575*fb1b10abSAndroid Build Coastguard Worker 576*fb1b10abSAndroid Build Coastguard Worker punpcklqdq xmm2, xmm5 577*fb1b10abSAndroid Build Coastguard Worker 578*fb1b10abSAndroid Build Coastguard Worker movdqa XMMWORD PTR [rdi], xmm2 579*fb1b10abSAndroid Build Coastguard Worker 580*fb1b10abSAndroid Build Coastguard Worker%if ABI_IS_32BIT 581*fb1b10abSAndroid Build Coastguard Worker add rdi, DWORD PTR arg(3) ;out_pitch 582*fb1b10abSAndroid Build Coastguard Worker%else 583*fb1b10abSAndroid Build Coastguard Worker add rdi, r8 584*fb1b10abSAndroid Build Coastguard Worker%endif 585*fb1b10abSAndroid Build Coastguard Worker dec rcx 586*fb1b10abSAndroid Build Coastguard Worker jnz .vp8_filter_block1d16_v4_ssse3_loop 587*fb1b10abSAndroid Build Coastguard Worker 588*fb1b10abSAndroid Build Coastguard Worker ; begin epilog 589*fb1b10abSAndroid Build Coastguard Worker pop rdi 590*fb1b10abSAndroid Build Coastguard Worker pop rsi 591*fb1b10abSAndroid Build Coastguard Worker RESTORE_GOT 592*fb1b10abSAndroid Build Coastguard Worker RESTORE_XMM 593*fb1b10abSAndroid Build Coastguard 
; remainder of the epilog that begins on the preceding line
    UNSHADOW_ARGS
    pop         rbp
    ret

;/************************************************************************************
; vp8_filter_block1d8_v6_ssse3: vertical filter over a column 8 pixels wide.
;
; One output row (8 bytes) is produced per loop iteration. Source rows are
; addressed from src_ptr in steps of src_pitch and are named A..F below:
;     A = [src], B = [src + pitch], C = [src + 2*pitch],
;     D = [src + 3*pitch], E = [src + 4*pitch], F = [src + 5*pitch]
; (D and F are reached through rax, which is kept at rsi + pitch.)
;
; Six-tap path:  out = pack_u8((BD*k1_k3 + CE*k2_k4 + AF*k0_k5 + rd) >> 7)
; Four-tap path: out = pack_u8((BD*k1_k3 + CE*k2_k4 + rd) >> 7)
; where "XY*k" is pmaddubsw on the byte-interleaved rows X and Y, the sums are
; signed-saturating 16-bit adds (paddsw) performed in the order shown in the
; code, and the final pack saturates to unsigned bytes (packuswb).
;
; The four-tap path is taken when the first dword of the selected k0_k5 table
; entry is zero (presumably meaning the outer taps are zero for that filter --
; confirm against the k0_k5 table definition). The k1_k3 / k2_k4 entries are
; read at +128 / +256 bytes from the selected k0_k5 entry.
;
; output_height is used as a count-down loop counter and is tested only after
; the first row has been written, so it must be non-zero.
;*************************************************************************************/
;void vp8_filter_block1d8_v6_ssse3
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    unsigned int   vp8_filter_index
;)
globalsym(vp8_filter_block1d8_v6_ssse3)
sym(vp8_filter_block1d8_v6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7                          ; NOTE(review): presumably preserves xmm6/xmm7 where the ABI requires it -- see x86_abi_support.asm
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ; vp8_filter_index
    xor         rsi, rsi                ; esi = 0, used as the zero operand of the cmp below
    shl         rdx, 4                  ; index * 16 = byte offset of the table entry

    lea         rax, [GLOBAL(k0_k5)]
    add         rax, rdx                ; rax -> k0_k5 entry for this filter index

    movsxd      rdx, DWORD PTR arg(1)   ; src_pitch
    mov         rdi, arg(2)             ; output_ptr
%if ABI_IS_32BIT=0
    movsxd      r8, DWORD PTR arg(3)    ; out_pitch (the 32-bit build re-reads arg(3) in the loop instead)
%endif
    movsxd      rcx, DWORD PTR arg(4)   ; output_height = number of rows to produce

    cmp         esi, DWORD PTR [rax]    ; first dword of the k0_k5 entry == 0 ?
    je          .vp8_filter_block1d8_v4_ssse3   ; yes: use the four-tap path

    movdqa      xmm5, XMMWORD PTR [rax]         ; k0_k5
    movdqa      xmm6, XMMWORD PTR [rax+256]     ; k2_k4
    movdqa      xmm7, XMMWORD PTR [rax+128]     ; k1_k3

    mov         rsi, arg(0)             ; src_ptr

    mov         rax, rsi
    add         rax, rdx                ; rax = src_ptr + pitch (base for rows D and F)

.vp8_filter_block1d8_v6_ssse3_loop:
    movq        xmm1, MMWORD PTR [rsi]                  ; A
    movq        xmm2, MMWORD PTR [rsi + rdx]            ; B
    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ; C
    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ; D
    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ; E

    punpcklbw   xmm2, xmm4              ; B D interleaved
    punpcklbw   xmm3, xmm0              ; C E interleaved

    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ; F
    movdqa      xmm4, [GLOBAL(rd)]      ; rounding value (xmm4 is free again once D has been interleaved)

    pmaddubsw   xmm3, xmm6              ; C E * k2_k4
    punpcklbw   xmm1, xmm0              ; A F interleaved
    pmaddubsw   xmm2, xmm7              ; B D * k1_k3
    pmaddubsw   xmm1, xmm5              ; A F * k0_k5
    add         rsi, rdx                ; advance both row bases by one source row
    add         rax, rdx

    ; accumulate with signed saturation, round, scale and pack to bytes
    paddsw      xmm2, xmm3
    paddsw      xmm2, xmm1
    paddsw      xmm2, xmm4
    psraw       xmm2, 7
    packuswb    xmm2, xmm2

    movq        MMWORD PTR [rdi], xmm2  ; store 8 output pixels

%if ABI_IS_32BIT
    add         rdi, DWORD PTR arg(3)   ; out_pitch
%else
    add         rdi, r8
%endif
    dec         rcx
    jnz         .vp8_filter_block1d8_v6_ssse3_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

; Four-tap path: rows A and F are not read; rd stays resident in xmm5 because
; the k0_k5 coefficients are not needed here.
.vp8_filter_block1d8_v4_ssse3:
    movdqa      xmm6, XMMWORD PTR [rax+256]     ; k2_k4
    movdqa      xmm7, XMMWORD PTR [rax+128]     ; k1_k3
    movdqa      xmm5, [GLOBAL(rd)]              ; rounding value

    mov         rsi, arg(0)             ; src_ptr

    mov         rax, rsi
    add         rax, rdx                ; rax = src_ptr + pitch (base for row D)

.vp8_filter_block1d8_v4_ssse3_loop:
    movq        xmm2, MMWORD PTR [rsi + rdx]            ; B
    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ; C
    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ; D
    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ; E

    punpcklbw   xmm2, xmm4              ; B D interleaved
    punpcklbw   xmm3, xmm0              ; C E interleaved

    pmaddubsw   xmm3, xmm6              ; C E * k2_k4
    pmaddubsw   xmm2, xmm7              ; B D * k1_k3
    add         rsi, rdx                ; advance both row bases by one source row
    add         rax, rdx

    ; accumulate with signed saturation, round, scale and pack to bytes
    paddsw      xmm2, xmm3
    paddsw      xmm2, xmm5
    psraw       xmm2, 7
    packuswb    xmm2, xmm2

    movq        MMWORD PTR [rdi], xmm2  ; store 8 output pixels

%if ABI_IS_32BIT
    add         rdi, DWORD PTR arg(3)   ; out_pitch
%else
    add         rdi, r8
%endif
    dec         rcx
    jnz         .vp8_filter_block1d8_v4_ssse3_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;/************************************************************************************
; vp8_filter_block1d4_v6_ssse3: vertical filter over a column 4 pixels wide.
;
; Same structure as the 8-wide routine, but each row is loaded as one dword
; into a 64-bit MMX register and 4 output bytes are stored per iteration.
; Source rows A..F are [src + 0..5 * pitch]; D and F are reached through rax,
; which is kept at rsi + pitch.
;
; Six-tap path:  out = pack_u8((BD*k1_k3 + CE*k2_k4 + AF*k0_k5 + rd) >> 7)
; Four-tap path: out = pack_u8((BD*k1_k3 + CE*k2_k4 + rd) >> 7)
; The four-tap path is taken when the first dword of the selected k0_k5 table
; entry is zero. Only the low 8 bytes of each 16-byte table entry and of rd
; are loaded here.
;
; output_height is used as a count-down loop counter and is tested only after
; the first row has been written, so it must be non-zero.
;
; NOTE(review): this routine uses mm0-mm7 and returns without executing emms;
; presumably callers clear the MMX state before any x87 use -- confirm.
;*************************************************************************************/
;void vp8_filter_block1d4_v6_ssse3
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    unsigned int   vp8_filter_index
;)
globalsym(vp8_filter_block1d4_v6_ssse3)
sym(vp8_filter_block1d4_v6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ; vp8_filter_index
    xor         rsi, rsi                ; esi = 0, used as the zero operand of the cmp below
    shl         rdx, 4                  ; index * 16 = byte offset of the table entry

    lea         rax, [GLOBAL(k0_k5)]
    add         rax, rdx                ; rax -> k0_k5 entry for this filter index

    movsxd      rdx, DWORD PTR arg(1)   ; src_pitch
    mov         rdi, arg(2)             ; output_ptr
%if ABI_IS_32BIT=0
    movsxd      r8, DWORD PTR arg(3)    ; out_pitch (the 32-bit build re-reads arg(3) in the loop instead)
%endif
    movsxd      rcx, DWORD PTR arg(4)   ; output_height = number of rows to produce

    cmp         esi, DWORD PTR [rax]    ; first dword of the k0_k5 entry == 0 ?
    je          .vp8_filter_block1d4_v4_ssse3   ; yes: use the four-tap path

    movq        mm5, MMWORD PTR [rax]           ; k0_k5
    movq        mm6, MMWORD PTR [rax+256]       ; k2_k4
    movq        mm7, MMWORD PTR [rax+128]       ; k1_k3

    mov         rsi, arg(0)             ; src_ptr

    mov         rax, rsi
    add         rax, rdx                ; rax = src_ptr + pitch (base for rows D and F)

.vp8_filter_block1d4_v6_ssse3_loop:
    movd        mm1, DWORD PTR [rsi]                    ; A
    movd        mm2, DWORD PTR [rsi + rdx]              ; B
    movd        mm3, DWORD PTR [rsi + rdx * 2]          ; C
    movd        mm4, DWORD PTR [rax + rdx * 2]          ; D
    movd        mm0, DWORD PTR [rsi + rdx * 4]          ; E

    punpcklbw   mm2, mm4                ; B D interleaved
    punpcklbw   mm3, mm0                ; C E interleaved

    movd        mm0, DWORD PTR [rax + rdx * 4]          ; F

    movq        mm4, [GLOBAL(rd)]       ; rounding value (mm4 is free again once D has been interleaved)

    pmaddubsw   mm3, mm6                ; C E * k2_k4
    punpcklbw   mm1, mm0                ; A F interleaved
    pmaddubsw   mm2, mm7                ; B D * k1_k3
    pmaddubsw   mm1, mm5                ; A F * k0_k5
    add         rsi, rdx                ; advance both row bases by one source row
    add         rax, rdx

    ; accumulate with signed saturation, round, scale and pack to bytes
    paddsw      mm2, mm3
    paddsw      mm2, mm1
    paddsw      mm2, mm4
    psraw       mm2, 7
    packuswb    mm2, mm2

    movd        DWORD PTR [rdi], mm2    ; store 4 output pixels

%if ABI_IS_32BIT
    add         rdi, DWORD PTR arg(3)   ; out_pitch
%else
    add         rdi, r8
%endif
    dec         rcx
    jnz         .vp8_filter_block1d4_v6_ssse3_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

; Four-tap path: rows A and F are not read; rd stays resident in mm5 because
; the k0_k5 coefficients are not needed here.
.vp8_filter_block1d4_v4_ssse3:
    movq        mm6, MMWORD PTR [rax+256]       ; k2_k4
    movq        mm7, MMWORD PTR [rax+128]       ; k1_k3
    movq        mm5, MMWORD PTR [GLOBAL(rd)]    ; rounding value

    mov         rsi, arg(0)             ; src_ptr

    mov         rax, rsi
    add         rax, rdx                ; rax = src_ptr + pitch (base for row D)

.vp8_filter_block1d4_v4_ssse3_loop:
    movd        mm2, DWORD PTR [rsi + rdx]              ; B
    movd        mm3, DWORD PTR [rsi + rdx * 2]          ; C
    movd        mm4, DWORD PTR [rax + rdx * 2]          ; D
    movd        mm0, DWORD PTR [rsi + rdx * 4]          ; E

    punpcklbw   mm2, mm4                ; B D interleaved
    punpcklbw   mm3, mm0                ; C E interleaved

    pmaddubsw   mm3, mm6                ; C E * k2_k4
    pmaddubsw   mm2, mm7                ; B D * k1_k3
    add         rsi, rdx                ; advance both row bases by one source row
    add         rax, rdx

    ; accumulate with signed saturation, round, scale and pack to bytes
    paddsw      mm2, mm3
    paddsw      mm2, mm5
    psraw       mm2, 7
    packuswb    mm2, mm2

    movd        DWORD PTR [rdi], mm2    ; store 4 output pixels

%if ABI_IS_32BIT
    add         rdi, DWORD PTR arg(3)   ; out_pitch
%else
    add         rdi, r8
%endif
    dec         rcx
    jnz         .vp8_filter_block1d4_v4_ssse3_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp8_bilinear_predict16x16_ssse3
;(
;    unsigned char  *src_ptr,
;    int   src_pixels_per_line,
;    int  xoffset,
881*fb1b10abSAndroid Build Coastguard Worker; int yoffset, 882*fb1b10abSAndroid Build Coastguard Worker; unsigned char *dst_ptr, 883*fb1b10abSAndroid Build Coastguard Worker; int dst_pitch 884*fb1b10abSAndroid Build Coastguard Worker;) 885*fb1b10abSAndroid Build Coastguard Workerglobalsym(vp8_bilinear_predict16x16_ssse3) 886*fb1b10abSAndroid Build Coastguard Workersym(vp8_bilinear_predict16x16_ssse3): 887*fb1b10abSAndroid Build Coastguard Worker push rbp 888*fb1b10abSAndroid Build Coastguard Worker mov rbp, rsp 889*fb1b10abSAndroid Build Coastguard Worker SHADOW_ARGS_TO_STACK 6 890*fb1b10abSAndroid Build Coastguard Worker SAVE_XMM 7 891*fb1b10abSAndroid Build Coastguard Worker GET_GOT rbx 892*fb1b10abSAndroid Build Coastguard Worker push rsi 893*fb1b10abSAndroid Build Coastguard Worker push rdi 894*fb1b10abSAndroid Build Coastguard Worker ; end prolog 895*fb1b10abSAndroid Build Coastguard Worker 896*fb1b10abSAndroid Build Coastguard Worker lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] 897*fb1b10abSAndroid Build Coastguard Worker movsxd rax, dword ptr arg(2) ; xoffset 898*fb1b10abSAndroid Build Coastguard Worker 899*fb1b10abSAndroid Build Coastguard Worker cmp rax, 0 ; skip first_pass filter if xoffset=0 900*fb1b10abSAndroid Build Coastguard Worker je .b16x16_sp_only 901*fb1b10abSAndroid Build Coastguard Worker 902*fb1b10abSAndroid Build Coastguard Worker shl rax, 4 903*fb1b10abSAndroid Build Coastguard Worker lea rax, [rax + rcx] ; HFilter 904*fb1b10abSAndroid Build Coastguard Worker 905*fb1b10abSAndroid Build Coastguard Worker mov rdi, arg(4) ; dst_ptr 906*fb1b10abSAndroid Build Coastguard Worker mov rsi, arg(0) ; src_ptr 907*fb1b10abSAndroid Build Coastguard Worker movsxd rdx, dword ptr arg(5) ; dst_pitch 908*fb1b10abSAndroid Build Coastguard Worker 909*fb1b10abSAndroid Build Coastguard Worker movdqa xmm1, [rax] 910*fb1b10abSAndroid Build Coastguard Worker 911*fb1b10abSAndroid Build Coastguard Worker movsxd rax, dword ptr arg(3) ; yoffset 912*fb1b10abSAndroid 
Build Coastguard Worker 913*fb1b10abSAndroid Build Coastguard Worker cmp rax, 0 ; skip second_pass filter if yoffset=0 914*fb1b10abSAndroid Build Coastguard Worker je .b16x16_fp_only 915*fb1b10abSAndroid Build Coastguard Worker 916*fb1b10abSAndroid Build Coastguard Worker shl rax, 4 917*fb1b10abSAndroid Build Coastguard Worker lea rax, [rax + rcx] ; VFilter 918*fb1b10abSAndroid Build Coastguard Worker 919*fb1b10abSAndroid Build Coastguard Worker lea rcx, [rdi+rdx*8] 920*fb1b10abSAndroid Build Coastguard Worker lea rcx, [rcx+rdx*8] 921*fb1b10abSAndroid Build Coastguard Worker movsxd rdx, dword ptr arg(1) ; src_pixels_per_line 922*fb1b10abSAndroid Build Coastguard Worker 923*fb1b10abSAndroid Build Coastguard Worker movdqa xmm2, [rax] 924*fb1b10abSAndroid Build Coastguard Worker 925*fb1b10abSAndroid Build Coastguard Worker%if ABI_IS_32BIT=0 926*fb1b10abSAndroid Build Coastguard Worker movsxd r8, dword ptr arg(5) ; dst_pitch 927*fb1b10abSAndroid Build Coastguard Worker%endif 928*fb1b10abSAndroid Build Coastguard Worker movq xmm3, [rsi] ; 00 01 02 03 04 05 06 07 929*fb1b10abSAndroid Build Coastguard Worker movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 930*fb1b10abSAndroid Build Coastguard Worker 931*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 932*fb1b10abSAndroid Build Coastguard Worker movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 933*fb1b10abSAndroid Build Coastguard Worker 934*fb1b10abSAndroid Build Coastguard Worker movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 935*fb1b10abSAndroid Build Coastguard Worker 936*fb1b10abSAndroid Build Coastguard Worker lea rsi, [rsi + rdx] ; next line 937*fb1b10abSAndroid Build Coastguard Worker 938*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm3, xmm1 ; 00 02 04 06 08 10 12 14 939*fb1b10abSAndroid Build Coastguard Worker 940*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 941*fb1b10abSAndroid Build 
Coastguard Worker pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15 942*fb1b10abSAndroid Build Coastguard Worker 943*fb1b10abSAndroid Build Coastguard Worker paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value 944*fb1b10abSAndroid Build Coastguard Worker psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 945*fb1b10abSAndroid Build Coastguard Worker 946*fb1b10abSAndroid Build Coastguard Worker paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value 947*fb1b10abSAndroid Build Coastguard Worker psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 948*fb1b10abSAndroid Build Coastguard Worker 949*fb1b10abSAndroid Build Coastguard Worker movdqa xmm7, xmm3 950*fb1b10abSAndroid Build Coastguard Worker packuswb xmm7, xmm4 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 951*fb1b10abSAndroid Build Coastguard Worker 952*fb1b10abSAndroid Build Coastguard Worker.next_row: 953*fb1b10abSAndroid Build Coastguard Worker movq xmm6, [rsi] ; 00 01 02 03 04 05 06 07 954*fb1b10abSAndroid Build Coastguard Worker movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 955*fb1b10abSAndroid Build Coastguard Worker 956*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm6, xmm5 957*fb1b10abSAndroid Build Coastguard Worker movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 958*fb1b10abSAndroid Build Coastguard Worker 959*fb1b10abSAndroid Build Coastguard Worker movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 960*fb1b10abSAndroid Build Coastguard Worker lea rsi, [rsi + rdx] ; next line 961*fb1b10abSAndroid Build Coastguard Worker 962*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm6, xmm1 963*fb1b10abSAndroid Build Coastguard Worker 964*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm4, xmm5 965*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm4, xmm1 966*fb1b10abSAndroid Build Coastguard Worker 967*fb1b10abSAndroid Build Coastguard Worker paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value 968*fb1b10abSAndroid Build Coastguard Worker psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 969*fb1b10abSAndroid Build Coastguard Worker 
970*fb1b10abSAndroid Build Coastguard Worker paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value 971*fb1b10abSAndroid Build Coastguard Worker psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 972*fb1b10abSAndroid Build Coastguard Worker 973*fb1b10abSAndroid Build Coastguard Worker packuswb xmm6, xmm4 974*fb1b10abSAndroid Build Coastguard Worker movdqa xmm5, xmm7 975*fb1b10abSAndroid Build Coastguard Worker 976*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm5, xmm6 977*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm5, xmm2 978*fb1b10abSAndroid Build Coastguard Worker 979*fb1b10abSAndroid Build Coastguard Worker punpckhbw xmm7, xmm6 980*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm7, xmm2 981*fb1b10abSAndroid Build Coastguard Worker 982*fb1b10abSAndroid Build Coastguard Worker paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value 983*fb1b10abSAndroid Build Coastguard Worker psraw xmm5, VP8_FILTER_SHIFT ; xmm5 /= 128 984*fb1b10abSAndroid Build Coastguard Worker 985*fb1b10abSAndroid Build Coastguard Worker paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value 986*fb1b10abSAndroid Build Coastguard Worker psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 987*fb1b10abSAndroid Build Coastguard Worker 988*fb1b10abSAndroid Build Coastguard Worker packuswb xmm5, xmm7 989*fb1b10abSAndroid Build Coastguard Worker movdqa xmm7, xmm6 990*fb1b10abSAndroid Build Coastguard Worker 991*fb1b10abSAndroid Build Coastguard Worker movdqa [rdi], xmm5 ; store the results in the destination 992*fb1b10abSAndroid Build Coastguard Worker%if ABI_IS_32BIT 993*fb1b10abSAndroid Build Coastguard Worker add rdi, DWORD PTR arg(5) ; dst_pitch 994*fb1b10abSAndroid Build Coastguard Worker%else 995*fb1b10abSAndroid Build Coastguard Worker add rdi, r8 996*fb1b10abSAndroid Build Coastguard Worker%endif 997*fb1b10abSAndroid Build Coastguard Worker 998*fb1b10abSAndroid Build Coastguard Worker cmp rdi, rcx 999*fb1b10abSAndroid Build Coastguard Worker jne .next_row 1000*fb1b10abSAndroid Build Coastguard Worker 
1001*fb1b10abSAndroid Build Coastguard Worker jmp .done 1002*fb1b10abSAndroid Build Coastguard Worker 1003*fb1b10abSAndroid Build Coastguard Worker.b16x16_sp_only: 1004*fb1b10abSAndroid Build Coastguard Worker movsxd rax, dword ptr arg(3) ; yoffset 1005*fb1b10abSAndroid Build Coastguard Worker shl rax, 4 1006*fb1b10abSAndroid Build Coastguard Worker lea rax, [rax + rcx] ; VFilter 1007*fb1b10abSAndroid Build Coastguard Worker 1008*fb1b10abSAndroid Build Coastguard Worker mov rdi, arg(4) ; dst_ptr 1009*fb1b10abSAndroid Build Coastguard Worker mov rsi, arg(0) ; src_ptr 1010*fb1b10abSAndroid Build Coastguard Worker movsxd rdx, dword ptr arg(5) ; dst_pitch 1011*fb1b10abSAndroid Build Coastguard Worker 1012*fb1b10abSAndroid Build Coastguard Worker movdqa xmm1, [rax] ; VFilter 1013*fb1b10abSAndroid Build Coastguard Worker 1014*fb1b10abSAndroid Build Coastguard Worker lea rcx, [rdi+rdx*8] 1015*fb1b10abSAndroid Build Coastguard Worker lea rcx, [rcx+rdx*8] 1016*fb1b10abSAndroid Build Coastguard Worker movsxd rax, dword ptr arg(1) ; src_pixels_per_line 1017*fb1b10abSAndroid Build Coastguard Worker 1018*fb1b10abSAndroid Build Coastguard Worker ; get the first horizontal line done 1019*fb1b10abSAndroid Build Coastguard Worker movq xmm4, [rsi] ; load row 0 1020*fb1b10abSAndroid Build Coastguard Worker movq xmm2, [rsi + 8] ; load row 0 1021*fb1b10abSAndroid Build Coastguard Worker 1022*fb1b10abSAndroid Build Coastguard Worker lea rsi, [rsi + rax] ; next line 1023*fb1b10abSAndroid Build Coastguard Worker.next_row_sp: 1024*fb1b10abSAndroid Build Coastguard Worker movq xmm3, [rsi] ; load row + 1 1025*fb1b10abSAndroid Build Coastguard Worker movq xmm5, [rsi + 8] ; load row + 1 1026*fb1b10abSAndroid Build Coastguard Worker 1027*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm4, xmm3 1028*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm2, xmm5 1029*fb1b10abSAndroid Build Coastguard Worker 1030*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm4, xmm1 
1031*fb1b10abSAndroid Build Coastguard Worker movq xmm7, [rsi + rax] ; load row + 2 1032*fb1b10abSAndroid Build Coastguard Worker 1033*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm2, xmm1 1034*fb1b10abSAndroid Build Coastguard Worker movq xmm6, [rsi + rax + 8] ; load row + 2 1035*fb1b10abSAndroid Build Coastguard Worker 1036*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm3, xmm7 1037*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm5, xmm6 1038*fb1b10abSAndroid Build Coastguard Worker 1039*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm3, xmm1 1040*fb1b10abSAndroid Build Coastguard Worker paddw xmm4, [GLOBAL(rd)] 1041*fb1b10abSAndroid Build Coastguard Worker 1042*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm5, xmm1 1043*fb1b10abSAndroid Build Coastguard Worker paddw xmm2, [GLOBAL(rd)] 1044*fb1b10abSAndroid Build Coastguard Worker 1045*fb1b10abSAndroid Build Coastguard Worker psraw xmm4, VP8_FILTER_SHIFT 1046*fb1b10abSAndroid Build Coastguard Worker psraw xmm2, VP8_FILTER_SHIFT 1047*fb1b10abSAndroid Build Coastguard Worker 1048*fb1b10abSAndroid Build Coastguard Worker packuswb xmm4, xmm2 1049*fb1b10abSAndroid Build Coastguard Worker paddw xmm3, [GLOBAL(rd)] 1050*fb1b10abSAndroid Build Coastguard Worker 1051*fb1b10abSAndroid Build Coastguard Worker movdqa [rdi], xmm4 ; store row 0 1052*fb1b10abSAndroid Build Coastguard Worker paddw xmm5, [GLOBAL(rd)] 1053*fb1b10abSAndroid Build Coastguard Worker 1054*fb1b10abSAndroid Build Coastguard Worker psraw xmm3, VP8_FILTER_SHIFT 1055*fb1b10abSAndroid Build Coastguard Worker psraw xmm5, VP8_FILTER_SHIFT 1056*fb1b10abSAndroid Build Coastguard Worker 1057*fb1b10abSAndroid Build Coastguard Worker packuswb xmm3, xmm5 1058*fb1b10abSAndroid Build Coastguard Worker movdqa xmm4, xmm7 1059*fb1b10abSAndroid Build Coastguard Worker 1060*fb1b10abSAndroid Build Coastguard Worker movdqa [rdi + rdx],xmm3 ; store row 1 1061*fb1b10abSAndroid Build Coastguard Worker lea rsi, [rsi + 2*rax] 
1062*fb1b10abSAndroid Build Coastguard Worker 1063*fb1b10abSAndroid Build Coastguard Worker movdqa xmm2, xmm6 1064*fb1b10abSAndroid Build Coastguard Worker lea rdi, [rdi + 2*rdx] 1065*fb1b10abSAndroid Build Coastguard Worker 1066*fb1b10abSAndroid Build Coastguard Worker cmp rdi, rcx 1067*fb1b10abSAndroid Build Coastguard Worker jne .next_row_sp 1068*fb1b10abSAndroid Build Coastguard Worker 1069*fb1b10abSAndroid Build Coastguard Worker jmp .done 1070*fb1b10abSAndroid Build Coastguard Worker 1071*fb1b10abSAndroid Build Coastguard Worker.b16x16_fp_only: 1072*fb1b10abSAndroid Build Coastguard Worker lea rcx, [rdi+rdx*8] 1073*fb1b10abSAndroid Build Coastguard Worker lea rcx, [rcx+rdx*8] 1074*fb1b10abSAndroid Build Coastguard Worker movsxd rax, dword ptr arg(1) ; src_pixels_per_line 1075*fb1b10abSAndroid Build Coastguard Worker 1076*fb1b10abSAndroid Build Coastguard Worker.next_row_fp: 1077*fb1b10abSAndroid Build Coastguard Worker movq xmm2, [rsi] ; 00 01 02 03 04 05 06 07 1078*fb1b10abSAndroid Build Coastguard Worker movq xmm4, [rsi+1] ; 01 02 03 04 05 06 07 08 1079*fb1b10abSAndroid Build Coastguard Worker 1080*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm2, xmm4 1081*fb1b10abSAndroid Build Coastguard Worker movq xmm3, [rsi+8] ; 08 09 10 11 12 13 14 15 1082*fb1b10abSAndroid Build Coastguard Worker 1083*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm2, xmm1 1084*fb1b10abSAndroid Build Coastguard Worker movq xmm4, [rsi+9] ; 09 10 11 12 13 14 15 16 1085*fb1b10abSAndroid Build Coastguard Worker 1086*fb1b10abSAndroid Build Coastguard Worker lea rsi, [rsi + rax] ; next line 1087*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm3, xmm4 1088*fb1b10abSAndroid Build Coastguard Worker 1089*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm3, xmm1 1090*fb1b10abSAndroid Build Coastguard Worker movq xmm5, [rsi] 1091*fb1b10abSAndroid Build Coastguard Worker 1092*fb1b10abSAndroid Build Coastguard Worker paddw xmm2, [GLOBAL(rd)] 1093*fb1b10abSAndroid Build 
Coastguard Worker movq xmm7, [rsi+1] 1094*fb1b10abSAndroid Build Coastguard Worker 1095*fb1b10abSAndroid Build Coastguard Worker movq xmm6, [rsi+8] 1096*fb1b10abSAndroid Build Coastguard Worker psraw xmm2, VP8_FILTER_SHIFT 1097*fb1b10abSAndroid Build Coastguard Worker 1098*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm5, xmm7 1099*fb1b10abSAndroid Build Coastguard Worker movq xmm7, [rsi+9] 1100*fb1b10abSAndroid Build Coastguard Worker 1101*fb1b10abSAndroid Build Coastguard Worker paddw xmm3, [GLOBAL(rd)] 1102*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm5, xmm1 1103*fb1b10abSAndroid Build Coastguard Worker 1104*fb1b10abSAndroid Build Coastguard Worker psraw xmm3, VP8_FILTER_SHIFT 1105*fb1b10abSAndroid Build Coastguard Worker punpcklbw xmm6, xmm7 1106*fb1b10abSAndroid Build Coastguard Worker 1107*fb1b10abSAndroid Build Coastguard Worker packuswb xmm2, xmm3 1108*fb1b10abSAndroid Build Coastguard Worker pmaddubsw xmm6, xmm1 1109*fb1b10abSAndroid Build Coastguard Worker 1110*fb1b10abSAndroid Build Coastguard Worker movdqa [rdi], xmm2 ; store the results in the destination 1111*fb1b10abSAndroid Build Coastguard Worker paddw xmm5, [GLOBAL(rd)] 1112*fb1b10abSAndroid Build Coastguard Worker 1113*fb1b10abSAndroid Build Coastguard Worker lea rdi, [rdi + rdx] ; dst_pitch 1114*fb1b10abSAndroid Build Coastguard Worker psraw xmm5, VP8_FILTER_SHIFT 1115*fb1b10abSAndroid Build Coastguard Worker 1116*fb1b10abSAndroid Build Coastguard Worker paddw xmm6, [GLOBAL(rd)] 1117*fb1b10abSAndroid Build Coastguard Worker psraw xmm6, VP8_FILTER_SHIFT 1118*fb1b10abSAndroid Build Coastguard Worker 1119*fb1b10abSAndroid Build Coastguard Worker packuswb xmm5, xmm6 1120*fb1b10abSAndroid Build Coastguard Worker lea rsi, [rsi + rax] ; next line 1121*fb1b10abSAndroid Build Coastguard Worker 1122*fb1b10abSAndroid Build Coastguard Worker movdqa [rdi], xmm5 ; store the results in the destination 1123*fb1b10abSAndroid Build Coastguard Worker lea rdi, [rdi + rdx] ; dst_pitch 
; Loop-exit test and epilog of the routine whose .next_row_fp loop begins
; above this point (unchanged).

    cmp         rdi,        rcx

    jne         .next_row_fp

.done:
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp8_bilinear_predict8x8_ssse3
;(
;    unsigned char  *src_ptr,
;    int   src_pixels_per_line,
;    int  xoffset,
;    int  yoffset,
;    unsigned char *dst_ptr,
;    int dst_pitch
;)
;
; Bilinear 8x8 prediction.
;
; Nine source rows are loaded with 16-byte unaligned reads (only bytes 0..8 of
; each row are consumed) and parked in a 144-byte, 16-byte-aligned stack
; buffer. xoffset and yoffset each select one 16-byte row of
; vp8_bilinear_filters_ssse3 (offset << 4). Eight rows of eight bytes are
; written to dst_ptr, dst_pitch bytes apart.
;
; Paths:
;   xoffset != 0, yoffset != 0 : horizontal pass followed by vertical pass
;   xoffset != 0, yoffset == 0 : .b8x8_fp_only, horizontal pass only
;   xoffset == 0, yoffset != 0 : .b8x8_sp_only, vertical pass only
;   xoffset == 0, yoffset == 0 : .b8x8_copy, plain copy of the 8x8 block
;
; The copy path exists because filter row 0 is (128, 0): pmaddubsw reads the
; filter operand as signed bytes, so 128 acts as -128 and packuswb would clamp
; every output pixel to 0.
;
; Every path advances rsp by exactly 144 bytes while consuming the buffer, so
; that .done8x8 can restore the caller's stack pointer with a single pop.
globalsym(vp8_bilinear_predict8x8_ssse3)
sym(vp8_bilinear_predict8x8_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub         rsp, 144                         ; reserve 144 bytes (9 rows * 16)

    lea         rcx,        [GLOBAL(vp8_bilinear_filters_ssse3)]

    mov         rsi,        arg(0)               ;src_ptr
    movsxd      rdx,        dword ptr arg(1)     ;src_pixels_per_line

    ;Read 9-line unaligned data in and put them on stack. This gives a big
    ;performance boost.
    movdqu      xmm0,       [rsi]
    lea         rax,        [rdx + rdx*2]        ; 3 * src_pixels_per_line
    movdqu      xmm1,       [rsi+rdx]
    movdqu      xmm2,       [rsi+rdx*2]
    add         rsi,        rax
    movdqu      xmm3,       [rsi]
    movdqu      xmm4,       [rsi+rdx]
    movdqu      xmm5,       [rsi+rdx*2]
    add         rsi,        rax
    movdqu      xmm6,       [rsi]
    movdqu      xmm7,       [rsi+rdx]

    movdqa      XMMWORD PTR [rsp],            xmm0

    movdqu      xmm0,       [rsi+rdx*2]          ; ninth row reuses xmm0

    movdqa      XMMWORD PTR [rsp+16],         xmm1
    movdqa      XMMWORD PTR [rsp+32],         xmm2
    movdqa      XMMWORD PTR [rsp+48],         xmm3
    movdqa      XMMWORD PTR [rsp+64],         xmm4
    movdqa      XMMWORD PTR [rsp+80],         xmm5
    movdqa      XMMWORD PTR [rsp+96],         xmm6
    movdqa      XMMWORD PTR [rsp+112],        xmm7
    movdqa      XMMWORD PTR [rsp+128],        xmm0

    movsxd      rax,        dword ptr arg(2)     ; xoffset
    cmp         rax,        0                    ; skip first_pass filter if xoffset=0
    je          .b8x8_sp_only

    shl         rax,        4
    add         rax,        rcx                  ; HFilter

    mov         rdi,        arg(4)               ; dst_ptr
    movsxd      rdx,        dword ptr arg(5)     ; dst_pitch

    movdqa      xmm0,       [rax]

    movsxd      rax,        dword ptr arg(3)     ; yoffset
    cmp         rax,        0                    ; skip second_pass filter if yoffset=0
    je          .b8x8_fp_only

    shl         rax,        4
    lea         rax,        [rax + rcx]          ; VFilter

    lea         rcx,        [rdi+rdx*8]          ; end of destination: dst + 8 * dst_pitch

    movdqa      xmm1,       [rax]

    ; get the first horizontal line done
    movdqa      xmm3,       [rsp]                ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
    movdqa      xmm5,       xmm3                 ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx

    psrldq      xmm5,       1
    lea         rsp,        [rsp + 16]           ; next line

    punpcklbw   xmm3,       xmm5                 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
    pmaddubsw   xmm3,       xmm0                 ; 00 02 04 06 08 10 12 14

    paddw       xmm3,       [GLOBAL(rd)]         ; xmm3 += round value
    psraw       xmm3,       VP8_FILTER_SHIFT     ; xmm3 /= 128

    movdqa      xmm7,       xmm3
    packuswb    xmm7,       xmm7                 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15

.next_row:
    ; xmm7 holds the horizontally filtered previous row; filter the next row
    ; horizontally into xmm6, then blend the two vertically.
    movdqa      xmm6,       [rsp]                ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
    lea         rsp,        [rsp + 16]           ; next line

    movdqa      xmm5,       xmm6

    psrldq      xmm5,       1

    punpcklbw   xmm6,       xmm5
    pmaddubsw   xmm6,       xmm0

    paddw       xmm6,       [GLOBAL(rd)]         ; xmm6 += round value
    psraw       xmm6,       VP8_FILTER_SHIFT     ; xmm6 /= 128

    packuswb    xmm6,       xmm6

    punpcklbw   xmm7,       xmm6
    pmaddubsw   xmm7,       xmm1

    paddw       xmm7,       [GLOBAL(rd)]         ; xmm7 += round value
    psraw       xmm7,       VP8_FILTER_SHIFT     ; xmm7 /= 128

    packuswb    xmm7,       xmm7

    movq        [rdi],      xmm7                 ; store the results in the destination
    lea         rdi,        [rdi + rdx]

    movdqa      xmm7,       xmm6                 ; current row becomes the previous row

    cmp         rdi,        rcx
    jne         .next_row

    jmp         .done8x8

.b8x8_sp_only:
    mov         rdi,        arg(4)               ; dst_ptr
    movsxd      rdx,        dword ptr arg(5)     ; dst_pitch

    movsxd      rax,        dword ptr arg(3)     ; yoffset
    cmp         rax,        0                    ; xoffset and yoffset both 0: nothing to filter
    je          .b8x8_copy

    shl         rax,        4
    lea         rax,        [rax + rcx]          ; VFilter

    movdqa      xmm0,       [rax]                ; VFilter

    movq        xmm1,       XMMWORD PTR [rsp]
    movq        xmm2,       XMMWORD PTR [rsp+16]

    movq        xmm3,       XMMWORD PTR [rsp+32]
    punpcklbw   xmm1,       xmm2

    movq        xmm4,       XMMWORD PTR [rsp+48]
    punpcklbw   xmm2,       xmm3

    movq        xmm5,       XMMWORD PTR [rsp+64]
    punpcklbw   xmm3,       xmm4

    movq        xmm6,       XMMWORD PTR [rsp+80]
    punpcklbw   xmm4,       xmm5

    movq        xmm7,       XMMWORD PTR [rsp+96]
    punpcklbw   xmm5,       xmm6

    ; The filter register (xmm0) is always treated as signed by pmaddubsw.
    ; yoffset != 0 here, so xmm0 never holds the (128, 0) row, whose '128'
    ; would be read as '-128'.
    pmaddubsw   xmm1,       xmm0
    pmaddubsw   xmm2,       xmm0

    pmaddubsw   xmm3,       xmm0
    pmaddubsw   xmm4,       xmm0

    pmaddubsw   xmm5,       xmm0
    punpcklbw   xmm6,       xmm7

    pmaddubsw   xmm6,       xmm0
    paddw       xmm1,       [GLOBAL(rd)]

    paddw       xmm2,       [GLOBAL(rd)]
    psraw       xmm1,       VP8_FILTER_SHIFT

    paddw       xmm3,       [GLOBAL(rd)]
    psraw       xmm2,       VP8_FILTER_SHIFT

    paddw       xmm4,       [GLOBAL(rd)]
    psraw       xmm3,       VP8_FILTER_SHIFT

    paddw       xmm5,       [GLOBAL(rd)]
    psraw       xmm4,       VP8_FILTER_SHIFT

    paddw       xmm6,       [GLOBAL(rd)]
    psraw       xmm5,       VP8_FILTER_SHIFT

    psraw       xmm6,       VP8_FILTER_SHIFT

    ; All sums are non-negative on this path, so the unsigned saturation
    ; below only narrows words to bytes.
    packuswb    xmm1,       xmm1

    packuswb    xmm2,       xmm2
    movq        [rdi],      xmm1

    packuswb    xmm3,       xmm3
    movq        [rdi+rdx],  xmm2

    packuswb    xmm4,       xmm4
    movq        xmm1,       XMMWORD PTR [rsp+112]

    lea         rdi,        [rdi + 2*rdx]
    movq        xmm2,       XMMWORD PTR [rsp+128]

    packuswb    xmm5,       xmm5
    movq        [rdi],      xmm3

    packuswb    xmm6,       xmm6
    movq        [rdi+rdx],  xmm4

    lea         rdi,        [rdi + 2*rdx]
    punpcklbw   xmm7,       xmm1

    movq        [rdi],      xmm5
    pmaddubsw   xmm7,       xmm0

    movq        [rdi+rdx],  xmm6
    punpcklbw   xmm1,       xmm2

    pmaddubsw   xmm1,       xmm0
    paddw       xmm7,       [GLOBAL(rd)]

    psraw       xmm7,       VP8_FILTER_SHIFT
    paddw       xmm1,       [GLOBAL(rd)]

    psraw       xmm1,       VP8_FILTER_SHIFT
    packuswb    xmm7,       xmm7

    packuswb    xmm1,       xmm1
    lea         rdi,        [rdi + 2*rdx]

    movq        [rdi],      xmm7

    movq        [rdi+rdx],  xmm1
    lea         rsp,        [rsp + 144]          ; release the whole row buffer

    jmp         .done8x8

.b8x8_copy:
    ; xoffset == 0 && yoffset == 0: the prediction is the source block itself.
    ; Copy the low 8 bytes of buffered rows 0..7 to the destination; the ninth
    ; row is not needed.
    movq        xmm0,       XMMWORD PTR [rsp]
    movq        xmm1,       XMMWORD PTR [rsp+16]
    movq        xmm2,       XMMWORD PTR [rsp+32]
    movq        xmm3,       XMMWORD PTR [rsp+48]
    movq        xmm4,       XMMWORD PTR [rsp+64]
    movq        xmm5,       XMMWORD PTR [rsp+80]
    movq        xmm6,       XMMWORD PTR [rsp+96]
    movq        xmm7,       XMMWORD PTR [rsp+112]

    movq        [rdi],      xmm0
    movq        [rdi+rdx],  xmm1
    lea         rdi,        [rdi + 2*rdx]

    movq        [rdi],      xmm2
    movq        [rdi+rdx],  xmm3
    lea         rdi,        [rdi + 2*rdx]

    movq        [rdi],      xmm4
    movq        [rdi+rdx],  xmm5
    lea         rdi,        [rdi + 2*rdx]

    movq        [rdi],      xmm6
    movq        [rdi+rdx],  xmm7

    lea         rsp,        [rsp + 144]          ; release the whole row buffer

    jmp         .done8x8

.b8x8_fp_only:
    lea         rcx,        [rdi+rdx*8]          ; end of destination: dst + 8 * dst_pitch

.next_row_fp:
    ; Four rows per iteration, two iterations in total.
    movdqa      xmm1,       XMMWORD PTR [rsp]
    movdqa      xmm3,       XMMWORD PTR [rsp+16]

    movdqa      xmm2,       xmm1
    movdqa      xmm5,       XMMWORD PTR [rsp+32]

    psrldq      xmm2,       1
    movdqa      xmm7,       XMMWORD PTR [rsp+48]

    movdqa      xmm4,       xmm3
    psrldq      xmm4,       1

    movdqa      xmm6,       xmm5
    psrldq      xmm6,       1

    punpcklbw   xmm1,       xmm2
    pmaddubsw   xmm1,       xmm0

    punpcklbw   xmm3,       xmm4
    pmaddubsw   xmm3,       xmm0

    punpcklbw   xmm5,       xmm6
    pmaddubsw   xmm5,       xmm0

    movdqa      xmm2,       xmm7
    psrldq      xmm2,       1

    punpcklbw   xmm7,       xmm2
    pmaddubsw   xmm7,       xmm0

    paddw       xmm1,       [GLOBAL(rd)]
    psraw       xmm1,       VP8_FILTER_SHIFT

    paddw       xmm3,       [GLOBAL(rd)]
    psraw       xmm3,       VP8_FILTER_SHIFT

    paddw       xmm5,       [GLOBAL(rd)]
    psraw       xmm5,       VP8_FILTER_SHIFT

    paddw       xmm7,       [GLOBAL(rd)]
    psraw       xmm7,       VP8_FILTER_SHIFT

    packuswb    xmm1,       xmm1
    packuswb    xmm3,       xmm3

    packuswb    xmm5,       xmm5
    movq        [rdi],      xmm1

    packuswb    xmm7,       xmm7
    movq        [rdi+rdx],  xmm3

    lea         rdi,        [rdi + 2*rdx]
    movq        [rdi],      xmm5

    lea         rsp,        [rsp + 4*16]         ; four buffered rows consumed
    movq        [rdi+rdx],  xmm7

    lea         rdi,        [rdi + 2*rdx]
    cmp         rdi,        rcx

    jne         .next_row_fp

    lea         rsp,        [rsp + 16]           ; skip the unused ninth row

.done8x8:
    ; Every path above has already stepped rsp past the 144-byte row buffer,
    ; so no "add rsp, 144" is needed here. The pop below restores the stack
    ; pointer (presumably the value ALIGN_STACK saved; the macro is defined
    ; in x86_abi_support.asm).
    pop         rsp
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

SECTION_RODATA
align 16
shuf1b:
; Data for the shuf1b label declared just above. shuf1b, shuf2b and shuf3b are
; 16-entry byte-index tables; presumably pshufb control masks for the six-tap
; filters (their users are not in this part of the file -- confirm there).
    db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
shuf2b:
    db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
shuf3b:
    db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10

; NOTE(review): the "from1" names suggest these produce the shuf2b/shuf3b
; arrangement from data already shuffled with shuf1b -- confirm at the use site.
align 16
shuf2bfrom1:
    db  4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11, 9,13
align 16
shuf3bfrom1:
    db  2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11

; Rounding constant: eight words of 0x40 (64). The filter code adds it to the
; pmaddubsw sums before shifting right by VP8_FILTER_SHIFT (7).
align 16
rd:
    times 8 dw 0x40

; Coefficient tables with eight 16-byte rows each; each row is one byte pair
; repeated eight times. The code scales the filter index by 16 (shl 4) to
; select a row. Going by the names, each table presumably holds the tap pair
; (k0,k5), (k1,k3) or (k2,k4) of the six-tap filter -- confirm at the use site.
align 16
k0_k5:
    times 8 db 0, 0             ;placeholder
    times 8 db 0, 0
    times 8 db 2, 1
    times 8 db 0, 0
    times 8 db 3, 3
    times 8 db 0, 0
    times 8 db 1, 2
    times 8 db 0, 0
k1_k3:
    times 8 db  0,    0         ;placeholder
    times 8 db  -6,  12
    times 8 db -11,  36
    times 8 db  -9,  50
    times 8 db -16,  77
    times 8 db  -6,  93
    times 8 db  -8, 108
    times 8 db  -1, 123
k2_k4:
    times 8 db 128,    0        ;placeholder
    times 8 db 123,  -1
    times 8 db 108,  -8
    times 8 db  93,  -6
    times 8 db  77, -16
    times 8 db  50,  -9
    times 8 db  36, -11
    times 8 db  12,  -6

; Bilinear filter table, indexed by (offset << 4) for offset 0..7. Row k is
; the byte pair (128 - 16*k, 16*k) repeated eight times, so each pair sums to
; 128. The value 128 in row 0 is stored as byte 0x80, which pmaddubsw reads as
; -128 when the row is used as its signed operand.
align 16
vp8_bilinear_filters_ssse3:
    times 8 db 128, 0
    times 8 db 112, 16
    times 8 db 96,  32
    times 8 db 80,  48
    times 8 db 64,  64
    times 8 db 48,  80
    times 8 db 32,  96
    times 8 db 16,  112
