;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

SECTION .text

;-----------------------------------------------------------------------
;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
;
; Element-wise multiply of 16 words: dq[i] = low16(sq[i] * q[i]), i = 0..15.
; Processed as four 64-bit groups of four words each.
;
; In:    arg(0) = sq  (source words, read)
;        arg(1) = dq  (destination words, written)
;        arg(2) = q   (multiplier words, read)
; Uses:  rax, rsi, rdi (rsi/rdi are saved and restored here), mm1
; Note:  no emms is executed in this routine.
;        NOTE(review): presumably the caller clears the MMX state - confirm.
;-----------------------------------------------------------------------
globalsym(vp8_dequantize_b_impl_mmx)
sym(vp8_dequantize_b_impl_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 3
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0) ;sq
    mov         rdi, arg(1) ;dq
    mov         rax, arg(2) ;q

    movq        mm1, [rsi]
    pmullw      mm1, [rax+0]            ; mm1 = sq[0..3] * q[0..3] (low 16 bits)
    movq        [rdi], mm1

    movq        mm1, [rsi+8]
    pmullw      mm1, [rax+8]            ; mm1 = sq[4..7] * q[4..7] (low 16 bits)
    movq        [rdi+8], mm1

    movq        mm1, [rsi+16]
    pmullw      mm1, [rax+16]           ; mm1 = sq[8..11] * q[8..11] (low 16 bits)
    movq        [rdi+16], mm1

    movq        mm1, [rsi+24]
    pmullw      mm1, [rax+24]           ; mm1 = sq[12..15] * q[12..15] (low 16 bits)
    movq        [rdi+24], mm1

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;void dequant_idct_add_mmx(
;short *input,            0
;short *dq,               1
;unsigned char *dest,     2
;int stride)              3
;
; 1. Multiplies the 16 input words by the 16 dq words (low 16 bits kept).
; 2. Overwrites the 16 input words with zero.
; 3. Runs two butterfly passes over the 4x4 block with a transpose after
;    each pass; the second pass adds 4 and shifts right by 3.
; 4. Adds the result to four rows of four bytes at dest (rows are `stride`
;    bytes apart), with saturation to 0..255, and stores them back.
;
; Uses:  rax, rdx, rdi (rdi is saved and restored here), mm0-mm7
;        GET_GOT/RESTORE_GOT and GLOBAL() come from x86_abi_support.asm.
;        NOTE(review): presumably they set up rbx for PIC builds - confirm.
; Note:  no emms is executed in this routine.
;        NOTE(review): presumably the caller clears the MMX state - confirm.
;
; Multiply idiom used below:  pmulhw reg, [K] ; paddw reg, x
;   x_s1sqr2 (0x8A8C) is negative as a signed word, so pmulhw yields
;   (x * (35468 - 65536)) >> 16 and adding x back gives x * 35468 / 65536.
;   x_c1sqr2less1 (0x4E7B) is positive, so pmulhw + x gives
;   x * (1 + 20091 / 65536).
;-----------------------------------------------------------------------
globalsym(vp8_dequant_idct_add_mmx)
sym(vp8_dequant_idct_add_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    GET_GOT     rbx
    push        rdi
    ; end prolog

        mov         rax,    arg(0) ;input
        mov         rdx,    arg(1) ;dq

        ; dequantize: mm0..mm3 = input rows 0..3 times dq rows 0..3
        movq        mm0,    [rax   ]
        pmullw      mm0,    [rdx]

        movq        mm1,    [rax +8]
        pmullw      mm1,    [rdx +8]

        movq        mm2,    [rax+16]
        pmullw      mm2,    [rdx+16]

        movq        mm3,    [rax+24]
        pmullw      mm3,    [rdx+24]

        mov         rdx,    arg(2) ;dest

        pxor        mm7,    mm7

        ; clear the 16 input coefficients now that they are in registers
        movq        [rax],   mm7
        movq        [rax+8], mm7

        movq        [rax+16],mm7
        movq        [rax+24],mm7


        movsxd      rdi,            dword ptr arg(3) ;stride

        ; ---- first pass ----
        psubw       mm0,            mm2             ; b1= 0-2
        paddw       mm2,            mm2             ;

        movq        mm5,            mm1
        paddw       mm2,            mm0             ; a1 =0+2

        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
        paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)

        movq        mm7,            mm3             ;
        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];

        paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
        psubw       mm7,            mm5             ; mm7 = ip3*c - ip1*s

        movq        mm5,            mm1
        movq        mm4,            mm3

        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
        paddw       mm5,            mm1             ; ip1 * cos(pi/8) * sqrt(2)

        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
        paddw       mm3,            mm4             ; ip3 * sin(pi/8) * sqrt(2)

        paddw       mm3,            mm5             ; d1
        movq        mm6,            mm2             ; a1

        movq        mm4,            mm0             ; b1
        paddw       mm2,            mm3             ; mm2 = a1 + d1

        paddw       mm4,            mm7             ; mm4 = b1 + mm7
        psubw       mm0,            mm7             ; mm0 = b1 - mm7

        psubw       mm6,            mm3             ; mm6 = a1 - d1

        ; transpose the 4x4 word block held in mm2, mm0, mm4, mm6
        movq        mm1,            mm2             ; 03 02 01 00
        movq        mm3,            mm4             ; 23 22 21 20

        punpcklwd   mm1,            mm0             ; 11 01 10 00
        punpckhwd   mm2,            mm0             ; 13 03 12 02

        punpcklwd   mm3,            mm6             ; 31 21 30 20
        punpckhwd   mm4,            mm6             ; 33 23 32 22

        movq        mm0,            mm1             ; 11 01 10 00
        movq        mm5,            mm2             ; 13 03 12 02

        punpckldq   mm0,            mm3             ; 30 20 10 00
        punpckhdq   mm1,            mm3             ; 31 21 11 01

        punpckldq   mm2,            mm4             ; 32 22 12 02
        punpckhdq   mm5,            mm4             ; 33 23 13 03

        movq        mm3,            mm5             ; 33 23 13 03

        ; ---- second pass (same butterfly, plus rounding and >> 3) ----
        psubw       mm0,            mm2             ; b1= 0-2
        paddw       mm2,            mm2             ;

        movq        mm5,            mm1
        paddw       mm2,            mm0             ; a1 =0+2

        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
        paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)

        movq        mm7,            mm3             ;
        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];

        paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
        psubw       mm7,            mm5             ; mm7 = ip3*c - ip1*s

        movq        mm5,            mm1
        movq        mm4,            mm3

        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
        paddw       mm5,            mm1             ; ip1 * cos(pi/8) * sqrt(2)

        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
        paddw       mm3,            mm4             ; ip3 * sin(pi/8) * sqrt(2)

        paddw       mm3,            mm5             ; d1
        paddw       mm0,            [GLOBAL(fours)] ; +4 on b1: rounds both b1 +/- mm7 outputs

        paddw       mm2,            [GLOBAL(fours)] ; +4 on a1: rounds both a1 +/- d1 outputs
        movq        mm6,            mm2             ; a1

        movq        mm4,            mm0             ; b1
        paddw       mm2,            mm3             ; mm2 = a1 + d1

        paddw       mm4,            mm7             ; mm4 = b1 + mm7
        psubw       mm0,            mm7             ; mm0 = b1 - mm7

        psubw       mm6,            mm3             ; mm6 = a1 - d1
        psraw       mm2,            3

        psraw       mm0,            3
        psraw       mm4,            3

        psraw       mm6,            3

        ; transpose again so each register holds one output row
        movq        mm1,            mm2             ; 03 02 01 00
        movq        mm3,            mm4             ; 23 22 21 20

        punpcklwd   mm1,            mm0             ; 11 01 10 00
        punpckhwd   mm2,            mm0             ; 13 03 12 02

        punpcklwd   mm3,            mm6             ; 31 21 30 20
        punpckhwd   mm4,            mm6             ; 33 23 32 22

        movq        mm0,            mm1             ; 11 01 10 00
        movq        mm5,            mm2             ; 13 03 12 02

        punpckldq   mm0,            mm3             ; 30 20 10 00
        punpckhdq   mm1,            mm3             ; 31 21 11 01

        punpckldq   mm2,            mm4             ; 32 22 12 02
        punpckhdq   mm5,            mm4             ; 33 23 13 03

        pxor        mm7,            mm7             ; zero, used to widen/narrow bytes

        ; row 0: dest[0..3] = sat_u8(dest[0..3] + mm0)
        movd        mm4,            [rdx]
        punpcklbw   mm4,            mm7
        paddsw      mm0,            mm4
        packuswb    mm0,            mm7
        movd        [rdx],          mm0

        ; row 1: dest + stride
        movd        mm4,            [rdx+rdi]
        punpcklbw   mm4,            mm7
        paddsw      mm1,            mm4
        packuswb    mm1,            mm7
        movd        [rdx+rdi],      mm1

        ; row 2: dest + 2*stride
        movd        mm4,            [rdx+2*rdi]
        punpcklbw   mm4,            mm7
        paddsw      mm2,            mm4
        packuswb    mm2,            mm7
        movd        [rdx+rdi*2],    mm2

        add         rdx,            rdi             ; rdx = dest + stride

        ; row 3: (dest + stride) + 2*stride
        movd        mm4,            [rdx+2*rdi]
        punpcklbw   mm4,            mm7
        paddsw      mm5,            mm4
        packuswb    mm5,            mm7
        movd        [rdx+rdi*2],    mm5

    ; begin epilog
    pop rdi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

SECTION_RODATA
; 0x8A8C = 35468 ~= sin(pi/8) * sqrt(2) * 65536
align 16
x_s1sqr2:
    times 4 dw 0x8A8C
; 0x4E7B = 20091 ~= (cos(pi/8) * sqrt(2) - 1) * 65536
align 16
x_c1sqr2less1:
    times 4 dw 0x4E7B
; rounding term added before the final arithmetic shift right by 3
align 16
fours:
    times 4 dw 0x0004