;
; jcgryext.asm - grayscale colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2011, 2016, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208

%include "jcolsamp.inc"

; --------------------------------------------------------------------------
;
; Convert some rows of samples to the output colorspace.
;
; GLOBAL(void)
; jsimd_rgb_gray_convert_mmx(JDIMENSION img_width, JSAMPARRAY input_buf,
;                            JSAMPIMAGE output_buf, JDIMENSION output_row,
;                            int num_rows);
;
; Reads num_rows rows of img_width packed pixels (RGB_PIXELSIZE bytes each)
; through the row pointers in input_buf, and writes one luminance (Y) byte
; per pixel through the row pointers of output_buf[0], starting at row index
; output_row.  Returns immediately, without touching MMX state, when
; img_width is zero or num_rows is not positive.
;
; Pixels are processed SIZEOF_MMWORD at a time.  A final group of fewer than
; SIZEOF_MMWORD pixels is loaded piecewise (.column_ld*), but the result is
; still stored as one full mmword.
; NOTE(review): that full-width store presumably relies on the caller padding
; each output row to a multiple of SIZEOF_MMWORD -- confirm against caller.
;

; Stack arguments, addressed relative to the unaligned frame base (b)
%define img_width(b)   (b) + 8          ; JDIMENSION img_width
%define input_buf(b)   (b) + 12         ; JSAMPARRAY input_buf
%define output_buf(b)  (b) + 16         ; JSAMPIMAGE output_buf
%define output_row(b)  (b) + 20         ; JDIMENSION output_row
%define num_rows(b)    (b) + 24         ; int num_rows

; Local work area, addressed relative to the mmword-aligned ebp
%define original_ebp  ebp + 0
%define wk(i)         ebp - (WK_NUM - (i)) * SIZEOF_MMWORD
                                        ; mmword wk[WK_NUM]
%define WK_NUM        2
%define gotptr        wk(0) - SIZEOF_POINTER  ; void * gotptr

    align       32
    GLOBAL_FUNCTION(jsimd_rgb_gray_convert_mmx)

EXTN(jsimd_rgb_gray_convert_mmx):
    push        ebp
    mov         eax, esp                    ; eax = original ebp
    sub         esp, byte 4
    and         esp, byte (-SIZEOF_MMWORD)  ; align to 64 bits
    mov         [esp], eax                  ; saved so the epilogue can "pop esp"
    mov         ebp, esp                    ; ebp = aligned ebp
    lea         esp, [wk(0)]                ; reserve wk[WK_NUM] below ebp
    pushpic     eax                         ; make a room for GOT address
    push        ebx
;   push        ecx                         ; need not be preserved
;   push        edx                         ; need not be preserved
    push        esi
    push        edi

    get_GOT     ebx                         ; get GOT address
    movpic      POINTER [gotptr], ebx       ; save GOT address

    mov         ecx, JDIMENSION [img_width(eax)]  ; num_cols
    test        ecx, ecx
    jz          near .return                ; zero-width image: nothing to do

    push        ecx                         ; ecx is reused for output_row below

    mov         esi, JSAMPIMAGE [output_buf(eax)]
    mov         ecx, JDIMENSION [output_row(eax)]
    mov         edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
    lea         edi, [edi+ecx*SIZEOF_JSAMPROW]  ; edi = &output_buf[0][output_row]

    pop         ecx                         ; ecx = num_cols again

    mov         esi, JSAMPARRAY [input_buf(eax)]
    mov         eax, INT [num_rows(eax)]
    test        eax, eax
    jle         near .return                ; no rows requested
    alignx      16, 7
.rowloop:
    pushpic     eax
    push        edi
    push        esi
    push        ecx                     ; col

    mov         esi, JSAMPROW [esi]     ; inptr
    mov         edi, JSAMPROW [edi]     ; outptr0
    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)

    cmp         ecx, byte SIZEOF_MMWORD
    jae         short .columnloop       ; fewer than SIZEOF_MMWORD columns
                                        ; fall through to the tail loader
    alignx      16, 7

%if RGB_PIXELSIZE == 3  ; ---------------

    ; Tail loader: ecx becomes the number of remaining input BYTES, and each
    ; set bit of that count (1, 2, 4, 8, 16) loads a piece of that size,
    ; working backwards from the end of the row.
.column_ld1:
    push        eax                     ; eax/edx are scratch for the 1- and
    push        edx                     ; 2-byte pieces; restored at .column_ld4
    lea         ecx, [ecx+ecx*2]        ; imul ecx,RGB_PIXELSIZE
    test        cl, SIZEOF_BYTE
    jz          short .column_ld2
    sub         ecx, byte SIZEOF_BYTE
    xor         eax, eax
    mov         al, byte [esi+ecx]
.column_ld2:
    test        cl, SIZEOF_WORD
    jz          short .column_ld4
    sub         ecx, byte SIZEOF_WORD
    xor         edx, edx
    mov         dx, word [esi+ecx]
    shl         eax, WORD_BIT
    or          eax, edx
.column_ld4:
    movd        mmA, eax
    pop         edx
    pop         eax
    test        cl, SIZEOF_DWORD
    jz          short .column_ld8
    sub         ecx, byte SIZEOF_DWORD
    movd        mmG, dword [esi+ecx]
    psllq       mmA, DWORD_BIT
    por         mmA, mmG
.column_ld8:
    test        cl, SIZEOF_MMWORD
    jz          short .column_ld16
    movq        mmG, mmA
    movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
    mov         ecx, SIZEOF_MMWORD      ; makes the loop counter hit zero
                                        ; after this (last) conversion
    jmp         short .rgb_gray_cnv
.column_ld16:
    test        cl, 2*SIZEOF_MMWORD
    mov         ecx, SIZEOF_MMWORD      ; mov leaves the flags from "test"
                                        ; intact for the jz below
    jz          short .rgb_gray_cnv
    movq        mmF, mmA
    movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
    movq        mmG, MMWORD [esi+1*SIZEOF_MMWORD]
    jmp         short .rgb_gray_cnv
    alignx      16, 7

.columnloop:
    movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
    movq        mmG, MMWORD [esi+1*SIZEOF_MMWORD]
    movq        mmF, MMWORD [esi+2*SIZEOF_MMWORD]

.rgb_gray_cnv:
    ; mmA=(00 10 20 01 11 21 02 12)
    ; mmG=(22 03 13 23 04 14 24 05)
    ; mmF=(15 25 06 16 26 07 17 27)

    movq        mmD, mmA
    psllq       mmA, 4*BYTE_BIT         ; mmA=(-- -- -- -- 00 10 20 01)
    psrlq       mmD, 4*BYTE_BIT         ; mmD=(11 21 02 12 -- -- -- --)

    punpckhbw   mmA, mmG                ; mmA=(00 04 10 14 20 24 01 05)
    psllq       mmG, 4*BYTE_BIT         ; mmG=(-- -- -- -- 22 03 13 23)

    punpcklbw   mmD, mmF                ; mmD=(11 15 21 25 02 06 12 16)
    punpckhbw   mmG, mmF                ; mmG=(22 26 03 07 13 17 23 27)

    movq        mmE, mmA
    psllq       mmA, 4*BYTE_BIT         ; mmA=(-- -- -- -- 00 04 10 14)
    psrlq       mmE, 4*BYTE_BIT         ; mmE=(20 24 01 05 -- -- -- --)

    punpckhbw   mmA, mmD                ; mmA=(00 02 04 06 10 12 14 16)
    psllq       mmD, 4*BYTE_BIT         ; mmD=(-- -- -- -- 11 15 21 25)

    punpcklbw   mmE, mmG                ; mmE=(20 22 24 26 01 03 05 07)
    punpckhbw   mmD, mmG                ; mmD=(11 13 15 17 21 23 25 27)

    pxor        mmH, mmH                ; zero, for byte -> word widening

    movq        mmC, mmA
    punpcklbw   mmA, mmH                ; mmA=(00 02 04 06)
    punpckhbw   mmC, mmH                ; mmC=(10 12 14 16)

    movq        mmB, mmE
    punpcklbw   mmE, mmH                ; mmE=(20 22 24 26)
    punpckhbw   mmB, mmH                ; mmB=(01 03 05 07)

    movq        mmF, mmD
    punpcklbw   mmD, mmH                ; mmD=(11 13 15 17)
    punpckhbw   mmF, mmH                ; mmF=(21 23 25 27)

%else  ; RGB_PIXELSIZE == 4 ; -----------

    ; Tail loader: ecx stays a PIXEL count here; each set bit (1, 2, 4) loads
    ; that many pixels, working backwards from the end of the row.
.column_ld1:
    test        cl, SIZEOF_MMWORD/8
    jz          short .column_ld2
    sub         ecx, byte SIZEOF_MMWORD/8
    movd        mmA, dword [esi+ecx*RGB_PIXELSIZE]
.column_ld2:
    test        cl, SIZEOF_MMWORD/4
    jz          short .column_ld4
    sub         ecx, byte SIZEOF_MMWORD/4
    movq        mmF, mmA
    movq        mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
.column_ld4:
    test        cl, SIZEOF_MMWORD/2
    mov         ecx, SIZEOF_MMWORD      ; mov leaves the flags from "test"
                                        ; intact; counter hits zero after
                                        ; this (last) conversion
    jz          short .rgb_gray_cnv
    movq        mmD, mmA
    movq        mmC, mmF
    movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
    movq        mmF, MMWORD [esi+1*SIZEOF_MMWORD]
    jmp         short .rgb_gray_cnv
    alignx      16, 7

.columnloop:
    movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
    movq        mmF, MMWORD [esi+1*SIZEOF_MMWORD]
    movq        mmD, MMWORD [esi+2*SIZEOF_MMWORD]
    movq        mmC, MMWORD [esi+3*SIZEOF_MMWORD]

.rgb_gray_cnv:
    ; mmA=(00 10 20 30 01 11 21 31)
    ; mmF=(02 12 22 32 03 13 23 33)
    ; mmD=(04 14 24 34 05 15 25 35)
    ; mmC=(06 16 26 36 07 17 27 37)

    movq        mmB, mmA
    punpcklbw   mmA, mmF                ; mmA=(00 02 10 12 20 22 30 32)
    punpckhbw   mmB, mmF                ; mmB=(01 03 11 13 21 23 31 33)

    movq        mmG, mmD
    punpcklbw   mmD, mmC                ; mmD=(04 06 14 16 24 26 34 36)
    punpckhbw   mmG, mmC                ; mmG=(05 07 15 17 25 27 35 37)

    movq        mmE, mmA
    punpcklwd   mmA, mmD                ; mmA=(00 02 04 06 10 12 14 16)
    punpckhwd   mmE, mmD                ; mmE=(20 22 24 26 30 32 34 36)

    movq        mmH, mmB
    punpcklwd   mmB, mmG                ; mmB=(01 03 05 07 11 13 15 17)
    punpckhwd   mmH, mmG                ; mmH=(21 23 25 27 31 33 35 37)

    pxor        mmF, mmF                ; zero, for byte -> word widening

    movq        mmC, mmA
    punpcklbw   mmA, mmF                ; mmA=(00 02 04 06)
    punpckhbw   mmC, mmF                ; mmC=(10 12 14 16)

    movq        mmD, mmB
    punpcklbw   mmB, mmF                ; mmB=(01 03 05 07)
    punpckhbw   mmD, mmF                ; mmD=(11 13 15 17)

    movq        mmG, mmE
    punpcklbw   mmE, mmF                ; mmE=(20 22 24 26)
    punpckhbw   mmG, mmF                ; mmG=(30 32 34 36)

    ; mmF is the zero register and also a destination here, so the bytes are
    ; unpacked into the high half of each word and then shifted down.
    punpcklbw   mmF, mmH
    punpckhbw   mmH, mmH
    psrlw       mmF, BYTE_BIT           ; mmF=(21 23 25 27)
    psrlw       mmH, BYTE_BIT           ; mmH=(31 33 35 37)

%endif  ; RGB_PIXELSIZE ; ---------------

    ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
    ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO

    ; (Original)
    ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
    ;
    ; (This implementation)
    ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G

    movq        mm6, mm1
    punpcklwd   mm1, mm3
    punpckhwd   mm6, mm3
    pmaddwd     mm1, [GOTOFF(eax,PW_F0299_F0337)]  ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
    pmaddwd     mm6, [GOTOFF(eax,PW_F0299_F0337)]  ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)

    movq        mm7, mm6                ; mm7=ROH*FIX(0.299)+GOH*FIX(0.337)

    movq        mm6, mm0
    punpcklwd   mm0, mm2
    punpckhwd   mm6, mm2
    pmaddwd     mm0, [GOTOFF(eax,PW_F0299_F0337)]  ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
    pmaddwd     mm6, [GOTOFF(eax,PW_F0299_F0337)]  ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)

    movq        MMWORD [wk(0)], mm0     ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
    movq        MMWORD [wk(1)], mm6     ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)

    movq        mm0, mm5                ; mm0=BO
    movq        mm6, mm4                ; mm6=BE

    movq        mm4, mm0
    punpcklwd   mm0, mm3
    punpckhwd   mm4, mm3
    pmaddwd     mm0, [GOTOFF(eax,PW_F0114_F0250)]  ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
    pmaddwd     mm4, [GOTOFF(eax,PW_F0114_F0250)]  ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)

    movq        mm3, [GOTOFF(eax,PD_ONEHALF)]      ; mm3=[PD_ONEHALF]

    paddd       mm0, mm1
    paddd       mm4, mm7
    paddd       mm0, mm3
    paddd       mm4, mm3
    psrld       mm0, SCALEBITS          ; mm0=YOL
    psrld       mm4, SCALEBITS          ; mm4=YOH
    packssdw    mm0, mm4                ; mm0=YO

    movq        mm4, mm6
    punpcklwd   mm6, mm2
    punpckhwd   mm4, mm2
    pmaddwd     mm6, [GOTOFF(eax,PW_F0114_F0250)]  ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
    pmaddwd     mm4, [GOTOFF(eax,PW_F0114_F0250)]  ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)

    movq        mm2, [GOTOFF(eax,PD_ONEHALF)]      ; mm2=[PD_ONEHALF]

    paddd       mm6, MMWORD [wk(0)]
    paddd       mm4, MMWORD [wk(1)]
    paddd       mm6, mm2
    paddd       mm4, mm2
    psrld       mm6, SCALEBITS          ; mm6=YEL
    psrld       mm4, SCALEBITS          ; mm4=YEH
    packssdw    mm6, mm4                ; mm6=YE

    psllw       mm0, BYTE_BIT           ; odd samples -> high byte of each word
    por         mm6, mm0                ; mm6=Y
    movq        MMWORD [edi], mm6       ; Save Y

    sub         ecx, byte SIZEOF_MMWORD
    add         esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD  ; inptr
    add         edi, byte SIZEOF_MMWORD                ; outptr0
    cmp         ecx, byte SIZEOF_MMWORD
    jae         near .columnloop        ; a full group remains
    test        ecx, ecx
    jnz         near .column_ld1        ; a partial group remains

    pop         ecx                     ; col
    pop         esi
    pop         edi
    poppic      eax

    add         esi, byte SIZEOF_JSAMPROW  ; input_buf
    add         edi, byte SIZEOF_JSAMPROW
    dec         eax                        ; num_rows
    jg          near .rowloop

    emms                                ; empty MMX state

.return:
    pop         edi
    pop         esi
;   pop         edx                     ; need not be preserved
;   pop         ecx                     ; need not be preserved
    pop         ebx
    mov         esp, ebp                ; esp <- aligned ebp
    pop         esp                     ; esp <- original ebp
    pop         ebp
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32