;
; jdcolext.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208

%include "jcolsamp.inc"

; --------------------------------------------------------------------------
;
; Convert some rows of samples to the output colorspace.
;
; GLOBAL(void)
; jsimd_ycc_rgb_convert_mmx(JDIMENSION out_width, JSAMPIMAGE input_buf,
;                           JDIMENSION input_row, JSAMPARRAY output_buf,
;                           int num_rows)
;
; Arguments are read from the caller's stack (32-bit, stack-passed):
;   out_width   number of pixels to produce per row; 0 returns immediately
;   input_buf   array of three row-pointer arrays (Y, Cb, Cr in that order,
;               going by the register comments in the column loop)
;   input_row   index of the first row to read in each of the three arrays
;   output_buf  array of output row pointers, one per converted row
;   num_rows    number of rows to convert; <= 0 returns immediately
;
; Each output pixel occupies RGB_PIXELSIZE bytes (3 or 4).  The component
; order is determined by the mmA..mmH register aliases, which are not defined
; in this file (presumably supplied through jcolsamp.inc - confirm there).
;
; NOTE(review): every column-loop iteration loads a full MMWORD from each
; input row before the remaining-column count is checked, so the input rows
; presumably have to be readable up to the next MMWORD multiple - confirm
; against the callers' buffer allocation.
;
; Registers: ebx, esi, edi and ebp are preserved; eax, ecx, edx and the MMX
; registers are clobbered.  EMMS is executed before returning whenever MMX
; instructions were used.
;

%define out_width(b)   (b) + 8          ; JDIMENSION out_width
%define input_buf(b)   (b) + 12         ; JSAMPIMAGE input_buf
%define input_row(b)   (b) + 16         ; JDIMENSION input_row
%define output_buf(b)  (b) + 20         ; JSAMPARRAY output_buf
%define num_rows(b)    (b) + 24         ; int num_rows

; Local frame, addressed from the MMWORD-aligned ebp set up in the prologue:
;   [original_ebp]  value esp had right after "push ebp" (used to unwind)
;   wk(0), wk(1)    mmword scratch slots directly below the aligned ebp
;   gotptr          pointer-sized slot directly below wk(0)
%define original_ebp  ebp + 0
%define wk(i)         ebp - (WK_NUM - (i)) * SIZEOF_MMWORD
                                        ; mmword wk[WK_NUM]
%define WK_NUM        2
%define gotptr        wk(0) - SIZEOF_POINTER  ; void * gotptr

    align       32
    GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_mmx)

EXTN(jsimd_ycc_rgb_convert_mmx):
    push        ebp
    mov         eax, esp                    ; eax = original ebp
    sub         esp, byte 4
    and         esp, byte (-SIZEOF_MMWORD)  ; align to 64 bits
    mov         [esp], eax
    mov         ebp, esp                    ; ebp = aligned ebp
    lea         esp, [wk(0)]
    pushpic     eax                     ; make a room for GOT address
    push        ebx
;   push        ecx                     ; need not be preserved
;   push        edx                     ; need not be preserved
    push        esi
    push        edi

    get_GOT     ebx                     ; get GOT address
    movpic      POINTER [gotptr], ebx   ; save GOT address

    ; From here on eax still holds the unaligned frame base, so the
    ; arguments are addressed relative to eax rather than ebp.
    mov         ecx, JDIMENSION [out_width(eax)]  ; num_cols
    test        ecx, ecx
    jz          near .return            ; nothing to do (no MMX used yet)

    push        ecx                     ; ecx is reused below for input_row

    ; esi/ebx/edx = &input_buf[0..2][input_row]
    mov         edi, JSAMPIMAGE [input_buf(eax)]
    mov         ecx, JDIMENSION [input_row(eax)]
    mov         esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
    mov         ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
    mov         edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
    lea         esi, [esi+ecx*SIZEOF_JSAMPROW]
    lea         ebx, [ebx+ecx*SIZEOF_JSAMPROW]
    lea         edx, [edx+ecx*SIZEOF_JSAMPROW]

    pop         ecx                     ; ecx = num_cols again

    mov         edi, JSAMPARRAY [output_buf(eax)]
    mov         eax, INT [num_rows(eax)]
    test        eax, eax
    jle         near .return            ; nothing to do (no MMX used yet)
    alignx      16, 7
.rowloop:
    ; Save the per-row state; the column loop overwrites all six registers
    ; and .nextrow restores them in reverse order.
    push        eax
    push        edi
    push        edx
    push        ebx
    push        esi
    push        ecx                     ; col

    mov         esi, JSAMPROW [esi]     ; inptr0
    mov         ebx, JSAMPROW [ebx]     ; inptr1
    mov         edx, JSAMPROW [edx]     ; inptr2
    mov         edi, JSAMPROW [edi]     ; outptr
    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
    alignx      16, 7
.columnloop:

    movq        mm5, MMWORD [ebx]       ; mm5=Cb(01234567)
    movq        mm1, MMWORD [edx]       ; mm1=Cr(01234567)

    pcmpeqw     mm4, mm4
    pcmpeqw     mm7, mm7
    psrlw       mm4, BYTE_BIT
    psllw       mm7, 7                  ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
    movq        mm0, mm4                ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..}

    ; Split the packed bytes into even- and odd-indexed samples, each
    ; zero-extended to a 16-bit word.
    pand        mm4, mm5                ; mm4=Cb(0246)=CbE
    psrlw       mm5, BYTE_BIT           ; mm5=Cb(1357)=CbO
    pand        mm0, mm1                ; mm0=Cr(0246)=CrE
    psrlw       mm1, BYTE_BIT           ; mm1=Cr(1357)=CrO

    ; 0xFF80 is -128 as a signed word: remove the chroma bias.
    paddw       mm4, mm7
    paddw       mm5, mm7
    paddw       mm0, mm7
    paddw       mm1, mm7

    ; (Original)
    ; R = Y                + 1.40200 * Cr
    ; G = Y - 0.34414 * Cb - 0.71414 * Cr
    ; B = Y + 1.77200 * Cb
    ;
    ; (This implementation)
    ; R = Y                + 0.40200 * Cr + Cr
    ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
    ; B = Y - 0.22800 * Cb + Cb + Cb

    movq        mm2, mm4                ; mm2=CbE
    movq        mm3, mm5                ; mm3=CbO
    paddw       mm4, mm4                ; mm4=2*CbE
    paddw       mm5, mm5                ; mm5=2*CbO
    movq        mm6, mm0                ; mm6=CrE
    movq        mm7, mm1                ; mm7=CrO
    paddw       mm0, mm0                ; mm0=2*CrE
    paddw       mm1, mm1                ; mm1=2*CrO

    ; The chroma values were doubled above so that the high-word products
    ; carry one extra fraction bit; adding PW_ONE and shifting right by one
    ; below then rounds instead of truncating.
    pmulhw      mm4, [GOTOFF(eax,PW_MF0228)]  ; mm4=(2*CbE * -FIX(0.22800))
    pmulhw      mm5, [GOTOFF(eax,PW_MF0228)]  ; mm5=(2*CbO * -FIX(0.22800))
    pmulhw      mm0, [GOTOFF(eax,PW_F0402)]   ; mm0=(2*CrE * FIX(0.40200))
    pmulhw      mm1, [GOTOFF(eax,PW_F0402)]   ; mm1=(2*CrO * FIX(0.40200))

    paddw       mm4, [GOTOFF(eax,PW_ONE)]
    paddw       mm5, [GOTOFF(eax,PW_ONE)]
    psraw       mm4, 1                  ; mm4=(CbE * -FIX(0.22800))
    psraw       mm5, 1                  ; mm5=(CbO * -FIX(0.22800))
    paddw       mm0, [GOTOFF(eax,PW_ONE)]
    paddw       mm1, [GOTOFF(eax,PW_ONE)]
    psraw       mm0, 1                  ; mm0=(CrE * FIX(0.40200))
    psraw       mm1, 1                  ; mm1=(CrO * FIX(0.40200))

    paddw       mm4, mm2
    paddw       mm5, mm3
    paddw       mm4, mm2                ; mm4=(CbE * FIX(1.77200))=(B-Y)E
    paddw       mm5, mm3                ; mm5=(CbO * FIX(1.77200))=(B-Y)O
    paddw       mm0, mm6                ; mm0=(CrE * FIX(1.40200))=(R-Y)E
    paddw       mm1, mm7                ; mm1=(CrO * FIX(1.40200))=(R-Y)O

    ; Spill (B-Y) to the scratch slots: mm4/mm5 are needed as temporaries
    ; for the green computation.
    movq        MMWORD [wk(0)], mm4     ; wk(0)=(B-Y)E
    movq        MMWORD [wk(1)], mm5     ; wk(1)=(B-Y)O

    ; Interleave Cb and Cr words so one pmaddwd per register yields
    ; Cb*-FIX(0.344) + Cr*FIX(0.285) as 32-bit sums.
    movq        mm4, mm2
    movq        mm5, mm3
    punpcklwd   mm2, mm6
    punpckhwd   mm4, mm6
    pmaddwd     mm2, [GOTOFF(eax,PW_MF0344_F0285)]
    pmaddwd     mm4, [GOTOFF(eax,PW_MF0344_F0285)]
    punpcklwd   mm3, mm7
    punpckhwd   mm5, mm7
    pmaddwd     mm3, [GOTOFF(eax,PW_MF0344_F0285)]
    pmaddwd     mm5, [GOTOFF(eax,PW_MF0344_F0285)]

    ; Round and scale the 32-bit sums back down.
    paddd       mm2, [GOTOFF(eax,PD_ONEHALF)]
    paddd       mm4, [GOTOFF(eax,PD_ONEHALF)]
    psrad       mm2, SCALEBITS
    psrad       mm4, SCALEBITS
    paddd       mm3, [GOTOFF(eax,PD_ONEHALF)]
    paddd       mm5, [GOTOFF(eax,PD_ONEHALF)]
    psrad       mm3, SCALEBITS
    psrad       mm5, SCALEBITS

    packssdw    mm2, mm4                ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
    packssdw    mm3, mm5                ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
    psubw       mm2, mm6                ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
    psubw       mm3, mm7                ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O

    movq        mm5, MMWORD [esi]       ; mm5=Y(01234567)

    pcmpeqw     mm4, mm4
    psrlw       mm4, BYTE_BIT           ; mm4={0xFF 0x00 0xFF 0x00 ..}
    pand        mm4, mm5                ; mm4=Y(0246)=YE
    psrlw       mm5, BYTE_BIT           ; mm5=Y(1357)=YO

    ; Add luma to each colour difference; packuswb saturates the signed
    ; words to unsigned bytes.
    paddw       mm0, mm4                ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6)
    paddw       mm1, mm5                ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7)
    packuswb    mm0, mm0                ; mm0=(R0 R2 R4 R6 ** ** ** **)
    packuswb    mm1, mm1                ; mm1=(R1 R3 R5 R7 ** ** ** **)

    paddw       mm2, mm4                ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6)
    paddw       mm3, mm5                ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7)
    packuswb    mm2, mm2                ; mm2=(G0 G2 G4 G6 ** ** ** **)
    packuswb    mm3, mm3                ; mm3=(G1 G3 G5 G7 ** ** ** **)

    paddw       mm4, MMWORD [wk(0)]     ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
    paddw       mm5, MMWORD [wk(1)]     ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
    packuswb    mm4, mm4                ; mm4=(B0 B2 B4 B6 ** ** ** **)
    packuswb    mm5, mm5                ; mm5=(B1 B3 B5 B7 ** ** ** **)

%if RGB_PIXELSIZE == 3  ; ---------------

    ; In the layout comments below each two-digit entry is
    ; <byte position within the pixel><pixel index>.

    ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
    ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
    ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
    ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)

    punpcklbw   mmA, mmC                ; mmA=(00 10 02 12 04 14 06 16)
    punpcklbw   mmE, mmB                ; mmE=(20 01 22 03 24 05 26 07)
    punpcklbw   mmD, mmF                ; mmD=(11 21 13 23 15 25 17 27)

    movq        mmG, mmA
    movq        mmH, mmA
    punpcklwd   mmA, mmE                ; mmA=(00 10 20 01 02 12 22 03)
    punpckhwd   mmG, mmE                ; mmG=(04 14 24 05 06 16 26 07)

    psrlq       mmH, 2*BYTE_BIT         ; mmH=(02 12 04 14 06 16 -- --)
    psrlq       mmE, 2*BYTE_BIT         ; mmE=(22 03 24 05 26 07 -- --)

    movq        mmC, mmD
    movq        mmB, mmD
    punpcklwd   mmD, mmH                ; mmD=(11 21 02 12 13 23 04 14)
    punpckhwd   mmC, mmH                ; mmC=(15 25 06 16 17 27 -- --)

    psrlq       mmB, 2*BYTE_BIT         ; mmB=(13 23 15 25 17 27 -- --)

    movq        mmF, mmE
    punpcklwd   mmE, mmB                ; mmE=(22 03 13 23 24 05 15 25)
    punpckhwd   mmF, mmB                ; mmF=(26 07 17 27 -- -- -- --)

    punpckldq   mmA, mmD                ; mmA=(00 10 20 01 11 21 02 12)
    punpckldq   mmE, mmG                ; mmE=(22 03 13 23 04 14 24 05)
    punpckldq   mmC, mmF                ; mmC=(15 25 06 16 26 07 17 27)

    cmp         ecx, byte SIZEOF_MMWORD
    jb          short .column_st16      ; partial group: store the tail only

    movq        MMWORD [edi+0*SIZEOF_MMWORD], mmA
    movq        MMWORD [edi+1*SIZEOF_MMWORD], mmE
    movq        MMWORD [edi+2*SIZEOF_MMWORD], mmC

    sub         ecx, byte SIZEOF_MMWORD
    jz          short .nextrow

    add         esi, byte SIZEOF_MMWORD   ; inptr0
    add         ebx, byte SIZEOF_MMWORD   ; inptr1
    add         edx, byte SIZEOF_MMWORD   ; inptr2
    add         edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD  ; outptr
    jmp         near .columnloop
    alignx      16, 7

    ; Tail of the row: ecx is converted from pixels to bytes, then the
    ; remaining bytes are written in chunks of 16, 8, 4, 2 and 1 so that
    ; nothing is stored past the last output pixel.
.column_st16:
    lea         ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
    cmp         ecx, byte 2*SIZEOF_MMWORD
    jb          short .column_st8
    movq        MMWORD [edi+0*SIZEOF_MMWORD], mmA
    movq        MMWORD [edi+1*SIZEOF_MMWORD], mmE
    movq        mmA, mmC
    sub         ecx, byte 2*SIZEOF_MMWORD
    add         edi, byte 2*SIZEOF_MMWORD
    jmp         short .column_st4
.column_st8:
    cmp         ecx, byte SIZEOF_MMWORD
    jb          short .column_st4
    movq        MMWORD [edi+0*SIZEOF_MMWORD], mmA
    movq        mmA, mmE
    sub         ecx, byte SIZEOF_MMWORD
    add         edi, byte SIZEOF_MMWORD
.column_st4:
    ; eax (GOT address) is overwritten from here on.  That is safe because
    ; this path always ends at .nextrow, and .rowloop reloads eax from gotptr.
    movd        eax, mmA
    cmp         ecx, byte SIZEOF_DWORD
    jb          short .column_st2
    mov         dword [edi+0*SIZEOF_DWORD], eax
    psrlq       mmA, DWORD_BIT
    movd        eax, mmA
    sub         ecx, byte SIZEOF_DWORD
    add         edi, byte SIZEOF_DWORD
.column_st2:
    cmp         ecx, byte SIZEOF_WORD
    jb          short .column_st1
    mov         word [edi+0*SIZEOF_WORD], ax
    shr         eax, WORD_BIT
    sub         ecx, byte SIZEOF_WORD
    add         edi, byte SIZEOF_WORD
.column_st1:
    cmp         ecx, byte SIZEOF_BYTE
    jb          short .nextrow
    mov         byte [edi+0*SIZEOF_BYTE], al

%else  ; RGB_PIXELSIZE == 4 ; -----------

    ; Fourth (filler) byte of every pixel: all ones or all zeros.
%ifdef RGBX_FILLER_0XFF
    pcmpeqb     mm6, mm6                ; mm6=(X0 X2 X4 X6 ** ** ** **)
    pcmpeqb     mm7, mm7                ; mm7=(X1 X3 X5 X7 ** ** ** **)
%else
    pxor        mm6, mm6                ; mm6=(X0 X2 X4 X6 ** ** ** **)
    pxor        mm7, mm7                ; mm7=(X1 X3 X5 X7 ** ** ** **)
%endif
    ; In the layout comments below each two-digit entry is
    ; <byte position within the pixel><pixel index>.

    ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
    ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
    ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
    ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)

    punpcklbw   mmA, mmC                ; mmA=(00 10 02 12 04 14 06 16)
    punpcklbw   mmE, mmG                ; mmE=(20 30 22 32 24 34 26 36)
    punpcklbw   mmB, mmD                ; mmB=(01 11 03 13 05 15 07 17)
    punpcklbw   mmF, mmH                ; mmF=(21 31 23 33 25 35 27 37)

    movq        mmC, mmA
    punpcklwd   mmA, mmE                ; mmA=(00 10 20 30 02 12 22 32)
    punpckhwd   mmC, mmE                ; mmC=(04 14 24 34 06 16 26 36)
    movq        mmG, mmB
    punpcklwd   mmB, mmF                ; mmB=(01 11 21 31 03 13 23 33)
    punpckhwd   mmG, mmF                ; mmG=(05 15 25 35 07 17 27 37)

    movq        mmD, mmA
    punpckldq   mmA, mmB                ; mmA=(00 10 20 30 01 11 21 31)
    punpckhdq   mmD, mmB                ; mmD=(02 12 22 32 03 13 23 33)
    movq        mmH, mmC
    punpckldq   mmC, mmG                ; mmC=(04 14 24 34 05 15 25 35)
    punpckhdq   mmH, mmG                ; mmH=(06 16 26 36 07 17 27 37)

    cmp         ecx, byte SIZEOF_MMWORD
    jb          short .column_st16      ; partial group: store the tail only

    movq        MMWORD [edi+0*SIZEOF_MMWORD], mmA
    movq        MMWORD [edi+1*SIZEOF_MMWORD], mmD
    movq        MMWORD [edi+2*SIZEOF_MMWORD], mmC
    movq        MMWORD [edi+3*SIZEOF_MMWORD], mmH

    sub         ecx, byte SIZEOF_MMWORD
    jz          short .nextrow

    add         esi, byte SIZEOF_MMWORD   ; inptr0
    add         ebx, byte SIZEOF_MMWORD   ; inptr1
    add         edx, byte SIZEOF_MMWORD   ; inptr2
    add         edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD  ; outptr
    jmp         near .columnloop
    alignx      16, 7

    ; Tail of the row: ecx still counts pixels here.  Each mmword holds two
    ; whole pixels, so the tail is written as 2 mmwords, 1 mmword, then a
    ; single dword, never storing past the last output pixel.
.column_st16:
    cmp         ecx, byte SIZEOF_MMWORD/2
    jb          short .column_st8
    movq        MMWORD [edi+0*SIZEOF_MMWORD], mmA
    movq        MMWORD [edi+1*SIZEOF_MMWORD], mmD
    movq        mmA, mmC
    movq        mmD, mmH
    sub         ecx, byte SIZEOF_MMWORD/2
    add         edi, byte 2*SIZEOF_MMWORD
.column_st8:
    cmp         ecx, byte SIZEOF_MMWORD/4
    jb          short .column_st4
    movq        MMWORD [edi+0*SIZEOF_MMWORD], mmA
    movq        mmA, mmD
    sub         ecx, byte SIZEOF_MMWORD/4
    add         edi, byte 1*SIZEOF_MMWORD
.column_st4:
    cmp         ecx, byte SIZEOF_MMWORD/8
    jb          short .nextrow
    movd        dword [edi+0*SIZEOF_DWORD], mmA

%endif  ; RGB_PIXELSIZE ; ---------------

    alignx      16, 7

.nextrow:
    ; Restore the per-row state saved at .rowloop (reverse push order).
    pop         ecx
    pop         esi
    pop         ebx
    pop         edx
    pop         edi
    pop         eax

    ; Step to the next row pointer in each of the four row arrays.
    add         esi, byte SIZEOF_JSAMPROW
    add         ebx, byte SIZEOF_JSAMPROW
    add         edx, byte SIZEOF_JSAMPROW
    add         edi, byte SIZEOF_JSAMPROW  ; output_buf
    dec         eax                        ; num_rows
    jg          near .rowloop

    emms                                ; empty MMX state

    ; The early exits jump straight here and skip EMMS; no MMX instruction
    ; has been executed on those paths.
.return:
    pop         edi
    pop         esi
;   pop         edx                     ; need not be preserved
;   pop         ecx                     ; need not be preserved
    pop         ebx
    mov         esp, ebp                ; esp <- aligned ebp
    pop         esp                     ; esp <- original ebp
    pop         ebp
    ret

; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
    align       32