Author: arekm Date: Tue Dec 11 18:13:10 2007 GMT Module: SOURCES Tag: HEAD ---- Log message: - no idea if correct
---- Files affected: SOURCES: libjpegsimd-asm.patch (NONE -> 1.1) (NEW) ---- Diffs: ================================================================ Index: SOURCES/libjpegsimd-asm.patch diff -u /dev/null SOURCES/libjpegsimd-asm.patch:1.1 --- /dev/null Tue Dec 11 19:13:10 2007 +++ SOURCES/libjpegsimd-asm.patch Tue Dec 11 19:13:05 2007 @@ -0,0 +1,558 @@ +diff -urN jpeg-6bx.org/jccolmmx.asm jpeg-6bx/jccolmmx.asm +--- jpeg-6bx.org/jccolmmx.asm 2006-02-03 17:50:00.000000000 +0100 ++++ jpeg-6bx/jccolmmx.asm 2007-12-11 19:05:42.000000000 +0100 +@@ -400,8 +400,8 @@ + + movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] + +- paddd mm0, MMWORD [wk(4)] +- paddd mm4, MMWORD [wk(5)] ++ paddd mm0, XMMWORD [wk(4)] ++ paddd mm4, XMMWORD [wk(5)] + paddd mm0,mm3 + paddd mm4,mm3 + psrld mm0,SCALEBITS ; mm0=YOL +@@ -439,8 +439,8 @@ + + movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] + +- paddd mm6, MMWORD [wk(6)] +- paddd mm4, MMWORD [wk(7)] ++ paddd mm6, XMMWORD [wk(6)] ++ paddd mm4, XMMWORD [wk(7)] + paddd mm6,mm2 + paddd mm4,mm2 + psrld mm6,SCALEBITS ; mm6=YEL +diff -urN jpeg-6bx.org/jcqnt3dn.asm jpeg-6bx/jcqnt3dn.asm +--- jpeg-6bx.org/jcqnt3dn.asm 2006-01-23 08:10:00.000000000 +0100 ++++ jpeg-6bx/jcqnt3dn.asm 2007-12-11 19:05:42.000000000 +0100 +@@ -169,12 +169,12 @@ + .quantloop: + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] +- pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] +- pfmul mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] ++ pfmul mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] ++ pfmul mm1, XMMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)] +- pfmul mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] +- pfmul mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] ++ pfmul mm2, XMMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] ++ pfmul mm3, XMMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd 
mm0,mm7 ; mm0=(00 ** 01 **) + pfadd mm1,mm7 ; mm1=(02 ** 03 **) +@@ -193,12 +193,12 @@ + + movq mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] +- pfmul mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] +- pfmul mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] ++ pfmul mm6, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] ++ pfmul mm1, XMMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)] + movq mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)] +- pfmul mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] +- pfmul mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] ++ pfmul mm3, XMMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] ++ pfmul mm4, XMMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd mm6,mm7 ; mm0=(10 ** 11 **) + pfadd mm1,mm7 ; mm4=(12 ** 13 **) +diff -urN jpeg-6bx.org/jcqntmmx.asm jpeg-6bx/jcqntmmx.asm +--- jpeg-6bx.org/jcqntmmx.asm 2006-01-10 01:00:00.000000000 +0100 ++++ jpeg-6bx/jcqntmmx.asm 2007-12-11 19:05:43.000000000 +0100 +@@ -194,14 +194,14 @@ + ; return (unsigned long) sz; + ; } + +- paddw mm0, MMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor +- paddw mm1, MMWORD [CORRECTION(0,1,edx)] ++ paddw mm0, XMMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor ++ paddw mm1, XMMWORD [CORRECTION(0,1,edx)] + psllw mm0,1 + psllw mm1,1 + movq mm4,mm0 + movq mm5,mm1 +- pmulhw mm0, MMWORD [RECIPROCAL(0,0,edx)] ; reciprocal +- pmulhw mm1, MMWORD [RECIPROCAL(0,1,edx)] ++ pmulhw mm0, XMMWORD [RECIPROCAL(0,0,edx)] ; reciprocal ++ pmulhw mm1, XMMWORD [RECIPROCAL(0,1,edx)] + movq mm6, MMWORD [SCALE(0,0,edx)] ; scale + movq mm7, MMWORD [SCALE(0,1,edx)] + paddw mm0,mm4 ; reciprocal is always negative (MSB=1) +@@ -220,8 +220,8 @@ + paddw mm1,mm7 + psraw mm4,(WORD_BIT-1) + psraw mm5,(WORD_BIT-1) +- pand mm4, MMWORD [SCALE(0,0,edx)] ; scale +- pand mm5, MMWORD [SCALE(0,1,edx)] ++ pand mm4, XMMWORD [SCALE(0,0,edx)] ; scale ++ pand mm5, XMMWORD [SCALE(0,1,edx)] + paddw 
mm0,mm4 + paddw mm1,mm5 + +diff -urN jpeg-6bx.org/jdcolmmx.asm jpeg-6bx/jdcolmmx.asm +--- jpeg-6bx.org/jdcolmmx.asm 2006-02-03 17:50:00.000000000 +0100 ++++ jpeg-6bx/jdcolmmx.asm 2007-12-11 19:05:43.000000000 +0100 +@@ -236,8 +236,8 @@ + packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) + packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) + +- paddw mm4, MMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6) +- paddw mm5, MMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7) ++ paddw mm4, XMMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6) ++ paddw mm5, XMMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7) + packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) + packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) + +diff -urN jpeg-6bx.org/jdsammmx.asm jpeg-6bx/jdsammmx.asm +--- jpeg-6bx.org/jdsammmx.asm 2006-02-03 17:50:00.000000000 +0100 ++++ jpeg-6bx/jdsammmx.asm 2007-12-11 19:05:43.000000000 +0100 +@@ -103,7 +103,7 @@ + pxor mm0,mm0 ; mm0=(all 0's) + pcmpeqb mm7,mm7 + psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT +- pand mm7, MMWORD [esi+0*SIZEOF_MMWORD] ++ pand mm7, XMMWORD [esi+0*SIZEOF_MMWORD] + + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD +@@ -114,7 +114,7 @@ + .columnloop_last: + pcmpeqb mm6,mm6 + psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT +- pand mm6, MMWORD [esi+0*SIZEOF_MMWORD] ++ pand mm6, XMMWORD [esi+0*SIZEOF_MMWORD] + jmp short .upsample + alignx 16,7 + +@@ -338,8 +338,8 @@ + psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT + movq mm2,mm1 + +- pand mm1, MMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7) +- pand mm2, MMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7) ++ pand mm1, XMMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7) ++ pand mm2, XMMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7) + + movq MMWORD [wk(2)], mm1 + movq MMWORD [wk(3)], mm2 +@@ -412,8 +412,8 @@ + movq mm4,mm3 + psrlq mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( 7 - - -) + +- por mm1, MMWORD [wk(0)] ; mm1=(-1 0 1 2) +- por mm2, MMWORD [wk(2)] ; mm2=( 5 6 7 8) ++ por mm1, XMMWORD [wk(0)] ; mm1=(-1 0 1 2) ++ por mm2, XMMWORD [wk(2)] ; 
mm2=( 5 6 7 8) + + movq MMWORD [wk(0)], mm4 + +@@ -465,8 +465,8 @@ + movq mm3,mm4 + psrlq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( 7 - - -) + +- por mm1, MMWORD [wk(1)] ; mm1=(-1 0 1 2) +- por mm5, MMWORD [wk(3)] ; mm5=( 5 6 7 8) ++ por mm1, XMMWORD [wk(1)] ; mm1=(-1 0 1 2) ++ por mm5, XMMWORD [wk(3)] ; mm5=( 5 6 7 8) + + movq MMWORD [wk(1)], mm3 + +diff -urN jpeg-6bx.org/jfmmxint.asm jpeg-6bx/jfmmxint.asm +--- jpeg-6bx.org/jfmmxint.asm 2006-02-03 17:50:00.000000000 +0100 ++++ jpeg-6bx/jfmmxint.asm 2007-12-11 19:05:44.000000000 +0100 +@@ -314,8 +314,8 @@ + pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + +- paddd mm4, MMWORD [wk(0)] ; mm4=data7L +- paddd mm1, MMWORD [wk(1)] ; mm1=data7H ++ paddd mm4, XMMWORD [wk(0)] ; mm4=data7L ++ paddd mm1, XMMWORD [wk(1)] ; mm1=data7H + paddd mm2,mm0 ; mm2=data1L + paddd mm7,mm6 ; mm7=data1H + +@@ -347,8 +347,8 @@ + + paddd mm1,mm0 ; mm1=data5L + paddd mm7,mm6 ; mm7=data5H +- paddd mm3, MMWORD [wk(0)] ; mm3=data3L +- paddd mm5, MMWORD [wk(1)] ; mm5=data3H ++ paddd mm3, XMMWORD [wk(0)] ; mm3=data3L ++ paddd mm5, XMMWORD [wk(1)] ; mm5=data3H + + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)] +@@ -558,8 +558,8 @@ + pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + +- paddd mm4, MMWORD [wk(0)] ; mm4=data7L +- paddd mm1, MMWORD [wk(1)] ; mm1=data7H ++ paddd mm4, XMMWORD [wk(0)] ; mm4=data7L ++ paddd mm1, XMMWORD [wk(1)] ; mm1=data7H + paddd mm2,mm0 ; mm2=data1L + paddd mm7,mm6 ; mm7=data1H + +@@ -591,8 +591,8 @@ + + paddd mm1,mm0 ; mm1=data5L + paddd mm7,mm6 ; mm7=data5H +- paddd mm3, MMWORD [wk(0)] ; mm3=data3L +- paddd mm5, MMWORD [wk(1)] ; mm5=data3H ++ paddd mm3, XMMWORD [wk(0)] ; mm3=data3L ++ paddd mm5, XMMWORD [wk(1)] ; mm5=data3H + + paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)] +diff -urN jpeg-6bx.org/ji3dnflt.asm jpeg-6bx/ji3dnflt.asm +--- 
jpeg-6bx.org/ji3dnflt.asm 2006-02-03 17:50:00.000000000 +0100 ++++ jpeg-6bx/ji3dnflt.asm 2007-12-11 19:05:44.000000000 +0100 +@@ -124,7 +124,7 @@ + psrad mm0,(DWORD_BIT-WORD_BIT) + pi2fd mm0,mm0 + +- pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ++ pfmul mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm1,mm0 + punpckldq mm0,mm0 +@@ -157,8 +157,8 @@ + pi2fd mm0,mm0 + pi2fd mm1,mm1 + +- pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] +- pfmul mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ++ pfmul mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ++ pfmul mm1, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + punpcklwd mm2,mm2 + punpcklwd mm3,mm3 +@@ -167,8 +167,8 @@ + pi2fd mm2,mm2 + pi2fd mm3,mm3 + +- pfmul mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] +- pfmul mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ++ pfmul mm2, XMMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ++ pfmul mm3, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm1 +@@ -204,8 +204,8 @@ + pi2fd mm2,mm2 + pi2fd mm3,mm3 + +- pfmul mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] +- pfmul mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ++ pfmul mm2, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ++ pfmul mm3, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + punpcklwd mm5,mm5 + punpcklwd mm1,mm1 +@@ -214,8 +214,8 @@ + pi2fd mm5,mm5 + pi2fd mm1,mm1 + +- pfmul mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] +- pfmul mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ++ pfmul mm5, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ++ pfmul mm1, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movq mm4,mm2 + movq mm0,mm5 +diff -urN jpeg-6bx.org/jimmxfst.asm jpeg-6bx/jimmxfst.asm +--- jpeg-6bx.org/jimmxfst.asm 2006-02-03 17:50:00.000000000 +0100 ++++ jpeg-6bx/jimmxfst.asm 2007-12-11 19:05:44.000000000 +0100 +@@ -139,11 +139,11 @@ + + movq mm0, MMWORD 
[MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] +- por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] +- por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] ++ por mm1, XMMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] ++ por mm1, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 +@@ -153,7 +153,7 @@ + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] +- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] ++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm2,mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0,mm0 ; mm0=(00 00 01 01) +@@ -183,12 +183,12 @@ + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] +- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] +- pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] ++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] ++ pmullw mm1, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] +- pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] +- pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] ++ pmullw mm2, XMMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] ++ pmullw mm3, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm1 +@@ -215,12 +215,12 @@ + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] +- pmullw mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] +- pmullw mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] ++ pmullw 
mm2, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] ++ pmullw mm3, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] +- pmullw mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] +- pmullw mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] ++ pmullw mm5, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] ++ pmullw mm1, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4,mm2 + movq mm0,mm5 +diff -urN jpeg-6bx.org/jimmxint.asm jpeg-6bx/jimmxint.asm +--- jpeg-6bx.org/jimmxint.asm 2006-02-03 17:50:00.000000000 +0100 ++++ jpeg-6bx/jimmxint.asm 2007-12-11 19:05:44.000000000 +0100 +@@ -152,11 +152,11 @@ + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] +- por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] +- por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] ++ por mm1, XMMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] ++ por mm1, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd eax,mm1 +@@ -166,7 +166,7 @@ + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] +- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0,PASS1_BITS + +@@ -198,12 +198,12 @@ + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] +- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm1, XMMWORD 
[MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] +- pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm2, XMMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm3, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; +@@ -271,12 +271,12 @@ + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] +- pmullw mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm4, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm6, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] +- pmullw mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm1, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm3, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movq mm5,mm6 + movq mm7,mm4 +@@ -333,8 +333,8 @@ + pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L + pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + +- paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L +- paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H ++ paddd mm2, XMMWORD [wk(10)] ; mm2=tmp0L ++ paddd mm0, XMMWORD [wk(11)] ; mm0=tmp0H + paddd mm3,mm5 ; mm3=tmp3L + paddd mm4,mm7 ; mm4=tmp3H + +@@ -354,8 +354,8 @@ + + paddd mm2,mm5 ; mm2=tmp1L + paddd mm0,mm7 ; mm0=tmp1H +- paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L +- paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H ++ paddd mm1, XMMWORD [wk(10)] ; mm1=tmp2L ++ paddd mm6, XMMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H +@@ -659,8 +659,8 @@ + pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; 
mm3=tmp3L + pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + +- paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L +- paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H ++ paddd mm2, XMMWORD [wk(10)] ; mm2=tmp0L ++ paddd mm0, XMMWORD [wk(11)] ; mm0=tmp0H + paddd mm3,mm5 ; mm3=tmp3L + paddd mm4,mm7 ; mm4=tmp3H + +@@ -680,8 +680,8 @@ + + paddd mm2,mm5 ; mm2=tmp1L + paddd mm0,mm7 ; mm0=tmp1H +- paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L +- paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H ++ paddd mm1, XMMWORD [wk(10)] ; mm1=tmp2L ++ paddd mm6, XMMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H +diff -urN jpeg-6bx.org/jimmxred.asm jpeg-6bx/jimmxred.asm +--- jpeg-6bx.org/jimmxred.asm 2006-02-03 17:50:00.000000000 +0100 ++++ jpeg-6bx/jimmxred.asm 2007-12-11 19:05:44.000000000 +0100 +@@ -160,10 +160,10 @@ + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] +- por mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] +- por mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] ++ por mm1, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] ++ por mm1, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm0,mm1 + packsswb mm0,mm0 + movd eax,mm0 +@@ -173,7 +173,7 @@ + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] +- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0,PASS1_BITS + +@@ -201,12 +201,12 @@ + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] +- pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm0, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm1, 
XMMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] +- pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm2, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm3, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movq mm4,mm0 + movq mm5,mm0 +@@ -243,9 +243,9 @@ + movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] +- pmullw mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm4, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm5, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm0, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor mm1,mm1 + pxor mm2,mm2 +@@ -549,12 +549,12 @@ + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] +- pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm0, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm1, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] +- pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm2, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm3, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm0=(10 11 ** 13), mm1=(30 31 ** 33) + ; mm2=(50 51 ** 53), mm3=(70 71 ** 73) +@@ -582,12 +582,12 @@ + + movq mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)] +- 
pmullw mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm6, XMMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm1, XMMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)] +- pmullw mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm3, XMMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm5, XMMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm6=(** 15 ** 17), mm1=(** 35 ** 37) + ; mm3=(** 55 ** 57), mm5=(** 75 ** 77) +@@ -608,8 +608,8 @@ + + movq mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)] +- pmullw mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] +- pmullw mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm1, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] ++ pmullw mm5, XMMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; mm1=(00 01 ** 03), mm5=(** 05 ** 07) + +diff -urN jpeg-6bx.org/jisseflt.asm jpeg-6bx/jisseflt.asm +--- jpeg-6bx.org/jisseflt.asm 2006-02-03 17:50:00.000000000 +0100 ++++ jpeg-6bx/jisseflt.asm 2007-12-11 19:05:45.000000000 +0100 +@@ -118,11 +118,11 @@ + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] +- por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] +- por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] +- por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] ++ por mm1, XMMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] ++ por mm1, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] ++ por mm0, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1,mm0 + packsswb mm1,mm1 + movd 
eax,mm1 ================================================================ _______________________________________________ pld-cvs-commit mailing list pld-cvs-commit@lists.pld-linux.org http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit
