SOURCES: libjpegsimd-asm.patch (NEW) - no idea if correct
arekm
arekm at pld-linux.org
Tue Dec 11 19:13:10 CET 2007
Author: arekm Date: Tue Dec 11 18:13:10 2007 GMT
Module: SOURCES Tag: HEAD
---- Log message:
- no idea if correct
---- Files affected:
SOURCES:
libjpegsimd-asm.patch (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: SOURCES/libjpegsimd-asm.patch
diff -u /dev/null SOURCES/libjpegsimd-asm.patch:1.1
--- /dev/null Tue Dec 11 19:13:10 2007
+++ SOURCES/libjpegsimd-asm.patch Tue Dec 11 19:13:05 2007
@@ -0,0 +1,558 @@
+diff -urN jpeg-6bx.org/jccolmmx.asm jpeg-6bx/jccolmmx.asm
+--- jpeg-6bx.org/jccolmmx.asm 2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jccolmmx.asm 2007-12-11 19:05:42.000000000 +0100
+@@ -400,8 +400,8 @@
+
+ movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF]
+
+- paddd mm0, MMWORD [wk(4)]
+- paddd mm4, MMWORD [wk(5)]
++ paddd mm0, XMMWORD [wk(4)]
++ paddd mm4, XMMWORD [wk(5)]
+ paddd mm0,mm3
+ paddd mm4,mm3
+ psrld mm0,SCALEBITS ; mm0=YOL
+@@ -439,8 +439,8 @@
+
+ movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF]
+
+- paddd mm6, MMWORD [wk(6)]
+- paddd mm4, MMWORD [wk(7)]
++ paddd mm6, XMMWORD [wk(6)]
++ paddd mm4, XMMWORD [wk(7)]
+ paddd mm6,mm2
+ paddd mm4,mm2
+ psrld mm6,SCALEBITS ; mm6=YEL
+diff -urN jpeg-6bx.org/jcqnt3dn.asm jpeg-6bx/jcqnt3dn.asm
+--- jpeg-6bx.org/jcqnt3dn.asm 2006-01-23 08:10:00.000000000 +0100
++++ jpeg-6bx/jcqnt3dn.asm 2007-12-11 19:05:42.000000000 +0100
+@@ -169,12 +169,12 @@
+ .quantloop:
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+ movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+- pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+- pfmul mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
++ pfmul mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
++ pfmul mm1, XMMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+ movq mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)]
+ movq mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)]
+- pfmul mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
+- pfmul mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
++ pfmul mm2, XMMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
++ pfmul mm3, XMMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
+
+ pfadd mm0,mm7 ; mm0=(00 ** 01 **)
+ pfadd mm1,mm7 ; mm1=(02 ** 03 **)
+@@ -193,12 +193,12 @@
+
+ movq mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+ movq mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+- pfmul mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+- pfmul mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
++ pfmul mm6, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
++ pfmul mm1, XMMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+ movq mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)]
+ movq mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)]
+- pfmul mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
+- pfmul mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
++ pfmul mm3, XMMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
++ pfmul mm4, XMMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
+
+ pfadd mm6,mm7 ; mm0=(10 ** 11 **)
+ pfadd mm1,mm7 ; mm4=(12 ** 13 **)
+diff -urN jpeg-6bx.org/jcqntmmx.asm jpeg-6bx/jcqntmmx.asm
+--- jpeg-6bx.org/jcqntmmx.asm 2006-01-10 01:00:00.000000000 +0100
++++ jpeg-6bx/jcqntmmx.asm 2007-12-11 19:05:43.000000000 +0100
+@@ -194,14 +194,14 @@
+ ; return (unsigned long) sz;
+ ; }
+
+- paddw mm0, MMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor
+- paddw mm1, MMWORD [CORRECTION(0,1,edx)]
++ paddw mm0, XMMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor
++ paddw mm1, XMMWORD [CORRECTION(0,1,edx)]
+ psllw mm0,1
+ psllw mm1,1
+ movq mm4,mm0
+ movq mm5,mm1
+- pmulhw mm0, MMWORD [RECIPROCAL(0,0,edx)] ; reciprocal
+- pmulhw mm1, MMWORD [RECIPROCAL(0,1,edx)]
++ pmulhw mm0, XMMWORD [RECIPROCAL(0,0,edx)] ; reciprocal
++ pmulhw mm1, XMMWORD [RECIPROCAL(0,1,edx)]
+ movq mm6, MMWORD [SCALE(0,0,edx)] ; scale
+ movq mm7, MMWORD [SCALE(0,1,edx)]
+ paddw mm0,mm4 ; reciprocal is always negative (MSB=1)
+@@ -220,8 +220,8 @@
+ paddw mm1,mm7
+ psraw mm4,(WORD_BIT-1)
+ psraw mm5,(WORD_BIT-1)
+- pand mm4, MMWORD [SCALE(0,0,edx)] ; scale
+- pand mm5, MMWORD [SCALE(0,1,edx)]
++ pand mm4, XMMWORD [SCALE(0,0,edx)] ; scale
++ pand mm5, XMMWORD [SCALE(0,1,edx)]
+ paddw mm0,mm4
+ paddw mm1,mm5
+
+diff -urN jpeg-6bx.org/jdcolmmx.asm jpeg-6bx/jdcolmmx.asm
+--- jpeg-6bx.org/jdcolmmx.asm 2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jdcolmmx.asm 2007-12-11 19:05:43.000000000 +0100
+@@ -236,8 +236,8 @@
+ packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **)
+ packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **)
+
+- paddw mm4, MMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
+- paddw mm5, MMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
++ paddw mm4, XMMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
++ paddw mm5, XMMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
+ packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **)
+ packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **)
+
+diff -urN jpeg-6bx.org/jdsammmx.asm jpeg-6bx/jdsammmx.asm
+--- jpeg-6bx.org/jdsammmx.asm 2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jdsammmx.asm 2007-12-11 19:05:43.000000000 +0100
+@@ -103,7 +103,7 @@
+ pxor mm0,mm0 ; mm0=(all 0's)
+ pcmpeqb mm7,mm7
+ psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT
+- pand mm7, MMWORD [esi+0*SIZEOF_MMWORD]
++ pand mm7, XMMWORD [esi+0*SIZEOF_MMWORD]
+
+ add eax, byte SIZEOF_MMWORD-1
+ and eax, byte -SIZEOF_MMWORD
+@@ -114,7 +114,7 @@
+ .columnloop_last:
+ pcmpeqb mm6,mm6
+ psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT
+- pand mm6, MMWORD [esi+0*SIZEOF_MMWORD]
++ pand mm6, XMMWORD [esi+0*SIZEOF_MMWORD]
+ jmp short .upsample
+ alignx 16,7
+
+@@ -338,8 +338,8 @@
+ psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT
+ movq mm2,mm1
+
+- pand mm1, MMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7)
+- pand mm2, MMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7)
++ pand mm1, XMMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7)
++ pand mm2, XMMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7)
+
+ movq MMWORD [wk(2)], mm1
+ movq MMWORD [wk(3)], mm2
+@@ -412,8 +412,8 @@
+ movq mm4,mm3
+ psrlq mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( 7 - - -)
+
+- por mm1, MMWORD [wk(0)] ; mm1=(-1 0 1 2)
+- por mm2, MMWORD [wk(2)] ; mm2=( 5 6 7 8)
++ por mm1, XMMWORD [wk(0)] ; mm1=(-1 0 1 2)
++ por mm2, XMMWORD [wk(2)] ; mm2=( 5 6 7 8)
+
+ movq MMWORD [wk(0)], mm4
+
+@@ -465,8 +465,8 @@
+ movq mm3,mm4
+ psrlq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( 7 - - -)
+
+- por mm1, MMWORD [wk(1)] ; mm1=(-1 0 1 2)
+- por mm5, MMWORD [wk(3)] ; mm5=( 5 6 7 8)
++ por mm1, XMMWORD [wk(1)] ; mm1=(-1 0 1 2)
++ por mm5, XMMWORD [wk(3)] ; mm5=( 5 6 7 8)
+
+ movq MMWORD [wk(1)], mm3
+
+diff -urN jpeg-6bx.org/jfmmxint.asm jpeg-6bx/jfmmxint.asm
+--- jpeg-6bx.org/jfmmxint.asm 2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jfmmxint.asm 2007-12-11 19:05:44.000000000 +0100
+@@ -314,8 +314,8 @@
+ pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L
+ pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H
+
+- paddd mm4, MMWORD [wk(0)] ; mm4=data7L
+- paddd mm1, MMWORD [wk(1)] ; mm1=data7H
++ paddd mm4, XMMWORD [wk(0)] ; mm4=data7L
++ paddd mm1, XMMWORD [wk(1)] ; mm1=data7H
+ paddd mm2,mm0 ; mm2=data1L
+ paddd mm7,mm6 ; mm7=data1H
+
+@@ -347,8 +347,8 @@
+
+ paddd mm1,mm0 ; mm1=data5L
+ paddd mm7,mm6 ; mm7=data5H
+- paddd mm3, MMWORD [wk(0)] ; mm3=data3L
+- paddd mm5, MMWORD [wk(1)] ; mm5=data3H
++ paddd mm3, XMMWORD [wk(0)] ; mm3=data3L
++ paddd mm5, XMMWORD [wk(1)] ; mm5=data3H
+
+ paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+ paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+@@ -558,8 +558,8 @@
+ pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L
+ pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H
+
+- paddd mm4, MMWORD [wk(0)] ; mm4=data7L
+- paddd mm1, MMWORD [wk(1)] ; mm1=data7H
++ paddd mm4, XMMWORD [wk(0)] ; mm4=data7L
++ paddd mm1, XMMWORD [wk(1)] ; mm1=data7H
+ paddd mm2,mm0 ; mm2=data1L
+ paddd mm7,mm6 ; mm7=data1H
+
+@@ -591,8 +591,8 @@
+
+ paddd mm1,mm0 ; mm1=data5L
+ paddd mm7,mm6 ; mm7=data5H
+- paddd mm3, MMWORD [wk(0)] ; mm3=data3L
+- paddd mm5, MMWORD [wk(1)] ; mm5=data3H
++ paddd mm3, XMMWORD [wk(0)] ; mm3=data3L
++ paddd mm5, XMMWORD [wk(1)] ; mm5=data3H
+
+ paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+ paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+diff -urN jpeg-6bx.org/ji3dnflt.asm jpeg-6bx/ji3dnflt.asm
+--- jpeg-6bx.org/ji3dnflt.asm 2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/ji3dnflt.asm 2007-12-11 19:05:44.000000000 +0100
+@@ -124,7 +124,7 @@
+ psrad mm0,(DWORD_BIT-WORD_BIT)
+ pi2fd mm0,mm0
+
+- pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++ pfmul mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+ movq mm1,mm0
+ punpckldq mm0,mm0
+@@ -157,8 +157,8 @@
+ pi2fd mm0,mm0
+ pi2fd mm1,mm1
+
+- pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+- pfmul mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++ pfmul mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++ pfmul mm1, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+ punpcklwd mm2,mm2
+ punpcklwd mm3,mm3
+@@ -167,8 +167,8 @@
+ pi2fd mm2,mm2
+ pi2fd mm3,mm3
+
+- pfmul mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+- pfmul mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++ pfmul mm2, XMMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++ pfmul mm3, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+ movq mm4,mm0
+ movq mm5,mm1
+@@ -204,8 +204,8 @@
+ pi2fd mm2,mm2
+ pi2fd mm3,mm3
+
+- pfmul mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+- pfmul mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++ pfmul mm2, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++ pfmul mm3, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+ punpcklwd mm5,mm5
+ punpcklwd mm1,mm1
+@@ -214,8 +214,8 @@
+ pi2fd mm5,mm5
+ pi2fd mm1,mm1
+
+- pfmul mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+- pfmul mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++ pfmul mm5, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++ pfmul mm1, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+ movq mm4,mm2
+ movq mm0,mm5
+diff -urN jpeg-6bx.org/jimmxfst.asm jpeg-6bx/jimmxfst.asm
+--- jpeg-6bx.org/jimmxfst.asm 2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jimmxfst.asm 2007-12-11 19:05:44.000000000 +0100
+@@ -139,11 +139,11 @@
+
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+- por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+- por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
++ por mm1, XMMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
++ por mm1, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ por mm1,mm0
+ packsswb mm1,mm1
+ movd eax,mm1
+@@ -153,7 +153,7 @@
+ ; -- AC terms all zero
+
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+ movq mm2,mm0 ; mm0=in0=(00 01 02 03)
+ punpcklwd mm0,mm0 ; mm0=(00 00 01 01)
+@@ -183,12 +183,12 @@
+
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+- pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++ pmullw mm1, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+ movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+ movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+- pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+- pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++ pmullw mm2, XMMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++ pmullw mm3, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+ movq mm4,mm0
+ movq mm5,mm1
+@@ -215,12 +215,12 @@
+
+ movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+- pmullw mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+- pmullw mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++ pmullw mm2, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++ pmullw mm3, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+ movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+- pmullw mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+- pmullw mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++ pmullw mm5, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++ pmullw mm1, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+ movq mm4,mm2
+ movq mm0,mm5
+diff -urN jpeg-6bx.org/jimmxint.asm jpeg-6bx/jimmxint.asm
+--- jpeg-6bx.org/jimmxint.asm 2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jimmxint.asm 2007-12-11 19:05:44.000000000 +0100
+@@ -152,11 +152,11 @@
+
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+- por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+- por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
++ por mm1, XMMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
++ por mm1, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ por mm1,mm0
+ packsswb mm1,mm1
+ movd eax,mm1
+@@ -166,7 +166,7 @@
+ ; -- AC terms all zero
+
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+ psllw mm0,PASS1_BITS
+
+@@ -198,12 +198,12 @@
+
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm1, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+ movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+- pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm2, XMMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm3, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+ ; (Original)
+ ; z1 = (z2 + z3) * 0.541196100;
+@@ -271,12 +271,12 @@
+
+ movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+- pmullw mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm4, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm6, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+- pmullw mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm1, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm3, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+ movq mm5,mm6
+ movq mm7,mm4
+@@ -333,8 +333,8 @@
+ pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L
+ pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H
+
+- paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L
+- paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H
++ paddd mm2, XMMWORD [wk(10)] ; mm2=tmp0L
++ paddd mm0, XMMWORD [wk(11)] ; mm0=tmp0H
+ paddd mm3,mm5 ; mm3=tmp3L
+ paddd mm4,mm7 ; mm4=tmp3H
+
+@@ -354,8 +354,8 @@
+
+ paddd mm2,mm5 ; mm2=tmp1L
+ paddd mm0,mm7 ; mm0=tmp1H
+- paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L
+- paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H
++ paddd mm1, XMMWORD [wk(10)] ; mm1=tmp2L
++ paddd mm6, XMMWORD [wk(11)] ; mm6=tmp2H
+
+ movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L
+ movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H
+@@ -659,8 +659,8 @@
+ pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L
+ pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H
+
+- paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L
+- paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H
++ paddd mm2, XMMWORD [wk(10)] ; mm2=tmp0L
++ paddd mm0, XMMWORD [wk(11)] ; mm0=tmp0H
+ paddd mm3,mm5 ; mm3=tmp3L
+ paddd mm4,mm7 ; mm4=tmp3H
+
+@@ -680,8 +680,8 @@
+
+ paddd mm2,mm5 ; mm2=tmp1L
+ paddd mm0,mm7 ; mm0=tmp1H
+- paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L
+- paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H
++ paddd mm1, XMMWORD [wk(10)] ; mm1=tmp2L
++ paddd mm6, XMMWORD [wk(11)] ; mm6=tmp2H
+
+ movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L
+ movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H
+diff -urN jpeg-6bx.org/jimmxred.asm jpeg-6bx/jimmxred.asm
+--- jpeg-6bx.org/jimmxred.asm 2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jimmxred.asm 2007-12-11 19:05:44.000000000 +0100
+@@ -160,10 +160,10 @@
+
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+- por mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+- por mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
++ por mm1, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
++ por mm1, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ por mm0,mm1
+ packsswb mm0,mm0
+ movd eax,mm0
+@@ -173,7 +173,7 @@
+ ; -- AC terms all zero
+
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+- pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+ psllw mm0,PASS1_BITS
+
+@@ -201,12 +201,12 @@
+
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+- pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm0, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm1, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+- pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm2, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm3, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+ movq mm4,mm0
+ movq mm5,mm0
+@@ -243,9 +243,9 @@
+ movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+- pmullw mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm4, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm5, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm0, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+ pxor mm1,mm1
+ pxor mm2,mm2
+@@ -549,12 +549,12 @@
+
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+- pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm0, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm1, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+- pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm2, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm3, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+ ; mm0=(10 11 ** 13), mm1=(30 31 ** 33)
+ ; mm2=(50 51 ** 53), mm3=(70 71 ** 73)
+@@ -582,12 +582,12 @@
+
+ movq mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)]
+- pmullw mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm6, XMMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm1, XMMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ movq mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)]
+ movq mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)]
+- pmullw mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm3, XMMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm5, XMMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+ ; mm6=(** 15 ** 17), mm1=(** 35 ** 37)
+ ; mm3=(** 55 ** 57), mm5=(** 75 ** 77)
+@@ -608,8 +608,8 @@
+
+ movq mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ movq mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)]
+- pmullw mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+- pmullw mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm1, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++ pmullw mm5, XMMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+ ; mm1=(00 01 ** 03), mm5=(** 05 ** 07)
+
+diff -urN jpeg-6bx.org/jisseflt.asm jpeg-6bx/jisseflt.asm
+--- jpeg-6bx.org/jisseflt.asm 2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jisseflt.asm 2007-12-11 19:05:45.000000000 +0100
+@@ -118,11 +118,11 @@
+
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+- por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+- por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+- por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
++ por mm1, XMMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
++ por mm1, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
++ por mm0, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ por mm1,mm0
+ packsswb mm1,mm1
+ movd eax,mm1
================================================================
More information about the pld-cvs-commit mailing list