SOURCES: libjpegsimd-asm.patch (NEW) - no idea if correct

arekm arekm at pld-linux.org
Tue Dec 11 19:13:10 CET 2007


Author: arekm                        Date: Tue Dec 11 18:13:10 2007 GMT
Module: SOURCES                       Tag: HEAD
---- Log message:
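- change the size specifier on memory source operands of MMX/3DNow! arithmetic
  and logic instructions (paddd, paddw, pmulhw, pmullw, pand, por, pfmul) from
  MMWORD to XMMWORD; movq loads/stores keep MMWORD
- no idea if correct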

---- Files affected:
SOURCES:
   libjpegsimd-asm.patch (NONE -> 1.1)  (NEW)

---- Diffs:

================================================================
Index: SOURCES/libjpegsimd-asm.patch
diff -u /dev/null SOURCES/libjpegsimd-asm.patch:1.1
--- /dev/null	Tue Dec 11 19:13:10 2007
+++ SOURCES/libjpegsimd-asm.patch	Tue Dec 11 19:13:05 2007
@@ -0,0 +1,558 @@
+diff -urN jpeg-6bx.org/jccolmmx.asm jpeg-6bx/jccolmmx.asm
+--- jpeg-6bx.org/jccolmmx.asm	2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jccolmmx.asm	2007-12-11 19:05:42.000000000 +0100
+@@ -400,8 +400,8 @@
+ 
+ 	movq      mm3,[GOTOFF(eax,PD_ONEHALF)]	; mm3=[PD_ONEHALF]
+ 
+-	paddd     mm0, MMWORD [wk(4)]
+-	paddd     mm4, MMWORD [wk(5)]
++	paddd     mm0, XMMWORD [wk(4)]
++	paddd     mm4, XMMWORD [wk(5)]
+ 	paddd     mm0,mm3
+ 	paddd     mm4,mm3
+ 	psrld     mm0,SCALEBITS		; mm0=YOL
+@@ -439,8 +439,8 @@
+ 
+ 	movq      mm2,[GOTOFF(eax,PD_ONEHALF)]	; mm2=[PD_ONEHALF]
+ 
+-	paddd     mm6, MMWORD [wk(6)]
+-	paddd     mm4, MMWORD [wk(7)]
++	paddd     mm6, XMMWORD [wk(6)]
++	paddd     mm4, XMMWORD [wk(7)]
+ 	paddd     mm6,mm2
+ 	paddd     mm4,mm2
+ 	psrld     mm6,SCALEBITS		; mm6=YEL
+diff -urN jpeg-6bx.org/jcqnt3dn.asm jpeg-6bx/jcqnt3dn.asm
+--- jpeg-6bx.org/jcqnt3dn.asm	2006-01-23 08:10:00.000000000 +0100
++++ jpeg-6bx/jcqnt3dn.asm	2007-12-11 19:05:42.000000000 +0100
+@@ -169,12 +169,12 @@
+ .quantloop:
+ 	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+ 	movq	mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+-	pfmul	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+-	pfmul	mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
++	pfmul	mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
++	pfmul	mm1, XMMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+ 	movq	mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)]
+ 	movq	mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)]
+-	pfmul	mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
+-	pfmul	mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
++	pfmul	mm2, XMMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
++	pfmul	mm3, XMMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
+ 
+ 	pfadd	mm0,mm7			; mm0=(00 ** 01 **)
+ 	pfadd	mm1,mm7			; mm1=(02 ** 03 **)
+@@ -193,12 +193,12 @@
+ 
+ 	movq	mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+ 	movq	mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+-	pfmul	mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+-	pfmul	mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
++	pfmul	mm6, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
++	pfmul	mm1, XMMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+ 	movq	mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)]
+ 	movq	mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)]
+-	pfmul	mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
+-	pfmul	mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
++	pfmul	mm3, XMMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
++	pfmul	mm4, XMMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
+ 
+ 	pfadd	mm6,mm7			; mm0=(10 ** 11 **)
+ 	pfadd	mm1,mm7			; mm4=(12 ** 13 **)
+diff -urN jpeg-6bx.org/jcqntmmx.asm jpeg-6bx/jcqntmmx.asm
+--- jpeg-6bx.org/jcqntmmx.asm	2006-01-10 01:00:00.000000000 +0100
++++ jpeg-6bx/jcqntmmx.asm	2007-12-11 19:05:43.000000000 +0100
+@@ -194,14 +194,14 @@
+ 	;   return (unsigned long) sz;
+ 	; }
+ 
+-	paddw	mm0, MMWORD [CORRECTION(0,0,edx)]   ; correction + roundfactor
+-	paddw	mm1, MMWORD [CORRECTION(0,1,edx)]
++	paddw	mm0, XMMWORD [CORRECTION(0,0,edx)]   ; correction + roundfactor
++	paddw	mm1, XMMWORD [CORRECTION(0,1,edx)]
+ 	psllw	mm0,1
+ 	psllw	mm1,1
+ 	movq	mm4,mm0
+ 	movq	mm5,mm1
+-	pmulhw	mm0, MMWORD [RECIPROCAL(0,0,edx)]   ; reciprocal
+-	pmulhw	mm1, MMWORD [RECIPROCAL(0,1,edx)]
++	pmulhw	mm0, XMMWORD [RECIPROCAL(0,0,edx)]   ; reciprocal
++	pmulhw	mm1, XMMWORD [RECIPROCAL(0,1,edx)]
+ 	movq	mm6, MMWORD [SCALE(0,0,edx)]	; scale
+ 	movq	mm7, MMWORD [SCALE(0,1,edx)]
+ 	paddw	mm0,mm4		; reciprocal is always negative (MSB=1)
+@@ -220,8 +220,8 @@
+ 	paddw	mm1,mm7
+ 	psraw	mm4,(WORD_BIT-1)
+ 	psraw	mm5,(WORD_BIT-1)
+-	pand	mm4, MMWORD [SCALE(0,0,edx)]	; scale
+-	pand	mm5, MMWORD [SCALE(0,1,edx)]
++	pand	mm4, XMMWORD [SCALE(0,0,edx)]	; scale
++	pand	mm5, XMMWORD [SCALE(0,1,edx)]
+ 	paddw	mm0,mm4
+ 	paddw	mm1,mm5
+ 
+diff -urN jpeg-6bx.org/jdcolmmx.asm jpeg-6bx/jdcolmmx.asm
+--- jpeg-6bx.org/jdcolmmx.asm	2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jdcolmmx.asm	2007-12-11 19:05:43.000000000 +0100
+@@ -236,8 +236,8 @@
+ 	packuswb  mm2,mm2		; mm2=(G0 G2 G4 G6 ** ** ** **)
+ 	packuswb  mm3,mm3		; mm3=(G1 G3 G5 G7 ** ** ** **)
+ 
+-	paddw     mm4, MMWORD [wk(0)]	; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
+-	paddw     mm5, MMWORD [wk(1)]	; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
++	paddw     mm4, XMMWORD [wk(0)]	; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
++	paddw     mm5, XMMWORD [wk(1)]	; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
+ 	packuswb  mm4,mm4		; mm4=(B0 B2 B4 B6 ** ** ** **)
+ 	packuswb  mm5,mm5		; mm5=(B1 B3 B5 B7 ** ** ** **)
+ 
+diff -urN jpeg-6bx.org/jdsammmx.asm jpeg-6bx/jdsammmx.asm
+--- jpeg-6bx.org/jdsammmx.asm	2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jdsammmx.asm	2007-12-11 19:05:43.000000000 +0100
+@@ -103,7 +103,7 @@
+ 	pxor	mm0,mm0			; mm0=(all 0's)
+ 	pcmpeqb	mm7,mm7
+ 	psrlq	mm7,(SIZEOF_MMWORD-1)*BYTE_BIT
+-	pand	mm7, MMWORD [esi+0*SIZEOF_MMWORD]
++	pand	mm7, XMMWORD [esi+0*SIZEOF_MMWORD]
+ 
+ 	add	eax, byte SIZEOF_MMWORD-1
+ 	and	eax, byte -SIZEOF_MMWORD
+@@ -114,7 +114,7 @@
+ .columnloop_last:
+ 	pcmpeqb	mm6,mm6
+ 	psllq	mm6,(SIZEOF_MMWORD-1)*BYTE_BIT
+-	pand	mm6, MMWORD [esi+0*SIZEOF_MMWORD]
++	pand	mm6, XMMWORD [esi+0*SIZEOF_MMWORD]
+ 	jmp	short .upsample
+ 	alignx	16,7
+ 
+@@ -338,8 +338,8 @@
+ 	psllq	mm1,(SIZEOF_MMWORD-2)*BYTE_BIT
+ 	movq	mm2,mm1
+ 
+-	pand	mm1, MMWORD [edx+1*SIZEOF_MMWORD]	; mm1=( - - - 7)
+-	pand	mm2, MMWORD [edi+1*SIZEOF_MMWORD]	; mm2=( - - - 7)
++	pand	mm1, XMMWORD [edx+1*SIZEOF_MMWORD]	; mm1=( - - - 7)
++	pand	mm2, XMMWORD [edi+1*SIZEOF_MMWORD]	; mm2=( - - - 7)
+ 
+ 	movq	MMWORD [wk(2)], mm1
+ 	movq	MMWORD [wk(3)], mm2
+@@ -412,8 +412,8 @@
+ 	movq	mm4,mm3
+ 	psrlq	mm4,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm4=( 7 - - -)
+ 
+-	por	mm1, MMWORD [wk(0)]		; mm1=(-1 0 1 2)
+-	por	mm2, MMWORD [wk(2)]		; mm2=( 5 6 7 8)
++	por	mm1, XMMWORD [wk(0)]		; mm1=(-1 0 1 2)
++	por	mm2, XMMWORD [wk(2)]		; mm2=( 5 6 7 8)
+ 
+ 	movq	MMWORD [wk(0)], mm4
+ 
+@@ -465,8 +465,8 @@
+ 	movq	mm3,mm4
+ 	psrlq	mm3,(SIZEOF_MMWORD-2)*BYTE_BIT	; mm3=( 7 - - -)
+ 
+-	por	mm1, MMWORD [wk(1)]		; mm1=(-1 0 1 2)
+-	por	mm5, MMWORD [wk(3)]		; mm5=( 5 6 7 8)
++	por	mm1, XMMWORD [wk(1)]		; mm1=(-1 0 1 2)
++	por	mm5, XMMWORD [wk(3)]		; mm5=( 5 6 7 8)
+ 
+ 	movq	MMWORD [wk(1)], mm3
+ 
+diff -urN jpeg-6bx.org/jfmmxint.asm jpeg-6bx/jfmmxint.asm
+--- jpeg-6bx.org/jfmmxint.asm	2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jfmmxint.asm	2007-12-11 19:05:44.000000000 +0100
+@@ -314,8 +314,8 @@
+ 	pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]	; mm2=tmp7L
+ 	pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]	; mm7=tmp7H
+ 
+-	paddd	mm4, MMWORD [wk(0)]	; mm4=data7L
+-	paddd	mm1, MMWORD [wk(1)]	; mm1=data7H
++	paddd	mm4, XMMWORD [wk(0)]	; mm4=data7L
++	paddd	mm1, XMMWORD [wk(1)]	; mm1=data7H
+ 	paddd	mm2,mm0			; mm2=data1L
+ 	paddd	mm7,mm6			; mm7=data1H
+ 
+@@ -347,8 +347,8 @@
+ 
+ 	paddd	mm1,mm0			; mm1=data5L
+ 	paddd	mm7,mm6			; mm7=data5H
+-	paddd	mm3, MMWORD [wk(0)]	; mm3=data3L
+-	paddd	mm5, MMWORD [wk(1)]	; mm5=data3H
++	paddd	mm3, XMMWORD [wk(0)]	; mm3=data3L
++	paddd	mm5, XMMWORD [wk(1)]	; mm5=data3H
+ 
+ 	paddd	mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+ 	paddd	mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+@@ -558,8 +558,8 @@
+ 	pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]	; mm2=tmp7L
+ 	pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]	; mm7=tmp7H
+ 
+-	paddd	mm4, MMWORD [wk(0)]	; mm4=data7L
+-	paddd	mm1, MMWORD [wk(1)]	; mm1=data7H
++	paddd	mm4, XMMWORD [wk(0)]	; mm4=data7L
++	paddd	mm1, XMMWORD [wk(1)]	; mm1=data7H
+ 	paddd	mm2,mm0			; mm2=data1L
+ 	paddd	mm7,mm6			; mm7=data1H
+ 
+@@ -591,8 +591,8 @@
+ 
+ 	paddd	mm1,mm0			; mm1=data5L
+ 	paddd	mm7,mm6			; mm7=data5H
+-	paddd	mm3, MMWORD [wk(0)]	; mm3=data3L
+-	paddd	mm5, MMWORD [wk(1)]	; mm5=data3H
++	paddd	mm3, XMMWORD [wk(0)]	; mm3=data3L
++	paddd	mm5, XMMWORD [wk(1)]	; mm5=data3H
+ 
+ 	paddd	mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+ 	paddd	mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+diff -urN jpeg-6bx.org/ji3dnflt.asm jpeg-6bx/ji3dnflt.asm
+--- jpeg-6bx.org/ji3dnflt.asm	2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/ji3dnflt.asm	2007-12-11 19:05:44.000000000 +0100
+@@ -124,7 +124,7 @@
+ 	psrad     mm0,(DWORD_BIT-WORD_BIT)
+ 	pi2fd     mm0,mm0
+ 
+-	pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++	pfmul     mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+ 
+ 	movq      mm1,mm0
+ 	punpckldq mm0,mm0
+@@ -157,8 +157,8 @@
+ 	pi2fd     mm0,mm0
+ 	pi2fd     mm1,mm1
+ 
+-	pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+-	pfmul     mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++	pfmul     mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++	pfmul     mm1, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+ 
+ 	punpcklwd mm2,mm2
+ 	punpcklwd mm3,mm3
+@@ -167,8 +167,8 @@
+ 	pi2fd     mm2,mm2
+ 	pi2fd     mm3,mm3
+ 
+-	pfmul     mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+-	pfmul     mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++	pfmul     mm2, XMMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++	pfmul     mm3, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+ 
+ 	movq	mm4,mm0
+ 	movq	mm5,mm1
+@@ -204,8 +204,8 @@
+ 	pi2fd     mm2,mm2
+ 	pi2fd     mm3,mm3
+ 
+-	pfmul     mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+-	pfmul     mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++	pfmul     mm2, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++	pfmul     mm3, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+ 
+ 	punpcklwd mm5,mm5
+ 	punpcklwd mm1,mm1
+@@ -214,8 +214,8 @@
+ 	pi2fd     mm5,mm5
+ 	pi2fd     mm1,mm1
+ 
+-	pfmul     mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+-	pfmul     mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++	pfmul     mm5, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
++	pfmul     mm1, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+ 
+ 	movq	mm4,mm2
+ 	movq	mm0,mm5
+diff -urN jpeg-6bx.org/jimmxfst.asm jpeg-6bx/jimmxfst.asm
+--- jpeg-6bx.org/jimmxfst.asm	2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jimmxfst.asm	2007-12-11 19:05:44.000000000 +0100
+@@ -139,11 +139,11 @@
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+-	por	mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+-	por	mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
++	por	mm1, XMMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
++	por	mm1, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ 	por	mm1,mm0
+ 	packsswb mm1,mm1
+ 	movd	eax,mm1
+@@ -153,7 +153,7 @@
+ 	; -- AC terms all zero
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++	pmullw	mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+ 
+ 	movq      mm2,mm0		; mm0=in0=(00 01 02 03)
+ 	punpcklwd mm0,mm0		; mm0=(00 00 01 01)
+@@ -183,12 +183,12 @@
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+-	pmullw	mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++	pmullw	mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++	pmullw	mm1, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+ 	movq	mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+-	pmullw	mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++	pmullw	mm2, XMMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++	pmullw	mm3, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+ 
+ 	movq	mm4,mm0
+ 	movq	mm5,mm1
+@@ -215,12 +215,12 @@
+ 
+ 	movq	mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+-	pmullw	mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++	pmullw	mm2, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++	pmullw	mm3, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+ 	movq	mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+-	pmullw	mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++	pmullw	mm5, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
++	pmullw	mm1, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+ 
+ 	movq	mm4,mm2
+ 	movq	mm0,mm5
+diff -urN jpeg-6bx.org/jimmxint.asm jpeg-6bx/jimmxint.asm
+--- jpeg-6bx.org/jimmxint.asm	2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jimmxint.asm	2007-12-11 19:05:44.000000000 +0100
+@@ -152,11 +152,11 @@
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+-	por	mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+-	por	mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
++	por	mm1, XMMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
++	por	mm1, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ 	por	mm1,mm0
+ 	packsswb mm1,mm1
+ 	movd	eax,mm1
+@@ -166,7 +166,7 @@
+ 	; -- AC terms all zero
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 
+ 	psllw	mm0,PASS1_BITS
+ 
+@@ -198,12 +198,12 @@
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm1, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 	movq	mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm2, XMMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm3, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 
+ 	; (Original)
+ 	; z1 = (z2 + z3) * 0.541196100;
+@@ -271,12 +271,12 @@
+ 
+ 	movq	mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm4, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm6, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 	movq	mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm1, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm3, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 
+ 	movq	mm5,mm6
+ 	movq	mm7,mm4
+@@ -333,8 +333,8 @@
+ 	pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]	; mm3=tmp3L
+ 	pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]	; mm4=tmp3H
+ 
+-	paddd	mm2, MMWORD [wk(10)]	; mm2=tmp0L
+-	paddd	mm0, MMWORD [wk(11)]	; mm0=tmp0H
++	paddd	mm2, XMMWORD [wk(10)]	; mm2=tmp0L
++	paddd	mm0, XMMWORD [wk(11)]	; mm0=tmp0H
+ 	paddd	mm3,mm5			; mm3=tmp3L
+ 	paddd	mm4,mm7			; mm4=tmp3H
+ 
+@@ -354,8 +354,8 @@
+ 
+ 	paddd	mm2,mm5			; mm2=tmp1L
+ 	paddd	mm0,mm7			; mm0=tmp1H
+-	paddd	mm1, MMWORD [wk(10)]	; mm1=tmp2L
+-	paddd	mm6, MMWORD [wk(11)]	; mm6=tmp2H
++	paddd	mm1, XMMWORD [wk(10)]	; mm1=tmp2L
++	paddd	mm6, XMMWORD [wk(11)]	; mm6=tmp2H
+ 
+ 	movq	MMWORD [wk(10)], mm2	; wk(10)=tmp1L
+ 	movq	MMWORD [wk(11)], mm0	; wk(11)=tmp1H
+@@ -659,8 +659,8 @@
+ 	pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]	; mm3=tmp3L
+ 	pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]	; mm4=tmp3H
+ 
+-	paddd	mm2, MMWORD [wk(10)]	; mm2=tmp0L
+-	paddd	mm0, MMWORD [wk(11)]	; mm0=tmp0H
++	paddd	mm2, XMMWORD [wk(10)]	; mm2=tmp0L
++	paddd	mm0, XMMWORD [wk(11)]	; mm0=tmp0H
+ 	paddd	mm3,mm5			; mm3=tmp3L
+ 	paddd	mm4,mm7			; mm4=tmp3H
+ 
+@@ -680,8 +680,8 @@
+ 
+ 	paddd	mm2,mm5			; mm2=tmp1L
+ 	paddd	mm0,mm7			; mm0=tmp1H
+-	paddd	mm1, MMWORD [wk(10)]	; mm1=tmp2L
+-	paddd	mm6, MMWORD [wk(11)]	; mm6=tmp2H
++	paddd	mm1, XMMWORD [wk(10)]	; mm1=tmp2L
++	paddd	mm6, XMMWORD [wk(11)]	; mm6=tmp2H
+ 
+ 	movq	MMWORD [wk(10)], mm2	; wk(10)=tmp1L
+ 	movq	MMWORD [wk(11)], mm0	; wk(11)=tmp1H
+diff -urN jpeg-6bx.org/jimmxred.asm jpeg-6bx/jimmxred.asm
+--- jpeg-6bx.org/jimmxred.asm	2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jimmxred.asm	2007-12-11 19:05:44.000000000 +0100
+@@ -160,10 +160,10 @@
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+-	por	mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+-	por	mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
++	por	mm1, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
++	por	mm1, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ 	por	mm0,mm1
+ 	packsswb mm0,mm0
+ 	movd	eax,mm0
+@@ -173,7 +173,7 @@
+ 	; -- AC terms all zero
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm0, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 
+ 	psllw	mm0,PASS1_BITS
+ 
+@@ -201,12 +201,12 @@
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm0, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm1, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 	movq	mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm2, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm3, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 
+ 	movq      mm4,mm0
+ 	movq      mm5,mm0
+@@ -243,9 +243,9 @@
+ 	movq	mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm4, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm5, XMMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm0, XMMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 
+ 	pxor      mm1,mm1
+ 	pxor      mm2,mm2
+@@ -549,12 +549,12 @@
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm0, XMMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm1, XMMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 	movq	mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+-	pmullw	mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm2, XMMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm3, XMMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 
+ 	; mm0=(10 11 ** 13), mm1=(30 31 ** 33)
+ 	; mm2=(50 51 ** 53), mm3=(70 71 ** 73)
+@@ -582,12 +582,12 @@
+ 
+ 	movq	mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)]
+-	pmullw	mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm6, XMMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm1, XMMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 	movq	mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)]
+ 	movq	mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)]
+-	pmullw	mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm3, XMMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm5, XMMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 
+ 	; mm6=(** 15 ** 17), mm1=(** 35 ** 37)
+ 	; mm3=(** 55 ** 57), mm5=(** 75 ** 77)
+@@ -608,8 +608,8 @@
+ 
+ 	movq	mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)]
+-	pmullw	mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+-	pmullw	mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm1, XMMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
++	pmullw	mm5, XMMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+ 
+ 	; mm1=(00 01 ** 03), mm5=(** 05 ** 07)
+ 
+diff -urN jpeg-6bx.org/jisseflt.asm jpeg-6bx/jisseflt.asm
+--- jpeg-6bx.org/jisseflt.asm	2006-02-03 17:50:00.000000000 +0100
++++ jpeg-6bx/jisseflt.asm	2007-12-11 19:05:45.000000000 +0100
+@@ -118,11 +118,11 @@
+ 
+ 	movq	mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ 	movq	mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+-	por	mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+-	por	mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+-	por	mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
++	por	mm1, XMMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
++	por	mm1, XMMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
++	por	mm0, XMMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ 	por	mm1,mm0
+ 	packsswb mm1,mm1
+ 	movd	eax,mm1
================================================================


More information about the pld-cvs-commit mailing list