SOURCES: gcc-pr19161.patch (NEW) - automatic mmx/x87 fpu mode swit...

pluto pluto at pld-linux.org
Fri Jun 24 13:40:13 CEST 2005


Author: pluto                        Date: Fri Jun 24 11:40:13 2005 GMT
Module: SOURCES                       Tag: HEAD
---- Log message:
- automatic mmx/x87 fpu mode switching.

---- Files affected:
SOURCES:
   gcc-pr19161.patch (NONE -> 1.1)  (NEW)

---- Diffs:

================================================================
Index: SOURCES/gcc-pr19161.patch
diff -u /dev/null SOURCES/gcc-pr19161.patch:1.1
--- /dev/null	Fri Jun 24 13:40:13 2005
+++ SOURCES/gcc-pr19161.patch	Fri Jun 24 13:40:08 2005
@@ -0,0 +1,755 @@
+Date: Tue, 21 Jun 2005 12:43:31 +0200
+From: Uros Bizjak <uros dot bizjak at kss-loka dot si>
+Subject: [PATCH, i386] automatic MMX/x87 FPU mode switching (the real one)
+
+Hello!
+
+This patch implements the much-requested feature of automatic mode switching
+between the MMX and x87 register sets. It is based on the LCM algorithm and
+inserts (f)emms instructions where appropriate. Thanks also to rth for his
+valuable help and to Roger for his encouragement!
+
+This patch now handles ASM patterns, as discussed with rth. The only limitation
+is that mixing x87 and MMX registers is not allowed in the input and output
+constraints of an ASM pattern. Function calls are handled in the same way as
+discussed before.
+
+So, the testcase:
+
+#include <mmintrin.h>
+
+__v8qi
+aaa (__v8qi x, __v8qi y)
+{
+  __v8qi mm1;
+
+  mm1 = _mm_add_pi8 (x, y);
+
+  return mm1;
+}
+
+int main() {
+  __v8qi mm0 = { 1,2,3,4,5,6,7,8 };
+  __v8qi mm1 = { 11,12,13,14,15,16,17,18 };
+
+  double a = 0.0;
+
+  union ttt {
+    __v8qi mm;
+    char x[8];
+  } temp;
+
+  temp.mm = mm0;
+  temp.x[1] = cos(a);
+
+  temp.mm = aaa (temp.mm, mm1);
+  printf ("%i %f\n", temp.x[0], sqrt(temp.x[1]));
+
+  return 0;
+}
+
+produces (gcc -O2 -mmmx -ffast-math -fomit-frame-pointer):
+
+aaa:
+        paddb %mm1, %mm0
+        ret
+
+main:
+        pushl %ebp
+        movl %esp, %ebp
+        subl $24, %esp
+        andl $-16, %esp
+        subl $16, %esp
+        movl $67305985, %edx
+        movl $134678021, %ecx
+        movb $1, %dh
+        movq .LC1, %mm1
+        movl %edx, -8(%ebp)
+        movl %ecx, -4(%ebp)
+        movq -8(%ebp), %mm2
+        movq %mm2, %mm0
+        call aaa
+        movq %mm0, -8(%ebp)
+        movl -8(%ebp), %edx
+        movsbl %dh, %eax
+        cbtw
+        emms                       <<< inserted by LCM here
+        pushw %ax
+        movsbl %dl,%eax
+        filds (%esp)
+        addl $2, %esp
+        movl %eax, 4(%esp)
+        movl $.LC2, (%esp)
+        fsqrt
+        fstpl 8(%esp)
+        call printf
+        xorl %eax, %eax
+        leave
+        ret
+
+And the binary works as expected:
+
+./a.out
+12 3.605551
+ 
+
+An IMHO nice feature of this patch is that a manually inserted emms (via the
+_mm_empty() intrinsic) is also handled by the LCM approach. If there is no need
+for emms at that place, it is not emitted. This patch also handles (stupid)
+code like:
+
+#include <mmintrin.h>
+
+__v8qi
+aaa (__v8qi x, __v8qi y)
+{
+  __v8qi mm1;
+
+  mm1 = _mm_add_pi8 (x, y);
+  _mm_empty ();
+  return mm1;
+}
+
+to produce correct asm code:
+
+aaa:
+        subl $12, %esp
+        paddb %mm1, %mm0
+        movq %mm0, (%esp)
+        emms
+        movq (%esp), %mm0
+        addl $12, %esp
+        ret
+
+The patch was bootstrapped on i686-pc-linux-gnu and regtested for c and c++. It
+introduces one new failure into the testsuite (__builtin_apply problems,
+gcc.dg/20020218-1.c), but otherwise produces correct code for all testcases I
+have thrown at it. I think it is ready for wider exposure in current mainline.
+
+For the __builtin_apply () problems, I suggest that the called function (for i386)
+should NOT use MMX registers, and that it is always called in FPU_MODE_X87.
+Otherwise, there is no way to determine MODE_AFTER of such a function.
+
+2005-06-21  Uros Bizjak  <uros at kss-loka.si>
+
+	* mode-switching.c (optimize_mode_switching): Change MODE_AFTER
+	to include entity.
+
+	* reg-stack.c (subst_stack_regs): Handle MMX/x87 FPU mode
+	switching instructions.
+
+	* config/sh/sh.h: MODE_AFTER: Change define to include entity.
+
+	* config/i386/i386-modes.def: ALLREGS: New RANDOM_MODE.
+
+	* config/i386/i386-protos.h (emit_i387_cw_initialization):
+	Remove prototype.
+	(ix86_mode_after): New prototype.
+	(ix86_mode_entry): New prototype.
+	(ix86_mode_exit): New prototype.
+	(ix86_emit_mode_set): New prototype.
+
+	* config/i386/i386.h (enum ix86_fpu_mode): New enum.
+	(FPU_MODE_DEFAULT): New define.
+	(enum ix86_entity): Add new I387_FPU_MODE entity.
+	(NUM_MODES_FOR_MODE_SWITCHING): Add FPU_MODE_ANY to
+	enable switching for I387_FPU_MODE entity.
+	(MODE_AFTER): New define.
+	(MODE_ENTRY): New define.
+	(MODE_EXIT): New define.
+	(EMIT_MODE_SET): Change definition to use ix86_emit_mode_set.
+	(HARD_REGNO_NREGS): Return 8 for ALLREGS mode.
+
+	* config/i386/i386.c (ix86_mode_needed): Handle
+	entity I387_FPU_MODE.
+	(ix86_mode_after): New function.
+	(ix86_mode_entry): New function.
+	(ix86_mode_exit): New function.
+	(ix86_emit_mode_set): Renamed from emit_i387_cw_initialization.
+	Handle entity I387_FPU_MODE.
+	(ix86_init_machine_status): Set optimize_mode_switching flag
+	for I387_FPU_MODE entity if TARGET_MMX.
+	(ix86_expand_builtin) [IX86_BUILTIN_FEMMS]: Use "mmx_emms"
+	instruction pattern.
+
+	* config/i386/i386.md (UNSPECV_FEMMS): Remove constant.
+	(UNSPECV_EFPU, UNSPECV_NOP, FIRSTFP_REG, FIRSTMMX_REG): New
+	constants.
+
+	* config/i386/mmx.md ("mmx_emms"): Change instruction definition
+	to use UNSPECV_NOP. Set "unit" attribute to i387.
+	("efpu", "emms"): New instruction patterns.
+
+Uros.
+
+--- a/gcc/config/i386/i386.c	2005-06-10 23:45:12.000000000 +0200
++++ b/gcc/config/i386/i386.c	2005-06-21 10:55:16.000000000 +0200
+@@ -7362,13 +7362,156 @@ output_387_binary_op (rtx insn, rtx *ope
+ int
+ ix86_mode_needed (int entity, rtx insn)
+ {
+-  enum attr_i387_cw mode;
++  int unit, mode;
++
++  if (entity == I387_FPU_MODE)
++    {
++      /* If a function call uses MMX registers, select MMX FPU mode and
++	 if function call uses x87 registers, select x87 FPU mode.  If
++	 no MMX or x87 registers are used, switch to default mode.  */
++      if (CALL_P (insn))
++	{
++	  rtx link;
++	  int mmx = 0, x87 = 0;
++
++	  for (link = CALL_INSN_FUNCTION_USAGE (insn);
++	       link;
++	       link = XEXP (link, 1))
++	    {
++	      if (GET_CODE (XEXP (link, 0)) == USE)
++		{
++		  rtx reg = XEXP (XEXP (link, 0), 0);
++
++		  if (reg)
++		    {
++		      if (MMX_REG_P (reg))
++			mmx = 1;
++
++		      if (FP_REG_P (reg))
++			x87 = 1;
++		    }
++		}
++	    }
++
++	  /* Mixing of x87 and MMX registers is not allowed
++	     in function call.  */
++	  gcc_assert (!mmx || !x87);
++
++	  if (mmx)
++	    return FPU_MODE_MMX;
++
++	  if (x87)
++	    return FPU_MODE_X87;      
++
++	  return FPU_MODE_DEFAULT;
++	}
++
++      /* Parse ASM operands to check input and output constraints.  If
++	 an ASM uses MMX registers, select MMX mode and if it uses x87
++	 registers, select x87 mode.  Mixing of MMX and x87 constraints
++	 is not allowed.  If no MMX or x87 input and output registers
++	 are used, switch to default mode.  */
++      if (NONJUMP_INSN_P (insn))
++	{
++	  rtx pat = PATTERN (insn);
++	  int noperands = asm_noperands (pat);
++
++	  if (noperands >= 0)
++	    {
++	      const char **constraints;
++	      int mmx = 0, x87 = 0;
++	      int i;
++
++	      constraints = alloca (noperands * sizeof (char *));
++	      decode_asm_operands (pat, NULL, NULL, constraints, NULL);
++
++	      for (i = 0; i < noperands; i++)
++		{
++		  const char *c = constraints[i];
++		  enum reg_class class;
++
++		  if (c[0] == '%')
++		    c++;
++		  if (ISDIGIT ((unsigned char) c[0]) && c[1] == '\0')
++		    c = constraints[c[0] - '0'];
++
++		  while (*c)
++		    {
++		      char cc = *c;
++		      int len;
++		      switch (cc)
++			{
++			case ',':
++			  cc++;
++			  continue;
++			case '=':
++			case '+':
++			case '*':
++			case '%':
++			case '!':
++			case '#':
++			case '&':
++			case '?':
++			  break;
++
++			default:
++			  class = REG_CLASS_FROM_LETTER (cc);
++
++			  if (MMX_CLASS_P (class))
++			    mmx = 1;
++
++			  if (FLOAT_CLASS_P (class))
++			    x87 = 1;
++			}
++
++		      len = CONSTRAINT_LEN (cc, c);
++		      do
++			c++;
++		      while (--len && *c);
++		    }
++		}
++
++	      /* Mixing x87 and MMX registers in ASM is not allowed.  */
++	      if (mmx && x87)
++		{
++		  error_for_asm (insn, "mixing of x87 and MMX registers "
++				 "is not allowed in %<asm%>");
++		  gcc_unreachable ();
++		}
++
++	      if (mmx)
++		return FPU_MODE_MMX;
++
++	      if (x87)
++		return FPU_MODE_X87;      
++
++	      return FPU_MODE_DEFAULT;
++	    }
++	}
++
++      if (recog_memoized (insn) < 0)
++	return FPU_MODE_ANY;
++
++      unit = get_attr_unit (insn);
++
++      switch (unit)
++	{
++	case UNIT_MMX:
++	  return FPU_MODE_MMX;
++
++	case UNIT_I387:
++	  return FPU_MODE_X87;
++	
++	default:
++	  return FPU_MODE_ANY;
++
++	}
++    }
+ 
+   /* The mode UNINITIALIZED is used to store control word after a
+      function call or ASM pattern.  The mode ANY specify that function
+      has no requirements on the control word and make no changes in the
+      bits we are interested in.  */
+-
+   if (CALL_P (insn)
+       || (NONJUMP_INSN_P (insn)
+ 	  && (asm_noperands (PATTERN (insn)) >= 0
+@@ -7409,21 +7552,127 @@ ix86_mode_needed (int entity, rtx insn)
+   return I387_CW_ANY;
+ }
+ 
+-/* Output code to initialize control word copies used by trunc?f?i and
+-   rounding patterns.  CURRENT_MODE is set to current control word,
+-   while NEW_MODE is set to new control word.  */
++
++/* Switch FPU mode to appropriate mode after function call in
++   optimize_mode_switching pass.  */
++
++int
++ix86_mode_after (int entity, int mode, rtx insn)
++{
++  if (entity == I387_FPU_MODE)
++    {
++      /* Switch to MMX mode after funciton call if returned value
++	 is returned in MMX register and similar for x87 reg.
++	 If no MMX or x87 reg is returned, switch to default mode.  */
++      if (CALL_P (insn))
++	{
++	  rtx reg = SET_DEST (PATTERN (insn));
++
++	  if (reg)
++	    {
++	      if (MMX_REG_P (reg))
++		return FPU_MODE_MMX;
++
++	      if (FP_REG_P (reg))
++		return FPU_MODE_X87;
++	    }
++
++	  return FPU_MODE_DEFAULT;
++	}
++    }
++
++  return mode;
++}
++
++/* Switch FPU mode of function entry to appropriate mode in
++   optimize_mode_switching pass.  */
++
++int
++ix86_mode_entry (int entity)
++{
++  if (entity == I387_FPU_MODE)
++    {
++      /* Switch entry mode to default mode for vaarg functions.  */
++      if (current_function_args_info.maybe_vaarg)
++	return FPU_MODE_DEFAULT;
++
++      if (current_function_args_info.mmx_nregs != MMX_REGPARM_MAX)
++	return FPU_MODE_MMX;
++
++      /* ??? Handle x87 registers for fpregparm.  */
++
++      return FPU_MODE_DEFAULT;
++    }
++
++  return I387_CW_ANY;
++}
++
++/* Switch FPU mode of function exit to appropriate mode in
++   optimize_mode_switching pass.  */
++
++int
++ix86_mode_exit (int entity) 
++{
++  if (entity == I387_FPU_MODE)
++    {
++      rtx reg = current_function_return_rtx;
++
++      if (reg)
++	{
++	  if (MMX_REG_P (reg))
++	    return FPU_MODE_MMX;
++
++	  if (FP_REG_P (reg))
++	    return FPU_MODE_X87;
++	}
++
++      return FPU_MODE_DEFAULT;
++    }
++
++  return I387_CW_ANY;
++}
++
++/* Emit mode switching instructions in optimize_mode_switching pass.  */
+ 
+ void
+-emit_i387_cw_initialization (int mode)
++ix86_emit_mode_set (int entity, int mode)
+ {
+-  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
+-  rtx new_mode;
++  rtx stored_mode, new_mode;
++  rtx reg;
+ 
+   int slot;
+ 
+-  rtx reg = gen_reg_rtx (HImode);
++  if (entity == I387_FPU_MODE)
++    {
++      switch (mode)
++	{
++	case FPU_MODE_ANY:
++	  return;
++
++	case FPU_MODE_X87:
++	  emit_insn (gen_emms ());
++	  return;
+ 
++	case FPU_MODE_MMX:
++	  emit_insn (gen_efpu ());
++	  return;
++
++	default:
++	  gcc_unreachable ();
++	}
++    }
++
++  /* Output code to initialize control word copies used by trunc?f?i
++     and rounding patterns.  STORED_MODE is set to current control
++     word, while NEW_MODE is set to new control word.  */
++
++  if ((mode == I387_CW_UNINITIALIZED) || (mode == I387_CW_ANY))
++    return;
++ 
++  stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
+   emit_insn (gen_x86_fnstcw_1 (stored_mode));
++
++  reg = gen_reg_rtx (HImode);
+   emit_move_insn (reg, stored_mode);
+ 
+   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
+@@ -12178,6 +12427,7 @@ ix86_init_machine_status (void)
+ 
+   f = ggc_alloc_cleared (sizeof (struct machine_function));
+   f->use_fast_prologue_epilogue_nregs = -1;
++  f->optimize_mode_switching[I387_FPU_MODE] = TARGET_MMX;
+ 
+   return f;
+ }
+@@ -14934,7 +15184,7 @@ ix86_expand_builtin (tree exp, rtx targe
+       return target;
+ 
+     case IX86_BUILTIN_FEMMS:
+-      emit_insn (gen_mmx_femms ());
++      emit_insn (gen_mmx_emms ());
+       return NULL_RTX;
+ 
+     case IX86_BUILTIN_PAVGUSB:
+--- a/gcc/config/i386/i386.h	2005-06-08 07:05:22.000000000 +0200
++++ b/gcc/config/i386/i386.h	2005-06-21 08:59:21.000000000 +0200
+@@ -819,7 +819,9 @@ do {									\
+ 
+ #define HARD_REGNO_NREGS(REGNO, MODE)   \
+   (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO)	\
+-   ? (COMPLEX_MODE_P (MODE) ? 2 : 1)					\
++   ? ((MODE) == ALLREGSmode						\
++      ? 8								\
++      : (COMPLEX_MODE_P (MODE) ? 2 : 1))				\
+    : ((MODE) == XFmode							\
+       ? (TARGET_64BIT ? 2 : 3)						\
+       : (MODE) == XCmode						\
+@@ -2174,6 +2176,13 @@ extern rtx ix86_compare_op0;	/* operand 
+ extern rtx ix86_compare_op1;	/* operand 1 for comparisons */
+ extern rtx ix86_compare_emitted;
+ 
++
++/* x87 FPU modes for x87/MMX mode switching.  */
++enum ix86_fpu_mode { FPU_MODE_X87, FPU_MODE_MMX, FPU_MODE_ANY };
++
++/* Default FPU mode for x87/MMX mode switching.  */
++#define FPU_MODE_DEFAULT (TARGET_80387 ? FPU_MODE_X87 : FPU_MODE_MMX)
++
+ /* To properly truncate FP values into integers, we need to set i387 control
+    word.  We can't emit proper mode switching code before reload, as spills
+    generated by reload may truncate values incorrectly, but we still can avoid
+@@ -2195,6 +2204,7 @@ enum ix86_entity
+   I387_FLOOR,
+   I387_CEIL,
+   I387_MASK_PM,
++  I387_FPU_MODE,
+   MAX_386_ENTITIES
+ };
+ 
+@@ -2224,7 +2234,7 @@ enum ix86_stack_slot 
+    refer to the mode-switched entity in question.  */
+ 
+ #define NUM_MODES_FOR_MODE_SWITCHING \
+-   { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
++   { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, FPU_MODE_ANY }
+ 
+ /* ENTITY is an integer specifying a mode-switched entity.  If
+    `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
+@@ -2234,6 +2244,13 @@ enum ix86_stack_slot 
+ 
+ #define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I))
+ 
++#define MODE_AFTER(ENTITY, MODE, I) \
++   ix86_mode_after ((ENTITY), (MODE), (I))
++
++#define MODE_ENTRY(ENTITY) ix86_mode_entry (ENTITY)
++
++#define MODE_EXIT(ENTITY) ix86_mode_exit (ENTITY)
++
+ /* This macro specifies the order in which modes for ENTITY are
+    processed.  0 is the highest priority.  */
+ 
+@@ -2243,10 +2260,8 @@ enum ix86_stack_slot 
+    is the set of hard registers live at the point where the insn(s)
+    are to be inserted.  */
+ 
+-#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) 			\
+-  ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED		\
+-   ? emit_i387_cw_initialization (MODE), 0				\
+-   : 0)
++#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
++   ix86_emit_mode_set ((ENTITY), (MODE))
+ 
+ 
+ /* Avoid renaming of stack registers, as doing so in combination with
+--- a/gcc/config/i386/i386.md	2005-06-08 05:22:15.000000000 +0200
++++ b/gcc/config/i386/i386.md	2005-06-21 10:45:14.000000000 +0200
+@@ -143,10 +143,10 @@
+ (define_constants
+   [(UNSPECV_BLOCKAGE		0)
+    (UNSPECV_STACK_PROBE		1)
+-   (UNSPECV_EMMS		2)
++   (UNSPECV_EFPU		2)
+    (UNSPECV_LDMXCSR		3)
+    (UNSPECV_STMXCSR		4)
+-   (UNSPECV_FEMMS		5)
++   (UNSPECV_EMMS		5)
+    (UNSPECV_CLFLUSH		6)
+    (UNSPECV_ALIGN		7)
+    (UNSPECV_MONITOR		8)
+@@ -155,15 +155,18 @@
+    (UNSPECV_CMPXCHG_2		11)
+    (UNSPECV_XCHG		12)
+    (UNSPECV_LOCK		13)
++   (UNSPECV_NOP			14)
+   ])
+ 
+ ;; Registers by name.
<<Diff was trimmed, longer than 597 lines>>



More information about the pld-cvs-commit mailing list