SOURCES: gcc-pr19161.patch (NEW) - automatic mmx/x87 fpu mode swit...
pluto
pluto at pld-linux.org
Fri Jun 24 13:40:13 CEST 2005
Author: pluto Date: Fri Jun 24 11:40:13 2005 GMT
Module: SOURCES Tag: HEAD
---- Log message:
- automatic mmx/x87 fpu mode switching.
---- Files affected:
SOURCES:
gcc-pr19161.patch (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: SOURCES/gcc-pr19161.patch
diff -u /dev/null SOURCES/gcc-pr19161.patch:1.1
--- /dev/null Fri Jun 24 13:40:13 2005
+++ SOURCES/gcc-pr19161.patch Fri Jun 24 13:40:08 2005
@@ -0,0 +1,755 @@
+Date: Tue, 21 Jun 2005 12:43:31 +0200
+From: Uros Bizjak <uros dot bizjak at kss-loka dot si>
+Subject: [PATCH, i386] automatic MMX/x87 FPU mode switching (the real one)
+
+Hello!
+
+This patch implements the much-requested feature of automatic mode switching
+between the MMX and x87 register sets. It is based on the LCM (lazy code motion)
+algorithm, which inserts (f)emms instructions where appropriate. Thanks also to
+rth for his valuable help and to Roger for his encouragement!
+
+This patch now handles ASM patterns, as discussed with rth. The only limitation
+is that mixing x87 and MMX registers is not allowed in the input and output
+constraints of an ASM pattern. Function calls are handled in the same way as
+discussed before.
+
+So, the testcase:
+
+#include <mmintrin.h>
+
+__v8qi
+aaa (__v8qi x, __v8qi y)
+{
+ __v8qi mm1;
+
+ mm1 = _mm_add_pi8 (x, y);
+
+ return mm1;
+}
+
+int main() {
+ __v8qi mm0 = { 1,2,3,4,5,6,7,8 };
+ __v8qi mm1 = { 11,12,13,14,15,16,17,18 };
+
+ double a = 0.0;
+
+ union ttt {
+ __v8qi mm;
+ char x[8];
+ } temp;
+
+ temp.mm = mm0;
+ temp.x[1] = cos(a);
+
+ temp.mm = aaa (temp.mm, mm1);
+ printf ("%i %f\n", temp.x[0], sqrt(temp.x[1]));
+
+ return 0;
+}
+
+produces (gcc -O2 -mmmx -ffast-math -fomit-frame-pointer):
+
+aaa:
+ paddb %mm1, %mm0
+ ret
+
+main:
+ pushl %ebp
+ movl %esp, %ebp
+ subl $24, %esp
+ andl $-16, %esp
+ subl $16, %esp
+ movl $67305985, %edx
+ movl $134678021, %ecx
+ movb $1, %dh
+ movq .LC1, %mm1
+ movl %edx, -8(%ebp)
+ movl %ecx, -4(%ebp)
+ movq -8(%ebp), %mm2
+ movq %mm2, %mm0
+ call aaa
+ movq %mm0, -8(%ebp)
+ movl -8(%ebp), %edx
+ movsbl %dh, %eax
+ cbtw
+ emms <<< inserted by LCM here
+ pushw %ax
+ movsbl %dl,%eax
+ filds (%esp)
+ addl $2, %esp
+ movl %eax, 4(%esp)
+ movl $.LC2, (%esp)
+ fsqrt
+ fstpl 8(%esp)
+ call printf
+ xorl %eax, %eax
+ leave
+ ret
+
+And the binary works as expected:
+
+./a.out
+12 3.605551
+
+
+A nice feature of this patch, IMHO, is that a manually inserted emms (via the
+_mm_empty() intrinsic) is also handled by the LCM approach. If there is no need
+for an emms at that point, it is not emitted. This patch also handles (stupid)
+code like:
+
+#include <mmintrin.h>
+
+__v8qi
+aaa (__v8qi x, __v8qi y)
+{
+ __v8qi mm1;
+
+ mm1 = _mm_add_pi8 (x, y);
+ _mm_empty ();
+ return mm1;
+}
+
+to produce correct asm code:
+
+aaa:
+ subl $12, %esp
+ paddb %mm1, %mm0
+ movq %mm0, (%esp)
+ emms
+ movq (%esp), %mm0
+ addl $12, %esp
+ ret
+
+The patch was bootstrapped on i686-pc-linux-gnu and regtested for c and c++. It
+introduces one new failure into the testsuite (__builtin_apply problems,
+gcc.dg/20020218-1.c), but otherwise produces correct code for all testcases I have
+thrown at it. I think this patch is ready for wider exposure in current mainline.
+
+For the __builtin_apply () problems, I suggest that the called function (for i386)
+should NOT use MMX registers, and that it is always called in FPU_MODE_X87.
+Otherwise, there is no way to determine MODE_AFTER of such a function.
+
+2005-06-21 Uros Bizjak <uros at kss-loka.si>
+
+ * mode-switching.c (optimize_mode_switching): Change MODE_AFTER
+ to include entity.
+
+ * reg-stack.c (subst_stack_regs): Handle MMX/x87 FPU mode
+ switching instructions.
+
+ * config/sh/sh.h: MODE_AFTER: Change define to include entity.
+
+ * config/i386/i386-modes.def: ALLREGS: New RANDOM_MODE.
+
+ * config/i386/i386-protos.h (emit_i387_cw_initialization):
+ Remove prototype.
+ (ix86_mode_after): New prototype.
+ (ix86_mode_entry): New prototype.
+ (ix86_mode_exit): New prototype.
+ (ix86_emit_mode_set): New prototype.
+
+ * config/i386/i386.h (enum ix86_fpu_mode): New enum.
+ (FPU_MODE_DEFAULT): New define.
+ (enum ix86_entity): Add new I387_FPU_MODE entity.
+ (NUM_MODES_FOR_MODE_SWITCHING): Add FPU_MODE_ANY to
+ enable switching for I387_FPU_MODE entity.
+ (MODE_AFTER): New define.
+ (MODE_ENTRY): New define.
+ (MODE_EXIT): New define.
+ (EMIT_MODE_SET): Change definition to use ix86_emit_mode_set.
+ (HARD_REGNO_NREGS): Return 8 for ALLREGS mode.
+
+ * config/i386/i386.c (ix86_mode_needed): Handle
+ entity I387_FPU_MODE.
+ (ix86_mode_after): New function.
+ (ix86_mode_entry): New function.
+ (ix86_mode_exit): New function.
+ (ix86_emit_mode_set): Renamed from emit_i387_cw_initialization.
+ Handle entity I387_FPU_MODE.
+ (ix86_init_machine_status): Set optimize_mode_switching flag
+ for I387_FPU_MODE entity if TARGET_MMX.
+ (ix86_expand_builtin) [IX86_BUILTIN_FEMMS]: Use "mmx_emms"
+ instruction pattern.
+
+ * config/i386/i386.md (UNSPECV_FEMMS): Remove constant.
+ (UNSPECV_EFPU, UNSPECV_NOP, FIRSTFP_REG, FIRSTMMX_REG): New
+ constants.
+
+ * config/i386/mmx.md ("mmx_emms"): Change instruction definition
+ to use UNSPECV_NOP. Set "unit" attribute to i387.
+ ("efpu", "emms"): New instruction patterns.
+
+Uros.
+
+--- a/gcc/config/i386/i386.c 2005-06-10 23:45:12.000000000 +0200
++++ b/gcc/config/i386/i386.c 2005-06-21 10:55:16.000000000 +0200
+@@ -7362,13 +7362,156 @@ output_387_binary_op (rtx insn, rtx *ope
+ int
+ ix86_mode_needed (int entity, rtx insn)
+ {
+- enum attr_i387_cw mode;
++ int unit, mode;
++
++ if (entity == I387_FPU_MODE)
++ {
++ /* If a function call uses MMX registers, select MMX FPU mode and
++ if function call uses x87 registers, select x87 FPU mode. If
++ no MMX or x87 registers are used, switch to default mode. */
++ if (CALL_P (insn))
++ {
++ rtx link;
++ int mmx = 0, x87 = 0;
++
++ for (link = CALL_INSN_FUNCTION_USAGE (insn);
++ link;
++ link = XEXP (link, 1))
++ {
++ if (GET_CODE (XEXP (link, 0)) == USE)
++ {
++ rtx reg = XEXP (XEXP (link, 0), 0);
++
++ if (reg)
++ {
++ if (MMX_REG_P (reg))
++ mmx = 1;
++
++ if (FP_REG_P (reg))
++ x87 = 1;
++ }
++ }
++ }
++
++ /* Mixing of x87 and MMX registers is not allowed
++ in function call. */
++ gcc_assert (!mmx || !x87);
++
++ if (mmx)
++ return FPU_MODE_MMX;
++
++ if (x87)
++ return FPU_MODE_X87;
++
++ return FPU_MODE_DEFAULT;
++ }
++
++ /* Parse ASM operands to check input and output constraints. If
++ an ASM uses MMX registers, select MMX mode and if it uses x87
++ registers, select x87 mode. Mixing of MMX and x87 constraints
++ is not allowed. If no MMX or x87 input and output registers
++ are used, switch to default mode. */
++ if (NONJUMP_INSN_P (insn))
++ {
++ rtx pat = PATTERN (insn);
++ int noperands = asm_noperands (pat);
++
++ if (noperands >= 0)
++ {
++ const char **constraints;
++ int mmx = 0, x87 = 0;
++ int i;
++
++ constraints = alloca (noperands * sizeof (char *));
++ decode_asm_operands (pat, NULL, NULL, constraints, NULL);
++
++ for (i = 0; i < noperands; i++)
++ {
++ const char *c = constraints[i];
++ enum reg_class class;
++
++ if (c[0] == '%')
++ c++;
++ if (ISDIGIT ((unsigned char) c[0]) && c[1] == '\0')
++ c = constraints[c[0] - '0'];
++
++ while (*c)
++ {
++ char cc = *c;
++ int len;
++ switch (cc)
++ {
++ case ',':
++ cc++;
++ continue;
++ case '=':
++ case '+':
++ case '*':
++ case '%':
++ case '!':
++ case '#':
++ case '&':
++ case '?':
++ break;
++
++ default:
++ class = REG_CLASS_FROM_LETTER (cc);
++
++ if (MMX_CLASS_P (class))
++ mmx = 1;
++
++ if (FLOAT_CLASS_P (class))
++ x87 = 1;
++ }
++
++ len = CONSTRAINT_LEN (cc, c);
++ do
++ c++;
++ while (--len && *c);
++ }
++ }
++
++ /* Mixing x87 and MMX registers in ASM is not allowed. */
++ if (mmx && x87)
++ {
++ error_for_asm (insn, "mixing of x87 and MMX registers "
++ "is not allowed in %<asm%>");
++ gcc_unreachable ();
++ }
++
++ if (mmx)
++ return FPU_MODE_MMX;
++
++ if (x87)
++ return FPU_MODE_X87;
++
++ return FPU_MODE_DEFAULT;
++ }
++ }
++
++ if (recog_memoized (insn) < 0)
++ return FPU_MODE_ANY;
++
++ unit = get_attr_unit (insn);
++
++ switch (unit)
++ {
++ case UNIT_MMX:
++ return FPU_MODE_MMX;
++
++ case UNIT_I387:
++ return FPU_MODE_X87;
++
++ default:
++ return FPU_MODE_ANY;
++
++ }
++ }
+
+ /* The mode UNINITIALIZED is used to store control word after a
+ function call or ASM pattern. The mode ANY specify that function
+ has no requirements on the control word and make no changes in the
+ bits we are interested in. */
+-
+ if (CALL_P (insn)
+ || (NONJUMP_INSN_P (insn)
+ && (asm_noperands (PATTERN (insn)) >= 0
+@@ -7409,21 +7552,127 @@ ix86_mode_needed (int entity, rtx insn)
+ return I387_CW_ANY;
+ }
+
+-/* Output code to initialize control word copies used by trunc?f?i and
+- rounding patterns. CURRENT_MODE is set to current control word,
+- while NEW_MODE is set to new control word. */
++
++/* Switch FPU mode to appropriate mode after function call in
++ optimize_mode_switching pass. */
++
++int
++ix86_mode_after (int entity, int mode, rtx insn)
++{
++ if (entity == I387_FPU_MODE)
++ {
++ /* Switch to MMX mode after function call if returned value
++ is returned in MMX register and similar for x87 reg.
++ If no MMX or x87 reg is returned, switch to default mode. */
++ if (CALL_P (insn))
++ {
++ rtx reg = SET_DEST (PATTERN (insn));
++
++ if (reg)
++ {
++ if (MMX_REG_P (reg))
++ return FPU_MODE_MMX;
++
++ if (FP_REG_P (reg))
++ return FPU_MODE_X87;
++ }
++
++ return FPU_MODE_DEFAULT;
++ }
++ }
++
++ return mode;
++}
++
++/* Switch FPU mode of function entry to appropriate mode in
++ optimize_mode_switching pass. */
++
++int
++ix86_mode_entry (int entity)
++{
++ if (entity == I387_FPU_MODE)
++ {
++ /* Switch entry mode to default mode for vaarg functions. */
++ if (current_function_args_info.maybe_vaarg)
++ return FPU_MODE_DEFAULT;
++
++ if (current_function_args_info.mmx_nregs != MMX_REGPARM_MAX)
++ return FPU_MODE_MMX;
++
++ /* ??? Handle x87 registers for fpregparm. */
++
++ return FPU_MODE_DEFAULT;
++ }
++
++ return I387_CW_ANY;
++}
++
++/* Switch FPU mode of function exit to appropriate mode in
++ optimize_mode_switching pass. */
++
++int
++ix86_mode_exit (int entity)
++{
++ if (entity == I387_FPU_MODE)
++ {
++ rtx reg = current_function_return_rtx;
++
++ if (reg)
++ {
++ if (MMX_REG_P (reg))
++ return FPU_MODE_MMX;
++
++ if (FP_REG_P (reg))
++ return FPU_MODE_X87;
++ }
++
++ return FPU_MODE_DEFAULT;
++ }
++
++ return I387_CW_ANY;
++}
++
++/* Emit mode switching instructions in optimize_mode_switching pass. */
+
+ void
+-emit_i387_cw_initialization (int mode)
++ix86_emit_mode_set (int entity, int mode)
+ {
+- rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
+- rtx new_mode;
++ rtx stored_mode, new_mode;
++ rtx reg;
+
+ int slot;
+
+- rtx reg = gen_reg_rtx (HImode);
++ if (entity == I387_FPU_MODE)
++ {
++ switch (mode)
++ {
++ case FPU_MODE_ANY:
++ return;
++
++ case FPU_MODE_X87:
++ emit_insn (gen_emms ());
++ return;
+
++ case FPU_MODE_MMX:
++ emit_insn (gen_efpu ());
++ return;
++
++ default:
++ gcc_unreachable ();
++ }
++ }
++
++ /* Output code to initialize control word copies used by trunc?f?i
++ and rounding patterns. STORED_MODE is set to current control
++ word, while NEW_MODE is set to new control word. */
++
++ if ((mode == I387_CW_UNINITIALIZED) || (mode == I387_CW_ANY))
++ return;
++
++ stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ emit_insn (gen_x86_fnstcw_1 (stored_mode));
++
++ reg = gen_reg_rtx (HImode);
+ emit_move_insn (reg, stored_mode);
+
+ if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
+@@ -12178,6 +12427,7 @@ ix86_init_machine_status (void)
+
+ f = ggc_alloc_cleared (sizeof (struct machine_function));
+ f->use_fast_prologue_epilogue_nregs = -1;
++ f->optimize_mode_switching[I387_FPU_MODE] = TARGET_MMX;
+
+ return f;
+ }
+@@ -14934,7 +15184,7 @@ ix86_expand_builtin (tree exp, rtx targe
+ return target;
+
+ case IX86_BUILTIN_FEMMS:
+- emit_insn (gen_mmx_femms ());
++ emit_insn (gen_mmx_emms ());
+ return NULL_RTX;
+
+ case IX86_BUILTIN_PAVGUSB:
+--- a/gcc/config/i386/i386.h 2005-06-08 07:05:22.000000000 +0200
++++ b/gcc/config/i386/i386.h 2005-06-21 08:59:21.000000000 +0200
+@@ -819,7 +819,9 @@ do { \
+
+ #define HARD_REGNO_NREGS(REGNO, MODE) \
+ (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+- ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
++ ? ((MODE) == ALLREGSmode \
++ ? 8 \
++ : (COMPLEX_MODE_P (MODE) ? 2 : 1)) \
+ : ((MODE) == XFmode \
+ ? (TARGET_64BIT ? 2 : 3) \
+ : (MODE) == XCmode \
+@@ -2174,6 +2176,13 @@ extern rtx ix86_compare_op0; /* operand
+ extern rtx ix86_compare_op1; /* operand 1 for comparisons */
+ extern rtx ix86_compare_emitted;
+
++
++/* x87 FPU modes for x87/MMX mode switching. */
++enum ix86_fpu_mode { FPU_MODE_X87, FPU_MODE_MMX, FPU_MODE_ANY };
++
++/* Default FPU mode for x87/MMX mode switching. */
++#define FPU_MODE_DEFAULT (TARGET_80387 ? FPU_MODE_X87 : FPU_MODE_MMX)
++
+ /* To properly truncate FP values into integers, we need to set i387 control
+ word. We can't emit proper mode switching code before reload, as spills
+ generated by reload may truncate values incorrectly, but we still can avoid
+@@ -2195,6 +2204,7 @@ enum ix86_entity
+ I387_FLOOR,
+ I387_CEIL,
+ I387_MASK_PM,
++ I387_FPU_MODE,
+ MAX_386_ENTITIES
+ };
+
+@@ -2224,7 +2234,7 @@ enum ix86_stack_slot
+ refer to the mode-switched entity in question. */
+
+ #define NUM_MODES_FOR_MODE_SWITCHING \
+- { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
++ { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, FPU_MODE_ANY }
+
+ /* ENTITY is an integer specifying a mode-switched entity. If
+ `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
+@@ -2234,6 +2244,13 @@ enum ix86_stack_slot
+
+ #define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I))
+
++#define MODE_AFTER(ENTITY, MODE, I) \
++ ix86_mode_after ((ENTITY), (MODE), (I))
++
++#define MODE_ENTRY(ENTITY) ix86_mode_entry (ENTITY)
++
++#define MODE_EXIT(ENTITY) ix86_mode_exit (ENTITY)
++
+ /* This macro specifies the order in which modes for ENTITY are
+ processed. 0 is the highest priority. */
+
+@@ -2243,10 +2260,8 @@ enum ix86_stack_slot
+ is the set of hard registers live at the point where the insn(s)
+ are to be inserted. */
+
+-#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
+- ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \
+- ? emit_i387_cw_initialization (MODE), 0 \
+- : 0)
++#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
++ ix86_emit_mode_set ((ENTITY), (MODE))
+
+
+ /* Avoid renaming of stack registers, as doing so in combination with
+--- a/gcc/config/i386/i386.md 2005-06-08 05:22:15.000000000 +0200
++++ b/gcc/config/i386/i386.md 2005-06-21 10:45:14.000000000 +0200
+@@ -143,10 +143,10 @@
+ (define_constants
+ [(UNSPECV_BLOCKAGE 0)
+ (UNSPECV_STACK_PROBE 1)
+- (UNSPECV_EMMS 2)
++ (UNSPECV_EFPU 2)
+ (UNSPECV_LDMXCSR 3)
+ (UNSPECV_STMXCSR 4)
+- (UNSPECV_FEMMS 5)
++ (UNSPECV_EMMS 5)
+ (UNSPECV_CLFLUSH 6)
+ (UNSPECV_ALIGN 7)
+ (UNSPECV_MONITOR 8)
+@@ -155,15 +155,18 @@
+ (UNSPECV_CMPXCHG_2 11)
+ (UNSPECV_XCHG 12)
+ (UNSPECV_LOCK 13)
++ (UNSPECV_NOP 14)
+ ])
+
+ ;; Registers by name.
<<Diff was trimmed, longer than 597 lines>>
More information about the pld-cvs-commit
mailing list