SOURCES: gcc-pr23948.patch (NEW) - rewrite tree-ssa-math-opts.c

pluto pluto at pld-linux.org
Sun Sep 25 15:00:53 CEST 2005


Author: pluto                        Date: Sun Sep 25 13:00:53 2005 GMT
Module: SOURCES                       Tag: HEAD
---- Log message:
- rewrite tree-ssa-math-opts.c

---- Files affected:
SOURCES:
   gcc-pr23948.patch (NONE -> 1.1)  (NEW)

---- Diffs:

================================================================
Index: SOURCES/gcc-pr23948.patch
diff -u /dev/null SOURCES/gcc-pr23948.patch:1.1
--- /dev/null	Sun Sep 25 15:00:53 2005
+++ SOURCES/gcc-pr23948.patch	Sun Sep 25 15:00:48 2005
@@ -0,0 +1,569 @@
+This patch mostly rewrites tree-ssa-math-opts.c to insert the reciprocal 
+computations *near the uses* and not near the definitions.  This is more 
+efficient, gives a more elegant algorithm, supports -ftrapping-math 
+cases, and does not need any special casing to fix PR23948 (a 4.1 
+regression) and other bugs that were already fixed in the pass (e.g. 
+PR23109 and PR23234).
+
+The pass will insert multiple reciprocal computations, under these rules:
+
+1) with -fno-trapping-math at least two divides should postdominate the 
+computation.
+
+2) with -ftrapping-math, in addition, the computation will be in a basic 
+block that already holds a divide.
+
+3) if a computation is present in a dominator, it can be reused.
+
+The way that this was implemented was to construct a copy of the 
+dominator tree, limited to blocks that include a divide, and their 
+nearest common dominators.
+
+The tree can then be easily walked and annotated with the number of 
+divides in the block or (later in the algorithm) postdominating the 
+block.  It is also walked to insert the computations according to the 
+above rules.  The final replacement of divides by multiplies does not 
+need a dominator tree walk because we store the info in bb->aux.
+
+Loop-invariant motion can also do this optimization, and the new 
+algorithm can merge computations that are hoisted by LIM.  For this 
+reason I've moved the pass after LIM.
+
+Bootstrapped/regtested i686-pc-linux-gnu, SPECint+SPECfp shows no change 
+when compiled with "-O2 -ffast-math".  The new testcases (together with 
+the existing ones) give complete coverage of insert_bb and 
+insert_reciprocals.
+
+*** gcc/gcc/Makefile.in	14 Sep 2005 09:26:41 -0000	1.1541
+--- gcc/gcc/Makefile.in	24 Sep 2005 11:47:33 -0000
+***************
+*** 1908,1914 ****
+     $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
+     hard-reg-set.h
+  tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
+!    $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H)
+  tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
+     $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
+     function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
+--- 1908,1915 ----
+     $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
+     hard-reg-set.h
+  tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
+!    $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H) \
+!    alloc-pool.h $(BASIC_BLOCK_H)
+  tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
+     $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
+     function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
+*** gcc/gcc/passes.c	9 Sep 2005 00:46:38 -0000	2.111
+--- gcc/gcc/passes.c	24 Sep 2005 11:40:35 -0000
+***************
+*** 522,533 ****
+       we add may_alias right after fold builtins
+       which can create arbitrary GIMPLE.  */
+    NEXT_PASS (pass_may_alias);
+-   NEXT_PASS (pass_cse_reciprocals);
+    NEXT_PASS (pass_split_crit_edges);
+    NEXT_PASS (pass_reassoc);
+    NEXT_PASS (pass_pre);
+    NEXT_PASS (pass_sink_code);
+    NEXT_PASS (pass_tree_loop);
+    NEXT_PASS (pass_dominator);
+    NEXT_PASS (pass_copy_prop);
+    NEXT_PASS (pass_cd_dce);
+--- 501,512 ----
+       we add may_alias right after fold builtins
+       which can create arbitrary GIMPLE.  */
+    NEXT_PASS (pass_may_alias);
+    NEXT_PASS (pass_split_crit_edges);
+    NEXT_PASS (pass_reassoc);
+    NEXT_PASS (pass_pre);
+    NEXT_PASS (pass_sink_code);
+    NEXT_PASS (pass_tree_loop);
++   NEXT_PASS (pass_cse_reciprocals);
+    NEXT_PASS (pass_dominator);
+    NEXT_PASS (pass_copy_prop);
+    NEXT_PASS (pass_cd_dce);
+--- gcc/gcc/tree-ssa-math-opts.c	9 Aug 2005 03:28:32 -0000	2.5
++++ gcc/gcc/tree-ssa-math-opts.c	25 Sep 2005 11:39:17 -0000
+@@ -47,88 +47,355 @@ Software Foundation, 51 Franklin Street,
+ #include "real.h"
+ #include "timevar.h"
+ #include "tree-pass.h"
++#include "alloc-pool.h"
++#include "basic-block.h"
+ 
+-static bool
+-gate_cse_reciprocals (void)
++struct occurrence {
++  basic_block bb;
++  int num_divides;
++  tree recip_def;
++  tree recip_def_stmt;
++  struct occurrence *children;
++  struct occurrence *next;
++  bool insert_before_divide;
++};
++
++static struct occurrence *occ_head;
++static alloc_pool occ_pool;
++
++
++/* Allocate and return a new struct occurrence for basic block BB, and
++   whose children list is headed by CHILDREN.  */
++static struct occurrence *
++occ_new (basic_block bb, struct occurrence *children)
+ {
+-  return optimize && !optimize_size && flag_unsafe_math_optimizations;
++  struct occurrence *occ;
++
++  occ = bb->aux = pool_alloc (occ_pool);
++  occ->bb = bb;
++  occ->num_divides = 0;
++  occ->recip_def = NULL;
++  occ->recip_def_stmt = NULL;
++  occ->children = children;
++  occ->next = NULL;
++  occ->insert_before_divide = false;
++  return occ;
+ }
+ 
+-/* Where to put the statement computing a reciprocal.  */
+-enum place_reciprocal
++
++/* Insert BB into our subset of the dominator tree.  P_HEAD points to a
++   list of "struct occurrence"s, one per basic block, having IDOM as
++   their common dominator.
++
++   We try to insert BB as deep as possible in the tree, and we also
++   insert any other block that is a common dominator for BB and one
++   block already in the tree.  */
++
++static void
++insert_bb (basic_block bb, struct occurrence *occ_bb, basic_block idom,
++	   struct occurrence **p_head)
+ {
+-  PR_BEFORE_BSI,	/* Put it using bsi_insert_before.  */
+-  PR_AFTER_BSI,		/* Put it using bsi_insert_after.  */
+-  PR_ON_ENTRY_EDGE	/* Put it on the edge between the entry
+-			   and the first basic block.  */
+-};
++  struct occurrence *occ, *occ_dom, **p_occ;
+ 
+-/* Check if DEF's uses include more than one floating-point division,
+-   and if so replace them by multiplications with the reciprocal.  Add
+-   the statement computing the reciprocal according to WHERE.
++  for (p_occ = p_head; (occ = *p_occ) != NULL; )
++    {
++      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ->bb, bb);
++      if (dom == bb)
++	{
++	  /* BB dominates OCC->BB.  OCC becomes OCC_BB's child.  */
++	  *p_occ = occ->next;
++	  occ->next = occ_bb->children;
++	  occ_bb->children = occ;
++
++	  /* Try the next block (it may as well be dominated by BB).  */
++	}
++
++      else if (dom == occ->bb)
++	{
++	  /* OCC->BB dominates BB.  Tail recurse to look deeper.  */
++	  insert_bb (bb, occ_bb, dom, &occ->children);
++	  return;
++	}
++
++      else if (dom != idom)
++	{
++	  gcc_assert (!dom->aux);
++
++	  /* There is a dominator between IDOM and BB, add it and make two
++	     children out of OCC_BB and OCC.  */
++	  *p_occ = occ->next;
++          occ_dom = occ_new (dom, occ_bb);
++	  occ_bb->next = occ;
++	  occ->next = NULL;
++
++	  /* None of the previous blocks has DOM as a dominator, so tail
++	     recurse would reexamine them uselessly. Switching BB with DOM,
++	     we go on and look for blocks dominated by DOM.  */
++	  bb = dom;
++	  occ_bb = occ_dom;
++	}
++
++      else
++	{
++	  /* Nothing special, go on with the next element.  */
++	  p_occ = &occ->next;
++	}
++    }
++
++  /* No deeper place was found.  Make BB a direct child of IDOM.  */
++  occ_bb->next = *p_head;
++  *p_head = occ_bb;
++}
++
++/* Register that we found a divide in BB.  */
++
++static inline void
++found_divide (basic_block bb)
++{
++  struct occurrence *occ;
++
++  occ = (struct occurrence *) bb->aux;
++  if (!occ)
++    {
++      occ = occ_new (bb, NULL);
++      insert_bb (bb, occ, ENTRY_BLOCK_PTR, &occ_head);
++    }
++
++  occ->insert_before_divide = true;
++  occ->num_divides++;
++}
++
++
++/* Return the one of the two successors of BB that is not reached by
++   a complex edge, if there is one.  Else, return BB.
++   This catches most cases in C++ where the result of a function call
++   is assigned to a variable.  */
++
++static basic_block
++sole_noncomplex_succ (basic_block bb)
++{
++  edge e0, e1;
++  if (EDGE_COUNT (bb->succs) != 2)
++    return bb;
++
++  e0 = EDGE_SUCC (bb, 0);
++  e1 = EDGE_SUCC (bb, 1);
++  if (e0->flags & EDGE_COMPLEX)
++    return e1->dest;
++  if (e1->flags & EDGE_COMPLEX)
++    return e0->dest;
++
++  return bb;
++}
++
++
++/* Compute the number of divides that postdominate each block in OCC and
++   its children.  */
+ 
+-   Does not check the type of DEF, nor that DEF is a GIMPLE register.
+-   This is done in the caller for speed, because otherwise this routine
+-   would be called for every definition and phi node.  */
+ static void
+-execute_cse_reciprocals_1 (block_stmt_iterator *bsi, tree def,
+-			   enum place_reciprocal where)
++compute_merit (struct occurrence *occ)
+ {
+-  use_operand_p use_p;
+-  imm_use_iterator use_iter;
+-  tree t, new_stmt, type;
+-  int count = 0;
+-  bool ok = !flag_trapping_math;
++  struct occurrence *occ_child;
++  basic_block dom = occ->bb;
+ 
+-  /* Find uses.  */
+-  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
++  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
+     {
+-      tree use_stmt = USE_STMT (use_p);
+-      if (TREE_CODE (use_stmt) == MODIFY_EXPR
+-	  && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
+-	  && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
++      basic_block bb;
++      if (occ_child->children)
++        compute_merit (occ_child);
++
++      if (flag_exceptions)
++	bb = sole_noncomplex_succ (dom);
++      else
++	bb = dom;
++
++      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
++        occ->num_divides += occ_child->num_divides;
++    }
++}
++
++/* TODO: Check how this compares with bsi_after_labels.  Return an iterator
++   pointing after the last LABEL_EXPR, or before the first statement if there
++   is no LABEL_EXPR.  */
++
++static block_stmt_iterator
++bsi_before_first_stmt (basic_block bb)
++{
++  block_stmt_iterator bsi;
++  for (bsi = bsi_start (bb);
++       !bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR;
++       bsi_next (&bsi))
++    ;
++
++  return bsi;
++}
++
++/* Return whether USE_STMT is a floating-point division by DEF.  */
++static inline bool
++is_divide_by (tree use_stmt, tree def)
++{
++  return TREE_CODE (use_stmt) == MODIFY_EXPR
++	 && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
++	 && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def;
++}
++
++/* Walk the subset of the dominator tree rooted at OCC, setting the
++   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
++   the given basic block.  The field may be left NULL, of course,
++   if it is not possible or profitable to do the optimization.
++
++   DEF_BSI is an iterator pointing at the statement defining DEF.
++   If RECIP_DEF is set, a dominator already has a computation that can
++   be used.  */
++
++static void
++insert_reciprocals (block_stmt_iterator *def_bsi, struct occurrence *occ,
++		    tree def, tree recip_def)
++{
++  tree type, new_stmt;
++  block_stmt_iterator bsi;
++  struct occurrence *occ_child;
++
++  if (!recip_def
++      && (occ->insert_before_divide || !flag_trapping_math)
++      && occ->num_divides >= 2)
++    {
++      /* Make a variable with the replacement and substitute it.  */
++      type = TREE_TYPE (def);
++      recip_def = make_rename_temp (type, "reciptmp");
++      new_stmt = build2 (MODIFY_EXPR, void_type_node, recip_def,
++		         fold_build2 (RDIV_EXPR, type,
++				      build_real (type, dconst1), def));
++  
++  
++      if (occ->insert_before_divide)
+         {
+-          ++count;
+-          /* Check if this use post-dominates the insertion point.  */
+-          if (ok || dominated_by_p (CDI_POST_DOMINATORS, bsi->bb,
+-				    bb_for_stmt (use_stmt)))
+-	    ok = true;
++          /* Case 1: insert before an existing divide.  */
++          bsi = bsi_before_first_stmt (occ->bb);
++          while (!bsi_end_p (bsi) && !is_divide_by (bsi_stmt (bsi), def))
++	    bsi_next (&bsi);
++
++          bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
++        }
++      else if (def_bsi && occ->bb == def_bsi->bb)
++        {
++          /* Case 2: insert right after the definition.  Note that this will
++	     never happen if the definition statement can throw, because in
++	     that case the sole successor of the statement's basic block will
++	     dominate all the uses as well.  */
++          bsi_insert_after (def_bsi, new_stmt, BSI_NEW_STMT);
++        }
++      else
++        {
++          /* Case 3: insert in a basic block not containing defs/uses.  */
++          bsi = bsi_before_first_stmt (occ->bb);
++          bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
+         }
+-      if (count >= 2 && ok)
+-        break;
++
++      occ->recip_def_stmt = new_stmt;
+     }
+ 
+-  if (count < 2 || !ok)
+-    return;
++  occ->recip_def = recip_def;
++  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
++    insert_reciprocals (def_bsi, occ_child, def, recip_def);
++}
++
+ 
+-  /* Make a variable with the replacement and substitute it.  */
+-  type = TREE_TYPE (def);
+-  t = make_rename_temp (type, "reciptmp");
+-  new_stmt = build2 (MODIFY_EXPR, void_type_node, t,
+-		     fold_build2 (RDIV_EXPR, type, build_real (type, dconst1),
+-				  def));
+-
+-  if (where == PR_BEFORE_BSI)
+-    bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
+-  else if (where == PR_AFTER_BSI)
+-    bsi_insert_after (bsi, new_stmt, BSI_NEW_STMT);
+-  else if (where == PR_ON_ENTRY_EDGE)
+-    bsi_insert_on_edge (single_succ_edge (ENTRY_BLOCK_PTR), new_stmt);
++/* Replace the divide at USE_P with a multiplication by the reciprocal, if
++   possible.  */
++
++static inline void
++replace_reciprocal (use_operand_p use_p)
++{
++  tree use_stmt = USE_STMT (use_p);
++  basic_block bb = bb_for_stmt (use_stmt);
++  struct occurrence *occ = (struct occurrence *) bb->aux;
++
++  if (occ->recip_def && use_stmt != occ->recip_def_stmt)
++    {
++      TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
++      SET_USE (use_p, occ->recip_def);
++    }
++}
++
++
++/* Free OCC and return one more "struct occurrence" to be freed.  */
++
++static struct occurrence *
++free_bb (struct occurrence *occ)
++{
++  struct occurrence *child, *next;
++
++  /* First get the two pointers hanging off OCC.  */
++  next = occ->next;
++  child = occ->children;
++  occ->bb->aux = NULL;
++  pool_free (occ_pool, occ);
++
++  /* Now ensure that we don't recurse unless it is necessary.  */
++  if (!child)
++    return next;
+   else
+-    gcc_unreachable ();
++    {
++      while (next)
++	next = free_bb (next);
++
++      return child;
++    }
++}
+ 
+-  FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
++static bool
++gate_cse_reciprocals (void)
++{
++  return optimize && !optimize_size && flag_unsafe_math_optimizations;
++}
++
++/* Look for floating-point divides among DEF's uses, and try to
++   replace them by multiplications with the reciprocal.  Add
++   as many statements computing the reciprocal as needed.
++
++   Does not check the type of DEF, nor that DEF is a GIMPLE register.
++   This is done in the caller.  */
++
++static void
++execute_cse_reciprocals_1 (block_stmt_iterator *def_bsi, tree def)
++{
++  use_operand_p use_p;
++  imm_use_iterator use_iter;
++  struct occurrence *occ;
++  int count = 0;
++
++  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
+     {
+       tree use_stmt = USE_STMT (use_p);
+-      if (use_stmt != new_stmt
+-	  && TREE_CODE (use_stmt) == MODIFY_EXPR
+-	  && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
+-	  && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
++      if (is_divide_by (use_stmt, def))
+ 	{
+-	  TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
+-	  SET_USE (use_p, t);
++	  found_divide (bb_for_stmt (use_stmt));
++	  count++;
+ 	}
+     }
++  
++  /* Do the expensive part only if we can hope to optimize something.  */
++  if (count >= 2)
++    {
++      for (occ = occ_head; occ; occ = occ->next)
++	{
++	  compute_merit (occ);
++	  insert_reciprocals (def_bsi, occ, def, NULL);
++	}
++
++      FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
++	{
++	  tree use_stmt = USE_STMT (use_p);
++	  if (is_divide_by (use_stmt, def))
++	    replace_reciprocal (use_p);
++	}
++    }
++
++  for (occ = occ_head; occ; )
++    occ = free_bb (occ);
++
++  occ_head = NULL;
+ }
+ 
+ static void
+@@ -137,34 +404,30 @@ execute_cse_reciprocals (void)
+   basic_block bb;
+   tree arg;
+ 
+-  if (flag_trapping_math)
+-    calculate_dominance_info (CDI_POST_DOMINATORS);
++  occ_pool = create_alloc_pool ("dominators for recip",
++				sizeof (struct occurrence),
++				n_basic_blocks / 3 + 1);
+ 
+-  if (single_succ_p (ENTRY_BLOCK_PTR))
+-    for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
+-      if (default_def (arg))
+-	{
+-	  block_stmt_iterator bsi;
+-	  bsi = bsi_start (single_succ (ENTRY_BLOCK_PTR));
+-	  execute_cse_reciprocals_1 (&bsi, default_def (arg),
+-				     PR_ON_ENTRY_EDGE);
+-	}
++  calculate_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
++
++  FOR_EACH_BB (bb)
++    bb->aux = NULL;
++
++  for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
++    if (default_def (arg))
++      execute_cse_reciprocals_1 (NULL, default_def (arg));
+ 
+   FOR_EACH_BB (bb)
+     {
+-      block_stmt_iterator bsi;
++      block_stmt_iterator bsi = bsi_before_first_stmt (bb);
+       tree phi, def;
+-      for (bsi = bsi_start (bb);
+-	   !bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR;
+-	   bsi_next (&bsi))
+-        ;
+ 
+       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+ 	{
+ 	  def = PHI_RESULT (phi);
+ 	  if (FLOAT_TYPE_P (TREE_TYPE (def))
+ 	      && is_gimple_reg (def))
+-	    execute_cse_reciprocals_1 (&bsi, def, PR_BEFORE_BSI);
++	    execute_cse_reciprocals_1 (NULL, def);
+ 	}
+ 
+       for (; !bsi_end_p (bsi); bsi_next (&bsi))
+@@ -174,15 +437,12 @@ execute_cse_reciprocals (void)
+ 	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
+ 	      && FLOAT_TYPE_P (TREE_TYPE (def))
+ 	      && TREE_CODE (def) == SSA_NAME)
+-	    execute_cse_reciprocals_1 (&bsi, def, PR_AFTER_BSI);
++	    execute_cse_reciprocals_1 (&bsi, def);
+ 	}
+     }
+ 
+-  if (flag_trapping_math)
+-    free_dominance_info (CDI_POST_DOMINATORS);
+-  
+-  if (single_succ_p (ENTRY_BLOCK_PTR))
+-    bsi_commit_one_edge_insert (single_succ_edge (ENTRY_BLOCK_PTR), NULL);
++  free_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
++  free_alloc_pool (occ_pool);
+ }
+ 
+ struct tree_opt_pass pass_cse_reciprocals =
================================================================



More information about the pld-cvs-commit mailing list