SOURCES: gcc-pr23948.patch (NEW) - rewrite tree-ssa-math-opts.c
pluto
pluto at pld-linux.org
Sun Sep 25 15:00:53 CEST 2005
Author: pluto Date: Sun Sep 25 13:00:53 2005 GMT
Module: SOURCES Tag: HEAD
---- Log message:
- rewrite tree-ssa-math-opts.c
---- Files affected:
SOURCES:
gcc-pr23948.patch (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: SOURCES/gcc-pr23948.patch
diff -u /dev/null SOURCES/gcc-pr23948.patch:1.1
--- /dev/null Sun Sep 25 15:00:53 2005
+++ SOURCES/gcc-pr23948.patch Sun Sep 25 15:00:48 2005
@@ -0,0 +1,569 @@
+This patch mostly rewrites tree-ssa-math-opts.c to insert the reciprocal
+computations *near the uses* and not near the definitions. This is more
+efficient, gives a more elegant algorithm, supports -ftrapping-math
+cases, and does not need any special casing to fix PR23948 (a 4.1
+regression) and other bugs that were already fixed in the pass (e.g.
+PR23109 and PR23234).
+
+The pass will insert multiple reciprocal computations, under these rules:
+
+1) with -fno-trapping-math at least two divides should postdominate the
+computation.
+
+2) with -ftrapping-math, in addition, the computation will be in a basic
+block that already holds a divide.
+
+3) if a computation is present in a dominator, it can be reused.
+
+The way that this was implemented was to construct a copy of the
+dominator tree, limited to blocks that include a divide, and their
+nearest common dominators.
+
+The tree can then be easily walked and annotated with the number of
+divides in the block or (later in the algorithm) postdominating the
+block. It is also walked to insert the computations according to the
+above rules. The final replacement of divides by multiplies does not
+need a dominator tree walk because we store the info in bb->aux.
+
+Loop-invariant motion can also do this optimization, and the new
+algorithm can merge computations that are hoisted by LIM. For this
+reason I've moved the pass after LIM.
+
+Bootstrapped/regtested i686-pc-linux-gnu, SPECint+SPECfp shows no change
+when compiled with "-O2 -ffast-math". The new testcases (together with
+the existing ones) give complete coverage of insert_bb and
+insert_reciprocals.
+
+*** gcc/gcc/Makefile.in 14 Sep 2005 09:26:41 -0000 1.1541
+--- gcc/gcc/Makefile.in 24 Sep 2005 11:47:33 -0000
+***************
+*** 1908,1914 ****
+ $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
+ hard-reg-set.h
+ tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
+! $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H)
+ tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
+ $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
+ function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
+--- 1908,1915 ----
+ $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
+ hard-reg-set.h
+ tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
+! $(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H) \
+! alloc-pool.h $(BASIC_BLOCK_H)
+ tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
+ $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
+ function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
+*** gcc/gcc/passes.c 9 Sep 2005 00:46:38 -0000 2.111
+--- gcc/gcc/passes.c 24 Sep 2005 11:40:35 -0000
+***************
+*** 522,533 ****
+ we add may_alias right after fold builtins
+ which can create arbitrary GIMPLE. */
+ NEXT_PASS (pass_may_alias);
+- NEXT_PASS (pass_cse_reciprocals);
+ NEXT_PASS (pass_split_crit_edges);
+ NEXT_PASS (pass_reassoc);
+ NEXT_PASS (pass_pre);
+ NEXT_PASS (pass_sink_code);
+ NEXT_PASS (pass_tree_loop);
+ NEXT_PASS (pass_dominator);
+ NEXT_PASS (pass_copy_prop);
+ NEXT_PASS (pass_cd_dce);
+--- 501,512 ----
+ we add may_alias right after fold builtins
+ which can create arbitrary GIMPLE. */
+ NEXT_PASS (pass_may_alias);
+ NEXT_PASS (pass_split_crit_edges);
+ NEXT_PASS (pass_reassoc);
+ NEXT_PASS (pass_pre);
+ NEXT_PASS (pass_sink_code);
+ NEXT_PASS (pass_tree_loop);
++ NEXT_PASS (pass_cse_reciprocals);
+ NEXT_PASS (pass_dominator);
+ NEXT_PASS (pass_copy_prop);
+ NEXT_PASS (pass_cd_dce);
+--- gcc/gcc/tree-ssa-math-opts.c 9 Aug 2005 03:28:32 -0000 2.5
++++ gcc/gcc/tree-ssa-math-opts.c 25 Sep 2005 11:39:17 -0000
+@@ -47,88 +47,355 @@ Software Foundation, 51 Franklin Street,
+ #include "real.h"
+ #include "timevar.h"
+ #include "tree-pass.h"
++#include "alloc-pool.h"
++#include "basic-block.h"
+
+-static bool
+-gate_cse_reciprocals (void)
++struct occurrence {
++ basic_block bb;
++ int num_divides;
++ tree recip_def;
++ tree recip_def_stmt;
++ struct occurrence *children;
++ struct occurrence *next;
++ bool insert_before_divide;
++};
++
++static struct occurrence *occ_head;
++static alloc_pool occ_pool;
++
++
++/* Allocate and return a new struct occurrence for basic block BB, and
++ whose children list is headed by CHILDREN. */
++static struct occurrence *
++occ_new (basic_block bb, struct occurrence *children)
+ {
+- return optimize && !optimize_size && flag_unsafe_math_optimizations;
++ struct occurrence *occ;
++
++ occ = bb->aux = pool_alloc (occ_pool);
++ occ->bb = bb;
++ occ->num_divides = 0;
++ occ->recip_def = NULL;
++ occ->recip_def_stmt = NULL;
++ occ->children = children;
++ occ->next = NULL;
++ occ->insert_before_divide = false;
++ return occ;
+ }
+
+-/* Where to put the statement computing a reciprocal. */
+-enum place_reciprocal
++
++/* Insert BB into our subset of the dominator tree. P_HEAD points to a
++ list of "struct occurrence"s, one per basic block, having IDOM as
++ their common dominator.
++
++ We try to insert BB as deep as possible in the tree, and we also
++ insert any other block that is a common dominator for BB and one
++ block already in the tree. */
++
++static void
++insert_bb (basic_block bb, struct occurrence *occ_bb, basic_block idom,
++ struct occurrence **p_head)
+ {
+- PR_BEFORE_BSI, /* Put it using bsi_insert_before. */
+- PR_AFTER_BSI, /* Put it using bsi_insert_after. */
+- PR_ON_ENTRY_EDGE /* Put it on the edge between the entry
+- and the first basic block. */
+-};
++ struct occurrence *occ, *occ_dom, **p_occ;
+
+-/* Check if DEF's uses include more than one floating-point division,
+- and if so replace them by multiplications with the reciprocal. Add
+- the statement computing the reciprocal according to WHERE.
++ for (p_occ = p_head; (occ = *p_occ) != NULL; )
++ {
++ basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ->bb, bb);
++ if (dom == bb)
++ {
++ /* BB dominates OCC->BB. OCC becomes OCC_BB's child. */
++ *p_occ = occ->next;
++ occ->next = occ_bb->children;
++ occ_bb->children = occ;
++
++ /* Try the next block (it may as well be dominated by BB). */
++ }
++
++ else if (dom == occ->bb)
++ {
++ /* OCC->BB dominates BB. Tail recurse to look deeper. */
++ insert_bb (bb, occ_bb, dom, &occ->children);
++ return;
++ }
++
++ else if (dom != idom)
++ {
++ gcc_assert (!dom->aux);
++
++ /* There is a dominator between IDOM and BB, add it and make two
++ children out of OCC_BB and OCC. */
++ *p_occ = occ->next;
++ occ_dom = occ_new (dom, occ_bb);
++ occ_bb->next = occ;
++ occ->next = NULL;
++
++ /* None of the previous blocks has DOM as a dominator, so tail
++ recurse would reexamine them uselessly. Switching BB with DOM,
++ we go on and look for blocks dominated by DOM. */
++ bb = dom;
++ occ_bb = occ_dom;
++ }
++
++ else
++ {
++ /* Nothing special, go on with the next element. */
++ p_occ = &occ->next;
++ }
++ }
++
++ /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
++ occ_bb->next = *p_head;
++ *p_head = occ_bb;
++}
++
++/* Register that we found a divide in BB. */
++
++static inline void
++found_divide (basic_block bb)
++{
++ struct occurrence *occ;
++
++ occ = (struct occurrence *) bb->aux;
++ if (!occ)
++ {
++ occ = occ_new (bb, NULL);
++ insert_bb (bb, occ, ENTRY_BLOCK_PTR, &occ_head);
++ }
++
++ occ->insert_before_divide = true;
++ occ->num_divides++;
++}
++
++
++/* Return the one of the two successors of BB that is not reached by
++ a complex edge, if there is one. Else, return BB.
++ This catches most cases in C++ where the result of a function call
++ is assigned to a variable. */
++
++static basic_block
++sole_noncomplex_succ (basic_block bb)
++{
++ edge e0, e1;
++ if (EDGE_COUNT (bb->succs) != 2)
++ return bb;
++
++ e0 = EDGE_SUCC (bb, 0);
++ e1 = EDGE_SUCC (bb, 1);
++ if (e0->flags & EDGE_COMPLEX)
++ return e1->dest;
++ if (e1->flags & EDGE_COMPLEX)
++ return e0->dest;
++
++ return bb;
++}
++
++
++/* Compute the number of divides that postdominate each block in OCC and
++ its children. */
+
+- Does not check the type of DEF, nor that DEF is a GIMPLE register.
+- This is done in the caller for speed, because otherwise this routine
+- would be called for every definition and phi node. */
+ static void
+-execute_cse_reciprocals_1 (block_stmt_iterator *bsi, tree def,
+- enum place_reciprocal where)
++compute_merit (struct occurrence *occ)
+ {
+- use_operand_p use_p;
+- imm_use_iterator use_iter;
+- tree t, new_stmt, type;
+- int count = 0;
+- bool ok = !flag_trapping_math;
++ struct occurrence *occ_child;
++ basic_block dom = occ->bb;
+
+- /* Find uses. */
+- FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
++ for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
+ {
+- tree use_stmt = USE_STMT (use_p);
+- if (TREE_CODE (use_stmt) == MODIFY_EXPR
+- && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
+- && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
++ basic_block bb;
++ if (occ_child->children)
++ compute_merit (occ_child);
++
++ if (flag_exceptions)
++ bb = sole_noncomplex_succ (dom);
++ else
++ bb = dom;
++
++ if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
++ occ->num_divides += occ_child->num_divides;
++ }
++}
++
++/* TODO: Check how this compares with bsi_after_labels. Return an iterator
++ pointing after the last LABEL_EXPR, or before the first statement if there
++ is no LABEL_EXPR. */
++
++static block_stmt_iterator
++bsi_before_first_stmt (basic_block bb)
++{
++ block_stmt_iterator bsi;
++ for (bsi = bsi_start (bb);
++ !bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR;
++ bsi_next (&bsi))
++ ;
++
++ return bsi;
++}
++
++/* Return whether USE_STMT is a floating-point division by DEF. */
++static inline bool
++is_divide_by (tree use_stmt, tree def)
++{
++ return TREE_CODE (use_stmt) == MODIFY_EXPR
++ && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
++ && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def;
++}
++
++/* Walk the subset of the dominator tree rooted at OCC, setting the
++ RECIP_DEF field to a definition of 1.0 / DEF that can be used in
++ the given basic block. The field may be left NULL, of course,
++ if it is not possible or profitable to do the optimization.
++
++ DEF_BSI is an iterator pointing at the statement defining DEF.
++ If RECIP_DEF is set, a dominator already has a computation that can
++ be used. */
++
++static void
++insert_reciprocals (block_stmt_iterator *def_bsi, struct occurrence *occ,
++ tree def, tree recip_def)
++{
++ tree type, new_stmt;
++ block_stmt_iterator bsi;
++ struct occurrence *occ_child;
++
++ if (!recip_def
++ && (occ->insert_before_divide || !flag_trapping_math)
++ && occ->num_divides >= 2)
++ {
++ /* Make a variable with the replacement and substitute it. */
++ type = TREE_TYPE (def);
++ recip_def = make_rename_temp (type, "reciptmp");
++ new_stmt = build2 (MODIFY_EXPR, void_type_node, recip_def,
++ fold_build2 (RDIV_EXPR, type,
++ build_real (type, dconst1), def));
++
++
++ if (occ->insert_before_divide)
+ {
+- ++count;
+- /* Check if this use post-dominates the insertion point. */
+- if (ok || dominated_by_p (CDI_POST_DOMINATORS, bsi->bb,
+- bb_for_stmt (use_stmt)))
+- ok = true;
++ /* Case 1: insert before an existing divide. */
++ bsi = bsi_before_first_stmt (occ->bb);
++ while (!bsi_end_p (bsi) && !is_divide_by (bsi_stmt (bsi), def))
++ bsi_next (&bsi);
++
++ bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
++ }
++ else if (def_bsi && occ->bb == def_bsi->bb)
++ {
++ /* Case 2: insert right after the definition. Note that this will
++ never happen if the definition statement can throw, because in
++ that case the sole successor of the statement's basic block will
++ dominate all the uses as well. */
++ bsi_insert_after (def_bsi, new_stmt, BSI_NEW_STMT);
++ }
++ else
++ {
++ /* Case 3: insert in a basic block not containing defs/uses. */
++ bsi = bsi_before_first_stmt (occ->bb);
++ bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
+ }
+- if (count >= 2 && ok)
+- break;
++
++ occ->recip_def_stmt = new_stmt;
+ }
+
+- if (count < 2 || !ok)
+- return;
++ occ->recip_def = recip_def;
++ for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
++ insert_reciprocals (def_bsi, occ_child, def, recip_def);
++}
++
+
+- /* Make a variable with the replacement and substitute it. */
+- type = TREE_TYPE (def);
+- t = make_rename_temp (type, "reciptmp");
+- new_stmt = build2 (MODIFY_EXPR, void_type_node, t,
+- fold_build2 (RDIV_EXPR, type, build_real (type, dconst1),
+- def));
+-
+- if (where == PR_BEFORE_BSI)
+- bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
+- else if (where == PR_AFTER_BSI)
+- bsi_insert_after (bsi, new_stmt, BSI_NEW_STMT);
+- else if (where == PR_ON_ENTRY_EDGE)
+- bsi_insert_on_edge (single_succ_edge (ENTRY_BLOCK_PTR), new_stmt);
++/* Replace the divide at USE_P with a multiplication by the reciprocal, if
++ possible. */
++
++static inline void
++replace_reciprocal (use_operand_p use_p)
++{
++ tree use_stmt = USE_STMT (use_p);
++ basic_block bb = bb_for_stmt (use_stmt);
++ struct occurrence *occ = (struct occurrence *) bb->aux;
++
++ if (occ->recip_def && use_stmt != occ->recip_def_stmt)
++ {
++ TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
++ SET_USE (use_p, occ->recip_def);
++ }
++}
++
++
++/* Free OCC and return one more "struct occurrence" to be freed. */
++
++static struct occurrence *
++free_bb (struct occurrence *occ)
++{
++ struct occurrence *child, *next;
++
++ /* First get the two pointers hanging off OCC. */
++ next = occ->next;
++ child = occ->children;
++ occ->bb->aux = NULL;
++ pool_free (occ_pool, occ);
++
++ /* Now ensure that we don't recurse unless it is necessary. */
++ if (!child)
++ return next;
+ else
+- gcc_unreachable ();
++ {
++ while (next)
++ next = free_bb (next);
++
++ return child;
++ }
++}
+
+- FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
++static bool
++gate_cse_reciprocals (void)
++{
++ return optimize && !optimize_size && flag_unsafe_math_optimizations;
++}
++
++/* Look for floating-point divides among DEF's uses, and try to
++ replace them by multiplications with the reciprocal. Add
++ as many statements computing the reciprocal as needed.
++
++ Does not check the type of DEF, nor that DEF is a GIMPLE register.
++ This is done in the caller. */
++
++static void
++execute_cse_reciprocals_1 (block_stmt_iterator *def_bsi, tree def)
++{
++ use_operand_p use_p;
++ imm_use_iterator use_iter;
++ struct occurrence *occ;
++ int count = 0;
++
++ FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
+ {
+ tree use_stmt = USE_STMT (use_p);
+- if (use_stmt != new_stmt
+- && TREE_CODE (use_stmt) == MODIFY_EXPR
+- && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
+- && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
++ if (is_divide_by (use_stmt, def))
+ {
+- TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
+- SET_USE (use_p, t);
++ found_divide (bb_for_stmt (use_stmt));
++ count++;
+ }
+ }
++
++ /* Do the expensive part only if we can hope to optimize something. */
++ if (count >= 2)
++ {
++ for (occ = occ_head; occ; occ = occ->next)
++ {
++ compute_merit (occ);
++ insert_reciprocals (def_bsi, occ, def, NULL);
++ }
++
++ FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
++ {
++ tree use_stmt = USE_STMT (use_p);
++ if (is_divide_by (use_stmt, def))
++ replace_reciprocal (use_p);
++ }
++ }
++
++ for (occ = occ_head; occ; )
++ occ = free_bb (occ);
++
++ occ_head = NULL;
+ }
+
+ static void
+@@ -137,34 +404,30 @@ execute_cse_reciprocals (void)
+ basic_block bb;
+ tree arg;
+
+- if (flag_trapping_math)
+- calculate_dominance_info (CDI_POST_DOMINATORS);
++ occ_pool = create_alloc_pool ("dominators for recip",
++ sizeof (struct occurrence),
++ n_basic_blocks / 3 + 1);
+
+- if (single_succ_p (ENTRY_BLOCK_PTR))
+- for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
+- if (default_def (arg))
+- {
+- block_stmt_iterator bsi;
+- bsi = bsi_start (single_succ (ENTRY_BLOCK_PTR));
+- execute_cse_reciprocals_1 (&bsi, default_def (arg),
+- PR_ON_ENTRY_EDGE);
+- }
++ calculate_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
++
++ FOR_EACH_BB (bb)
++ bb->aux = NULL;
++
++ for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
++ if (default_def (arg))
++ execute_cse_reciprocals_1 (NULL, default_def (arg));
+
+ FOR_EACH_BB (bb)
+ {
+- block_stmt_iterator bsi;
++ block_stmt_iterator bsi = bsi_before_first_stmt (bb);
+ tree phi, def;
+- for (bsi = bsi_start (bb);
+- !bsi_end_p (bsi) && TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR;
+- bsi_next (&bsi))
+- ;
+
+ for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+ {
+ def = PHI_RESULT (phi);
+ if (FLOAT_TYPE_P (TREE_TYPE (def))
+ && is_gimple_reg (def))
+- execute_cse_reciprocals_1 (&bsi, def, PR_BEFORE_BSI);
++ execute_cse_reciprocals_1 (NULL, def);
+ }
+
+ for (; !bsi_end_p (bsi); bsi_next (&bsi))
+@@ -174,15 +437,12 @@ execute_cse_reciprocals (void)
+ && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
+ && FLOAT_TYPE_P (TREE_TYPE (def))
+ && TREE_CODE (def) == SSA_NAME)
+- execute_cse_reciprocals_1 (&bsi, def, PR_AFTER_BSI);
++ execute_cse_reciprocals_1 (&bsi, def);
+ }
+ }
+
+- if (flag_trapping_math)
+- free_dominance_info (CDI_POST_DOMINATORS);
+-
+- if (single_succ_p (ENTRY_BLOCK_PTR))
+- bsi_commit_one_edge_insert (single_succ_edge (ENTRY_BLOCK_PTR), NULL);
++ free_dominance_info (CDI_DOMINATORS | CDI_POST_DOMINATORS);
++ free_alloc_pool (occ_pool);
+ }
+
+ struct tree_opt_pass pass_cse_reciprocals =
================================================================
More information about the pld-cvs-commit
mailing list