---------- Forwarded message ---------- From: Sandeep Maram <[EMAIL PROTECTED]> Date: Thu, May 15, 2008 at 6:46 PM Subject: Re: How to handle loop iterator variable? To: Sebastian Pop <[EMAIL PROTECTED]> Cc: gcc@gcc.gnu.org
> In lambda-code.c:1858 you have some code that does a similar renaming: > > FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def) > FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) > propagate_value (use_p, newiv); > > Hi, In my previous mail I have missed some details. Please ignore it and consider this one. The function replace_uses_by() also does the same renaming and it is suitable in my case. So I have used it and renamed the iterator variable. The problem with the iterator variable is solved. Now I have deleted all the PHI nodes of loop_a and transferred and all the statements one by one to loop_b in the function fuse_loops(). So the statement j_12 = D.1190_11 + j_24; is now present in loop_b, but I am unable to create the phi node for it in loop_b. I tried to do so using for (phi = phi_nodes (loop_a->header); phi;) { next = PHI_CHAIN (phi); create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), loop_b->header); phi = next; } in fuse_loops() but the phinode is created for j_2 instead of j_24. Can you please tell me how can I create phi node for j_24 ? I am attaching the patch and .c file. Thanks, Sandeep. >
/* Loop fusion. */ /* This pass performs loop fusion: for example, the loops |loop_1 | A[i] = ... |endloop_1 |loop_2 | ... = A[i] |endloop_2 that becomes after fusion: |loop_1 | A[i] = ... | ... = A[i] |endloop_1 */ #include "config.h" #include "system.h" #include "coretypes.h" #include "tm.h" #include "ggc.h" #include "tree.h" #include "target.h" #include "rtl.h" #include "basic-block.h" #include "diagnostic.h" #include "tree-flow.h" #include "tree-dump.h" #include "timevar.h" #include "cfgloop.h" #include "expr.h" #include "optabs.h" #include "tree-chrec.h" #include "tree-data-ref.h" #include "tree-scalar-evolution.h" #include "tree-pass.h" #include "lambda.h" #include "langhooks.h" #include "tree-vectorizer.h" /* Returns true when a given loop is a parallel loop. */ static bool is_parallel_loop (struct loop *loop) { VEC (ddr_p, heap) * dependence_relations; VEC (data_reference_p, heap) * datarefs; lambda_trans_matrix trans; bool ret = false; /* If the loop can be reversed, the iterations are independent. */ datarefs = VEC_alloc (data_reference_p, heap, 10); dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); compute_data_dependences_for_loop (loop, true, &datarefs, &dependence_relations); trans = lambda_trans_matrix_new (1, 1); LTM_MATRIX (trans)[0][0] = -1; if (lambda_transform_legal_p (trans, 1, dependence_relations)) { ret = true; } free_dependence_relations (dependence_relations); free_data_refs (datarefs); if (ret == true) fprintf (dump_file, " loop %d is a parallel loop\n ", loop->num); return ret; } /* Returns true when a given loop is a sequential loop. */ static bool is_sequential_loop (struct loop *loop) { if (is_parallel_loop (loop)) return false; else return true; } /* Returns true if there is no fusion preventing constraint between two given loops. */ static bool no_fusion_preventing_constraint (struct loop *loop_a, struct loop *loop_b) { bool ret = true; struct loop *father_loop; father_loop = find_common_loop (loop_a, loop_b); struct graph *rdg_father; rdg_father = build_rdg (father_loop); struct graph *rdg_a; rdg_a = build_rdg (loop_a); struct graph *rdg_b; rdg_b = build_rdg (loop_b); return ret; } /* Returns true when two loops can be fused. */ static bool can_fuse_loops (struct loop *loop_a, struct loop *loop_b) { if ((is_parallel_loop (loop_a) && is_parallel_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)) || (is_sequential_loop (loop_a) && is_sequential_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b))) { fprintf (dump_file, " can fuse loops %d %d ", loop_a->num, loop_b->num); return true; } return false; } /* The following function fuses two loops. */ void fuse_loops2 (struct loop *loop_a, struct loop *loop_b) { basic_block *bbs_a, *bbs_b; struct loop *new_loop; bbs_a = get_loop_body (loop_a); bbs_b = get_loop_body (loop_b); debug_loop (loop_a, 10); debug_loop (loop_b, 10); tree_merge_blocks (bbs_b[0], bbs_a[0]); debug_loop (loop_a, 10); debug_loop (loop_b, 10); cancel_loop_tree (loop_a); } /* The following function fuses two loops. */ void fuse_loops (struct loop *loop_a, struct loop *loop_b) { debug_loop (loop_a, 10); debug_loop (loop_b, 10); block_stmt_iterator bsi_a, bsi_a_last, bsi_b, bsi_b_last, bsi; bsi_b_last = bsi_last (loop_b->header); bsi_prev (&bsi_b_last); tree phi, prev = NULL_TREE, next, use, def; /* Detects the iterator variable of loop_b. */ for (phi = phi_nodes (loop_b->header); phi;) { next = PHI_CHAIN (phi); use = PHI_RESULT (phi); phi = next; } /* Detects the iterator variable of loop_a. */ for (phi = phi_nodes (loop_a->header); phi;) { next = PHI_CHAIN (phi); def = PHI_RESULT (phi); phi = next; } /*for (phi = phi_nodes (loop_a->header); phi;) { next = PHI_CHAIN (phi); create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), loop_b->header); phi = next; } */ /* Replaces the itaerator variable of loop_a with that of loop_b. */ for (phi = phi_nodes (loop_a->header); phi;) { next = PHI_CHAIN (phi); replace_uses_by (def, use); phi = next; } /* Remove PHI nodes of loop_a. */ for (phi = phi_nodes (loop_a->header); phi;) { next = PHI_CHAIN (phi); remove_phi_node (phi, prev, true); phi = next; } /* Transfer all the statements one by one. */ for (bsi = bsi_start (loop_a->header); !bsi_end_p (bsi);) { if ((TREE_CODE (bsi_stmt (bsi)) != COND_EXPR) && (TREE_CODE (bsi_stmt (bsi)) != LABEL_EXPR)) { update_stmt (bsi_stmt (bsi)); bsi_move_before (&bsi, &bsi_b_last); fprintf (stderr, " transferred one statement. \n "); fprintf (dump_file, " transferred one statement. \n "); update_stmt (bsi_stmt (bsi)); } else { bsi_next (&bsi); } } debug_loop (loop_a, 10); debug_loop (loop_b, 10); update_ssa (TODO_update_ssa); fprintf (stderr, "deleting loop %d \n", loop_a->num); cancel_loop_tree (loop_a); fprintf (stderr, "deleted loop %d \n", loop_a->num); } /* Returns true if loop_a is dependent on loop_b. */ static bool is_dependent_on (struct loop *loop_a, struct loop *loop_b) { bool ret = false; struct loop *loop_father; loop_father = find_common_loop (loop_a, loop_b); VEC (ddr_p, heap) * dependence_relations_a; VEC (ddr_p, heap) * dependence_relations_b; VEC (ddr_p, heap) * dependence_relations_father; VEC (data_reference_p, heap) * datarefs_a; VEC (data_reference_p, heap) * datarefs_b; VEC (data_reference_p, heap) * datarefs_father; datarefs_a = VEC_alloc (data_reference_p, heap, 10); datarefs_b = VEC_alloc (data_reference_p, heap, 10); datarefs_father = VEC_alloc (data_reference_p, heap, 10); dependence_relations_a = VEC_alloc (ddr_p, heap, 10 * 10); dependence_relations_b = VEC_alloc (ddr_p, heap, 10 * 10); dependence_relations_father = VEC_alloc (ddr_p, heap, 10 * 10); compute_data_dependences_for_loop (loop_a, true, &datarefs_a, &dependence_relations_a); compute_data_dependences_for_loop (loop_b, true, &datarefs_b, &dependence_relations_b); compute_data_dependences_for_loop (loop_father, true, &datarefs_father, &dependence_relations_father); debug_data_dependence_relations (dependence_relations_a); debug_data_dependence_relations (dependence_relations_b); debug_data_dependence_relations (dependence_relations_father); fprintf (dump_file, "dumping data dependences of a \n"); dump_data_dependence_relations (dump_file, dependence_relations_a); fprintf (dump_file, "dumping data dependences of b \n"); dump_data_dependence_relations (dump_file, dependence_relations_b); fprintf (dump_file, "dumping data dependences of father \n"); dump_data_dependence_relations (dump_file, dependence_relations_father); free_dependence_relations (dependence_relations_a); free_data_refs (datarefs_a); free_dependence_relations (dependence_relations_b); free_data_refs (datarefs_b); free_dependence_relations (dependence_relations_father); free_data_refs (datarefs_father); return true; } /* Structure to hold the list of partitions. */ static struct partition { int num; int list[10]; int present; char type; } ; /* Create a graph original_graph. It's nodes are loops and it's edges are data dependencies. */ static struct graph * create_original_graph (int num_loops) { struct graph *g = new_graph (num_loops); struct loop *loop; loop_iterator li; FOR_EACH_LOOP (li, loop, 0) { struct partition *p = XNEW (struct partition); p->num = 0; p->list[p->num] = loop->num; p->num++; p->present = 1; if (is_parallel_loop (loop)) { p->type = 'P'; g->vertices[loop->num].data = p; } else { p->type = 'S'; g->vertices[loop->num].data = p; } } FOR_EACH_LOOP (li, loop, 0) { struct loop *loop_dummy; loop_iterator li_dummy; FOR_EACH_LOOP (li_dummy, loop_dummy, 0) { if (loop != loop_dummy) { if (is_dependent_on (loop, loop_dummy)) { if (no_fusion_preventing_constraint (loop, loop_dummy)) { struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num); ge->data = "FPE"; } else add_edge (g, loop_dummy->num, loop->num); } } } } return g; } /* Create a component graph from the original graph. The parameter t denotes type of the component graph - parallel or sequential. */ static struct graph* create_component_graph (char t, int num_loops) { struct graph *og = create_original_graph (num_loops); struct graph *g = new_graph (num_loops); struct partition *p, *q, *r; int i; char opp; if (t == 'P') opp = 'S'; else opp = 'P'; struct loop *loop; loop_iterator li; for (i = 1; i < num_loops; i++) { p = XNEW (struct partition); p = og->vertices[i].data; if (p->type == t) { q = XNEW (struct partition); q->num = 0; q->list[q->num] = i; q->num++; q->present = 1; q->type = t; g->vertices[i].data = q; } } if (t == 'P') FOR_EACH_LOOP (li, loop, 0) { struct loop *loop_dummy; loop_iterator li_dummy; FOR_EACH_LOOP (li_dummy, loop_dummy, 0) { if (loop != loop_dummy) { if (is_dependent_on (loop, loop_dummy)&& (is_parallel_loop (loop)) && (is_parallel_loop (loop_dummy))) { if (no_fusion_preventing_constraint (loop, loop_dummy)) { struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num); ge->data = "FPE"; } else add_edge (g, loop_dummy->num, loop->num); } } } } if (t =='S') FOR_EACH_LOOP (li, loop, 0) { struct loop *loop_dummy; loop_iterator li_dummy; FOR_EACH_LOOP (li_dummy, loop_dummy, 0) { if (loop != loop_dummy) { if (is_dependent_on (loop, loop_dummy)&& (is_sequential_loop (loop)) && (is_sequential_loop (loop_dummy))) { if (no_fusion_preventing_constraint (loop, loop_dummy)) { struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num); ge->data = "FPE"; } else add_edge (g, loop_dummy->num, loop->num); } } } } for (i = 1; i < num_loops; i++) { p = XNEW (struct partition); p = og->vertices[i].data; if (p->type == opp) { struct graph_edge *ges; for (ges = og->vertices[i].pred; ges; ges = ges->pred_next) { int source = ges->src; struct graph_edge *ged; for (ged = og->vertices[i].succ; ged; ged = ged->succ_next) { int destin = ged->dest; q = XNEW (struct partition); q = og->vertices[source].data; r = XNEW (struct partition); r = og->vertices[destin].data; if ((q->type == t) && (r->type == t) && (q->list[0] != r->list[0])) { struct graph_edge *ge = add_edge (g, source, destin); ge->data = "FPE"; } } } } } return g; } /* Partitions the graph according to rules. And fuse the loops corresponding to the partitions. */ static void do_partitions (struct graph *g) { int num = g->n_vertices; int i; struct partition *p, *q; for (i = 1; i<num; i++) { p = XNEW (struct partition); p = g->vertices[i].data; struct graph_edge *ge; if (p) if (p->present == 1) for (ge = g->vertices[i].pred; ge; ge = ge->pred_next) { q = XNEW (struct partition); q = g->vertices[ge->src].data; if (ge->data != "FPE") { p->list[p->num] = q->list[0]; p->num++; q->present = 0; } } } } /* Performs loop fusion according to the given graph. */ static void fuse_according_to_graph (struct graph *g) { int num = g->n_vertices; int i, j; struct partition *p; struct loop *loop; loop_iterator li; for (i = 1; i<num; i++) { p = XNEW (struct partition); p = g->vertices[i].data; if (p) if (p->present == 1) { for (j = (p->num)-1; j > 0 ; j--) { FOR_EACH_LOOP (li, loop, 0) { if ((loop->num == p->list[j]) && (loop->next->num == p->list[j-1])) fuse_loops (loop, loop->next); } } } } } /* Performs legal loops fusion in the current function. */ static unsigned int tree_loop_fusion (void) { if (!current_loops) return 0; int num_loops = number_of_loops(); struct loop *loop; loop_iterator li; /* For all the loops in the program pick consecutive loops loop_a and loop_b. */ FOR_EACH_LOOP (li, loop, 0) { if (loop->next) { //if (can_fuse_loops (loop, loop->next)) { /*if (is_dependent_on (loop, loop->next)) fprintf (stderr, "loop %d is dependent on loop %d \n\n", loop->num, loop->next->num); else fprintf (stderr, "loop %d is not dependent on loop %d \n\n", loop->num, loop->next->num);*/ //fuse_loops2 (loop, loop->next); fuse_loops (loop, loop->next); } } } /* struct graph *original_graph_1 = new_graph (num_loops); original_graph_1 = create_original_graph (num_loops); dump_graph (dump_file, original_graph_1); dump_graph (stderr, original_graph_1); fprintf (dump_file, "printed the original_graph\n\n"); fprintf (stderr, "printed the original_graph\n\n"); struct graph *component_graph_parallel = create_component_graph ('P', num_loops); dump_graph (dump_file, component_graph_parallel); dump_graph (stderr, component_graph_parallel); fprintf (dump_file, "printed the component parallel graph\n\n"); fprintf (stderr, "printed the component parallel graph\n\n"); dump_graph (dump_file, original_graph_1); dump_graph (stderr, original_graph_1); fprintf (dump_file, "printed the original_graph\n\n"); fprintf (stderr, "printed the original_graph\n\n"); do_partitions (component_graph_parallel); dump_graph (dump_file, component_graph_parallel); dump_graph (stderr, component_graph_parallel); fprintf (dump_file, "printed the partitioned component parallel graph\n"); fprintf (stderr, "printed the partitioned component parallel graph\n"); fuse_according_to_graph (component_graph_parallel);*/ return 0; } static bool gate_tree_loop_fusion (void) { return flag_tree_loop_fusion != 0; } struct gimple_opt_pass pass_loop_fusion = { { GIMPLE_PASS, "lfusion", /* name */ gate_tree_loop_fusion, /* gate */ tree_loop_fusion, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ TV_TREE_LOOP_FUSION, /* tv_id */ PROP_cfg | PROP_ssa, /* properties_required */ 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_dump_func | TODO_verify_loops | TODO_update_ssa, /* todo_flags_finish */ } };
Index: doc/invoke.texi =================================================================== --- doc/invoke.texi (revision 134556) +++ doc/invoke.texi (working copy) @@ -355,6 +355,7 @@ -fstrict-aliasing -fstrict-overflow -fthread-jumps -ftracer -ftree-ccp @gol -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-dce @gol -ftree-dominator-opts -ftree-dse -ftree-fre -ftree-loop-im @gol +-ftree-loop-fusion @gol -ftree-loop-distribution @gol -ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol [EMAIL PROTECTED] -ftree-pre -ftree-reassoc -ftree-salias @gol @@ -5888,6 +5889,9 @@ Compare the results of several data dependence analyzers. This option is used for debugging the data dependence analyzers. [EMAIL PROTECTED] -ftree-loop-fusion +Perform loop fusion. + @item -ftree-loop-distribution Perform loop distribution. This flag can improve cache performance on big loop bodies and allow further loop optimizations, like Index: tree-pass.h =================================================================== --- tree-pass.h (revision 134556) +++ tree-pass.h (working copy) @@ -287,6 +287,7 @@ extern struct gimple_opt_pass pass_empty_loop; extern struct gimple_opt_pass pass_record_bounds; extern struct gimple_opt_pass pass_if_conversion; +extern struct gimple_opt_pass pass_loop_fusion; extern struct gimple_opt_pass pass_loop_distribution; extern struct gimple_opt_pass pass_vectorize; extern struct gimple_opt_pass pass_complete_unroll; Index: tree-loop-fusion.c =================================================================== --- tree-loop-fusion.c (revision 0) +++ tree-loop-fusion.c (revision 0) @@ -0,0 +1,589 @@ +/* Loop fusion. */ + +/* This pass performs loop fusion: for example, the loops + + |loop_1 + | A[i] = ... + |endloop_1 + + + |loop_2 + | ... = A[i] + |endloop_2 + + that becomes after fusion: + + |loop_1 + | A[i] = ... + | ... = A[i] + |endloop_1 + +*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "ggc.h" +#include "tree.h" +#include "target.h" + +#include "rtl.h" +#include "basic-block.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "expr.h" +#include "optabs.h" +#include "tree-chrec.h" +#include "tree-data-ref.h" +#include "tree-scalar-evolution.h" +#include "tree-pass.h" +#include "lambda.h" +#include "langhooks.h" +#include "tree-vectorizer.h" + + + + +/* Returns true when a given loop is a parallel loop. */ + +static bool +is_parallel_loop (struct loop *loop) +{ + VEC (ddr_p, heap) * dependence_relations; + VEC (data_reference_p, heap) * datarefs; + lambda_trans_matrix trans; + bool ret = false; + + /* If the loop can be reversed, the iterations are independent. */ + datarefs = VEC_alloc (data_reference_p, heap, 10); + dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); + compute_data_dependences_for_loop (loop, true, &datarefs, &dependence_relations); + trans = lambda_trans_matrix_new (1, 1); + LTM_MATRIX (trans)[0][0] = -1; + + if (lambda_transform_legal_p (trans, 1, dependence_relations)) + { + ret = true; + } + + free_dependence_relations (dependence_relations); + free_data_refs (datarefs); + if (ret == true) + fprintf (dump_file, " loop %d is a parallel loop\n ", loop->num); + return ret; +} + +/* Returns true when a given loop is a sequential loop. */ + +static bool +is_sequential_loop (struct loop *loop) +{ + if (is_parallel_loop (loop)) + return false; + else + return true; +} + +/* Returns true if there is no fusion preventing constraint between two given loops. */ + +static bool +no_fusion_preventing_constraint (struct loop *loop_a, struct loop *loop_b) +{ + bool ret = true; + struct loop *father_loop; + father_loop = find_common_loop (loop_a, loop_b); + struct graph *rdg_father; + rdg_father = build_rdg (father_loop); + struct graph *rdg_a; + rdg_a = build_rdg (loop_a); + struct graph *rdg_b; + rdg_b = build_rdg (loop_b); + return ret; +} + +/* Returns true when two loops can be fused. */ + +static bool +can_fuse_loops (struct loop *loop_a, struct loop *loop_b) +{ + if ((is_parallel_loop (loop_a) && is_parallel_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)) || + (is_sequential_loop (loop_a) && is_sequential_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b))) + { + fprintf (dump_file, " can fuse loops %d %d ", loop_a->num, loop_b->num); + return true; + } + + return false; +} + +/* The following function fuses two loops. */ + +void +fuse_loops2 (struct loop *loop_a, struct loop *loop_b) +{ + basic_block *bbs_a, *bbs_b; + struct loop *new_loop; + bbs_a = get_loop_body (loop_a); + bbs_b = get_loop_body (loop_b); + debug_loop (loop_a, 10); + debug_loop (loop_b, 10); + tree_merge_blocks (bbs_b[0], bbs_a[0]); + debug_loop (loop_a, 10); + debug_loop (loop_b, 10); + cancel_loop_tree (loop_a); + +} + + +/* The following function fuses two loops. */ + +void +fuse_loops (struct loop *loop_a, struct loop *loop_b) +{ + debug_loop (loop_a, 10); + debug_loop (loop_b, 10); + block_stmt_iterator bsi_a, bsi_a_last, bsi_b, bsi_b_last, bsi; + bsi_b_last = bsi_last (loop_b->header); + bsi_prev (&bsi_b_last); + tree phi, prev = NULL_TREE, next, use, def; + + /* Detects the iterator variable of loop_b. */ + for (phi = phi_nodes (loop_b->header); + phi;) + { + next = PHI_CHAIN (phi); + use = PHI_RESULT (phi); + phi = next; + } + + /* Detects the iterator variable of loop_a. */ + for (phi = phi_nodes (loop_a->header); + phi;) + { + next = PHI_CHAIN (phi); + def = PHI_RESULT (phi); + phi = next; + } + + /*for (phi = phi_nodes (loop_a->header); + phi;) + { + next = PHI_CHAIN (phi); + create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), loop_b->header); + phi = next; + } */ + + /* Replaces the itaerator variable of loop_a with that of loop_b. */ + for (phi = phi_nodes (loop_a->header); + phi;) + { + next = PHI_CHAIN (phi); + replace_uses_by (def, use); + phi = next; + } + + /* Remove PHI nodes of loop_a. */ + for (phi = phi_nodes (loop_a->header); + phi;) + { + next = PHI_CHAIN (phi); + remove_phi_node (phi, prev, true); + phi = next; + } + + /* Transfer all the statements one by one. */ + for (bsi = bsi_start (loop_a->header); !bsi_end_p (bsi);) + { + if ((TREE_CODE (bsi_stmt (bsi)) != COND_EXPR) && (TREE_CODE (bsi_stmt (bsi)) != LABEL_EXPR)) + { + update_stmt (bsi_stmt (bsi)); + bsi_move_before (&bsi, &bsi_b_last); + fprintf (stderr, " transferred one statement. \n "); + fprintf (dump_file, " transferred one statement. \n "); + update_stmt (bsi_stmt (bsi)); + } + else + { + bsi_next (&bsi); + } + } + + debug_loop (loop_a, 10); + debug_loop (loop_b, 10); + update_ssa (TODO_update_ssa); + fprintf (stderr, "deleting loop %d \n", loop_a->num); + cancel_loop_tree (loop_a); + fprintf (stderr, "deleted loop %d \n", loop_a->num); +} + + +/* Returns true if loop_a is dependent on loop_b. */ + +static bool +is_dependent_on (struct loop *loop_a, struct loop *loop_b) +{ + bool ret = false; + struct loop *loop_father; + loop_father = find_common_loop (loop_a, loop_b); + + VEC (ddr_p, heap) * dependence_relations_a; + VEC (ddr_p, heap) * dependence_relations_b; + VEC (ddr_p, heap) * dependence_relations_father; + + VEC (data_reference_p, heap) * datarefs_a; + VEC (data_reference_p, heap) * datarefs_b; + VEC (data_reference_p, heap) * datarefs_father; + + datarefs_a = VEC_alloc (data_reference_p, heap, 10); + datarefs_b = VEC_alloc (data_reference_p, heap, 10); + datarefs_father = VEC_alloc (data_reference_p, heap, 10); + + dependence_relations_a = VEC_alloc (ddr_p, heap, 10 * 10); + dependence_relations_b = VEC_alloc (ddr_p, heap, 10 * 10); + dependence_relations_father = VEC_alloc (ddr_p, heap, 10 * 10); + + compute_data_dependences_for_loop (loop_a, true, &datarefs_a, &dependence_relations_a); + compute_data_dependences_for_loop (loop_b, true, &datarefs_b, &dependence_relations_b); + compute_data_dependences_for_loop (loop_father, true, &datarefs_father, &dependence_relations_father); + + debug_data_dependence_relations (dependence_relations_a); + debug_data_dependence_relations (dependence_relations_b); + debug_data_dependence_relations (dependence_relations_father); + + fprintf (dump_file, "dumping data dependences of a \n"); + dump_data_dependence_relations (dump_file, dependence_relations_a); + fprintf (dump_file, "dumping data dependences of b \n"); + dump_data_dependence_relations (dump_file, dependence_relations_b); + fprintf (dump_file, "dumping data dependences of father \n"); + dump_data_dependence_relations (dump_file, dependence_relations_father); + + free_dependence_relations (dependence_relations_a); + free_data_refs (datarefs_a); + free_dependence_relations (dependence_relations_b); + free_data_refs (datarefs_b); + free_dependence_relations (dependence_relations_father); + free_data_refs (datarefs_father); + + return true; +} + +/* Structure to hold the list of partitions. */ + +static struct partition +{ + int num; + int list[10]; + int present; + char type; +} ; + + +/* Create a graph original_graph. It's nodes are loops and it's edges are data dependencies. */ + +static struct graph * +create_original_graph (int num_loops) +{ + struct graph *g = new_graph (num_loops); + struct loop *loop; + loop_iterator li; + FOR_EACH_LOOP (li, loop, 0) + { + struct partition *p = XNEW (struct partition); + p->num = 0; + p->list[p->num] = loop->num; + p->num++; + p->present = 1; + + if (is_parallel_loop (loop)) + { + p->type = 'P'; + g->vertices[loop->num].data = p; + } + else + { + p->type = 'S'; + g->vertices[loop->num].data = p; + } + } + FOR_EACH_LOOP (li, loop, 0) + { + struct loop *loop_dummy; + loop_iterator li_dummy; + FOR_EACH_LOOP (li_dummy, loop_dummy, 0) + { + if (loop != loop_dummy) + { + if (is_dependent_on (loop, loop_dummy)) + { + if (no_fusion_preventing_constraint (loop, loop_dummy)) + { + struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num); + ge->data = "FPE"; + } + else + add_edge (g, loop_dummy->num, loop->num); + } + } + } + } + + + return g; +} + +/* Create a component graph from the original graph. The parameter t denotes type of the component graph - + parallel or sequential. */ + +static struct graph* +create_component_graph (char t, int num_loops) +{ + struct graph *og = create_original_graph (num_loops); + struct graph *g = new_graph (num_loops); + struct partition *p, *q, *r; + int i; + char opp; + if (t == 'P') + opp = 'S'; + else + opp = 'P'; + struct loop *loop; + loop_iterator li; + + + for (i = 1; i < num_loops; i++) + { + p = XNEW (struct partition); + p = og->vertices[i].data; + if (p->type == t) + { + q = XNEW (struct partition); + q->num = 0; + q->list[q->num] = i; + q->num++; + q->present = 1; + q->type = t; + g->vertices[i].data = q; + } + } + if (t == 'P') + FOR_EACH_LOOP (li, loop, 0) + { + struct loop *loop_dummy; + loop_iterator li_dummy; + FOR_EACH_LOOP (li_dummy, loop_dummy, 0) + { + if (loop != loop_dummy) + { + if (is_dependent_on (loop, loop_dummy)&& (is_parallel_loop (loop)) && (is_parallel_loop (loop_dummy))) + { + if (no_fusion_preventing_constraint (loop, loop_dummy)) + { + struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num); + ge->data = "FPE"; + } + else + add_edge (g, loop_dummy->num, loop->num); + } + } + } + } + if (t =='S') + FOR_EACH_LOOP (li, loop, 0) + { + struct loop *loop_dummy; + loop_iterator li_dummy; + FOR_EACH_LOOP (li_dummy, loop_dummy, 0) + { + if (loop != loop_dummy) + { + if (is_dependent_on (loop, loop_dummy)&& (is_sequential_loop (loop)) && (is_sequential_loop (loop_dummy))) + { + if (no_fusion_preventing_constraint (loop, loop_dummy)) + { + struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num); + ge->data = "FPE"; + } + else + add_edge (g, loop_dummy->num, loop->num); + } + } + } + } + + for (i = 1; i < num_loops; i++) + { + p = XNEW (struct partition); + p = og->vertices[i].data; + if (p->type == opp) + { + struct graph_edge *ges; + for (ges = og->vertices[i].pred; ges; ges = ges->pred_next) + { + int source = ges->src; + struct graph_edge *ged; + for (ged = og->vertices[i].succ; ged; ged = ged->succ_next) + { + int destin = ged->dest; + q = XNEW (struct partition); + q = og->vertices[source].data; + r = XNEW (struct partition); + r = og->vertices[destin].data; + if ((q->type == t) && (r->type == t) && (q->list[0] != r->list[0])) + { + struct graph_edge *ge = add_edge (g, source, destin); + ge->data = "FPE"; + } + } + } + } + } + return g; + +} + + +/* Partitions the graph according to rules. And fuse the loops corresponding to the partitions. */ + +static void +do_partitions (struct graph *g) +{ + int num = g->n_vertices; + int i; + struct partition *p, *q; + for (i = 1; i<num; i++) + { + p = XNEW (struct partition); + p = g->vertices[i].data; + struct graph_edge *ge; + if (p) + if (p->present == 1) + for (ge = g->vertices[i].pred; ge; ge = ge->pred_next) + { + q = XNEW (struct partition); + q = g->vertices[ge->src].data; + if (ge->data != "FPE") + { + p->list[p->num] = q->list[0]; + p->num++; + q->present = 0; + } + } + } + +} + +/* Performs loop fusion according to the given graph. */ + +static void +fuse_according_to_graph (struct graph *g) +{ + int num = g->n_vertices; + int i, j; + struct partition *p; + struct loop *loop; + loop_iterator li; + for (i = 1; i<num; i++) + { + p = XNEW (struct partition); + p = g->vertices[i].data; + if (p) + if (p->present == 1) + { + for (j = (p->num)-1; j > 0 ; j--) + { + FOR_EACH_LOOP (li, loop, 0) + { + if ((loop->num == p->list[j]) && (loop->next->num == p->list[j-1])) + fuse_loops (loop, loop->next); + } + } + } + } + +} + + +/* Performs legal loops fusion in the current function. */ + +static unsigned int +tree_loop_fusion (void) +{ + if (!current_loops) + return 0; + int num_loops = number_of_loops(); + struct loop *loop; + loop_iterator li; + + /* For all the loops in the program pick consecutive loops loop_a and loop_b. */ + FOR_EACH_LOOP (li, loop, 0) + { + if (loop->next) + { + //if (can_fuse_loops (loop, loop->next)) + { + /*if (is_dependent_on (loop, loop->next)) + fprintf (stderr, "loop %d is dependent on loop %d \n\n", loop->num, loop->next->num); + else + fprintf (stderr, "loop %d is not dependent on loop %d \n\n", loop->num, loop->next->num);*/ + //fuse_loops2 (loop, loop->next); + fuse_loops (loop, loop->next); + } + } + + } + + /* struct graph *original_graph_1 = new_graph (num_loops); + original_graph_1 = create_original_graph (num_loops); + dump_graph (dump_file, original_graph_1); + dump_graph (stderr, original_graph_1); + fprintf (dump_file, "printed the original_graph\n\n"); + fprintf (stderr, "printed the original_graph\n\n"); + struct graph *component_graph_parallel = create_component_graph ('P', num_loops); + dump_graph (dump_file, component_graph_parallel); + dump_graph (stderr, component_graph_parallel); + fprintf (dump_file, "printed the component parallel graph\n\n"); + fprintf (stderr, "printed the component parallel graph\n\n"); + dump_graph (dump_file, original_graph_1); + dump_graph (stderr, original_graph_1); + fprintf (dump_file, "printed the original_graph\n\n"); + fprintf (stderr, "printed the original_graph\n\n"); + do_partitions (component_graph_parallel); + dump_graph (dump_file, component_graph_parallel); + dump_graph (stderr, component_graph_parallel); + fprintf (dump_file, "printed the partitioned component parallel graph\n"); + fprintf (stderr, "printed the partitioned component parallel graph\n"); + fuse_according_to_graph (component_graph_parallel);*/ + + + return 0; +} + +static bool +gate_tree_loop_fusion (void) +{ + return flag_tree_loop_fusion != 0; +} + +struct gimple_opt_pass pass_loop_fusion = +{ + { + GIMPLE_PASS, + "lfusion", /* name */ + gate_tree_loop_fusion, /* gate */ + tree_loop_fusion, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_TREE_LOOP_FUSION, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | TODO_verify_loops | TODO_update_ssa, /* todo_flags_finish */ + } +}; + Index: timevar.def =================================================================== --- timevar.def (revision 134556) +++ timevar.def (working copy) @@ -125,6 +125,7 @@ DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear") +DEFTIMEVAR (TV_TREE_LOOP_FUSION , "tree loop fusion") DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution") DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences") DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching") Index: common.opt =================================================================== --- common.opt (revision 134556) +++ common.opt (working copy) @@ -1115,6 +1115,10 @@ Common Report Var(flag_tree_fre) Optimization Enable Full Redundancy Elimination (FRE) on trees +ftree-loop-fusion +Common Report Var(flag_tree_loop_fusion) Optimization +Enable loop fusion on trees + ftree-loop-distribution Common Report Var(flag_tree_loop_distribution) Optimization Enable loop distribution on trees Index: Makefile.in =================================================================== --- Makefile.in (revision 134556) +++ Makefile.in (working copy) @@ -1147,6 +1147,7 @@ tree-if-conv.o \ tree-into-ssa.o \ tree-iterator.o \ + tree-loop-fusion.o \ tree-loop-distribution.o \ tree-loop-linear.o \ tree-nested.o \ @@ -2289,6 +2290,11 @@ $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \ $(TARGET_H) tree-chrec.h $(OBSTACK_H) +tree-loop-fusion.o: tree-loop-fusion.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \ + $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ + tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) \ + $(TARGET_H) tree-chrec.h tree-vectorizer.h tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \ $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ Index: tree-cfg.c =================================================================== --- tree-cfg.c (revision 134556) +++ tree-cfg.c (working copy) @@ -104,7 +104,7 @@ static inline void change_bb_for_stmt (tree t, basic_block bb); /* Flowgraph optimization and cleanup. */ -static void tree_merge_blocks (basic_block, basic_block); +extern void tree_merge_blocks (basic_block, basic_block); static bool tree_can_merge_blocks_p (basic_block, basic_block); static void remove_bb (basic_block); static edge find_taken_edge_computed_goto (basic_block, tree); @@ -1277,7 +1277,7 @@ /* Merge block B into block A. */ -static void +void tree_merge_blocks (basic_block a, basic_block b) { block_stmt_iterator bsi; Index: passes.c =================================================================== --- passes.c (revision 134556) +++ passes.c (working copy) @@ -627,6 +627,7 @@ NEXT_PASS (pass_empty_loop); NEXT_PASS (pass_record_bounds); NEXT_PASS (pass_check_data_deps); + NEXT_PASS (pass_loop_fusion); NEXT_PASS (pass_loop_distribution); NEXT_PASS (pass_linear_transform); NEXT_PASS (pass_iv_canon);