This is really more of a question than a patch. Looking at PR/115687 I managed to convince myself there's a general class of problems here: splitting might produce constant subexpressions, but as far as I can tell there's nothing to eliminate those constant subexpressions. So I very quickly threw together a CSE that doesn't fold expressions, and it does eliminate the high-part constants in question.
At that point I realized the implementation here is bogus: it's not the folding that's the problem, but introducing new expressions post-split would break things -- or at least I think it would, we'd end up with insns the backends don't expect to have that late. I'm not sure if split2 would end up cleaning all that up at a functional level, but it certainly seems like optimization would be pretty far off the rails at that point and thus doesn't seem like a good idea. I'm also not sure how effective this would be without doing the folding, as without folding we can only eliminate the last insn in the constant sequence -- that's fine here, but it wouldn't work for more complicated stuff. So I think if this was to go anywhere we'd want to have a CSE that really only eliminates expressions (ie, doesn't do any of the other juggling to try and produce more constant subexpressions). There's a few places where new expressions can be introduced, so it'd probably be better done as a new cse_insn-type function instead of just a flag. It seems somewhat manageable to write, though. That said, I really don't know what I'm doing here. So I figured I'd just send out what I'd put together, mostly as a way to ask if it's worth putting time into this? --- gcc/common.opt | 4 ++ gcc/cse.cc | 112 ++++++++++++++++++++++++++++++++++++++++++------ gcc/opts.cc | 1 + gcc/passes.def | 1 + gcc/tree-pass.h | 1 + 5 files changed, 105 insertions(+), 14 deletions(-) diff --git a/gcc/common.opt b/gcc/common.opt index 327230967ea..efc4b8ddaf3 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2695,6 +2695,10 @@ frerun-cse-after-loop Common Var(flag_rerun_cse_after_loop) Optimization Add a common subexpression elimination pass after loop optimizations. +frerun-cse-after-split +Common Var(flag_rerun_cse_after_split) Optimization +Add a common subexpression elimination pass after splitting instructions. + frerun-loop-opt Common Ignore Does nothing. Preserved for backward compatibility. diff --git a/gcc/cse.cc b/gcc/cse.cc index c53deecbe54..d3955001ce7 100644 --- a/gcc/cse.cc +++ b/gcc/cse.cc @@ -543,11 +543,11 @@ static rtx fold_rtx (rtx, rtx_insn *); static rtx equiv_constant (rtx); static void record_jump_equiv (rtx_insn *, bool); static void record_jump_cond (enum rtx_code, machine_mode, rtx, rtx); -static void cse_insn (rtx_insn *); +static void cse_insn (rtx_insn *, int); static void cse_prescan_path (struct cse_basic_block_data *); static void invalidate_from_clobbers (rtx_insn *); static void invalidate_from_sets_and_clobbers (rtx_insn *); -static void cse_extended_basic_block (struct cse_basic_block_data *); +static void cse_extended_basic_block (struct cse_basic_block_data *, int); extern void dump_class (struct table_elt*); static void get_cse_reg_info_1 (unsigned int regno); static struct cse_reg_info * get_cse_reg_info (unsigned int regno); @@ -4511,12 +4511,13 @@ canonicalize_insn (rtx_insn *insn, vec<struct set> *psets) /* Main function of CSE. First simplify sources and addresses of all assignments - in the instruction, using previously-computed equivalents values. + in the instruction, using previously-computed equivalents values when + simplification is allowed. Then install the new sources and destinations in the table of available values. */ static void -cse_insn (rtx_insn *insn) +cse_insn (rtx_insn *insn, int simplify) { rtx x = PATTERN (insn); int i; @@ -4662,9 +4663,15 @@ cse_insn (rtx_insn *insn) else src_eqv_here = src_eqv; - /* Simplify and foldable subexpressions in SRC. Then get the fully- - simplified result, which may not necessarily be valid. */ - src_folded = fold_rtx (src, NULL); + /* If simplification is enabled, then simplify and foldable + subexpressions in SRC. Then get the fully-simplified result, which + may not necessarily be valid. + + Otherwise, just leave SRC alone. */ + if (simplify) + src_folded = fold_rtx (src, NULL); + else + src_folded = src; #if 0 /* ??? This caused bad code to be generated for the m68k port with -O2. @@ -6504,7 +6511,7 @@ check_for_label_ref (rtx_insn *insn) /* Process a single extended basic block described by EBB_DATA. */ static void -cse_extended_basic_block (struct cse_basic_block_data *ebb_data) +cse_extended_basic_block (struct cse_basic_block_data *ebb_data, int simplify) { int path_size = ebb_data->path_size; int path_entry; @@ -6571,7 +6578,7 @@ cse_extended_basic_block (struct cse_basic_block_data *ebb_data) if (changed) df_notes_rescan (insn); - cse_insn (insn); + cse_insn (insn, simplify); /* If we haven't already found an insn where we added a LABEL_REF, check this one. */ @@ -6637,6 +6644,7 @@ cse_extended_basic_block (struct cse_basic_block_data *ebb_data) /* Perform cse on the instructions of a function. F is the first instruction. NREGS is one plus the highest pseudo-reg number used in the instruction. + SIMPLIFY determines whether simplification should be performed. Return 2 if jump optimizations should be redone due to simplifications in conditional jump instructions. @@ -6644,7 +6652,7 @@ cse_extended_basic_block (struct cse_basic_block_data *ebb_data) Return 0 otherwise. */ static int -cse_main (rtx_insn *f ATTRIBUTE_UNUSED, int nregs) +cse_main (rtx_insn *f ATTRIBUTE_UNUSED, int nregs, int simplify) { struct cse_basic_block_data ebb_data; basic_block bb; @@ -6716,7 +6724,7 @@ cse_main (rtx_insn *f ATTRIBUTE_UNUSED, int nregs) if (dump_file) cse_dump_path (&ebb_data, ebb_data.nsets, dump_file); - cse_extended_basic_block (&ebb_data); + cse_extended_basic_block (&ebb_data, simplify); } } @@ -7546,7 +7554,7 @@ rest_of_handle_cse (void) if (dump_file) dump_flow_info (dump_file, dump_flags); - tem = cse_main (get_insns (), max_reg_num ()); + tem = cse_main (get_insns (), max_reg_num (), 1); /* If we are not running more CSE passes, then we are no longer expecting CSE to be run. But always rerun it in a cheap mode. */ @@ -7614,7 +7622,7 @@ rest_of_handle_cse2 (void) if (dump_file) dump_flow_info (dump_file, dump_flags); - tem = cse_main (get_insns (), max_reg_num ()); + tem = cse_main (get_insns (), max_reg_num (), 1); /* Run a pass to eliminate duplicated assignments to condition code registers. We have to run this after bypass_jumps, because it @@ -7694,7 +7702,7 @@ rest_of_handle_cse_after_global_opts (void) flag_cse_follow_jumps = 0; rebuild_jump_labels (get_insns ()); - tem = cse_main (get_insns (), max_reg_num ()); + tem = cse_main (get_insns (), max_reg_num (), 1); cse_cfg_altered |= purge_all_dead_edges (); delete_trivially_dead_insns (get_insns (), max_reg_num ()); @@ -7757,3 +7765,79 @@ make_pass_cse_after_global_opts (gcc::context *ctxt) { return new pass_cse_after_global_opts (ctxt); } + +/* Run simplified CSE pass after splitting. */ +static unsigned int +rest_of_handle_cse_after_split (void) +{ + int save_cfj; + int tem; + + /* We only want to do local CSE, so don't follow jumps. */ + save_cfj = flag_cse_follow_jumps; + flag_cse_follow_jumps = 0; + + rebuild_jump_labels (get_insns ()); + tem = cse_main (get_insns (), max_reg_num (), 0); + cse_cfg_altered |= purge_all_dead_edges (); + delete_trivially_dead_insns (get_insns (), max_reg_num ()); + + cse_not_expected = !flag_rerun_cse_after_split; + + /* If cse altered any jumps, rerun jump opts to clean things up. */ + if (tem == 2) + { + timevar_push (TV_JUMP); + rebuild_jump_labels (get_insns ()); + cse_cfg_altered |= cleanup_cfg (CLEANUP_CFG_CHANGED); + timevar_pop (TV_JUMP); + } + else if (tem == 1 || cse_cfg_altered) + cse_cfg_altered |= cleanup_cfg (0); + + flag_cse_follow_jumps = save_cfj; + return 0; +} + +namespace { + +const pass_data pass_data_cse_after_split = +{ + RTL_PASS, /* type */ + "cse_after_split", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_CSE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_cse_after_split : public rtl_opt_pass +{ +public: + pass_cse_after_split (gcc::context *ctxt) + : rtl_opt_pass (pass_data_cse_after_split, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *) final override + { + return optimize > 0 && flag_rerun_cse_after_split; + } + + unsigned int execute (function *) final override + { + return rest_of_handle_cse_after_split (); + } + +}; // class pass_cse_after_split + +} // anon namespace + +rtl_opt_pass * +make_pass_cse_after_split (gcc::context *ctxt) +{ + return new pass_cse_after_split (ctxt); +} diff --git a/gcc/opts.cc b/gcc/opts.cc index 915bce88fd6..85416c09dfd 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -651,6 +651,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_2_PLUS, OPT_fpeephole2, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_freorder_functions, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_frerun_cse_after_loop, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_frerun_cse_after_split, NULL, 1 }, #ifdef INSN_SCHEDULING { OPT_LEVELS_2_PLUS, OPT_fschedule_insns2, NULL, 1 }, #endif diff --git a/gcc/passes.def b/gcc/passes.def index 13c9dc34ddf..c6f543ad370 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -501,6 +501,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_split_all_insns); NEXT_PASS (pass_lower_subreg3); NEXT_PASS (pass_df_initialize_no_opt); + NEXT_PASS (pass_cse_after_split); NEXT_PASS (pass_stack_ptr_mod); NEXT_PASS (pass_mode_switching); NEXT_PASS (pass_match_asm_constraints); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 38902b1b01b..12b92385fa6 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -603,6 +603,7 @@ extern rtl_opt_pass *make_pass_match_asm_constraints (gcc::context *ctxt); extern rtl_opt_pass *make_pass_split_all_insns (gcc::context *ctxt); extern rtl_opt_pass *make_pass_fast_rtl_byte_dce (gcc::context *ctxt); extern rtl_opt_pass *make_pass_lower_subreg3 (gcc::context *ctxt); +extern rtl_opt_pass *make_pass_cse_after_split (gcc::context *ctxt); extern rtl_opt_pass *make_pass_mode_switching (gcc::context *ctxt); extern rtl_opt_pass *make_pass_sms (gcc::context *ctxt); extern rtl_opt_pass *make_pass_sched (gcc::context *ctxt); -- 2.45.1