This is really more of a question than a patch.

Looking at PR/115687 I managed to convince myself there's a general
class of problems here: splitting might produce constant subexpressions,
but as far as I can tell there's nothing to eliminate those constant
subexpressions.  So I very quickly threw together a CSE that doesn't
fold expressions, and it does eliminate the high-part constants in
question.

At that point I realized the implementation here is bogus: the problem
isn't the folding itself, but that introducing new expressions post-split
would break things -- or at least I think it would, since we'd end up
with insns the backends don't expect to see that late.  I'm not sure if
split2 would end up cleaning all that up at a functional level, but it
certainly seems like optimization would be pretty far off the rails at
that point, so it doesn't seem like a good idea.  I'm also not sure
how effective this would be without doing the folding, since without
folding we can only eliminate the last insn in the constant sequence --
that's fine here, but it wouldn't work for more complicated cases.

So I think if this were to go anywhere we'd want to have a CSE that
really only eliminates expressions (i.e., doesn't do any of the other
juggling to try and produce more constant subexpressions).  There are a
few places where new expressions can be introduced, so it'd probably be
better done as a new cse_insn-type function instead of just a flag.  It
seems somewhat manageable to write, though.

That said, I really don't know what I'm doing here.  So I figured I'd
just send out what I'd put together, mostly as a way to ask: is it
worth putting time into this?
---
 gcc/common.opt  |   4 ++
 gcc/cse.cc      | 112 ++++++++++++++++++++++++++++++++++++++++++------
 gcc/opts.cc     |   1 +
 gcc/passes.def  |   1 +
 gcc/tree-pass.h |   1 +
 5 files changed, 105 insertions(+), 14 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 327230967ea..efc4b8ddaf3 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2695,6 +2695,10 @@ frerun-cse-after-loop
 Common Var(flag_rerun_cse_after_loop) Optimization
 Add a common subexpression elimination pass after loop optimizations.
 
+frerun-cse-after-split
+Common Var(flag_rerun_cse_after_split) Optimization
+Add a common subexpression elimination pass after splitting instructions.
+
 frerun-loop-opt
 Common Ignore
 Does nothing.  Preserved for backward compatibility.
diff --git a/gcc/cse.cc b/gcc/cse.cc
index c53deecbe54..d3955001ce7 100644
--- a/gcc/cse.cc
+++ b/gcc/cse.cc
@@ -543,11 +543,11 @@ static rtx fold_rtx (rtx, rtx_insn *);
 static rtx equiv_constant (rtx);
 static void record_jump_equiv (rtx_insn *, bool);
 static void record_jump_cond (enum rtx_code, machine_mode, rtx, rtx);
-static void cse_insn (rtx_insn *);
+static void cse_insn (rtx_insn *, int);
 static void cse_prescan_path (struct cse_basic_block_data *);
 static void invalidate_from_clobbers (rtx_insn *);
 static void invalidate_from_sets_and_clobbers (rtx_insn *);
-static void cse_extended_basic_block (struct cse_basic_block_data *);
+static void cse_extended_basic_block (struct cse_basic_block_data *, int);
 extern void dump_class (struct table_elt*);
 static void get_cse_reg_info_1 (unsigned int regno);
 static struct cse_reg_info * get_cse_reg_info (unsigned int regno);
@@ -4511,12 +4511,13 @@ canonicalize_insn (rtx_insn *insn, vec<struct set> *psets)
 
 /* Main function of CSE.
    First simplify sources and addresses of all assignments
-   in the instruction, using previously-computed equivalents values.
+   in the instruction, using previously-computed equivalent values when
+   simplification is allowed.
    Then install the new sources and destinations in the table
    of available values.  */
 
 static void
-cse_insn (rtx_insn *insn)
+cse_insn (rtx_insn *insn, int simplify)
 {
   rtx x = PATTERN (insn);
   int i;
@@ -4662,9 +4663,15 @@ cse_insn (rtx_insn *insn)
       else
        src_eqv_here = src_eqv;
 
-      /* Simplify and foldable subexpressions in SRC.  Then get the fully-
-        simplified result, which may not necessarily be valid.  */
-      src_folded = fold_rtx (src, NULL);
+      /* If simplification is enabled, then simplify any foldable
+        subexpressions in SRC.  Then get the fully-simplified result, which
+        may not necessarily be valid.
+
+        Otherwise, just leave SRC alone.  */
+      if (simplify)
+       src_folded = fold_rtx (src, NULL);
+      else
+       src_folded = src;
 
 #if 0
       /* ??? This caused bad code to be generated for the m68k port with -O2.
@@ -6504,7 +6511,7 @@ check_for_label_ref (rtx_insn *insn)
 /* Process a single extended basic block described by EBB_DATA.  */
 
 static void
-cse_extended_basic_block (struct cse_basic_block_data *ebb_data)
+cse_extended_basic_block (struct cse_basic_block_data *ebb_data, int simplify)
 {
   int path_size = ebb_data->path_size;
   int path_entry;
@@ -6571,7 +6578,7 @@ cse_extended_basic_block (struct cse_basic_block_data *ebb_data)
              if (changed)
                df_notes_rescan (insn);
 
-             cse_insn (insn);
+             cse_insn (insn, simplify);
 
              /* If we haven't already found an insn where we added a LABEL_REF,
                 check this one.  */
@@ -6637,6 +6644,7 @@ cse_extended_basic_block (struct cse_basic_block_data *ebb_data)
 /* Perform cse on the instructions of a function.
    F is the first instruction.
    NREGS is one plus the highest pseudo-reg number used in the instruction.
+   SIMPLIFY determines whether simplification should be performed.
 
    Return 2 if jump optimizations should be redone due to simplifications
    in conditional jump instructions.
@@ -6644,7 +6652,7 @@ cse_extended_basic_block (struct cse_basic_block_data *ebb_data)
    Return 0 otherwise.  */
 
 static int
-cse_main (rtx_insn *f ATTRIBUTE_UNUSED, int nregs)
+cse_main (rtx_insn *f ATTRIBUTE_UNUSED, int nregs, int simplify)
 {
   struct cse_basic_block_data ebb_data;
   basic_block bb;
@@ -6716,7 +6724,7 @@ cse_main (rtx_insn *f ATTRIBUTE_UNUSED, int nregs)
          if (dump_file)
            cse_dump_path (&ebb_data, ebb_data.nsets, dump_file);
 
-         cse_extended_basic_block (&ebb_data);
+         cse_extended_basic_block (&ebb_data, simplify);
        }
     }
 
@@ -7546,7 +7554,7 @@ rest_of_handle_cse (void)
   if (dump_file)
     dump_flow_info (dump_file, dump_flags);
 
-  tem = cse_main (get_insns (), max_reg_num ());
+  tem = cse_main (get_insns (), max_reg_num (), 1);
 
   /* If we are not running more CSE passes, then we are no longer
      expecting CSE to be run.  But always rerun it in a cheap mode.  */
@@ -7614,7 +7622,7 @@ rest_of_handle_cse2 (void)
   if (dump_file)
     dump_flow_info (dump_file, dump_flags);
 
-  tem = cse_main (get_insns (), max_reg_num ());
+  tem = cse_main (get_insns (), max_reg_num (), 1);
 
   /* Run a pass to eliminate duplicated assignments to condition code
      registers.  We have to run this after bypass_jumps, because it
@@ -7694,7 +7702,7 @@ rest_of_handle_cse_after_global_opts (void)
   flag_cse_follow_jumps = 0;
 
   rebuild_jump_labels (get_insns ());
-  tem = cse_main (get_insns (), max_reg_num ());
+  tem = cse_main (get_insns (), max_reg_num (), 1);
   cse_cfg_altered |= purge_all_dead_edges ();
   delete_trivially_dead_insns (get_insns (), max_reg_num ());
 
@@ -7757,3 +7765,79 @@ make_pass_cse_after_global_opts (gcc::context *ctxt)
 {
   return new pass_cse_after_global_opts (ctxt);
 }
+
+/* Run a local CSE pass without folding after instruction splitting.  */
+static unsigned int
+rest_of_handle_cse_after_split (void)
+{
+  int save_cfj;
+  int tem;
+
+  /* We only want to do local CSE, so don't follow jumps.  */
+  save_cfj = flag_cse_follow_jumps;
+  flag_cse_follow_jumps = 0;
+
+  rebuild_jump_labels (get_insns ());
+  tem = cse_main (get_insns (), max_reg_num (), 0);
+  cse_cfg_altered |= purge_all_dead_edges ();
+  delete_trivially_dead_insns (get_insns (), max_reg_num ());
+
+  cse_not_expected = !flag_rerun_cse_after_split;
+
+  /* If cse altered any jumps, rerun jump opts to clean things up.  */
+  if (tem == 2)
+    {
+      timevar_push (TV_JUMP);
+      rebuild_jump_labels (get_insns ());
+      cse_cfg_altered |= cleanup_cfg (CLEANUP_CFG_CHANGED);
+      timevar_pop (TV_JUMP);
+    }
+  else if (tem == 1 || cse_cfg_altered)
+    cse_cfg_altered |= cleanup_cfg (0);
+
+  flag_cse_follow_jumps = save_cfj;
+  return 0;
+}
+
+namespace {
+
+const pass_data pass_data_cse_after_split =
+{
+  RTL_PASS, /* type */
+  "cse_after_split", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_CSE, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_df_finish, /* todo_flags_finish */
+};
+
+class pass_cse_after_split : public rtl_opt_pass
+{
+public:
+  pass_cse_after_split (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_cse_after_split, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  bool gate (function *) final override
+    {
+      return optimize > 0 && flag_rerun_cse_after_split;
+    }
+
+  unsigned int execute (function *) final override
+    {
+      return rest_of_handle_cse_after_split ();
+    }
+
+}; // class pass_cse_after_split
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_cse_after_split (gcc::context *ctxt)
+{
+  return new pass_cse_after_split (ctxt);
+}
diff --git a/gcc/opts.cc b/gcc/opts.cc
index 915bce88fd6..85416c09dfd 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -651,6 +651,7 @@ static const struct default_options default_options_table[] =
     { OPT_LEVELS_2_PLUS, OPT_fpeephole2, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_freorder_functions, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_frerun_cse_after_loop, NULL, 1 },
+    { OPT_LEVELS_2_PLUS, OPT_frerun_cse_after_split, NULL, 1 },
 #ifdef INSN_SCHEDULING
     { OPT_LEVELS_2_PLUS, OPT_fschedule_insns2, NULL, 1 },
 #endif
diff --git a/gcc/passes.def b/gcc/passes.def
index 13c9dc34ddf..c6f543ad370 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -501,6 +501,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_split_all_insns);
       NEXT_PASS (pass_lower_subreg3);
       NEXT_PASS (pass_df_initialize_no_opt);
+      NEXT_PASS (pass_cse_after_split);
       NEXT_PASS (pass_stack_ptr_mod);
       NEXT_PASS (pass_mode_switching);
       NEXT_PASS (pass_match_asm_constraints);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 38902b1b01b..12b92385fa6 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -603,6 +603,7 @@ extern rtl_opt_pass *make_pass_match_asm_constraints (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_split_all_insns (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_fast_rtl_byte_dce (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_lower_subreg3 (gcc::context *ctxt);
+extern rtl_opt_pass *make_pass_cse_after_split (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_mode_switching (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_sms (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_sched (gcc::context *ctxt);
-- 
2.45.1

Reply via email to