Insn seqs before sched:

.L1:
a5 = insn-1 (a0)
a6 = insn-2 (a1)
a7 = insn-3 (a7, a5)
a8 = insn-4 (a8, a6)
Jmp .L1

Insn-3 and insn-4 have REG_DEP_TRUE dependencies on insn-1 and insn-2
respectively, so insn-3 and insn-4 end up last in the ready list.
This patch also puts a 0 cost dependency due to a bypass into the
highest numbered class, for targets that have a forwarding
feature between DEP_PRO and DEP_CON.

If the insns are in the same cost class under -fsched-last-insn-heuristic,
the scheduler goes on to "prefer the insn which has more later insns that depend on it".
There, the value returned by dep_list_size() is not satisfactory, because it
counts all dependencies of the insn.
We need to ignore the ones that have a 0 cost dependency due to a bypass.

With this patch and pipeline description as below:

(define_bypass 0 "insn-1, insn-2" "insn-3, insn-4")

We can get better insn seqs after sched:

.L1:
a5 = insn-1 (a0)
a7 = insn-3 (a7, a5)
a6 = insn-2 (a1)
a8 = insn-4 (a8, a6)
Jmp .L1

I have tested this on ck860 (C-SKY arch) and on T-Head C960 (based on the RISC-V arch).

        gcc/
        * haifa-sched.c (dep_cost_bypass, dep_list_costs): New functions.
        (rank_for_schedule): Replace dep_list_size with dep_list_costs.
        Put 0 cost bypass dependencies in class 3 for -fsched-last-insn-heuristic.

---
 gcc/haifa-sched.c | 49 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 350178c82b8..51c6d23d3a5 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -1584,6 +1584,44 @@ dep_list_size (rtx_insn *insn, sd_list_types_def list)
   return nodbgcount;
 }
 
+/* Cost of DEP if its insns are recognized and bypass_p holds, else -1.  */
+
+HAIFA_INLINE static int
+dep_cost_bypass(dep_t dep)
+{
+  if (dep == NULL)
+    return -1;
+
+  if (INSN_CODE (DEP_PRO (dep)) >= 0
+      && bypass_p (DEP_PRO (dep))
+      && recog_memoized (DEP_CON (dep)) >= 0)
+    return dep_cost (dep);
+
+  return -1;
+}
+
+/* Count nondebug deps in LIST for INSN, skipping zero-cost bypass deps.  */
+
+static int
+dep_list_costs (rtx_insn *insn, sd_list_types_def list)
+{
+  sd_iterator_def sd_it;
+  dep_t dep;
+  int costs = 0;
+
+  FOR_EACH_DEP (insn, list, sd_it, dep)
+    {
+      if (!DEBUG_INSN_P (DEP_CON (dep))
+         && !DEBUG_INSN_P (DEP_PRO (dep)))
+       {
+         if (dep_cost_bypass (dep) != 0)
+           costs++;
+       }
+    }
+
+  return costs;
+}
+
 bool sched_fusion;
 
 /* Compute the priority number for INSN.  */
@@ -2758,10 +2796,12 @@ rank_for_schedule (const void *x, const void *y)
          1) Data dependent on last schedule insn.
          2) Anti/Output dependent on last scheduled insn.
          3) Independent of last scheduled insn, or has latency of one.
+            Class 3 also covers a zero-latency dependence due to a bypass.
          Choose the insn from the highest numbered class if different.  */
       dep1 = sd_find_dep_between (last, tmp, true);
 
-      if (dep1 == NULL || dep_cost (dep1) == 1)
+      if (dep1 == NULL || dep_cost (dep1) == 1
+         || (dep_cost_bypass (dep1) == 0))
        tmp_class = 3;
       else if (/* Data dependence.  */
               DEP_TYPE (dep1) == REG_DEP_TRUE)
@@ -2771,7 +2811,8 @@ rank_for_schedule (const void *x, const void *y)
 
       dep2 = sd_find_dep_between (last, tmp2, true);
 
-      if (dep2 == NULL || dep_cost (dep2)  == 1)
+      if (dep2 == NULL || dep_cost (dep2)  == 1
+         || (dep_cost_bypass (dep2) == 0))
        tmp2_class = 3;
       else if (/* Data dependence.  */
               DEP_TYPE (dep2) == REG_DEP_TRUE)
@@ -2795,8 +2836,8 @@ rank_for_schedule (const void *x, const void *y)
      This gives the scheduler more freedom when scheduling later
      instructions at the expense of added register pressure.  */
 
-  val = (dep_list_size (tmp2, SD_LIST_FORW)
-        - dep_list_size (tmp, SD_LIST_FORW));
+  val = (dep_list_costs (tmp2, SD_LIST_FORW)
+        - dep_list_costs (tmp, SD_LIST_FORW));
 
   if (flag_sched_dep_count_heuristic && val != 0)
     return rfs_result (RFS_DEP_COUNT, val, tmp, tmp2);
-- 
2.24.3 (Apple Git-128)

Reply via email to