This patch introduces a conservative loop unrolling heuristic for the
RISC-V backend: loops of at most 4 insns are unrolled by a factor of at
most 8, and larger loops are not unrolled.

The new -munroll-only-small-loops option is enabled by default at -O2
and above for speed-optimized builds, together with -funroll-loops, so
that small loops benefit from unrolling without exposing larger loops
to its costs.  When the user explicitly passes -funroll-loops or
-funroll-all-loops, -munroll-only-small-loops is automatically disabled
(unless it was itself given explicitly) so that the full unroller
heuristics apply as before.

gcc/ChangeLog:

        * common/config/riscv/riscv-common.cc
        (riscv_option_optimization_table): Enable -funroll-loops and
        -munroll-only-small-loops at -O2 and above for speed.
        * config/riscv/riscv.cc (riscv_loop_unroll_adjust): New function.
        (riscv_option_override): Disable -munroll-only-small-loops when
        -funroll-loops or -funroll-all-loops is explicitly requested.
        (TARGET_LOOP_UNROLL_ADJUST): Define.
        * config/riscv/riscv.opt (munroll-only-small-loops): New option.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/unroll-small-loop.c: New test.
        * gcc.target/riscv/unroll-large-loop.c: New test.
        * gcc.target/riscv/unroll-explicit.c: New test.
---
 gcc/common/config/riscv/riscv-common.cc       |  4 +++
 gcc/config/riscv/riscv.cc                     | 27 +++++++++++++++++++
 gcc/config/riscv/riscv.opt                    |  4 +++
 .../gcc.target/riscv/unroll-explicit.c        | 21 +++++++++++++++
 .../gcc.target/riscv/unroll-large-loop.c      | 20 ++++++++++++++
 .../gcc.target/riscv/unroll-small-loop.c      | 22 +++++++++++++++
 6 files changed, 98 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/unroll-explicit.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/unroll-large-loop.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/unroll-small-loop.c

diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc
index 74929381a06..cc6988caef4 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -2344,6 +2344,10 @@ static const struct default_options riscv_option_optimization_table[] =
     { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
     { OPT_LEVELS_ALL, OPT_funwind_tables, NULL, 1},
 #endif
+    /* Turn on -funroll-loops with -munroll-only-small-loops to enable
+       small loop unrolling at -O2 and above (speed only).  */
+    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 },
     { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index cc1b8cd16a9..f2c3f8e6eff 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5055,6 +5055,21 @@ riscv_insn_cost (rtx_insn *insn, bool speed)
   return cost;
 }
 
+/* Implement TARGET_LOOP_UNROLL_ADJUST.  */
+
+static unsigned
+riscv_loop_unroll_adjust (unsigned nunroll, class loop *loop)
+{
+  if (riscv_unroll_only_small_loops)
+    {
+      if (loop->ninsns <= 4)
+       return MIN (8, nunroll);
+      else
+       return 1;
+    }
+  return nunroll;
+}
+
 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
    but we consider cost units of branch instructions equal to cost units of
    other instructions.  */
@@ -12139,6 +12154,16 @@ riscv_option_override (void)
 
   flag_pcc_struct_return = 0;
 
+  /* Explicit -funroll-loops or -funroll-all-loops turns
+     -munroll-only-small-loops off (unless it was itself given
+     explicitly), so the unroller handles all loops unrestricted.  */
+  if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
+      || (OPTION_SET_P (flag_unroll_all_loops) && flag_unroll_all_loops))
+    {
+      if (!OPTION_SET_P (riscv_unroll_only_small_loops))
+       riscv_unroll_only_small_loops = 0;
+    }
+
   if (flag_pic)
     g_switch_value = 0;
 
@@ -16364,6 +16389,8 @@ riscv_memtag_tag_bitsize ()
 #define TARGET_RTX_COSTS riscv_rtx_costs
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST riscv_address_cost
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST riscv_loop_unroll_adjust
 #undef TARGET_INSN_COST
 #define TARGET_INSN_COST riscv_insn_cost
 
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index c2670ad87b2..3226367bf8a 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -454,3 +454,7 @@ Enum(arcv_mpy_option) String(10c) Value(ARCV_MPY_OPTION_10C)
 mmpy-option=
 Target RejectNegative Joined Enum(arcv_mpy_option) Var(arcv_mpy_option) Init(ARCV_MPY_OPTION_2C)
 The type of MPY unit used by the RMX-100 core (to be used in combination with -mtune=arc-v-rmx-100-series) (default: 2c).
+
+munroll-only-small-loops
+Target Var(riscv_unroll_only_small_loops) Init(0) Save
+Enable conservative small loop unrolling.
diff --git a/gcc/testsuite/gcc.target/riscv/unroll-explicit.c b/gcc/testsuite/gcc.target/riscv/unroll-explicit.c
new file mode 100644
index 00000000000..b5e537d20c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/unroll-explicit.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-loops -fdump-rtl-loop2_unroll-details" } */
+
+/* Verify that when the user explicitly passes -funroll-loops,
+   -munroll-only-small-loops is disabled and large loops can also
+   be unrolled.  */
+
+void
+large_loop_explicit (int *a, int *b, int *c, int *d, int n)
+{
+  int i;
+  for (i = 0; i < n; i++)
+    {
+      a[i] = b[i] + c[i];
+      d[i] = a[i] * b[i] - c[i];
+      b[i] = c[i] + d[i] + a[i];
+      c[i] = a[i] - d[i] + b[i];
+    }
+}
+
+/* { dg-final { scan-rtl-dump "Unrolled loop" "loop2_unroll" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/unroll-large-loop.c b/gcc/testsuite/gcc.target/riscv/unroll-large-loop.c
new file mode 100644
index 00000000000..ad470c34fdd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/unroll-large-loop.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll-details" } */
+
+/* Verify that a large loop (more than 4 insns in the body) is NOT unrolled
+   when -munroll-only-small-loops is in effect (the default at -O2).  */
+
+void
+large_loop (int *a, int *b, int *c, int *d, int n)
+{
+  int i;
+  for (i = 0; i < n; i++)
+    {
+      a[i] = b[i] + c[i];
+      d[i] = a[i] * b[i] - c[i];
+      b[i] = c[i] + d[i] + a[i];
+      c[i] = a[i] - d[i] + b[i];
+    }
+}
+
+/* { dg-final { scan-rtl-dump-not "Unrolled loop" "loop2_unroll" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/unroll-small-loop.c b/gcc/testsuite/gcc.target/riscv/unroll-small-loop.c
new file mode 100644
index 00000000000..49dcfa04e41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/unroll-small-loop.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll-details" } */
+
+/* Verify -munroll-only-small-loops (default ON at -O2) unrolls a small
+   loop.  The do-while form uses the counter itself as the induction
+   variable, so the RTL loop body collapses to roughly:
+     asm     ; <-- empty volatile asm (kept as one insn)
+     addi n, n, -1
+     bnez n, .L
+   giving loop->ninsns <= 4 and triggering the small-loop unroll path
+   in riscv_loop_unroll_adjust.  The empty volatile asm prevents the
+   loop from being deleted as dead code.  */
+
+void
+small_loop (int n)
+{
+  do
+    __asm__ volatile ("");
+  while (--n);
+}
+
+/* { dg-final { scan-rtl-dump "Unrolled loop" "loop2_unroll" } } */
-- 
2.39.5 (Apple Git-154)

Reply via email to