On 7/22/21 15:41, Christoph Muellner via Gcc-patches wrote:
This patch enables the overlap-by-pieces feature of the by-pieces
infrastructure for inlining builtins in case the target has set
riscv_slow_unaligned_access_p to false.

An example to demonstrate the effect for targets with fast unaligned
access (targets that have slow_unaligned_access set to false) is
the code that is generated for "memset (p, 0, 15);", where the
alignment of p is unknown:

   Without overlap_op_by_pieces we get:
     8e:   00053023                sd      zero,0(a0)
     92:   00052423                sw      zero,8(a0)
     96:   00051623                sh      zero,12(a0)
     9a:   00050723                sb      zero,14(a0)

   With overlap_op_by_pieces we get:
     7e:   00053023                sd      zero,0(a0)
     82:   000533a3                sd      zero,7(a0)

gcc/ChangeLog:

        * config/riscv/riscv.c (riscv_overlap_op_by_pieces): New function.
        (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
        riscv_overlap_op_by_pieces.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/builtins-overlap-1.c: New test.
        * gcc.target/riscv/builtins-overlap-2.c: New test.
        * gcc.target/riscv/builtins-overlap-3.c: New test.
        * gcc.target/riscv/builtins-overlap-4.c: New test.
        * gcc.target/riscv/builtins-overlap-5.c: New test.
        * gcc.target/riscv/builtins-overlap-6.c: New test.
        * gcc.target/riscv/builtins-overlap-7.c: New test.
        * gcc.target/riscv/builtins-overlap-8.c: New test.
        * gcc.target/riscv/builtins-strict-align.c: New test.
        * gcc.target/riscv/builtins.h: New test.

Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org>

Ping, IMO this needs to be (re)considered for trunk.
This goes really nicely with riscv_slow_unaligned_access_p == false, to elide the unrolled tail copies for the trailing word/halfword/byte accesses.

@Kito, @Palmer? Just from a codegen point of view, this seems to be a no-brainer:

foo:
    sd    zero,0(a0)
    sw    zero,8(a0)
    sh    zero,12(a0)
    sb    zero,14(a0)

vs.

    sd    zero,0(a0)
    sd    zero,7(a0)

-Vineet

---
  gcc/config/riscv/riscv.c                         | 11 +++++++++++
  .../gcc.target/riscv/builtins-overlap-1.c        | 10 ++++++++++
  .../gcc.target/riscv/builtins-overlap-2.c        | 10 ++++++++++
  .../gcc.target/riscv/builtins-overlap-3.c        | 10 ++++++++++
  .../gcc.target/riscv/builtins-overlap-4.c        | 10 ++++++++++
  .../gcc.target/riscv/builtins-overlap-5.c        | 11 +++++++++++
  .../gcc.target/riscv/builtins-overlap-6.c        | 13 +++++++++++++
  .../gcc.target/riscv/builtins-overlap-7.c        | 11 +++++++++++
  .../gcc.target/riscv/builtins-overlap-8.c        | 11 +++++++++++
  .../gcc.target/riscv/builtins-strict-align.c     | 10 ++++++++++
  gcc/testsuite/gcc.target/riscv/builtins.h        | 16 ++++++++++++++++
  11 files changed, 123 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-3.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-4.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-5.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-6.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-7.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-8.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins.h

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..98c76ba657a 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -5201,6 +5201,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
    return riscv_slow_unaligned_access_p;
  }
+/* Implement TARGET_OVERLAP_OP_BY_PIECES_P. */
+
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return !riscv_slow_unaligned_access_p;
+}
+
  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
static bool
@@ -5525,6 +5533,9 @@ riscv_asan_shadow_offset (void)
  #undef TARGET_SLOW_UNALIGNED_ACCESS
  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
  #undef TARGET_SECONDARY_MEMORY_NEEDED
  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c
new file mode 100644
index 00000000000..ca51fff0fc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(7)
+
+/* { dg-final { scan-assembler-times "sw\tzero,0"  1 } } */
+/* { dg-final { scan-assembler-times "sw\tzero,3"  1 } } */
+/* { dg-final { scan-assembler-not   "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c
new file mode 100644
index 00000000000..24b5b254658
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(11)
+
+/* { dg-final { scan-assembler-times "sd\tzero,0"  1 } } */
+/* { dg-final { scan-assembler-times "sw\tzero,7"  1 } } */
+/* { dg-final { scan-assembler-not   "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-3.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-3.c
new file mode 100644
index 00000000000..636031cb944
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-3.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(13)
+
+/* { dg-final { scan-assembler-times "sd\tzero,0"  1 } } */
+/* { dg-final { scan-assembler-times "sd\tzero,5"  1 } } */
+/* { dg-final { scan-assembler-not   "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-4.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-4.c
new file mode 100644
index 00000000000..15d77860050
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-4.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(15)
+
+/* { dg-final { scan-assembler-times "sd\tzero,0"  1 } } */
+/* { dg-final { scan-assembler-times "sd\tzero,7"  1 } } */
+/* { dg-final { scan-assembler-not   "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-5.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-5.c
new file mode 100644
index 00000000000..faccb301f84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-5.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(7)
+
+/* { dg-final { scan-assembler-times "lw"    2 } } */
+/* { dg-final { scan-assembler-times "sw"    2 } } */
+/* { dg-final { scan-assembler-not   "lb" } } */
+/* { dg-final { scan-assembler-not   "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-6.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-6.c
new file mode 100644
index 00000000000..51e9b37ba5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-6.c
@@ -0,0 +1,13 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(11)
+
+/* { dg-final { scan-assembler-times "ld"    1 } } */
+/* { dg-final { scan-assembler-times "sw"    1 } } */
+/* { dg-final { scan-assembler-times "lw"    1 } } */
+/* { dg-final { scan-assembler-times "sw"    1 } } */
+/* { dg-final { scan-assembler-not   "lb" } } */
+/* { dg-final { scan-assembler-not   "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-7.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-7.c
new file mode 100644
index 00000000000..44fdaa398ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-7.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(13)
+
+/* { dg-final { scan-assembler-times "ld"    2 } } */
+/* { dg-final { scan-assembler-times "sd"    2 } } */
+/* { dg-final { scan-assembler-not   "lb" } } */
+/* { dg-final { scan-assembler-not   "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-8.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-8.c
new file mode 100644
index 00000000000..61186ae09a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-8.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(15)
+
+/* { dg-final { scan-assembler-times "ld"    2 } } */
+/* { dg-final { scan-assembler-times "sd"    2 } } */
+/* { dg-final { scan-assembler-not   "lb" } } */
+/* { dg-final { scan-assembler-not   "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
new file mode 100644
index 00000000000..5d06c6eea08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(15)
+
+/* { dg-final { scan-assembler-times "sb\tzero"  15 } } */
+/* { dg-final { scan-assembler-not   "sw" } } */
+/* { dg-final { scan-assembler-not   "sd" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins.h b/gcc/testsuite/gcc.target/riscv/builtins.h
new file mode 100644
index 00000000000..22b2800d464
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins.h
@@ -0,0 +1,16 @@
+#ifndef BUILTINS_H
+#define BUILTINS_H
+
+#define DO_MEMSET0_N(N)                        \
+void do_memset0_##N (void *p)          \
+{                                      \
+       __builtin_memset (p, 0, N);     \
+}
+
+#define DO_MEMCPY_N(N)                 \
+void do_memcpy_##N (void *d, void *s)  \
+{                                      \
+       __builtin_memcpy (d, s, N);     \
+}
+
+#endif /* BUILTINS_H */

Reply via email to