On 7/22/21 15:41, Christoph Muellner via Gcc-patches wrote:
This patch enables the overlap-by-pieces feature of the by-pieces
infrastructure for inlining builtins in case the target has set
riscv_slow_unaligned_access_p to false.
An example to demonstrate the effect for targets with fast unaligned
access (targets that have slow_unaligned_access set to false) is
the code that is generated for "memset (p, 0, 15);", where the
alignment of p is unknown:
Without overlap_op_by_pieces we get:
8e: 00053023 sd zero,0(a0)
92: 00052423 sw zero,8(a0)
96: 00051623 sh zero,12(a0)
9a: 00050723 sb zero,14(a0)
With overlap_op_by_pieces we get:
7e: 00053023 sd zero,0(a0)
82: 000533a3 sd zero,7(a0)
gcc/ChangeLog:
* config/riscv/riscv.c (riscv_overlap_op_by_pieces): New function.
(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
riscv_overlap_op_by_pieces.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/builtins-overlap-1.c: New test.
* gcc.target/riscv/builtins-overlap-2.c: New test.
* gcc.target/riscv/builtins-overlap-3.c: New test.
* gcc.target/riscv/builtins-overlap-4.c: New test.
* gcc.target/riscv/builtins-overlap-5.c: New test.
* gcc.target/riscv/builtins-overlap-6.c: New test.
* gcc.target/riscv/builtins-overlap-7.c: New test.
* gcc.target/riscv/builtins-overlap-8.c: New test.
* gcc.target/riscv/builtins-strict-align.c: New test.
* gcc.target/riscv/builtins.h: New file.
Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org>
Ping, IMO this needs to be (re)considered for trunk.
This goes really nicely with riscv_slow_unaligned_access_p==false, to
elide the unrolled tail copies for trailing word/halfword/byte accesses.
@Kito, @Palmer? Just from a codegen POV this seems to be a no-brainer:
foo:
sd zero,0(a0)
sw zero,8(a0)
sh zero,12(a0)
sb zero,14(a0)
vs.
sd zero,0(a0)
sd zero,7(a0)
-Vineet
---
gcc/config/riscv/riscv.c | 11 +++++++++++
.../gcc.target/riscv/builtins-overlap-1.c | 10 ++++++++++
.../gcc.target/riscv/builtins-overlap-2.c | 10 ++++++++++
.../gcc.target/riscv/builtins-overlap-3.c | 10 ++++++++++
.../gcc.target/riscv/builtins-overlap-4.c | 10 ++++++++++
.../gcc.target/riscv/builtins-overlap-5.c | 11 +++++++++++
.../gcc.target/riscv/builtins-overlap-6.c | 13 +++++++++++++
.../gcc.target/riscv/builtins-overlap-7.c | 11 +++++++++++
.../gcc.target/riscv/builtins-overlap-8.c | 11 +++++++++++
.../gcc.target/riscv/builtins-strict-align.c | 10 ++++++++++
gcc/testsuite/gcc.target/riscv/builtins.h | 16 ++++++++++++++++
11 files changed, 123 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-8.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
create mode 100644 gcc/testsuite/gcc.target/riscv/builtins.h
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..98c76ba657a 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -5201,6 +5201,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
return riscv_slow_unaligned_access_p;
}
+/* Implement TARGET_OVERLAP_OP_BY_PIECES_P. */
+
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+ return !riscv_slow_unaligned_access_p;
+}
+
/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
static bool
@@ -5525,6 +5533,9 @@ riscv_asan_shadow_offset (void)
#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c
new file mode 100644
index 00000000000..ca51fff0fc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(7)
+
+/* { dg-final { scan-assembler-times "sw\tzero,0" 1 } } */
+/* { dg-final { scan-assembler-times "sw\tzero,3" 1 } } */
+/* { dg-final { scan-assembler-not "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c
new file mode 100644
index 00000000000..24b5b254658
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(11)
+
+/* { dg-final { scan-assembler-times "sd\tzero,0" 1 } } */
+/* { dg-final { scan-assembler-times "sw\tzero,7" 1 } } */
+/* { dg-final { scan-assembler-not "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-3.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-3.c
new file mode 100644
index 00000000000..636031cb944
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-3.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(13)
+
+/* { dg-final { scan-assembler-times "sd\tzero,0" 1 } } */
+/* { dg-final { scan-assembler-times "sd\tzero,5" 1 } } */
+/* { dg-final { scan-assembler-not "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-4.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-4.c
new file mode 100644
index 00000000000..15d77860050
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-4.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(15)
+
+/* { dg-final { scan-assembler-times "sd\tzero,0" 1 } } */
+/* { dg-final { scan-assembler-times "sd\tzero,7" 1 } } */
+/* { dg-final { scan-assembler-not "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-5.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-5.c
new file mode 100644
index 00000000000..faccb301f84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-5.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(7)
+
+/* { dg-final { scan-assembler-times "lw" 2 } } */
+/* { dg-final { scan-assembler-times "sw" 2 } } */
+/* { dg-final { scan-assembler-not "lb" } } */
+/* { dg-final { scan-assembler-not "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-6.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-6.c
new file mode 100644
index 00000000000..51e9b37ba5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-6.c
@@ -0,0 +1,13 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(11)
+
+/* { dg-final { scan-assembler-times "ld" 1 } } */
+/* { dg-final { scan-assembler-times "sd" 1 } } */
+/* { dg-final { scan-assembler-times "lw" 1 } } */
+/* { dg-final { scan-assembler-times "sw" 1 } } */
+/* { dg-final { scan-assembler-not "lb" } } */
+/* { dg-final { scan-assembler-not "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-7.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-7.c
new file mode 100644
index 00000000000..44fdaa398ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-7.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(13)
+
+/* { dg-final { scan-assembler-times "ld" 2 } } */
+/* { dg-final { scan-assembler-times "sd" 2 } } */
+/* { dg-final { scan-assembler-not "lb" } } */
+/* { dg-final { scan-assembler-not "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-8.c b/gcc/testsuite/gcc.target/riscv/builtins-overlap-8.c
new file mode 100644
index 00000000000..61186ae09a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-8.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMCPY_N(15)
+
+/* { dg-final { scan-assembler-times "ld" 2 } } */
+/* { dg-final { scan-assembler-times "sd" 2 } } */
+/* { dg-final { scan-assembler-not "lb" } } */
+/* { dg-final { scan-assembler-not "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
new file mode 100644
index 00000000000..5d06c6eea08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64 -mstrict-align" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(15)
+
+/* { dg-final { scan-assembler-times "sb\tzero" 15 } } */
+/* { dg-final { scan-assembler-not "sw" } } */
+/* { dg-final { scan-assembler-not "sd" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins.h b/gcc/testsuite/gcc.target/riscv/builtins.h
new file mode 100644
index 00000000000..22b2800d464
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins.h
@@ -0,0 +1,16 @@
+#ifndef BUILTINS_H
+#define BUILTINS_H
+
+#define DO_MEMSET0_N(N) \
+void do_memset0_##N (void *p) \
+{ \
+ __builtin_memset (p, 0, N); \
+}
+
+#define DO_MEMCPY_N(N) \
+void do_memcpy_##N (void *d, void *s) \
+{ \
+ __builtin_memcpy (d, s, N); \
+}
+
+#endif /* BUILTINS_H */