RE: [PATCH v1] RISC-V: Add xfail test case for highest-number regno ternary overlap

2024-04-21 Thread Li, Pan2
Committed, thanks Juzhe.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Monday, April 22, 2024 2:40 PM
To: Li, Pan2 ; gcc-patches 
Cc: kito.cheng ; Robin Dapp ; Li, 
Pan2 
Subject: Re: [PATCH v1] RISC-V: Add xfail test case for highest-number regno 
ternary overlap

LGTM.


juzhe.zh...@rivai.ai

From: pan2.li
Date: 2024-04-22 14:35
To: gcc-patches
CC: juzhe.zhong; 
kito.cheng; rdapp.gcc; 
Pan Li
Subject: [PATCH v1] RISC-V: Add xfail test case for highest-number regno 
ternary overlap
From: Pan Li <pan2...@intel.com>

We reverted the patch below for register group overlap.  This patch adds
the related insn test and marks it as xfail; the xfail will be removed
once register overlap is supported in GCC-15.

27fde325d64 RISC-V: Support highest-number regno overlap for widen ternary

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-37.c: New test.
* gcc.target/riscv/rvv/base/pr112431-38.c: New test.

Signed-off-by: Pan Li <pan2...@intel.com>
---
.../gcc.target/riscv/rvv/base/pr112431-37.c   | 103 ++
.../gcc.target/riscv/rvv/base/pr112431-38.c   |  82 ++
2 files changed, 185 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
new file mode 100644
index 000..66e81ea905a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmacc_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo2 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmacc_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo3 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmacc_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo4 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmaccus_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo5 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmaccus_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo6 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmaccus_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo7 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vuint8m1_t high_ueew8 = __riscv_vreinterpret_v_i8m1_u8m1 (high_eew8);
+  vint16m2_t result = __riscv_vwmaccsu_vx_i16m2 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo8 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vuint8m2_t high_ueew8 = __riscv_vreinterpret_v_i8m2_u8m2 (high_eew8);
+  vint16m4_t result = __riscv_vwmaccsu_vx_i16m4 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo9 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_

Re: [PATCH v1] RISC-V: Add xfail test case for highest-number regno ternary overlap

2024-04-21 Thread juzhe.zh...@rivai.ai
LGTM.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2024-04-22 14:35
To: gcc-patches
CC: juzhe.zhong; kito.cheng; rdapp.gcc; Pan Li
Subject: [PATCH v1] RISC-V: Add xfail test case for highest-number regno 
ternary overlap
From: Pan Li 
 
We reverted the patch below for register group overlap.  This patch adds
the related insn test and marks it as xfail; the xfail will be removed
once register overlap is supported in GCC-15.
 
27fde325d64 RISC-V: Support highest-number regno overlap for widen ternary
 
The following test suite passed.
* The rv64gcv full regression test.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/pr112431-37.c: New test.
* gcc.target/riscv/rvv/base/pr112431-38.c: New test.
 
Signed-off-by: Pan Li 
---
.../gcc.target/riscv/rvv/base/pr112431-37.c   | 103 ++
.../gcc.target/riscv/rvv/base/pr112431-38.c   |  82 ++
2 files changed, 185 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
new file mode 100644
index 000..66e81ea905a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmacc_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo2 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmacc_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo3 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmacc_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo4 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmaccus_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo5 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmaccus_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo6 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmaccus_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo7 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vuint8m1_t high_ueew8 = __riscv_vreinterpret_v_i8m1_u8m1 (high_eew8);
+  vint16m2_t result = __riscv_vwmaccsu_vx_i16m2 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo8 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vuint8m2_t high_ueew8 = __riscv_vreinterpret_v_i8m2_u8m2 (high_eew8);
+  vint16m4_t result = __riscv_vwmaccsu_vx_i16m4 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo9 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vuint8m4_t high_ueew8 = __riscv_vreinterpret_v_i8m4_u8m4 (high_eew8);
+  vint16m8_t result = __riscv_vwmaccsu_vx_i16m8 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv4r} { xfail r

[PATCH v1] RISC-V: Add xfail test case for highest-number regno ternary overlap

2024-04-21 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap.  This patch adds
the related insn test and marks it as xfail; the xfail will be removed
once register overlap is supported in GCC-15.

27fde325d64 RISC-V: Support highest-number regno overlap for widen ternary

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-37.c: New test.
* gcc.target/riscv/rvv/base/pr112431-38.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-37.c   | 103 ++
 .../gcc.target/riscv/rvv/base/pr112431-38.c   |  82 ++
 2 files changed, 185 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
new file mode 100644
index 000..66e81ea905a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-37.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmacc_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo2 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmacc_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo3 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmacc_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo4 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vint16m2_t result = __riscv_vwmaccus_vx_i16m2 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo5 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vint16m4_t result = __riscv_vwmaccus_vx_i16m4 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo6 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vint16m8_t result = __riscv_vwmaccus_vx_i16m8 (accum, 16, high_eew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+void
+foo7 (void *in, void *out)
+{
+  vint16m2_t accum = __riscv_vle16_v_i16m2 (in, 4);
+  vint16m1_t high_eew16 = __riscv_vget_v_i16m2_i16m1 (accum, 1);
+  vint8m1_t high_eew8 = __riscv_vreinterpret_v_i16m1_i8m1 (high_eew16);
+  vuint8m1_t high_ueew8 = __riscv_vreinterpret_v_i8m1_u8m1 (high_eew8);
+  vint16m2_t result = __riscv_vwmaccsu_vx_i16m2 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m2 (out, result, 4);
+}
+
+void
+foo8 (void *in, void *out)
+{
+  vint16m4_t accum = __riscv_vle16_v_i16m4 (in, 4);
+  vint16m2_t high_eew16 = __riscv_vget_v_i16m4_i16m2 (accum, 1);
+  vint8m2_t high_eew8 = __riscv_vreinterpret_v_i16m2_i8m2 (high_eew16);
+  vuint8m2_t high_ueew8 = __riscv_vreinterpret_v_i8m2_u8m2 (high_eew8);
+  vint16m4_t result = __riscv_vwmaccsu_vx_i16m4 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m4 (out, result, 4);
+}
+
+void
+foo9 (void *in, void *out)
+{
+  vint16m8_t accum = __riscv_vle16_v_i16m8 (in, 4);
+  vint16m4_t high_eew16 = __riscv_vget_v_i16m8_i16m4 (accum, 1);
+  vint8m4_t high_eew8 = __riscv_vreinterpret_v_i16m4_i8m4 (high_eew16);
+  vuint8m4_t high_ueew8 = __riscv_vreinterpret_v_i8m4_u8m4 (high_eew8);
+  vint16m8_t result = __riscv_vwmaccsu_vx_i16m8 (accum, 16, high_ueew8, 4);
+  __riscv_vse16_v_i16m8 (out, result, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv4r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {vmv8r} { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-38.c 

Re: [PATCH] s390: testsuite: Remove xfail for vpopct{b,h}

2024-04-21 Thread Andreas Krebbel
On 4/22/24 08:01, Stefan Schulze Frielinghaus wrote:
> Starting with r14-9316-g7890836de20912 patterns for vpopct{b,h} are also
> detected.  Thus, remove xfails.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vxe/popcount-1.c: Remove xfail.

Ok. Thanks!

Andreas

> ---
>  Ok for mainline?
> 
>  gcc/testsuite/gcc.target/s390/vxe/popcount-1.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c 
> b/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c
> index 9ea835a1cf0..25ef354f963 100644
> --- a/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c
> +++ b/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c
> @@ -21,7 +21,7 @@ vpopctb (uv16qi a)
>  
>    return r;
>  }
> -/* { dg-final { scan-assembler "vpopctb\t%v24,%v24" { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler "vpopctb\t%v24,%v24" } } */
>  
>  uv8hi __attribute__((noinline))
>  vpopcth (uv8hi a)
> @@ -34,7 +34,7 @@ vpopcth (uv8hi a)
>  
>    return r;
>  }
> -/* { dg-final { scan-assembler "vpopcth\t%v24,%v24" { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler "vpopcth\t%v24,%v24" } } */
>  
>  uv4si __attribute__((noinline))
>  vpopctf (uv4si a)



[PATCH] s390: testsuite: Fix forwprop-4{0,1}.c

2024-04-21 Thread Stefan Schulze Frielinghaus
The tests fail on s390 because can_vec_perm_const_p fails, and therefore
the bit insert/ref survive, contrary to what r14-3381-g27de9aa152141e aims
for.  Strictly speaking, the tests only fail if the target supports
vectors; for targets prior to z13, or with -mesa, the emulated
vector operations are optimized out.

Easiest would be to skip the entire test for s390.  Another solution
would be to xfail in case of vector support hoping that eventually we
end up with an xpass for a future machine generation or if gcc advances.
That is implemented by this patch.  In order to do so I implemented a
new target test s390_mvx which tests whether vector support is available
or not.  Maybe this is already over-engineered for a simple test?  Any
thoughts?
---
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c |  4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c |  4 ++--
 gcc/testsuite/lib/target-supports.exp   | 14 ++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
index 7513497f552..b67e3e93a7f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
@@ -10,5 +10,5 @@ vector int g(vector int a)
   return a;
 }
 
-/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 0 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 0 "optimized" { xfail 
s390_mvx } } } */
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" { xfail 
s390_mvx } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c
index b1e75797a90..0f119675207 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c
@@ -11,6 +11,6 @@ vector int g(vector int a, int c)
   return a;
 }
 
-/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 1 "optimized" { xfail 
s390_mvx } } } */
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" { xfail 
s390_mvx } } } */
 /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "optimized" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index edce672c0e2..5a692baa8ef 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12380,6 +12380,20 @@ proc check_effective_target_profile_update_atomic {} {
 } "-fprofile-update=atomic -fprofile-generate"]
 }
 
+# Return 1 if the target has a vector facility.
+proc check_effective_target_s390_mvx { } {
+if ![istarget s390*-*-*] then {
+   return 0;
+}
+
+return [check_no_compiler_messages_nocache s390_mvx assembly {
+   #if !defined __VX__
+   #error no vector facility.
+   #endif
+   int dummy;
+} [current_compiler_flags]]
+}
+
 # Return 1 if vector (va - vector add) instructions are understood by
 # the assembler and can be executed.  This also covers checking for
 # the VX kernel feature.  A kernel without that feature does not
-- 
2.44.0



RE: [PATCH v1] RISC-V: Add xfail test case for widening register overlap of vf4/vf8

2024-04-21 Thread Li, Pan2
Committed, thanks Juzhe.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Monday, April 22, 2024 11:49 AM
To: Li, Pan2 ; gcc-patches 
Cc: kito.cheng ; Robin Dapp ; Li, 
Pan2 
Subject: Re: [PATCH v1] RISC-V: Add xfail test case for widening register 
overlap of vf4/vf8

LGTM.


juzhe.zh...@rivai.ai

From: pan2.li
Date: 2024-04-22 11:19
To: gcc-patches
CC: juzhe.zhong; 
kito.cheng; rdapp.gcc; 
Pan Li
Subject: [PATCH v1] RISC-V: Add xfail test case for widening register overlap 
of vf4/vf8
From: Pan Li <pan2...@intel.com>

We reverted the patch below for register group overlap.  This patch adds
the related insn test and marks it as xfail; the xfail will be removed
once register overlap is supported in GCC-15.

303195e2a6b RISC-V: Support widening register overlap for vf4/vf8

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-16.c: New test.
* gcc.target/riscv/rvv/base/pr112431-17.c: New test.
* gcc.target/riscv/rvv/base/pr112431-18.c: New test.

Signed-off-by: Pan Li <pan2...@intel.com>
---
.../gcc.target/riscv/rvv/base/pr112431-16.c   | 68 +++
.../gcc.target/riscv/rvv/base/pr112431-17.c   | 51 ++
.../gcc.target/riscv/rvv/base/pr112431-18.c   | 51 ++
3 files changed, 170 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
new file mode 100644
index 000..42d11611d98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+   size_t sum5, size_t sum6, size_t sum7)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+
+  asm volatile("nop" ::: "memory");
+  vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
+  vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
+  vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
+  vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
+  vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
+  vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
+  vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
+  vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
+
+  sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
+}
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
new file mode 100644
index 000..9ecc62e234b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_

[PATCH] s390: testsuite: Remove xfail for vpopct{b,h}

2024-04-21 Thread Stefan Schulze Frielinghaus
Starting with r14-9316-g7890836de20912 patterns for vpopct{b,h} are also
detected.  Thus, remove xfails.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vxe/popcount-1.c: Remove xfail.
---
 Ok for mainline?

 gcc/testsuite/gcc.target/s390/vxe/popcount-1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c 
b/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c
index 9ea835a1cf0..25ef354f963 100644
--- a/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c
+++ b/gcc/testsuite/gcc.target/s390/vxe/popcount-1.c
@@ -21,7 +21,7 @@ vpopctb (uv16qi a)
 
   return r;
 }
-/* { dg-final { scan-assembler "vpopctb\t%v24,%v24" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler "vpopctb\t%v24,%v24" } } */
 
 uv8hi __attribute__((noinline))
 vpopcth (uv8hi a)
@@ -34,7 +34,7 @@ vpopcth (uv8hi a)
 
   return r;
 }
-/* { dg-final { scan-assembler "vpopcth\t%v24,%v24" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler "vpopcth\t%v24,%v24" } } */
 
 uv4si __attribute__((noinline))
 vpopctf (uv4si a)
-- 
2.44.0



Re: [PATCH v1] RISC-V: Add xfail test case for widening register overlap of vf4/vf8

2024-04-21 Thread juzhe.zh...@rivai.ai
LGTM.



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2024-04-22 11:19
To: gcc-patches
CC: juzhe.zhong; kito.cheng; rdapp.gcc; Pan Li
Subject: [PATCH v1] RISC-V: Add xfail test case for widening register overlap 
of vf4/vf8
From: Pan Li 
 
We reverted the patch below for register group overlap.  This patch adds
the related insn test and marks it as xfail; the xfail will be removed
once register overlap is supported in GCC-15.
 
303195e2a6b RISC-V: Support widening register overlap for vf4/vf8
 
The following test suite passed.
* The rv64gcv full regression test.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/pr112431-16.c: New test.
* gcc.target/riscv/rvv/base/pr112431-17.c: New test.
* gcc.target/riscv/rvv/base/pr112431-18.c: New test.
 
Signed-off-by: Pan Li 
---
.../gcc.target/riscv/rvv/base/pr112431-16.c   | 68 +++
.../gcc.target/riscv/rvv/base/pr112431-17.c   | 51 ++
.../gcc.target/riscv/rvv/base/pr112431-18.c   | 51 ++
3 files changed, 170 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
new file mode 100644
index 000..42d11611d98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+   size_t sum5, size_t sum6, size_t sum7)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
+  vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
+  vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
+  vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
+  vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
+  vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
+  vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
+  vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
+
+  sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
+}
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
new file mode 100644
index 000..9ecc62e234b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it +

[PATCH v1] RISC-V: Add xfail test case for widening register overlap of vf4/vf8

2024-04-21 Thread pan2 . li
From: Pan Li 

We reverted the patch below for register group overlap.  This patch adds
the related insn test and marks it as xfail; the xfail will be removed
once register overlap is supported in GCC-15.

303195e2a6b RISC-V: Support widening register overlap for vf4/vf8

The following test suite passed.
* The rv64gcv full regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr112431-16.c: New test.
* gcc.target/riscv/rvv/base/pr112431-17.c: New test.
* gcc.target/riscv/rvv/base/pr112431-18.c: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/base/pr112431-16.c   | 68 +++
 .../gcc.target/riscv/rvv/base/pr112431-17.c   | 51 ++
 .../gcc.target/riscv/rvv/base/pr112431-18.c   | 51 ++
 3 files changed, 170 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
new file mode 100644
index 000..42d11611d98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+ size_t sum5, size_t sum6, size_t sum7)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
+  vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
+  vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
+  vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
+  vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
+  vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
+  vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
+  vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
+
+  asm volatile("nop" ::: "memory");
+  size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
+  size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
+  size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
+  size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
+  size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
+  size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
+  size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
+  size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
+
+  sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
+}
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} { xfail riscv*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
new file mode 100644
index 000..9ecc62e234b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+  vint8m2_t v3 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+  it += vl;
+
+  asm volatile("nop" ::: "memory");
+  vint32m8_t vw0 = __riscv_vsext_vf4_i32m8 (v0, vl);
+  

Re: [PATCH] c++: Check if allocation functions are xobj members [PR114078]

2024-04-21 Thread Patrick Palka
On Sat, 20 Apr 2024, Nathaniel Shead wrote:

> Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?
> 
> -- >8 --
> 
> A class allocation member function is implicitly 'static' by
> [class.free] p3, so cannot have an explicit object parameter.
> 
>   PR c++/114078
> 
> gcc/cp/ChangeLog:
> 
>   * decl.cc (grokdeclarator): Check allocation functions for xobj
>   parameters.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/cpp23/explicit-obj-ops-alloc.C: New test.

LGTM

> 
> Signed-off-by: Nathaniel Shead 
> ---
>  gcc/cp/decl.cc  |  6 ++
>  gcc/testsuite/g++.dg/cpp23/explicit-obj-ops-alloc.C | 11 +++
>  2 files changed, 17 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/cpp23/explicit-obj-ops-alloc.C
> 
> diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
> index 65ab64885ff..2af026d255d 100644
> --- a/gcc/cp/decl.cc
> +++ b/gcc/cp/decl.cc
> @@ -13728,6 +13728,12 @@ grokdeclarator (const cp_declarator *declarator,
>   inform (DECL_SOURCE_LOCATION (xobj_parm),
>   "explicit object parameter declared here");
> }
> + if (unqualified_id
> + && identifier_p (unqualified_id)
> + && IDENTIFIER_NEWDEL_OP_P (unqualified_id))
> +   error_at (DECL_SOURCE_LOCATION (xobj_parm),
> + "%qD cannot be an explicit object member "
> + "function", unqualified_id);
> }
> }
>   tree pushed_scope = NULL_TREE;
> diff --git a/gcc/testsuite/g++.dg/cpp23/explicit-obj-ops-alloc.C 
> b/gcc/testsuite/g++.dg/cpp23/explicit-obj-ops-alloc.C
> new file mode 100644
> index 000..8a277db7ef5
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp23/explicit-obj-ops-alloc.C
> @@ -0,0 +1,11 @@
> +// PR c++/114078
> +// { dg-do compile { target c++23 } }
> +
> +using size_t = decltype(sizeof(0));
> +
> +struct S {
> +  void* operator new(this size_t);  // { dg-error "explicit object" }
> +  void* operator new[](this size_t);  // { dg-error "explicit object" }
> +  void operator delete(this void*);  // { dg-error "explicit object" }
> +  void operator delete[](this void*);  // { dg-error "explicit object" }
> +};
> -- 
> 2.43.2
> 
> 



Re: [PATCH] c++: Fix ICE with xobj parms and maybe incomplete decl-specifiers

2024-04-21 Thread Patrick Palka
> Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?
> 
> -- >8 --
> 
> This fixes a null dereference issue when decl_specifiers.type is not yet
> provided.
> 
> gcc/cp/ChangeLog:
> 
>   * parser.cc (cp_parser_parameter_declaration): Check if
>   decl_specifiers.type is null.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/cpp23/explicit-obj-basic7.C: New test.

LGTM

> 
> Signed-off-by: Nathaniel Shead 
> ---
>  gcc/cp/parser.cc | 5 +++--
>  gcc/testsuite/g++.dg/cpp23/explicit-obj-basic7.C | 9 +
>  2 files changed, 12 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp23/explicit-obj-basic7.C
> 
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index 50d3ad35b61..97ee2650dc4 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -25780,8 +25780,9 @@ cp_parser_parameter_declaration (cp_parser *parser,
>  }
>  
>if (xobj_param_p
> -  && (declarator ? declarator->parameter_pack_p
> -  : PACK_EXPANSION_P (decl_specifiers.type)))
> +  && ((declarator && declarator->parameter_pack_p)
> +   || (decl_specifiers.type
> +   && PACK_EXPANSION_P (decl_specifiers.type
>  {
>location_t xobj_param
>   = make_location (decl_specifiers.locations[ds_this],
> diff --git a/gcc/testsuite/g++.dg/cpp23/explicit-obj-basic7.C 
> b/gcc/testsuite/g++.dg/cpp23/explicit-obj-basic7.C
> new file mode 100644
> index 000..a474e97fc18
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp23/explicit-obj-basic7.C
> @@ -0,0 +1,9 @@
> +// { dg-do compile { target c++23 } }
> +
> +// Shouldn't ICE
> +struct S {
> +  void a(this long);
> +  void b(this const long);
> +  void c(this long unsigned);
> +  void c(this signed);
> +};
> -- 
> 2.43.2
> 
> 



[PATCH] add rlwinm pattern for DImode for constant building

2024-04-21 Thread Jiufu Guo
Hi,

The 'rlwinm' pattern is already widely used for SImode.  Since this
instruction can touch the whole 64-bit register, some 64-bit (DImode)
constants can be built via 'lis/li+rlwinm'.  To achieve this, a new pattern
for 'rlwinm' is added, and 'rs6000_emit_set_long_const' is updated to check
whether a constant can be built by 'lis/li; rlwinm'.

Bootstrap and regtest pass on ppc64{,le}.

Is this patch ok for trunk (when stage1 is open)?

Jeff (Jiufu Guo).

gcc/ChangeLog:

* config/rs6000/rs6000-protos.h (can_be_rotated_to_lowbits): Add new
parameter.
* config/rs6000/rs6000.cc (can_be_built_by_li_lis_and_rlwinm): New 
function.
(rs6000_emit_set_long_const): Generate 'lis/li+rlwinm'.
(can_be_rotated_to_lowbits): Add new parameter.
* config/rs6000/rs6000.md (rlwinm_di_mask): New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr93012.c: Update to match 'rlwinm'.
* gcc.target/powerpc/rlwinm4di-1.c: New test.
* gcc.target/powerpc/rlwinm4di-2.c: New test.
* gcc.target/powerpc/rlwinm4di.c: New test.
* gcc.target/powerpc/rlwinm4di.h: New test.

---
 gcc/config/rs6000/rs6000-protos.h |  2 +-
 gcc/config/rs6000/rs6000.cc   | 65 ++-
 gcc/config/rs6000/rs6000.md   | 18 +
 gcc/testsuite/gcc.target/powerpc/pr93012.c|  2 +-
 .../gcc.target/powerpc/rlwinm4di-1.c  | 25 +++
 .../gcc.target/powerpc/rlwinm4di-2.c  | 19 ++
 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c  |  6 ++
 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h  | 25 +++
 8 files changed, 158 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rlwinm4di.h

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 09a57a806fa..10505a8061a 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -36,7 +36,7 @@ extern bool vspltisw_vupkhsw_constant_p (rtx, machine_mode, 
int * = nullptr);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
-extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *);
+extern bool can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT, int, int *, 
bool = false);
 extern bool can_be_rotated_to_positive_16bits (HOST_WIDE_INT);
 extern bool can_be_rotated_to_negative_15bits (HOST_WIDE_INT);
 extern int num_insns_constant (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 6ba9df4f02e..853eaede673 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10454,6 +10454,51 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int 
*shift, HOST_WIDE_INT *mask)
   return false;
 }
 
+/* Check if value C can be generated by 2 instructions, one instruction
+   is li/lis, another instruction is rlwinm.  */
+
+static bool
+can_be_built_by_li_lis_and_rlwinm (HOST_WIDE_INT c, HOST_WIDE_INT *val,
+  int *shift, HOST_WIDE_INT *mask)
+{
+  unsigned HOST_WIDE_INT low = c & 0xULL;
+  unsigned HOST_WIDE_INT high = (c >> 32) & 0xULL;
+  unsigned HOST_WIDE_INT v;
+
+  /* diff of high and low (high ^ low) should be the mask position.  */
+  unsigned HOST_WIDE_INT m = low ^ high;
+  int tz = ctz_hwi (m);
+  int lz = clz_hwi (m);
+  if (m != 0)
+m = ((HOST_WIDE_INT_M1U >> (lz + tz)) << tz);
+  if (high != 0)
+m = ~m;
+  v = high != 0 ? high : ((low | ~m) & 0x);
+
+  if ((high != 0) && ((v & m) != low || lz < 33 || tz < 1))
+return false;
+
+  /* rotl32 on positive/negative value of 'li' 15/16bits.  */
+  int n;
+  if (!can_be_rotated_to_lowbits (v, 15, &n, true)
+  && !can_be_rotated_to_lowbits ((~v) & 0xULL, 15, &n, true))
+{
+  /* rotate32 from a negative value of 'lis'.  */
+  if (!can_be_rotated_to_lowbits (v & 0xULL, 16, &n, true))
+   return false;
+  n += 16;
+}
+  n = 32 - (n % 32);
+  n %= 32;
+  v = ((v >> n) | (v << (32 - n))) & 0x;
+  if (v & 0x8000ULL)
+v |= HOST_WIDE_INT_M1U << 32;
+  *mask = m;
+  *val = v;
+  *shift = n;
+  return true;
+}
+
 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
Output insns to set DEST equal to the constant C as a series of
lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
@@ -10553,6 +10598,18 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, 
int *num_insns)
   return;
 }
 
+  HOST_WIDE_INT val;
+  if (can_be_built_by_li_lis_and_rlwinm (c, &val, &shift, &mask))
+{
+  /* li/lis; rlwinm */
+  count_or_emit_insn (temp, GEN_INT (val));
+  rtx low = temp ? gen_lowp

Re: [pushed] c++/modules: make bits_in/out move-constructible

2024-04-21 Thread Christophe Lyon
Hi Patrick,

On Sat, 13 Apr 2024 at 22:12, Patrick Palka  wrote:
>
> Pushed as obvious after verifying C++11 bootstrap is restored.

I guess this also fixes the bootstrap_ubsan breakage on aarch64
reported by Linaro CI?
See https://linaro.atlassian.net/browse/GNU-1199
(I think you also received a notification about this a few days ago?)

Thanks,

Christophe

>
> -- >8 --
>
> gcc/cp/ChangeLog:
>
> * module.cc (struct bytes_in::bits_in): Define defaulted
> move ctor.
> (struct bytes_out::bits_out): Likewise.
> ---
>  gcc/cp/module.cc | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> index bbed82652d4..c6f71e11515 100644
> --- a/gcc/cp/module.cc
> +++ b/gcc/cp/module.cc
> @@ -706,6 +706,7 @@ struct bytes_in::bits_in {
>  bflush ();
>}
>
> +  bits_in(bits_in&&) = default;
>bits_in(const bits_in&) = delete;
>bits_in& operator=(const bits_in&) = delete;
>
> @@ -752,6 +753,7 @@ struct bytes_out::bits_out {
>  bflush ();
>}
>
> +  bits_out(bits_out&&) = default;
>bits_out(const bits_out&) = delete;
>bits_out& operator=(const bits_out&) = delete;
>
> --
> 2.44.0.591.g8f7582d995
>


Re: [PATCH v1] RISC-V: Add xfail test case for highpart register overlap of vx/vf widen

2024-04-21 Thread 钟居哲
LGTM



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2024-04-21 13:01
To: gcc-patches
CC: juzhe.zhong; kito.cheng; rdapp.gcc; Pan Li
Subject: [PATCH v1] RISC-V: Add xfail test case for highpart register overlap 
of vx/vf widen
From: Pan Li 
 
We reverted the patch below for register group overlap; add the related
insn test and mark it as xfail.  We will remove the xfail once register
overlap is supported in GCC-15.
 
a23415d7572 RISC-V: Support highpart register overlap for widen vx/vf 
instructions
 
The below test suites are passed.
* The rv64gcv fully regression test.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/pr112431-22.c: New test.
* gcc.target/riscv/rvv/base/pr112431-23.c: New test.
* gcc.target/riscv/rvv/base/pr112431-24.c: New test.
* gcc.target/riscv/rvv/base/pr112431-25.c: New test.
* gcc.target/riscv/rvv/base/pr112431-26.c: New test.
* gcc.target/riscv/rvv/base/pr112431-27.c: New test.
 
Signed-off-by: Pan Li 
---
.../gcc.target/riscv/rvv/base/pr112431-22.c   | 188 ++
.../gcc.target/riscv/rvv/base/pr112431-23.c   | 119 +++
.../gcc.target/riscv/rvv/base/pr112431-24.c   |  86 
.../gcc.target/riscv/rvv/base/pr112431-25.c   | 104 ++
.../gcc.target/riscv/rvv/base/pr112431-26.c   |  68 +++
.../gcc.target/riscv/rvv/base/pr112431-27.c   |  51 +
6 files changed, 616 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-23.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-24.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-25.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-26.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-27.c
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c
new file mode 100644
index 000..ac56703c75c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-22.c
@@ -0,0 +1,188 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+   size_t sum5, size_t sum6, size_t sum7, size_t sum8, size_t sum9,
+   size_t sum10, size_t sum11, size_t sum12, size_t sum13, size_t sum14,
+   size_t sum15)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 + sum8 + sum9
+ + sum10 + sum11 + sum12 + sum13 + sum14 + sum15;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+{
+  vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v8 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v9 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v10 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v11 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v12 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v13 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v14 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  vint8m1_t v15 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+  it += vl;
+  
+  asm volatile("nop" ::: "memory");
+  vint16m2_t vw0 = __riscv_vwadd_vx_i16m2 (v0, 33, vl);
+  vint16m2_t vw1 = __riscv_vwadd_vx_i16m2 (v1, 33, vl);
+  vint16m2_t vw2 = __riscv_vwadd_vx_i16m2 (v2, 33, vl);
+  vint16m2_t vw3 = __riscv_vwadd_vx_i16m2 (v3, 33, vl);
+  vint16m2_t vw4 = __riscv_vwadd_vx_i16m2 (v4, 33, vl);
+  vint16m2_t vw5 = __riscv_vwadd_vx_i16m2 (v5, 33, vl);
+  vint16m2_t vw6 = __riscv_vwadd_vx_i16m2 (v6, 33, vl);
+  vint16m2_t vw7 = __riscv_vwadd_vx_i16m2 (v7, 33, vl);
+  vint16m2_t vw8 = __riscv_vwadd_vx_i16m2 (v8, 33, vl);
+  vint16m2_t vw9 = __riscv_vwadd_vx_i16m2 (v9, 33, vl);
+  vint16m2_t vw10 = __riscv_vwadd_vx_i16m2 (v10, 33, vl);
+  vint16m2_t vw11 = __riscv_vwadd_vx_i16m2 (v11, 33, vl);
+  vint16m2_t vw12 = __riscv_vwadd_vx_i16m2 (v12, 33, vl);
+  vint16m2_t vw13 = __riscv_vwadd_vx_i16m2 (v13, 33, vl);
+  vint16m2_t vw14 = __r

[patch,avr,applied] PR114794 - Tweak divmodqi4

2024-04-21 Thread Georg-Johann Lay

Instead of jumping to a place that ROLs r_arg1 (with C=0),
LSL r_arg1 can be performed prior to the loop.  This
reduces the number of loop iterations from 9 to 8.

Applied as obvious.

Johann

AVR: target/114794 - Tweak __udivmodqi4

libgcc/
PR target/114794
* config/avr/lib1funcs.S (__udivmodqi4): Tweak.

diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
index 535510ab867..af4d7d97016 100644
--- a/libgcc/config/avr/lib1funcs.S
+++ b/libgcc/config/avr/lib1funcs.S
@@ -1339,9 +1339,9 @@ DEFUN __umulsidi3

 #if defined (L_udivmodqi4)
 DEFUN __udivmodqi4
-   sub r_rem,r_rem ; clear remainder and carry
-   ldi r_cnt,9 ; init loop counter
-   rjmp__udivmodqi4_ep ; jump to entry point
+   clr r_rem   ; clear remainder
+   ldi r_cnt,8 ; init loop counter
+   lsl r_arg1  ; shift dividend
 __udivmodqi4_loop:
rol r_rem   ; shift dividend into remainder
cp  r_rem,r_arg2; compare remainder & divisor


[PATCH, aarch64] v2: Preparatory patch to place target independent and dependent changed code in one file

2024-04-21 Thread Ajit Agarwal
Hello Alex/Richard:

All review comments are addressed and changes are made to transform_for_base
function as per consensus.

The common infrastructure for load/store pair fusion is divided into
target-independent and target-dependent code.

The target-independent code is the generic code, with pure virtual functions
serving as the interface between the target-independent and target-dependent
code.

The target-dependent code is the implementation of those pure virtual
functions for the aarch64 target, plus the call into the target-independent
code.

Bootstrapped on aarch64-linux-gnu.

Thanks & Regards
Ajit



aarch64: Preparatory patch to place target independent and
dependent changed code in one file

The common infrastructure for load/store pair fusion is divided into
target-independent and target-dependent code.

The target-independent code is the generic code, with pure virtual functions
serving as the interface between the target-independent and target-dependent
code.

The target-dependent code is the implementation of those pure virtual
functions for the aarch64 target, plus the call into the target-independent
code.

2024-04-21  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/aarch64/aarch64-ldp-fusion.cc: Place target
independent and dependent changed code
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 484 +++
 1 file changed, 325 insertions(+), 159 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 365dcf48b22..83a917e1d20 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -138,6 +138,189 @@ struct alt_base
   poly_int64 offset;
 };
 
+// Virtual base class for load/store walkers used in alias analysis.
+struct alias_walker
+{
+  virtual bool conflict_p (int &budget) const = 0;
+  virtual insn_info *insn () const = 0;
+  virtual bool valid () const = 0;
+  virtual void advance () = 0;
+};
+
+// Forward declaration to be used inside the aarch64_pair_fusion class.
+bool ldp_operand_mode_ok_p (machine_mode mode);
+rtx aarch64_destructure_load_pair (rtx regs[2], rtx pattern);
+rtx aarch64_destructure_store_pair (rtx regs[2], rtx pattern);
+rtx aarch64_gen_writeback_pair (rtx wb_effect, rtx pair_mem, rtx regs[2],
+   bool load_p);
+enum class writeback{
+  WRITEBACK_PAIR_P,
+  WRITEBACK
+};
+
+struct pair_fusion {
+
+  pair_fusion ()
+  {
+calculate_dominance_info (CDI_DOMINATORS);
+df_analyze ();
+crtl->ssa = new rtl_ssa::function_info (cfun);
+  };
+  // Return true if GPR is FP or SIMD accesses, passed
+  // with GPR reg_op rtx, machine mode and load_p.
+  virtual bool fpsimd_op_p (rtx, machine_mode, bool)
+  {
+return false;
+  }
+  // Return true if pair operand mode is ok. Passed with
+  // machine mode.
+  virtual bool pair_operand_mode_ok_p (machine_mode mode) = 0;
+  // Return true if reg operand is ok, passed with load_p,
+  // reg_op rtx and machine mode.
+  virtual bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
+ machine_mode mem_mode) = 0;
+  // Return alias check limit.
+  virtual int pair_mem_alias_check_limit () = 0;
+  // Return true if there is writeback opportunities. Passed
+  // with enum writeback.
+  virtual bool handle_writeback_opportunities (enum writeback wback) = 0 ;
+  // Return true if mem ok ldp stp policy model passed with
+  // rtx mem, load_p and machine mode.
+  virtual bool pair_mem_ok_with_policy (rtx first_mem, bool load_p,
+   machine_mode mode) = 0;
+  // Gen load store mem pair. Return load store rtx passed
+  // with arguments load store pattern, writeback rtx and
+  // load_p.
+  virtual rtx gen_mem_pair (rtx *pats, rtx writeback,
+   bool load_p) = 0;
+  // Return true if memory writeback can be promoted, passed
+  // with insn, rtx pattern and load_p. load_p is set by this
+  // hook.
+  virtual bool pair_mem_promote_writeback_p (insn_info *, rtx, bool &)
+  {
+ return false;
+  }
+  // Return true if we track loads.
+  virtual bool track_loads_p ()
+  {
+return true;
+  }
+  // Return true if we track stores.
+  virtual bool track_stores_p ()
+  {
+return true;
+  }
+  // Return true if offset is out of range.
+  virtual bool pair_mem_out_of_range_p (HOST_WIDE_INT off) = 0;
+  // Return destructure pair. Passed with rtx reg, insn pattern
+  // and load_p.
+  virtual rtx gen_destructure_pair (rtx regs[2], rtx rti, bool load_p) = 0;
+  // Return writeback pair. Passed with rtx writeback effect, mem rtx
+  // regs rtx and load_p.
+  virtual rtx gen_writeback_pair (rtx wb_effect, rtx mem,
+ rtx regs[2], bool load_p) = 0;
+  void ldp_fusion_bb (bb_info *bb);
+  insn_info * find_trailing_add (insn_info *insns[2],
+const insn_range_info &pair_range,
+int initial_writeback,
+rtx *writeback_effect,
+d