date:20230426

[PATCH] RISC-V: Add required tls to read thread pointer test

2023-04-26 Thread Pan Li via Gcc-patches

From: Pan Li 

The read-thread-pointer test may require gcc to be configured
with --enable-tls. If it is not, the x4 (aka tp) register will not
be present in the assembly code.

This patch makes the dg checking require tls. The test is run
when gcc is configured with --enable-tls and is marked as unsupported
when it is configured with --disable-tls.

Configured with --enable-tls:
=== gcc Summary ===
# of expected passes            16

Configured with --disable-tls:
=== gcc Summary ===
# of unsupported tests          8

gcc/testsuite/ChangeLog:

* gcc.target/riscv/read-thread-pointer.c: Add required tls.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/read-thread-pointer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c 
b/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c
index 401fb421129..5f460b5f746 100644
--- a/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c
+++ b/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target tls_native } */
 
 void *get_tp()
 {
-- 
2.34.1

[PATCH v3] RISCV: Add vector psabi checking.

2023-04-26 Thread yanzhang.wang--- via Gcc-patches

From: Yanzhang Wang 

This patch adds a check for whether a function's argument or return value is
a vector type, and emits a warning if so.

gcc/ChangeLog:

* config/riscv/riscv.cc:
(riscv_scalable_vector_type_p): Determine whether the type is scalable 
vector.
(riscv_arg_has_vector): Determine whether the arg is vector type.
(riscv_pass_in_vector_p): Check the vector type param is passed by 
value.
(riscv_get_arg_info): Add the checking.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/vector-abi-1.c: New test.
* gcc.target/riscv/vector-abi-2.c: New test.
* gcc.target/riscv/vector-abi-3.c: New test.
* gcc.target/riscv/vector-abi-4.c: New test.
* gcc.target/riscv/vector-abi-5.c: New test.

Signed-off-by: Yanzhang Wang 
Co-authored-by: Kito Cheng 
---
 gcc/config/riscv/riscv.cc | 73 +++
 gcc/testsuite/gcc.target/riscv/vector-abi-1.c | 14 
 gcc/testsuite/gcc.target/riscv/vector-abi-2.c | 14 
 gcc/testsuite/gcc.target/riscv/vector-abi-3.c | 14 
 gcc/testsuite/gcc.target/riscv/vector-abi-4.c | 16 
 gcc/testsuite/gcc.target/riscv/vector-abi-5.c | 15 
 6 files changed, 146 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-5.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 76eee4a55e9..06e9fe7d924 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3728,6 +3728,76 @@ riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
   GEN_INT (offset2;
 }
 
+/* Use the TYPE_SIZE to distinguish the type with vector_size attribute and
+   intrinsic vector type.  Because we can't get the decl for the params.  */
+
+static bool
+riscv_scalable_vector_type_p (const_tree type)
+{
+  tree size = TYPE_SIZE (type);
+  if (size && TREE_CODE (size) == INTEGER_CST)
+return false;
+
+  /* For the data type like vint32m1_t, the size code is POLY_INT_CST.  */
+  return true;
+}
+
+static bool
+riscv_arg_has_vector (const_tree type)
+{
+  bool is_vector = false;
+
+  switch (TREE_CODE (type))
+{
+case RECORD_TYPE:
+  if (!COMPLETE_TYPE_P (type))
+   break;
+
+  for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+   if (TREE_CODE (f) == FIELD_DECL)
+ {
+   tree field_type = TREE_TYPE (f);
+   if (!TYPE_P (field_type))
+ break;
+
+   /* Ignore it if it's fixed length vector.  */
+   if (VECTOR_TYPE_P (field_type))
+ is_vector = riscv_scalable_vector_type_p (field_type);
+   else
+ is_vector = riscv_arg_has_vector (field_type);
+ }
+
+  break;
+
+case VECTOR_TYPE:
+  is_vector = riscv_scalable_vector_type_p (type);
+  break;
+
+default:
+  is_vector = false;
+  break;
+}
+
+  return is_vector;
+}
+
+/* Pass the type to check whether it's a vector type or contains vector type.
+   Only check the value type and no checking for vector pointer type.  */
+
+static void
+riscv_pass_in_vector_p (const_tree type)
+{
+  static int warned = 0;
+
+  if (type && riscv_arg_has_vector (type) && !warned)
+{
+  warning (OPT_Wpsabi, "ABI for the scalable vector type is currently in "
+  "experimental stage and may changes in the upcoming version of "
+  "GCC.");
+  warned = 1;
+}
+}
+
 /* Fill INFO with information about a single argument, and return an
RTL pattern to pass or return the argument.  CUM is the cumulative
state for earlier arguments.  MODE is the mode of this argument and
@@ -3812,6 +3882,9 @@ riscv_get_arg_info (struct riscv_arg_info *info, const 
CUMULATIVE_ARGS *cum,
}
 }
 
+  /* Only check existing of vector type.  */
+  riscv_pass_in_vector_p (type);
+
   /* Work out the size of the argument.  */
   num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE 
(mode).to_constant ();
   num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
diff --git a/gcc/testsuite/gcc.target/riscv/vector-abi-1.c 
b/gcc/testsuite/gcc.target/riscv/vector-abi-1.c
new file mode 100644
index 000..969f14277a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/vector-abi-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -march=rv64gcv -mabi=lp64d" } */
+
+#include "riscv_vector.h"
+
+void
+fun (vint32m1_t a) { } /* { dg-warning "the scalable vector type" } */
+
+void
+bar ()
+{
+  vint32m1_t a;
+  fun (a);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/vector-abi-2.c 
b/gcc/testsuite/gcc.target/riscv/vector-abi-2.c
new file mode 100644
index 000..b752760b76f
--- /dev/null
+++

[PATCH v2] RISCV: Add vector psabi checking.

2023-04-26 Thread yanzhang.wang--- via Gcc-patches

From: Yanzhang Wang 

This patch adds a check for whether a function's argument or return value is
a vector type, and emits a warning if so.

gcc/ChangeLog:

* config/riscv/riscv.cc:
(riscv_scalable_vector_type_p): Determine whether the type is scalable 
vector.
(riscv_arg_has_vector): Determine whether the arg is vector type.
(riscv_pass_in_vector_p): Check the vector type param is passed by 
value.
(riscv_get_arg_info): Add the checking.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/vector-abi-1.c: New test.
* gcc.target/riscv/vector-abi-2.c: New test.
* gcc.target/riscv/vector-abi-3.c: New test.
* gcc.target/riscv/vector-abi-4.c: New test.
* gcc.target/riscv/vector-abi-5.c: New test.

Signed-off-by: Yanzhang Wang 
Co-authored-by: Kito Cheng 
---
 gcc/config/riscv/riscv.cc | 73 +++
 gcc/testsuite/gcc.target/riscv/vector-abi-1.c | 14 
 gcc/testsuite/gcc.target/riscv/vector-abi-2.c | 14 
 gcc/testsuite/gcc.target/riscv/vector-abi-3.c | 14 
 gcc/testsuite/gcc.target/riscv/vector-abi-4.c | 16 
 gcc/testsuite/gcc.target/riscv/vector-abi-5.c | 15 
 6 files changed, 146 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-5.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 76eee4a55e9..06e9fe7d924 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3728,6 +3728,76 @@ riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
   GEN_INT (offset2;
 }
 
+/* Use the TYPE_SIZE to distinguish the type with vector_size attribute and
+   intrinsic vector type.  Because we can't get the decl for the params.  */
+
+static bool
+riscv_scalable_vector_type_p (const_tree type)
+{
+  tree size = TYPE_SIZE (type);
+  if (size && TREE_CODE (size) == INTEGER_CST)
+return false;
+
+  /* For the data type like vint32m1_t, the size code is POLY_INT_CST.  */
+  return true;
+}
+
+static bool
+riscv_arg_has_vector (const_tree type)
+{
+  bool is_vector = false;
+
+  switch (TREE_CODE (type))
+{
+case RECORD_TYPE:
+  if (!COMPLETE_TYPE_P (type))
+   break;
+
+  for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+   if (TREE_CODE (f) == FIELD_DECL)
+ {
+   tree field_type = TREE_TYPE (f);
+   if (!TYPE_P (field_type))
+ break;
+
+   /* Ignore it if it's fixed length vector.  */
+   if (VECTOR_TYPE_P (field_type))
+ is_vector = riscv_scalable_vector_type_p (field_type);
+   else
+ is_vector = riscv_arg_has_vector (field_type);
+ }
+
+  break;
+
+case VECTOR_TYPE:
+  is_vector = riscv_scalable_vector_type_p (type);
+  break;
+
+default:
+  is_vector = false;
+  break;
+}
+
+  return is_vector;
+}
+
+/* Pass the type to check whether it's a vector type or contains vector type.
+   Only check the value type and no checking for vector pointer type.  */
+
+static void
+riscv_pass_in_vector_p (const_tree type)
+{
+  static int warned = 0;
+
+  if (type && riscv_arg_has_vector (type) && !warned)
+{
+  warning (OPT_Wpsabi, "ABI for the scalable vector type is currently in "
+  "experimental stage and may changes in the upcoming version of "
+  "GCC.");
+  warned = 1;
+}
+}
+
 /* Fill INFO with information about a single argument, and return an
RTL pattern to pass or return the argument.  CUM is the cumulative
state for earlier arguments.  MODE is the mode of this argument and
@@ -3812,6 +3882,9 @@ riscv_get_arg_info (struct riscv_arg_info *info, const 
CUMULATIVE_ARGS *cum,
}
 }
 
+  /* Only check existing of vector type.  */
+  riscv_pass_in_vector_p (type);
+
   /* Work out the size of the argument.  */
   num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE 
(mode).to_constant ();
   num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
diff --git a/gcc/testsuite/gcc.target/riscv/vector-abi-1.c 
b/gcc/testsuite/gcc.target/riscv/vector-abi-1.c
new file mode 100644
index 000..114ee6de483
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/vector-abi-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -march=rv64gcv -mabi=lp64d" } */
+
+#include "riscv_vector.h"
+
+void
+fun (vint32m1_t a) { } /* { dg-warning "the vector type" } */
+
+void
+bar ()
+{
+  vint32m1_t a;
+  fun (a);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/vector-abi-2.c 
b/gcc/testsuite/gcc.target/riscv/vector-abi-2.c
new file mode 100644
index 000..fd4569535cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/vector-abi-2.c
@@

[PATCH] libsanitizer: cherry-pick commit 05551c658269 from upstream

2023-04-26 Thread H.J. Lu via Gcc-patches

cherry-pick:

05551c658269 [sanitizer] Correct alignment of x32 __sanitizer_siginfo

* sanitizer_common/sanitizer_platform_limits_posix.h
(__sanitizer_siginfo_pad): Use u64 to align x32
__sanitizer_siginfo to 8 bytes.
---
 .../sanitizer_common/sanitizer_platform_limits_posix.h   | 5 +
 1 file changed, 5 insertions(+)

diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h 
b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h
index cfca7bdedbe..e6f298c26e1 100644
--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -578,8 +578,13 @@ struct __sanitizer_sigset_t {
 #endif
 
 struct __sanitizer_siginfo_pad {
+#if SANITIZER_X32
+  // x32 siginfo_t is aligned to 8 bytes.
+  u64 pad[128 / sizeof(u64)];
+#else
   // Require uptr, because siginfo_t is always pointer-size aligned on Linux.
   uptr pad[128 / sizeof(uptr)];
+#endif
 };
 
 #if SANITIZER_LINUX
-- 
2.40.0

[pushed] c++: restore instantiate_decl assert

2023-04-26 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

For PR61445 I removed this assert, but PR108242 demonstrated why it's still
useful; to avoid regressing the former testcase I check pattern_defined
in the assert.

This reverts r212524.

PR c++/61445

gcc/cp/ChangeLog:

* pt.cc (instantiate_decl): Assert !defer_ok for local
class members.
---
 gcc/cp/pt.cc | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 93a055c66a1..6df16fef0dd 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -27090,6 +27090,12 @@ instantiate_decl (tree d, bool defer_ok, bool 
expl_inst_class_mem_p)
pattern_defined = ! DECL_EXTERNAL (code_pattern);
 }
 
+  /* Any local class members should be instantiated from the TAG_DEFN
+ with defer_ok == 0.  */
+  gcc_checking_assert (!defer_ok || !pattern_defined
+  || !decl_function_context (d)
+  || LAMBDA_TYPE_P (DECL_CONTEXT (d)));
+
   /* We may be in the middle of deferred access check.  Disable it now.  */
   push_deferring_access_checks (dk_no_deferred);
 

base-commit: 9b40ca2569d71e54d7dbbdbfd00d733770576f6f
-- 
2.31.1

Re: libsanitizer: sync from master

2023-04-26 Thread H.J. Lu via Gcc-patches

On Wed, Apr 26, 2023 at 4:37 PM H.J. Lu  wrote:
>
> On Wed, Apr 26, 2023 at 1:24 PM Martin Liška  wrote:
> >
> > On 4/26/23 21:23, H.J. Lu wrote:
> > > On Wed, Apr 26, 2023 at 6:52 AM Martin Liška  wrote:
> > >>
> > >> On 11/15/22 16:47, Martin Liška wrote:
> > >>> Hi.
> > >>>
> > >>> I've just pushed libsanitizer update that was tested on x86_64-linux 
> > >>> and ppc64le-linux systems.
> > >>> Moreover, I run bootstrap on x86_64-linux and checked ABI difference 
> > >>> with abidiff.
> > >>
> > >> Hello.
> > >>
> > >> And I've done the same now and merged upstream version 
> > >> 3185e47b5a8444e9fd.
> > >
> > > It caused the bootstrap failure:
> > >
> > > https://gcc.gnu.org/pipermail/gcc-regression/2023-April/077674.html
> >
> > Can you see what's the build error in the build log? I can't see it from the
> > sent link?
>
> I opened:
>
> https://github.com/llvm/llvm-project/issues/62394
>
> and will submit a patch upstream.
>

Fixed in upstream by

https://reviews.llvm.org/D142995

-- 
H.J.

Re: [committed] libgcc CRIS: Define TARGET_HAS_NO_HW_DIVIDE

2023-04-26 Thread Hans-Peter Nilsson via Gcc-patches

> From: Paul Koning 
> Date: Wed, 26 Apr 2023 21:02:31 -0400

> > On Apr 26, 2023, at 8:05 PM, Hans-Peter Nilsson  wrote:
> > 
> > Not many targets define this besides msp430, pdp11, xtensa,
> > and arm compared to those that appear to unconditionally
> > have a hardware division instruction (also, pdp11 and
> > msp430 seem confused and should be empty instead of "1"  ...
> 
> How so, "confused"?  The documentation says it should be
> defined, it doesn't say that it should be defined as
> empty.  What goes wrong if it's defined as 1 rather than
> empty?

Only future edits, expecting action to follow as if it was a
non-zero expression like many of the target macros.

> The documentation is also somewhat misleading, because it
> says to define it if the hardware has no divide
> instruction.  The more accurate statement is that it
> should be defined if the hardware has no 64 / 32 bit
> divide hardware support.  pdp11.h points this out in a
> comment, because most pdp11s do have divide instructions
> but those are for 32 / 16 bits.

That might be true, and I've heard patches are welcome.

brgds, H-P

Re: [committed] libgcc CRIS: Define TARGET_HAS_NO_HW_DIVIDE

2023-04-26 Thread Paul Koning via Gcc-patches

> On Apr 26, 2023, at 8:05 PM, Hans-Peter Nilsson  wrote:
> 
> Not many targets define this besides msp430, pdp11, xtensa,
> and arm compared to those that appear to unconditionally
> have a hardware division instruction (also, pdp11 and
> msp430 seem confused and should be empty instead of "1"  ...

How so, "confused"?  The documentation says it should be defined, it doesn't 
say that it should be defined as empty.  What goes wrong if it's defined as 1 
rather than empty?

The documentation is also somewhat misleading, because it says to define it if 
the hardware has no divide instruction.  The more accurate statement is that it 
should be defined if the hardware has no 64 / 32 bit divide hardware support.  
pdp11.h points this out in a comment, because most pdp11s do have divide 
instructions but those are for 32 / 16 bits.

paul

[committed] libgcc CRIS: Define TARGET_HAS_NO_HW_DIVIDE

2023-04-26 Thread Hans-Peter Nilsson via Gcc-patches

Not many targets define this besides msp430, pdp11, xtensa,
and arm compared to those that appear to unconditionally
have a hardware division instruction (also, pdp11 and
msp430 seem confused and should be empty instead of "1" and
"(!  TARGET_HWMULT)" - and having hardware multiplication
doesn't have a bearing anyway even if that worked; see
numbers below for an example).

Heads-up maintainers of ports without hardware division
(including conditionally, for multilibbed configurations)!

-- >8 --
With this, execution time for e.g. __moddi3 goes from 59 to 40 cycles in
the "fast" case or from 290 to 200 cycles in the "slow" case (when the
!TARGET_HAS_NO_HW_DIVIDE variant calls division and modulus functions
for 32-bit SImode), as exposed by gcc.c-torture/execute/arith-rand-ll.c
compiled for -march=v10.

Unfortunately, it just puts a performance improvement "dent" of 0.07%
in an arith-rand-ll.c-based performance test - where all loops are also
reduced to 1/10.

The size of every affected libgcc function is reduced to less than
half and they are all now leaf functions.

* config/cris/t-cris (HOST_LIBGCC2_CFLAGS): Add
-DTARGET_HAS_NO_HW_DIVIDE.
---
 libgcc/config/cris/t-cris | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libgcc/config/cris/t-cris b/libgcc/config/cris/t-cris
index b582974a42ee..e0020294be96 100644
--- a/libgcc/config/cris/t-cris
+++ b/libgcc/config/cris/t-cris
@@ -8,3 +8,6 @@ $(LIB2ADD): $(srcdir)/config/cris/arit.c
echo "#define L$$name" > tmp-$@ \
&& echo '#include "$<"' >> tmp-$@ \
&& mv -f tmp-$@ $@
+
+# Use an appropriate implementation when implementing DImode division.
+HOST_LIBGCC2_CFLAGS += -DTARGET_HAS_NO_HW_DIVIDE
-- 
2.30.2

Re: [PATCH v4 05/10] RISC-V: autovec: Add autovectorization patterns for binary integer operations

2023-04-26 Thread Michael Collison


Hi Robin and Juzhe,

Just took a look and I like the approach.

On 4/26/23 19:43, juzhe.zhong wrote:

Yeah, Robin's stuff is what I want and makes perfect sense to me.
---- Replied Message ----
From: Robin Dapp
Date: 04/27/2023 02:15
To: juzhe.zh...@rivai.ai, collison, gcc-patches
Cc: jeffreyalaw, Kito.cheng, kito.cheng, palmer, palmer
Subject: Re: [PATCH v4 05/10] RISC-V:autovec: Add autovectorization patterns for binary integer operations


Hi Michael,

I have the diff below for the binops in my tree locally.
Maybe something like this works for you? Untested but compiles and
the expander helpers would need to be fortified obviously.

Regards
Robin

--

gcc/ChangeLog:

   * config/riscv/autovec.md (3): New binops expander.
   * config/riscv/riscv-protos.h (emit_nonvlmax_binop): Define.
   * config/riscv/riscv-v.cc (emit_pred_binop): New function.
   (emit_nonvlmax_binop): New function.
   * config/riscv/vector-iterators.md: New iterator.
---
gcc/config/riscv/autovec.md  | 12 
gcc/config/riscv/riscv-protos.h  |  1 +
gcc/config/riscv/riscv-v.cc  | 89 
gcc/config/riscv/vector-iterators.md | 20 +++
4 files changed, 97 insertions(+), 25 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index b5d46ff57ab..c21d241f426 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -47,3 +47,15 @@ (define_expand "len_store_"
                 operands[1], operands[2], mode);
  DONE;
})
+
+(define_expand "3"
+  [(set (match_operand:VI 0 "register_operand")
+    (any_int_binop:VI (match_operand:VI 1 "register_operand")
+              (match_operand:VI 2 "register_operand")))]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_binop (code_for_pred (, 
mode),

+                 operands[0], operands[1], operands[2],
+                 gen_reg_rtx (Pmode), mode);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-protos.h 
b/gcc/config/riscv/riscv-protos.h

index f6ea6846736..5cca543c773 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -163,6 +163,7 @@ void emit_hard_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
+void emit_nonvlmax_binop (unsigned, rtx, rtx, rtx, rtx, machine_mode);
enum vlmul_type get_vlmul (machine_mode);
unsigned int get_ratio (machine_mode);
int get_ta (rtx);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 5e69427ac54..98ebc052340 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -52,7 +52,7 @@ namespace riscv_vector {
template  class insn_expander
{
public:
-  insn_expander () : m_opno (0) {}
+  insn_expander () : m_opno (0), has_dest(false) {}
  void add_output_operand (rtx x, machine_mode mode)
  {
create_output_operand (_ops[m_opno++], x, mode);
@@ -83,6 +83,44 @@ public:
add_input_operand (gen_int_mode (type, Pmode), Pmode);
  }

+  void set_dest_and_mask (rtx mask, rtx dest, machine_mode mask_mode)
+  {
+    dest_mode = GET_MODE (dest);
+    has_dest = true;
+
+    add_output_operand (dest, dest_mode);
+
+    if (mask)
+  add_input_operand (mask, GET_MODE (mask));
+    else
+  add_all_one_mask_operand (mask_mode);
+
+    add_vundef_operand (dest_mode);
+  }
+
+  void set_len_and_policy (rtx len, bool vlmax_p)
+    {
+  gcc_assert (has_dest);
+  gcc_assert (len || vlmax_p);
+
+  if (len)
+    add_input_operand (len, Pmode);
+  else
+    {
+      rtx vlmax = gen_reg_rtx (Pmode);
+      emit_vlmax_vsetvl (dest_mode, vlmax);
+      add_input_operand (vlmax, Pmode);
+    }
+
+  if (GET_MODE_CLASS (dest_mode) != MODE_VECTOR_BOOL)
+    add_policy_operand (get_prefer_tail_policy (), 
get_prefer_mask_policy ());

+
+  if (vlmax_p)
+    add_avl_type_operand (avl_type::VLMAX);
+  else
+    add_avl_type_operand (avl_type::NONVLMAX);
+    }
+
  void expand (enum insn_code icode, bool temporary_volatile_p = false)
  {
if (temporary_volatile_p)
@@ -96,6 +134,8 @@ public:

private:
  int m_opno;
+  bool has_dest;
+  machine_mode dest_mode;
  expand_operand m_ops[MAX_OPERANDS];
};

@@ -183,37 +223,29 @@ emit_pred_op (unsigned icode, rtx mask, rtx 
dest, rtx src, rtx len,

     machine_mode mask_mode, bool vlmax_p)
{
  insn_expander<8> e;
-  machine_mode mode = GET_MODE (dest);
+  e.set_dest_and_mask (mask, dest, mask_mode);

-  e.add_output_operand (dest, mode);
-
-  if (mask)
-    e.add_input_operand (mask, GET_MODE (mask));
-  else
-    e.add_all_one_mask_operand (mask_mode);
+

Ping: Re: [PATCH v3] doc: Document order of define_peephole2 scanning

2023-04-26 Thread Hans-Peter Nilsson via Gcc-patches

> From: Hans-Peter Nilsson 
> Date: Wed, 19 Apr 2023 18:59:14 +0200
[...]

> So again: Approvers: pdf output reviewed.  Ok to commit?
> -- >8 --
> I was a bit surprised when my newly-added define_peephole2 didn't
> match, but it was because it was expected to partially match the
> generated output of a previous define_peephole2, which matched and
> modified the last insn of a sequence to be matched.  I had assumed
> that the algorithm backed-up the size of the match-buffer, thereby
> exposing newly created opportunities *with sufficient context* to all
> define_peephole2's.  While things can change in that direction, let's
> start with documenting the current state.
> 
>   * doc/md.texi (define_peephole2): Document order of scanning.
> ---
>  gcc/doc/md.texi | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 07bf8bdebffb..300d104d58ab 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -9362,6 +9362,15 @@ If the preparation falls through (invokes neither 
> @code{DONE} nor
>  @code{FAIL}), then the @code{define_peephole2} uses the replacement
>  template.
>  
> +Insns are scanned in forward order from beginning to end for each basic
> +block.  Matches are attempted in order of @code{define_peephole2}
> +appearance in the @file{md} file.  After a successful replacement,
> +scanning for further opportunities for @code{define_peephole2}, resumes
> +with the first generated replacement insn as the first insn to be
> +matched against all @code{define_peephole2}.  For the example above,
> +after its successful replacement, the first insn that can be matched by
> +a @code{define_peephole2} is @code{(set (match_dup 4) (match_dup 1))}.
> +
>  @end ifset
>  @ifset INTERNALS
>  @node Insn Attributes
> -- 
> 2.30.2
>

Re: libsanitizer: sync from master

2023-04-26 Thread H.J. Lu via Gcc-patches

On Wed, Apr 26, 2023 at 1:24 PM Martin Liška  wrote:
>
> On 4/26/23 21:23, H.J. Lu wrote:
> > On Wed, Apr 26, 2023 at 6:52 AM Martin Liška  wrote:
> >>
> >> On 11/15/22 16:47, Martin Liška wrote:
> >>> Hi.
> >>>
> >>> I've just pushed libsanitizer update that was tested on x86_64-linux and 
> >>> ppc64le-linux systems.
> >>> Moreover, I run bootstrap on x86_64-linux and checked ABI difference with 
> >>> abidiff.
> >>
> >> Hello.
> >>
> >> And I've done the same now and merged upstream version 3185e47b5a8444e9fd.
> >
> > It caused the bootstrap failure:
> >
> > https://gcc.gnu.org/pipermail/gcc-regression/2023-April/077674.html
>
> Can you see what's the build error in the build log? I can't see it from the
> sent link?

I opened:

https://github.com/llvm/llvm-project/issues/62394

and will submit a patch upstream.

> Martin
>
> >
> >> Martin
> >>
> >>>
> >>> Pushed as r13-4068-g3037f11fb86eda.
> >>>
> >>> Cheers,
> >>> Martin
> >>
> >
> >
>


-- 
H.J.

[committed] RISC-V: Fix sync.md and riscv.cc whitespace errors

2023-04-26 Thread Patrick O'Neill


On 4/26/23 14:21, Patrick O'Neill wrote:


This patch fixes whitespace errors introduced with
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616807.html

2023-04-26 Patrick O'Neill

gcc/ChangeLog:

* config/riscv/riscv.cc: Fix whitespace.
* config/riscv/sync.md: Fix whitespace.

Signed-off-by: Patrick O'Neill


Committed (obvious whitespace changes, pre-approved by Jeff Law)

Patrick

Re: [PATCH v2] RISC-V: Fix sync.md and riscv.cc whitespace errors

2023-04-26 Thread Patrick O'Neill




On 4/26/23 14:45, Bernhard Reutner-Fischer wrote:

On 26 April 2023 23:21:06 CEST, Patrick O'Neill  wrote:

This patch fixes whitespace errors introduced with
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616807.html

2023-04-26 Patrick O'Neill

gcc/ChangeLog:

* config/riscv/riscv.cc: Fix whitespace.
* config/riscv/sync.md: Fix whitespace.

The .md change above is gone by now.


There are still some sync.md changes (comment whitespace/function whitespace 
changes).


No reason to resend the patch, just fixing it before you push it is fine, once 
ACKed (although such patches usually count as obvious).


Thanks for the help with this - still getting the hang of pushing my own 
changes!

Patrick


Many thanks for the quick tweak!
cheers,


Signed-off-by: Patrick O'Neill
---
Patch was checked with contrib/check_GNU_style.py

Whitespace changes in this patch are 2 flavors:
* Add space between function name and ()
* 2 spaces between end of comment and  */
---
v2 Changelog:
* Ignored checker warning for space before [] in rtl
---
gcc/config/riscv/riscv.cc |  6 +++---
gcc/config/riscv/sync.md  | 16 
2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0f890469d7a..1529855a2b4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7193,7 +7193,7 @@ riscv_subword_address (rtx mem, rtx *aligned_mem, rtx 
*shift, rtx *mask,
   emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
 gen_lowpart (QImode, *shift)));

-  emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask));
+  emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
}

/* Leftshift a subword within an SImode register.  */
@@ -7206,8 +7206,8 @@ riscv_lshift_subword (machine_mode mode, rtx value, rtx 
shift,
   emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
  mode, 0));

-  emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
-gen_lowpart (QImode, shift)));
+  emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
+ gen_lowpart (QImode, shift)));
}

/* Initialize the GCC target structure.  */
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 83be6431cb6..19274528262 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -128,10 +128,10 @@
{
   /* We have no QImode/HImode atomics, so form a mask, then use
  subword_atomic_fetch_strong_nand to implement a LR/SC version of the
- operation. */
+ operation.  */

   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
- is disabled */
+ is disabled.  */

   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
@@ -193,10 +193,10 @@
{
   /* We have no QImode/HImode atomics, so form a mask, then use
  subword_atomic_fetch_strong_ to implement a LR/SC version of the
- operation. */
+ operation.  */

   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
- is disabled */
+ is disabled.  */

   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
@@ -367,7 +367,7 @@
 {
   rtx difference = gen_rtx_MINUS (SImode, val, exp);
   compare = gen_reg_rtx (SImode);
-  emit_move_insn  (compare, difference);
+  emit_move_insn (compare, difference);
 }

   if (word_mode != SImode)
@@ -393,10 +393,10 @@
{
   /* We have no QImode/HImode atomics, so form a mask, then use
  subword_atomic_cas_strong to implement a LR/SC version of the
- operation. */
+ operation.  */

   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
- is disabled */
+ is disabled.  */

   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
@@ -461,7 +461,7 @@
   "TARGET_ATOMIC"
{
   /* We have no QImode atomics, so use the address LSBs to form a mask,
- then use an aligned SImode atomic. */
+ then use an aligned SImode atomic.  */
   rtx result = operands[0];
   rtx mem = operands[1];
   rtx model = operands[2];
--
2.34.1

[PATCH v5 04/10] RISC-V:autovec: Add target vectorization hooks

2023-04-26 Thread Michael Collison

2023-04-24  Michael Collison  
Juzhe Zhong  

* config/riscv/riscv.cc
(riscv_estimated_poly_value): Implement
TARGET_ESTIMATED_POLY_VALUE.
(riscv_preferred_simd_mode): Implement
TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
(riscv_autovectorize_vector_modes): Implement
TARGET_AUTOVECTORIZE_VECTOR_MODES.
(riscv_get_mask_mode): Implement TARGET_VECTORIZE_GET_MASK_MODE.
(riscv_empty_mask_is_expensive): Implement
TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE.
(riscv_vectorize_create_costs): Implement
TARGET_VECTORIZE_CREATE_COSTS.
(TARGET_ESTIMATED_POLY_VALUE): Register target macro.
(TARGET_VECTORIZE_GET_MASK_MODE): Ditto.
(TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): Ditto.
---
 gcc/config/riscv/riscv.cc | 129 ++
 1 file changed, 129 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index dc47434fac4..77209b161f6 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -60,6 +60,15 @@ along with GCC; see the file COPYING3.  If not see
 #include "opts.h"
 #include "tm-constrs.h"
 #include "rtl-iter.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
+#include "cfgrtl.h"
+#include "sel-sched.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "gimple-expr.h"
+#include "tree-vectorizer.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -275,6 +284,9 @@ poly_uint16 riscv_vector_chunks;
 /* The number of bytes in a vector chunk.  */
 unsigned riscv_bytes_per_vector_chunk;
 
+/* Prefer vf for auto-vectorizer.  */
+unsigned riscv_vectorization_factor;
+
 /* Index R is the smallest register class that contains register R.  */
 const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
   GR_REGS, GR_REGS,GR_REGS,GR_REGS,
@@ -6363,6 +6375,9 @@ riscv_option_override (void)
 
   /* Convert -march to a chunks count.  */
   riscv_vector_chunks = riscv_convert_vector_bits ();
+
+  if (TARGET_VECTOR)
+riscv_vectorization_factor = RVV_LMUL1;
 }
 
 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
@@ -7057,6 +7072,105 @@ riscv_dwarf_poly_indeterminate_value (unsigned int i, 
unsigned int *factor,
   return RISCV_DWARF_VLENB;
 }
 
+/* Implement TARGET_ESTIMATED_POLY_VALUE.
+   Look into the tuning structure for an estimate.
+   KIND specifies the type of requested estimate: min, max or likely.
+   For cores with a known RVV width all three estimates are the same.
+   For generic RVV tuning we want to distinguish the maximum estimate from
+   the minimum and likely ones.
+   The likely estimate is the same as the minimum in that case to give a
+   conservative behavior of auto-vectorizing with RVV when it is a win
+   even for 128-bit RVV.
+   When RVV width information is available VAL.coeffs[1] is multiplied by
+   the number of VQ chunks over the initial Advanced SIMD 128 bits.  */
+
+static HOST_WIDE_INT
+riscv_estimated_poly_value (poly_int64 val,
+   poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
+{
+  unsigned int width_source = BITS_PER_RISCV_VECTOR.is_constant ()
+? (unsigned int) BITS_PER_RISCV_VECTOR.to_constant ()
+: (unsigned int) RVV_SCALABLE;
+
+  /* If there is no core-specific information then the minimum and likely
+ values are based on 128-bit vectors and the maximum is based on
+ the architectural maximum of 65536 bits.  */
+  if (width_source == RVV_SCALABLE)
+switch (kind)
+  {
+  case POLY_VALUE_MIN:
+  case POLY_VALUE_LIKELY:
+   return val.coeffs[0];
+
+  case POLY_VALUE_MAX:
+   return val.coeffs[0] + val.coeffs[1] * 15;
+  }
+
+  /* Allow BITS_PER_RISCV_VECTOR to be a bitmask of different VL, treating the
+ lowest as likely.  This could be made more general if future -mtune
+ options need it to be.  */
+  if (kind == POLY_VALUE_MAX)
+width_source = 1 << floor_log2 (width_source);
+  else
+width_source = least_bit_hwi (width_source);
+
+  /* If the core provides width information, use that.  */
+  HOST_WIDE_INT over_128 = width_source - 128;
+  return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
+}
+
+/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  */
+
+static machine_mode
+riscv_preferred_simd_mode (scalar_mode mode)
+{
+  if (TARGET_VECTOR)
+return riscv_vector::riscv_vector_preferred_simd_mode (mode);
+
+  return word_mode;
+}
+
+/* Implement TARGET_AUTOVECTORIZE_VECTOR_MODES for RVV.  */
+static unsigned int
+riscv_autovectorize_vector_modes (vector_modes *modes, bool)
+{
+  if (!TARGET_VECTOR)
+return 0;
+
+  if (riscv_vectorization_factor == RVV_LMUL1)
+{
+  modes->safe_push (VNx16QImode);
+  modes->safe_push (VNx8QImode);
+  modes->safe_push (VNx4QImode);
+  modes->safe_push (VNx2QImode);
+}
+
+  return 0;
+}
+
+/* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */
+
+static

[PATCH v5 07/10] vect: Verify that GET_MODE_NUNITS is a multiple of 2.

2023-04-26 Thread Michael Collison

While working on autovectorizing for the RISCV port I encountered an issue
where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is
evenly divisible by two. The RISC-V target has vector modes (e.g. VNx1DImode),
where GET_MODE_NUNITS is equal to one.

Tested on RISCV and x86_64-linux-gnu. Okay?

2023-03-09  Michael Collison  

* tree-vect-slp.cc (can_duplicate_and_interleave_p):
Check that GET_MODE_NUNITS is a multiple of 2.
---
 gcc/tree-vect-slp.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index d73deaecce0..a64fe454e19 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -423,10 +423,13 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned 
int count,
(GET_MODE_BITSIZE (int_mode), 1);
  tree vector_type
= get_vectype_for_scalar_type (vinfo, int_type, count);
+ poly_int64 half_nelts;
  if (vector_type
  && VECTOR_MODE_P (TYPE_MODE (vector_type))
  && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
-  GET_MODE_SIZE (base_vector_mode)))
+  GET_MODE_SIZE (base_vector_mode))
+ && multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)),
+2, &half_nelts))
{
  /* Try fusing consecutive sequences of COUNT / NVECTORS elements
 together into elements of type INT_TYPE and using the result
@@ -434,7 +437,7 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned 
int count,
  poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
  vec_perm_builder sel1 (nelts, 2, 3);
  vec_perm_builder sel2 (nelts, 2, 3);
- poly_int64 half_nelts = exact_div (nelts, 2);
+
  for (unsigned int i = 0; i < 3; ++i)
{
  sel1.quick_push (i);
-- 
2.34.1

[PATCH v5 02/10] RISC-V: autovec: Export policy functions to global scope

2023-04-26 Thread Michael Collison

2023-03-02  Michael Collison  
Juzhe Zhong  

* config/riscv/riscv-vector-builtins.cc (get_tail_policy_for_pred):
Remove static declaration to make externally visible.
(get_mask_policy_for_pred): Ditto.
* config/riscv/riscv-vector-builtins.h (get_tail_policy_for_pred):
New external declaration.
(get_mask_policy_for_pred): Ditto.
---
 gcc/config/riscv/riscv-vector-builtins.cc | 4 ++--
 gcc/config/riscv/riscv-vector-builtins.h  | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 01cea23d3e6..1ed9e4acc40 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -2493,7 +2493,7 @@ use_real_merge_p (enum predication_type_index pred)
 
 /* Get TAIL policy for predication. If predication indicates TU, return the TU.
Otherwise, return the prefer default configuration.  */
-static rtx
+rtx
 get_tail_policy_for_pred (enum predication_type_index pred)
 {
   if (pred == PRED_TYPE_tu || pred == PRED_TYPE_tum || pred == PRED_TYPE_tumu)
@@ -2503,7 +2503,7 @@ get_tail_policy_for_pred (enum predication_type_index 
pred)
 
 /* Get MASK policy for predication. If predication indicates MU, return the MU.
Otherwise, return the prefer default configuration.  */
-static rtx
+rtx
 get_mask_policy_for_pred (enum predication_type_index pred)
 {
   if (pred == PRED_TYPE_tumu || pred == PRED_TYPE_mu)
diff --git a/gcc/config/riscv/riscv-vector-builtins.h 
b/gcc/config/riscv/riscv-vector-builtins.h
index 8ffb9d33e33..de3fd6ca290 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -483,6 +483,9 @@ extern rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 
1];
 extern function_instance get_read_vl_instance (void);
 extern tree get_read_vl_decl (void);
 
+extern rtx get_tail_policy_for_pred (enum predication_type_index pred);
+extern rtx get_mask_policy_for_pred (enum predication_type_index pred);
+
 inline tree
 rvv_arg_type_info::get_scalar_type (vector_type_index type_idx) const
 {
-- 
2.34.1

[PATCH v5 10/10] RISC-V: autovec: This patch supports 8 bit auto-vectorization in riscv.

2023-04-26 Thread Michael Collison

From: Kevin Lee 

2023-04-14 Kevin Lee 
gcc/testsuite/ChangeLog:

* config/riscv/riscv.cc (riscv_autovectorize_vector_modes): Add
new vector mode
* gcc.target/riscv/rvv/autovec/loop-add-rv32.c: Support 8bit
type
* gcc.target/riscv/rvv/autovec/loop-add.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-and-rv32.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-and.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-div-rv32.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-div.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-max-rv32.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-max.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-min-rv32.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-min.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-mod-rv32.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-mod.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-mul-rv32.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-mul.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-or-rv32.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-or.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-sub-rv32.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-sub.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-xor-rv32.c: Ditto
* gcc.target/riscv/rvv/autovec/loop-xor.c: Ditto
---
 gcc/config/riscv/riscv.cc | 1 +
 .../gcc.target/riscv/rvv/autovec/loop-add-rv32.c  | 5 +++--
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c | 5 +++--
 .../gcc.target/riscv/rvv/autovec/loop-and-rv32.c  | 5 +++--
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and.c | 5 +++--
 .../gcc.target/riscv/rvv/autovec/loop-div-rv32.c  | 8 +---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-div.c | 8 +---
 .../gcc.target/riscv/rvv/autovec/loop-max-rv32.c  | 7 ---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-max.c | 7 ---
 .../gcc.target/riscv/rvv/autovec/loop-min-rv32.c  | 7 ---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-min.c | 7 ---
 .../gcc.target/riscv/rvv/autovec/loop-mod-rv32.c  | 8 +---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-mod.c | 8 +---
 .../gcc.target/riscv/rvv/autovec/loop-mul-rv32.c  | 5 +++--
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-mul.c | 5 +++--
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-or-rv32.c | 5 +++--
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-or.c  | 5 +++--
 .../gcc.target/riscv/rvv/autovec/loop-sub-rv32.c  | 5 +++--
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c | 5 +++--
 .../gcc.target/riscv/rvv/autovec/loop-xor-rv32.c  | 5 +++--
 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-xor.c | 5 +++--
 21 files changed, 73 insertions(+), 48 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 77209b161f6..f293414acd1 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7143,6 +7143,7 @@ riscv_autovectorize_vector_modes (vector_modes *modes, 
bool)
   modes->safe_push (VNx8QImode);
   modes->safe_push (VNx4QImode);
   modes->safe_push (VNx2QImode);
+  modes->safe_push (VNx1QImode);
 }
 
   return 0;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c
index bdc3b6892e9..76f5a3a3ff5 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c
@@ -10,8 +10,9 @@
   dst[i] = a[i] + b[i];\
   }
 
-/* *int8_t not autovec currently. */
 #define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t)\
  TEST_TYPE(int16_t)\
  TEST_TYPE(uint16_t)   \
  TEST_TYPE(int32_t)\
@@ -21,4 +22,4 @@
 
 TEST_ALL()
 
-/* { dg-final { scan-assembler-times {\tvadd\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
index d7f992c7d27..3d1e10bf4e1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
@@ -10,8 +10,9 @@
   dst[i] = a[i] + b[i];\
   }
 
-/* *int8_t not autovec currently. */
 #define TEST_ALL() \
+ TEST_TYPE(int8_t) \
+ TEST_TYPE(uint8_t)\
  TEST_TYPE(int16_t)\
  TEST_TYPE(uint16_t)   \
  TEST_TYPE(int32_t)\
@@ -21,4 +22,4 @@
 
 TEST_ALL()
 
-/* { dg-final { scan-assembler-times {\tvadd\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and-rv32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and-rv32.c
index eb1ac5b44fd..a4c7abfb0ad 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and-rv32.c
+++

[PATCH v5 03/10] RISC-V:autovec: Add auto-vectorization support functions

2023-04-26 Thread Michael Collison

2023-04-24  Michael Collison  
Juzhe Zhong  

* config/riscv/riscv-v.cc
(riscv_vector_preferred_simd_mode): New function.
(get_mask_policy_no_pred): Ditto.
(get_tail_policy_no_pred): Ditto.
(riscv_vector_mask_mode_p): Ditto.
(riscv_vector_get_mask_mode): Ditto.
---
 gcc/config/riscv/riscv-v.cc | 79 +
 1 file changed, 79 insertions(+)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 392f5d02e17..ecd98680d64 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -39,9 +39,11 @@
 #include "emit-rtl.h"
 #include "tm_p.h"
 #include "target.h"
+#include "targhooks.h"
 #include "expr.h"
 #include "optabs.h"
 #include "tm-constrs.h"
+#include "riscv-vector-builtins.h"
 #include "rtx-vector-builder.h"
 
 using namespace riscv_vector;
@@ -176,6 +178,46 @@ calculate_ratio (unsigned int sew, enum vlmul_type vlmul)
   return ratio;
 }
 
+/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE for RVV.  */
+
+machine_mode
+riscv_vector_preferred_simd_mode (scalar_mode mode)
+{
+  if (!TARGET_VECTOR)
+return word_mode;
+
+  switch (mode)
+{
+case E_QImode:
+  return VNx8QImode;
+  break;
+case E_HImode:
+  return VNx4HImode;
+  break;
+case E_SImode:
+  return VNx2SImode;
+  break;
+case E_DImode:
+  if (riscv_vector_elen_flags != MASK_VECTOR_ELEN_32
+ && riscv_vector_elen_flags != MASK_VECTOR_ELEN_FP_32)
+   return VNx1DImode;
+  break;
+case E_SFmode:
+  if (TARGET_HARD_FLOAT && riscv_vector_elen_flags != MASK_VECTOR_ELEN_32
+ && riscv_vector_elen_flags != MASK_VECTOR_ELEN_64)
+   return VNx2SFmode;
+  break;
+case E_DFmode:
+  if (TARGET_DOUBLE_FLOAT && TARGET_VECTOR_ELEN_FP_64)
+   return VNx1DFmode;
+  break;
+default:
+  break;
+}
+
+  return word_mode;
+}
+
 /* Emit an RVV unmask && vl mov from SRC to DEST.  */
 static void
 emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
@@ -421,6 +463,43 @@ get_avl_type_rtx (enum avl_type type)
   return gen_int_mode (type, Pmode);
 }
 
+rtx
+get_mask_policy_no_pred ()
+{
+  return get_mask_policy_for_pred (PRED_TYPE_none);
+}
+
+rtx
+get_tail_policy_no_pred ()
+{
+  return get_mask_policy_for_pred (PRED_TYPE_none);
+}
+
+/* Return true if it is a RVV mask mode.  */
+bool
+riscv_vector_mask_mode_p (machine_mode mode)
+{
+  return (mode == VNx1BImode || mode == VNx2BImode || mode == VNx4BImode
+ || mode == VNx8BImode || mode == VNx16BImode || mode == VNx32BImode
+ || mode == VNx64BImode);
+}
+
+/* Implement TARGET_VECTORIZE_GET_MASK_MODE for RVV.  */
+
+opt_machine_mode
+riscv_vector_get_mask_mode (machine_mode mode)
+{
+  machine_mode mask_mode;
+  int nf = 1;
+
+  FOR_EACH_MODE_IN_CLASS (mask_mode, MODE_VECTOR_BOOL)
+  if (GET_MODE_INNER (mask_mode) == BImode
+  && known_eq (GET_MODE_NUNITS (mask_mode) * nf, GET_MODE_NUNITS (mode))
+  && riscv_vector_mask_mode_p (mask_mode))
+return mask_mode;
+  return default_get_mask_mode (mode);
+}
+
 /* Return the RVV vector mode that has NUNITS elements of mode INNER_MODE.
This function is not only used by builtins, but also will be used by
auto-vectorization in the future.  */
-- 
2.34.1

[PATCH v5 09/10] RISC-V: autovec: This patch adds a guard for VNx1 vectors that are present in ports like riscv.

2023-04-26 Thread Michael Collison

From: Kevin Lee 

Kevin Lee 
gcc/ChangeLog:

* tree-vect-data-refs.cc (vect_grouped_store_supported): Add new
condition
---
 gcc/tree-vect-data-refs.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 8daf7bd7dd3..df393ba723d 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -5399,6 +5399,8 @@ vect_grouped_store_supported (tree vectype, unsigned 
HOST_WIDE_INT count)
  poly_uint64 nelt = GET_MODE_NUNITS (mode);
 
  /* The encoding has 2 interleaved stepped patterns.  */
+if(!multiple_p (nelt, 2))
+  return false;
  vec_perm_builder sel (nelt, 2, 3);
  sel.quick_grow (6);
  for (i = 0; i < 3; i++)
-- 
2.34.1

[PATCH v5 08/10] RISC-V:autovec: Add autovectorization tests for binary integer

2023-04-26 Thread Michael Collison

2023-04-05  Michael Collison  

* gcc.target/riscv/rvv/autovec/loop-and-rv32.c: New
test to verify code generation of vector "and" on rv32.
* gcc.target/riscv/rvv/autovec/loop-and.c: New
test to verify code generation of vector "and" on rv64.
* gcc.target/riscv/rvv/autovec/loop-div-rv32.c: New
test to verify code generation of vector divide on rv32.
* gcc.target/riscv/rvv/autovec/loop-div.c: New
test to verify code generation of vector divide on rv64.
* gcc.target/riscv/rvv/autovec/loop-max-rv32.c: New
test to verify code generation of vector maximum on rv32.
* gcc.target/riscv/rvv/autovec/loop-max.c: New
test to verify code generation of vector maximum on rv64.
* gcc.target/riscv/rvv/autovec/loop-min-rv32.c: New
test to verify code generation of vector minimum on rv32.
* gcc.target/riscv/rvv/autovec/loop-min.c: New
test to verify code generation of vector minimum on rv64.
* gcc.target/riscv/rvv/autovec/loop-mod-rv32.c: New
test to verify code generation of vector modulus on rv32.
* gcc.target/riscv/rvv/autovec/loop-mod.c: New
test to verify code generation of vector modulus on rv64.
* gcc.target/riscv/rvv/autovec/loop-mul-rv32.c: New
test to verify code generation of vector multiply on rv32.
* gcc.target/riscv/rvv/autovec/loop-mul.c: New
test to verify code generation of vector multiply on rv64.
* gcc.target/riscv/rvv/autovec/loop-or-rv32.c: New
test to verify code generation of vector "or" on rv32.
* gcc.target/riscv/rvv/autovec/loop-or.c: New
test to verify code generation of vector "or" on rv64.
* gcc.target/riscv/rvv/autovec/loop-xor-rv32.c: New
test to verify code generation of vector xor on rv32.
* gcc.target/riscv/rvv/autovec/loop-xor.c: New
test to verify code generation of vector xor on rv64.
---
 .../riscv/rvv/autovec/loop-and-rv32.c | 24 ++
 .../gcc.target/riscv/rvv/autovec/loop-and.c   | 24 ++
 .../riscv/rvv/autovec/loop-div-rv32.c | 25 +++
 .../gcc.target/riscv/rvv/autovec/loop-div.c   | 25 +++
 .../riscv/rvv/autovec/loop-max-rv32.c | 25 +++
 .../gcc.target/riscv/rvv/autovec/loop-max.c   | 25 +++
 .../riscv/rvv/autovec/loop-min-rv32.c | 25 +++
 .../gcc.target/riscv/rvv/autovec/loop-min.c   | 25 +++
 .../riscv/rvv/autovec/loop-mod-rv32.c | 25 +++
 .../gcc.target/riscv/rvv/autovec/loop-mod.c   | 25 +++
 .../riscv/rvv/autovec/loop-mul-rv32.c | 24 ++
 .../gcc.target/riscv/rvv/autovec/loop-mul.c   | 24 ++
 .../riscv/rvv/autovec/loop-or-rv32.c  | 24 ++
 .../gcc.target/riscv/rvv/autovec/loop-or.c| 24 ++
 .../riscv/rvv/autovec/loop-xor-rv32.c | 24 ++
 .../gcc.target/riscv/rvv/autovec/loop-xor.c   | 24 ++
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp|  3 +++
 17 files changed, 395 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-div-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-div.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-max-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-max.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-min-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-min.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-mod-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-mod.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-mul-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-mul.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-or-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-or.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-xor-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-xor.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and-rv32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and-rv32.c
new file mode 100644
index 000..eb1ac5b44fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and-rv32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -march=rv32gcv -mabi=ilp32d" 
} */
+
+#include 
+
+#define TEST_TYPE(TYPE)\
+  void vand_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n)\
+  {

Re: [PATCH v2] RISC-V: Fix sync.md and riscv.cc whitespace errors

2023-04-26 Thread Bernhard Reutner-Fischer via Gcc-patches

On 26 April 2023 23:21:06 CEST, Patrick O'Neill  wrote:
>This patch fixes whitespace errors introduced with
>https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616807.html
>
>2023-04-26 Patrick O'Neill 
>
>gcc/ChangeLog:
>
>   * config/riscv/riscv.cc: Fix whitespace.
>   * config/riscv/sync.md: Fix whitespace.

The .md change above is gone by now.
No reason to resend the patch; just fixing it before you push it is fine, once 
ACKed (although such patches usually count as obvious).

Many thanks for the quick tweak!
cheers,

>
>Signed-off-by: Patrick O'Neill 
>---
>Patch was checked with contrib/check_GNU_style.py
>
>Whitespace changes in this patch are 2 flavors:
> * Add space between function name and ()
> * 2 spaces between end of comment and  */
>---
>v2 Changelog:
> * Ignored checker warning for space before [] in rtl
>---
> gcc/config/riscv/riscv.cc |  6 +++---
> gcc/config/riscv/sync.md  | 16 
> 2 files changed, 11 insertions(+), 11 deletions(-)
>
>diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
>index 0f890469d7a..1529855a2b4 100644
>--- a/gcc/config/riscv/riscv.cc
>+++ b/gcc/config/riscv/riscv.cc
>@@ -7193,7 +7193,7 @@ riscv_subword_address (rtx mem, rtx *aligned_mem, rtx 
>*shift, rtx *mask,
>   emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
>gen_lowpart (QImode, *shift)));
>
>-  emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask));
>+  emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
> }
>
> /* Leftshift a subword within an SImode register.  */
>@@ -7206,8 +7206,8 @@ riscv_lshift_subword (machine_mode mode, rtx value, rtx 
>shift,
>   emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
> mode, 0));
>
>-  emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
>-   gen_lowpart (QImode, shift)));
>+  emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
>+gen_lowpart (QImode, shift)));
> }
>
> /* Initialize the GCC target structure.  */
>diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
>index 83be6431cb6..19274528262 100644
>--- a/gcc/config/riscv/sync.md
>+++ b/gcc/config/riscv/sync.md
>@@ -128,10 +128,10 @@
> {
>   /* We have no QImode/HImode atomics, so form a mask, then use
>  subword_atomic_fetch_strong_nand to implement a LR/SC version of the
>- operation. */
>+ operation.  */
>
>   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when 
> inlining
>- is disabled */
>+ is disabled.  */
>
>   rtx old = gen_reg_rtx (SImode);
>   rtx mem = operands[1];
>@@ -193,10 +193,10 @@
> {
>   /* We have no QImode/HImode atomics, so form a mask, then use
>  subword_atomic_fetch_strong_ to implement a LR/SC version of the
>- operation. */
>+ operation.  */
>
>   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when 
> inlining
>- is disabled */
>+ is disabled.  */
>
>   rtx old = gen_reg_rtx (SImode);
>   rtx mem = operands[1];
>@@ -367,7 +367,7 @@
> {
>   rtx difference = gen_rtx_MINUS (SImode, val, exp);
>   compare = gen_reg_rtx (SImode);
>-  emit_move_insn  (compare, difference);
>+  emit_move_insn (compare, difference);
> }
>
>   if (word_mode != SImode)
>@@ -393,10 +393,10 @@
> {
>   /* We have no QImode/HImode atomics, so form a mask, then use
>  subword_atomic_cas_strong to implement a LR/SC version of the
>- operation. */
>+ operation.  */
>
>   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when 
> inlining
>- is disabled */
>+ is disabled.  */
>
>   rtx old = gen_reg_rtx (SImode);
>   rtx mem = operands[1];
>@@ -461,7 +461,7 @@
>   "TARGET_ATOMIC"
> {
>   /* We have no QImode atomics, so use the address LSBs to form a mask,
>- then use an aligned SImode atomic. */
>+ then use an aligned SImode atomic.  */
>   rtx result = operands[0];
>   rtx mem = operands[1];
>   rtx model = operands[2];
>--
>2.34.1
>

[PATCH v5 00/10] RISC-V: autovec: Add autovec support

2023-04-26 Thread Michael Collison

This series of patches adds foundational support for RISC-V 
auto-vectorization. These patches are based on the current upstream rvv vector 
intrinsic support and are not a new implementation. Most of the implementation 
consists of adding the new vector cost model, the autovectorization patterns 
themselves and target hooks. This implementation only provides support for 
integer addition and subtraction as a proof of concept. This patch set should 
not be construed to be feature complete. Based on conversations with the 
community, these patches are intended to lay the groundwork for feature 
completion and collaboration within the RISC-V community.

These patches are largely based off the work of Juzhe Zhong 
(juzhe.zh...@rivai.ai) of RiVAI. More specifically, 
the rvv-next branch at https://github.com/riscv-collab/riscv-gcc.git 
is the foundation of this patch set.

As discussed on this list, if these patches are approved they will be merged 
into an "auto-vectorization" branch once gcc-13 branches for release. There are 
two known issues related to crashes (assert failures) associated with tree 
vectorization; for one of them I have sent a patch and received feedback. 

Changes in v5:

- Incorporated upstream comments, largely to delete unnecessary code

Changes in v4:

- Added support for binary integer operations and test cases
- Fixed bug to support 8-bit integer vectorization
- Fixed several assert errors related to non-multiple of two vector modes

Changes in v3:

- Removed the cost model and cost hooks based on feedback from Richard Biener
- Used RVV_VUNDEF macro to fix failing patterns

Changes in v2:

- Updated ChangeLog entry to include RiVAI contributions 
- Fixed ChangeLog email formatting 
- Fixed gnu formatting issues in the code 


Kevin Lee (2):
  This patch adds a guard for VNx1 vectors that are present in ports
like riscv.
  This patch supports 8 bit auto-vectorization in riscv.

Michael Collison (8):
  RISC-V: Add new predicates and function prototypes
  RISC-V: autovec: Export policy functions to global scope
  RISC-V:autovec: Add auto-vectorization support functions
  RISC-V:autovec: Add target vectorization hooks
  RISC-V:autovec: Add autovectorization patterns for binary integer &
len_load/store
  RISC-V:autovec: Add autovectorization tests for add & sub
  vect: Verify that GET_MODE_NUNITS is a multiple of 2.
  RISC-V:autovec: Add autovectorization tests for binary integer

 gcc/config/riscv/predicates.md|  13 ++
 gcc/config/riscv/riscv-opts.h |  29 
 gcc/config/riscv/riscv-protos.h   |   9 ++
 gcc/config/riscv/riscv-v.cc   |  79 +++
 gcc/config/riscv/riscv-vector-builtins.cc |   4 +-
 gcc/config/riscv/riscv-vector-builtins.h  |   3 +
 gcc/config/riscv/riscv.cc | 130 ++
 gcc/config/riscv/riscv.md |   1 +
 gcc/config/riscv/vector-auto.md   |  74 ++
 gcc/config/riscv/vector.md|   4 +-
 .../riscv/rvv/autovec/loop-add-rv32.c |  25 
 .../gcc.target/riscv/rvv/autovec/loop-add.c   |  25 
 .../riscv/rvv/autovec/loop-and-rv32.c |  25 
 .../gcc.target/riscv/rvv/autovec/loop-and.c   |  25 
 .../riscv/rvv/autovec/loop-div-rv32.c |  27 
 .../gcc.target/riscv/rvv/autovec/loop-div.c   |  27 
 .../riscv/rvv/autovec/loop-max-rv32.c |  26 
 .../gcc.target/riscv/rvv/autovec/loop-max.c   |  26 
 .../riscv/rvv/autovec/loop-min-rv32.c |  26 
 .../gcc.target/riscv/rvv/autovec/loop-min.c   |  26 
 .../riscv/rvv/autovec/loop-mod-rv32.c |  27 
 .../gcc.target/riscv/rvv/autovec/loop-mod.c   |  27 
 .../riscv/rvv/autovec/loop-mul-rv32.c |  25 
 .../gcc.target/riscv/rvv/autovec/loop-mul.c   |  25 
 .../riscv/rvv/autovec/loop-or-rv32.c  |  25 
 .../gcc.target/riscv/rvv/autovec/loop-or.c|  25 
 .../riscv/rvv/autovec/loop-sub-rv32.c |  25 
 .../gcc.target/riscv/rvv/autovec/loop-sub.c   |  25 
 .../riscv/rvv/autovec/loop-xor-rv32.c |  25 
 .../gcc.target/riscv/rvv/autovec/loop-xor.c   |  25 
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp|   3 +
 gcc/tree-vect-data-refs.cc|   2 +
 gcc/tree-vect-slp.cc  |   7 +-
 33 files changed, 864 insertions(+), 6 deletions(-)
 create mode 100644 gcc/config/riscv/vector-auto.md
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-and.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-div-rv32.c
 create mode 100644

[PATCH v5 06/10] RISC-V:autovec: Add autovectorization tests for add & sub

2023-04-26 Thread Michael Collison

2023-03-02  Michael Collison  
Vineet Gupta 

* gcc.target/riscv/rvv/autovec: New directory
for autovectorization tests.
* gcc.target/riscv/rvv/autovec/loop-add-rv32.c: New
test to verify code generation of vector add on rv32.
* gcc.target/riscv/rvv/autovec/loop-add.c: New
test to verify code generation of vector add on rv64.
* gcc.target/riscv/rvv/autovec/loop-sub-rv32.c: New
test to verify code generation of vector subtract on rv32.
* gcc.target/riscv/rvv/autovec/loop-sub.c: New
test to verify code generation of vector subtract on rv64.
---
 .../riscv/rvv/autovec/loop-add-rv32.c | 24 +++
 .../gcc.target/riscv/rvv/autovec/loop-add.c   | 24 +++
 .../riscv/rvv/autovec/loop-sub-rv32.c | 24 +++
 .../gcc.target/riscv/rvv/autovec/loop-sub.c   | 24 +++
 4 files changed, 96 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub-rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c
new file mode 100644
index 000..bdc3b6892e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -march=rv32gcv -mabi=ilp32d" 
} */
+
+#include 
+
+#define TEST_TYPE(TYPE)\
+  void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n)\
+  {\
+for (int i = 0; i < n; i++)\
+  dst[i] = a[i] + b[i];\
+  }
+
+/* *int8_t not autovec currently. */
+#define TEST_ALL() \
+ TEST_TYPE(int16_t)\
+ TEST_TYPE(uint16_t)   \
+ TEST_TYPE(int32_t)\
+ TEST_TYPE(uint32_t)   \
+ TEST_TYPE(int64_t)\
+ TEST_TYPE(uint64_t)
+
+TEST_ALL()
+
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
new file mode 100644
index 000..d7f992c7d27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -march=rv64gcv -mabi=lp64d" } 
*/
+
+#include 
+
+#define TEST_TYPE(TYPE)\
+  void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n)\
+  {\
+for (int i = 0; i < n; i++)\
+  dst[i] = a[i] + b[i];\
+  }
+
+/* *int8_t not autovec currently. */
+#define TEST_ALL() \
+ TEST_TYPE(int16_t)\
+ TEST_TYPE(uint16_t)   \
+ TEST_TYPE(int32_t)\
+ TEST_TYPE(uint32_t)   \
+ TEST_TYPE(int64_t)\
+ TEST_TYPE(uint64_t)
+
+TEST_ALL()
+
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub-rv32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub-rv32.c
new file mode 100644
index 000..7d0a40ec539
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub-rv32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -march=rv32gcv -mabi=ilp32d" 
} */
+
+#include 
+
+#define TEST_TYPE(TYPE)\
+  void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n)\
+  {\
+for (int i = 0; i < n; i++)\
+  dst[i] = a[i] - b[i];\
+  }
+
+/* *int8_t not autovec currently. */
+#define TEST_ALL() \
+ TEST_TYPE(int16_t)\
+ TEST_TYPE(uint16_t)   \
+ TEST_TYPE(int32_t)\
+ TEST_TYPE(uint32_t)   \
+ TEST_TYPE(int64_t)\
+ TEST_TYPE(uint64_t)
+
+TEST_ALL()
+
+/* { dg-final { scan-assembler-times {\tvsub\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c
new file mode 100644
index 000..c8900884f83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -march=rv64gcv -mabi=lp64d" } 
*/
+
+#include 
+
+#define TEST_TYPE(TYPE)\
+  void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n)\
+  {\
+for (int i = 0; i < n; i++)\
+  dst[i] = a[i] - b[i];\
+  }
+
+/*

[PATCH v5 05/10] RISC-V:autovec: Add autovectorization patterns for binary integer & len_load/store

2023-04-26 Thread Michael Collison

2023-04-25  Michael Collison  
Juzhe Zhong  

* config/riscv/riscv.md (riscv_vector_preferred_simd_mode): Include
vector-iterators.md.
* config/riscv/vector-auto.md: New file containing
autovectorization patterns.
* config/riscv/vector.md: Remove include of vector-iterators.md
and include vector-auto.md.
---
 gcc/config/riscv/riscv.md   |  1 +
 gcc/config/riscv/vector-auto.md | 74 +
 gcc/config/riscv/vector.md  |  4 +-
 3 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/vector-auto.md

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index bc384d9aedf..7f8f3a6cb18 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -135,6 +135,7 @@
 (include "predicates.md")
 (include "constraints.md")
 (include "iterators.md")
+(include "vector-iterators.md")
 
 ;; 
 ;;
diff --git a/gcc/config/riscv/vector-auto.md b/gcc/config/riscv/vector-auto.md
new file mode 100644
index 000..83d2ab6957a
--- /dev/null
+++ b/gcc/config/riscv/vector-auto.md
@@ -0,0 +1,74 @@
+;; Machine description for RISC-V 'V' Extension for GNU compiler.
+;; Copyright (C) 2022-2023 Free Software Foundation, Inc.
+;; Contributed by Juzhe Zhong (juzhe.zh...@rivai.ai), RiVAI Technologies Ltd.
+;; Contributed by Michael Collison (colli...@rivosinc.com), Rivos Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; len_load/len_store are sub-optimal patterns for RVV auto-vectorization support.
+;; We will replace them when len_maskload/len_maskstore are supported in the loop vectorizer.
+(define_expand "len_load_"
+  [(match_operand:V 0 "register_operand")
+   (match_operand:V 1 "memory_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (mode), operands[0],
+ operands[1], operands[2], mode);
+  DONE;
+})
+
+(define_expand "len_store_"
+  [(match_operand:V 0 "memory_operand")
+   (match_operand:V 1 "register_operand")
+   (match_operand 2 "vector_length_operand")
+   (match_operand 3 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_op (code_for_pred_mov (mode), operands[0],
+ operands[1], operands[2], mode);
+  DONE;
+})
+
+;; -
+;;  [INT] Vector binary patterns
+;; -
+
+(define_expand "3"
+  [(set (match_operand:VI 0 "register_operand")
+   (any_int_binop:VI (match_operand:VI 1 "")
+ (match_operand:VI 2 "")))]
+  "TARGET_VECTOR"
+{
+  using namespace riscv_vector;
+
+  rtx merge = RVV_VUNDEF (mode);
+  rtx vl = gen_reg_rtx (Pmode);
+  emit_vlmax_vsetvl (mode, vl);
+  rtx mask_policy = get_mask_policy_no_pred ();
+  rtx tail_policy = get_tail_policy_no_pred ();
+  rtx mask = CONSTM1_RTX(mode);
+  rtx vlmax_avl_p = get_avl_type_rtx (NONVLMAX);
+
+  emit_insn (gen_pred_ (operands[0], mask, merge, operands[1], 
operands[2],
+vl, tail_policy, mask_policy, 
vlmax_avl_p));
+
+  DONE;
+})
+
+
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 0ecca98f20c..2ac5b744503 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -26,8 +26,6 @@
 ;; - Auto-vectorization (TBD)
 ;; - Combine optimization (TBD)
 
-(include "vector-iterators.md")
-
 (define_constants [
(INVALID_ATTRIBUTE255)
(X0_REGNUM  0)
@@ -351,6 +349,8 @@
   (symbol_ref "INTVAL (operands[4])")]
(const_int INVALID_ATTRIBUTE)))
 
+(include "vector-auto.md")
+
 ;; -
 ;;  Miscellaneous Operations
 ;; -
-- 
2.34.1

[PATCH v5 01/10] RISC-V: autovec: Add new predicates and function prototypes

2023-04-26 Thread Michael Collison

2023-04-24  Michael Collison  
Juzhe Zhong  

* config/riscv/riscv-protos.h
(riscv_vector_preferred_simd_mode): New.
(riscv_vector_mask_mode_p): Ditto.
(riscv_vector_get_mask_mode): Ditto.
(emit_vlmax_vsetvl): Ditto.
(get_mask_policy_no_pred): Ditto.
(get_tail_policy_no_pred): Ditto.
(vlmul_field_enum): Ditto.
* config/riscv/riscv-v.cc (emit_vlmax_vsetvl):
Remove static scope.
* config/riscv/predicates.md (p_reg_or_const_csr_operand):
New predicate.
(vector_reg_or_const_dup_operand): Ditto.
* config/riscv/riscv-opts.h (riscv_autovec_preference_enum): New enum.
(riscv_vector_lmul_enum): Ditto.
(vlmul_field_enum): Ditto.
---
 gcc/config/riscv/predicates.md  | 13 +
 gcc/config/riscv/riscv-opts.h   | 29 +
 gcc/config/riscv/riscv-protos.h |  9 +
 3 files changed, 51 insertions(+)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 8654dbc5943..b3f2d622c7b 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -264,6 +264,14 @@
 })
 
 ;; Predicates for the V extension.
+(define_special_predicate "p_reg_or_const_csr_operand"
+  (match_code "reg, subreg, const_int")
+{
+  if (CONST_INT_P (op))
+return satisfies_constraint_K (op);
+  return GET_MODE (op) == Pmode;
+})
+
 (define_special_predicate "vector_length_operand"
   (ior (match_operand 0 "pmode_register_operand")
(match_operand 0 "const_csr_operand")))
@@ -291,6 +299,11 @@
   (and (match_code "const_vector")
(match_test "rtx_equal_p (op, riscv_vector::gen_scalar_move_mask 
(GET_MODE (op)))")))
 
+(define_predicate "vector_reg_or_const_dup_operand"
+  (ior (match_operand 0 "register_operand")
+   (match_test "const_vec_duplicate_p (op)
+   && !CONST_POLY_INT_P (CONST_VECTOR_ELT (op, 0))")))
+
 (define_predicate "vector_mask_operand"
   (ior (match_operand 0 "register_operand")
(match_operand 0 "vector_all_trues_mask_operand")))
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index cf0cd669be4..af77df11430 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -67,6 +67,35 @@ enum stack_protector_guard {
   SSP_GLOBAL   /* global canary */
 };
 
+/* RISC-V auto-vectorization preference.  */
+enum riscv_autovec_preference_enum {
+  NO_AUTOVEC,
+  RVV_SCALABLE,
+  RVV_FIXED_VLMAX
+};
+
+/* vectorization factor.  */
+enum riscv_vector_lmul_enum
+{
+  RVV_LMUL1 = 1,
+  RVV_LMUL2 = 2,
+  RVV_LMUL4 = 4,
+  RVV_LMUL8 = 8
+};
+
+enum vlmul_field_enum
+{
+  VLMUL_FIELD_000, /* LMUL = 1.  */
+  VLMUL_FIELD_001, /* LMUL = 2.  */
+  VLMUL_FIELD_010, /* LMUL = 4.  */
+  VLMUL_FIELD_011, /* LMUL = 8.  */
+  VLMUL_FIELD_100, /* RESERVED.  */
+  VLMUL_FIELD_101, /* LMUL = 1/8.  */
+  VLMUL_FIELD_110, /* LMUL = 1/4.  */
+  VLMUL_FIELD_111, /* LMUL = 1/2.  */
+  MAX_VLMUL_FIELD
+};
+
 #define MASK_ZICSR(1 << 0)
 #define MASK_ZIFENCEI (1 << 1)
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5244e8dcbf0..55056222e57 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -237,4 +237,13 @@ extern const char*
 th_mempair_output_move (rtx[4], bool, machine_mode, RTX_CODE);
 #endif
 
+/* Routines implemented in riscv-v.cc.  */
+
+namespace riscv_vector {
+extern machine_mode riscv_vector_preferred_simd_mode (scalar_mode mode);
+extern bool riscv_vector_mask_mode_p (machine_mode);
+extern opt_machine_mode riscv_vector_get_mask_mode (machine_mode mode);
+extern rtx get_mask_policy_no_pred ();
+extern rtx get_tail_policy_no_pred ();
+}
 #endif /* ! GCC_RISCV_PROTOS_H */
-- 
2.34.1

Re: [PATCH] RISC-V: Fix sync.md and riscv.cc whitespace errors

2023-04-26 Thread Bernhard Reutner-Fischer via Gcc-patches

On 26 April 2023 23:10:01 CEST, Andreas Schwab  wrote:
>On Apr 26 2023, Patrick O'Neill wrote:
>
>> @@ -290,10 +290,10 @@
>>[(set (match_operand:GPR 0 "register_operand" "=")
>>  (match_operand:GPR 1 "memory_operand" "+A"))
>> (set (match_dup 1)
>> -(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
>> -  (match_operand:GPR 3 "reg_or_0_operand" "rJ")
>> -  (match_operand:SI 4 "const_int_operand")  ;; mod_s
>> -  (match_operand:SI 5 "const_int_operand")] ;; mod_f
>> +(unspec_volatile:GPR[(match_operand:GPR 2 "reg_or_0_operand" "rJ")
>> + (match_operand:GPR 3 "reg_or_0_operand" "rJ")
>> + (match_operand:SI 4 "const_int_operand")  ;; mod_s
>> + (match_operand:SI 5 "const_int_operand")] ;; mod_f
>
>That appears to be a bug in the checker.  This isn't a C array
>expression, but an argument in lispy vector notation, so it should be
>separated by a space.

Yeah, the checker currently fails on machine descriptions; I should have
mentioned that, sorry!

[PATCH v2] RISC-V: Fix sync.md and riscv.cc whitespace errors

2023-04-26 Thread Patrick O'Neill

This patch fixes whitespace errors introduced with
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616807.html

2023-04-26 Patrick O'Neill 

gcc/ChangeLog:

* config/riscv/riscv.cc: Fix whitespace.
* config/riscv/sync.md: Fix whitespace.

Signed-off-by: Patrick O'Neill 
---
Patch was checked with contrib/check_GNU_style.py

Whitespace changes in this patch come in 2 flavors:
 * Add space between function name and ()
 * 2 spaces between end of comment and  */
---
v2 Changelog:
 * Ignored checker warning for space before [] in rtl
---
 gcc/config/riscv/riscv.cc |  6 +++---
 gcc/config/riscv/sync.md  | 16 
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0f890469d7a..1529855a2b4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7193,7 +7193,7 @@ riscv_subword_address (rtx mem, rtx *aligned_mem, rtx 
*shift, rtx *mask,
   emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
 gen_lowpart (QImode, *shift)));

-  emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask));
+  emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
 }

 /* Leftshift a subword within an SImode register.  */
@@ -7206,8 +7206,8 @@ riscv_lshift_subword (machine_mode mode, rtx value, rtx 
shift,
   emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
  mode, 0));

-  emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
-gen_lowpart (QImode, shift)));
+  emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
+ gen_lowpart (QImode, shift)));
 }

 /* Initialize the GCC target structure.  */
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 83be6431cb6..19274528262 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -128,10 +128,10 @@
 {
   /* We have no QImode/HImode atomics, so form a mask, then use
  subword_atomic_fetch_strong_nand to implement a LR/SC version of the
- operation. */
+ operation.  */

   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
- is disabled */
+ is disabled.  */

   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
@@ -193,10 +193,10 @@
 {
   /* We have no QImode/HImode atomics, so form a mask, then use
  subword_atomic_fetch_strong_ to implement a LR/SC version of the
- operation. */
+ operation.  */

   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
- is disabled */
+ is disabled.  */

   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
@@ -367,7 +367,7 @@
 {
   rtx difference = gen_rtx_MINUS (SImode, val, exp);
   compare = gen_reg_rtx (SImode);
-  emit_move_insn  (compare, difference);
+  emit_move_insn (compare, difference);
 }

   if (word_mode != SImode)
@@ -393,10 +393,10 @@
 {
   /* We have no QImode/HImode atomics, so form a mask, then use
  subword_atomic_cas_strong to implement a LR/SC version of the
- operation. */
+ operation.  */

   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
- is disabled */
+ is disabled.  */

   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
@@ -461,7 +461,7 @@
   "TARGET_ATOMIC"
 {
   /* We have no QImode atomics, so use the address LSBs to form a mask,
- then use an aligned SImode atomic. */
+ then use an aligned SImode atomic.  */
   rtx result = operands[0];
   rtx mem = operands[1];
   rtx model = operands[2];
--
2.34.1

[pushed] c++: remove nsdmi_inst hashtable

2023-04-26 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

It occurred to me that we have a perfectly good DECL_INITIAL field to put
the instantiated DMI into, we don't need a separate hash table.

gcc/cp/ChangeLog:

* init.cc (nsdmi_inst): Remove.
(maybe_instantiate_nsdmi_init): Use DECL_INITIAL instead.
---
 gcc/cp/init.cc | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index 9571d18170e..1dd24e30d7c 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -564,21 +564,19 @@ perform_target_ctor (tree init)
 /* Instantiate the default member initializer of MEMBER, if needed.
Only get_nsdmi should use the return value of this function.  */
 
-static GTY((cache)) decl_tree_cache_map *nsdmi_inst;
-
 tree
 maybe_instantiate_nsdmi_init (tree member, tsubst_flags_t complain)
 {
   tree init = DECL_INITIAL (member);
-  if (init && DECL_LANG_SPECIFIC (member) && DECL_TEMPLATE_INFO (member))
+
+  /* tsubst_decl uses void_node to indicate an uninstantiated DMI.  */
+  if (init == void_node)
 {
   init = DECL_INITIAL (DECL_TI_TEMPLATE (member));
   location_t expr_loc
= cp_expr_loc_or_loc (init, DECL_SOURCE_LOCATION (member));
   if (TREE_CODE (init) == DEFERRED_PARSE)
/* Unparsed.  */;
-  else if (tree *slot = hash_map_safe_get (nsdmi_inst, member))
-   init = *slot;
   /* Check recursive instantiation.  */
   else if (DECL_INSTANTIATING_NSDMI_P (member))
{
@@ -639,7 +637,7 @@ maybe_instantiate_nsdmi_init (tree member, tsubst_flags_t 
complain)
  DECL_INSTANTIATING_NSDMI_P (member) = 0;
 
  if (init != error_mark_node)
-   hash_map_safe_put (nsdmi_inst, member, init);
+   DECL_INITIAL (member) = init;
 
  current_function_decl = cfd;
  current_binding_level = cbl;

base-commit: 50d866038a910ceb9075b97295a12d77a8d09a3a
prerequisite-patch-id: 4ed1b4e9cbee2ce090bd119cbf47056c35aaeb57
prerequisite-patch-id: cd3a881f1b6cd3bca0064002d9a96f2f52c85a79
prerequisite-patch-id: b42a3670c7e30ee21934698ade2156884b63c19f
-- 
2.31.1

[pushed] c++: local class in nested generic lambda [PR109241]

2023-04-26 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

The earlier fix for PR109241 avoided the crash by handling a type with no
TREE_BINFO.  But we want to move toward doing the partial substitution of
classes in generic lambdas, so let's take a step in that direction.

PR c++/109241

gcc/cp/ChangeLog:

* pt.cc (instantiate_class_template): Do partially instantiate.
(tsubst_expr): Do call complete_type for partial instantiations.
---
 gcc/cp/pt.cc | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index e1c272b9817..93a055c66a1 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -12086,7 +12086,8 @@ instantiate_class_template (tree type)
 return error_mark_node;
 
   if (COMPLETE_OR_OPEN_TYPE_P (type)
-  || uses_template_parms (type))
+  || (uses_template_parms (type)
+ && !TYPE_FUNCTION_SCOPE_P (type)))
 return type;
 
   /* Figure out which template is being instantiated.  */
@@ -19356,10 +19357,7 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
 
 case TAG_DEFN:
   tmp = tsubst (TREE_TYPE (t), args, complain, NULL_TREE);
-  if (dependent_type_p (tmp))
-   /* This is a partial instantiation, try again when full.  */
-   add_stmt (build_min (TAG_DEFN, tmp));
-  else if (CLASS_TYPE_P (tmp))
+  if (CLASS_TYPE_P (tmp))
{
  /* Local classes are not independent templates; they are
 instantiated along with their containing function.  And this
@@ -19368,6 +19366,12 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
  /* Closures are handled by the LAMBDA_EXPR.  */
  gcc_assert (!LAMBDA_TYPE_P (TREE_TYPE (t)));
  complete_type (tmp);
+ if (dependent_type_p (tmp))
+   {
+ /* This is a partial instantiation, try again when full.  */
+ add_stmt (build_min (TAG_DEFN, tmp));
+ break;
+   }
  tree save_ccp = current_class_ptr;
  tree save_ccr = current_class_ref;
  for (tree fld = TYPE_FIELDS (tmp); fld; fld = DECL_CHAIN (fld))

base-commit: 50d866038a910ceb9075b97295a12d77a8d09a3a
prerequisite-patch-id: 4ed1b4e9cbee2ce090bd119cbf47056c35aaeb57
prerequisite-patch-id: cd3a881f1b6cd3bca0064002d9a96f2f52c85a79
-- 
2.31.1

[pushed] c++: unique friend shenanigans [PR69836]

2023-04-26 Thread Jason Merrill via Gcc-patches

Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

Normally we re-instantiate a function declaration when we start to
instantiate the body in case of multiple declarations.  In this wacky
testcase, this causes a problem because the type of the w_counter parameter
depends on its declaration not being in scope yet, so the name lookup only
finds the previous declaration.  This isn't a problem for member functions,
since they aren't subject to argument-dependent lookup.  So let's just skip
the regeneration for hidden friends.

PR c++/69836

gcc/cp/ChangeLog:

* pt.cc (regenerate_decl_from_template): Skip unique friends.

gcc/testsuite/ChangeLog:

* g++.dg/template/friend76.C: New test.
---
 gcc/cp/pt.cc |  6 
 gcc/testsuite/g++.dg/template/friend76.C | 36 
 2 files changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/template/friend76.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 678cb7930e3..e1c272b9817 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -26458,6 +26458,11 @@ regenerate_decl_from_template (tree decl, tree tmpl, 
tree args)
   int args_depth;
   int parms_depth;
 
+  /* Don't bother with this for unique friends that can't be redeclared and
+might change type if regenerated (PR69836).  */
+  if (DECL_UNIQUE_FRIEND_P (decl))
+   goto done;
+
   /* Use the source location of the definition.  */
   DECL_SOURCE_LOCATION (decl) = DECL_SOURCE_LOCATION (tmpl);
 
@@ -26528,6 +26533,7 @@ regenerate_decl_from_template (tree decl, tree tmpl, 
tree args)
   else
 gcc_unreachable ();
 
+ done:
   pop_access_scope (decl);
 }
 
diff --git a/gcc/testsuite/g++.dg/template/friend76.C 
b/gcc/testsuite/g++.dg/template/friend76.C
new file mode 100644
index 000..ce3ed201dda
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/friend76.C
@@ -0,0 +1,36 @@
+// PR c++/69836
+// { dg-do compile { target c++11 } }
+
+template struct number : public number {
+static constexpr int value = N;
+static constexpr number prev() { return {}; }
+};
+template<> struct number<0> { static constexpr int value = 0; };
+
+template struct S { enum { value = N }; };
+
+template constexpr S increment(S) { return {}; }
+
+#define RETURN(R) -> decltype(R) { return R; }
+
+#define INIT(TYPE) \
+using W_ThisType = TYPE;  \
+friend constexpr S<0> state(number<0>, W_ThisType**) { return {}; }
+
+#define STUFF \
+friend constexpr auto state(number{}, 
static_cast(nullptr)))::value+1> w_counter, \
+W_ThisType **w_this) \
+RETURN(increment(state(w_counter.prev(), w_this)))
+
+
+template  struct TemplateObject   {
+INIT(TemplateObject)
+STUFF
+STUFF
+};
+
+int main() {
+  TemplateObject t;
+constexpr auto s = state(number<>{}, 
static_cast**>(nullptr)) ;
+static_assert(s.value == 2, "");
+}

base-commit: 50d866038a910ceb9075b97295a12d77a8d09a3a
prerequisite-patch-id: 4ed1b4e9cbee2ce090bd119cbf47056c35aaeb57
-- 
2.31.1

Re: [PATCH] RISC-V: Fix sync.md and riscv.cc whitespace errors

2023-04-26 Thread Andreas Schwab

On Apr 26 2023, Patrick O'Neill wrote:

> @@ -290,10 +290,10 @@
>[(set (match_operand:GPR 0 "register_operand" "=")
>   (match_operand:GPR 1 "memory_operand" "+A"))
> (set (match_dup 1)
> - (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
> -   (match_operand:GPR 3 "reg_or_0_operand" "rJ")
> -   (match_operand:SI 4 "const_int_operand")  ;; mod_s
> -   (match_operand:SI 5 "const_int_operand")] ;; mod_f
> + (unspec_volatile:GPR[(match_operand:GPR 2 "reg_or_0_operand" "rJ")
> +  (match_operand:GPR 3 "reg_or_0_operand" "rJ")
> +  (match_operand:SI 4 "const_int_operand")  ;; mod_s
> +  (match_operand:SI 5 "const_int_operand")] ;; mod_f

That appears to be a bug in the checker.  This isn't a C array
expression, but an argument in lispy vector notation, so it should be
separated by a space.

> @@ -431,15 +431,15 @@
>  })
>
>  (define_insn "subword_atomic_cas_strong"
> -  [(set (match_operand:SI 0 "register_operand" "=")
>;; old value at mem
> - (match_operand:SI 1 "memory_operand" "+A"));; 
> mem location
> +  [(set (match_operand:SI 0 "register_operand" "=")
>   ;; old value at mem
> + (match_operand:SI 1 "memory_operand" "+A"))   ;; 
> mem location
> (set (match_dup 1)
> - (unspec_volatile:SI [(match_operand:SI 2 "reg_or_0_operand" "rJ")  ;; 
> expected value
> -  (match_operand:SI 3 "reg_or_0_operand" "rJ")] ;; 
> desired value
> + (unspec_volatile:SI[(match_operand:SI 2 "reg_or_0_operand" "rJ")  ;; 
> expected value
> + (match_operand:SI 3 "reg_or_0_operand" "rJ")] ;; 
> desired value

Likewise.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

[PATCH] RISC-V: Fix sync.md and riscv.cc whitespace errors

2023-04-26 Thread Patrick O'Neill

This patch fixes whitespace errors introduced with
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616807.html

2023-04-26 Patrick O'Neill 

gcc/ChangeLog:

* config/riscv/riscv.cc: Fix whitespace.
* config/riscv/sync.md: Fix whitespace.

Signed-off-by: Patrick O'Neill 
---
Patch was checked with contrib/check_GNU_style.py

Whitespace changes in this patch come in 3 flavors:
 * Add space between function name and ()
 * Remove space before square bracket[]
 * 2 spaces between end of comment and  */
---
 gcc/config/riscv/riscv.cc |  6 +++---
 gcc/config/riscv/sync.md  | 40 +++
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0f890469d7a..1529855a2b4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7193,7 +7193,7 @@ riscv_subword_address (rtx mem, rtx *aligned_mem, rtx 
*shift, rtx *mask,
   emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
 gen_lowpart (QImode, *shift)));

-  emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask));
+  emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
 }

 /* Leftshift a subword within an SImode register.  */
@@ -7206,8 +7206,8 @@ riscv_lshift_subword (machine_mode mode, rtx value, rtx 
shift,
   emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
  mode, 0));

-  emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
-gen_lowpart (QImode, shift)));
+  emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
+ gen_lowpart (QImode, shift)));
 }

 /* Initialize the GCC target structure.  */
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 83be6431cb6..8e95ce77916 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -49,7 +49,7 @@
 ;; conservatively emit a full FENCE.
 (define_insn "mem_thread_fence_1"
   [(set (match_operand:BLK 0 "" "")
-   (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))
+   (unspec:BLK[(match_dup 0)] UNSPEC_MEMORY_BARRIER))
(match_operand:SI 1 "const_int_operand" "")] ;; model
   ""
   "fence\tiorw,iorw")
@@ -128,10 +128,10 @@
 {
   /* We have no QImode/HImode atomics, so form a mask, then use
  subword_atomic_fetch_strong_nand to implement a LR/SC version of the
- operation. */
+ operation.  */

   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
- is disabled */
+ is disabled.  */

   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
@@ -193,10 +193,10 @@
 {
   /* We have no QImode/HImode atomics, so form a mask, then use
  subword_atomic_fetch_strong_ to implement a LR/SC version of the
- operation. */
+ operation.  */

   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
- is disabled */
+ is disabled.  */

   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
@@ -290,10 +290,10 @@
   [(set (match_operand:GPR 0 "register_operand" "=")
(match_operand:GPR 1 "memory_operand" "+A"))
(set (match_dup 1)
-   (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
- (match_operand:GPR 3 "reg_or_0_operand" "rJ")
- (match_operand:SI 4 "const_int_operand")  ;; mod_s
- (match_operand:SI 5 "const_int_operand")] ;; mod_f
+   (unspec_volatile:GPR[(match_operand:GPR 2 "reg_or_0_operand" "rJ")
+(match_operand:GPR 3 "reg_or_0_operand" "rJ")
+(match_operand:SI 4 "const_int_operand")  ;; mod_s
+(match_operand:SI 5 "const_int_operand")] ;; mod_f
 UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 6 "="))]
   "TARGET_ATOMIC"
@@ -367,7 +367,7 @@
 {
   rtx difference = gen_rtx_MINUS (SImode, val, exp);
   compare = gen_reg_rtx (SImode);
-  emit_move_insn  (compare, difference);
+  emit_move_insn (compare, difference);
 }

   if (word_mode != SImode)
@@ -393,10 +393,10 @@
 {
   /* We have no QImode/HImode atomics, so form a mask, then use
  subword_atomic_cas_strong to implement a LR/SC version of the
- operation. */
+ operation.  */

   /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
- is disabled */
+ is disabled.  */

   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
@@ -431,15 +431,15 @@
 })

 (define_insn "subword_atomic_cas_strong"
-  [(set (match_operand:SI 0 "register_operand" "=") ;; 
old value at mem
-   (match_operand:SI 1 "memory_operand" "+A"));; 
mem location
+  [(set (match_operand:SI 0 "register_operand" "=");; 
old value at mem
+   (match_operand:SI 1

Re: libsanitizer: sync from master

2023-04-26 Thread Martin Liška

On 4/26/23 20:31, Florian Weimer wrote:
> * Martin Liška:
> 
>> On 11/15/22 16:47, Martin Liška wrote:
>>> Hi.
>>>
>>> I've just pushed libsanitizer update that was tested on x86_64-linux and 
>>> ppc64le-linux systems.
>>> Moreover, I run bootstrap on x86_64-linux and checked ABI difference with 
>>> abidiff.
>>
>> Hello.
>>
>> And I've done the same now and merged upstream version 3185e47b5a8444e9fd.
> 
> So … we have the issue that involves interceptors outside of libc.so.6,
> namely crypt, crypt_r, and I posted an upstream patch for this:
> 
>   sanitizers: Disable crypt, crypt_r interceptors for glibc
>   
> 
> Can we just apply this downstream for now?  It blocks various folks from
> using the sanitizers in their projects.

Hello.

Your upstream revision has already been accepted, so please apply it; I'm
going to do one more merge from upstream in the coming days. Does that work
for you?

Cheers,
Martin

> 
> Thanks,
> Florian
>

Re: libsanitizer: sync from master

2023-04-26 Thread Martin Liška

On 4/26/23 21:23, H.J. Lu wrote:
> On Wed, Apr 26, 2023 at 6:52 AM Martin Liška  wrote:
>>
>> On 11/15/22 16:47, Martin Liška wrote:
>>> Hi.
>>>
>>> I've just pushed libsanitizer update that was tested on x86_64-linux and 
>>> ppc64le-linux systems.
>>> Moreover, I run bootstrap on x86_64-linux and checked ABI difference with 
>>> abidiff.
>>
>> Hello.
>>
>> And I've done the same now and merged upstream version 3185e47b5a8444e9fd.
> 
> It caused the bootstrap failure:
> 
> https://gcc.gnu.org/pipermail/gcc-regression/2023-April/077674.html

Can you see what the build error is in the build log? I can't see it from the
link you sent.

Martin

> 
>> Martin
>>
>>>
>>> Pushed as r13-4068-g3037f11fb86eda.
>>>
>>> Cheers,
>>> Martin
>>
> 
>

Re: libsanitizer: sync from master

2023-04-26 Thread Bernhard Reutner-Fischer via Gcc-patches

On 26 April 2023 20:31:10 CEST, Florian Weimer via Fortran 
 wrote:
>* Martin Liška:
>
>> On 11/15/22 16:47, Martin Liška wrote:
>>> Hi.
>>> 
>>> I've just pushed libsanitizer update that was tested on x86_64-linux and 
>>> ppc64le-linux systems.
>>> Moreover, I run bootstrap on x86_64-linux and checked ABI difference with 
>>> abidiff.
>>
>> Hello.
>>
>> And I've done the same now and merged upstream version 3185e47b5a8444e9fd.
>
>So … we have the issue that involves interceptors outside of libc.so.6,
>namely crypt, crypt_r, and I posted an upstream patch for this:
>
>  sanitizers: Disable crypt, crypt_r interceptors for glibc
>  
>
>Can we just apply this downstream for now?  It blocks various folks from
>using the sanitizers in their projects.

+1

Re: [PATCH] c++: Micro-optimize most_specialized_partial_spec

2023-04-26 Thread Patrick Palka via Gcc-patches

On Thu, Sep 1, 2022 at 2:40 PM Jason Merrill  wrote:
>
> On 8/31/22 17:15, Patrick Palka wrote:
> > This introduces an early exit test to most_specialized_partial_spec for
> > the common case where we have no partial specializations, which allows
> > us to avoid some unnecessary work.  In passing, clean the function up a
> > bit.
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
> > trunk?
>
> OK.

Thanks.  This patch fell through the cracks for GCC 13, but I pushed
it to trunk just now.

>
> > gcc/cp/ChangeLog:
> >
> >   * pt.cc (most_specialized_partial_spec): Exit early when
> >   DECL_TEMPLATE_SPECIALIZATIONS is empty.  Move local variable
> >   declarations closer to their first use.  Remove redundant
> >   flag_concepts test.  Remove redundant forward declaration.
> > ---
> >   gcc/cp/pt.cc | 45 +++--
> >   1 file changed, 19 insertions(+), 26 deletions(-)
> >
> > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > index fe7e809fc2d..497a18ef728 100644
> > --- a/gcc/cp/pt.cc
> > +++ b/gcc/cp/pt.cc
> > @@ -187,7 +187,6 @@ static int unify_pack_expansion (tree, tree, tree,
> >   static tree copy_template_args (tree);
> >   static tree tsubst_template_parms (tree, tree, tsubst_flags_t);
> >   static void tsubst_each_template_parm_constraints (tree, tree, 
> > tsubst_flags_t);
> > -tree most_specialized_partial_spec (tree, tsubst_flags_t);
> >   static tree tsubst_aggr_type (tree, tree, tsubst_flags_t, tree, int);
> >   static tree tsubst_arg_types (tree, tree, tree, tsubst_flags_t, tree);
> >   static tree tsubst_function_type (tree, tree, tsubst_flags_t, tree);
> > @@ -25756,15 +25755,7 @@ most_general_template (tree decl)
> >   tree
> >   most_specialized_partial_spec (tree target, tsubst_flags_t complain)
> >   {
> > -  tree list = NULL_TREE;
> > -  tree t;
> > -  tree champ;
> > -  int fate;
> > -  bool ambiguous_p;
> > -  tree outer_args = NULL_TREE;
> > -  tree tmpl, args;
> > -
> > -  tree decl;
> > +  tree tmpl, args, decl;
> > if (TYPE_P (target))
> >   {
> > tree tinfo = CLASSTYPE_TEMPLATE_INFO (target);
> > @@ -25788,13 +25779,18 @@ most_specialized_partial_spec (tree target, 
> > tsubst_flags_t complain)
> > else
> >   gcc_unreachable ();
> >
> > +  tree main_tmpl = most_general_template (tmpl);
> > +  tree specs = DECL_TEMPLATE_SPECIALIZATIONS (main_tmpl);
> > +  if (!specs)
> > +/* There are no partial specializations of this template.  */
> > +return NULL_TREE;
> > +
> > push_access_scope_guard pas (decl);
> > deferring_access_check_sentinel acs (dk_no_deferred);
> >
> > -  tree main_tmpl = most_general_template (tmpl);
> > -
> > /* For determining which partial specialization to use, only the
> >innermost args are interesting.  */
> > +  tree outer_args = NULL_TREE;
> > if (TMPL_ARGS_HAVE_MULTIPLE_LEVELS (args))
> >   {
> > outer_args = strip_innermost_template_args (args, 1);
> > @@ -25806,7 +25802,8 @@ most_specialized_partial_spec (tree target, 
> > tsubst_flags_t complain)
> >fully resolve everything.  */
> > processing_template_decl_sentinel ptds;
> >
> > -  for (t = DECL_TEMPLATE_SPECIALIZATIONS (main_tmpl); t; t = TREE_CHAIN 
> > (t))
> > +  tree list = NULL_TREE;
> > +  for (tree t = specs; t; t = TREE_CHAIN (t))
> >   {
> > const tree ospec_tmpl = TREE_VALUE (t);
> >
> > @@ -25829,10 +25826,8 @@ most_specialized_partial_spec (tree target, 
> > tsubst_flags_t complain)
> > if (outer_args)
> >   spec_args = add_to_template_args (outer_args, spec_args);
> >
> > -  /* Keep the candidate only if the constraints are satisfied,
> > - or if we're not compiling with concepts.  */
> > -  if (!flag_concepts
> > -   || constraints_satisfied_p (ospec_tmpl, spec_args))
> > +   /* Keep the candidate only if the constraints are satisfied.  */
> > +   if (constraints_satisfied_p (ospec_tmpl, spec_args))
> >   {
> > list = tree_cons (spec_args, ospec_tmpl, list);
> > TREE_TYPE (list) = TREE_TYPE (t);
> > @@ -25843,13 +25838,11 @@ most_specialized_partial_spec (tree target, 
> > tsubst_flags_t complain)
> > if (! list)
> >   return NULL_TREE;
> >
> > -  ambiguous_p = false;
> > -  t = list;
> > -  champ = t;
> > -  t = TREE_CHAIN (t);
> > -  for (; t; t = TREE_CHAIN (t))
> > +  tree champ = list;
> > +  bool ambiguous_p = false;
> > +  for (tree t = TREE_CHAIN (list); t; t = TREE_CHAIN (t))
> >   {
> > -  fate = more_specialized_partial_spec (tmpl, champ, t);
> > +  int fate = more_specialized_partial_spec (tmpl, champ, t);
> > if (fate == 1)
> >   ;
> > else
> > @@ -25868,9 +25861,9 @@ most_specialized_partial_spec (tree target, 
> > tsubst_flags_t complain)
> >   }
> >
> > if (!ambiguous_p)
> > -for (t = list; t && t != champ; t = TREE_CHAIN (t))
> > +for (tree t = list; t

[COMMITTED 4/5] Rename ssa_global_cache to ssa_cache and add has_range

2023-04-26 Thread Andrew MacLeod via Gcc-patches

The original ssa_global_cache was intended to simply be the global cache 
for ranger, but uses of it have since percolated such that it is really 
just a range cache for a list of ssa-names. This patch renames it from 
"ssa_global_cache" to "ssa_cache".


It also adds a method called "has_range", which didn't exist before; it 
simply indicates whether a range is set or not.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew

From bf07de561197559304c67bd46c7bea3da9eb63f9 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Tue, 28 Mar 2023 11:32:21 -0400
Subject: [PATCH 4/5] Rename ssa_global_cache to ssa_cache and add has_range

This renames the ssa_global_cache to be ssa_cache.  The original use was
to function as a global cache, but its uses have expanded.  Remove all mention
of "global" from the class and methods.  Also add a has_range method.

	* gimple-range-cache.cc (ssa_cache::ssa_cache): Rename.
	(ssa_cache::~ssa_cache): Rename.
	(ssa_cache::has_range): New.
	(ssa_cache::get_range): Rename.
	(ssa_cache::set_range): Rename.
	(ssa_cache::clear_range): Rename.
	(ssa_cache::clear): Rename.
	(ssa_cache::dump): Rename and use get_range.
	(ranger_cache::get_global_range): Use get_range and set_range.
	(ranger_cache::range_of_def): Use get_range.
	* gimple-range-cache.h (class ssa_cache): Rename class and methods.
	(class ranger_cache): Use ssa_cache.
	* gimple-range-path.cc (path_range_query::path_range_query): Use
	ssa_cache.
	(path_range_query::get_cache): Use get_range.
	(path_range_query::set_cache): Use set_range.
	* gimple-range-path.h (class path_range_query): Use ssa_cache.
	* gimple-range.cc (assume_query::assume_range_p): Use get_range.
	(assume_query::range_of_expr): Use get_range.
	(assume_query::assume_query): Use set_range.
	(assume_query::calculate_op): Use get_range and set_range.
	* gimple-range.h (class assume_query): Use ssa_cache.
---
 gcc/gimple-range-cache.cc | 45 ---
 gcc/gimple-range-cache.h  | 15 +++--
 gcc/gimple-range-path.cc  |  8 +++
 gcc/gimple-range-path.h   |  2 +-
 gcc/gimple-range.cc   | 14 ++--
 gcc/gimple-range.h|  2 +-
 6 files changed, 49 insertions(+), 37 deletions(-)

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index 868d2dda424..6de96f6b8a9 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -530,27 +530,38 @@ block_range_cache::dump (FILE *f, basic_block bb, bool print_varying)
 
 // -
 
-// Initialize a global cache.
+// Initialize an ssa cache.
 
-ssa_global_cache::ssa_global_cache ()
+ssa_cache::ssa_cache ()
 {
   m_tab.create (0);
   m_range_allocator = new obstack_vrange_allocator;
 }
 
-// Deconstruct a global cache.
+// Deconstruct an ssa cache.
 
-ssa_global_cache::~ssa_global_cache ()
+ssa_cache::~ssa_cache ()
 {
   m_tab.release ();
   delete m_range_allocator;
 }
 
+// Return TRUE if the global range of NAME has a cache entry.
+
+bool
+ssa_cache::has_range (tree name) const
+{
+  unsigned v = SSA_NAME_VERSION (name);
+  if (v >= m_tab.length ())
+return false;
+  return m_tab[v] != NULL;
+}
+
 // Retrieve the global range of NAME from cache memory if it exists. 
 // Return the value in R.
 
 bool
-ssa_global_cache::get_global_range (vrange , tree name) const
+ssa_cache::get_range (vrange , tree name) const
 {
   unsigned v = SSA_NAME_VERSION (name);
   if (v >= m_tab.length ())
@@ -563,11 +574,11 @@ ssa_global_cache::get_global_range (vrange , tree name) const
   return true;
 }
 
-// Set the range for NAME to R in the global cache.
+// Set the range for NAME to R in the ssa cache.
 // Return TRUE if there was already a range set, otherwise false.
 
 bool
-ssa_global_cache::set_global_range (tree name, const vrange )
+ssa_cache::set_range (tree name, const vrange )
 {
   unsigned v = SSA_NAME_VERSION (name);
   if (v >= m_tab.length ())
@@ -584,7 +595,7 @@ ssa_global_cache::set_global_range (tree name, const vrange )
 // Set the range for NAME to R in the global cache.
 
 void
-ssa_global_cache::clear_global_range (tree name)
+ssa_cache::clear_range (tree name)
 {
   unsigned v = SSA_NAME_VERSION (name);
   if (v >= m_tab.length ())
@@ -592,19 +603,19 @@ ssa_global_cache::clear_global_range (tree name)
   m_tab[v] = NULL;
 }
 
-// Clear the global cache.
+// Clear the ssa cache.
 
 void
-ssa_global_cache::clear ()
+ssa_cache::clear ()
 {
   if (m_tab.address ())
 memset (m_tab.address(), 0, m_tab.length () * sizeof (vrange *));
 }
 
-// Dump the contents of the global cache to F.
+// Dump the contents of the ssa cache to F.
 
 void
-ssa_global_cache::dump (FILE *f)
+ssa_cache::dump (FILE *f)
 {
   /* Cleared after the table header has been printed.  */
   bool print_header = true;
@@ -613,7 +624,7 @@ ssa_global_cache::dump (FILE *f)
   if (!gimple_range_ssa_p (ssa_name (x)))
 	continue;
   Value_Range r (TREE_TYPE (ssa_name (x)));
-

[COMMITTED 5/5] PR tree-optimization/108697 - Create a lazy ssa_cache.

2023-04-26 Thread Andrew MacLeod via Gcc-patches

Sparsely used ssa caches can benefit from using a bitmap to determine if 
a name already has an entry.  The path_query class was already managing 
something like this internally, but there is benefit to making it 
generally available.


ssa_lazy_cache inherits from ssa_cache and adds management of the 
bitmap.   The self-managed version in path_query has been removed, 
cleaned up, and replaced with this lazy version.  It is also now used in 
"assume_query" processing.


All 5 patches combined produce about
 - a 0.4% speedup in total compilation time,
 - about a 1% speedup in VRP
 - and threading picks up a more impressive 13% improvement.

This patch has the previous one as a prerequisite to rename 
ssa_global_cache to ssa_cache.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew
From b3c81a4a6b7ff5adce6b5891729b79a0d6e4e54a Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Tue, 28 Mar 2023 11:35:26 -0400
Subject: [PATCH 5/5] Create a lazy ssa_cache.

Sparsely used ssa caches can benefit from using a bitmap to
determine if a name already has an entry.  Utilize it in the path query
and remove its private bitmap for tracking the same info.
Also use it in the "assume" query class.

	PR tree-optimization/108697
	* gimple-range-cache.cc (ssa_cache::clear_range): Do
	not clear the vector on an out of range query.
	(ssa_cache::dump): Use dump_range_query instead of get_range.
	(ssa_cache::dump_range_query): New.
	(ssa_lazy_cache::dump_range_query): New.
	(ssa_lazy_cache::set_range): New.
	* gimple-range-cache.h (ssa_cache::dump_range_query): New.
	(class ssa_lazy_cache): New.
	(ssa_lazy_cache::ssa_lazy_cache): New.
	(ssa_lazy_cache::~ssa_lazy_cache): New.
	(ssa_lazy_cache::get_range): New.
	(ssa_lazy_cache::clear_range): New.
	(ssa_lazy_cache::clear): New.
	(ssa_lazy_cache::dump): New.
	* gimple-range-path.cc (path_range_query::path_range_query): Do
	not allocate a ssa_cache object nor has_cache bitmap.
	(path_range_query::~path_range_query): Do not free objects.
	(path_range_query::clear_cache): Remove.
	(path_range_query::get_cache): Adjust.
	(path_range_query::set_cache): Remove.
	(path_range_query::dump): Don't call through a pointer.
	(path_range_query::internal_range_of_expr): Set cache directly.
	(path_range_query::reset_path): Clear cache directly.
	(path_range_query::ssa_range_in_phi): Fold with globals only.
	(path_range_query::compute_ranges_in_phis): Simply set range.
	(path_range_query::compute_ranges_in_block): Call cache directly.
	* gimple-range-path.h (class path_range_query): Replace bitmap
	and cache pointer with lazy cache object.
	* gimple-range.h (class assume_query): Use ssa_lazy_cache.
---
 gcc/gimple-range-cache.cc | 45 +--
 gcc/gimple-range-cache.h  | 35 -
 gcc/gimple-range-path.cc  | 65 +--
 gcc/gimple-range-path.h   |  7 +
 gcc/gimple-range.h|  2 +-
 5 files changed, 92 insertions(+), 62 deletions(-)

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index 6de96f6b8a9..5510efba1ca 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -592,14 +592,14 @@ ssa_cache::set_range (tree name, const vrange )
   return m != NULL;
 }
 
-// Set the range for NAME to R in the global cache.
+// Set the range for NAME to R in the ssa cache.
 
 void
 ssa_cache::clear_range (tree name)
 {
   unsigned v = SSA_NAME_VERSION (name);
   if (v >= m_tab.length ())
-m_tab.safe_grow_cleared (num_ssa_names + 1);
+return;
   m_tab[v] = NULL;
 }
 
@@ -624,7 +624,10 @@ ssa_cache::dump (FILE *f)
   if (!gimple_range_ssa_p (ssa_name (x)))
 	continue;
   Value_Range r (TREE_TYPE (ssa_name (x)));
-  if (get_range (r, ssa_name (x)) && !r.varying_p ())
+  // Invoke dump_range_query which is a private virtual version of
+  // get_range.   This avoids performance impacts on general queries,
+  // but allows sharing of the dump routine.
+  if (dump_range_query (r, ssa_name (x)) && !r.varying_p ())
 	{
 	  if (print_header)
 	{
@@ -646,6 +649,42 @@ ssa_cache::dump (FILE *f)
 fputc ('\n', f);
 }
 
+// Virtual private get_range query for dumping.
+
+bool
+ssa_cache::dump_range_query (vrange , tree name) const
+{
+  return get_range (r, name);
+}
+
+// Virtual private get_range query for dumping.
+
+bool
+ssa_lazy_cache::dump_range_query (vrange , tree name) const
+{
+  return get_range (r, name);
+}
+
+
+// Set range of NAME to R in a lazy cache.  Return FALSE if it did not already
+// have a range.
+
+bool
+ssa_lazy_cache::set_range (tree name, const vrange )
+{
+  unsigned v = SSA_NAME_VERSION (name);
+  if (!bitmap_set_bit (active_p, v))
+{
+  // There is already an entry, simply set it.
+  gcc_checking_assert (v < m_tab.length ());
+  return ssa_cache::set_range (name, r);
+}
+  if (v >= m_tab.length ())
+m_tab.safe_grow (num_ssa_names + 1);
+  m_tab[v] = m_range_allocator->clone (r);
+  return

[COMMITTED 3/5] Add sbr_lazy_vector and adjust (e)vrp sparse cache

2023-04-26 Thread Andrew MacLeod via Gcc-patches

This implements a sparse vector class for ranger's cache and uses it by 
default except when the CFG is very small, in which case the original 
full vectors are faster.  It works like a normal vector cache (in fact 
it inherits from it), but uses a sparse bitmap to determine whether a 
vector element is set or not.  This provides better performance for 
clearing the vector, as well as during initialization.


A new param is added for this transition, "vrp_vector_threshold", which 
defaults to 250.  Any function with fewer than 250 basic blocks 
will use the simple vectors.  Various timing runs have indicated this is 
about the sweet spot where using the sparse bitmap overtakes the time 
required to clear the vector initially.  Should we make ranger live 
across functions in the future, we'll probably want to lower this value 
again as clearing is significantly cheaper.


This patch also renames the "evrp_*" params to "vrp_*" as there really is 
not a separate EVRP pass any more; it's all one vrp pass.  Eventually 
we'll probably want to change it to vrp1, vrp2 and vrp3 rather than 
evrp, vrp1 and vrp2.  But that's a task for later, perhaps when we 
reconsider pass orderings.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew
From 6a3babfbd9a2b18b9e86d3d3a91564fcb9b8f9d7 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Thu, 13 Apr 2023 14:47:47 -0400
Subject: [PATCH 3/5] Add sbr_lazy_vector and adjust (e)vrp sparse cache

Add a sparse vector class for cache and use it by default.
Rename the evrp_* params to vrp_*, and add a param for small CFGS which use
just the original basic vector.

	* gimple-range-cache.cc (sbr_vector::sbr_vector): Add parameter
	and local to optionally zero memory.
	(sbr_vector::grow): Only zero memory if flag is set.
	(class sbr_lazy_vector): New.
	(sbr_lazy_vector::sbr_lazy_vector): New.
	(sbr_lazy_vector::set_bb_range): New.
	(sbr_lazy_vector::get_bb_range): New.
	(sbr_lazy_vector::bb_range_p): New.
	(block_range_cache::set_bb_range): Check flags and Use sbr_lazy_vector.
	* gimple-range-gori.cc (gori_map::calculate_gori): Use
	param_vrp_switch_limit.
	(gori_compute::gori_compute): Use param_vrp_switch_limit.
	* params.opt (vrp_sparse_threshold): Rename from evrp_sparse_threshold.
	(vrp_switch_limit): Rename from evrp_switch_limit.
	(vrp_vector_threshold): New.
---
 gcc/gimple-range-cache.cc | 72 ++-
 gcc/gimple-range-gori.cc  |  4 +--
 gcc/params.opt| 20 ++-
 3 files changed, 78 insertions(+), 18 deletions(-)

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index 2314478d558..868d2dda424 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -79,7 +79,7 @@ ssa_block_ranges::dump (FILE *f)
 class sbr_vector : public ssa_block_ranges
 {
 public:
-  sbr_vector (tree t, vrange_allocator *allocator);
+  sbr_vector (tree t, vrange_allocator *allocator, bool zero_p = true);
 
   virtual bool set_bb_range (const_basic_block bb, const vrange ) override;
   virtual bool get_bb_range (vrange , const_basic_block bb) override;
@@ -91,22 +91,25 @@ protected:
   vrange *m_undefined;
   tree m_type;
   vrange_allocator *m_range_allocator;
+  bool m_zero_p;
   void grow ();
 };
 
 
 // Initialize a block cache for an ssa_name of type T.
 
-sbr_vector::sbr_vector (tree t, vrange_allocator *allocator)
+sbr_vector::sbr_vector (tree t, vrange_allocator *allocator, bool zero_p)
   : ssa_block_ranges (t)
 {
   gcc_checking_assert (TYPE_P (t));
   m_type = t;
+  m_zero_p = zero_p;
   m_range_allocator = allocator;
   m_tab_size = last_basic_block_for_fn (cfun) + 1;
   m_tab = static_cast 
 (allocator->alloc (m_tab_size * sizeof (vrange *)));
-  memset (m_tab, 0, m_tab_size * sizeof (vrange *));
+  if (zero_p)
+memset (m_tab, 0, m_tab_size * sizeof (vrange *));
 
   // Create the cached type range.
   m_varying = m_range_allocator->alloc_vrange (t);
@@ -132,7 +135,8 @@ sbr_vector::grow ()
   vrange **t = static_cast 
 (m_range_allocator->alloc (new_size * sizeof (vrange *)));
   memcpy (t, m_tab, m_tab_size * sizeof (vrange *));
-  memset (t + m_tab_size, 0, (new_size - m_tab_size) * sizeof (vrange *));
+  if (m_zero_p)
+memset (t + m_tab_size, 0, (new_size - m_tab_size) * sizeof (vrange *));
 
   m_tab = t;
   m_tab_size = new_size;
@@ -183,6 +187,50 @@ sbr_vector::bb_range_p (const_basic_block bb)
   return false;
 }
 
+// Like an sbr_vector, except it uses a bitmap to manage whetehr  vale is set
+// or not rather than cleared memory.
+
+class sbr_lazy_vector : public sbr_vector
+{
+public:
+  sbr_lazy_vector (tree t, vrange_allocator *allocator, bitmap_obstack *bm);
+
+  virtual bool set_bb_range (const_basic_block bb, const vrange ) override;
+  virtual bool get_bb_range (vrange , const_basic_block bb) override;
+  virtual bool bb_range_p (const_basic_block bb) override;
+protected:
+  bitmap m_has_value;
+};
+
+sbr_lazy_vector::sbr_lazy_vector (tree t,

[COMMITTED 2/5] Quicker relation check.

2023-04-26 Thread Andrew MacLeod via Gcc-patches

If either of the SSA names in a comparison do not have any equivalences 
or relations, we can short-circuit the check slightly and be a bit faster.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew
From ee03aca78fb5739f4cd76cb30332f8aff2c5243a Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Wed, 8 Feb 2023 12:36:23 -0500
Subject: [PATCH 2/5] Quicker relation check.

If either of the SSA names in a comparison do not have any equivalences
or relations, we can short-circuit the check slightly.

	* value-relation.cc (dom_oracle::query_relation): Check early for lack
	of any relation.
	* value-relation.h (equiv_oracle::has_equiv_p): New.
---
 gcc/value-relation.cc | 6 ++
 gcc/value-relation.h  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc
index 30a02d3c9d3..65cf7694d40 100644
--- a/gcc/value-relation.cc
+++ b/gcc/value-relation.cc
@@ -1374,6 +1374,12 @@ dom_oracle::query_relation (basic_block bb, tree ssa1, tree ssa2)
   if (v1 == v2)
 return VREL_EQ;
 
+  // If v1 or v2 do not have any relations or equivalences, a partial
+  // equivalence is the only possibility.
+  if ((!bitmap_bit_p (m_relation_set, v1) && !has_equiv_p (v1))
+  || (!bitmap_bit_p (m_relation_set, v2) && !has_equiv_p (v2)))
+return partial_equiv (ssa1, ssa2);
+
   // Check for equivalence first.  They must be in each equivalency set.
   const_bitmap equiv1 = equiv_set (ssa1, bb);
   const_bitmap equiv2 = equiv_set (ssa2, bb);
diff --git a/gcc/value-relation.h b/gcc/value-relation.h
index 3177ecb1ad0..be6e277421b 100644
--- a/gcc/value-relation.h
+++ b/gcc/value-relation.h
@@ -170,6 +170,7 @@ public:
   void dump (FILE *f) const override;
 
 protected:
+  inline bool has_equiv_p (unsigned v) { return bitmap_bit_p (m_equiv_set, v); }
   bitmap_obstack m_bitmaps;
   struct obstack m_chain_obstack;
 private:
-- 
2.39.2

[COMMITTED 1/5] PR tree-optimization/109417 - Don't save ssa-name pointer in dependency cache.

2023-04-26 Thread Andrew MacLeod via Gcc-patches



On 4/25/23 22:34, Jeff Law wrote:



On 4/24/23 07:51, Andrew MacLeod wrote:



It's not a real cache; it's merely a statement shortcut in dependency 
analysis to avoid re-parsing statements every time we look at them 
for dependency analysis.


It is not supposed to be used for anything other than dependency 
checking, i.e., if an SSA_NAME changes, we can check if it matches 
either of the 2 "cached" names on this DEF, and if so, we know this 
name is stale.  We are never actually supposed to use the dependency 
cached values to drive anything, merely respond to the question of 
whether either matches a given name.  So it doesn't matter if the name 
here has been freed.
OK.  I'll take your word for it.  Note that a free'd SSA_NAME may have 
an empty TREE_TYPE or an unexpected TREE_CHAIN field IIRC. So you have 
to be a bit careful if you're going to allow them.






We never re-use SSA names from within the pass releasing it.  But if
the ranger cache
persists across passes this could be a problem.  See



These particular values would never persist beyond the current pass; it's 
just the dependency chains, and they would get rebuilt every time 
because the IL has changed.
Good.  That would limit the concerns significantly.  I don't think we 
recycle names within a pass anymore (we used to within DOM due to the 
way threading worked eons ago, but we no longer take things out of SSA 
form to handle the CFG/SSA graph updates).  One could even argue we 
don't need to maintain the freelist and recycle names anymore.


Jeff

Well, no worries.  Taken care of thusly for the future.  It's a hair 
slower, but nothing outrageous.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew




From a530eb642032da7ad4d30de51131421631055f72 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Tue, 25 Apr 2023 15:33:52 -0400
Subject: [PATCH 1/5] Don't save ssa-name pointer in dependency cache.

If the direct dependence fields point directly to an ssa-name,
it's possible that an optimization frees an ssa-name, and the value
pointed to may now be in the free list.  Simply maintain the ssa
version number instead.

	PR tree-optimization/109417
	* gimple-range-gori.cc (range_def_chain::register_dependency):
	Save the ssa version number, not the pointer.
	(gori_compute::may_recompute_p): No need to check if a dependency
	is in the free list.
	* gimple-range-gori.h (class range_def_chain): Change ssa1 and ssa2
	fields to be unsigned int instead of trees.
	(range_def_chain::depend1): Adjust.
	(range_def_chain::depend2): Adjust.
	* gimple-range.h: Include "ssa.h" to inline ssa_name().
---
 gcc/gimple-range-gori.cc |  8 
 gcc/gimple-range-gori.h  | 14 ++
 gcc/gimple-range.h   |  1 +
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index d77e1f51ac2..5bba77c7b7b 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -182,9 +182,9 @@ range_def_chain::register_dependency (tree name, tree dep, basic_block bb)
 
   // Set the direct dependency cache entries.
   if (!src.ssa1)
-src.ssa1 = dep;
-  else if (!src.ssa2 && src.ssa1 != dep)
-src.ssa2 = dep;
+src.ssa1 = SSA_NAME_VERSION (dep);
+  else if (!src.ssa2 && src.ssa1 != SSA_NAME_VERSION (dep))
+src.ssa2 = SSA_NAME_VERSION (dep);
 
   // Don't calculate imports or export/dep chains if BB is not provided.
   // This is usually the case for when the temporal cache wants the direct
@@ -1316,7 +1316,7 @@ gori_compute::may_recompute_p (tree name, basic_block bb, int depth)
   // If the first dependency is not set, there is no recomputation.
   // Dependencies reflect original IL, not current state.   Check if the
   // SSA_NAME is still valid as well.
-  if (!dep1 || SSA_NAME_IN_FREE_LIST (dep1))
+  if (!dep1)
 return false;
 
   // Don't recalculate PHIs or statements with side_effects.
diff --git a/gcc/gimple-range-gori.h b/gcc/gimple-range-gori.h
index 3ea4b45595b..526edc24b53 100644
--- a/gcc/gimple-range-gori.h
+++ b/gcc/gimple-range-gori.h
@@ -46,8 +46,8 @@ protected:
   bitmap_obstack m_bitmaps;
 private:
   struct rdc {
-   tree ssa1;		// First direct dependency
-   tree ssa2;		// Second direct dependency
+   unsigned int ssa1;		// First direct dependency
+   unsigned int ssa2;		// Second direct dependency
bitmap bm;		// All dependencies
bitmap m_import;
   };
@@ -66,7 +66,10 @@ range_def_chain::depend1 (tree name) const
   unsigned v = SSA_NAME_VERSION (name);
   if (v >= m_def_chain.length ())
 return NULL_TREE;
-  return m_def_chain[v].ssa1;
+  unsigned v1 = m_def_chain[v].ssa1;
+  if (!v1)
+return NULL_TREE;
+  return ssa_name (v1);
 }
 
 // Return the second direct dependency for NAME, if there is one.
@@ -77,7 +80,10 @@ range_def_chain::depend2 (tree name) const
   unsigned v = SSA_NAME_VERSION (name);
   if (v >= m_def_chain.length ())
 return NULL_TREE;
-  return m_def_chain[v].ssa2;
+  unsigned v2 = m_def_chain[v].ssa2;
+  if

Re: libsanitizer: sync from master

2023-04-26 Thread H.J. Lu via Gcc-patches

On Wed, Apr 26, 2023 at 6:52 AM Martin Liška  wrote:
>
> On 11/15/22 16:47, Martin Liška wrote:
> > Hi.
> >
> > I've just pushed libsanitizer update that was tested on x86_64-linux and 
> > ppc64le-linux systems.
> > Moreover, I run bootstrap on x86_64-linux and checked ABI difference with 
> > abidiff.
>
> Hello.
>
> And I've done the same now and merged upstream version 3185e47b5a8444e9fd.

It caused the bootstrap failure:

https://gcc.gnu.org/pipermail/gcc-regression/2023-April/077674.html

> Martin
>
> >
> > Pushed as r13-4068-g3037f11fb86eda.
> >
> > Cheers,
> > Martin
>


-- 
H.J.

Re: [PATCH] GCC-13/changes: Add note about iostream usage

2023-04-26 Thread Andrew Pinski via Gcc-patches

On Wed, Apr 26, 2023 at 12:07 PM Jonathan Wakely via Gcc-patches
 wrote:
>
> On 26/04/23 09:53 -0700, Andrew Pinski wrote:
> >This adds a note about iostream usage so it does not catch others
> >in surpise like it has already.
> >
> >OK?
>
> Thanks, I agree we should add something, but have some comments below.
>
> >---
> > htdocs/gcc-13/changes.html | 5 +
> > 1 file changed, 5 insertions(+)
> >
> >diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
> >index 70732ec0..7c83f7c4 100644
> >--- a/htdocs/gcc-13/changes.html
> >+++ b/htdocs/gcc-13/changes.html
> >@@ -25,6 +25,11 @@ You may also want to check out our
> > 
> > Caveats
> > 
> >+libstdc++ uses constructors inside the library to initialize 
> >std::cout/std::cin, etc.
> >+ instead of having it done in each source which uses iostream header.
>
> We should use code font for std::cout, std::cin and iostream, and
> style it as  not just iostream.
>
> >+ This requires you to make sure the dynamic loader to load the new 
> >libstdc++v3 library
> >+ (examples of how to do this is to use -Wl,-rpath,... while linking or 
> >LD_LIBRARY_PATH
> >+ while running the program).  
>
> I think it would be better to link to
> https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dynamic_or_shared.html#manual.intro.using.linkage.dynamic
>
> How about:
>
>For C++, construction of the global iostream objects
>std::cout, std::cin etc. is now done
>inside the standard library, instead of in every source file that
>includes the iostream header. This change
>improves the start-up performance of C++ programs, but it means that
>code compiled with GCC 13.1 will crash if the correct version of
>libstdc++.so is not used at runtime. See the
> href="https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dynamic_or_shared.html#manual.intro.using.linkage.dynamic;>documentation
>about using the right libstdc++.so at runtime.
>

This looks better than my version.

Thanks,
Andrew

>
>

Re: [PATCH] GCC-13/changes: Add note about iostream usage

2023-04-26 Thread Jonathan Wakely via Gcc-patches


On 26/04/23 09:53 -0700, Andrew Pinski wrote:

This adds a note about iostream usage so it does not catch others
by surprise like it has already.

OK?


Thanks, I agree we should add something, but have some comments below.


---
htdocs/gcc-13/changes.html | 5 +
1 file changed, 5 insertions(+)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 70732ec0..7c83f7c4 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -25,6 +25,11 @@ You may also want to check out our

Caveats

+libstdc++ uses constructors inside the library to initialize 
std::cout/std::cin, etc.
+ instead of having it done in each source which uses iostream header.


We should use code font for std::cout, std::cin and iostream, and
style it as <iostream> not just iostream.


+ This requires you to make sure the dynamic loader to load the new 
libstdc++v3 library
+ (examples of how to do this is to use -Wl,-rpath,... while linking or 
LD_LIBRARY_PATH
+ while running the program).  


I think it would be better to link to 
https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dynamic_or_shared.html#manual.intro.using.linkage.dynamic


How about:

  For C++, construction of the global iostream objects 
  std::cout, std::cin etc. is now done

  inside the standard library, instead of in every source file that
  includes the iostream header. This change
  improves the start-up performance of C++ programs, but it means that
  code compiled with GCC 13.1 will crash if the correct version of
  libstdc++.so is not used at runtime. See the
  <a href="https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dynamic_or_shared.html#manual.intro.using.linkage.dynamic">documentation</a>
  about using the right libstdc++.so at runtime.

Re: libsanitizer: sync from master

2023-04-26 Thread Florian Weimer via Gcc-patches

* Martin Liška:

> On 11/15/22 16:47, Martin Liška wrote:
>> Hi.
>> 
>> I've just pushed libsanitizer update that was tested on x86_64-linux and 
>> ppc64le-linux systems.
>> Moreover, I run bootstrap on x86_64-linux and checked ABI difference with 
>> abidiff.
>
> Hello.
>
> And I've done the same now and merged upstream version 3185e47b5a8444e9fd.

So … we have the issue that involves interceptors outside of libc.so.6,
namely crypt, crypt_r, and I posted an upstream patch for this:

  sanitizers: Disable crypt, crypt_r interceptors for glibc

Can we just apply this downstream for now?  It blocks various folks from
using the sanitizers in their projects.

Thanks,
Florian

Re: [PATCH v4 05/10] RISC-V:autovec: Add autovectorization patterns for binary integer operations

2023-04-26 Thread Robin Dapp via Gcc-patches

Hi Michael,

I have the diff below for the binops in my tree locally.
Maybe something like this works for you? Untested but compiles and
the expander helpers would need to be fortified obviously.

Regards
 Robin

--

gcc/ChangeLog:

* config/riscv/autovec.md (<optab><mode>3): New binops expander.
* config/riscv/riscv-protos.h (emit_nonvlmax_binop): Define.
* config/riscv/riscv-v.cc (emit_pred_binop): New function.
(emit_nonvlmax_binop): New function.
* config/riscv/vector-iterators.md: New iterator.
---
 gcc/config/riscv/autovec.md  | 12 
 gcc/config/riscv/riscv-protos.h  |  1 +
 gcc/config/riscv/riscv-v.cc  | 89 
 gcc/config/riscv/vector-iterators.md | 20 +++
 4 files changed, 97 insertions(+), 25 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index b5d46ff57ab..c21d241f426 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -47,3 +47,15 @@ (define_expand "len_store_"
  operands[1], operands[2], mode);
   DONE;
 })
+
+(define_expand "3"
+  [(set (match_operand:VI 0 "register_operand")
+   (any_int_binop:VI (match_operand:VI 1 "register_operand")
+ (match_operand:VI 2 "register_operand")))]
+  "TARGET_VECTOR"
+{
+  riscv_vector::emit_nonvlmax_binop (code_for_pred (, 
mode),
+operands[0], operands[1], operands[2],
+gen_reg_rtx (Pmode), mode);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f6ea6846736..5cca543c773 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -163,6 +163,7 @@ void emit_hard_vlmax_vsetvl (machine_mode, rtx);
 void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
 void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
 void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
+void emit_nonvlmax_binop (unsigned, rtx, rtx, rtx, rtx, machine_mode);
 enum vlmul_type get_vlmul (machine_mode);
 unsigned int get_ratio (machine_mode);
 int get_ta (rtx);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 5e69427ac54..98ebc052340 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -52,7 +52,7 @@ namespace riscv_vector {
 template  class insn_expander
 {
 public:
-  insn_expander () : m_opno (0) {}
+  insn_expander () : m_opno (0), has_dest(false) {}
   void add_output_operand (rtx x, machine_mode mode)
   {
 create_output_operand (_ops[m_opno++], x, mode);
@@ -83,6 +83,44 @@ public:
 add_input_operand (gen_int_mode (type, Pmode), Pmode);
   }
 
+  void set_dest_and_mask (rtx mask, rtx dest, machine_mode mask_mode)
+  {
+dest_mode = GET_MODE (dest);
+has_dest = true;
+
+add_output_operand (dest, dest_mode);
+
+if (mask)
+  add_input_operand (mask, GET_MODE (mask));
+else
+  add_all_one_mask_operand (mask_mode);
+
+add_vundef_operand (dest_mode);
+  }
+
+  void set_len_and_policy (rtx len, bool vlmax_p)
+{
+  gcc_assert (has_dest);
+  gcc_assert (len || vlmax_p);
+
+  if (len)
+   add_input_operand (len, Pmode);
+  else
+   {
+ rtx vlmax = gen_reg_rtx (Pmode);
+ emit_vlmax_vsetvl (dest_mode, vlmax);
+ add_input_operand (vlmax, Pmode);
+   }
+
+  if (GET_MODE_CLASS (dest_mode) != MODE_VECTOR_BOOL)
+   add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy 
());
+
+  if (vlmax_p)
+   add_avl_type_operand (avl_type::VLMAX);
+  else
+   add_avl_type_operand (avl_type::NONVLMAX);
+}
+
   void expand (enum insn_code icode, bool temporary_volatile_p = false)
   {
 if (temporary_volatile_p)
@@ -96,6 +134,8 @@ public:
 
 private:
   int m_opno;
+  bool has_dest;
+  machine_mode dest_mode;
   expand_operand m_ops[MAX_OPERANDS];
 };
 
@@ -183,37 +223,29 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx 
src, rtx len,
  machine_mode mask_mode, bool vlmax_p)
 {
   insn_expander<8> e;
-  machine_mode mode = GET_MODE (dest);
+  e.set_dest_and_mask (mask, dest, mask_mode);
 
-  e.add_output_operand (dest, mode);
-
-  if (mask)
-e.add_input_operand (mask, GET_MODE (mask));
-  else
-e.add_all_one_mask_operand (mask_mode);
+  e.add_input_operand (src, GET_MODE (src));
 
-  e.add_vundef_operand (mode);
+  e.set_len_and_policy (len, vlmax_p);
 
-  e.add_input_operand (src, GET_MODE (src));
+  e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src));
+}
 
-  if (len)
-e.add_input_operand (len, Pmode);
-  else
-{
-  rtx vlmax = gen_reg_rtx (Pmode);
-  emit_vlmax_vsetvl (mode, vlmax);
-  e.add_input_operand (vlmax, Pmode);
-}
+/* Emit an RVV unmask && vl mov from SRC to DEST.  */
+static void
+emit_pred_binop (unsigned icode, rtx mask, rtx dest, rtx src1, rtx src2,
+rtx len, machine_mode

[committed] RISCV: Inline subword atomic ops

2023-04-26 Thread Patrick O'Neill

Committed - I had to reformat the changelog so it would push and resolve a
trivial merge conflict in riscv.opt.

---

RISC-V has no support for subword atomic operations; code currently
generates libatomic library calls.

This patch changes the default behavior to inline subword atomic calls
(using the same logic as the existing library call).
Behavior can be specified using the -minline-atomics and
-mno-inline-atomics command line flags.

gcc/libgcc/config/riscv/atomic.c has the same logic implemented in asm.
This will need to stay for backwards compatibility and the
-mno-inline-atomics flag.

2023-04-18 Patrick O'Neill 

gcc/ChangeLog:
PR target/104338
* config/riscv/riscv-protos.h: Add helper function stubs.
* config/riscv/riscv.cc: Add helper functions for subword masking.
* config/riscv/riscv.opt: Add command-line flag.
* config/riscv/sync.md: Add masking logic and inline asm for 
fetch_and_op,
fetch_and_nand, CAS, and exchange ops.
* doc/invoke.texi: Add blurb regarding command-line flag.

libgcc/ChangeLog:
PR target/104338
* config/riscv/atomic.c: Add reference to duplicate logic.

gcc/testsuite/ChangeLog:
PR target/104338
* gcc.target/riscv/inline-atomics-1.c: New test.
* gcc.target/riscv/inline-atomics-2.c: New test.
* gcc.target/riscv/inline-atomics-3.c: New test.
* gcc.target/riscv/inline-atomics-4.c: New test.
* gcc.target/riscv/inline-atomics-5.c: New test.
* gcc.target/riscv/inline-atomics-6.c: New test.
* gcc.target/riscv/inline-atomics-7.c: New test.
* gcc.target/riscv/inline-atomics-8.c: New test.

Signed-off-by: Patrick O'Neill 
Signed-off-by: Palmer Dabbelt 
---
 gcc/config/riscv/riscv-protos.h   |   2 +
 gcc/config/riscv/riscv.cc |  49 ++
 gcc/config/riscv/riscv.opt|   4 +
 gcc/config/riscv/sync.md  | 301 +
 gcc/doc/invoke.texi   |  10 +-
 .../gcc.target/riscv/inline-atomics-1.c   |  18 +
 .../gcc.target/riscv/inline-atomics-2.c   |   9 +
 .../gcc.target/riscv/inline-atomics-3.c   | 569 ++
 .../gcc.target/riscv/inline-atomics-4.c   | 566 +
 .../gcc.target/riscv/inline-atomics-5.c   |  87 +++
 .../gcc.target/riscv/inline-atomics-6.c   |  87 +++
 .../gcc.target/riscv/inline-atomics-7.c   |  69 +++
 .../gcc.target/riscv/inline-atomics-8.c   |  69 +++
 libgcc/config/riscv/atomic.c  |   2 +
 14 files changed, 1841 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 607ff6ea697..f87661bde2c 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -79,6 +79,8 @@ extern void riscv_reinit (void);
 extern poly_uint64 riscv_regmode_natural_size (machine_mode);
 extern bool riscv_v_ext_vector_mode_p (machine_mode);
 extern bool riscv_shamt_matches_mask_p (int, HOST_WIDE_INT);
+extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
+extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);

 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index a2d2dd0bb67..0f890469d7a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7161,6 +7161,55 @@ riscv_zero_call_used_regs (HARD_REG_SET 
need_zeroed_hardregs)
& ~zeroed_hardregs);
 }

+/* Given memory reference MEM, expand code to compute the aligned
+   memory address, shift and mask values and store them into
+   *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK.  */
+
+void
+riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
+  rtx *not_mask)
+{
+  /* Align the memory address to a word.  */
+  rtx addr = force_reg (Pmode, XEXP (mem, 0));
+
+  rtx addr_mask = gen_int_mode (-4, Pmode);
+
+  rtx aligned_addr = gen_reg_rtx (Pmode);
+  emit_move_insn (aligned_addr,  gen_rtx_AND (Pmode, addr, addr_mask));
+
+  *aligned_mem = change_address (mem, SImode, aligned_addr);
+
+  /* Calculate the shift amount.  */
+  emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
+  gen_int_mode (3, SImode)));
+

[PATCH] GCC-13/changes: Add note about iostream usage

2023-04-26 Thread Andrew Pinski via Gcc-patches

This adds a note about iostream usage so it does not catch others
by surprise like it has already.

OK?
---
 htdocs/gcc-13/changes.html | 5 +
 1 file changed, 5 insertions(+)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 70732ec0..7c83f7c4 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -25,6 +25,11 @@ You may also want to check out our
 
 Caveats
 
+libstdc++ uses constructors inside the library to initialize 
std::cout/std::cin, etc.
+ instead of having it done in each source which uses iostream header.
+ This requires you to make sure the dynamic loader to load the new 
libstdc++v3 library
+ (examples of how to do this is to use -Wl,-rpath,... while linking or 
LD_LIBRARY_PATH
+ while running the program).  
 OpenMP offloading to Intel MIC has been removed.
 The support for the cr16-elf, tilegx*-linux, 
tilepro*-linux,
   hppa[12]*-*-hpux10*, hppa[12]*-*-hpux11*
-- 
2.31.1

[PATCH, V4] PR target/105325, Make load/cmp fusion know about prefixed loads.

2023-04-26 Thread Michael Meissner via Gcc-patches

I posted a version of patch on March 21st, a second version on March 24th, and
the third version on March 28th.

The V4 patch just adds a new condition to the new test case.  Previously, I was
using 'powerpc_prefixed_addr' to determine whether the GCC compiler would
automatically generate prefixed addresses.  The V4 version also adds a check
for 'power10_ok'.  Power10_ok is needed in case the compiler could generate
prefixed addresses, but the assembler does not support prefixed instructions.

The V3 patch makes some code changes suggested in the genfusion.pl code from
the last 2 patch submissions.  The fusion.md that is produced by genfusion.pl
is the same in all 3 versions.

In V3, I changed the genfusion.pl to match the suggestion for code layout.  I
also used the correct comment for each of the instructions (in the 2nd patch,
when I rewrote the comments about ld and lwa being DS format instructions,
I had put the ld comment in the section handling lwa, and vice versa).

In V3, I also removed lp64 from the new test.  When I first added the prefixed
code, it was only done for 64-bit, but now it is allowed for 32-bit.  However,
the case that shows up (lwa) would not hit in 32-bit, since it only generates
lwz and not lwa.  It also would not generate ld.  But the test does pass when
it is built with -m32.

The issue with the original bug is the power10 load GPR + cmpi -1/0/1 fusion
optimization generates illegal assembler code.

Ultimately the code was dying because the fusion load + compare -1/0/1 patterns
did not handle the possibility that the load might be prefixed.

The main cause is that the constraints for the individual loads in the fusion
did not match the machine.  In particular, LWA is a DS format instruction when
it is unprefixed.  The code also did not set the prefixed attribute correctly.

This patch rewrites the genfusion.pl script so that it will have more accurate
constraints for the LWA and LD instructions (which are DS instructions).  The
updated genfusion.pl was then run to update fusion.md.  Finally, the code for
the "prefixed" attribute is modified so that it considers load + compare
immediate patterns to be like the normal load insns in checking whether
operand[1] is a prefixed instruction.

I have tested this code on a power9 little endian system (with long double
being IEEE 128-bit and IBM 128-bit), a power10 little endian system, and a
power8 big endian system, testing both 32-bit and 64-bit code generation.

For the V4 changes I also built the compiler on a big endian system with an
older assembler, and I verified that the pr105325.C test was listed as
unsupported.

Can I put this code into the master branch and, after a waiting period, apply
it to the GCC 12 and GCC 11 branches (the bug does show up in those branches,
and the patch applies without change)?

2023-04-26   Michael Meissner  

gcc/

PR target/105325
* gcc/config/rs6000/genfusion.pl (gen_ld_cmpi_p10): Improve generation
of the ld and lwa instructions which use the DS encoding instead of D.
Use the YZ constraint for these loads.  Handle prefixed loads better.
Set the sign_extend attribute as appropriate.
* gcc/config/rs6000/fusion.md: Regenerate.
* gcc/config/rs6000/rs6000.md (prefixed attribute): Add fused_load_cmpi
instructions to the list of instructions that might have a prefixed load
instruction.

gcc/testsuite/

PR target/105325
* g++.target/powerpc/pr105325.C: New test.
* gcc.target/powerpc/fusion-p10-ldcmpi.c: Adjust insn counts.
---
 gcc/config/rs6000/fusion.md   | 17 +++-
 gcc/config/rs6000/genfusion.pl| 26 ---
 gcc/config/rs6000/rs6000.md   |  2 +-
 gcc/testsuite/g++.target/powerpc/pr105325.C   | 25 ++
 .../gcc.target/powerpc/fusion-p10-ldcmpi.c|  4 +--
 5 files changed, 60 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/pr105325.C

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index d45fb138a70..da9953d9ad9 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -22,7 +22,7 @@
 ;; load mode is DI result mode is clobber compare mode is CC extend is none
 (define_insn_and_split "*ld_cmpdi_cr0_DI_clobber_CC_none"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x")
-(compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m")
+(compare:CC (match_operand:DI 1 "ds_form_mem_operand" "YZ")
 (match_operand:DI 3 "const_m1_to_1_operand" "n")))
(clobber (match_scratch:DI 0 "=r"))]
   "(TARGET_P10_FUSION)"
@@ -43,7 +43,7 @@ (define_insn_and_split "*ld_cmpdi_cr0_DI_clobber_CC_none"
 ;; load mode is DI result mode is clobber compare mode is CCUNS extend is none
 (define_insn_and_split "*ld_cmpldi_cr0_DI_clobber_CCUNS_none"
   [(set (match_operand:CCUNS 2 "cc_reg_operand" "=x")
-(compare:CCUNS (match_operand:DI

[PATCH] doc: Add explanation of zero-length array example

2023-04-26 Thread Jonathan Wakely via Gcc-patches

As suggested here:
https://gcc.gnu.org/pipermail/gcc/2023-April/241190.html

OK for trunk?

-- >8 --

gcc/ChangeLog:

* doc/extend.texi (Zero Length): Describe example.
---
 gcc/doc/extend.texi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 84b44cb9916..06134a50ad2 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -1705,6 +1705,9 @@ struct line *thisline = (struct line *)
 thisline->length = this_length;
 @end smallexample
 
+In this example, @code{thisline->contents} is an array of @code{char} that
+can hold up to @code{thisline->length} bytes.
+
 Although the size of a zero-length array is zero, an array member of
 this kind may increase the size of the enclosing type as a result of tail
 padding.  The offset of a zero-length array member from the beginning
-- 
2.40.0

[committed] MAINTAINERS: Add myself to write after approval

2023-04-26 Thread Patrick O'Neill


On 4/26/23 09:08, Palmer Dabbelt wrote:


On Wed, 26 Apr 2023 09:07:05 PDT (-0700), Patrick O'Neill wrote:

2023-04-26 Patrick O'Neill 

* MAINTAINERS: Add myself.

Signed-off-by: Patrick O'Neill 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d2188c3275a..169418d44f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -578,6 +578,7 @@ Dorit Nuzman 
 David O'Brien    
 Carlos O'Donell    
 Peter O'Gorman 
+Patrick O'Neill    
 Braden Obrzut    
 Andrea Ornstein 
 Maxim Ostapenko 


Acked-by: Palmer Dabbelt 

Thanks!


Committed!

Patrick

Re: [PATCH] Add targetm.libm_function_max_error

2023-04-26 Thread Michael Matz via Gcc-patches

Hello,

On Wed, 26 Apr 2023, Jakub Jelinek via Gcc-patches wrote:

> For glibc I've gathered data from:
> 4) using attached ulp-tester.c (how to invoke in file comment; tested
>both x86_64, ppc64, ppc64le 50M pseudo-random values in all 4 rounding
>modes, plus on x86_64 float/double sin/cos using libmvec - see
>attached libmvec-wrapper.c as well)

That ulp-tester.c file as attached here is not testing what you think it 
does.  (1) It doesn't compile as it doesn't #define the TESTS macro in the 
!LIBMVEC_TEST case, and (2) it almost never progresses 'm', the status 
variable used before the random numbers start, to beyond 1: you start with 
nextafter(0.0, 1.0), which is the smallest subnormal number (with a ERANGE 
error, but that's ignored), and you test for equality with THIS_MIN, the 
smallest normal (!) number, until you start incrementing 'm'.

>From subnormal smallest to normal smallest takes 1<

Re: [PATCH] MAINTAINERS: Add myself to write after approval

2023-04-26 Thread Palmer Dabbelt


On Wed, 26 Apr 2023 09:07:05 PDT (-0700), Patrick O'Neill wrote:

2023-04-26 Patrick O'Neill 

* MAINTAINERS: Add myself.

Signed-off-by: Patrick O'Neill 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d2188c3275a..169418d44f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -578,6 +578,7 @@ Dorit Nuzman

 David O'Brien  
 Carlos O'Donell
 Peter O'Gorman 
+Patrick O'Neill
 Braden Obrzut  
 Andrea Ornstein
 Maxim Ostapenko



Acked-by: Palmer Dabbelt 

Thanks!

[PATCH] MAINTAINERS: Add myself to write after approval

2023-04-26 Thread Patrick O'Neill

2023-04-26 Patrick O'Neill 

* MAINTAINERS: Add myself.

Signed-off-by: Patrick O'Neill 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d2188c3275a..169418d44f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -578,6 +578,7 @@ Dorit Nuzman

 David O'Brien  
 Carlos O'Donell
 Peter O'Gorman 
+Patrick O'Neill
 Braden Obrzut  
 Andrea Ornstein
 Maxim Ostapenko

--
2.34.1

Re: [PATCH] testsuite: adjust NOP expectations for RISC-V

2023-04-26 Thread Palmer Dabbelt


On Wed, 26 Apr 2023 08:26:26 PDT (-0700), gcc-patches@gcc.gnu.org wrote:



On 4/25/23 08:50, Jan Beulich via Gcc-patches wrote:

RISC-V will emit ".option nopic" when -fno-pie is in effect, which
matches the generic pattern. Just like done for Alpha, special-case
RISC-V.
---
A couple more targets look to be affected as well, simply because their
"no-operation" insn doesn't match the expectation. With the apparently
necessary further special casing I then also question the presence of
"SWYM" in the generic pattern.

An alternative here might be to use dg-additional-options to add e.g.
-fpie. I don't think I know all possible implications of doing so,
though.


Looks like there's already a no-pie for SPARC.  Nothing's jumping out as 
to why, but I'm not super familiar with `-fpatchable-function-entry`.



I think this is fine.  Go ahead and install it.


We run into this sort of thing somewhat frequently.  Maybe we want a DG 
matcher that avoids matching assembler directives?  Or maybe even a 
"scan-assembler-nop-times" type thing, given that different ports have 
different names for the instruction?


I don't see reason to block fixing the test on something bigger, though, 
so seems fine for trunk.  Presumably we'd want to backport this as well?




jeff

Re: [PATCH] testsuite: adjust NOP expectations for RISC-V

2023-04-26 Thread Jeff Law via Gcc-patches





On 4/25/23 08:50, Jan Beulich via Gcc-patches wrote:

RISC-V will emit ".option nopic" when -fno-pie is in effect, which
matches the generic pattern. Just like done for Alpha, special-case
RISC-V.
---
A couple more targets look to be affected as well, simply because their
"no-operation" insn doesn't match the expectation. With the apparently
necessary further special casing I then also question the presence of
"SWYM" in the generic pattern.

An alternative here might be to use dg-additional-options to add e.g.
-fpie. I don't think I know all possible implications of doing so,
though.

I think this is fine.  Go ahead and install it.

jeff

[PATCH] Add targetm.libm_function_max_error

2023-04-26 Thread Jakub Jelinek via Gcc-patches

Hi!

As has been discussed before, the following patch adds target hook
for math library function maximum errors measured in ulps.
The default is to return ~0U which is a magic maximum value which means
nothing is known about precision of the math function.

The first argument is unsigned int because enum combined_fn isn't available
everywhere where target hooks are included but is expected to be given
the enum combined_fn value, although it should be used solely to find out
which kind of math function (say sin vs. cos vs. sqrt vs. exp10) rather
than its variant (f suffix, no suffix, l suffix, f128 suffix, ...), for
which there is the machine_mode argument.
The last argument is a bool, if it is false, the function should return
maximum known error in ulps for a given function (taking -frounding-math
into account if enabled), with 0.5ulps being represented as 0.
If it is true, it is about whether the function can return values outside of
an intrinsic finite range for the function and by how many ulps.
E.g. sin/cos should return result in [-1.,1], if the function is expected
to never return values outside of that finite interval, the hook should
return 0.  Similarly for sqrt such range is [-0.,+Inf].

The patch implements it for glibc only so far, I hope other maintainers
can submit details for Solaris, musl, perhaps BSDs, etc.
For glibc I've gathered data from:
1) 
https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
   as latest published glibc data
2) 
https://www.gnu.org/software/libc/manual/2.22/html_node/Errors-in-Math-Functions.html
   as a few years old glibc data
3) using attached libc-ulps.sh script from glibc git
4) using attached ulp-tester.c (how to invoke in file comment; tested
   both x86_64, ppc64, ppc64le 50M pseudo-random values in all 4 rounding
   modes, plus on x86_64 float/double sin/cos using libmvec - see
   attached libmvec-wrapper.c as well)
5) using attached boundary-tester.c to test for whether sin/cos/sqrt return
   values outside of the intrinsic ranges for those functions (again,
   tested on x86_64, ppc64, ppc64le plus on x86_64 using libmvec as well;
   libmvec with non-default rounding modes is pretty much random number
   generator it seems)

The data is added to various hooks, the generic and generic glibc versions
being in targhooks.c so that the various targets can easily override it.
The intent is that the generic glibc version handles most of the stuff
and specific target arch overrides handle the outliers or special cases.
The patch has special case for x86_64 when __FAST_MATH__ is defined (as
one can use in that case either libm or libmvec and we don't know which
one will be used; so it uses maximum of what libm provides and libmvec),
rs6000 (had to add one because cosf has 3ulps on ppc* rather than 1-2ulps
on most other targets; MODE_COMPOSITE_P could be in theory handled in the
generic code too, but as we have rs6000-linux specific function, it can be
done just there), arc-linux (because DFmode sin has 7ulps there compared to
1ulps on other targets, both in default rounding mode and in others) and
or1k-linux (while DFmode sin has 1ulps there for default rounding mode,
for other rounding modes it has up to 7ulps).
Now, for -frounding-math I'm trying to add a few ulps more because I expect
it to be much less tested, except that for boundary_p I try to use
the numbers I got from the 5) tester.

So far built on x86_64-linux, powerpc64le-linux, arc-linux and or1k-linux,
ok for trunk if it passes bootstrap/regtest?

2023-04-26  Jakub Jelinek  

* target.def (libm_function_max_error): New target hook.
* doc/tm.texi.in (TARGET_LIBM_FUNCTION_MAX_ERROR): Add.
* doc/tm.texi: Regenerated.
* targhooks.h (default_libm_function_max_error,
glibc_linux_libm_function_max_error): Declare.
* targhooks.cc: Include case-cfn-macros.h.
(default_libm_function_max_error,
glibc_linux_libm_function_max_error): New functions.
* config/linux.h (TARGET_LIBM_FUNCTION_MAX_ERROR): Redefine.
* config/linux-protos.h (linux_libm_function_max_error): Declare.
* config/linux.cc: Include target.h and targhooks.h.
(linux_libm_function_max_error): New function.
* config/arc/arc.cc: Include targhooks.h and case-cfn-macros.h.
(arc_libm_function_max_error): New function.
(TARGET_LIBM_FUNCTION_MAX_ERROR): Redefine.
* config/i386/i386.cc (ix86_libc_has_fast_function): Formatting fix.
(ix86_libm_function_max_error): New function.
(TARGET_LIBM_FUNCTION_MAX_ERROR): Redefine.
* config/rs6000/rs6000-protos.h
(rs6000_linux_libm_function_max_error): Declare.
* config/rs6000/rs6000-linux.cc: Include target.h, targhooks.h, tree.h
and case-cfn-macros.h.
(rs6000_linux_libm_function_max_error): New function.
* config/rs6000/linux.h (TARGET_LIBM_FUNCTION_MAX_ERROR): Redefine.
*

[PATCH][committed] aarch64: Reimplement RSHRN2 intrinsic patterns with standard RTL codes

2023-04-26 Thread Kyrylo Tkachov via Gcc-patches

Hi all,

Similar to the previous patch, we can reimplement the rshrn2 patterns using 
standard RTL codes
for shift, truncate and plus with the appropriate constants.
This allows us to get rid of UNSPEC_RSHRN entirely.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.

Thanks,
Kyrill

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (aarch64_rshrn2_insn_le):
Reimplement using standard RTL codes instead of unspec.
(aarch64_rshrn2_insn_be): Likewise.
(aarch64_rshrn2): Adjust for the above.
* config/aarch64/aarch64.md (UNSPEC_RSHRN): Delete.


rshrn2.patch
Description: rshrn2.patch

[PATCH][committed] aarch64: Reimplement RSHRN intrinsic patterns with standard RTL codes

2023-04-26 Thread Kyrylo Tkachov via Gcc-patches

Hi all,

This patch reimplements the backend patterns for the rshrn intrinsics using 
standard RTL codes rather than UNSPECS.
We already represent shrn as truncate of a shift. rshrn can be represented as 
truncate (src + (1 << (shft - 1)) >> shft),
similar to how LLVM treats it.

I have a follow-up patch to do the same for the rshrn2 pattern, which will 
allow us to remove the UNSPEC_RSHRN entirely.

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (aarch64_rshrn_insn_le): 
Reimplement
with standard RTL codes instead of an UNSPEC.
(aarch64_rshrn_insn_be): Likewise.
(aarch64_rshrn): Adjust for the above.
* config/aarch64/predicates.md (aarch64_simd_rshrn_imm_vec): Define.


rshrn.patch
Description: rshrn.patch

Re: libsanitizer: sync from master

2023-04-26 Thread Martin Liška

On 11/15/22 16:47, Martin Liška wrote:
> Hi.
> 
> I've just pushed libsanitizer update that was tested on x86_64-linux and 
> ppc64le-linux systems.
> Moreover, I run bootstrap on x86_64-linux and checked ABI difference with 
> abidiff.

Hello.

And I've done the same now and merged upstream version 3185e47b5a8444e9fd.

Martin

> 
> Pushed as r13-4068-g3037f11fb86eda.
> 
> Cheers,
> Martin

RE: [PATCH] RISC-V: Legitimise the const0_rtx for RVV load/store address

2023-04-26 Thread Li, Pan2 via Gcc-patches

Thanks Kito. It comes from Ju-Zhe's earlier experience with
auto-vectorization.

Pan

-Original Message-
From: Kito Cheng  
Sent: Wednesday, April 26, 2023 9:24 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Wang, 
Yanzhang 
Subject: Re: [PATCH] RISC-V: Legitimise the const0_rtx for RVV load/store 
address

LGTM, pushed to trunk

> This patch try to legitimise the const0_rtx (aka zero register) as the 
> base register for the RVV load/store instructions.
>
> For example:
> vint32m1_t test_vle32_v_i32m1_shortcut (size_t vl) {
>   return __riscv_vle32_v_i32m1 ((int32_t *)0, vl); }

The example is kind of counter intuitive to me, I know it's legal from ISA spec 
level, but can't understand why it's useful...until I saw you mention auto vec 
and index load - I realized this is optimization for gather/scatter code gen.

Re: [PATCH] LoongArch: Enable shrink wrapping

2023-04-26 Thread Xi Ruoyao via Gcc-patches

On Wed, 2023-04-26 at 17:53 +0800, Lulu Cheng wrote:
> Hi, ruoyao:
> 
>    The performance of spec2006 is finished. The fixed-point 
> 400.perlbench has about 3% performance improvement,
> 
> and the other basics have not changed, and the floating-point tests
> have 
> basically remained the same.
> 
>    Do you have any questions about the test cases mentioned by Guo
> Jie? If there is no problem, modify the test case,
> 
> I think the code can be merged into the main branch.

I'll rewrite the test and commit in a few days (now I'm occupied with
something :( ).

> 
> Thanks!
> 
> 在 2023/4/25 下午5:12, Guo Jie 写道:
> > /* snip */
> > 
> > > >   diff --git a/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c 
> > > > b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
> > > > new file mode 100644
> > > > index 000..f2c867a2769
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
> > > > @@ -0,0 +1,22 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O -fshrink-wrap" } */
> > > > +
> > > > +/* f(x) should do nothing if x is 0.  */
> > > > +/* { dg-final { scan-assembler 
> > > > "bnez\t\\\$r4,\[^\n\]*\n\tjr\t\\\$r1" } } */
> > > > +
> > > > +void g(void);
> > > > +
> > > > +void
> > > > +f(int x)
> > > > +{
> > > > +  if (x)
> > > > +    {
> > > > +  register int s0 asm("s0") = x;
> > > > +  register int s1 asm("s1") = x;
> > > > +  register int s2 asm("s2") = x;
> > > > +  asm("" : : "r"(s0));
> > > > +  asm("" : : "r"(s1));
> > > > +  asm("" : : "r"(s2));
> > > > +  g();
> > > > +    }
> > > > +}
> > 
> > I think the test case cannot fully reflect the optimization effect
> > of 
> > the current patch,
> > 
> > because even without the patch, -O -fshrink-wrap will still perform 
> > architecture independent optimization.
> > 
> > This patch considers architecture related registers as finer grained
> > optimization for shrink wrapping,
> > 
> > I think a test case like the one below is more suitable:
> > 
> > 
> > int foo(int x)
> > {
> >   if (x)
> >   {
> >     __asm__ ("":::"s0","s1");
> >     return x;
> >   }
> > 
> >   __asm__ ("":::"s2","s3");
> >   return 0;
> > }
> > 
> > Otherwise LGTM, thanks!
> 

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [PATCH] RISC-V: Legitimise the const0_rtx for RVV load/store address

2023-04-26 Thread Kito Cheng via Gcc-patches

LGTM, pushed to trunk

> This patch try to legitimise the const0_rtx (aka zero register)
> as the base register for the RVV load/store instructions.
>
> For example:
> vint32m1_t test_vle32_v_i32m1_shortcut (size_t vl)
> {
>   return __riscv_vle32_v_i32m1 ((int32_t *)0, vl);
> }

The example is kind of counter intuitive to me, I know it's legal from
ISA spec level, but can't understand why it's useful...until I saw you
mention auto vec and index load - I realized this is optimization for
gather/scatter code gen.

[PATCH v2] RISC-V: ICE for vlmul_ext_v intrinsic API

2023-04-26 Thread yanzhang.wang--- via Gcc-patches

From: Yanzhang Wang 

PR 109617

gcc/ChangeLog:

* config/riscv/vector-iterators.md: Support VNx2HI and VNx4DI when
MIN_VLEN >= 128.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/vlmul_ext-1.c: New test.

Signed-off-by: Yanzhang Wang 
Co-authored-by: Pan Li 
---
 gcc/config/riscv/vector-iterators.md   |  3 ++-
 .../gcc.target/riscv/rvv/base/vlmul_ext-1.c| 14 ++
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vlmul_ext-1.c

diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index a8e856161d3..033659930d1 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -189,6 +189,7 @@
   (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI (VNx16HI 
"TARGET_MIN_VLEN >= 128")
   (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI (VNx8SI "TARGET_MIN_VLEN >= 
128")
   (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI 
"TARGET_VECTOR_ELEN_64")
+  (VNx4DI "TARGET_VECTOR_ELEN_64")
   (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
   (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
   (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
@@ -220,7 +221,7 @@
 
 (define_mode_iterator VLMULEXT32 [
   (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI (VNx4QI "TARGET_MIN_VLEN >= 128")
-  (VNx1HI "TARGET_MIN_VLEN < 128")
+  (VNx1HI "TARGET_MIN_VLEN < 128") (VNx2HI "TARGET_MIN_VLEN >= 128")
 ])
 
 (define_mode_iterator VLMULEXT64 [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vlmul_ext-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/vlmul_ext-1.c
new file mode 100644
index 000..501d98c5897
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vlmul_ext-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns 
-fno-schedule-insns2" } */
+
+#include 
+
+vint16m8_t test_vlmul_ext_v_i16mf4_i16m8(vint16mf4_t op1) {
+  return __riscv_vlmul_ext_v_i16mf4_i16m8(op1);
+}
+
+vint64m8_t test_vlmul_ext_v_i64m2_i64m8(vint64m2_t op1) {
+  return __riscv_vlmul_ext_v_i64m2_i64m8(op1);
+}
+
+/* { dg-final { scan-assembler-times {vs8r.v\s+[,\sa-x0-9()]+} 2} } */
-- 
2.39.2

Re: [PATCH] RISCV: Add vector psabi checking.

2023-04-26 Thread Kito Cheng via Gcc-patches

> gcc/ChangeLog:
>
> * config/riscv/riscv.cc (riscv_scalable_vector_type_p):
> (riscv_arg_has_vector):
> (riscv_pass_in_vector_p):
> (riscv_get_arg_info):

You need to write something for changelog...:P

Re: [PATCH] wwwdocs: Document more libstdc++ additions for GCC 13

2023-04-26 Thread Jonathan Wakely via Gcc-patches

On Wed, 26 Apr 2023 at 13:52, Patrick Palka via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> Does this look OK for wwwdocs?
>
> ---
>  htdocs/gcc-13/changes.html | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
> index 7b64b2e5..191d0a4d 100644
> --- a/htdocs/gcc-13/changes.html
> +++ b/htdocs/gcc-13/changes.html
> @@ -370,7 +370,17 @@ You may also want to check out our
>views::pairwise, views::slide,
>views::chunk, views::chunk_by,
>views::repeat, views::chunk_by,
> -  views::cartesian_product,
> views::as_rvalue.
> +  views::cartesian_product,
> views::as_rvalue,
> +  views::enumerate, views::as_const.
> +
> +Additions to the algorithm header:
> +  ranges::contains,
> ranges::contains_subrange,
> +  ranges::iota, ranges::find_last,
> +  ranges::find_last_if,
> ranges::find_last_if_not,
> +  ranges::fold_left,
> ranges::fold_left_first,
> +  ranges::fold_right,
> ranges::fold_right_last,
> +  ranges::fold_left_with_iter
>

OK with a comma at the end of the line above, thanks.


> +  ranges::fold_left_first_with_iter.
>  
>  Monadic operations for std::expected.
>  Constexpr std::bitset, std::to_chars
> --
> 2.40.1.423.g2807bd2c10
>
>

[PATCH] wwwdocs: Document more libstdc++ additions for GCC 13

2023-04-26 Thread Patrick Palka via Gcc-patches

Does this look OK for wwwdocs?

---
 htdocs/gcc-13/changes.html | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 7b64b2e5..191d0a4d 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -370,7 +370,17 @@ You may also want to check out our
   views::pairwise, views::slide,
   views::chunk, views::chunk_by,
   views::repeat, views::chunk_by,
-  views::cartesian_product, views::as_rvalue.
+  views::cartesian_product, views::as_rvalue,
+  views::enumerate, views::as_const.
+
+Additions to the algorithm header:
+  ranges::contains, ranges::contains_subrange,
+  ranges::iota, ranges::find_last,
+  ranges::find_last_if, ranges::find_last_if_not,
+  ranges::fold_left, ranges::fold_left_first,
+  ranges::fold_right, ranges::fold_right_last,
+  ranges::fold_left_with_iter
+  ranges::fold_left_first_with_iter.
 
 Monadic operations for std::expected.
 Constexpr std::bitset, std::to_chars
-- 
2.40.1.423.g2807bd2c10

[PATCH] RISCV: Add vector psabi checking.

2023-04-26 Thread yanzhang.wang--- via Gcc-patches

From: Yanzhang Wang 

This patch adds a check for whether a function's argument or return value
is a vector type, and emits a warning if so.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_scalable_vector_type_p):
(riscv_arg_has_vector):
(riscv_pass_in_vector_p):
(riscv_get_arg_info):

gcc/testsuite/ChangeLog:

* gcc.target/riscv/vector-abi-1.c: New test.
* gcc.target/riscv/vector-abi-2.c: New test.
* gcc.target/riscv/vector-abi-3.c: New test.
* gcc.target/riscv/vector-abi-4.c: New test.
* gcc.target/riscv/vector-abi-5.c: New test.

Signed-off-by: Yanzhang Wang 
Co-authored-by: Kito Cheng 
---
 gcc/config/riscv/riscv.cc | 73 +++
 gcc/testsuite/gcc.target/riscv/vector-abi-1.c | 14 
 gcc/testsuite/gcc.target/riscv/vector-abi-2.c | 14 
 gcc/testsuite/gcc.target/riscv/vector-abi-3.c | 14 
 gcc/testsuite/gcc.target/riscv/vector-abi-4.c | 16 
 gcc/testsuite/gcc.target/riscv/vector-abi-5.c | 15 
 6 files changed, 146 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/vector-abi-5.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 76eee4a55e9..06e9fe7d924 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3728,6 +3728,76 @@ riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
   GEN_INT (offset2;
 }
 
+/* Use the TYPE_SIZE to distinguish the type with vector_size attribute and
+   intrinsic vector type.  Because we can't get the decl for the params.  */
+
+static bool
+riscv_scalable_vector_type_p (const_tree type)
+{
+  tree size = TYPE_SIZE (type);
+  if (size && TREE_CODE (size) == INTEGER_CST)
+return false;
+
+  /* For the data type like vint32m1_t, the size code is POLY_INT_CST.  */
+  return true;
+}
+
+static bool
+riscv_arg_has_vector (const_tree type)
+{
+  bool is_vector = false;
+
+  switch (TREE_CODE (type))
+{
+case RECORD_TYPE:
+  if (!COMPLETE_TYPE_P (type))
+   break;
+
+  for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+   if (TREE_CODE (f) == FIELD_DECL)
+ {
+   tree field_type = TREE_TYPE (f);
+   if (!TYPE_P (field_type))
+ break;
+
+   /* Ignore it if it's fixed length vector.  */
+   if (VECTOR_TYPE_P (field_type))
+ is_vector = riscv_scalable_vector_type_p (field_type);
+   else
+ is_vector = riscv_arg_has_vector (field_type);
+ }
+
+  break;
+
+case VECTOR_TYPE:
+  is_vector = riscv_scalable_vector_type_p (type);
+  break;
+
+default:
+  is_vector = false;
+  break;
+}
+
+  return is_vector;
+}
+
+/* Pass the type to check whether it's a vector type or contains vector type.
+   Only check the value type and no checking for vector pointer type.  */
+
+static void
+riscv_pass_in_vector_p (const_tree type)
+{
+  static int warned = 0;
+
+  if (type && riscv_arg_has_vector (type) && !warned)
+{
+  warning (OPT_Wpsabi, "ABI for the scalable vector type is currently in "
+  "experimental stage and may changes in the upcoming version of "
+  "GCC.");
+  warned = 1;
+}
+}
+
 /* Fill INFO with information about a single argument, and return an
RTL pattern to pass or return the argument.  CUM is the cumulative
state for earlier arguments.  MODE is the mode of this argument and
@@ -3812,6 +3882,9 @@ riscv_get_arg_info (struct riscv_arg_info *info, const 
CUMULATIVE_ARGS *cum,
}
 }
 
+  /* Only check existing of vector type.  */
+  riscv_pass_in_vector_p (type);
+
   /* Work out the size of the argument.  */
   num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE 
(mode).to_constant ();
   num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
diff --git a/gcc/testsuite/gcc.target/riscv/vector-abi-1.c 
b/gcc/testsuite/gcc.target/riscv/vector-abi-1.c
new file mode 100644
index 000..114ee6de483
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/vector-abi-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -march=rv64gcv -mabi=lp64d" } */
+
+#include "riscv_vector.h"
+
+void
+fun (vint32m1_t a) { } /* { dg-warning "the vector type" } */
+
+void
+bar ()
+{
+  vint32m1_t a;
+  fun (a);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/vector-abi-2.c 
b/gcc/testsuite/gcc.target/riscv/vector-abi-2.c
new file mode 100644
index 000..fd4569535cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/vector-abi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d" } */
+
+#include "riscv_vector.h"
+
+vint32m1_t
+fun (vint32m1_t* a) {  return *a;

RE: [PATCH] RISC-V: ICE for vlmul_ext_v intrinsic API

2023-04-26 Thread Li, Pan2 via Gcc-patches

Great! Thanks yanzhang.

Could you please add some text describing the changes below? Otherwise Kito
may hit an error from the git hook when committing the patch.


gcc/ChangeLog:



* config/riscv/vector-iterators.md:   <- add text for change.



gcc/testsuite/ChangeLog:



* gcc.target/riscv/rvv/base/vlmul_ext-1.c: New test.

Pan

From: juzhe.zhong 
Sent: Wednesday, April 26, 2023 8:15 PM
To: Wang, Yanzhang 
Cc: gcc-patches@gcc.gnu.org; kito.ch...@sifive.com; Li, Pan2 
; Wang, Yanzhang 
Subject: Re: [PATCH] RISC-V: ICE for vlmul_ext_v intrinsic API

LGTM. Thanks for fixing my silly mistake.
 Replied Message 
From
yanzhang.w...@intel.com
Date
04/26/2023 20:05
To
gcc-patches@gcc.gnu.org
Cc
juzhe.zh...@rivai.ai,
kito.ch...@sifive.com,
pan2...@intel.com,
yanzhang.w...@intel.com
Subject
[PATCH] RISC-V: ICE for vlmul_ext_v intrinsic API

Re: [PATCH] avr: Set param_min_pagesize to 0 [PR105523]

2023-04-26 Thread Richard Biener via Gcc-patches

On Wed, Apr 26, 2023 at 12:56 PM  wrote:
>
> On Wed, Apr 26, 2023 at 3:15 PM Richard Biener via Gcc-patches 
>  wrote:
> >
> > On Wed, Apr 26, 2023 at 11:42 AM Richard Biener
> >  wrote:
> > >
> > > On Wed, Apr 26, 2023 at 11:01 AM SenthilKumar.Selvaraj--- via
> > > Gcc-patches  wrote:
> > > >
> > > > Hi,
> > > >
> > > > This patch fixes PR 105523 by setting param_min_pagesize to 0 for the
> > > > avr target. For this target, zero and offsets from zero are perfectly
> > > > valid addresses, and the default value of param_min_pagesize ends up
> > > > triggering warnings on valid memory accesses.
> > >
> > > I think the proper configuration is to have
> > > DEFAULT_ADDR_SPACE_ZERO_ADDRESS_VALID
> >
> > Err, TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
>
> That worked. Ok for trunk and backporting to 13 and 12 branches
> (pending regression testing)?

OK, but please give Denis time to comment.

Richard.

> Regards,
> Senthil
>
> PR 105523
>
> gcc/ChangeLog:
>
> * config/avr/avr.cc (avr_addr_space_zero_address_valid):
> (TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID): Return true.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/avr/pr105523.c: New test.
>
>
>
> diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
> index c193430cf07..5439eb8e55c 100644
> --- a/gcc/config/avr/avr.cc
> +++ b/gcc/config/avr/avr.cc
> @@ -9788,6 +9788,16 @@ avr_addr_space_diagnose_usage (addr_space_t as, 
> location_t loc)
>(void) avr_addr_space_supported_p (as, loc);
>  }
>
> +/* Implement `TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID. Zero is a valid
> +   address in all address spaces. Even in ADDR_SPACE_FLASH1 etc.,
> +   a zero address is valid and means 0x, where RAMPZ is
> +   set to the appropriate segment value. */
> +
> +static bool
> +avr_addr_space_zero_address_valid (addr_space_t as)
> +{
> +  return true;
> +}
>
>  /* Look if DECL shall be placed in program memory space by
> means of attribute `progmem' or some address-space qualifier.
> @@ -14688,6 +14698,9 @@ avr_float_lib_compare_returns_bool (machine_mode 
> mode, enum rtx_code)
>  #undef  TARGET_ADDR_SPACE_DIAGNOSE_USAGE
>  #define TARGET_ADDR_SPACE_DIAGNOSE_USAGE avr_addr_space_diagnose_usage
>
> +#undef  TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
> +#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID 
> avr_addr_space_zero_address_valid
> +
>  #undef  TARGET_MODE_DEPENDENT_ADDRESS_P
>  #define TARGET_MODE_DEPENDENT_ADDRESS_P avr_mode_dependent_address_p
>
> diff --git a/gcc/testsuite/gcc.target/avr/pr105523.c 
> b/gcc/testsuite/gcc.target/avr/pr105523.c
> new file mode 100644
> index 000..fbbf7bf4422
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/avr/pr105523.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os -Wall" } */
> +
> +/* Verify no "array subscript 0 is outside array bounds of" is generated
> +   for accessing memory addresses in the 0-4096 range. */
> +
> +typedef __UINT8_TYPE__ uint8_t;
> +
> +#define SREG (*(volatile uint8_t*) (0x3F + __AVR_SFR_OFFSET__ ))
> +
> +void bar (void)
> +{
> +SREG = 0;
> +}
>
>

Re: Unloop no longer looping loops in loop-ch

2023-04-26 Thread Richard Biener via Gcc-patches

On Wed, 26 Apr 2023, Jan Hubicka wrote:

> > > -  if (precise)
> > > +  if (precise
> > > +   && get_max_loop_iterations_int (loop) == 1)
> > > + {
> > > +   if (dump_file && (dump_flags & TDF_DETAILS))
> > > + fprintf (dump_file, "Loop %d no longer loops.\n", loop->num);
> > 
> > but max loop iterations is 1 ...?
> 
> I first check for loops with 0 iterations, push them to unlooping list
> and avoid any header copying (it is useless).
> At this point we already did header duplication and verified that the
> maximal number of iterations will drop by 1 since there is no way loop
> can terminate except for the header tests we peeled out.
> 
> So 1 would turn to 0 in the loop info update and it seems useless to do
> it.
> > 
> > > +   loops_to_unloop.safe_push (loop);
> > > +   loops_to_unloop_nunroll.safe_push (0);
> > > + }
> > > +  else if (precise)
> > >   {
> > > if (dump_file && (dump_flags & TDF_DETAILS))
> > >   fprintf (dump_file,
> > > @@ -688,6 +699,12 @@ ch_base::copy_headers (function *fun)
> > > BITMAP_FREE (exit_bbs);
> > >   }
> > >  }
> > > +  if (loops_to_unloop.length())
> > 
> >   !loops_to_unloop.is_empty ()
> I updated that in my copy of the patch.
> > 
> > > +{
> > > +  bool irred_invalidated;
> > > +  unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, NULL, 
> > > _invalidated);
> > > +  changed = true;
> > > +}
> > >free (bbs);
> > >free (copied_bbs);
> > 
> > 
> > Since we run VN on the header copies I wonder if, since you remove
> > edges, we need to run CFG cleanup before this and updating SSA form?
> > For safety we usually let CFG cleanup do the actual CFG manipulation
> > and just change cond jumps to if (0) or if (1)?
> 
> I do unlooping only after the VN so I think I am safe here.

Ah OK.

The patch is OK then.

Thanks,
Richard.

[PATCH] RISC-V: ICE for vlmul_ext_v intrinsic API

2023-04-26 Thread yanzhang.wang--- via Gcc-patches

From: Yanzhang Wang 

PR 109617

gcc/ChangeLog:

* config/riscv/vector-iterators.md:

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/vlmul_ext-1.c: New test.

Signed-off-by: Yanzhang Wang 
Co-authored-by: Pan Li 
---
 gcc/config/riscv/vector-iterators.md   |  3 ++-
 .../gcc.target/riscv/rvv/base/vlmul_ext-1.c| 14 ++
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vlmul_ext-1.c

diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index a8e856161d3..033659930d1 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -189,6 +189,7 @@
   (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI (VNx16HI 
"TARGET_MIN_VLEN >= 128")
   (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI (VNx8SI "TARGET_MIN_VLEN >= 
128")
   (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI 
"TARGET_VECTOR_ELEN_64")
+  (VNx4DI "TARGET_VECTOR_ELEN_64")
   (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
   (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
   (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
@@ -220,7 +221,7 @@
 
 (define_mode_iterator VLMULEXT32 [
   (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI (VNx4QI "TARGET_MIN_VLEN >= 128")
-  (VNx1HI "TARGET_MIN_VLEN < 128")
+  (VNx1HI "TARGET_MIN_VLEN < 128") (VNx2HI "TARGET_MIN_VLEN >= 128")
 ])
 
 (define_mode_iterator VLMULEXT64 [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vlmul_ext-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/vlmul_ext-1.c
new file mode 100644
index 000..501d98c5897
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vlmul_ext-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns 
-fno-schedule-insns2" } */
+
+#include 
+
+vint16m8_t test_vlmul_ext_v_i16mf4_i16m8(vint16mf4_t op1) {
+  return __riscv_vlmul_ext_v_i16mf4_i16m8(op1);
+}
+
+vint64m8_t test_vlmul_ext_v_i64m2_i64m8(vint64m2_t op1) {
+  return __riscv_vlmul_ext_v_i64m2_i64m8(op1);
+}
+
+/* { dg-final { scan-assembler-times {vs8r.v\s+[,\sa-x0-9()]+} 2} } */
-- 
2.39.2

[PATCH] RISC-V: Legitimise the const0_rtx for RVV load/store address

2023-04-26 Thread Pan Li via Gcc-patches

From: Pan Li 

This patch tries to legitimise the const0_rtx (aka zero register)
as the base register for the RVV load/store instructions.

For example:
vint32m1_t test_vle32_v_i32m1_shortcut (size_t vl)
{
  return __riscv_vle32_v_i32m1 ((int32_t *)0, vl);
}

Before this patch:
li  a5,0
vsetvli zero,a1,e32,m1,ta,ma
vle32.v v24,0(a5)  <- can propagate the const 0 to a5 here
vs1r.v  v24,0(a0)

After this patch:
vsetvli zero,a1,e32,m1,ta,ma
vle32.v v24,0(zero)
vs1r.v  v24,0(a0)

As above, this patch allows you to propagate the const 0 (aka zero
register) to the base register of the RVV Unit-Stride load in the
combine pass. This may benefit the underlying RVV auto-vectorization.

However, the indexed load fails to perform the optimization, and it
will be taken care of in another PATCH.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_classify_address): Allow
  const0_rtx for the RVV load/store.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/zero_base_load_store_optimization.c: New 
test.

Signed-off-by: Pan Li 
Co-authored-by: Ju-Zhe Zhong 
---
 gcc/config/riscv/riscv.cc |  17 ++-
 .../base/zero_base_load_store_optimization.c  | 135 ++
 2 files changed, 150 insertions(+), 2 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/zero_base_load_store_optimization.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index ac8e4420896..a2d2dd0bb67 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1088,9 +1088,22 @@ riscv_classify_address (struct riscv_address_info *info, 
rtx x,
  && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset));
 
 case CONST_INT:
-  /* RVV load/store disallow CONST_INT.  */
+  /* We only allow the const0_rtx for the RVV load/store.  For example:
++--+
+| li  a5,0 |
+| vsetvli zero,a1,e32,m1,ta,ma |
+| vle32.v v24,0(a5)  <- propagate the const 0 to a5 here.  |
+| vs1r.v  v24,0(a0)|
++--+
+It can be folded to:
++--+
+| vsetvli zero,a1,e32,m1,ta,ma |
+| vle32.v v24,0(zero)  |
+| vs1r.v  v24,0(a0)|
++--+
+This behavior will benefit the underlying RVV auto vectorization.  */
   if (riscv_v_ext_vector_mode_p (mode))
-   return false;
+   return x == const0_rtx;
 
   /* Small-integer addresses don't occur very often, but they
 are legitimate if x0 is a valid base register.  */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/base/zero_base_load_store_optimization.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/zero_base_load_store_optimization.c
new file mode 100644
index 000..4b30d3505c5
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/base/zero_base_load_store_optimization.c
@@ -0,0 +1,135 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */
+
+// #include 
+#include "riscv_vector.h"
+
+#define float32_t float
+
+// Unit-Stride Load/Store
+vint32m1_t test_vle32_v_i32m1_shortcut (size_t vl)
+{
+  return __riscv_vle32_v_i32m1 ((int32_t *)0, vl);
+}
+
+vuint32m1_t test_vle32_v_u32m1_shortcut (size_t vl)
+{
+  return __riscv_vle32_v_u32m1 ((int32_t *)0, vl);
+}
+
+vfloat32m1_t test_vle32_v_f32m1_shortcut (size_t vl)
+{
+  return __riscv_vle32_v_f32m1 ((float32_t *)0, vl);
+}
+
+void test_vse32_v_i32m1_shortcut (vint32m1_t val, size_t vl)
+{
+  __riscv_vse32_v_i32m1 ((int32_t *)0, val, vl);
+}
+
+void test_vse32_v_u32m1_shortcut (vuint32m1_t val, size_t vl)
+{
+  __riscv_vse32_v_u32m1 ((uint32_t *)0, val, vl);
+}
+
+void test_vse32_v_f32m1_shortcut (vfloat32m1_t val, size_t vl)
+{
+  __riscv_vse32_v_f32m1 ((float32_t *)0, val, vl);
+}
+
+// Stride Load/Store
+vint32m1_t test_vlse32_v_i32m1_shortcut (ptrdiff_t bstride, size_t vl)
+{
+  return  __riscv_vlse32_v_i32m1 ((int32_t *)0, bstride, vl);
+}
+
+vuint32m1_t test_vlse32_v_u32m1_shortcut (ptrdiff_t bstride, size_t vl)
+{
+  return  __riscv_vlse32_v_u32m1 ((uint32_t *)0, bstride, vl);
+}
+
+vfloat32m1_t test_vlse32_v_f32m1_shortcut (ptrdiff_t bstride, size_t vl)
+{
+  return  __riscv_vlse32_v_f32m1 ((float32_t *)0, bstride, vl);
+}
+
+void test_vsse32_v_i32m1_shortcut (ptrdiff_t bstride, vint32m1_t val, size_t 
vl)
+{
+  __riscv_vsse32_v_i32m1 ((int32_t *)0, bstride, val, vl);
+}
+
+void test_vsse32_v_u32m1_shortcut (ptrdiff_t bstride, vuint32m1_t val, size_t 
vl)
+{
+  __riscv_vsse32_v_u32m1 ((uint32_t *)0, bstride, val, vl);
+}
+
+void test_vsse32_v_f32m1_shortcut (ptrdiff_t bstride,

Re: [PATCH] VECT: Add decrement IV iteration loop control by variable amount support

2023-04-26 Thread Richard Sandiford via Gcc-patches

"juzhe.zh...@rivai.ai"  writes:
> Hi, Richard.
> Would you mind taking a look at the loop control part again:
>
> static gcond *
> vect_set_loop_condition_partial_vectors (class loop *loop,
> loop_vec_info loop_vinfo, tree niters,
> tree final_iv, bool niters_maybe_zero,
> gimple_stmt_iterator loop_cond_gsi)
> ...
> tree loop_len_x = NULL_TREE;
>   FOR_EACH_VEC_ELT (*controls, i, rgc)
> if (!rgc->controls.is_empty ())
>   {
> ...
>
> /* Set up all controls for this group.  */
> if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type,
>OPTIMIZE_FOR_SPEED))
>  test_ctrl
>= vect_set_loop_controls_by_select_vl (loop, loop_vinfo,
>   _seq, _seq,
>   rgc, niters, _len_x);
> ...
>   }
>
> static tree
> vect_set_loop_controls_by_select_vl (class loop *loop, loop_vec_info 
> loop_vinfo,
> gimple_seq *preheader_seq,
> gimple_seq *header_seq,
> rgroup_controls *rgc, tree niters, tree *x)
> {
>   tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
>   tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
>   /* We are not allowing masked approach in SELECT_VL.  */
>   gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
>
>   tree ctrl_type = rgc->type;
>   unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
>   poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * 
> rgc->factor;
>   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
>
>   /* Calculate the maximum number of item values that the rgroup
>  handles in total, the number that it handles for each iteration
>  of the vector loop.  */
>   tree nitems_total = niters;
>   if (nitems_per_iter != 1)
> {
>   /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
> these multiplications don't overflow.  */
>   tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
>   nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
>   nitems_total, compare_factor);
> }
>
>   /* Convert the comparison value to the IV type (either a no-op or
>  a promotion).  */
>   nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
>
>   /* Create an induction variable that counts the number of items
>  processed.  */
>   tree index_before_incr, index_after_incr;
>   gimple_stmt_iterator incr_gsi;
>   bool insert_after;
>   standard_iv_increment_position (loop, _gsi, _after);
>
>   /* Test the decremented IV, which will never underflow 0 since we have
>  IFN_SELECT_VL to gurantee that.  */
>   tree test_limit = nitems_total;
>
>   /* Provide a definition of each control in the group.  */
>   tree ctrl;
>   unsigned int i;
>   FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
> {
>   /* Previous controls will cover BIAS items.  This control covers the
> next batch.  */
>   poly_uint64 bias = nitems_per_ctrl * i;
>   tree bias_tree = build_int_cst (iv_type, bias);
>
>   /* Rather than have a new IV that starts at TEST_LIMIT and goes down to
> BIAS, prefer to use the same TEST_LIMIT - BIAS based IV for each
> control and adjust the bound down by BIAS.  */
>   tree this_test_limit = test_limit;
>   if (i != 0)
> {
>  this_test_limit = gimple_build (preheader_seq, MAX_EXPR, iv_type,
>  this_test_limit, bias_tree);
>  this_test_limit = gimple_build (preheader_seq, MINUS_EXPR, iv_type,
>  this_test_limit, bias_tree);
> }
>
>   /* Create decrement IV.  */
>   create_iv (this_test_limit, MINUS_EXPR, ctrl, NULL_TREE, loop, 
> _gsi,
> insert_after, _before_incr, _after_incr);
>
>   tree res_len;
>   if (rgc->controls.length () != 1)
> {
>  if (nitems_per_iter == 1)
>{
>  /* Generte length = (X - VF*I/N) capped to the range [0, VF/N]. */
>  /* step = VF * I / N.  */
>  tree step
> = build_int_cst (iv_type,
> exact_div (vf * i, rgc->controls.length ()));
>  /* Make sure (X - VF*I/N) never underflow zero.  */
>  tree max = gimple_build (header_seq, MAX_EXPR, iv_type, *x, step);
>  res_len
> = gimple_build (header_seq, MIN_EXPR, iv_type,
> index_before_incr,
> build_int_cst (iv_type, vf * nitems_per_iter));
>}
>  else
>{
>  /* For SLP, we can't allow non-VF number of elements to be
> processed in non-final iteration. We force the number of
> elements to be processed in each non-final iteration is VF
> elements. If we allow non-VF elements processing in non-final
> iteration will make SLP too complicated and produce inferior
> codegen.
>
>   For example:
>
>If non-final iteration process VF elements.
>
>  ...
>  .LEN_STORE (vectp_f.8_51, 128B, _71, { 1, 2, 1, 2 }, 0);
>  .LEN_STORE (vectp_f.8_56, 128B, _72, { 1, 2, 1, 2 }, 0);
>  ...
>
>If non-final iteration process non-VF elements.
>
>  ...
>  .LEN_STORE (vectp_f.8_51, 128B, _71, { 1, 2, 1, 2 }, 0);
>  if (_71 % 2 == 0)
>   .LEN_STORE (vectp_f.8_56, 128B, _72, { 1, 2, 1, 2 }, 0);
>  else
>   .LEN_STORE (vectp_f.8_56, 128B, _72, { 2, 1, 2, 1 },

[COMMITTED] Convert compare_nonzero_chars to wide_ints.

2023-04-26 Thread Aldy Hernandez via Gcc-patches

gcc/ChangeLog:

* tree-ssa-strlen.cc (compare_nonzero_chars): Convert to wide_ints.
---
 gcc/tree-ssa-strlen.cc | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc
index 0bbcb04834f..de785281b27 100644
--- a/gcc/tree-ssa-strlen.cc
+++ b/gcc/tree-ssa-strlen.cc
@@ -359,10 +359,10 @@ compare_nonzero_chars (strinfo *si, gimple *stmt,
  of the length range are equal return the result of the comparison
  same as in the constant case.  Otherwise return a conservative
  result.  */
-  tree lower = wide_int_to_tree (vr.type (), vr.lower_bound ());
-  tree upper = wide_int_to_tree (vr.type (), vr.upper_bound ());
-  int cmpmin = compare_tree_int (lower, off);
-  if (cmpmin > 0 || tree_int_cst_equal (lower, upper))
+  signop sign = TYPE_SIGN (vr.type ());
+  unsigned prec = TYPE_PRECISION (vr.type ());
+  int cmpmin = wi::cmp (vr.lower_bound (), wi::uhwi (off, prec), sign);
+  if (cmpmin > 0 || vr.singleton_p ())
 return cmpmin;
 
   return -1;
-- 
2.40.0

[COMMITTED] Remove range_has_numeric_bounds_p.

2023-04-26 Thread Aldy Hernandez via Gcc-patches

gcc/ChangeLog:

* value-range.cc (irange::copy_legacy_to_multi_range): Rewrite use
of range_has_numeric_bounds_p with irange API.
(range_has_numeric_bounds_p): Delete.
* value-range.h (range_has_numeric_bounds_p): Delete.
---
 gcc/value-range.cc | 12 +++-
 gcc/value-range.h  |  1 -
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index da9098139ad..f7c3e7a00e2 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -979,7 +979,9 @@ irange::copy_legacy_to_multi_range (const irange )
 set_varying (src.type ());
   else
 {
-  if (range_has_numeric_bounds_p ())
+  if (!src.undefined_p ()
+ && TREE_CODE (src.min ()) == INTEGER_CST
+ && TREE_CODE (src.max ()) == INTEGER_CST)
set (src.min (), src.max (), src.kind ());
   else
{
@@ -3068,14 +3070,6 @@ ranges_from_anti_range (const value_range *ar,
   return !vr0->undefined_p ();
 }
 
-bool
-range_has_numeric_bounds_p (const irange *vr)
-{
-  return (!vr->undefined_p ()
- && TREE_CODE (vr->min ()) == INTEGER_CST
- && TREE_CODE (vr->max ()) == INTEGER_CST);
-}
-
 /* Return whether VAL is equal to the maximum value of its type.
We can't do a simple equality comparison with TYPE_MAX_VALUE because
C typedefs and Ada subtypes can produce types whose TYPE_MAX_VALUE
diff --git a/gcc/value-range.h b/gcc/value-range.h
index c1474a73eaf..b47540d49be 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -669,7 +669,6 @@ irange::legacy_mode_p () const
   return m_max_ranges == 1;
 }
 
-extern bool range_has_numeric_bounds_p (const irange *);
 extern bool ranges_from_anti_range (const value_range *,
value_range *, value_range *);
 extern value_range_kind get_legacy_range (const irange &, tree , tree 
);
-- 
2.40.0

[COMMITTED] Remove range_int_cst_p.

2023-04-26 Thread Aldy Hernandez via Gcc-patches

gcc/ChangeLog:

* tree-data-ref.cc (compute_distributive_range): Replace uses of
range_int_cst_p with irange API.
* tree-ssa-strlen.cc (get_range_strlen_dynamic): Same.
* tree-vrp.h (range_int_cst_p): Delete.
* vr-values.cc (check_for_binary_op_overflow): Replace uses of
range_int_cst_p with irange API.
(vr_set_zero_nonzero_bits): Same.
(range_fits_type_p): Same.
(simplify_using_ranges::simplify_casted_cond): Same.
---
 gcc/tree-data-ref.cc   |  3 ++-
 gcc/tree-ssa-strlen.cc | 28 ++--
 gcc/tree-vrp.cc|  9 ---
 gcc/tree-vrp.h |  2 --
 gcc/vr-values.cc   | 59 ++
 5 files changed, 48 insertions(+), 53 deletions(-)

diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index 6a4112b9fde..b3a1d410cbd 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -646,7 +646,8 @@ compute_distributive_range (tree type, value_range 
_range,
   if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
 wide_range.set_varying (ssizetype);;
   flag_wrapv = saved_flag_wrapv;
-  if (wide_range.num_pairs () != 1 || !range_int_cst_p (_range))
+  if (wide_range.num_pairs () != 1
+  || wide_range.varying_p () || wide_range.undefined_p ())
 return false;
 
   wide_int lb = wide_range.lower_bound ();
diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc
index de785281b27..4bf183072d9 100644
--- a/gcc/tree-ssa-strlen.cc
+++ b/gcc/tree-ssa-strlen.cc
@@ -1222,13 +1222,14 @@ get_range_strlen_dynamic (tree src, gimple *stmt,
{
  value_range vr;
  ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
- if (range_int_cst_p ())
+ if (vr.undefined_p () || vr.varying_p ())
+   pdata->minlen = build_zero_cst (size_type_node);
+ else
{
- pdata->minlen = vr.min ();
- pdata->maxlen = vr.max ();
+ tree type = vr.type ();
+ pdata->minlen = wide_int_to_tree (type, vr.lower_bound ());
+ pdata->maxlen = wide_int_to_tree (type, vr.upper_bound ());
}
- else
-   pdata->minlen = build_zero_cst (size_type_node);
}
  else
pdata->minlen = build_zero_cst (size_type_node);
@@ -1266,21 +1267,22 @@ get_range_strlen_dynamic (tree src, gimple *stmt,
{
  value_range vr;
  ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, stmt);
- if (range_int_cst_p ())
+ if (vr.varying_p () || vr.undefined_p ())
+   {
+ pdata->minlen = build_zero_cst (size_type_node);
+ pdata->maxlen = build_all_ones_cst (size_type_node);
+   }
+ else
{
- pdata->minlen = vr.min ();
- pdata->maxlen = vr.max ();
+ tree type = vr.type ();
+ pdata->minlen = wide_int_to_tree (type, vr.lower_bound ());
+ pdata->maxlen = wide_int_to_tree (type, vr.upper_bound ());
  offset_int max = offset_int::from (vr.upper_bound (0), SIGNED);
  if (tree maxbound = get_maxbound (si->ptr, stmt, max, ptr_qry))
pdata->maxbound = maxbound;
  else
pdata->maxbound = pdata->maxlen;
}
- else
-   {
- pdata->minlen = build_zero_cst (size_type_node);
- pdata->maxlen = build_all_ones_cst (size_type_node);
-   }
}
   else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
{
diff --git a/gcc/tree-vrp.cc b/gcc/tree-vrp.cc
index 27126088708..6c6e0382809 100644
--- a/gcc/tree-vrp.cc
+++ b/gcc/tree-vrp.cc
@@ -312,15 +312,6 @@ intersect_range_with_nonzero_bits (enum value_range_kind 
vr_type,
   return vr_type;
 }
 
-/* Return true if max and min of VR are INTEGER_CST.  It's not necessary
-   a singleton.  */
-
-bool
-range_int_cst_p (const value_range *vr)
-{
-  return (vr->kind () == VR_RANGE && range_has_numeric_bounds_p (vr));
-}
-
 /* Return the single symbol (an SSA_NAME) contained in T if any, or NULL_TREE
otherwise.  We only handle additive operations and set NEG to true if the
symbol is negated and INV to the invariant part, if any.  */
diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h
index bad50e31aad..58216388ee6 100644
--- a/gcc/tree-vrp.h
+++ b/gcc/tree-vrp.h
@@ -22,8 +22,6 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "value-range.h"
 
-extern bool range_int_cst_p (const value_range *);
-
 extern int compare_values (tree, tree);
 extern int compare_values_warnv (tree, tree, bool *);
 extern int operand_less_p (tree, tree);
diff --git a/gcc/vr-values.cc b/gcc/vr-values.cc
index f43de63b4f4..ea7c6738739 100644
--- a/gcc/vr-values.cc
+++ b/gcc/vr-values.cc
@@ -125,16 +125,16 @@ check_for_binary_op_overflow (range_query *query,

[COMMITTED] Remove legacy range support.

2023-04-26 Thread Aldy Hernandez via Gcc-patches

This patch removes all the code paths guarded by legacy_mode_p(), thus
allowing us to re-use the int_range<1> idiom for a range of one
sub-range.  This allows us to represent these simple ranges in a more
efficient manner.

gcc/ChangeLog:

* range-op.cc (range_op_cast_tests): Remove legacy support.
* value-range-storage.h (vrange_allocator::alloc_irange): Same.
* value-range.cc (irange::operator=): Same.
(get_legacy_range): Same.
(irange::copy_legacy_to_multi_range): Delete.
(irange::copy_to_legacy): Delete.
(irange::irange_set_anti_range): Delete.
(irange::set): Remove legacy support.
(irange::verify_range): Same.
(irange::legacy_lower_bound): Delete.
(irange::legacy_upper_bound): Delete.
(irange::legacy_equal_p): Delete.
(irange::operator==): Remove legacy support.
(irange::singleton_p): Same.
(irange::value_inside_range): Same.
(irange::contains_p): Same.
(intersect_ranges): Delete.
(irange::legacy_intersect): Delete.
(union_ranges): Delete.
(irange::legacy_union): Delete.
(irange::legacy_verbose_union_): Delete.
(irange::legacy_verbose_intersect): Delete.
(irange::irange_union): Remove legacy support.
(irange::irange_intersect): Same.
(irange::intersect): Same.
(irange::invert): Same.
(ranges_from_anti_range): Delete.
(gt_pch_nx): Adjust for legacy removal.
(gt_ggc_mx): Same.
(range_tests_legacy): Delete.
(range_tests_misc): Adjust for legacy removal.
(range_tests): Same.
* value-range.h (class irange): Same.
(irange::legacy_mode_p): Delete.
(ranges_from_anti_range): Delete.
(irange::nonzero_p): Adjust for legacy removal.
(irange::lower_bound): Same.
(irange::upper_bound): Same.
(irange::union_): Same.
(irange::intersect): Same.
(irange::set_nonzero): Same.
(irange::set_zero): Same.
* vr-values.cc (simplify_using_ranges::legacy_fold_cond_overflow): Same.
---
 gcc/range-op.cc   |2 +-
 gcc/value-range-storage.h |3 +-
 gcc/value-range.cc| 1188 ++---
 gcc/value-range.h |   64 +-
 gcc/vr-values.cc  |4 +-
 5 files changed, 47 insertions(+), 1214 deletions(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index e47edcf3d74..215a1613b38 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -4890,7 +4890,7 @@ namespace selftest
 static void
 range_op_cast_tests ()
 {
-  int_range<1> r0, r1, r2, rold;
+  int_range<2> r0, r1, r2, rold;
   r0.set_varying (integer_type_node);
   tree maxint = wide_int_to_tree (integer_type_node, r0.upper_bound ());
 
diff --git a/gcc/value-range-storage.h b/gcc/value-range-storage.h
index 6da377ebd2e..070b85c5739 100644
--- a/gcc/value-range-storage.h
+++ b/gcc/value-range-storage.h
@@ -175,8 +175,7 @@ inline irange *
 vrange_allocator::alloc_irange (unsigned num_pairs)
 {
   // Never allocate 0 pairs.
-  // Don't allocate 1 either, or we get legacy value_range's.
-  if (num_pairs < 2)
+  if (num_pairs < 1)
 num_pairs = 2;
 
   size_t nbytes = sizeof (tree) * 2 * num_pairs;
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index f7c3e7a00e2..1a3013be6fd 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -877,23 +877,11 @@ frange::set_nonnegative (tree type)
   set (type, dconst0, frange_val_max (type));
 }
 
-// Here we copy between any two irange's.  The ranges can be legacy or
-// multi-ranges, and copying between any combination works correctly.
+// Here we copy between any two irange's.
 
 irange &
 irange::operator= (const irange &src)
 {
-  if (legacy_mode_p ())
-{
-  copy_to_legacy (src);
-  return *this;
-}
-  if (src.legacy_mode_p ())
-{
-  copy_legacy_to_multi_range (src);
-  return *this;
-}
-
   unsigned x;
   unsigned lim = src.m_num_ranges;
   if (lim > m_max_ranges)
@@ -909,6 +897,8 @@ irange::operator= (const irange &src)
   m_num_ranges = lim;
   m_kind = src.m_kind;
   m_nonzero_mask = src.m_nonzero_mask;
+  if (m_max_ranges == 1)
+normalize_kind ();
   if (flag_checking)
 verify_range ();
   return *this;
@@ -951,12 +941,6 @@ get_legacy_range (const irange &r, tree &min, tree &max)
   inv.invert ();
   min = wide_int_to_tree (type, inv.lower_bound (0));
   max = wide_int_to_tree (type, inv.upper_bound (0));
-  if (r.legacy_mode_p ())
-   {
- gcc_checking_assert (old_kind == VR_ANTI_RANGE);
- gcc_checking_assert (vrp_operand_equal_p (old_min, min));
- gcc_checking_assert (vrp_operand_equal_p (old_max, max));
-   }
   return VR_ANTI_RANGE;
 }
 
@@ -968,54 +952,6 @@ get_legacy_range (const irange &r, tree &min, tree &max)
   return VR_RANGE;
 }
 
-void
-irange::copy_legacy_to_multi_range (const irange &src)
-{
-  gcc_checking_assert (src.legacy_mode_p ());
-

[COMMITTED] Remove some uses of deprecated irange API.

2023-04-26 Thread Aldy Hernandez via Gcc-patches

gcc/ChangeLog:

* builtins.cc (expand_builtin_strnlen): Rewrite deprecated irange
API uses to new API.
* gimple-predicate-analysis.cc (find_var_cmp_const): Same.
* internal-fn.cc (get_min_precision): Same.
* match.pd: Same.
* tree-affine.cc (expr_to_aff_combination): Same.
* tree-data-ref.cc (dr_step_indicator): Same.
* tree-dfa.cc (get_ref_base_and_extent): Same.
* tree-scalar-evolution.cc (iv_can_overflow_p): Same.
* tree-ssa-phiopt.cc (two_value_replacement): Same.
* tree-ssa-pre.cc (insert_into_preds_of_block): Same.
* tree-ssa-reassoc.cc (optimize_range_tests_to_bit_test): Same.
* tree-ssa-strlen.cc (compare_nonzero_chars): Same.
* tree-switch-conversion.cc (bit_test_cluster::emit): Same.
* tree-vect-patterns.cc (vect_recog_divmod_pattern): Same.
* tree.cc (get_range_pos_neg): Same.
---
 gcc/builtins.cc  |  2 +-
 gcc/gimple-predicate-analysis.cc |  3 ++-
 gcc/internal-fn.cc   |  3 ++-
 gcc/match.pd | 10 +-
 gcc/tree-affine.cc   |  3 ++-
 gcc/tree-data-ref.cc |  2 +-
 gcc/tree-dfa.cc  |  3 ++-
 gcc/tree-scalar-evolution.cc |  6 --
 gcc/tree-ssa-phiopt.cc   | 17 -
 gcc/tree-ssa-pre.cc  |  3 ++-
 gcc/tree-ssa-reassoc.cc  |  3 ++-
 gcc/tree-ssa-strlen.cc   | 13 +++--
 gcc/tree-switch-conversion.cc|  3 ++-
 gcc/tree-vect-patterns.cc|  2 +-
 gcc/tree.cc  |  3 ++-
 15 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index bb931242c9c..878596c240a 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -3494,7 +3494,7 @@ expand_builtin_strnlen (tree exp, rtx target, 
machine_mode target_mode)
   wide_int min, max;
   value_range r;
   get_global_range_query ()->range_of_expr (r, bound);
-  if (r.kind () != VR_RANGE)
+  if (r.varying_p () || r.undefined_p ())
 return NULL_RTX;
   min = r.lower_bound ();
   max = r.upper_bound ();
diff --git a/gcc/gimple-predicate-analysis.cc b/gcc/gimple-predicate-analysis.cc
index c89a5b1653a..7f20f81ad86 100644
--- a/gcc/gimple-predicate-analysis.cc
+++ b/gcc/gimple-predicate-analysis.cc
@@ -307,7 +307,8 @@ find_var_cmp_const (pred_chain_union preds, gphi *phi, 
gimple **flag_def,
  value_range r;
  if (!INTEGRAL_TYPE_P (type)
  || !get_range_query (cfun)->range_of_expr (r, cond_rhs)
- || r.kind () != VR_RANGE)
+ || r.undefined_p ()
+ || r.varying_p ())
continue;
 
  wide_int min = r.lower_bound ();
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 6e81dc05e0e..5c9da73ea11 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -775,7 +775,8 @@ get_min_precision (tree arg, signop sign)
 return prec + (orig_sign != sign);
   value_range r;
   while (!get_global_range_query ()->range_of_expr (r, arg)
-|| r.kind () != VR_RANGE)
+|| r.varying_p ()
+|| r.undefined_p ())
 {
   gimple *g = SSA_NAME_DEF_STMT (arg);
   if (is_gimple_assign (g)
diff --git a/gcc/match.pd b/gcc/match.pd
index e89ba57e30b..34e1a5c1b46 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -525,7 +525,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   value_range vr0;
   if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE
  && get_global_range_query ()->range_of_expr (vr0, @4)
- && vr0.kind () == VR_RANGE)
+ && !vr0.varying_p () && !vr0.undefined_p ())
{
  wide_int wmin0 = vr0.lower_bound ();
  wide_int wmax0 = vr0.upper_bound ();
@@ -566,7 +566,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   value_range vr0;
   if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE
  && get_global_range_query ()->range_of_expr (vr0, @0)
- && vr0.kind () == VR_RANGE)
+ && !vr0.varying_p () && !vr0.undefined_p ())
{
  wide_int wmin0 = vr0.lower_bound ();
  wide_int wmax0 = vr0.upper_bound ();
@@ -853,8 +853,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
if (INTEGRAL_TYPE_P (type)
   && get_global_range_query ()->range_of_expr (vr0, @0)
   && get_global_range_query ()->range_of_expr (vr1, @1)
-  && vr0.kind () == VR_RANGE
-  && vr1.kind () == VR_RANGE)
+  && !vr0.varying_p () && !vr0.undefined_p ()
+  && !vr1.varying_p () && !vr1.undefined_p ())
 {
   wide_int wmin0 = vr0.lower_bound ();
   wide_int wmax0 = vr0.upper_bound ();
@@ -3010,7 +3010,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
value_range vr;
if (get_global_range_query ()->range_of_expr (vr, @0)
-   && vr.kind () == VR_RANGE)
+   && !vr.varying_p () && !vr.undefined_p ())
   {
wide_int wmin0 = vr.lower_bound ();
wide_int wmax0 =

[COMMITTED] Replace ad-hoc value_range dumpers with irange::dump.

2023-04-26 Thread Aldy Hernandez via Gcc-patches

This causes a regression in gcc.c-torture/unsorted/dump-noaddr.c.

The test is asserting that two dumps are identical, but they are not
because irange dumps the type which varies between runs:

   <  VR  [irange] void (*) (int) [1, +INF]
   >  VR  [irange] void (*) (int) [1, +INF]

I have changed the pretty printer for irange types to pass TDF_NOUID,
thus avoiding this problem.

gcc/ChangeLog:

* ipa-prop.cc (ipa_print_node_jump_functions_for_edge): Use
vrange::dump instead of ad-hoc dumper.
* tree-ssa-strlen.cc (dump_strlen_info): Same.
* value-range-pretty-print.cc (visit): Pass TDF_NOUID to
dump_generic_node.
---
 gcc/ipa-prop.cc |  9 ++--
 gcc/tree-ssa-strlen.cc  | 40 +
 gcc/value-range-pretty-print.cc |  2 +-
 3 files changed, 9 insertions(+), 42 deletions(-)

diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index 1a8ff7ebb95..c6d4585aed1 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -437,13 +437,8 @@ ipa_print_node_jump_functions_for_edge (FILE *f, struct 
cgraph_edge *cs)
 
   if (jump_func->m_vr)
{
- fprintf (f, " VR  ");
- fprintf (f, "%s[",
-  (jump_func->m_vr->kind () == VR_ANTI_RANGE) ? "~" : "");
- print_decs (wi::to_wide (jump_func->m_vr->min ()), f);
- fprintf (f, ", ");
- print_decs (wi::to_wide (jump_func->m_vr->max ()), f);
- fprintf (f, "]\n");
+ jump_func->m_vr->dump (f);
+ fprintf (f, "\n");
}
   else
fprintf (f, " Unknown VR\n");
diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc
index 1d79034594b..dcc9e9e14ab 100644
--- a/gcc/tree-ssa-strlen.cc
+++ b/gcc/tree-ssa-strlen.cc
@@ -981,42 +981,14 @@ dump_strlen_info (FILE *fp, gimple *stmt, range_query 
*rvals)
  print_generic_expr (fp, si->nonzero_chars);
  if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
{
- value_range_kind rng = VR_UNDEFINED;
- wide_int min, max;
+ value_range vr;
  if (rvals)
-   {
- value_range vr;
- rvals->range_of_expr (vr, si->nonzero_chars,
-   si->stmt);
- rng = vr.kind ();
- if (range_int_cst_p (&vr))
-   {
- min = wi::to_wide (vr.min ());
- max = wi::to_wide (vr.max ());
-   }
- else
-   rng = VR_UNDEFINED;
-   }
+   rvals->range_of_expr (vr, si->nonzero_chars,
+ si->stmt);
  else
-   {
- value_range vr;
- get_range_query (cfun)
-   ->range_of_expr (vr, si->nonzero_chars);
- rng = vr.kind ();
- if (!vr.undefined_p ())
-   {
- min = wi::to_wide (vr.min ());
- max = wi::to_wide (vr.max ());
-   }
-   }
-
- if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
-   {
- fprintf (fp, " %s[%llu, %llu]",
-  rng == VR_RANGE ? "" : "~",
-  (long long) min.to_uhwi (),
-  (long long) max.to_uhwi ());
-   }
+   get_range_query (cfun)->range_of_expr (vr,
+   si->nonzero_chars);
+ vr.dump (fp);
}
}
 
diff --git a/gcc/value-range-pretty-print.cc b/gcc/value-range-pretty-print.cc
index a11c5a621f8..8d47d8087e8 100644
--- a/gcc/value-range-pretty-print.cc
+++ b/gcc/value-range-pretty-print.cc
@@ -56,7 +56,7 @@ vrange_printer::visit (const irange &r) const
   pp_string (pp, "UNDEFINED");
   return;
 }
-  dump_generic_node (pp, r.type (), 0, TDF_NONE, false);
+  dump_generic_node (pp, r.type (), 0, TDF_NONE | TDF_NOUID, false);
   pp_character (pp, ' ');
   if (r.varying_p ())
 {
-- 
2.40.0

[COMMITTED] Fix swapping of ranges.

2023-04-26 Thread Aldy Hernandez via Gcc-patches

The legacy range code has logic to swap out of order endpoints in the
irange constructor.  The new irange code expects the caller to fix any
inconsistencies, thus speeding up the common case.  However, this means
that when we remove legacy, any stragglers must be fixed.  This patch
fixes the 3 culprits found during the conversion.

gcc/ChangeLog:

* range-op.cc (operator_cast::op1_range): Use
create_possibly_reversed_range.
(operator_bitwise_and::simple_op1_range_solver): Same.
* value-range.cc (swap_out_of_order_endpoints): Delete.
(irange::set): Remove call to swap_out_of_order_endpoints.
---
 gcc/range-op.cc| 10 ++
 gcc/value-range.cc | 47 --
 2 files changed, 6 insertions(+), 51 deletions(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index f90e78dcfbc..e47edcf3d74 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -2876,8 +2876,9 @@ operator_cast::op1_range (irange &r, tree type,
  // Start by building the positive signed outer range for the type.
  wide_int lim = wi::set_bit_in_zero (TYPE_PRECISION (lhs_type),
  TYPE_PRECISION (type));
- r = int_range<1> (type, lim, wi::max_value (TYPE_PRECISION (type),
- SIGNED));
+ create_possibly_reversed_range (r, type, lim,
+ wi::max_value (TYPE_PRECISION (type),
+SIGNED));
  // For the signed part, we need to simply union the 2 ranges now.
  r.union_ (converted_lhs);
 
@@ -3367,7 +3368,7 @@ operator_bitwise_and::simple_op1_range_solver (irange &r, 
tree type,
   if (we_know_nothing)
 r.set_varying (type);
   else
-r = int_range<1> (type, minv, maxv);
+create_possibly_reversed_range (r, type, minv, maxv);
 
   // Solve [-INF, lhs.upper_bound ()] = x & MASK.
   //
@@ -3398,7 +3399,8 @@ operator_bitwise_and::simple_op1_range_solver (irange &r, 
tree type,
 }
   maxv |= ~cst2v;
   minv = sgnbit;
-  int_range<1> upper_bits (type, minv, maxv);
+  int_range<2> upper_bits;
+  create_possibly_reversed_range (upper_bits, type, minv, maxv);
   r.intersect (upper_bits);
 }
 
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index a50d1a63968..da9098139ad 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -1014,46 +1014,6 @@ irange::copy_to_legacy (const irange &src)
 set (src.tree_lower_bound (), src.tree_upper_bound ());
 }
 
-// Swap MIN/MAX if they are out of order and adjust KIND appropriately.
-
-static void
-swap_out_of_order_endpoints (tree &min, tree &max, value_range_kind &kind)
-{
-  gcc_checking_assert (kind != VR_UNDEFINED);
-  if (kind == VR_VARYING)
-return;
-  /* Wrong order for min and max, to swap them and the VR type we need
- to adjust them.  */
-  if (tree_int_cst_lt (max, min))
-{
-  tree one, tmp;
-
-  /* For one bit precision if max < min, then the swapped
-range covers all values, so for VR_RANGE it is varying and
-for VR_ANTI_RANGE empty range, so drop to varying as well.  */
-  if (TYPE_PRECISION (TREE_TYPE (min)) == 1)
-   {
- kind = VR_VARYING;
- return;
-   }
-
-  one = build_int_cst (TREE_TYPE (min), 1);
-  tmp = int_const_binop (PLUS_EXPR, max, one);
-  max = int_const_binop (MINUS_EXPR, min, one);
-  min = tmp;
-
-  /* There's one corner case, if we had [C+1, C] before we now have
-that again.  But this represents an empty value range, so drop
-to varying in this case.  */
-  if (tree_int_cst_lt (max, min))
-   {
- kind = VR_VARYING;
- return;
-   }
-  kind = kind == VR_RANGE ? VR_ANTI_RANGE : VR_RANGE;
-}
-}
-
 void
 irange::irange_set (tree min, tree max)
 {
@@ -1192,13 +1152,6 @@ irange::set (tree min, tree max, value_range_kind kind)
   gcc_checking_assert (TREE_CODE (min) == INTEGER_CST
   && TREE_CODE (max) == INTEGER_CST);
 
-  swap_out_of_order_endpoints (min, max, kind);
-  if (kind == VR_VARYING)
-{
-  set_varying (TREE_TYPE (min));
-  return;
-}
-
   // Anti-ranges that can be represented as ranges should be so.
   if (kind == VR_ANTI_RANGE)
 {
-- 
2.40.0

Re: [PATCH] avr: Set param_min_pagesize to 0 [PR105523]

2023-04-26 Thread SenthilKumar.Selvaraj--- via Gcc-patches

On Wed, Apr 26, 2023 at 3:15 PM Richard Biener via Gcc-patches 
 wrote:
>
> On Wed, Apr 26, 2023 at 11:42 AM Richard Biener
>  wrote:
> >
> > On Wed, Apr 26, 2023 at 11:01 AM SenthilKumar.Selvaraj--- via
> > Gcc-patches  wrote:
> > >
> > > Hi,
> > >
> > > This patch fixes PR 105523 by setting param_min_pagesize to 0 for the
> > > avr target. For this target, zero and offsets from zero are perfectly
> > > valid addresses, and the default value of param_min_pagesize ends up
> > > triggering warnings on valid memory accesses.
> >
> > I think the proper configuration is to have
> > DEFAULT_ADDR_SPACE_ZERO_ADDRESS_VALID
>
> Err, TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID

That worked. Ok for trunk and backporting to 13 and 12 branches
(pending regression testing)?

Regards,
Senthil

PR 105523

gcc/ChangeLog:

* config/avr/avr.cc (avr_addr_space_zero_address_valid):
(TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID): Return true.

gcc/testsuite/ChangeLog:

* gcc.target/avr/pr105523.c: New test.



diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index c193430cf07..5439eb8e55c 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -9788,6 +9788,16 @@ avr_addr_space_diagnose_usage (addr_space_t as, 
location_t loc)
   (void) avr_addr_space_supported_p (as, loc);
 }
 
+/* Implement `TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID'.  Zero is a valid
+   address in all address spaces.  Even in ADDR_SPACE_FLASH1 etc.,
+   a zero address is valid and means 0x<RAMPZ val>0000, where RAMPZ is
+   set to the appropriate segment value.  */
+
+static bool
+avr_addr_space_zero_address_valid (addr_space_t as)
+{
+  return true;
+}
 
 /* Look if DECL shall be placed in program memory space by
means of attribute `progmem' or some address-space qualifier.
@@ -14688,6 +14698,9 @@ avr_float_lib_compare_returns_bool (machine_mode mode, 
enum rtx_code)
 #undef  TARGET_ADDR_SPACE_DIAGNOSE_USAGE
 #define TARGET_ADDR_SPACE_DIAGNOSE_USAGE avr_addr_space_diagnose_usage
 
+#undef  TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
+#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID avr_addr_space_zero_address_valid
+
 #undef  TARGET_MODE_DEPENDENT_ADDRESS_P
 #define TARGET_MODE_DEPENDENT_ADDRESS_P avr_mode_dependent_address_p
 
diff --git a/gcc/testsuite/gcc.target/avr/pr105523.c 
b/gcc/testsuite/gcc.target/avr/pr105523.c
new file mode 100644
index 000..fbbf7bf4422
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/pr105523.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -Wall" } */
+
+/* Verify no "array subscript 0 is outside array bounds of" is generated
+   for accessing memory addresses in the 0-4096 range. */
+
+typedef __UINT8_TYPE__ uint8_t;
+
+#define SREG (*(volatile uint8_t*) (0x3F + __AVR_SFR_OFFSET__ ))
+
+void bar (void)
+{
+SREG = 0;
+}

Re: Unloop no longer looping loops in loop-ch

2023-04-26 Thread Jan Hubicka via Gcc-patches

> > -  if (precise)
> > +  if (precise
> > + && get_max_loop_iterations_int (loop) == 1)
> > +   {
> > + if (dump_file && (dump_flags & TDF_DETAILS))
> > +   fprintf (dump_file, "Loop %d no longer loops.\n", loop->num);
> 
> but max loop iterations is 1 ...?

I first check for loops with 0 iterations, push them to unlooping list
and avoid any header copying (it is useless).
At this point in the patch we already did header duplication and verified that
the maximal number of iterations will drop by 1, since there is no way the loop
can terminate except via the header tests we peeled out.

So 1 would turn to 0 in the loop info update and it seems useless to do
it.
> 
> > + loops_to_unloop.safe_push (loop);
> > + loops_to_unloop_nunroll.safe_push (0);
> > +   }
> > +  else if (precise)
> > {
> >   if (dump_file && (dump_flags & TDF_DETAILS))
> > fprintf (dump_file,
> > @@ -688,6 +699,12 @@ ch_base::copy_headers (function *fun)
> >   BITMAP_FREE (exit_bbs);
> > }
> >  }
> > +  if (loops_to_unloop.length())
> 
>   !loops_to_unloop.is_empty ()
I updated that in my copy of the patch.
> 
> > +{
> > +  bool irred_invalidated;
> > +  unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, NULL, 
> > &irred_invalidated);
> > +  changed = true;
> > +}
> >free (bbs);
> >free (copied_bbs);
> 
> 
> Since we run VN on the header copies I wonder if, since you remove
> edges, we need to run CFG cleanup before this and updating SSA form?
> For safety we usually let CFG cleanup do the actual CFG manipulation
> and just change cond jumps to if (0) or if (1)?

I do unlooping only after the VN so I think I am safe here.

Honza

Re: [PATCH] LoongArch: Enable shrink wrapping

2023-04-26 Thread WANG Xuerui


On 2023/4/26 18:14, Lulu Cheng wrote:


在 2023/4/26 下午6:02, WANG Xuerui 写道:


On 2023/4/26 17:53, Lulu Cheng wrote:

Hi, ruoyao:

  The performance of spec2006 is finished. The fixed-point 
400.perlbench has about 3% performance improvement,


and the other basics have not changed, and the floating-point tests 
have basically remained the same.

Nice to know!


  Do you have any questions about the test cases mentioned by 
Guo Jie? If there is no problem, modify the test case,


I think the code can be merged into the main branch.


BTW what about the previous function/loop alignment patches? The LLVM 
changes are also waiting for such results. ;-)
Well, there are many combinations in this align test, so the test time 
will be very long. I will reply the result as soon as the test results 
come out.:-)



Oh, I got it. Thanks very much for all the tests, and take your time!

Re: [PATCH] LoongArch: Enable shrink wrapping

2023-04-26 Thread Lulu Cheng




在 2023/4/26 下午6:02, WANG Xuerui 写道:


On 2023/4/26 17:53, Lulu Cheng wrote:

Hi, ruoyao:

  The performance of spec2006 is finished. The fixed-point 
400.perlbench has about 3% performance improvement,


and the other basics have not changed, and the floating-point tests 
have basically remained the same.

Nice to know!


  Do you have any questions about the test cases mentioned by Guo 
Jie? If there is no problem, modify the test case,


I think the code can be merged into the main branch.


BTW what about the previous function/loop alignment patches? The LLVM 
changes are also waiting for such results. ;-)
Well, there are many combinations in this align test, so the test time 
will be very long. I will reply the result as soon as the test results 
come out.:-)

Re: Re: [PATCH] VECT: Add decrement IV iteration loop control by variable amount support

2023-04-26 Thread juzhe.zh...@rivai.ai

Hi, Richard.
Would you mind taking another look at the loop control part:

static gcond *
vect_set_loop_condition_partial_vectors (class loop *loop,
loop_vec_info loop_vinfo, tree niters,
tree final_iv, bool niters_maybe_zero,
gimple_stmt_iterator loop_cond_gsi)
...
tree loop_len_x = NULL_TREE;
  FOR_EACH_VEC_ELT (*controls, i, rgc)
if (!rgc->controls.is_empty ())
  {
...

/* Set up all controls for this group.  */
if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type,
   OPTIMIZE_FOR_SPEED))
 test_ctrl
   = vect_set_loop_controls_by_select_vl (loop, loop_vinfo,
  &preheader_seq, &header_seq,
  rgc, niters, &loop_len_x);
...
  }

static tree
vect_set_loop_controls_by_select_vl (class loop *loop, loop_vec_info loop_vinfo,
gimple_seq *preheader_seq,
gimple_seq *header_seq,
rgroup_controls *rgc, tree niters, tree *x)
{
  tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
  tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
  /* We are not allowing masked approach in SELECT_VL.  */
  gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));

  tree ctrl_type = rgc->type;
  unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
  poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* Calculate the maximum number of item values that the rgroup
 handles in total, the number that it handles for each iteration
 of the vector loop.  */
  tree nitems_total = niters;
  if (nitems_per_iter != 1)
{
  /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
these multiplications don't overflow.  */
  tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
  nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
  nitems_total, compare_factor);
}

  /* Convert the comparison value to the IV type (either a no-op or
 a promotion).  */
  nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);

  /* Create an induction variable that counts the number of items
 processed.  */
  tree index_before_incr, index_after_incr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  standard_iv_increment_position (loop, &incr_gsi, &insert_after);

  /* Test the decremented IV, which will never underflow 0 since we have
     IFN_SELECT_VL to guarantee that.  */
  tree test_limit = nitems_total;

  /* Provide a definition of each control in the group.  */
  tree ctrl;
  unsigned int i;
  FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
{
  /* Previous controls will cover BIAS items.  This control covers the
next batch.  */
  poly_uint64 bias = nitems_per_ctrl * i;
  tree bias_tree = build_int_cst (iv_type, bias);

  /* Rather than have a new IV that starts at TEST_LIMIT and goes down to
BIAS, prefer to use the same TEST_LIMIT - BIAS based IV for each
control and adjust the bound down by BIAS.  */
  tree this_test_limit = test_limit;
  if (i != 0)
{
 this_test_limit = gimple_build (preheader_seq, MAX_EXPR, iv_type,
 this_test_limit, bias_tree);
 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR, iv_type,
 this_test_limit, bias_tree);
}

  /* Create decrement IV.  */
  create_iv (this_test_limit, MINUS_EXPR, ctrl, NULL_TREE, loop, &incr_gsi,
insert_after, &index_before_incr, &index_after_incr);

  tree res_len;
  if (rgc->controls.length () != 1)
{
 if (nitems_per_iter == 1)
   {
 /* Generate length = (X - VF*I/N) capped to the range [0, VF/N]. */
 /* step = VF * I / N.  */
 tree step
= build_int_cst (iv_type,
exact_div (vf * i, rgc->controls.length ()));
 /* Make sure (X - VF*I/N) never underflow zero.  */
 tree max = gimple_build (header_seq, MAX_EXPR, iv_type, *x, step);
 res_len
= gimple_build (header_seq, MIN_EXPR, iv_type,
index_before_incr,
build_int_cst (iv_type, vf * nitems_per_iter));
   }
 else
   {
 /* For SLP, we can't allow a non-VF number of elements to be
processed in a non-final iteration.  We force the number of
elements processed in each non-final iteration to be exactly VF.
Allowing a non-VF number of elements in a non-final
iteration would make SLP too complicated and produce inferior
codegen.

  For example:

   If non-final iteration process VF elements.

 ...
 .LEN_STORE (vectp_f.8_51, 128B, _71, { 1, 2, 1, 2 }, 0);
 .LEN_STORE (vectp_f.8_56, 128B, _72, { 1, 2, 1, 2 }, 0);
 ...

   If non-final iteration process non-VF elements.

 ...
 .LEN_STORE (vectp_f.8_51, 128B, _71, { 1, 2, 1, 2 }, 0);
 if (_71 % 2 == 0)
  .LEN_STORE (vectp_f.8_56, 128B, _72, { 1, 2, 1, 2 }, 0);
 else
  .LEN_STORE (vectp_f.8_56, 128B, _72, { 2, 1, 2, 1 }, 0);
 ...

This is the simple case of 2-elements interleaved vector SLP.
We consider other interleave vector, the situation will become
more complicated.  */
 res_len
= gimple_build (header_seq, MIN_EXPR, iv_type,
index_before_incr,
build_int_cst (iv_type, vf * nitems_per_iter));
   }
}

Re: [PATCH] LoongArch: Enable shrink wrapping

2023-04-26 Thread WANG Xuerui




On 2023/4/26 17:53, Lulu Cheng wrote:

Hi, ruoyao:

  The performance of spec2006 is finished. The fixed-point 
400.perlbench has about 3% performance improvement,


and the other basics have not changed, and the floating-point tests 
have basically remained the same.

Nice to know!


  Do you have any questions about the test cases mentioned by Guo 
Jie? If there is no problem, modify the test case,


I think the code can be merged into the main branch.


BTW what about the previous function/loop alignment patches? The LLVM 
changes are also waiting for such results. ;-)

Re: [PATCH] LoongArch: Enable shrink wrapping

2023-04-26 Thread Lulu Cheng


Hi, ruoyao:

  The performance of spec2006 is finished. The fixed-point 
400.perlbench has about 3% performance improvement,


and the other basics have not changed, and the floating-point tests have 
basically remained the same.


  Do you have any questions about the test cases mentioned by Guo 
Jie? If there is no problem, modify the test case,


I think the code can be merged into the main branch.


Thanks!

在 2023/4/25 下午5:12, Guo Jie 写道:

/* snip */

  diff --git a/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c 
b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c

new file mode 100644
index 000..f2c867a2769
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fshrink-wrap" } */
+
+/* f(x) should do nothing if x is 0.  */
+/* { dg-final { scan-assembler 
"bnez\t\\\$r4,\[^\n\]*\n\tjr\t\\\$r1" } } */

+
+void g(void);
+
+void
+f(int x)
+{
+  if (x)
+    {
+  register int s0 asm("s0") = x;
+  register int s1 asm("s1") = x;
+  register int s2 asm("s2") = x;
+  asm("" : : "r"(s0));
+  asm("" : : "r"(s1));
+  asm("" : : "r"(s2));
+  g();
+    }
+}


I think the test case cannot fully reflect the optimization effect of 
the current patch,


because even without the patch, -O -fshrink-wrap will still perform 
architecture independent optimization.


This patch considers architecture related registers as finer grained 
optimization for shrink wrapping,


I think a test case like the one below is more suitable:


int foo(int x)
{
  if (x)
  {
    __asm__ ("":::"s0","s1");
    return x;
  }

  __asm__ ("":::"s2","s3");
  return 0;
}

Otherwise LGTM, thanks!

Re: [PATCH] Avoid creating useless debug temporaries

2023-04-26 Thread Richard Biener via Gcc-patches

On Wed, Apr 26, 2023 at 11:31 AM Eric Botcazou  wrote:
>
> > probably also helps PR109612 and the other similar PR referenced therein.
>
> Here's a more aggressive patch in this area, but it regresses guality tests,
> for example:
>
> +FAIL: gcc.dg/guality/ipa-sra-1.c   -O2  -DPREVENT_OPTIMIZATION  line 27 k ==
> 3
> +FAIL: gcc.dg/guality/ipa-sra-1.c   -O3 -g  -DPREVENT_OPTIMIZATION  line 27 k
> == 3
> +FAIL: gcc.dg/guality/ipa-sra-1.c   -Os  -DPREVENT_OPTIMIZATION  line 27 k ==
> 3
>
> eric@fomalhaut:~/build/gcc/native> diff -u ipa-sra-1.c.254t.optimized.0 ipa-
> sra-1.c.254t.optimized
> --- ipa-sra-1.c.254t.optimized.0  2023-04-26 11:12:07.806357325 +0200
> +++ ipa-sra-1.c.254t.optimized  2023-04-26 11:24:08.632874257 +0200
> @@ -101,7 +101,6 @@
># DEBUG k => k_5
># DEBUG BEGIN_STMT
>_1 = get_val1 ();
> -  # DEBUG D#6 => k_5
>r_8 = foo.isra (_1);
># DEBUG r => r_8
># DEBUG BEGIN_STMT
>
> and I don't understand why yet.

interesting.  So that removes unmentioned debug temporaries?  I think
remove_unused_locals does something to debug stmts as well
(but from a quick look cannot decipher what it actually does).

On the RTL level delete_trivially_dead_insns does wipe some (redundant)
debug_insns, there's no exact match to that on the GIMPLE side either.

I'm not sure if DCE is a good place to do this.

>
> * tree-ssa-dce.cc (find_debug_expr_decl): New callback.
> (mark_stmt_if_obviously_necessary): Add DECLS parameters.
> : Call find_debug_expr_decl on the value of
> DEBUG_BIND statements and record the results in DECLS.
> (find_obviously_necessary_stmts): If DEBUG_BIND statements may be
> present, get rid of those setting an unnecessary DEBUG_EXPR_DECL.
>
> --
> Eric Botcazou

Re: [PATCH] avr: Set param_min_pagesize to 0 [PR105523]

2023-04-26 Thread Richard Biener via Gcc-patches

On Wed, Apr 26, 2023 at 11:42 AM Richard Biener
 wrote:
>
> On Wed, Apr 26, 2023 at 11:01 AM SenthilKumar.Selvaraj--- via
> Gcc-patches  wrote:
> >
> > Hi,
> >
> > This patch fixes PR 105523 by setting param_min_pagesize to 0 for the
> > avr target. For this target, zero and offsets from zero are perfectly
> > valid addresses, and the default value of param_min_pagesize ends up
> > triggering warnings on valid memory accesses.
>
> I think the proper configuration is to have
> DEFAULT_ADDR_SPACE_ZERO_ADDRESS_VALID

Err, TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID

> defined to something returning true instead.
>
> Richard.
>
> > Ok for trunk and backporting to 13 and 12 branches?
> >
> > Regards
> > Senthil
> >
> > PR target/105523
> >
> > gcc/ChangeLog:
> >
> > * config/avr/avr.cc (avr_option_override): Set
> > param_min_pagesize to 0.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/avr/pr105523.c: New test.
> >
> > diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
> > index c193430cf07..3b862f4e4ac 100644
> > --- a/gcc/config/avr/avr.cc
> > +++ b/gcc/config/avr/avr.cc
> > @@ -56,6 +56,7 @@
> >  #include "tree-pass.h"
> >  #include "print-rtl.h"
> >  #include "rtl-iter.h"
> > +#include "opts.h"
> >
> >  /* This file should be included last.  */
> >  #include "target-def.h"
> > @@ -769,6 +770,9 @@ avr_option_override (void)
> >avr_gasisr_prologues = 0;
> >  #endif
> >
> > +  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
> > +   param_min_pagesize, 0);
> > +
> >if (!avr_set_core_architecture())
> >  return;
> >
> > diff --git a/gcc/testsuite/gcc.target/avr/pr105523.c 
> > b/gcc/testsuite/gcc.target/avr/pr105523.c
> > new file mode 100644
> > index 000..fbbf7bf4422
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/avr/pr105523.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-Os -Wall" } */
> > +
> > +/* Verify no "array subscript 0 is outside array bounds of" is generated
> > +   for accessing memory addresses in the 0-4096 range. */
> > +
> > +typedef __UINT8_TYPE__ uint8_t;
> > +
> > +#define SREG (*(volatile uint8_t*) (0x3F + __AVR_SFR_OFFSET__ ))
> > +
> > +void bar (void)
> > +{
> > +SREG = 0;
> > +}
> >

Re: [PATCH] avr: Set param_min_pagesize to 0 [PR105523]

2023-04-26 Thread Richard Biener via Gcc-patches

On Wed, Apr 26, 2023 at 11:01 AM SenthilKumar.Selvaraj--- via
Gcc-patches  wrote:
>
> Hi,
>
> This patch fixes PR 105523 by setting param_min_pagesize to 0 for the
> avr target. For this target, zero and offsets from zero are perfectly
> valid addresses, and the default value of param_min_pagesize ends up
> triggering warnings on valid memory accesses.

I think the proper configuration is to have
DEFAULT_ADDR_SPACE_ZERO_ADDRESS_VALID
defined to something returning true instead.

Richard.

> Ok for trunk and backporting to 13 and 12 branches?
>
> Regards
> Senthil
>
> PR target/105523
>
> gcc/ChangeLog:
>
> * config/avr/avr.cc (avr_option_override): Set
> param_min_pagesize to 0.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/avr/pr105523.c: New test.
>
> diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
> index c193430cf07..3b862f4e4ac 100644
> --- a/gcc/config/avr/avr.cc
> +++ b/gcc/config/avr/avr.cc
> @@ -56,6 +56,7 @@
>  #include "tree-pass.h"
>  #include "print-rtl.h"
>  #include "rtl-iter.h"
> +#include "opts.h"
>
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -769,6 +770,9 @@ avr_option_override (void)
>avr_gasisr_prologues = 0;
>  #endif
>
> +  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
> +   param_min_pagesize, 0);
> +
>if (!avr_set_core_architecture())
>  return;
>
> diff --git a/gcc/testsuite/gcc.target/avr/pr105523.c 
> b/gcc/testsuite/gcc.target/avr/pr105523.c
> new file mode 100644
> index 000..fbbf7bf4422
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/avr/pr105523.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os -Wall" } */
> +
> +/* Verify no "array subscript 0 is outside array bounds of" is generated
> +   for accessing memory addresses in the 0-4096 range. */
> +
> +typedef __UINT8_TYPE__ uint8_t;
> +
> +#define SREG (*(volatile uint8_t*) (0x3F + __AVR_SFR_OFFSET__ ))
> +
> +void bar (void)
> +{
> +SREG = 0;
> +}
>

Re: [PATCH V3] rs6000: Load high and low part of 64bit constant independently

2023-04-26 Thread Kewen.Lin via Gcc-patches

Hi Jeff,

on 2023/1/4 14:51, Jiufu Guo wrote:
> Hi,
> 
> Compare with previous version, this patch updates the comments only.
> https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608293.html
> 
> For a complicated 64-bit constant, below is one instruction sequence to
> build it:
>   lis 9,0x800a
>   ori 9,9,0xabcd
>   sldi 9,9,32
>   oris 9,9,0xc167
>   ori 9,9,0xfa16
> 
> while we can also use below sequence to build:
>   lis 9,0xc167
>   lis 10,0x800a
>   ori 9,9,0xfa16
>   ori 10,10,0xabcd
>   rldimi 9,10,32,0
> This sequence uses 2 registers to build the high and low parts first,
> and then merges them.
> 
> In terms of parallelism, this sequence would be faster.  (Of course, it uses
> one more register, which may increase register pressure.)
> 
> The instruction sequence with two registers for parallel version can be
> generated only if can_create_pseudo_p.  Otherwise, the one register
> sequence is generated.
> 
> Bootstrap and regtest pass on ppc64{,le}.
> Is this ok for trunk?

OK for trunk, thanks for the improvement!

BR,
Kewen

> 
> 
> BR,
> Jeff(Jiufu)
> 
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Generate
>   more parallel code if can_create_pseudo_p.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/parall_5insn_const.c: New test.
> 
> ---
>  gcc/config/rs6000/rs6000.cc   | 39 +--
>  .../gcc.target/powerpc/parall_5insn_const.c   | 27 +
>  2 files changed, 54 insertions(+), 12 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 6ac3adcec6b..b4f03499252 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10366,19 +10366,34 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT 
> c)
>  }
>else
>  {
> -  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> -
> -  emit_move_insn (temp, GEN_INT (sext_hwi (ud4 << 16, 32)));
> -  if (ud3 != 0)
> - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud3)));
> +  if (can_create_pseudo_p ())
> + {
> +   /* lis HIGH,UD4 ; ori HIGH,UD3 ;
> +  lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
> +   rtx high = gen_reg_rtx (DImode);
> +   rtx low = gen_reg_rtx (DImode);
> +   HOST_WIDE_INT num = (ud2 << 16) | ud1;
> +   rs6000_emit_set_long_const (low, sext_hwi (num, 32));
> +   num = (ud4 << 16) | ud3;
> +   rs6000_emit_set_long_const (high, sext_hwi (num, 32));
> +   emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
> +GEN_INT (0xffffffff)));
> + }
> +  else
> + {
> +   /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
> +  oris DEST,UD2 ; ori DEST,UD1.  */
> +   emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
> +   if (ud3 != 0)
> + emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
> 
> -  emit_move_insn (ud2 != 0 || ud1 != 0 ? temp : dest,
> -   gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)));
> -  if (ud2 != 0)
> - emit_move_insn (ud1 != 0 ? temp : dest,
> - gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
> -  if (ud1 != 0)
> - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
> +   emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
> +   if (ud2 != 0)
> + emit_move_insn (dest,
> + gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
> +   if (ud1 != 0)
> + emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
> + }
>  }
>  }
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c 
> b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
> new file mode 100644
> index 000..e3a9a7264cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
> @@ -0,0 +1,27 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mno-prefixed -save-temps" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> +
> +/* { dg-final { scan-assembler-times {\mlis\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mori\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */
> +
> +void __attribute__ ((noinline)) foo (unsigned long long *a)
> +{
> +  /* 2 lis + 2 ori + 1 rldimi for each constant.  */
> +  *a++ = 0x800aabcdc167fa16ULL;
> +  *a++ = 0x7543a876867f616ULL;
> +}
> +
> +long long A[] = {0x800aabcdc167fa16ULL, 0x7543a876867f616ULL};
> +int
> +main ()
> +{
> +  long long res[2];
> +
> +  foo (res);
> +  if (__builtin_memcmp (res, A, sizeof (res)) != 0)
> +__builtin_abort ();
> +
> +  return 0;
> +}

Re: [PATCH] Avoid creating useless debug temporaries

2023-04-26 Thread Eric Botcazou via Gcc-patches

> probably also helps PR109612 and the other similar PR referenced therein.

Here's a more aggressive patch in this area, but it regresses guality tests, 
for example:

+FAIL: gcc.dg/guality/ipa-sra-1.c   -O2  -DPREVENT_OPTIMIZATION  line 27 k == 
3
+FAIL: gcc.dg/guality/ipa-sra-1.c   -O3 -g  -DPREVENT_OPTIMIZATION  line 27 k 
== 3
+FAIL: gcc.dg/guality/ipa-sra-1.c   -Os  -DPREVENT_OPTIMIZATION  line 27 k == 
3

eric@fomalhaut:~/build/gcc/native> diff -u ipa-sra-1.c.254t.optimized.0 ipa-
sra-1.c.254t.optimized
--- ipa-sra-1.c.254t.optimized.0  2023-04-26 11:12:07.806357325 +0200
+++ ipa-sra-1.c.254t.optimized  2023-04-26 11:24:08.632874257 +0200
@@ -101,7 +101,6 @@
   # DEBUG k => k_5
   # DEBUG BEGIN_STMT
   _1 = get_val1 ();
-  # DEBUG D#6 => k_5
   r_8 = foo.isra (_1);
   # DEBUG r => r_8
   # DEBUG BEGIN_STMT

and I don't understand why yet.


* tree-ssa-dce.cc (find_debug_expr_decl): New callback.
(mark_stmt_if_obviously_necessary): Add DECLS parameters.
: Call find_debug_expr_decl on the value of
DEBUG_BIND statements and record the results in DECLS.
(find_obviously_necessary_stmts): If DEBUG_BIND statements may be
present, get rid of those setting an unnecessary DEBUG_EXPR_DECL.

-- 
Eric Botcazou

diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc
index 08876bfc1c7..09bbfaca22e 100644
--- a/gcc/tree-ssa-dce.cc
+++ b/gcc/tree-ssa-dce.cc
@@ -191,14 +191,35 @@ mark_operand_necessary (tree op)
 }
 
 
+/* Called via walk_tree, look for DEBUG_EXPR_DECLs and mark them in DATA.  */
+
+static tree
+find_debug_expr_decl (tree *tp, int *walk_subtrees, void *data)
+{
+  auto_bitmap *decls = (auto_bitmap *) data;
+
+  if (TREE_CODE (*tp) == SSA_NAME || IS_TYPE_OR_DECL_P (*tp))
+{
+  if (TREE_CODE (*tp) == DEBUG_EXPR_DECL)
+	bitmap_set_bit (*decls, DECL_UID (*tp));
+
+  *walk_subtrees = 0;
+}
+
+  return NULL_TREE;
+}
+
 /* Mark STMT as necessary if it obviously is.  Add it to the worklist if
it can make other statements necessary.
 
+   If STMT is a DEBUG_BIND, mark the necessary DEBUG_EXPR_DECLs in DECLS.
+
If AGGRESSIVE is false, control statements are conservatively marked as
necessary.  */
 
 static void
-mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive)
+mark_stmt_if_obviously_necessary (gimple *stmt, auto_bitmap *decls,
+  bool aggressive)
 {
   /* Statements that are implicitly live.  Most function calls, asm
  and return statements are required.  Labels and GIMPLE_BIND nodes
@@ -258,14 +279,28 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive)
   }
 
 case GIMPLE_DEBUG:
-  /* Debug temps without a value are not useful.  ??? If we could
-	 easily locate the debug temp bind stmt for a use thereof,
-	 would could refrain from marking all debug temps here, and
-	 mark them only if they're used.  */
-  if (gimple_debug_nonbind_marker_p (stmt)
-	  || !gimple_debug_bind_p (stmt)
-	  || gimple_debug_bind_has_value_p (stmt)
-	  || TREE_CODE (gimple_debug_bind_get_var (stmt)) != DEBUG_EXPR_DECL)
+  if (gimple_debug_bind_p (stmt))
+	{
+	  tree var = gimple_debug_bind_get_var (stmt);
+	  tree val = gimple_debug_bind_get_value (stmt);
+	  bool necessary = false;
+
+	  /* A bind statement for a real variable is always necessary.  */
+	  if (TREE_CODE (var) != DEBUG_EXPR_DECL)
+	necessary = true;
+
+	  /* A bind statement with a value is necessary for now and we look
+	 into the value to find out necessary DEBUG_EXPR_DECLs.  */
+	  if (val)
+	{
+	  walk_tree (&val, find_debug_expr_decl, decls, NULL);
+	  necessary = true;
+	}
+
+	  if (necessary )
+	mark_stmt_necessary (stmt, false);
+	}
+  else
 	mark_stmt_necessary (stmt, false);
   return;
 
@@ -398,6 +433,7 @@ find_obviously_necessary_stmts (bool aggressive)
   gimple_stmt_iterator gsi;
   edge e;
   gimple *phi, *stmt;
+  auto_bitmap necessary_decls;
   int flags;
 
   FOR_EACH_BB_FN (bb, cfun)
@@ -414,10 +450,35 @@ find_obviously_necessary_stmts (bool aggressive)
 	{
 	  stmt = gsi_stmt (gsi);
 	  gimple_set_plf (stmt, STMT_NECESSARY, false);
-	  mark_stmt_if_obviously_necessary (stmt, aggressive);
+	  mark_stmt_if_obviously_necessary (stmt, &necessary_decls, aggressive);
 	}
 }
 
+  /* Check all debug bind statements again in the basic blocks and find out
+ those which set an unnecessary DEBUG_EXPR_DECL to a value.  */
+  if (MAY_HAVE_DEBUG_BIND_STMTS)
+FOR_EACH_BB_FN (bb, cfun)
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  stmt = gsi_stmt (gsi);
+	  if (gimple_debug_bind_p (stmt)
+	  && gimple_debug_bind_has_value_p (stmt))
+	{
+	  tree var = gimple_debug_bind_get_var (stmt);
+	  if (TREE_CODE (var) == DEBUG_EXPR_DECL
+		  && !bitmap_bit_p (necessary_decls, DECL_UID (var)))
+		{
+		  gimple_set_plf (stmt, STMT_NECESSARY, false);
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		{
+		  fprintf (dump_file, "Unmarking useful stmt:

Re: [PATCH] VECT: Add decrement IV iteration loop control by variable amount support

2023-04-26 Thread Richard Sandiford via Gcc-patches

"juzhe.zh...@rivai.ai"  writes:
> Thank you so much for pointing out this issue.
>
> After reading your comments carefully, I need to revise 
> "vect_set_loop_controls_by_while_len"  in  loop control like this:
>
> vect_set_loop_controls_by_while_len
> ... 
> tree X = NULL_TREE;
> FOR_EACH_VEC_ELT (rgc->controls, i, ctrl)
> ...
> if (i == 0) {
>   X = gimple_build (WHILE_LEN);
>   gimple_build_assign (ctrl, X);
> } else {
>   // (X - VF*I/N) capped to the range [0, VF/N]
>   tree t = gimple_build (MINUS, X, build_int_cst (VF*I/N));
>   gimple_build_assign (ctrl, t);
> }
> }
> 
>
> Am I understand your idea correctly ?

I think it's more that rgc->controls.length () == 1 is a special case,
rather than i == 0 being a special case.

That is, rgc->controls.length () == 1 can use a single WHILE_LEN to
calculate the number of scalars that will be processed by the current
loop iteration.  Let's call it X.  Then all rgroups with
rgc->controls.length () > 1 will be based on X rather than using
WHILE_LEN.  (And they would do that even for the first control in the
group, i.e. for i == 0.)

I'm not saying it has to be this way.  It might be that a different
arrangement is better for the later RVV processing.  But there needs
to be something in the gimple-level description, and something in
the optab documentation, that guarantees that whatever code we
generate for these cases works correctly.

BTW, very minor thing (I should have raised it earlier), but maybe
something like SELECT_VL would be a better name than WHILE_LEN?
WHILE_ULT means "while (IV) is unsigned less than" and so describes
an operation in terms of its arguments.  But I think WHILE_LEN is
more describing an operation based on its use case.

Thanks,
Richard


>
> So the example you shows in ARM SVE gimple IR, is like this:
>
> _3 =   [(long int *)_2];
>   vect__4.6_15 = .MASK_LOAD (_3, 64B, loop_mask_21); (INT64)
>   _5 =   [(long int *)_2 + POLY_INT_CST [16B, 
> 16B]];
>   vect__4.7_8 = .MASK_LOAD (_5, 64B, loop_mask_20);(INT64)
>   _7 =   [(long int *)_2 + POLY_INT_CST [32B, 
> 32B]];
>   vect__4.8_28 = .MASK_LOAD (_7, 64B, loop_mask_19);(INT64)
>   _24 =   [(long int *)_2 + POLY_INT_CST [48B, 
> 48B]];
>   vect__4.9_30 = .MASK_LOAD (_24, 64B, loop_mask_16); (INT64)
> vect__7.11_31 = VEC_PACK_TRUNC_EXPR ;
>   vect__7.11_32 = VEC_PACK_TRUNC_EXPR ;
>   vect__7.10_33 = VEC_PACK_TRUNC_EXPR ;
> ...
> .MASK_STORE (_13, 16B, loop_mask_36, vect__7.10_33); (INT16)
>
> If it is changed into WHILE_LEN style,  it should be:
>   
>X = WHILE_LEN;
> _3 =   [(long int *)_2];
>   vect__4.6_15 = .LEN_LOAD (_3, 64B, X - VF*1/N); (INT64)
>   _5 =   [(long int *)_2 + (X - VF*1/N)*8 ];
>   vect__4.7_8 = .LEN_LOAD (_5, 64B, X - VF*2/N);(INT64)
>   _7 =   [(long int *)_2 + (X - VF*2/N)*8];
>   vect__4.8_28 = .LEN_LOAD (_7, 64B, X - VF*3/N);(INT64)
>   _24 =   [(long int *)_2 + (X - VF*3/N)*8];
>   vect__4.9_30 = .LEN_LOAD (_24, 64B, X - VF*4/N); (INT64)
> vect__7.11_31 = VEC_PACK_TRUNC_EXPR ;
>   vect__7.11_32 = VEC_PACK_TRUNC_EXPR ;
>   vect__7.10_33 = VEC_PACK_TRUNC_EXPR ;
> ...
> .LEN_STORE (_13, 16B, X, vect__7.10_33); (INT16)
>
> Is this correct ? 
>
> Thanks.
>
>
> juzhe.zh...@rivai.ai
>  
> From: Richard Sandiford
> Date: 2023-04-26 16:06
> To: juzhe.zhong\@rivai.ai
> CC: gcc-patches; rguenther
> Subject: Re: [PATCH] VECT: Add decrement IV iteration loop control by 
> variable amount support
> "juzhe.zh...@rivai.ai"  writes:
>> Thanks Richard so much.
>>
>>>>> I don't think that's guaranteed by the proposed definition of WHILE_LEN.
>>>>> The first int64_t WHILE_LEN could come up short, and return something
>>>>> less than VF/2.
>>
>> I am so sorry that the comments of vect_set_loop_controls_by_while_len
>> is totally misleading and incorrect and I have sent V3 patch to fix that.
>> Actually, I don't use WHILE_LEN in multi-rgroups situation, instead, I use 
>> MIN_EXPR
>> to force VF elements for each non-final iteration to make sure result is 
>> correct.
>>
>> Yes, I agree with you that WHILE_LEN will produce issues for SLP situation 
>> that
>> having multi-rgroups since WHILE_LEN definition is allow target produces 
>> non-VF
>> outcome in non-final iteration. 
>  
> Yeah, I'd read that you weren't using WHILE_LEN for SLP.  I was talking
> specifically about non-SLP though (nitems_per_iter == 1).  Consider:
>  
> void f(short *x, long *y) {
>   for (int i = 0; i < 100; ++i)
> x[i] = y[i];
> }
>  
> compiled at -O3 -fno-vect-cost-model for SVE:
>  
> whilelo p4.d, wzr, w6
> whilelo p3.d, wzr, w5
> whilelo p2.h, wzr, w3
> whilelo p1.d, wzr, w3
> whilelo p0.d, wzr, w4
> .L2:
> ld1dz2.d, p0/z, [x1, #1, mul vl]
> ld1dz0.d, p1/z, [x1]
> ld1dz1.d, p3/z, [x1, #2, mul vl]
> uzp1z0.s, z0.s, z2.s
> ld1dz2.d, p4/z, [x1, #3, mul vl]
> uzp1z1.s, z1.s, z2.s
> uzp1z0.h, z0.h, z1.h
> st1hz0.h, p2, [x0, x2, lsl 1]
> add x2, x2,

Re: Re: [PATCH] VECT: Add decrement IV iteration loop control by variable amount support

2023-04-26 Thread juzhe.zh...@rivai.ai

Oh, I see. I just checked the code again.
I think I can't do it directly in vect_set_loop_controls_by_while_len.

Instead, I should do something like this for length:
/* First try using permutes.  This adds a single vector
   instruction to the loop for each mask, but needs no extra
   loop invariants or IVs.  */
unsigned int nmasks = i + 1;
if (use_masks_p && (nmasks & 1) == 0)
  {
rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
if (!half_rgc->controls.is_empty ()
&& vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
  continue;
  }

Is that correct?

juzhe.zh...@rivai.ai

From: juzhe.zh...@rivai.ai
Date: 2023-04-26 16:55
To: richard.sandiford
CC: gcc-patches; rguenther
Subject: Re: Re: [PATCH] VECT: Add decrement IV iteration loop control by 
variable amount support
Thank you so much for pointing out this issue.

After reading your comments carefully, I need to revise 
"vect_set_loop_controls_by_while_len"  in  loop control like this:

vect_set_loop_controls_by_while_len
... 
tree X = NULL_TREE;
FOR_EACH_VEC_ELT (rgc->controls, i, ctrl)
...
if (i == 0) {
  X = gimple_build (WHILE_LEN);
  gimple_build_assign (ctrl, X);
} else {
  // (X - VF*I/N) capped to the range [0, VF/N]
  tree t = gimple_build (MINUS, X, build_int_cst (VF*I/N));
  gimple_build_assign (ctrl, t);
}
}

Am I understanding your idea correctly?

So the example you showed in ARM SVE gimple IR looks like this:

_3 =   [(long int *)_2];
  vect__4.6_15 = .MASK_LOAD (_3, 64B, loop_mask_21); (INT64)
  _5 =   [(long int *)_2 + POLY_INT_CST [16B, 16B]];
  vect__4.7_8 = .MASK_LOAD (_5, 64B, loop_mask_20);(INT64)
  _7 =   [(long int *)_2 + POLY_INT_CST [32B, 32B]];
  vect__4.8_28 = .MASK_LOAD (_7, 64B, loop_mask_19);(INT64)
  _24 =   [(long int *)_2 + POLY_INT_CST [48B, 
48B]];
  vect__4.9_30 = .MASK_LOAD (_24, 64B, loop_mask_16); (INT64)
vect__7.11_31 = VEC_PACK_TRUNC_EXPR ;
  vect__7.11_32 = VEC_PACK_TRUNC_EXPR ;
  vect__7.10_33 = VEC_PACK_TRUNC_EXPR ;
...
.MASK_STORE (_13, 16B, loop_mask_36, vect__7.10_33); (INT16)

If it is changed into WHILE_LEN style,  it should be:

   X = WHILE_LEN;
_3 =   [(long int *)_2];
  vect__4.6_15 = .LEN_LOAD (_3, 64B, X - VF*1/N); (INT64)
  _5 =   [(long int *)_2 + (X - VF*1/N)*8 ];
  vect__4.7_8 = .LEN_LOAD (_5, 64B, X - VF*2/N);(INT64)
  _7 =   [(long int *)_2 + (X - VF*2/N)*8];
  vect__4.8_28 = .LEN_LOAD (_7, 64B, X - VF*3/N);(INT64)
  _24 =   [(long int *)_2 + (X - VF*3/N)*8];
  vect__4.9_30 = .LEN_LOAD (_24, 64B, X - VF*4/N); (INT64)
vect__7.11_31 = VEC_PACK_TRUNC_EXPR ;
  vect__7.11_32 = VEC_PACK_TRUNC_EXPR ;
  vect__7.10_33 = VEC_PACK_TRUNC_EXPR ;
...
.LEN_STORE (_13, 16B, X, vect__7.10_33); (INT16)

Is this correct ? 

Thanks.

juzhe.zh...@rivai.ai

From: Richard Sandiford
Date: 2023-04-26 16:06
To: juzhe.zhong\@rivai.ai
CC: gcc-patches; rguenther
Subject: Re: [PATCH] VECT: Add decrement IV iteration loop control by variable 
amount support
"juzhe.zh...@rivai.ai"  writes:
> Thanks Richard so much.
>
>>> I don't think that's guaranteed by the proposed definition of WHILE_LEN.
>>> The first int64_t WHILE_LEN could come up short, and return something
>>> less than VF/2.
>
> I am so sorry that the comments of vect_set_loop_controls_by_while_len
> is totally misleading and incorrect and I have sent V3 patch to fix that.
> Actually, I don't use WHILE_LEN in multi-rgroups situation, instead, I use 
> MIN_EXPR
> to force VF elements for each non-final iteration to make sure result is 
> correct.
>
> Yes, I agree with you that WHILE_LEN will produce issues for SLP situation 
> that
> having multi-rgroups since WHILE_LEN definition is allow target produces 
> non-VF
> outcome in non-final iteration. 

Yeah, I'd read that you weren't using WHILE_LEN for SLP.  I was talking
specifically about non-SLP though (nitems_per_iter == 1).  Consider:

void f(short *x, long *y) {
  for (int i = 0; i < 100; ++i)
x[i] = y[i];
}

compiled at -O3 -fno-vect-cost-model for SVE:

whilelo p4.d, wzr, w6
whilelo p3.d, wzr, w5
whilelo p2.h, wzr, w3
whilelo p1.d, wzr, w3
whilelo p0.d, wzr, w4
.L2:
ld1dz2.d, p0/z, [x1, #1, mul vl]
ld1dz0.d, p1/z, [x1]
ld1dz1.d, p3/z, [x1, #2, mul vl]
uzp1z0.s, z0.s, z2.s
ld1dz2.d, p4/z, [x1, #3, mul vl]
uzp1z1.s, z1.s, z2.s
uzp1z0.h, z0.h, z1.h
st1hz0.h, p2, [x0, x2, lsl 1]
add x2, x2, x8
whilelo p2.h, w2, w3
whilelo p4.d, w2, w6
whilelo p3.d, w2, w5
whilelo p0.d, w2, w4
add x1, x1, x7
whilelo p1.d, w2, w3
b.any   .L2

This is a non-SLP loop.  We have two rgroups: a single-mask/control
rgroup for the short vector, and a 4-mask/control rgroup for the long
vector.  And the loop converts the Nth long scalar (selected from 4
concatenated vectors) to the Nth short scalar

[PATCH] avr: Set param_min_pagesize to 0 [PR105523]

2023-04-26 Thread SenthilKumar.Selvaraj--- via Gcc-patches

Hi,

This patch fixes PR 105523 by setting param_min_pagesize to 0 for the
avr target. For this target, zero and offsets from zero are perfectly
valid addresses, and the default value of param_min_pagesize ends up
triggering warnings on valid memory accesses.

Ok for trunk and backporting to 13 and 12 branches?

Regards
Senthil

PR target/105523

gcc/ChangeLog:

* config/avr/avr.cc (avr_option_override): Set
param_min_pagesize to 0.

gcc/testsuite/ChangeLog:

* gcc.target/avr/pr105523.c: New test.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index c193430cf07..3b862f4e4ac 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -56,6 +56,7 @@
 #include "tree-pass.h"
 #include "print-rtl.h"
 #include "rtl-iter.h"
+#include "opts.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -769,6 +770,9 @@ avr_option_override (void)
   avr_gasisr_prologues = 0;
 #endif
 
+  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+   param_min_pagesize, 0);
+
   if (!avr_set_core_architecture())
 return;
 
diff --git a/gcc/testsuite/gcc.target/avr/pr105523.c 
b/gcc/testsuite/gcc.target/avr/pr105523.c
new file mode 100644
index 000..fbbf7bf4422
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/pr105523.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -Wall" } */
+
+/* Verify no "array subscript 0 is outside array bounds of" is generated
+   for accessing memory addresses in the 0-4096 range. */
+
+typedef __UINT8_TYPE__ uint8_t;
+
+#define SREG (*(volatile uint8_t*) (0x3F + __AVR_SFR_OFFSET__ ))
+
+void bar (void)
+{
+SREG = 0;
+}

Re: Re: [PATCH] VECT: Add decrement IV iteration loop control by variable amount support

2023-04-26 Thread juzhe.zh...@rivai.ai

Thank you so much for pointing out this issue.

After reading your comments carefully, I need to revise 
"vect_set_loop_controls_by_while_len"  in  loop control like this:

vect_set_loop_controls_by_while_len
... 
tree X = NULL_TREE;
FOR_EACH_VEC_ELT (rgc->controls, i, ctrl)
...
if (i == 0) {
  X = gimple_build (WHILE_LEN);
  gimple_build_assign (ctrl, X);
} else {
  // (X - VF*I/N) capped to the range [0, VF/N]
  tree t = gimple_build (MINUS, X, build_int_cst (VF*I/N));
  gimple_build_assign (ctrl, t);
}
}

Am I understanding your idea correctly?

So the example you showed in ARM SVE gimple IR looks like this:

_3 =   [(long int *)_2];
  vect__4.6_15 = .MASK_LOAD (_3, 64B, loop_mask_21); (INT64)
  _5 =   [(long int *)_2 + POLY_INT_CST [16B, 16B]];
  vect__4.7_8 = .MASK_LOAD (_5, 64B, loop_mask_20);(INT64)
  _7 =   [(long int *)_2 + POLY_INT_CST [32B, 32B]];
  vect__4.8_28 = .MASK_LOAD (_7, 64B, loop_mask_19);(INT64)
  _24 =   [(long int *)_2 + POLY_INT_CST [48B, 
48B]];
  vect__4.9_30 = .MASK_LOAD (_24, 64B, loop_mask_16); (INT64)
vect__7.11_31 = VEC_PACK_TRUNC_EXPR ;
  vect__7.11_32 = VEC_PACK_TRUNC_EXPR ;
  vect__7.10_33 = VEC_PACK_TRUNC_EXPR ;
...
.MASK_STORE (_13, 16B, loop_mask_36, vect__7.10_33); (INT16)

If it is changed into WHILE_LEN style,  it should be:

   X = WHILE_LEN;
_3 =   [(long int *)_2];
  vect__4.6_15 = .LEN_LOAD (_3, 64B, X - VF*1/N); (INT64)
  _5 =   [(long int *)_2 + (X - VF*1/N)*8 ];
  vect__4.7_8 = .LEN_LOAD (_5, 64B, X - VF*2/N);(INT64)
  _7 =   [(long int *)_2 + (X - VF*2/N)*8];
  vect__4.8_28 = .LEN_LOAD (_7, 64B, X - VF*3/N);(INT64)
  _24 =   [(long int *)_2 + (X - VF*3/N)*8];
  vect__4.9_30 = .LEN_LOAD (_24, 64B, X - VF*4/N); (INT64)
vect__7.11_31 = VEC_PACK_TRUNC_EXPR ;
  vect__7.11_32 = VEC_PACK_TRUNC_EXPR ;
  vect__7.10_33 = VEC_PACK_TRUNC_EXPR ;
...
.LEN_STORE (_13, 16B, X, vect__7.10_33); (INT16)

Is this correct ? 

Thanks.

juzhe.zh...@rivai.ai

From: Richard Sandiford
Date: 2023-04-26 16:06
To: juzhe.zhong\@rivai.ai
CC: gcc-patches; rguenther
Subject: Re: [PATCH] VECT: Add decrement IV iteration loop control by variable 
amount support
"juzhe.zh...@rivai.ai"  writes:
> Thanks Richard so much.
>
>>> I don't think that's guaranteed by the proposed definition of WHILE_LEN.
>>> The first int64_t WHILE_LEN could come up short, and return something
>>> less than VF/2.
>
> I am so sorry that the comments of vect_set_loop_controls_by_while_len
> is totally misleading and incorrect and I have sent V3 patch to fix that.
> Actually, I don't use WHILE_LEN in multi-rgroups situation, instead, I use 
> MIN_EXPR
> to force VF elements for each non-final iteration to make sure result is 
> correct.
>
> Yes, I agree with you that WHILE_LEN will produce issues for SLP situation 
> that
> having multi-rgroups since WHILE_LEN definition is allow target produces 
> non-VF
> outcome in non-final iteration. 

Yeah, I'd read that you weren't using WHILE_LEN for SLP.  I was talking
specifically about non-SLP though (nitems_per_iter == 1).  Consider:

void f(short *x, long *y) {
  for (int i = 0; i < 100; ++i)
x[i] = y[i];
}

compiled at -O3 -fno-vect-cost-model for SVE:

whilelo p4.d, wzr, w6
whilelo p3.d, wzr, w5
whilelo p2.h, wzr, w3
whilelo p1.d, wzr, w3
whilelo p0.d, wzr, w4
.L2:
ld1d z2.d, p0/z, [x1, #1, mul vl]
ld1d z0.d, p1/z, [x1]
ld1d z1.d, p3/z, [x1, #2, mul vl]
uzp1 z0.s, z0.s, z2.s
ld1d z2.d, p4/z, [x1, #3, mul vl]
uzp1 z1.s, z1.s, z2.s
uzp1 z0.h, z0.h, z1.h
st1h z0.h, p2, [x0, x2, lsl 1]
add x2, x2, x8
whilelo p2.h, w2, w3
whilelo p4.d, w2, w6
whilelo p3.d, w2, w5
whilelo p0.d, w2, w4
add x1, x1, x7
whilelo p1.d, w2, w3
b.any   .L2

This is a non-SLP loop.  We have two rgroups: a single-mask/control
rgroup for the short vector, and a 4-mask/control rgroup for the long
vector.  And the loop converts the Nth long scalar (selected from 4
concatenated vectors) to the Nth short scalar (in a single vector).

It's therefore important that the 4-mask/control rgroup and the
single-mask/control rgroup treat the same lanes/scalar iterations
as active and the same lanes/scalar iterations as inactive.

But if I read the code correctly, the patch would generate 5 WHILE_LENs
for this case, since nitems_per_iter==1 for all 5 controls.  And I don't
think the documentation of WHILE_LEN guarantees that that will work
correctly, given that WHILE_LEN isn't a simple MIN operation.

It might be that it works correctly on RVV, given the later
backend-specific processing.  But I'm looking at this as a purely
gimple thing.  If something guarantees that the above works then
I think the WHILE_LEN documentation needs to be updated.

From the current documentation of WHILE_LEN, I think the safe
approach would be to use WHILE_LEN for a single-control rgroup
and then "expand" that to larger

[COMMITTED] Convert users of legacy API to get_legacy_range() function.

2023-04-26 Thread Aldy Hernandez via Gcc-patches

This patch converts the users of the legacy API to a function called
get_legacy_range() which will return the pieces of the soon to be
removed API (min, max, and kind).  This is a temporary measure while
these users are converted.

In upcoming patches I will convert most users, but most of the
middle-end warning uses will remain.  Naive attempts to remove them
showed that a lot of these uses are quite dependent on the anti-range
idiom, and converting them to the new API broke the tests, even when
the conversion was conceptually correct.  Perhaps someone who
understands these passes could take a stab at it.  In the meantime,
the legacy uses can be trivially found by grepping for
get_legacy_range.

gcc/ChangeLog:

* builtins.cc (determine_block_size): Convert use of legacy API to
get_legacy_range.
* gimple-array-bounds.cc (check_out_of_bounds_and_warn): Same.
(array_bounds_checker::check_array_ref): Same.
* gimple-ssa-warn-restrict.cc
(builtin_memref::extend_offset_range): Same.
* ipa-cp.cc (ipcp_store_vr_results): Same.
* ipa-fnsummary.cc (set_switch_stmt_execution_predicate): Same.
* ipa-prop.cc (struct ipa_vr_ggc_hash_traits): Same.
(ipa_write_jump_function): Same.
* pointer-query.cc (get_size_range): Same.
* tree-data-ref.cc (split_constant_offset): Same.
* tree-ssa-strlen.cc (get_range): Same.
(maybe_diag_stxncpy_trunc): Same.
(strlen_pass::get_len_or_size): Same.
(strlen_pass::count_nonzero_bytes_addr): Same.
* tree-vect-patterns.cc (vect_get_range_info): Same.
* value-range.cc (irange::maybe_anti_range): Remove.
(get_legacy_range): New.
(irange::copy_to_legacy):
(ranges_from_anti_range): Use get_legacy_range.
* value-range.h (class irange): Remove maybe_anti_range.
(get_legacy_range): New.
* vr-values.cc (check_for_binary_op_overflow): Convert use of
legacy API to get_legacy_range.
(compare_ranges): Same.
(compare_range_with_value): Same.
(bounds_of_var_in_loop): Same.
(find_case_label_ranges): Same.
(simplify_using_ranges::simplify_switch_using_ranges): Same.
---
 gcc/builtins.cc |   7 +-
 gcc/gimple-array-bounds.cc  |   9 ++-
 gcc/gimple-ssa-warn-restrict.cc |   7 +-
 gcc/ipa-cp.cc   |   7 +-
 gcc/ipa-fnsummary.cc|   7 +-
 gcc/ipa-prop.cc |  16 +++--
 gcc/pointer-query.cc|   7 +-
 gcc/tree-data-ref.cc|   7 +-
 gcc/tree-ssa-strlen.cc  |  29 
 gcc/tree-vect-patterns.cc   |   7 +-
 gcc/value-range.cc  |  89 ---
 gcc/value-range.h   |   3 +-
 gcc/vr-values.cc| 123 +++-
 13 files changed, 199 insertions(+), 119 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 0e06fa5b2e0..bb931242c9c 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -3570,12 +3570,13 @@ determine_block_size (tree len, rtx len_rtx,
   if (TREE_CODE (len) == SSA_NAME)
{
  value_range r;
+ tree tmin, tmax;
  get_global_range_query ()->range_of_expr (r, len);
- range_type = r.kind ();
+ range_type = get_legacy_range (r, tmin, tmax);
  if (range_type != VR_UNDEFINED)
{
- min = wi::to_wide (r.min ());
- max = wi::to_wide (r.max ());
+ min = wi::to_wide (tmin);
+ max = wi::to_wide (tmax);
}
}
   if (range_type == VR_RANGE)
diff --git a/gcc/gimple-array-bounds.cc b/gcc/gimple-array-bounds.cc
index 775f0c13258..e04c8e29d5a 100644
--- a/gcc/gimple-array-bounds.cc
+++ b/gcc/gimple-array-bounds.cc
@@ -266,6 +266,7 @@ check_out_of_bounds_and_warn (location_t location, tree ref,
  bool ignore_off_by_one, bool for_array_bound,
  bool *out_of_bound)
 {
+  tree min, max;
   tree low_bound = array_ref_low_bound (ref);
   tree artype = TREE_TYPE (TREE_OPERAND (ref, 0));
 
@@ -284,7 +285,7 @@ check_out_of_bounds_and_warn (location_t location, tree ref,
 
   if (warned)
 ; /* Do nothing.  */
-  else if (vr->kind () == VR_ANTI_RANGE)
+  else if (get_legacy_range (*vr, min, max) == VR_ANTI_RANGE)
 {
   if (up_bound
  && TREE_CODE (up_sub) == INTEGER_CST
@@ -378,8 +379,10 @@ array_bounds_checker::check_array_ref (location_t 
location, tree ref,
   get_value_range (vr, low_sub_org, stmt);
   if (!vr.undefined_p () && !vr.varying_p ())
{
- low_sub = vr.kind () == VR_RANGE ? vr.max () : vr.min ();
- up_sub = vr.kind () == VR_RANGE ? vr.min () : vr.max ();
+ tree min, max;
+ value_range_kind kind = get_legacy_range (vr, min, max);
+ low_sub = kind == VR_RANGE ? max : min;
+ up_sub = kind == VR_RANGE ? min : max;
}
 }
 
diff

[COMMITTED] Remove irange::constant_p.

2023-04-26 Thread Aldy Hernandez via Gcc-patches

gcc/ChangeLog:

* value-range-pretty-print.cc (vrange_printer::visit): Remove
constant_p use.
* value-range.cc (irange::constant_p): Remove.
(irange::get_nonzero_bits_from_range): Remove constant_p use.
* value-range.h (class irange): Remove constant_p.
(irange::num_pairs): Remove constant_p use.
---
 gcc/value-range-pretty-print.cc | 13 -
 gcc/value-range.cc  | 14 --
 gcc/value-range.h   |  8 ++--
 3 files changed, 6 insertions(+), 29 deletions(-)

diff --git a/gcc/value-range-pretty-print.cc b/gcc/value-range-pretty-print.cc
index d20e2562431..a11c5a621f8 100644
--- a/gcc/value-range-pretty-print.cc
+++ b/gcc/value-range-pretty-print.cc
@@ -63,19 +63,6 @@ vrange_printer::visit (const irange &r) const
   pp_string (pp, "VARYING");
   return;
 }
-  // Handle legacy symbolics.
-  if (!r.constant_p ())
-{
-  if (r.kind () == VR_ANTI_RANGE)
-   pp_character (pp, '~');
-  pp_character (pp, '[');
-  dump_generic_node (pp, r.min (), 0, TDF_NONE, false);
-  pp_string (pp, ", ");
-  dump_generic_node (pp, r.max (), 0, TDF_NONE, false);
-  pp_character (pp, ']');
-  print_irange_bitmasks (r);
-  return;
-}
   for (unsigned i = 0; i < r.num_pairs (); ++i)
 {
   pp_character (pp, '[');
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index ebadea8b917..58ae2c157db 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -1351,16 +1351,6 @@ irange::operator== (const irange ) const
   return nz1 == nz2;
 }
 
-/* Return TRUE if this is a constant range.  */
-
-bool
-irange::constant_p () const
-{
-  return (m_num_ranges > 0
- && TREE_CODE (min ()) == INTEGER_CST
- && TREE_CODE (max ()) == INTEGER_CST);
-}
-
 /* If range is a singleton, place it in RESULT and return TRUE.
Note: A singleton can be any gimple invariant, not just constants.
So, [&x, &x] counts as a singleton.  */
@@ -2835,10 +2825,6 @@ irange::invert ()
 wide_int
 irange::get_nonzero_bits_from_range () const
 {
-  // For legacy symbolics.
-  if (!constant_p ())
-return wi::shwi (-1, TYPE_PRECISION (type ()));
-
   wide_int min = lower_bound ();
   wide_int max = upper_bound ();
   wide_int xorv = min ^ max;
diff --git a/gcc/value-range.h b/gcc/value-range.h
index 1012d007261..2442f8eed70 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -169,7 +169,6 @@ public:
   // Deprecated legacy public methods.
   tree min () const;   // DEPRECATED
   tree max () const;   // DEPRECATED
-  bool constant_p () const;// DEPRECATED
   bool legacy_verbose_union_ (const class irange *);   // DEPRECATED
   bool legacy_verbose_intersect (const irange *);  // DEPRECATED
 
@@ -692,7 +691,12 @@ inline unsigned
 irange::num_pairs () const
 {
   if (m_kind == VR_ANTI_RANGE)
-return constant_p () ? 2 : 1;
+{
+  bool constant_p = (TREE_CODE (min ()) == INTEGER_CST
+&& TREE_CODE (max ()) == INTEGER_CST);
+  gcc_checking_assert (constant_p);
+  return 2;
+}
   else
 return m_num_ranges;
 }
-- 
2.40.0

1 2 >

1 - 100 of 115 matches

Mail list logo