Re: [PATCH v3 04/11] riscv: thead: Add support for the XTheadBs ISA extension

2023-02-28 Thread Christoph Müllner
On Wed, Mar 1, 2023 at 1:19 AM Hans-Peter Nilsson  wrote:
>
>
>
> On Tue, 28 Feb 2023, Christoph Müllner wrote:
>
> > On Sun, Feb 26, 2023 at 12:42 AM Hans-Peter Nilsson  
> > wrote:
> > >
> > > On Fri, 24 Feb 2023, Christoph Muellner wrote:
> > > > diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md
> > > > index 158e9124c3a..2c684885850 100644
> > > > --- a/gcc/config/riscv/thead.md
> > > > +++ b/gcc/config/riscv/thead.md
> > > > @@ -29,3 +29,14 @@ (define_insn "*th_addsl"
> > > >"th.addsl\t%0,%3,%1,%2"
> > > >[(set_attr "type" "bitmanip")
> > > > (set_attr "mode" "")])
> > > > +
> > > > +;; XTheadBs
> > > > +
> > > > +(define_insn "*th_tst"
> > > > +  [(set (match_operand:X 0 "register_operand" "=r")
> > > > + (zero_extract:X (match_operand:X 1 "register_operand" "r")
> > > > + (const_int 1)
> > > > + (match_operand 2 "immediate_operand" "i")))]
> > >
> > > (Here and same elsewhere.)
> > >
> > > You're unlikely to get other constant operands in that pattern,
> > > but FWIW, the actual matching pair for just CONST_INT is
> > > "const_int_operand" for the predicate and "n" for the
> > > constraint.  Using the right predicate and constraint will also
> > > help the generated part of recog be a few nanoseconds faster. ;)
> >
> > Thank you for that comment!
> > I think what you mean would look like this:
> >
> > (define_insn "*th_tst"
> >   [(set (match_operand:X 0 "register_operand" "=r")
> > (zero_extract:X (match_operand:X 1 "register_operand" "r")
> > (match_operand 3 "const_int_operand" "n")
> > (match_operand 2 "immediate_operand" "i")))]
>
> No; misunderstanding.  Keep the (const_int 1) but replace
> (match_operand 2 "immediate_operand" "i") with
> (match_operand 2 "const_int_operand" "n")

Ah, yes, this makes sense!

Thanks!


>
> brgds, H-P


[PATCH] tree-optimization/108950 - widen-sum reduction ICE

2023-02-28 Thread Richard Biener via Gcc-patches
When we end up with a widen-sum with an invariant smaller operand
the reduction code uses a wrong vector type for it, causing
IL checking ICEs.  The following fixes that and the inefficiency
of using a widen-sum with a widenend invariant operand as well
by actually performing the check the following comment wants.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/108950
* tree-vect-patterns.cc (vect_recog_widen_sum_pattern):
Check oprnd0 is defined in the loop.
* tree-vect-loop.cc (vectorizable_reduction): Record all
operands vector types, compute that of invariants and
properly update their SLP nodes.

* gcc.dg/vect/pr108950.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr108950.c | 13 +
 gcc/tree-vect-loop.cc| 17 +++--
 gcc/tree-vect-patterns.cc|  4 +++-
 3 files changed, 27 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr108950.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr108950.c 
b/gcc/testsuite/gcc.dg/vect/pr108950.c
new file mode 100644
index 000..2163866dfa7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr108950.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+
+int m;
+short int n;
+
+__attribute__ ((simd)) int
+foo (void)
+{
+  m += n;
+  m += n;
+}
+
+/* { dg-final { scan-tree-dump-not "widen_sum" "vect" } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ab7af0ea3b8..b17e8745d3f 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6790,6 +6790,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
stmt_vector_for_cost *cost_vec)
 {
   tree vectype_in = NULL_TREE;
+  tree vectype_op[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   enum vect_def_type cond_reduc_dt = vect_unknown_def_type;
   stmt_vec_info cond_stmt_vinfo = NULL;
@@ -6799,7 +6800,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   bool nested_cycle = false;
   bool double_reduc = false;
   int vec_num;
-  tree tem;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
   tree cond_reduc_val = NULL_TREE;
 
@@ -7037,7 +7037,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   enum vect_def_type dt;
   if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info,
   i + opno_adjust, [i], _op[i], ,
-  , _stmt_info))
+  _op[i], _stmt_info))
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7052,15 +7052,20 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   if (VECTORIZABLE_CYCLE_DEF (dt))
return false;
 
+  if (!vectype_op[i])
+   vectype_op[i]
+ = get_vectype_for_scalar_type (loop_vinfo,
+TREE_TYPE (op.ops[i]), slp_op[i]);
+
   /* To properly compute ncopies we are interested in the widest
 non-reduction input type in case we're looking at a widening
 accumulation that we later handle in vect_transform_reduction.  */
   if (lane_reduc_code_p
- && tem
+ && vectype_op[i]
  && (!vectype_in
  || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
- < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem))
-   vectype_in = tem;
+ < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE 
(vectype_op[i]))
+   vectype_in = vectype_op[i];
 
   if (op.code == COND_EXPR)
{
@@ -7581,7 +7586,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   && !lane_reduc_code_p
   && reduction_type != FOLD_LEFT_REDUCTION))
 for (i = 0; i < (int) op.num_ops; i++)
-  if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
+  if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_op[i]))
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index cefe331620f..dd585e59bf7 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -1821,7 +1821,9 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
  of the above pattern.  */
 
   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
-  , ))
+  , )
+  || TREE_CODE (oprnd0) != SSA_NAME
+  || !vinfo->lookup_def (oprnd0))
 return NULL;
 
   type = TREE_TYPE (gimple_get_lhs (last_stmt));
-- 
2.35.3


[PATCH, gfortran] Escalate failure when Hollerith constant to real conversion fails [PR103628]

2023-02-28 Thread HAO CHEN GUI via Gcc-patches
Hi,
  The patch escalates the failure when Hollerith constant to real conversion
fails in native_interpret_expr. It finally reports an "Unclassifiable
statement" error.

  The patch of pr95450 added a verification for decoding/encoding checking
in native_interpret_expr. native_interpret_expr may fail on real type
conversion and returns a NULL tree then. But upper layer calls don't handle
the failure so that an ICE is reported when the verification fails.

  IBM long double is an example. It doesn't have a unique memory presentation
for some real values. So it may not pass the verification. The new test
case shows the problem.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
2023-03-01  Haochen Gui 

gcc/
PR target/103628
* fortran/target-memory.cc (gfc_interpret_float): Return FAIL when
native_interpret_expr gets a NULL tree.
* fortran/arith.cc (gfc_hollerith2real): Return NULL when
gfc_interpret_float fails.

gcc/testsuite/
PR target/103628
* gfortran.dg/pr103628.f90: New.


patch.diff
diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
index c0d12cfad9d..d3d38c7eb6a 100644
--- a/gcc/fortran/arith.cc
+++ b/gcc/fortran/arith.cc
@@ -2752,10 +2752,12 @@ gfc_hollerith2real (gfc_expr *src, int kind)
   result = gfc_get_constant_expr (BT_REAL, kind, >where);

   hollerith2representation (result, src);
-  gfc_interpret_float (kind, (unsigned char *) result->representation.string,
-  result->representation.length, result->value.real);
-
-  return result;
+  if (gfc_interpret_float (kind,
+  (unsigned char *) result->representation.string,
+  result->representation.length, result->value.real))
+return result;
+  else
+return NULL;
 }

 /* Convert character to real.  The constant will be padded or truncated.  */
diff --git a/gcc/fortran/target-memory.cc b/gcc/fortran/target-memory.cc
index 7ce7d736629..04afc357e3c 100644
--- a/gcc/fortran/target-memory.cc
+++ b/gcc/fortran/target-memory.cc
@@ -417,10 +417,13 @@ gfc_interpret_float (int kind, unsigned char *buffer, 
size_t buffer_size,
 {
   gfc_set_model_kind (kind);
   mpfr_init (real);
-  gfc_conv_tree_to_mpfr (real,
-native_interpret_expr (gfc_get_real_type (kind),
-   buffer, buffer_size));

+  tree source  = native_interpret_expr (gfc_get_real_type (kind), buffer,
+   buffer_size);
+  if (!source)
+return 0;
+
+  gfc_conv_tree_to_mpfr (real, source);
   return size_float (kind);
 }

diff --git a/gcc/testsuite/gfortran.dg/pr103628.f90 
b/gcc/testsuite/gfortran.dg/pr103628.f90
new file mode 100644
index 000..e49aefc18fd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr103628.f90
@@ -0,0 +1,14 @@
+! { dg-do compile { target powerpc*-*-* } }
+! { dg-options "-O2 -mabi=ibmlongdouble" }
+
+! Test to ensure that it reports an "Unclassifiable statement" error
+! instead of throwing an ICE when the memory represent of the HOLLERITH
+! string is not unique with ibm long double encoding.
+
+program main
+  integer, parameter :: k = 16
+  real(kind = k):: b = 4h1234
+end program main
+
+! { dg-warning "Conversion from HOLLERITH" "warning" { target powerpc*-*-* } 
10 }
+! { dg-error "Unclassifiable statement" "error" { target powerpc*-*-* } 10 }


[PATCH] rs6000/test: Adjust scalar-test-data-class-1[45].c with int128

2023-02-28 Thread Kewen.Lin via Gcc-patches
Hi,

Test cases scalar-test-data-class-1[45].c adopts type __int128
which requires to check int128 effective target, otherwise the
testing on them will fail at -m32.  This patch is to add int128
effective target requirement.

Tested on powerpc64-linux-gnu P7/P8/P9 and
powerpc64le-linux-gnu P9/P10.

I'm going to push this soon if no objections.

BR,
Kewen
-

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/bfp/scalar-test-data-class-14.c: Adjust with
int128 effective target requirement.
* gcc.target/powerpc/bfp/scalar-test-data-class-15.c: Likewise.
---
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-14.c | 1 +
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-15.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-14.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-14.c
index 84c610de5de..5c3e07c8108 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-14.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-14.c
@@ -1,4 +1,5 @@
 /* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target int128 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-15.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-15.c
index a8b5a34ba8a..2be5d1b18b6 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-15.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-15.c
@@ -1,4 +1,5 @@
 /* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target int128 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

--
2.39.2


[PATCH] rs6000/test: Adjust pr101384-2.c for P9 [PR108813]

2023-02-28 Thread Kewen.Lin via Gcc-patches
Hi,

Compiled with cpu type Power9 or later, GCC generates
xxspltib rather than vspltis*, so adjust the test
case scanning content accordingly.

Tested on powerpc64-linux-gnu P7/P8/P9 and
powerpc64le-linux-gnu P9/P10.

I'm going to push this soon if no objections.

BR,
Kewen
-
PR testsuite/108813

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr101384-2.c: Adjust with xxspltib.
---
 gcc/testsuite/gcc.target/powerpc/pr101384-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr101384-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr101384-2.c
index f3957086b04..c14891b5ab4 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr101384-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr101384-2.c
@@ -2,7 +2,7 @@
 /* { dg-do compile { target be } } */
 /* { dg-options "-O2 -maltivec" } */
 /* { dg-require-effective-target powerpc_altivec_ok } */
-/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M} 9 } } */
+/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M|\mxxspltib 
[^\n\r]*,255\M} 9 } } */
 /* { dg-final { scan-assembler-times {\mvslw\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvslh\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvslb\M} 3 } } */
--
2.39.2





[PATCH] rs6000/test: Adjust fold-vec-extract-double.p9.c for BE [PR108810]

2023-02-28 Thread Kewen.Lin via Gcc-patches
Hi,

On BE, the extracted index for the leftmost element is 0
rather than 1, adjust the test case accordingly.

Tested on powerpc64-linux-gnu P7/P8/P9 and
powerpc64le-linux-gnu P9/P10.

I'm going to push this soon if no objections.

BR,
Kewen
-
PR testsuite/108810

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/fold-vec-extract-double.p9.c (testd_cst): Adjust
the extracted index for BE.
---
 .../gcc.target/powerpc/fold-vec-extract-double.p9.c   | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p9.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p9.c
index 6c515035d1a..100f680fd02 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p9.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-double.p9.c
@@ -18,9 +18,15 @@ testd_var (vector double vd2, signed int si)
   return vec_extract (vd2, si);
 }

+#ifdef __BIG_ENDIAN__
+#define LEFTMOST_ELEMENT_INDEX 0
+#else
+#define LEFTMOST_ELEMENT_INDEX 1
+#endif
+
 double
 testd_cst (vector double vd2)
 {
-  return vec_extract (vd2, 1);
+  return vec_extract (vd2, LEFTMOST_ELEMENT_INDEX);
 }

--
2.39.2


[PATCH] rs6000/test: Adjust scalar-test-neg-8.c with lp64 [PR108730]

2023-02-28 Thread Kewen.Lin via Gcc-patches
Hi,

The built-in function scalar_test_neg_qp is under stanza
ieee128-hw, that is TARGET_FLOAT128_HW.  Since we don't
have float128 hardware support on 32-bit as follows:

if (TARGET_FLOAT128_HW && !TARGET_64BIT)
  {
if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
  error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
  }

So adjust the case with lp64 effective target accordingly.

Tested on powerpc64-linux-gnu P7/P8/P9 and
powerpc64le-linux-gnu P9/P10.

I'm going to push this soon if no objections.

BR,
Kewen
-
PR testsuite/108730

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/bfp/scalar-test-neg-8.c: Adjust with lp64
effective target requirement.
---
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-neg-8.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-neg-8.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-neg-8.c
index 02dfa348b81..2ef512cd8d0 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-neg-8.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-neg-8.c
@@ -1,4 +1,5 @@
 /* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

--
2.39.2


[PATCH] rs6000/test: Adjust two bfp test cases with has_arch_ppc64 [PR108729]

2023-02-28 Thread Kewen.Lin via Gcc-patches
Hi,

Two test cases scalar-test-data-class-12.c and vec-test-data-class-9.c
fail on Power9 BE testing at -m32, they adopts a built-in function
scalar_insert_exp which requires powerpc64 support.  This patch
is to make them to check has_arch_ppc64 effective target requirement.

Tested on powerpc64-linux-gnu P7/P8/P9 and
powerpc64le-linux-gnu P9/P10.

I'm going to push this soon if no objections.

BR,
Kewen
-
gcc/testsuite/ChangeLog:

* gcc.target/powerpc/bfp/scalar-test-data-class-12.c: Adjust with
has_arch_ppc64 effective target.
* gcc.target/powerpc/bfp/vec-test-data-class-9.c: Likewise.
---
 gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-12.c | 1 +
 gcc/testsuite/gcc.target/powerpc/bfp/vec-test-data-class-9.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-12.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-12.c
index f7f41419810..08f2720e5b4 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-12.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-test-data-class-12.c
@@ -1,4 +1,5 @@
 /* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target has_arch_ppc64 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/vec-test-data-class-9.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/vec-test-data-class-9.c
index aeb64fbf122..c803617b5d3 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/vec-test-data-class-9.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/vec-test-data-class-9.c
@@ -1,4 +1,5 @@
 /* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target has_arch_ppc64 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

--
2.39.2


Re: [PATCH 1/2] testsuite: Fix analyzer errors for newlib-errno

2023-02-28 Thread Hans-Peter Nilsson via Gcc-patches
> From: David Malcolm 
> Date: Tue, 28 Feb 2023 21:25:34 -0500

> On Wed, 2023-03-01 at 01:59 +0100, Hans-Peter Nilsson wrote:
> > > From: David Malcolm 
> > > Date: Tue, 28 Feb 2023 14:12:47 -0500
> > 
> > > On Tue, 2023-02-28 at 19:47 +0100, Hans-Peter Nilsson wrote:
> > > > Ok to commit?
> > > > -- >8 --
> > > > Investigating analyzer tesstsuite errors for cris-elf.  The same
> > > > are
> > > > seen for pru-elf according to posts to gcc-testresults@.
> > > > 
> > > > For glibc, errno is #defined as:
> > > >  extern int *__errno_location (void) __THROW __attribute_const__;
> > > >  # define errno (*__errno_location ())
> > > > while for newlib in its default configuration, it's:
> > > >  #define errno (*__errno())
> > > >  extern int *__errno (void);
> > > 
> > > We're already handling ___errno (three underscores) for Solaris as
> > > of
> > > 7c9717fcb5cf94ce1e7ef5c903058adf9980ff28; does it fix the issue if
> > > you 
> > > add __errno (two underscores) to analyzer/kf.cc's 
> > > register_known_functions in an analogous way to that commit?  (i.e.
> > > wiring it up to kf_errno_location, "teaching" the analyzer that
> > > that
> > > function returns a pointer to the "errno region")
> > 
> > But...there's already "support" for two underscores since
> > the commit you quote, so I guess not.  
> 
> Is there?  That commit adds support for:
>   __error
> but not for:
>   __errno
> unless there's something I'm missing.

No, it's me.  Apparently I can't spot the difference between
error and errno.  Sorry.

It's nice to know that the const-attribute (to signal that
the pointer points to a constant location) works
automatically, i.e. the Solaris and glibc definitions should
not (no longer?) be needed - unless there's other
errno-analyzing magic elsewhere too.  Either way, since
there's specific support for this errno kind of thing, that
certainly works for me.  The patch gets smaller too. :)

So, how's this instead, pending full testing (only
analyzer.exp spot-tested)?

-- >8 --
analyzer: Support errno for newlib

Without this definition, the errno definition for newlib
isn't recognized as such, and these tests fail for newlib
targets:

FAIL: gcc.dg/analyzer/call-summaries-errno.c  (test for warnings, line 16)
FAIL: gcc.dg/analyzer/call-summaries-errno.c (test for excess errors)
FAIL: gcc.dg/analyzer/errno-1.c  (test for warnings, line 20)
FAIL: gcc.dg/analyzer/errno-1.c (test for excess errors)
FAIL: gcc.dg/analyzer/isatty-1.c  (test for warnings, line 31)
FAIL: gcc.dg/analyzer/isatty-1.c  (test for warnings, line 35)
FAIL: gcc.dg/analyzer/isatty-1.c  (test for warnings, line 46)
FAIL: gcc.dg/analyzer/isatty-1.c  (test for warnings, line 56)
FAIL: gcc.dg/analyzer/isatty-1.c (test for excess errors)

gcc/analyzer:
* kf.cc (register_known_functions): Add __errno function for newlib.
---
 gcc/analyzer/kf.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 3a91b6bd6ebb..bfb9148f9ae7 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1036,6 +1036,7 @@ register_known_functions (known_function_manager )
Add these as synonyms for "__errno_location".  */
 kfm.add ("___errno", make_unique ());
 kfm.add ("__error", make_unique ());
+kfm.add ("__errno", make_unique ());
   }
 
   /* Language-specific support functions.  */
-- 
2.30.2


Re: [PATCH 1/2] testsuite: Fix analyzer errors for newlib-errno

2023-02-28 Thread David Malcolm via Gcc-patches
On Wed, 2023-03-01 at 01:59 +0100, Hans-Peter Nilsson wrote:
> > From: David Malcolm 
> > Date: Tue, 28 Feb 2023 14:12:47 -0500
> 
> > On Tue, 2023-02-28 at 19:47 +0100, Hans-Peter Nilsson wrote:
> > > Ok to commit?
> > > -- >8 --
> > > Investigating analyzer tesstsuite errors for cris-elf.  The same
> > > are
> > > seen for pru-elf according to posts to gcc-testresults@.
> > > 
> > > For glibc, errno is #defined as:
> > >  extern int *__errno_location (void) __THROW __attribute_const__;
> > >  # define errno (*__errno_location ())
> > > while for newlib in its default configuration, it's:
> > >  #define errno (*__errno())
> > >  extern int *__errno (void);
> > 
> > We're already handling ___errno (three underscores) for Solaris as
> > of
> > 7c9717fcb5cf94ce1e7ef5c903058adf9980ff28; does it fix the issue if
> > you 
> > add __errno (two underscores) to analyzer/kf.cc's 
> > register_known_functions in an analogous way to that commit?  (i.e.
> > wiring it up to kf_errno_location, "teaching" the analyzer that
> > that
> > function returns a pointer to the "errno region")
> 
> But...there's already "support" for two underscores since
> the commit you quote, so I guess not.  

Is there?  That commit adds support for:
  __error
but not for:
  __errno
unless there's something I'm missing.

> I strongly believe
> "the critical difference is that __attribute__
> ((__const__))" because I indeed proved it by adding it to
> the newlib definition.
> 
> I doubt these tests actually *pass* for OS X, because that
> one looks identical to the newlib definition as quoted in
> that commit (compare to the newlib one I pasted in the quote
> above).
> 
> It looks like that definition was added for good measure
> along with the Solaris definition (that has the
> attribute-const) but never tested.  Sorry, I don't have an
> OS X to test it myself and according to a popular search
> engine(...) nobody has posted gcc-testresults@ for anything
> "apple" since gcc-4.7 era. :(

The comments by Rainer in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107807
suggests that it did fix stuff on Solaris and OS X.

Sorry if I'm missing something here, I'm not very familiar with newlib.

Dave

> 
> brgds, H-P
> 
> > 
> > Dave
> > 
> > > 
> > > The critical difference is that __attribute__ ((__const__)),
> > > where glibc says that the caller will see the same value on
> > > all calls (from the same context; read: same thread).  I'm
> > > not sure the absence of __attribute__ ((__const__)) for the
> > > newlib definition is deliberate, but I guess it can.
> > > Either way, without the "const" attribute, it can't be known
> > > that the same location will be returned the next time, so
> > > analyzer-tests that depend the value being known it should
> > > see UNKNOWN rather than TRUE, that's why the deliberate
> > > check for UNKNOWN rather than xfailing the test.
> > > 
> > > For isatty-1.c, it's the same problem, but here it'd be
> > > unweildy with the extra dg-lines, so better just skip it for
> > > newlib targets.
> > > 
> > > testsuite:
> > >     * gcc.dg/analyzer/call-summaries-errno.c: Expect UNKNOWN
> > >     for newlib after having set errno.
> > >     * gcc.dg/analyzer/errno-1.c: Ditto.
> > >     * gcc.dg/analyzer/isatty-1.c: Skip for newlib targets.
> > > ---
> > >  gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c | 3 ++-
> > >  gcc/testsuite/gcc.dg/analyzer/errno-1.c  | 3 ++-
> > >  gcc/testsuite/gcc.dg/analyzer/isatty-1.c | 2 +-
> > >  3 files changed, 5 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> > > b/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> > > index e4333b30bb77..cf4d9f7141e4 100644
> > > --- a/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> > > +++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> > > @@ -13,5 +13,6 @@ void test_sets_errno (int y)
> > >    sets_errno (y);
> > >    sets_errno (y);
> > >  
> > > -  __analyzer_eval (errno == y); /* { dg-warning "TRUE" } */  
> > > +  __analyzer_eval (errno == y); /* { dg-warning "TRUE" "errno is
> > > at
> > > a constant location" { target { ! newlib } } } */
> > > +  /* { dg-warning "UNKNOWN" "errno is not known to be at a
> > > constant
> > > location" { target { newlib } } .-1 } */
> > >  }
> > > diff --git a/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> > > b/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> > > index 6b9d28c10799..af0cc3d52a36 100644
> > > --- a/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> > > +++ b/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> > > @@ -17,7 +17,8 @@ void test_storing_to_errno (int val)
> > >  {
> > >    __analyzer_eval (errno == val); /* { dg-warning "UNKNOWN" } */
> > >    errno = val;
> > > -  __analyzer_eval (errno == val); /* { dg-warning "TRUE" } */
> > > +  __analyzer_eval (errno == val); /* { dg-warning "TRUE" "errno
> > > is
> > > at a constant location" { target { ! newlib } } } */
> > > +  /* { dg-warning 

[PATCH] RISC-V: Optimize the MASK opt generation

2023-02-28 Thread Feng Wang
The Mask flag in the single TargetVariable is not enough due to more
and more extensions were added.So I optimize the defination of Mask
flag, please refer to the below case:
There are some new MASK flags for 'v' extension(ZVL32B,ZVL64B,...,ZVL65536B),
but these MASK flags can't store into x_target_flags,because the total number
of MASK flags exceed 32. In this patch we can write it like this in this scence.

TargetVariable
int riscv_zvl_flags

Mask(ZVL32B) in TargetVariable(riscv_zvl_flags)

The corresponding MASK and TARGET will be automatically generated.

gcc/ChangeLog:

* config/riscv/riscv-opts.h   Delete below definations
(MASK_ZICSR): Delete;
(MASK_ZIFENCEI): Delete;
(TARGET_ZICSR): Delete;
(TARGET_ZIFENCEI): Delete;
(MASK_ZAWRS): Delete;
(TARGET_ZAWRS): Delete;
(MASK_ZBA): Delete;
(MASK_ZBB): Delete;
(MASK_ZBC): Delete;
(MASK_ZBS): Delete;
(TARGET_ZBA): Delete;
(TARGET_ZBB): Delete;
(TARGET_ZBC): Delete;
(TARGET_ZBS): Delete;
(MASK_ZFINX): Delete;
(MASK_ZDINX): Delete;
(MASK_ZHINX): Delete;
(MASK_ZHINXMIN): Delete;
(TARGET_ZFINX): Delete;
(TARGET_ZDINX): Delete;
(TARGET_ZHINX): Delete;
(TARGET_ZHINXMIN): Delete;
(MASK_ZBKB): Delete;
(MASK_ZBKC): Delete;
(MASK_ZBKX): Delete;
(MASK_ZKNE): Delete;
(MASK_ZKND): Delete;
(MASK_ZKNH): Delete;
(MASK_ZKR): Delete;
(MASK_ZKSED): Delete;
(MASK_ZKSH): Delete;
(MASK_ZKT): Delete;
(TARGET_ZBKB): Delete;
(TARGET_ZBKC): Delete;
(TARGET_ZBKX): Delete;
(TARGET_ZKNE): Delete;
(TARGET_ZKND): Delete;
(TARGET_ZKNH): Delete;
(TARGET_ZKR): Delete;
(TARGET_ZKSED): Delete;
(TARGET_ZKSH): Delete;
(TARGET_ZKT): Delete;
(MASK_VECTOR_ELEN_32): Delete;
(MASK_VECTOR_ELEN_64): Delete;
(MASK_VECTOR_ELEN_FP_32): Delete;
(MASK_VECTOR_ELEN_FP_64): Delete;
(TARGET_VECTOR_ELEN_32): Delete;
(TARGET_VECTOR_ELEN_64): Delete;
(TARGET_VECTOR_ELEN_FP_32): Delete;
(TARGET_VECTOR_ELEN_FP_64): Delete;
(MASK_ZVL32B): Delete;
(MASK_ZVL64B): Delete;
(MASK_ZVL128B): Delete;
(MASK_ZVL256B): Delete;
(MASK_ZVL512B): Delete;
(MASK_ZVL1024B): Delete;
(MASK_ZVL2048B): Delete;
(MASK_ZVL4096B): Delete;
(MASK_ZVL8192B): Delete;
(MASK_ZVL16384B): Delete;
(MASK_ZVL32768B): Delete;
(MASK_ZVL65536B): Delete;
(TARGET_ZVL32B): Delete;
(TARGET_ZVL64B): Delete;
(TARGET_ZVL128B): Delete;
(TARGET_ZVL256B): Delete;
(TARGET_ZVL512B): Delete;
(TARGET_ZVL1024B): Delete;
(TARGET_ZVL2048B): Delete;
(TARGET_ZVL4096B): Delete;
(TARGET_ZVL8192B): Delete;
(TARGET_ZVL16384B): Delete;
(TARGET_ZVL32768B): Delete;
(TARGET_ZVL65536B): Delete;
(MASK_ZICBOZ): Delete;
(MASK_ZICBOM): Delete;
(MASK_ZICBOP): Delete;
(TARGET_ZICBOZ): Delete;
(TARGET_ZICBOM): Delete;
(TARGET_ZICBOP): Delete;
(MASK_ZFHMIN): Delete;
(MASK_ZFH): Delete;
(TARGET_ZFHMIN): Delete;
(TARGET_ZFH): Delete;
(MASK_ZMMUL): Delete;
(TARGET_ZMMUL): Delete;
(MASK_SVINVAL): Delete;
(MASK_SVNAPOT): Delete;
(TARGET_SVINVAL): Delete;
(TARGET_SVNAPOT): Delete;
* config/riscv/riscv.opt: Add new Mask defination.
* opt-functions.awk:  Add new function to find the index
  of target variable from extra_target_vars.
* opt-read.awk:   Add new function to store the Mask flags.
* opth-gen.awk:   Add new function to output the defination of
  Mask Macro and Target Macro.
---
 gcc/config/riscv/riscv-opts.h | 115 --
 gcc/config/riscv/riscv.opt|  90 ++
 gcc/opt-functions.awk |  11 
 gcc/opt-read.awk  |  16 -
 gcc/opth-gen.awk  |  22 +++
 5 files changed, 138 insertions(+), 116 deletions(-)

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 25fd85b09b1..7cf28838cb5 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -66,121 +66,6 @@ enum stack_protector_guard {
   SSP_TLS, /* per-thread canary in TLS block */
   SSP_GLOBAL   /* global canary */
 };
-
-#define MASK_ZICSR(1 << 0)
-#define MASK_ZIFENCEI (1 << 1)
-
-#define TARGET_ZICSR((riscv_zi_subext & MASK_ZICSR) != 0)
-#define TARGET_ZIFENCEI ((riscv_zi_subext & MASK_ZIFENCEI) != 0)
-
-#define MASK_ZAWRS   (1 << 0)
-#define TARGET_ZAWRS ((riscv_za_subext & MASK_ZAWRS) != 0)
-
-#define MASK_ZBA  (1 << 

Re: [PATCH 1/2] testsuite: Fix analyzer errors for newlib-errno

2023-02-28 Thread Hans-Peter Nilsson via Gcc-patches
> From: David Malcolm 
> Date: Tue, 28 Feb 2023 14:12:47 -0500

> On Tue, 2023-02-28 at 19:47 +0100, Hans-Peter Nilsson wrote:
> > Ok to commit?
> > -- >8 --
> > Investigating analyzer tesstsuite errors for cris-elf.  The same are
> > seen for pru-elf according to posts to gcc-testresults@.
> > 
> > For glibc, errno is #defined as:
> >  extern int *__errno_location (void) __THROW __attribute_const__;
> >  # define errno (*__errno_location ())
> > while for newlib in its default configuration, it's:
> >  #define errno (*__errno())
> >  extern int *__errno (void);
> 
> We're already handling ___errno (three underscores) for Solaris as of
> 7c9717fcb5cf94ce1e7ef5c903058adf9980ff28; does it fix the issue if you 
> add __errno (two underscores) to analyzer/kf.cc's 
> register_known_functions in an analogous way to that commit?  (i.e.
> wiring it up to kf_errno_location, "teaching" the analyzer that that
> function returns a pointer to the "errno region")

But...there's already "support" for two underscores since
the commit you quote, so I guess not.  I strongly believe
"the critical difference is that __attribute__
((__const__))" because I indeed proved it by adding it to
the newlib definition.

I doubt these tests actually *pass* for OS X, because that
one looks identical to the newlib definition as quoted in
that commit (compare to the newlib one I pasted in the quote
above).

It looks like that definition was added for good measure
along with the Solaris definition (that has the
attribute-const) but never tested.  Sorry, I don't have an
OS X to test it myself and according to a popular search
engine(...) nobody has posted gcc-testresults@ for anything
"apple" since gcc-4.7 era. :(

brgds, H-P

> 
> Dave
> 
> > 
> > The critical difference is that __attribute__ ((__const__)),
> > where glibc says that the caller will see the same value on
> > all calls (from the same context; read: same thread).  I'm
> > not sure the absence of __attribute__ ((__const__)) for the
> > newlib definition is deliberate, but I guess it can.
> > Either way, without the "const" attribute, it can't be known
> > that the same location will be returned the next time, so
> > analyzer-tests that depend the value being known it should
> > see UNKNOWN rather than TRUE, that's why the deliberate
> > check for UNKNOWN rather than xfailing the test.
> > 
> > For isatty-1.c, it's the same problem, but here it'd be
> > unweildy with the extra dg-lines, so better just skip it for
> > newlib targets.
> > 
> > testsuite:
> > * gcc.dg/analyzer/call-summaries-errno.c: Expect UNKNOWN
> > for newlib after having set errno.
> > * gcc.dg/analyzer/errno-1.c: Ditto.
> > * gcc.dg/analyzer/isatty-1.c: Skip for newlib targets.
> > ---
> >  gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c | 3 ++-
> >  gcc/testsuite/gcc.dg/analyzer/errno-1.c  | 3 ++-
> >  gcc/testsuite/gcc.dg/analyzer/isatty-1.c | 2 +-
> >  3 files changed, 5 insertions(+), 3 deletions(-)
> > 
> > diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> > b/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> > index e4333b30bb77..cf4d9f7141e4 100644
> > --- a/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> > +++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> > @@ -13,5 +13,6 @@ void test_sets_errno (int y)
> >sets_errno (y);
> >sets_errno (y);
> >  
> > -  __analyzer_eval (errno == y); /* { dg-warning "TRUE" } */  
> > +  __analyzer_eval (errno == y); /* { dg-warning "TRUE" "errno is at
> > a constant location" { target { ! newlib } } } */
> > +  /* { dg-warning "UNKNOWN" "errno is not known to be at a constant
> > location" { target { newlib } } .-1 } */
> >  }
> > diff --git a/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> > b/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> > index 6b9d28c10799..af0cc3d52a36 100644
> > --- a/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> > +++ b/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> > @@ -17,7 +17,8 @@ void test_storing_to_errno (int val)
> >  {
> >__analyzer_eval (errno == val); /* { dg-warning "UNKNOWN" } */
> >errno = val;
> > -  __analyzer_eval (errno == val); /* { dg-warning "TRUE" } */
> > +  __analyzer_eval (errno == val); /* { dg-warning "TRUE" "errno is
> > at a constant location" { target { ! newlib } } } */
> > +  /* { dg-warning "UNKNOWN" "errno is not known to be at a constant
> > location" { target { newlib } } .-1 } */
> >external_fn ();
> >__analyzer_eval (errno == val); /* { dg-warning "UNKNOWN" } */  
> >  }
> > diff --git a/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
> > b/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
> > index 389d2cdf3f18..450a7d71990d 100644
> > --- a/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
> > +++ b/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
> > @@ -1,4 +1,4 @@
> > -/* { dg-skip-if "" { powerpc*-*-aix* } } */
> > +/* { dg-skip-if "" { powerpc*-*-aix* || newlib } } */
> >  
> >  #include 
> >  #include "analyzer-decls.h"
> 


Re: [PATCH v3 04/11] riscv: thead: Add support for the XTheadBs ISA extension

2023-02-28 Thread Hans-Peter Nilsson



On Tue, 28 Feb 2023, Christoph Müllner wrote:

> On Sun, Feb 26, 2023 at 12:42 AM Hans-Peter Nilsson  wrote:
> >
> > On Fri, 24 Feb 2023, Christoph Muellner wrote:
> > > diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md
> > > index 158e9124c3a..2c684885850 100644
> > > --- a/gcc/config/riscv/thead.md
> > > +++ b/gcc/config/riscv/thead.md
> > > @@ -29,3 +29,14 @@ (define_insn "*th_addsl"
> > >"th.addsl\t%0,%3,%1,%2"
> > >[(set_attr "type" "bitmanip")
> > > (set_attr "mode" "")])
> > > +
> > > +;; XTheadBs
> > > +
> > > +(define_insn "*th_tst"
> > > +  [(set (match_operand:X 0 "register_operand" "=r")
> > > + (zero_extract:X (match_operand:X 1 "register_operand" "r")
> > > + (const_int 1)
> > > + (match_operand 2 "immediate_operand" "i")))]
> >
> > (Here and same elsewhere.)
> >
> > You're unlikely to get other constant operands in that pattern,
> > but FWIW, the actual matching pair for just CONST_INT is
> > "const_int_operand" for the predicate and "n" for the
> > constraint.  Using the right predicate and constraint will also
> > help the generated part of recog be a few nanoseconds faster. ;)
> 
> Thank you for that comment!
> I think what you mean would look like this:
> 
> (define_insn "*th_tst"
>   [(set (match_operand:X 0 "register_operand" "=r")
> (zero_extract:X (match_operand:X 1 "register_operand" "r")
> (match_operand 3 "const_int_operand" "n")
> (match_operand 2 "immediate_operand" "i")))]

No; misunderstanding.  Keep the (const_int 1) but replace
(match_operand 2 "immediate_operand" "i") with
(match_operand 2 "const_int_operand" "n")

brgds, H-P


Re: [Patch] gcc.dg/overflow-warn-9.c: exclude from LLP64

2023-02-28 Thread Hans-Peter Nilsson
On Tue, 28 Feb 2023, Jonathan Yong wrote:

> On 2/28/23 03:06, Hans-Peter Nilsson wrote:
> > 
> > On Mon, 27 Feb 2023, Jonathan Yong via Gcc-patches wrote:
> > 
> > > This test is for LP64 only, exclude LLP64 too.
> > > Patch OK?
> > 
> > I may be confused, but you're not making use of the "llp64"
> > effective target, there instead excluding/including lp64 /
> > ilp32 in sets that not obviously mean "exclude LLP64".
> > 
> > To wit, how is "! ilp32" -> "lp64" and "ilp32" -> "! lp64"
> > expressing "! llp64"?
> > 
> > brgds, H-P
> 
> Attached new version, hopefully it is clearer.
> 

Yes, thank you!  (Not an approver; not an approval.)

brgds, H-P


Re: [PATCH] amdgcn: Enable SIMD vectorization of math functions

2023-02-28 Thread Andrew Pinski via Gcc-patches
On Tue, Feb 28, 2023 at 3:02 PM Kwok Cheung Yeung  wrote:
>
> Hello
>
> This patch implements the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
> target hook for the AMD GCN architecture, such that when vectorized,
> calls to builtin standard math functions such as asinf, exp, pow etc.
> are converted to calls to the recently added vectorized math functions
> for GCN in Newlib. The -fno-math-errno flag is required in addition to
> the usual vectorization optimization flags for this to occur, and some
> of the math functions (the larger double-precision ones) require a large
> stack size to function properly.
>
> This patch requires the GCN vector math functions in Newlib to function
> - these were included in the recent 4.3.0.20230120 snapshot. As this was
> a minimum requirement starting from the patch 'amdgcn, libgomp: Manually
> allocated stacks', this should not be a problem.
>
> I have added new testcases in the testsuite that compare the output of
> the vectorized math functions against the scalar, passing if they are
> sufficiently close. With the testcase for standalone GCN (without
> libgomp) in gcc.target/gcn/, there is a problem since gcn-run currently
> cannot set the stack size correctly in DejaGnu testing, so I have made
> it a compile test for now - it is still useful to check that calls to
> the correct functions are being made. The runtime correctness is still
> covered by the libgomp test.

I thought we were moving towards using the simd attribute instead and
moving away from these kind of patches.
Though since gcn is a special target that including math.h normally
does not happen for offloading this might be still usefull.


>
> Okay for trunk?

We are in stage 4 of GCC 13 release cycle, I suspect we want to wait
until GCC 13 branches off to apply this.

Thanks,
Andrew Pinski

>
> Thanks
>
> Kwok


[PATCH] amdgcn: Enable SIMD vectorization of math functions

2023-02-28 Thread Kwok Cheung Yeung

Hello

This patch implements the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION 
target hook for the AMD GCN architecture, such that when vectorized, 
calls to builtin standard math functions such as asinf, exp, pow etc. 
are converted to calls to the recently added vectorized math functions 
for GCN in Newlib. The -fno-math-errno flag is required in addition to 
the usual vectorization optimization flags for this to occur, and some 
of the math functions (the larger double-precision ones) require a large 
stack size to function properly.


This patch requires the GCN vector math functions in Newlib to function 
- these were included in the recent 4.3.0.20230120 snapshot. As this was 
a minimum requirement starting from the patch 'amdgcn, libgomp: Manually 
allocated stacks', this should not be a problem.


I have added new testcases in the testsuite that compare the output of 
the vectorized math functions against the scalar, passing if they are 
sufficiently close. With the testcase for standalone GCN (without 
libgomp) in gcc.target/gcn/, there is a problem since gcn-run currently 
cannot set the stack size correctly in DejaGnu testing, so I have made 
it a compile test for now - it is still useful to check that calls to 
the correct functions are being made. The runtime correctness is still 
covered by the libgomp test.


Okay for trunk?

Thanks

Kwok
From 69d13dc898ff7c70e80299a92dc895a89a9e679b Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 28 Feb 2023 14:15:47 +
Subject: [PATCH] amdgcn: Enable SIMD vectorization of math functions

Calls to vectorized versions of routines in the math library will now
be inserted when vectorizing code containing supported math functions.

2023-02-28  Kwok Cheung Yeung  
Paul-Antoine Arras  

gcc/
* builtins.cc (mathfn_built_in_explicit): New.
* config/gcn/gcn.cc: Include case-cfn-macros.h.
(mathfn_built_in_explicit): Add prototype.
(gcn_vectorize_builtin_vectorized_function): New.
(gcn_libc_has_function): New.
(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define.
(TARGET_LIBC_HAS_FUNCTION): Define.

gcc/testsuite/
* gcc.target/gcn/simd-math-1.c: New testcase.

libgomp/
* testsuite/libgomp.c/simd-math-1.c: New testcase.
---
 gcc/builtins.cc|   8 +
 gcc/config/gcn/gcn.cc  | 110 +++
 gcc/testsuite/gcc.target/gcn/simd-math-1.c | 210 
 libgomp/testsuite/libgomp.c/simd-math-1.c  | 217 +
 4 files changed, 545 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/gcn/simd-math-1.c
 create mode 100644 libgomp/testsuite/libgomp.c/simd-math-1.c

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 4d467c8c5c1..305c65c29be 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -2089,6 +2089,14 @@ mathfn_built_in (tree type, combined_fn fn)
   return mathfn_built_in_1 (type, fn, /*implicit=*/ 1);
 }
 
+/* Like mathfn_built_in_1, but always use the explicit array.  */
+
+tree
+mathfn_built_in_explicit (tree type, combined_fn fn)
+{
+  return mathfn_built_in_1 (type, fn, /*implicit=*/ 0);
+}
+
 /* Like mathfn_built_in_1, but take a built_in_function and
always use the implicit array.  */
 
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 23ab01e75d8..d99bb63d4c0 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -53,6 +53,7 @@
 #include "dwarf2.h"
 #include "gimple.h"
 #include "cgraph.h"
+#include "case-cfn-macros.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -5240,6 +5241,110 @@ gcn_simd_clone_usable (struct cgraph_node *ARG_UNUSED 
(node))
   return 0;
 }
 
+tree mathfn_built_in_explicit (tree, combined_fn);
+
+/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION.
+   Return the function declaration of the vectorized version of the builtin
+   in the math library if available.  */
+
+tree
+gcn_vectorize_builtin_vectorized_function (unsigned int fn, tree type_out,
+  tree type_in)
+{
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+  || TREE_CODE (type_in) != VECTOR_TYPE)
+return NULL_TREE;
+
+  machine_mode out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  int out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  machine_mode in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  int in_n = TYPE_VECTOR_SUBPARTS (type_in);
+  combined_fn cfn = combined_fn (fn);
+
+  /* Keep this consistent with the list of vectorized math routines.  */
+  int implicit_p;
+  switch (fn)
+{
+CASE_CFN_ACOS:
+CASE_CFN_ACOSH:
+CASE_CFN_ASIN:
+CASE_CFN_ASINH:
+CASE_CFN_ATAN:
+CASE_CFN_ATAN2:
+CASE_CFN_ATANH:
+CASE_CFN_COPYSIGN:
+CASE_CFN_COS:
+CASE_CFN_COSH:
+CASE_CFN_ERF:
+CASE_CFN_EXP:
+CASE_CFN_EXP2:
+CASE_CFN_FINITE:
+CASE_CFN_FMOD:
+CASE_CFN_GAMMA:
+CASE_CFN_HYPOT:
+CASE_CFN_ISNAN:
+CASE_CFN_LGAMMA:
+

Re: [PATCH] ubsan: Honor -fstrict-flex-arrays= in -fsanitize=bounds [PR108894]

2023-02-28 Thread Jakub Jelinek via Gcc-patches
On Tue, Feb 28, 2023 at 07:19:40PM +, Qing Zhao wrote:
> Understood.  
> So, your patch fixed this bug, and then [0] arrays are instrumented by 
> default with this patch.
> 
> > Well, it would complain about
> > struct S { int a; int b[0]; int c; } s;
> > ... [1] ...
> > for C++, but not for C.
> 
> A little confused here: [0] arrays were instrumented by default for C++ if 
> it’s not a trailing array, but not for C?

Given say
struct S { int a; int b[0]; int c; } s;

int
main ()
{
  int *volatile p = [0];
  p = [1];
  int volatile q = s.b[0];
}
both -fsanitize=bounds and -fsanitize=bounds-strict behaved the same way,
in C nothing was reported, in C++ the p = [1]; statement.
The reasons for s.b[0] not being reported in C++ was that for
!ignore_off_by_one, bounds was ~(size_t)0, and so index > ~(size_t)0
is always false.  While with the committed patch it is
index >= (~(size_t)0)+1 and so always true.  And in C additionally, we
punted early because TYPE_MAX_VALUE (domain) was NULL.

Jakub



[wwwdocs] Document synchronized_value addition to libstdc++

2023-02-28 Thread Jonathan Wakely via Gcc-patches
Pushed to wwwdocs.


---
 htdocs/gcc-13/changes.html | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index a803f501..811d9bdf 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -324,6 +324,9 @@ a work-in-progress.
   Support for the experimental/scope header
   from v3 of the Library Fundamentals Technical Specification.
   
+  Support for the experimental/synchronized_value
+  header from v2 of the Concurrency Technical Specification.
+  
 
 
 
-- 
2.39.2



Re: [PATCH] ubsan: Honor -fstrict-flex-arrays= in -fsanitize=bounds [PR108894]

2023-02-28 Thread Qing Zhao via Gcc-patches


> On Feb 28, 2023, at 12:49 PM, Jakub Jelinek  wrote:
> 
> On Tue, Feb 28, 2023 at 04:13:28PM +, Qing Zhao wrote:
>>> On Feb 28, 2023, at 3:26 AM, Jakub Jelinek via Gcc-patches 
>>>  wrote:
>>> I think -fstrict-flex-arrays* options can be considered as language
>>> mode changing options, by default flexible member-like arrays are
>>> handled like flexible arrays, but that option can change the set of
>>> the arrays which are treated like that.  So, -fsanitize=bounds should
>>> change with that on what is considered acceptable and what isn't.
>>> While -fsanitize=bounds-strict should reject them all always to
>>> continue previous behavior.
>> 
>> 
>> As my understanding, without your current patch, the current 
>> -fsanitize=bounds-strict behaves like -fstrict-flex-arrays=2, i.e:
>> it treats:
>>   [], [0] as flexible array members;
>> but
>>   [1], [4] as regular arrays
> 
> Yes, but not because it would be an intention, but because of a bug
> it actually never instrumented [0] arrays.  
Understood.  
So, your patch fixed this bug, and then [0] arrays are instrumented by default 
with this patch.

> Well, it would complain about
> struct S { int a; int b[0]; int c; } s;
> ... [1] ...
> for C++, but not for C.

A little confused here: [0] arrays were instrumented by default for C++ if it’s 
not a trailing array, but not for C?

> 
>> Then with your current patch, [0] will NOT be treated as flexible array 
>> members by default anymore, so, the -fsanitize=bounds-strict will
>> treats:
>>   [] as flexible array members;
>> but
>>   [0], [1], [4] as regular arrays
>> The same behavior as -fstrict-flex-arrays=3.
>> 
>> Therefore, -fsanitize=bounds-strict already implies -fstrict-flex-arrays=3. 
> 
> No.  -fsanitize=bounds-strict doesn't imply anything for
> flag_strict_flex_arrays, it for the bounds sanitization decisions
> behaves as if -fstrict-flex-arrays=3.

Yes, that was what I meant. -:)
> 
>> For [0] arrays, why C++ and C represent with different IR? 
> 
> I think it is a historic difference that could take quite a big amount of
> work to get rid of (and the question is what is better), and even after that
> work there would be still big chances of regressions.

Okay, I see… (this is really a confusion situation…) but anyway…

Thanks. 

Qing
> 
>   Jakub
> 



Re: [PATCH 2/2] testsuite: Fix analyzer errors for newlib-fd

2023-02-28 Thread David Malcolm via Gcc-patches
On Tue, 2023-02-28 at 19:49 +0100, Hans-Peter Nilsson wrote:
> Ok to commit? 

OK



Re: [PATCH 1/2] testsuite: Fix analyzer errors for newlib-errno

2023-02-28 Thread David Malcolm via Gcc-patches
On Tue, 2023-02-28 at 19:47 +0100, Hans-Peter Nilsson wrote:
> Ok to commit?
> -- >8 --
> Investigating analyzer tesstsuite errors for cris-elf.  The same are
> seen for pru-elf according to posts to gcc-testresults@.
> 
> For glibc, errno is #defined as:
>  extern int *__errno_location (void) __THROW __attribute_const__;
>  # define errno (*__errno_location ())
> while for newlib in its default configuration, it's:
>  #define errno (*__errno())
>  extern int *__errno (void);

We're already handling ___errno (three underscores) for Solaris as of
7c9717fcb5cf94ce1e7ef5c903058adf9980ff28; does it fix the issue if you 
add __errno (two underscores) to analyzer/kf.cc's 
register_known_functions in an analogous way to that commit?  (i.e.
wiring it up to kf_errno_location, "teaching" the analyzer that that
function returns a pointer to the "errno region")

Dave

> 
> The critical difference is that __attribute__ ((__const__)),
> where glibc says that the caller will see the same value on
> all calls (from the same context; read: same thread).  I'm
> not sure the absence of __attribute__ ((__const__)) for the
> newlib definition is deliberate, but I guess it can.
> Either way, without the "const" attribute, it can't be known
> that the same location will be returned the next time, so
> analyzer-tests that depend the value being known it should
> see UNKNOWN rather than TRUE, that's why the deliberate
> check for UNKNOWN rather than xfailing the test.
> 
> For isatty-1.c, it's the same problem, but here it'd be
> unweildy with the extra dg-lines, so better just skip it for
> newlib targets.
> 
> testsuite:
> * gcc.dg/analyzer/call-summaries-errno.c: Expect UNKNOWN
> for newlib after having set errno.
> * gcc.dg/analyzer/errno-1.c: Ditto.
> * gcc.dg/analyzer/isatty-1.c: Skip for newlib targets.
> ---
>  gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c | 3 ++-
>  gcc/testsuite/gcc.dg/analyzer/errno-1.c  | 3 ++-
>  gcc/testsuite/gcc.dg/analyzer/isatty-1.c | 2 +-
>  3 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> b/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> index e4333b30bb77..cf4d9f7141e4 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
> @@ -13,5 +13,6 @@ void test_sets_errno (int y)
>    sets_errno (y);
>    sets_errno (y);
>  
> -  __analyzer_eval (errno == y); /* { dg-warning "TRUE" } */  
> +  __analyzer_eval (errno == y); /* { dg-warning "TRUE" "errno is at
> a constant location" { target { ! newlib } } } */
> +  /* { dg-warning "UNKNOWN" "errno is not known to be at a constant
> location" { target { newlib } } .-1 } */
>  }
> diff --git a/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> b/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> index 6b9d28c10799..af0cc3d52a36 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/errno-1.c
> @@ -17,7 +17,8 @@ void test_storing_to_errno (int val)
>  {
>    __analyzer_eval (errno == val); /* { dg-warning "UNKNOWN" } */
>    errno = val;
> -  __analyzer_eval (errno == val); /* { dg-warning "TRUE" } */
> +  __analyzer_eval (errno == val); /* { dg-warning "TRUE" "errno is
> at a constant location" { target { ! newlib } } } */
> +  /* { dg-warning "UNKNOWN" "errno is not known to be at a constant
> location" { target { newlib } } .-1 } */
>    external_fn ();
>    __analyzer_eval (errno == val); /* { dg-warning "UNKNOWN" } */  
>  }
> diff --git a/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
> b/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
> index 389d2cdf3f18..450a7d71990d 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
> @@ -1,4 +1,4 @@
> -/* { dg-skip-if "" { powerpc*-*-aix* } } */
> +/* { dg-skip-if "" { powerpc*-*-aix* || newlib } } */
>  
>  #include 
>  #include "analyzer-decls.h"



[PATCH 2/2] testsuite: Fix analyzer errors for newlib-fd

2023-02-28 Thread Hans-Peter Nilsson via Gcc-patches
Ok to commit?  (After this, there's
gcc.dg/analyzer/flex-without-call-summaries.c left to do.)

-- >8 --
Investigating analyzer testsuite errors for cris-elf.  The same are
seen for pru-elf according to posts to gcc-testresults@.

The test fd-access-mode-target-headers.c uses the analyzer
"sm-fd" which for this use requires (e.g.) that constants
O_ACCMODE, O_RDONLY and O_WRONLY are defined as literal
constants.  While for glibc, O_ACCMODE is defined as:
 #define O_ACCMODE 0003
in newlib, it's defined as:
 #define O_ACCMODE (O_RDONLY|O_WRONLY|O_RDWR)
and the analyzer is not able to make use of an expression
like this (even though O_RDONLY, O_WRONLY and O_RDWR are
defined as literal constants and the whole evaluates to 3).
Better do as for AIX and skip this test.

testsuite:
* gcc.dg/analyzer/fd-access-mode-target-headers.c: Skip for
newlib targets too.
---
 gcc/testsuite/gcc.dg/analyzer/fd-access-mode-target-headers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/analyzer/fd-access-mode-target-headers.c 
b/gcc/testsuite/gcc.dg/analyzer/fd-access-mode-target-headers.c
index 847d47e06342..cf273b217d17 100644
--- a/gcc/testsuite/gcc.dg/analyzer/fd-access-mode-target-headers.c
+++ b/gcc/testsuite/gcc.dg/analyzer/fd-access-mode-target-headers.c
@@ -1,4 +1,4 @@
-/* { dg-skip-if "" { powerpc*-*-aix* } } */
+/* { dg-skip-if "" { powerpc*-*-aix* || newlib } } */
 
 #include 
 #include 
-- 
2.30.2



[PATCH 1/2] testsuite: Fix analyzer errors for newlib-errno

2023-02-28 Thread Hans-Peter Nilsson via Gcc-patches
Ok to commit?
-- >8 --
Investigating analyzer tesstsuite errors for cris-elf.  The same are
seen for pru-elf according to posts to gcc-testresults@.

For glibc, errno is #defined as:
 extern int *__errno_location (void) __THROW __attribute_const__;
 # define errno (*__errno_location ())
while for newlib in its default configuration, it's:
 #define errno (*__errno())
 extern int *__errno (void);

The critical difference is that __attribute__ ((__const__)),
where glibc says that the caller will see the same value on
all calls (from the same context; read: same thread).  I'm
not sure the absence of __attribute__ ((__const__)) for the
newlib definition is deliberate, but I guess it can.
Either way, without the "const" attribute, it can't be known
that the same location will be returned the next time, so
analyzer-tests that depend the value being known it should
see UNKNOWN rather than TRUE, that's why the deliberate
check for UNKNOWN rather than xfailing the test.

For isatty-1.c, it's the same problem, but here it'd be
unweildy with the extra dg-lines, so better just skip it for
newlib targets.

testsuite:
* gcc.dg/analyzer/call-summaries-errno.c: Expect UNKNOWN
for newlib after having set errno.
* gcc.dg/analyzer/errno-1.c: Ditto.
* gcc.dg/analyzer/isatty-1.c: Skip for newlib targets.
---
 gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c | 3 ++-
 gcc/testsuite/gcc.dg/analyzer/errno-1.c  | 3 ++-
 gcc/testsuite/gcc.dg/analyzer/isatty-1.c | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c 
b/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
index e4333b30bb77..cf4d9f7141e4 100644
--- a/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
+++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-errno.c
@@ -13,5 +13,6 @@ void test_sets_errno (int y)
   sets_errno (y);
   sets_errno (y);
 
-  __analyzer_eval (errno == y); /* { dg-warning "TRUE" } */  
+  __analyzer_eval (errno == y); /* { dg-warning "TRUE" "errno is at a constant 
location" { target { ! newlib } } } */
+  /* { dg-warning "UNKNOWN" "errno is not known to be at a constant location" 
{ target { newlib } } .-1 } */
 }
diff --git a/gcc/testsuite/gcc.dg/analyzer/errno-1.c 
b/gcc/testsuite/gcc.dg/analyzer/errno-1.c
index 6b9d28c10799..af0cc3d52a36 100644
--- a/gcc/testsuite/gcc.dg/analyzer/errno-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/errno-1.c
@@ -17,7 +17,8 @@ void test_storing_to_errno (int val)
 {
   __analyzer_eval (errno == val); /* { dg-warning "UNKNOWN" } */
   errno = val;
-  __analyzer_eval (errno == val); /* { dg-warning "TRUE" } */
+  __analyzer_eval (errno == val); /* { dg-warning "TRUE" "errno is at a 
constant location" { target { ! newlib } } } */
+  /* { dg-warning "UNKNOWN" "errno is not known to be at a constant location" 
{ target { newlib } } .-1 } */
   external_fn ();
   __analyzer_eval (errno == val); /* { dg-warning "UNKNOWN" } */  
 }
diff --git a/gcc/testsuite/gcc.dg/analyzer/isatty-1.c 
b/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
index 389d2cdf3f18..450a7d71990d 100644
--- a/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/isatty-1.c
@@ -1,4 +1,4 @@
-/* { dg-skip-if "" { powerpc*-*-aix* } } */
+/* { dg-skip-if "" { powerpc*-*-aix* || newlib } } */
 
 #include 
 #include "analyzer-decls.h"
-- 
2.30.2



Re: [Patch] harden-sls-6.c: fix warning on LLP64

2023-02-28 Thread Jakub Jelinek via Gcc-patches
On Wed, Feb 15, 2023 at 01:44:08PM +, Jonathan Yong via Gcc-patches wrote:
> gcc/testsuite/ChangeLog:
> 
> * gcc.target/i386/harden-sls-6.c: fix warning on LLP64
> targets.
> 
> Attached patch OK?

> From c0572a1e95c6f569980d6b7454c8dc293f07389e Mon Sep 17 00:00:00 2001
> From: Jonathan Yong <10wa...@gmail.com>
> Date: Wed, 15 Feb 2023 13:42:12 +
> Subject: [PATCH] harden-sls-6.c: fix warning on LLP64
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/i386/harden-sls-6.c: fix warning on LLP64
>   targets.

s/fix/Fix/

Otherwise LGTM.
> 
> Signed-off-by: Jonathan Yong <10wa...@gmail.com>
> ---
>  gcc/testsuite/gcc.target/i386/harden-sls-6.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/gcc.target/i386/harden-sls-6.c 
> b/gcc/testsuite/gcc.target/i386/harden-sls-6.c
> index 9068eb64008..3b270211927 100644
> --- a/gcc/testsuite/gcc.target/i386/harden-sls-6.c
> +++ b/gcc/testsuite/gcc.target/i386/harden-sls-6.c
> @@ -11,7 +11,7 @@ struct _Unwind_Context {
>struct _Unwind_Context cur_contextcur_context =
>_Unwind_Resume_or_Rethrow_this_context;
>offset(0);
> -  __builtin_eh_return ((long) offset, 0);
> +  __builtin_eh_return ((__INTPTR_TYPE__) offset, 0);
>  }
>  
>  /* { dg-final { scan-assembler "jmp\[ \t\]+\\*%rcx" } } */

Jakub



Re: [PATCH v3 04/11] riscv: thead: Add support for the XTheadBs ISA extension

2023-02-28 Thread Christoph Müllner
On Sun, Feb 26, 2023 at 12:42 AM Hans-Peter Nilsson  wrote:
>
> On Fri, 24 Feb 2023, Christoph Muellner wrote:
> > diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md
> > index 158e9124c3a..2c684885850 100644
> > --- a/gcc/config/riscv/thead.md
> > +++ b/gcc/config/riscv/thead.md
> > @@ -29,3 +29,14 @@ (define_insn "*th_addsl"
> >"th.addsl\t%0,%3,%1,%2"
> >[(set_attr "type" "bitmanip")
> > (set_attr "mode" "")])
> > +
> > +;; XTheadBs
> > +
> > +(define_insn "*th_tst"
> > +  [(set (match_operand:X 0 "register_operand" "=r")
> > + (zero_extract:X (match_operand:X 1 "register_operand" "r")
> > + (const_int 1)
> > + (match_operand 2 "immediate_operand" "i")))]
>
> (Here and same elsewhere.)
>
> You're unlikely to get other constant operands in that pattern,
> but FWIW, the actual matching pair for just CONST_INT is
> "const_int_operand" for the predicate and "n" for the
> constraint.  Using the right predicate and constraint will also
> help the generated part of recog be a few nanoseconds faster. ;)

Thank you for that comment!
I think what you mean would look like this:

(define_insn "*th_tst"
  [(set (match_operand:X 0 "register_operand" "=r")
(zero_extract:X (match_operand:X 1 "register_operand" "r")
(match_operand 3 "const_int_operand" "n")
(match_operand 2 "immediate_operand" "i")))]
  "TARGET_XTHEADBS && UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)
   && UINTVAL (operands[3]) == 1"
  "th.tst\t%0,%1,%2"
  [(set_attr "type" "bitmanip")])

So while we have more generic form in the pattern, the condition needs
to check that the operand is equal to 1.

I can change this in the patch (I don't have strong opinions about
this and I do care about the nanosecond).
However, I think this goes beyond this patchset.
Because a single git grep shows many examples of "const_int " matches
in GCC's backends.
Examples can be found in gcc/config/riscv/bitmanip.md,
gcc/config/aarch64/aarch64.md,...
So it feels like changing the patch to use const_int_operand would go
against common practice.

@Kito: Any preferences about this?

Thanks,
Christoph


Re: [PATCH] ubsan: Honor -fstrict-flex-arrays= in -fsanitize=bounds [PR108894]

2023-02-28 Thread Jakub Jelinek via Gcc-patches
On Tue, Feb 28, 2023 at 04:13:28PM +, Qing Zhao wrote:
> > On Feb 28, 2023, at 3:26 AM, Jakub Jelinek via Gcc-patches 
> >  wrote:
> > I think -fstrict-flex-arrays* options can be considered as language
> > mode changing options, by default flexible member-like arrays are
> > handled like flexible arrays, but that option can change the set of
> > the arrays which are treated like that.  So, -fsanitize=bounds should
> > change with that on what is considered acceptable and what isn't.
> > While -fsanitize=bounds-strict should reject them all always to
> > continue previous behavior.
> 
> 
> As my understanding, without your current patch, the current 
> -fsanitize=bounds-strict behaves like -fstrict-flex-arrays=2, i.e:
> it treats:
>[], [0] as flexible array members;
> but
>[1], [4] as regular arrays

Yes, but not because it would be an intention, but because of a bug
it actually never instrumented [0] arrays.  Well, it would complain about
struct S { int a; int b[0]; int c; } s;
... [1] ...
for C++, but not for C.

> Then with your current patch, [0] will NOT be treated as flexible array 
> members by default anymore, so, the -fsanitize=bounds-strict will
> treats:
>[] as flexible array members;
> but
>[0], [1], [4] as regular arrays
> The same behavior as -fstrict-flex-arrays=3.
> 
> Therefore, -fsanitize=bounds-strict already implies -fstrict-flex-arrays=3. 

No.  -fsanitize=bounds-strict doesn't imply anything for
flag_strict_flex_arrays, it for the bounds sanitization decisions
behaves as if -fstrict-flex-arrays=3.

> For [0] arrays, why C++ and C represent with different IR? 

I think it is a historic difference that could take quite a big amount of
work to get rid of (and the question is what is better), and even after that
work there would be still big chances of regressions.

Jakub



Re: [RFC PATCH v1 08/10] ifcvt: add if-conversion to conditional-zero instructions

2023-02-28 Thread Maciej W. Rozycki
On Mon, 13 Feb 2023, Jeff Law via Gcc-patches wrote:

> 3. The canaonical conditional-zero-or-value assumes the target can do a
> generic SEQ/SNE of two register values.  As you know, on RISC-V we have
> SEQZ/SNEZ.  So we've added another fallback path to handle that case in
> noce_emit_condzero.  You subtract the two values, then you can do an SEQZ/SNEZ
> on the result.

 NB these machine operations are identical to MIPSr6 SELEQZ and SELNEZ 
instructions (cf. ISA_HAS_SEL), so why can't we just duplicate what the 
MIPS backend does?  Or did the MIPS backend do something wrong here?

  Maciej


[Patch] OpenMP/Fortran: Fix handling of optional is_device_ptr + bind(C) [PR108546]

2023-02-28 Thread Tobias Burnus

(That's a[11/12/13 Regression] P2 regression) The problem is that an is-present
check on the receiver side (inside the target region) does not make much
sense; the != NULL check needs to be done before the GOMP_target_ext but
it *is* already there. (Having the check inside the target region failed
with an ICE.) Additionally, I encountered an issue for 'void *' alias
'type(c_ptr)'. That's on the Fortran-language side represented as
derived type with private component(s), but then mapped to 'void*'. In
any case, it lead to 'map(to: p) map(pset: p)' and the former will be at
some point get TYPE_UNIT_SIZE(TREE_TYPE(*p)) but '*p' is void, giving an
ICE in omp-lower ... OK for mainline – and later backport to 12 + 11?
Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP/Fortran: Fix handling of optional is_device_ptr + bind(C) [PR108546]

For is_device_ptr, optional checks should only be done before calling
libgomp, afterwards they are NULL either because of absent or, by
chance, because it is unallocated or unassociated (for pointers/allocatables).

Additionally, it fixes an issue with explicit mapping for 'type(c_ptr)'.

	PR middle-end/108546

gcc/fortran/ChangeLog:

	* trans-openmp.cc (gfc_trans_omp_clauses): Fix mapping of
	type(C_ptr) variables.

gcc/ChangeLog:

	* omp-low.cc (lower_omp_target): Remove optional handling
	on the receiver side, i.e. inside target (data), for
	use_device_ptr.

libgomp/ChangeLog:

	* testsuite/libgomp.fortran/is_device_ptr-3.f90: New test.
	* testsuite/libgomp.fortran/use_device_ptr-optional-4.f90: New test.

 gcc/fortran/trans-openmp.cc|  4 +-
 gcc/omp-low.cc |  3 +-
 .../testsuite/libgomp.fortran/is_device_ptr-3.f90  | 46 +++
 .../libgomp.fortran/use_device_ptr-optional-4.f90  | 53 ++
 4 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index 2d16f3be8ea..84c0184f48e 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -3152,7 +3152,9 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
 			   || GFC_DECL_CRAY_POINTEE (decl)
 			   || GFC_DESCRIPTOR_TYPE_P
 	 (TREE_TYPE (TREE_TYPE (decl)))
-			   || n->sym->ts.type == BT_DERIVED))
+			   || (n->sym->ts.type == BT_DERIVED
+   && (n->sym->ts.u.derived->ts.f90_type
+   != BT_VOID
 		{
 		  tree orig_decl = decl;
 
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index fef41a013ec..9757592c635 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -13942,7 +13942,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	  }
 	tree present;
 	present = ((do_optional_check
-			&& OMP_CLAUSE_CODE (c) != OMP_CLAUSE_HAS_DEVICE_ADDR)
+			&& OMP_CLAUSE_CODE (c) != OMP_CLAUSE_HAS_DEVICE_ADDR
+			&& OMP_CLAUSE_CODE (c) != OMP_CLAUSE_IS_DEVICE_PTR)
 		   ? omp_check_optional_argument (OMP_CLAUSE_DECL (c), true)
 		   : NULL_TREE);
 	if (present)
diff --git a/libgomp/testsuite/libgomp.fortran/is_device_ptr-3.f90 b/libgomp/testsuite/libgomp.fortran/is_device_ptr-3.f90
new file mode 100644
index 000..ab9f00ebecb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/is_device_ptr-3.f90
@@ -0,0 +1,46 @@
+module m
+   use iso_c_binding
+   implicit none
+contains
+   subroutine s(x,y,z)
+  type(c_ptr), optional :: x
+  integer, pointer, optional :: y
+  integer, allocatable, optional :: z
+  logical is_present, is_null
+  is_present = present(x)
+  if (is_present) &
+is_null = .not. c_associated(x)
+
+  !$omp target is_device_ptr(x) has_device_addr(y) has_device_addr(z)
+if (is_present) then
+  if (is_null) then
+if (c_associated(x)) stop 1
+if (associated(y)) stop 2
+if (allocated(z)) stop 3
+  else
+if (.not. c_associated(x, c_loc(y))) stop 4
+if (y /= 7) stop 5
+if (z /= 9) stop 6
+  end if
+end if
+  !$omp end target
+   end
+end
+
+use m
+implicit none
+integer, pointer :: p
+integer, allocatable :: a
+p => null()
+call s()
+!$omp target data map(p,a) use_device_addr(p,a)
+  call s(c_null_ptr, p, a)
+!$omp end target data
+allocate(p,a)
+p = 7
+a = 9
+!$omp target data map(p,a) use_device_addr(p,a)
+  call s(c_loc(p), p, a)
+!$omp end target data
+deallocate(p,a)
+end
diff --git a/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-4.f90 b/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-4.f90
new file mode 100644
index 000..b2a5c314685
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/use_device_ptr-optional-4.f90
@@ -0,0 +1,53 @@
+! PR 

Re: [PATCH] ubsan: Honor -fstrict-flex-arrays= in -fsanitize=bounds [PR108894]

2023-02-28 Thread Qing Zhao via Gcc-patches
Hi, Jakub,

Thanks a lot for fixing this issue.

I have several questions in below:

> On Feb 28, 2023, at 3:26 AM, Jakub Jelinek via Gcc-patches 
>  wrote:
> I think -fstrict-flex-arrays* options can be considered as language
> mode changing options, by default flexible member-like arrays are
> handled like flexible arrays, but that option can change the set of
> the arrays which are treated like that.  So, -fsanitize=bounds should
> change with that on what is considered acceptable and what isn't.
> While -fsanitize=bounds-strict should reject them all always to
> continue previous behavior.


As my understanding, without your current patch, the current 
-fsanitize=bounds-strict behaves like -fstrict-flex-arrays=2, i.e:
it treats:
   [], [0] as flexible array members;
but
   [1], [4] as regular arrays

Then with your current patch, [0] will NOT be treated as flexible array members 
by default anymore, so, the -fsanitize=bounds-strict will
treats:
   [] as flexible array members;
but
   [0], [1], [4] as regular arrays
The same behavior as -fstrict-flex-arrays=3.

Therefore, -fsanitize=bounds-strict already implies -fstrict-flex-arrays=3. 

Is the above understanding correctly?

> 
> The following patch implements that.  To support [0] array instrumentation,
> I had to change the meaning of the bounds argument to .UBSAN_BOUNDS,
> previously it was the TYPE_MAX_VALUE of the domain unless ignore_off_by_one
> (used for taking address of the array element rather than accessing it;
> in that case 1 is added to the bound argument) and the later lowered checks
> were if (index > bound) report_failure ().
> The problem with that is that for [0] arrays where (at least for C++)
> the max value is all ones, for accesses that condition will be never true;
> for addresses of elements it was working (in C++) correctly before.
> This patch changes it to add 1 + ignore_off_by_one, so -1 becomes 0 or
> 1 for _ref and changing the lowering to be if (index >= bound)
> report_failure ().  Furthermore, as C represents the [0] arrays with
> NULL TYPE_MAX_VALUE, I treated those like the C++ ones.

For [0] arrays, why C++ and C represent with different IR? 

thanks.

Qing
> 



Re: [PATCH] c++: Don't recurse on DECL_INITIAL for DECL_EXPR on non-VAR_DECLs [PR108606]

2023-02-28 Thread Jason Merrill via Gcc-patches

On 2/22/23 04:06, Jakub Jelinek wrote:

Hi!

The r13-2965-g73d9b0e5947e16 change changed the line touched in this patch
from
   return RECUR (tmp, want_rval);
to
   return RECUR (DECL_INITIAL (tmp), want_rval);
This is on DECL_EXPR handling code, where tmp can be lots of different
trees and DECL_INITIAL unfortunately also means different things on
different trees.
It is the initializer on VAR_DECL, DECL_ARG_TYPE on PARM_DECLs (though
those are unlikely to have DECL_EXPRs), for FUNCTION_DECLs the body,
..., USING_DECL_DECLS on USING_DECLs and DECL_FRIENDLIST on TYPE_DECLs.

The testcase below ICEs because we have a DECL_EXPR for TYPE_DECL
which has non-NULL DECL_FRIENDLIST and we certainly can't recurse on
the friend list.

The following patch is a conditional partial reversion of r13-2965,
it will recurse on DECL_INITIAL only for VAR_DECLs and for anything
else keep previous behavior.


Hmm, we can probably just return true for non-VAR_DECLs.  OK either way.


Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-02-22  Jakub Jelinek  

PR c++/108606
* constexpr.cc (potential_constant_expression_1) :
Only recurse on DECL_INITIAL (tmp) if tmp is a VAR_DECL, otherwise
recurse on tmp.

* g++.dg/cpp1y/pr108606.C: New test.

--- gcc/cp/constexpr.cc.jj  2023-02-20 22:06:36.0 +0100
+++ gcc/cp/constexpr.cc 2023-02-21 14:40:39.366910531 +0100
@@ -9699,7 +9699,9 @@ potential_constant_expression_1 (tree t,
   (tmp, /*constexpr_context_p=*/true, flags))
return false;
}
-  return RECUR (DECL_INITIAL (tmp), want_rval);
+  if (VAR_P (tmp))
+   return RECUR (DECL_INITIAL (tmp), want_rval);
+  return RECUR (tmp, want_rval);
  
  case TRY_FINALLY_EXPR:

return (RECUR (TREE_OPERAND (t, 0), want_rval)
--- gcc/testsuite/g++.dg/cpp1y/pr108606.C.jj2023-02-21 14:44:41.292380973 
+0100
+++ gcc/testsuite/g++.dg/cpp1y/pr108606.C   2023-02-21 14:44:15.622755459 
+0100
@@ -0,0 +1,11 @@
+// PR c++/108606
+// { dg-do compile { target c++14 } }
+
+template 
+void bar (T) {}
+
+void
+foo ()
+{
+  bar ([&] (auto x) { class C { friend void baz (); }; });
+}

Jakub





Re: [PATCH] [vxworks] make wint_t and wchar_t the same distinct type

2023-02-28 Thread Jason Merrill via Gcc-patches

On 2/22/23 10:23, Alexandre Oliva wrote:

Hello, Jason,

On Feb 17, 2023, Jason Merrill  wrote:


On 2/17/23 23:04, Alexandre Oliva wrote:


We used to define WINT_TYPE to WCHAR_TYPE, so that both wint_t and
wchar_t mapped to the same underlying type, but this caused a glitch
in Wstringop-overflow-6.C: on vxworks, wint_t is typedef'ed to
wchar_t



And fixing that isn't an option?


Erhm, why do you say "fixing"?  That implies it's broken, but I don't
see anything in the C++ standards, or in the relevant bits that it
imports from the C standards, that rules out using wint_t = wchar_t in
C++.  wint_t is imported from the C standard as an integral type that
meets certain requirements, and AFAICT in C++ wchar_t is an integral
type that meets those requirements.  Am I missing something?


Now, could it be changed so that wint_t is wchar_t's underlying type
rather than wchar_t?  If the equivalence is a compliance error, we could
file a bug report with WRS and request them to fix it, but modifying
their system headers would require a copyright license they don't grant,
so we avoid doing that.  I imagine that breaking this equivalence would
have ABI implications, and even break legitimate (though unportable)
programs because of overload, specializations and whatnot, so there
would have to be very strong reasons to support a request for such a
change.


Do the integer builtins work properly if we force them to use wchar_t
instead of an integer type?


I haven't observed any regressions, I don't see any builtin functions
with wint in their signature that we even expand as builtins, and my
imagination is failing me on why an integral type such as wchar_t would
fail for wint_t, where other integral types, including wchar_t's
underlying type, would work.  Did you have any specific risks in mind
about what could go wrong?


Sorry, I think I was mostly confusing myself about how distinct wchar_t 
really is.


The patch is OK.

Jason



Re: [PATCH] Accept pmf-vbit-in-delta extra warning

2023-02-28 Thread Jason Merrill via Gcc-patches

On 2/22/23 11:03, Alexandre Oliva wrote:

On Feb 17, 2023, Jason Merrill  wrote:


On 2/17/23 23:02, Alexandre Oliva wrote:


cp_build_binary_op, that issues -Waddress warnings, issues an extra
warning on arm targets, that g++.dg/warn/Waddress-5.C does not expect
when comparing a pointer-to-member-function literal with null.

The reason for the extra warning is that, on arm targets,
TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_delta, which
causes a different path to be taken, that extracts the
pointer-to-function and the delta fields (minus the vbit) and compares
each one with zero.  It's when comparing this pointer-to-function with
zero, in a recursive cp_build_binary_op, that another warning is
issued.

I suppose there should be a way to skip the warning in this recursive
call, without disabling other warnings that might be issued there, but



warning_sentinel ws (warn_address)


Oh, yeah, that will suppress the expected warning for pfn0, but isn't
there any risk whatsoever that it could suppress other -Waddress
warnings for tree operands of pfn0?


There shouldn't be an issue; those operands would have been considered 
for warning when building their subexpressions.



I see the cp_save_expr for side effects, but what if e.g. the pmfn we're
testing is an array element, and the index expression tests another pmfn
against NULL that should be warned about?  Or something else that
wouldn't have TREE_SIDE_EFFECTS, and would thus not go through
cp_save_expr.  Would we then warn for uses of both pfn0 and delta0?


Here's what I'm going to test for these concerns.  Ok to install if it
bootstraps successfully, and my concerns are unfounded?


OK.


[c++] suppress redundant null-addr warn in pfn from pmfn

From: Alexandre Oliva 

When TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_delta, when
we warn about comparing a pointer-to-member-function with NULL, we
also warn about comparing the pointer-to-function extracted from it
with NULL, which is redundant.  Suppress the redundant warning.


for  gcc/cp/ChangeLog

* typeck.cc (cp_build_binary_op): Suppress redundant warning
for pfn null test in pmfn test with vbit-in-delta.
---
  gcc/cp/typeck.cc |   17 -
  1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 4afb5e4f0d420..d5a3e501d8e91 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -5780,11 +5780,18 @@ cp_build_binary_op (const op_location_t ,
  
  	  pfn0 = pfn_from_ptrmemfunc (op0);

  delta0 = delta_from_ptrmemfunc (op0);
- e1 = cp_build_binary_op (location,
-  EQ_EXPR,
-  pfn0,
-  build_zero_cst (TREE_TYPE (pfn0)),
-  complain);
+ {
+   /* If we will warn below about a null-address compare
+  involving the orig_op0 ptrmemfunc, we'd likely also
+  warn about the pfn0's null-address compare, and
+  that would be redundant, so suppress it.  */
+   warning_sentinel ws (warn_address);
+   e1 = cp_build_binary_op (location,
+EQ_EXPR,
+pfn0,
+build_zero_cst (TREE_TYPE (pfn0)),
+complain);
+ }
  e2 = cp_build_binary_op (location,
   BIT_AND_EXPR,
   delta0,






RE: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-02-28 Thread Li, Pan2 via Gcc-patches
Hi Richard Sandiford,

Just tried the overloaded constant divisors with below print div, it works as 
you mentioned, !

printf ("can_div_away_from_zero_p (mode_precision[E_%smode], "
 "BITS_PER_UNIT, _size[E_%smode]);\n", m->name, m->name);

template
inline typename if_nonpoly::type
can_div_away_from_zero_p (const poly_int_pod ,
 Cb b,
 poly_int_pod *quotient)
{
  if (!can_div_trunc_p (a, b, quotient))
return false;
  if (maybe_ne (*quotient * b, a))
for (unsigned int i = 0; i < N; ++i)
  quotient->coeffs[i] += (quotient->coeffs[i] < 0 ? -1 : 1);
  return true;
}

But I may have a question about the one case as below.

Assume:
a = [4, 4], b = 8.

When meet can_div_trunc_p, it will check if the reminder is constant or not, 
aka a.coeffs[i] % 8 == 0 (i >= 1). If not constant reminder, the 
can_div_trunc_p will do nothing about quotient and return false.

Thus, when a = [4, 4] for can_div_away_from_zero_p, the output *quotient will 
be unchanged, aka the mod_size[E_%smode] will be unchanged for this case. 
However, the underlying mode_size will adjust it to the real byte size, and I 
am not sure if it is by design or requires additional handling.

Pan

From: 盼 李 
Sent: Tuesday, February 28, 2023 5:59 PM
To: Richard Sandiford ; Li, Pan2 
Cc: incarnation.p.lee--- via Gcc-patches ; 
juzhe.zh...@rivai.ai; kito.ch...@sifive.com; rguent...@suse.de
Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

Understood, thanks for the explanations and suggestions. Let me have a try and 
keep you posted.

Pan

From: Richard Sandiford 
mailto:richard.sandif...@arm.com>>
Sent: Tuesday, February 28, 2023 17:50
To: Li, Pan2 mailto:pan2...@intel.com>>
Cc: 盼 李 mailto:incarnation.p@outlook.com>>; 
incarnation.p.lee--- via Gcc-patches 
mailto:gcc-patches@gcc.gnu.org>>; 
juzhe.zh...@rivai.ai 
mailto:juzhe.zh...@rivai.ai>>; 
kito.ch...@sifive.com 
mailto:kito.ch...@sifive.com>>; 
rguent...@suse.de 
mailto:rguent...@suse.de>>
Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

"Li, Pan2" mailto:pan2...@intel.com>> writes:
> Hi Richard Sandiford,
>
> After some investigation, I am not sure if it is possible to make it general 
> without any changes to exact_div. We can add one method like below to get the 
> unit poly for all possible N.
>
> template
> inline POLY_CONST_RESULT (N, Ca, Ca)
> normalize_to_unit (const poly_int_pod )
> {
>   typedef POLY_CONST_COEFF (Ca, Ca) C;
>
>   poly_int normalized = a;
>
>   if (normalized.is_constant())
> normalized.coeffs[0] = 1;
>   else
> for (unsigned int i = 0; i < N; i++)
>   POLY_SET_COEFF (C, normalized, i, 1);
>
>   return normalized;
> }
>
> And then adjust the genmodes like below to consume the unit poly.
>
>   printf ("poly_uint16 unit_poly = "
>  "normalize_to_unit (mode_precision[E_%smode]);\n", m->name);
>   printf ("if (known_lt (mode_precision[E_%smode], "
>  "unit_poly * BITS_PER_UNIT))\n", m->name);
>   printf ("  mode_size[E_%smode] = unit_poly;\n", m->name);
>
> I am not sure if it is a good idea to introduce above normalize code into 
> exact_div. Given the comment of the exact_div indicates that “/* Return A / 
> B, given that A is known to be a multiple of B. */”.

My point was that we have multiple ways of dividing poly_ints:

- exact_div, for when the caller knows that the result is always exact
- can_div_trunc_p, for truncating division (round towards 0)
- can_div_away_from_zero_p, for rounding away from 0
- ...

This is like how we have multiple division *_EXPRs on trees.

Until now, exact_div was the correct choice for modes because vector
modes didn't have padding.  We're now changing that, so my suggestion
in the review was to change the division operation that we use.
Rather than use exact_div, we should now use can_div_away_from_zero_p,
which would have the effect of rounding the quotient up.

Something like:

  if (!can_div_away_from_zero_p (mode_precision[E_%smode], BITS_PER_UNIT,
 _size[E_%smode]))
gcc_unreachable ();

But this will require a new overload of can_div_away_from_zero_p, since
the existing one is for constant quotients rather than constant divisors.

Thanks,
Richard

>
> Could you please help to share your opinion about this from the expert’s 
> perspective ? Thank you!
>
> Pan
>
> From: 盼 李 
> mailto:incarnation.p@outlook.com>>
> Sent: Monday, February 27, 2023 11:13 PM
> To: Richard Sandiford 
> mailto:richard.sandif...@arm.com>>; 
> incarnation.p.lee--- via Gcc-patches 
> mailto:gcc-patches@gcc.gnu.org>>
> Cc: juzhe.zh...@rivai.ai; 
> kito.ch...@sifive.com; 
> rguent...@suse.de; Li, Pan2 
> mailto:pan2...@intel.com>>
> 

[PATCH 1/2] gcc: xtensa: add data alignment properties to dynconfig

2023-02-28 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa-dynconfig.cc (xtensa_get_config_v4): New
function.

include/
* xtensa-dynconfig.h (xtensa_config_v4): New struct.
(XCHAL_DATA_WIDTH, XCHAL_UNALIGNED_LOAD_EXCEPTION)
(XCHAL_UNALIGNED_STORE_EXCEPTION, XCHAL_UNALIGNED_LOAD_HW)
(XCHAL_UNALIGNED_STORE_HW, XTENSA_CONFIG_V4_ENTRY_LIST): New
definitions.
(XTENSA_CONFIG_INSTANCE_LIST): Add xtensa_config_v4 instance.
(XTENSA_CONFIG_ENTRY_LIST): Add XTENSA_CONFIG_V4_ENTRY_LIST.
---
 gcc/config/xtensa/xtensa-dynconfig.cc | 18 
 include/xtensa-dynconfig.h| 59 ++-
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa-dynconfig.cc 
b/gcc/config/xtensa/xtensa-dynconfig.cc
index 9aea9f253c25..12dce4d1b2aa 100644
--- a/gcc/config/xtensa/xtensa-dynconfig.cc
+++ b/gcc/config/xtensa/xtensa-dynconfig.cc
@@ -182,6 +182,24 @@ const struct xtensa_config_v3 *xtensa_get_config_v3 (void)
   return config;
 }
 
+const struct xtensa_config_v4 *xtensa_get_config_v4 (void)
+{
+  static const struct xtensa_config_v4 *config;
+  static const struct xtensa_config_v4 def = {
+  16, /* xchal_data_width */
+  1,  /* xchal_unaligned_load_exception */
+  1,  /* xchal_unaligned_store_exception */
+  0,  /* xchal_unaligned_load_hw */
+  0,  /* xchal_unaligned_store_hw */
+  };
+
+  if (!config)
+config = (const struct xtensa_config_v4 *) xtensa_load_config 
("xtensa_config_v4",
+  
_config_v4,
+  );
+  return config;
+}
+
 const char * const *xtensa_get_config_strings (void)
 {
   static const char * const *config_strings;
diff --git a/include/xtensa-dynconfig.h b/include/xtensa-dynconfig.h
index 2cc15cc99112..48877ebb6b61 100644
--- a/include/xtensa-dynconfig.h
+++ b/include/xtensa-dynconfig.h
@@ -112,6 +112,15 @@ struct xtensa_config_v3
   int xchal_have_xea3;
 };
 
+struct xtensa_config_v4
+{
+  int xchal_data_width;
+  int xchal_unaligned_load_exception;
+  int xchal_unaligned_store_exception;
+  int xchal_unaligned_load_hw;
+  int xchal_unaligned_store_hw;
+};
+
 typedef struct xtensa_isa_internal_struct xtensa_isa_internal;
 
 extern const void *xtensa_load_config (const char *name,
@@ -120,6 +129,7 @@ extern const void *xtensa_load_config (const char *name,
 extern const struct xtensa_config_v1 *xtensa_get_config_v1 (void);
 extern const struct xtensa_config_v2 *xtensa_get_config_v2 (void);
 extern const struct xtensa_config_v3 *xtensa_get_config_v3 (void);
+extern const struct xtensa_config_v4 *xtensa_get_config_v4 (void);
 
 #ifdef XTENSA_CONFIG_DEFINITION
 
@@ -207,6 +217,26 @@ extern const struct xtensa_config_v3 *xtensa_get_config_v3 
(void);
 #define XCHAL_HAVE_XEA3 0
 #endif
 
+#ifndef XCHAL_DATA_WIDTH
+#define XCHAL_DATA_WIDTH 16
+#endif
+
+#ifndef XCHAL_UNALIGNED_LOAD_EXCEPTION
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1
+#endif
+
+#ifndef XCHAL_UNALIGNED_STORE_EXCEPTION
+#define XCHAL_UNALIGNED_STORE_EXCEPTION 1
+#endif
+
+#ifndef XCHAL_UNALIGNED_LOAD_HW
+#define XCHAL_UNALIGNED_LOAD_HW 0
+#endif
+
+#ifndef XCHAL_UNALIGNED_STORE_HW
+#define XCHAL_UNALIGNED_STORE_HW 0
+#endif
+
 #define XTENSA_CONFIG_ENTRY(a) a
 
 #define XTENSA_CONFIG_V1_ENTRY_LIST \
@@ -276,6 +306,13 @@ extern const struct xtensa_config_v3 *xtensa_get_config_v3 
(void);
 XTENSA_CONFIG_ENTRY(XCHAL_HAVE_EXCLUSIVE), \
 XTENSA_CONFIG_ENTRY(XCHAL_HAVE_XEA3)
 
+#define XTENSA_CONFIG_V4_ENTRY_LIST \
+XTENSA_CONFIG_ENTRY(XCHAL_DATA_WIDTH), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_LOAD_EXCEPTION), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_STORE_EXCEPTION), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_LOAD_HW), \
+XTENSA_CONFIG_ENTRY(XCHAL_UNALIGNED_STORE_HW)
+
 #define XTENSA_CONFIG_INSTANCE_LIST \
 const struct xtensa_config_v1 xtensa_config_v1 = { \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
@@ -285,12 +322,16 @@ const struct xtensa_config_v2 xtensa_config_v2 = { \
 }; \
 const struct xtensa_config_v3 xtensa_config_v3 = { \
 XTENSA_CONFIG_V3_ENTRY_LIST, \
+}; \
+const struct xtensa_config_v4 xtensa_config_v4 = { \
+XTENSA_CONFIG_V4_ENTRY_LIST, \
 }
 
 #define XTENSA_CONFIG_ENTRY_LIST \
 XTENSA_CONFIG_V1_ENTRY_LIST, \
 XTENSA_CONFIG_V2_ENTRY_LIST, \
-XTENSA_CONFIG_V3_ENTRY_LIST
+XTENSA_CONFIG_V3_ENTRY_LIST, \
+XTENSA_CONFIG_V4_ENTRY_LIST
 
 #else /* XTENSA_CONFIG_DEFINITION */
 
@@ -482,6 +523,22 @@ const struct xtensa_config_v3 xtensa_config_v3 = { \
 #undef XCHAL_HAVE_XEA3
 #define XCHAL_HAVE_XEA3(xtensa_get_config_v3 
()->xchal_have_xea3)
 
+
+#undef XCHAL_DATA_WIDTH
+#define XCHAL_DATA_WIDTH   (xtensa_get_config_v4 
()->xchal_data_width)
+
+#undef XCHAL_UNALIGNED_LOAD_EXCEPTION
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION (xtensa_get_config_v4 
()->xchal_unaligned_load_exception)
+
+#undef XCHAL_UNALIGNED_STORE_EXCEPTION

[PATCH 2/2] gcc: xtensa: adjust STRICT_ALIGNMENT per hardware capabilities

2023-02-28 Thread Max Filippov via Gcc-patches
gcc/
* config/xtensa/xtensa.h (STRICT_ALIGNMENT): Make it 0 when the
hardware supports both unaligned loads and stores.
---
 gcc/config/xtensa/xtensa.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 058602e44ee2..49ec9147b543 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -143,7 +143,8 @@ along with GCC; see the file COPYING3.  If not see
 
 /* Set this nonzero if move instructions will actually fail to work
when given unaligned data.  */
-#define STRICT_ALIGNMENT 1
+#define STRICT_ALIGNMENT (!XCHAL_UNALIGNED_LOAD_HW \
+ || !XCHAL_UNALIGNED_STORE_HW)
 
 /* Promote integer modes smaller than a word to SImode.  Set UNSIGNEDP
for QImode, because there is no 8-bit load from memory with sign
-- 
2.30.2



[PATCH 4/5] Sanitize irange::num_pairs

2023-02-28 Thread Richard Biener via Gcc-patches
irange::num_pairs has odd behavior for VR_ANTI_RANGE where it
claims there are two pairs when there's actually only one.  The
following is now able to get rid of this, also fixing
irange::legacy_upper_bound which special-cased ~[-INF, up]
to return +INF instead of properly doing that when up is not +INF.

* value-range.h (irange::num_pairs): Always return
m_num_ranges.
* value-range.cc (irange::legacy_lower_bound): Remove
pair == 1 case.
(irange::legacy_upper_bound): Likewise.  Properly
special-case ~[low, +INF].
---
 gcc/value-range.cc | 6 --
 gcc/value-range.h  | 5 +
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index a535337c47a..143af5601c7 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -1208,7 +1208,8 @@ irange::legacy_lower_bound (unsigned pair) const
   if (m_kind == VR_ANTI_RANGE)
 {
   tree typ = type (), t;
-  if (pair == 1 || vrp_val_is_min (min ()))
+  gcc_checking_assert (pair == 0);
+  if (vrp_val_is_min (min ()))
t = wide_int_to_tree (typ, wi::to_wide (max ()) + 1);
   else
t = vrp_val_min (typ);
@@ -1235,7 +1236,8 @@ irange::legacy_upper_bound (unsigned pair) const
   if (m_kind == VR_ANTI_RANGE)
 {
   tree typ = type (), t;
-  if (pair == 1 || vrp_val_is_min (min ()))
+  gcc_checking_assert (pair == 0);
+  if (!vrp_val_is_max (max ()))
t = vrp_val_max (typ);
   else
t = wide_int_to_tree (typ, wi::to_wide (min ()) - 1);
diff --git a/gcc/value-range.h b/gcc/value-range.h
index f4ac73b499f..cfb51bad915 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -614,10 +614,7 @@ vrange::kind () const
 inline unsigned
 irange::num_pairs () const
 {
-  if (m_kind == VR_ANTI_RANGE)
-return constant_p () ? 2 : 1;
-  else
-return m_num_ranges;
+  return m_num_ranges;
 }
 
 inline tree
-- 
2.35.3



[PATCH 3/5] Avoid upper/lower_bound (1) on VR_ANTI_RANGE

2023-02-28 Thread Richard Biener via Gcc-patches
simplify_using_ranges::two_valued_val_range_p has odd code to
verify that an anti-range is two-valued which relies on
num_pairs () returning two for anti-ranges despite there's only
one pair and relying on lower/upper_bound treating that argument
specially.  I cannot convince myself that's even correct.
The following avoids this by using a temporary int_range<2> to
allow anti-ranges to be represented as union of two ranges.

* vr-values.cc (simplify_using_ranges::two_valued_val_range_p):
Canonicalize legacy range to int_range<2> before checking
for two valued-ness.
---
 gcc/vr-values.cc | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/vr-values.cc b/gcc/vr-values.cc
index 365f4976a39..c188fff1f8c 100644
--- a/gcc/vr-values.cc
+++ b/gcc/vr-values.cc
@@ -2189,11 +2189,12 @@ bool
 simplify_using_ranges::two_valued_val_range_p (tree var, tree *a, tree *b,
   gimple *s)
 {
-  value_range vr = *query->get_value_range (var, s);
-  vr.normalize_symbolics ();
-  if (vr.varying_p () || vr.undefined_p ())
+  value_range r = *query->get_value_range (var, s);
+  r.normalize_symbolics ();
+  if (r.varying_p () || r.undefined_p ())
 return false;
 
+  int_range<2> vr (r);
   if ((vr.num_pairs () == 1 && vr.upper_bound () - vr.lower_bound () == 1)
   || (vr.num_pairs () == 2
  && vr.lower_bound (0) == vr.upper_bound (0)
-- 
2.35.3



[PATCH 5/5] Sanitize legacy_{lower,upper}_bound

2023-02-28 Thread Richard Biener via Gcc-patches
* value-range.h (irange::legacy_lower_bound): Remove
pair argument.
(irange::legacy_upper_bound): Likewise.
(irange::lower_bound): Commonize asserts, adjust.
(irange::upper_bound): Likewise.
* value-range.cc (irange::legacy_lower_bound): Remove
asserts done in the caller.  Use min/max consistently.
(irange::legacy_upper_bound): Likewise.
---
 gcc/value-range.cc | 24 
 gcc/value-range.h  | 18 --
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 143af5601c7..eaf6139a1b5 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -1190,60 +1190,52 @@ irange::verify_range ()
 }
 }
 
-// Return the lower bound for a sub-range.  PAIR is the sub-range in
-// question.
+// Return the lower bound for a sub-range.
 
 wide_int
-irange::legacy_lower_bound (unsigned pair) const
+irange::legacy_lower_bound () const
 {
   gcc_checking_assert (legacy_mode_p ());
   if (symbolic_p ())
 {
   value_range numeric_range (*this);
   numeric_range.normalize_symbolics ();
-  return numeric_range.legacy_lower_bound (pair);
+  return numeric_range.legacy_lower_bound ();
 }
-  gcc_checking_assert (m_num_ranges > 0);
-  gcc_checking_assert (pair + 1 <= num_pairs ());
   if (m_kind == VR_ANTI_RANGE)
 {
   tree typ = type (), t;
-  gcc_checking_assert (pair == 0);
   if (vrp_val_is_min (min ()))
t = wide_int_to_tree (typ, wi::to_wide (max ()) + 1);
   else
t = vrp_val_min (typ);
   return wi::to_wide (t);
 }
- return wi::to_wide (tree_lower_bound (pair));
+ return wi::to_wide (min ());
 }
 
-// Return the upper bound for a sub-range.  PAIR is the sub-range in
-// question.
+// Return the upper bound for a sub-range.
 
 wide_int
-irange::legacy_upper_bound (unsigned pair) const
+irange::legacy_upper_bound () const
 {
   gcc_checking_assert (legacy_mode_p ());
   if (symbolic_p ())
 {
   value_range numeric_range (*this);
   numeric_range.normalize_symbolics ();
-  return numeric_range.legacy_upper_bound (pair);
+  return numeric_range.legacy_upper_bound ();
 }
-  gcc_checking_assert (m_num_ranges > 0);
-  gcc_checking_assert (pair + 1 <= num_pairs ());
   if (m_kind == VR_ANTI_RANGE)
 {
   tree typ = type (), t;
-  gcc_checking_assert (pair == 0);
   if (!vrp_val_is_max (max ()))
t = vrp_val_max (typ);
   else
t = wide_int_to_tree (typ, wi::to_wide (min ()) - 1);
   return wi::to_wide (t);
 }
-  return wi::to_wide (tree_upper_bound (pair));
+  return wi::to_wide (max ());
 }
 
 bool
diff --git a/gcc/value-range.h b/gcc/value-range.h
index cfb51bad915..ed8163ed218 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -193,8 +193,8 @@ protected:
   void legacy_union (irange *, const irange *);
   void legacy_intersect (irange *, const irange *);
   void verify_range ();
-  wide_int legacy_lower_bound (unsigned = 0) const;
-  wide_int legacy_upper_bound (unsigned) const;
+  wide_int legacy_lower_bound () const;
+  wide_int legacy_upper_bound () const;
   int value_inside_range (tree) const;
   bool maybe_anti_range () const;
   void copy_to_legacy (const irange &);
@@ -918,10 +918,13 @@ irange::set_varying (tree type)
 inline wide_int
 irange::lower_bound (unsigned pair) const
 {
-  if (legacy_mode_p ())
-return legacy_lower_bound (pair);
   gcc_checking_assert (m_num_ranges > 0);
   gcc_checking_assert (pair + 1 <= num_pairs ());
+  if (legacy_mode_p ())
+{
+  gcc_checking_assert (pair == 0);
+  return legacy_lower_bound ();
+}
   return wi::to_wide (tree_lower_bound (pair));
 }
 
@@ -931,10 +934,13 @@ irange::lower_bound (unsigned pair) const
 inline wide_int
 irange::upper_bound (unsigned pair) const
 {
-  if (legacy_mode_p ())
-return legacy_upper_bound (pair);
   gcc_checking_assert (m_num_ranges > 0);
   gcc_checking_assert (pair + 1 <= num_pairs ());
+  if (legacy_mode_p ())
+{
+  gcc_checking_assert (pair == 0);
+  return legacy_upper_bound ();
+}
   return wi::to_wide (tree_upper_bound (pair));
 }
 
-- 
2.35.3


[PATCH 2/5] fend off anti-ranges from value-range-storage

2023-02-28 Thread Richard Biener via Gcc-patches
The following avoids the need to special-case storage requirement
and copying for irange_storage_slot by making sure we canonicalize
such ranges to int_range<2>.

* tree-ssanames.cc (range_info_set_range): If receiving
an anti-range recurse with a temporary int_range<2>.
* value-range-storage.cc (irange_storage_slot::size):
Assert we're not asking for a VR_ANTI_RANGE.
---
 gcc/tree-ssanames.cc   | 3 +++
 gcc/value-range-storage.cc | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc
index 08aa166ef17..23468958586 100644
--- a/gcc/tree-ssanames.cc
+++ b/gcc/tree-ssanames.cc
@@ -127,6 +127,9 @@ range_info_get_range (tree name, vrange )
 inline bool
 range_info_set_range (tree name, const vrange )
 {
+  if (r.kind () == VR_ANTI_RANGE)
+return range_info_set_range (name, int_range<2> (as_a  (r)));
+
   if (!range_info_p (name) || !range_info_fits_p (name, r))
 {
   if (range_info_p (name))
diff --git a/gcc/value-range-storage.cc b/gcc/value-range-storage.cc
index bf23f6dd476..79ab2921a03 100644
--- a/gcc/value-range-storage.cc
+++ b/gcc/value-range-storage.cc
@@ -182,7 +182,7 @@ irange_storage_slot::get_irange (irange , tree type) const
 size_t
 irange_storage_slot::size (const irange )
 {
-  gcc_checking_assert (!r.undefined_p ());
+  gcc_checking_assert (!r.undefined_p () && r.kind () != VR_ANTI_RANGE);
 
   unsigned prec = TYPE_PRECISION (r.type ());
   unsigned n = num_wide_ints_needed (r);
-- 
2.35.3



[PATCH 1/5] fix anti-range dumping

2023-02-28 Thread Richard Biener via Gcc-patches
when vrange_printer::visit gets a VR_ANTI_RANGE it should print it
as such, not just print the first element as range.  When
irange::num_pairs and upper/lower_bound are fixed that would no
longer print a canonicalized anti-range.

* value-range-pretty-print.cc (vrange_printer::visit):
Handle all VR_ANTI_RANGE specially.
---
 gcc/value-range-pretty-print.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/value-range-pretty-print.cc b/gcc/value-range-pretty-print.cc
index d20e2562431..23817f48a3d 100644
--- a/gcc/value-range-pretty-print.cc
+++ b/gcc/value-range-pretty-print.cc
@@ -64,7 +64,7 @@ vrange_printer::visit (const irange ) const
   return;
 }
   // Handle legacy symbolics.
-  if (!r.constant_p ())
+  if (!r.constant_p () || r.kind () == VR_ANTI_RANGE)
 {
   if (r.kind () == VR_ANTI_RANGE)
pp_character (pp, '~');
-- 
2.35.3



[PATCH 0/5] Fix irange::legacy_upper_bound

2023-02-28 Thread Richard Biener via Gcc-patches


This series fixes irange::legacy_upper_bound behavior for VR_ANTI_RANGE
(actually [4/5] does).  It turns out the interesting behavior is
relied on in multiple spots (maybe not so many but fixing things lead
to fixing more things when trying to understand what happens).

So apart from fixing this possible wrong-code issue it removes
the odd behavior of calling the functions with pair == 1 which is
because VR_ANTI_RANGE get num_pairs () == 2 which is because of
"reasons" (the other parts of the series show what I ran into).

I have bootstrapped and tested patches 1-4 together on 
x86_64-unknown-linux-gnu and am now testing patch 5 ontop which
looked like natural cleanup here.

I didn't try to construct a wrong-code testcase, ranger probably
never builds legacy ranges, so I'm not sure how easy it is to
get an anti-range we end up querying upper_bound on.

OK for trunk or stage1?

Thanks,
Richard.


Re: [PATCH] lto: Fix up lto_fixup_prevailing_type [PR108910]

2023-02-28 Thread Richard Biener via Gcc-patches
On Tue, 28 Feb 2023, Jakub Jelinek wrote:

> On Tue, Feb 28, 2023 at 12:05:20PM +, Richard Biener via Gcc-patches 
> wrote:
> > > + p = _POINTER_TO (TREE_TYPE (t));
> > > +  else if (TREE_CODE (t) == REFERENCE_TYPE && !TYPE_REF_IS_RVALUE 
> > > (t))
> > > + p = _REFERENCE_TO (TREE_TYPE (t));
> > > +  if (p)
> > > + {
> > > +   tree t2;
> > > +   for (t2 = *p; t2; t2 = TYPE_NEXT_PTR_TO (t2))
> > > + if (TYPE_MODE (t2) == TYPE_MODE (t)
> > > + && TYPE_REF_CAN_ALIAS_ALL (t2) == TYPE_REF_CAN_ALIAS_ALL (t))
> > > +   break;
> > 
> > Can we elide this searching please?  Having duplicated should be harmless
> > unless proved otherwise.
> 
> I've posted 2 patches (one inlined, another attached), the second one
> didn't do this at all.

Ah, didn't notice that.

> Having (too many) duplicates would be harmful because build_pointer_type
> etc. walk up to the whole length of list all the time.  When the list
> length is bounded (say at most 2 modes - ptr_mode/Pmode, times 2 (the
> can alias all bool), then it doesn't hurt, if it could in theory be
> arbitrarily long, it would be a compile time problem.
> But given that we with the !TYPE_ATTRIBUTES/!TYPE_REF_IS_RVALUE change
> don't have a testcase that would show it is a problem actually ever
> encounterable, the second patch without this is fine I think.

I agree the attached patch is fine.

Richard.


[Patch] OpenMP: Ignore side-effects when finding struct comps [PR108545]

2023-02-28 Thread Tobias Burnus

(This is marked as P1 regression)

Since the structure handling updates, a hash is now used to find expressions 
which are identical;
unfortunately, this mishandles 'volatile' vars as expressions involving them 
are not regarded
as identical. This leads to spurious *multiple* 'map(struct:x' that later 
causes an ICE.
(For details, see also the PR, https://gcc.gnu.org/PR108545 )

OK for mainline?

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP: Ignore side-effects when finding struct comps [PR108545]

With volatile, two 'x.data' comp refs aren't regarded as identical,
causing that the two items in the first map of
  map(to:x.a, x.a.data) map(pset: x.a.data)
end up in separate 'map(struct:x)', which will cause a later ICE.

Solution: Ignore side effects when checking the operands in the hash
for being equal. (Do so by creating a variant of tree_operand_hash
that calls operand_equal_p with OEP_MATCH_SIDE_EFFECTS.)

gcc/ChangeLog:

	PR middle-end/108545
	* gimplify.cc (struct tree_operand_sideeff_hash): New.
	omp_index_mapping_groups_1, omp_index_mapping_groups,
	omp_reindex_mapping_groups, omp_mapped_by_containing_struct,
	omp_tsort_mapping_groups_1, omp_tsort_mapping_groups,
	oacc_resolve_clause_dependencies, omp_build_struct_sibling_lists,
	gimplify_scan_omp_clauses): Use tree_operand_sideeff_hash instead
	of tree_operand_hash.


gcc/testsuite/ChangeLog:

	PR middle-end/108545
	* c-c++-common/gomp/map-8.c: New test.
	* gfortran.dg/gomp/map-9.f90: New test.

 gcc/gimplify.cc  | 55 ++--
 gcc/testsuite/c-c++-common/gomp/map-8.c  | 19 +++
 gcc/testsuite/gfortran.dg/gomp/map-9.f90 | 13 
 3 files changed, 70 insertions(+), 17 deletions(-)

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 96845154a92..3c0bb1c987a 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -8958,6 +8958,28 @@ enum omp_tsort_mark {
   PERMANENT
 };
 
+/* Hash for trees based on operand_equal_p.
+   Like tree_operand_hash but accepts side effects.  */
+struct tree_operand_sideeff_hash : ggc_ptr_hash 
+{
+  static inline hashval_t hash (const value_type &);
+  static inline bool equal (const value_type &,
+			const compare_type &);
+};
+
+inline hashval_t
+tree_operand_sideeff_hash::hash (const value_type )
+{
+  return iterative_hash_expr (t, 0);
+}
+
+inline bool
+tree_operand_sideeff_hash::equal (const value_type ,
+  const compare_type )
+{
+  return operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS);
+}
+
 /* A group of OMP_CLAUSE_MAP nodes that correspond to a single "map"
clause.  */
 
@@ -9400,10 +9422,10 @@ omp_group_base (omp_mapping_group *grp, unsigned int *chained,
 }
 
 /* Given a vector of omp_mapping_groups, build a hash table so we can look up
-   nodes by tree_operand_hash.  */
+   nodes by tree_operand_sideeff_hash.  */
 
 static void
-omp_index_mapping_groups_1 (hash_map *grpmap,
 			vec *groups,
 			tree reindex_sentinel)
@@ -9432,7 +9454,6 @@ omp_index_mapping_groups_1 (hash_map *
+static hash_map *
 omp_index_mapping_groups (vec *groups)
 {
-  hash_map *grpmap
-= new hash_map;
+  hash_map *grpmap
+= new hash_map;
 
   omp_index_mapping_groups_1 (grpmap, groups, NULL_TREE);
 
@@ -9502,14 +9523,14 @@ omp_index_mapping_groups (vec *groups)
so, we can do the reindex operation in two parts, on the processed and
then the unprocessed halves of the list.  */
 
-static hash_map *
+static hash_map *
 omp_reindex_mapping_groups (tree *list_p,
 			vec *groups,
 			vec *processed_groups,
 			tree sentinel)
 {
-  hash_map *grpmap
-= new hash_map;
+  hash_map *grpmap
+= new hash_map;
 
   processed_groups->truncate (0);
 
@@ -9550,7 +9571,7 @@ omp_containing_struct (tree expr)
that maps that structure, if present.  */
 
 static bool
-omp_mapped_by_containing_struct (hash_map *grpmap,
  tree decl,
  omp_mapping_group **mapped_by_group)
@@ -9590,7 +9611,7 @@ omp_mapped_by_containing_struct (hash_map *groups,
-			hash_map
+			hash_map
 			  *grpmap,
 			omp_mapping_group *grp)
 {
@@ -9670,7 +9691,7 @@ omp_tsort_mapping_groups_1 (omp_mapping_group ***outlist,
 
 static omp_mapping_group *
 omp_tsort_mapping_groups (vec *groups,
-			  hash_map
+			  hash_map
 			*grpmap)
 {
   omp_mapping_group *grp, *outlist = NULL, **cursor;
@@ -9986,7 +10007,7 @@ omp_check_mapping_compatibility (location_t loc,
 
 void
 oacc_resolve_clause_dependencies (vec *groups,
-  hash_map *grpmap)
 {
   int i;
@@ -10520,8 +10541,8 @@ static bool
 omp_build_struct_sibling_lists (enum tree_code code,
 enum omp_region_type region_type,
 vec *groups,
-hash_map
-  **grpmap,
+hash_map **grpmap,
 tree *list_p)
 {
   unsigned i;
@@ -10747,7 +10768,7 @@ 

Re: RISC-V: Add divmod instruction support

2023-02-28 Thread Maciej W. Rozycki
On Mon, 20 Feb 2023, Alexander Monakov wrote:

> > >  That's the kind of stuff I'd expect to happen at the tree level though,
> > > before expand.
> > 
> > The GIMPLE pass forming divmod could indeed choose to emit the
> > div + mul/sub sequence instead if an actual divmod pattern isn't available.
> > It could even generate some fake mul/sub/mod RTXen to cost the two
> > variants against each other but I seriously doubt any uarch that implements
> > division/modulo has a slower mul/sub.
> 
> Making a correct decision requires knowing to which degree the divider is
> pipelined, and costs won't properly reflect that. If the divider accepts
> a new div/mod instruction every couple of cycles, it's faster to just issue
> a div followed by a mod with the same operands.

 I guess there exist microarchitectures that have their divider pipelined, 
but I haven't come across one.  I think division is not an operation that 
is commonly optimised for in hw RTL, whether integer or FP, not at least 
in embedded applications, and given the nature of the operation I believe 
it would be particularly costly in terms of silicon.  I've seen latencies 
and repeat rates of up to 50 quoted in CPU documentation even for 32-bit 
integer division while multiplication had a latency of 5 and a repeat rate 
of 1 (i.e. fully pipelined) for the same microarchitecture.

 So (taking the data dependency into account) the latency of a DIV + MOD 
operation would be 100 for said microarchitecture (and the same repeat 
rate), while a DIV + MUL/SUB sequence would have a latency of 56 and a 
repeat rate of 50.  Quite an improvement.

> Therefore I think in this case it's fair for GIMPLE level to just check if
> the divmod pattern is available, and let the target do the fine tuning via
> the divmod expander.

 Hmm, we have DFA scheduling available that is supposed to give latencies 
and repeat rates for individual operations.  Wouldn't it be possible to 
get hold of this information?

 If we cannot make use of this information at the GIMPLE level (which I'd 
consider regrettable), then I think that maybe we need a target hook to 
say division is cheap that would prevent a DIV + MOD to DIV + MUL/SUB 
transformation from happening, perhaps off by default.  I think it would 
be regrettable too if every backend for targets/subtargets that do not 
have a hardware DIVMOD operation (e.g. MIPS has one at certain ISA levels 
only) or a pipelined division would have to code the transformation by 
hand.

> It would make sense for tree-ssa-mathopts to emit div + mul/sub when neither
> 'divmod' nor 'mod' patterns are available, because RTL expansion will do the
> same, just later, and we'll rely on RTL CSE to clean up the redundant div.

 That as well.

> But RISC-V has both 'div' and 'mod', so as I tried to explain in the first
> paragraph we should let the target decide.

 Still I think it would be best if RISC-V ought to supply a divmod pattern 
only where fused DIV/MOD execution is present in the microarchitecture.

  Maciej


Re: [PATCH] lto: Fix up lto_fixup_prevailing_type [PR108910]

2023-02-28 Thread Jakub Jelinek via Gcc-patches
On Tue, Feb 28, 2023 at 12:05:20PM +, Richard Biener via Gcc-patches wrote:
> > +   p = _POINTER_TO (TREE_TYPE (t));
> > +  else if (TREE_CODE (t) == REFERENCE_TYPE && !TYPE_REF_IS_RVALUE (t))
> > +   p = _REFERENCE_TO (TREE_TYPE (t));
> > +  if (p)
> > +   {
> > + tree t2;
> > + for (t2 = *p; t2; t2 = TYPE_NEXT_PTR_TO (t2))
> > +   if (TYPE_MODE (t2) == TYPE_MODE (t)
> > +   && TYPE_REF_CAN_ALIAS_ALL (t2) == TYPE_REF_CAN_ALIAS_ALL (t))
> > + break;
> 
> Can we elide this searching please?  Having duplicated should be harmless
> unless proved otherwise.

I've posted 2 patches (one inlined, another attached), the second one
didn't do this at all.
Having (too many) duplicates would be harmful because build_pointer_type
etc. walk up to the whole length of list all the time.  When the list
length is bounded (say at most 2 modes - ptr_mode/Pmode, times 2 (the
can alias all bool), then it doesn't hurt, if it could in theory be
arbitrarily long, it would be a compile time problem.
But given that we with the !TYPE_ATTRIBUTES/!TYPE_REF_IS_RVALUE change
don't have a testcase that would show it is a problem actually ever
encounterable, the second patch without this is fine I think.
> 
> OK with that change.

Jakub



Re: [PATCH] lto: Fix up lto_fixup_prevailing_type [PR108910]

2023-02-28 Thread Richard Biener via Gcc-patches
On Tue, 28 Feb 2023, Jakub Jelinek wrote:

> On Tue, Feb 28, 2023 at 09:43:37AM +, Richard Biener wrote:
> > We try to make sure to put all built types into the type merging
> > machinery, so I think it shouldn't happen - but then I cannot rule
> > it out.  I'm also not sure whether duplicates would really pose
> > a problem here (other than waste).  If we want to make sure we should
> > probably enhance verify_types to verify the TYPE_POINTER_TO chain ...
> > 
> > > I'll try to debug why those user aligned types become TYPE_MAIN_VARIANT
> > > though...
> 
> Ok, so this happens already in the FEs when trying to add the attribute:
> #0  build_distinct_type_copy (type=) at 
> ../../gcc/tree.cc:5735
> #1  0x00c2327b in build_type_attribute_qual_variant 
> (otype=, attribute=, 
> quals=0) at ../../gcc/attribs.cc:1298
> #2  0x00c245b0 in build_type_attribute_variant (ttype= 0x7fffea0219d8>, attribute=) at 
> ../../gcc/attribs.cc:1591
> #3  0x00c22325 in decl_attributes (node=0x7fffd008, 
> attributes=, flags=1, last_decl=) at 
> ../../gcc/attribs.cc:964
> 
> So, perhaps the !TYPE_QUALS (t) could be just an assert, but maybe next to
> the !TYPE_USER_ALIGN (t) (or just instead of?) we need !TYPE_ATTRIBUTES (t).
> Because addition of attributes (but anything else that causes
> build_distinct_type_copy rather than build_variant_type_copy) will create
> new TYPE_MAIN_VARIANTS.
> Looking around, TYPE_REF_IS_RVALUE references also create distinct types,
> and while the C++ FE sticks them into the TYPE_REFERENCE_TO chain, it
> ensures they go after the corresponding !TYPE_REF_IS_RVALUE entry, so
> perhaps LTO should !TYPE_REF_IS_RVALUE for REFERENCE_TYPEs.
> Other uses of build_distinct_type_copy in the FEs are mostly related to
> ARRAY_TYPEs (in C FE as well as c-common).  Asan uses it solely on integral
> types, etc.  For attributes, big question is if it when we set
> *no_addr_attrs = true we still tweak some things on the type (not in place)
> or not.
> 
> So, here are two possible variant patches which fix the ICE on the
> testcase too.
> 
> 2023-02-28  Jakub Jelinek  
> 
>   PR target/108910
>   * lto-common.cc (lto_fixup_prevailing_type): Don't add t to
>   TYPE_POINTER_TO or TYPE_REFERENCE_TO chain if it has 
>   TYPE_ATTRIBUTES, or is TYPE_REF_IS_RVALUE, or some other type
>   with the same TYPE_MODE and TYPE_REF_CAN_ALIAS_ALL flag is already
>   present.
> 
> --- gcc/lto/lto-common.cc.jj  2023-01-16 11:52:16.165732856 +0100
> +++ gcc/lto/lto-common.cc 2023-02-28 12:30:37.014471255 +0100
> @@ -984,21 +984,36 @@ lto_fixup_prevailing_type (tree t)
>TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (mv);
>TYPE_NEXT_VARIANT (mv) = t;
>  }
> -
> -  /* The following reconstructs the pointer chains
> - of the new pointed-to type if we are a main variant.  We do
> - not stream those so they are broken before fixup.  */
> -  if (TREE_CODE (t) == POINTER_TYPE
> -  && TYPE_MAIN_VARIANT (t) == t)
> -{
> -  TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t));
> -  TYPE_POINTER_TO (TREE_TYPE (t)) = t;
> -}
> -  else if (TREE_CODE (t) == REFERENCE_TYPE
> -&& TYPE_MAIN_VARIANT (t) == t)
> +  else if (!TYPE_ATTRIBUTES (t))
>  {
> -  TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t));
> -  TYPE_REFERENCE_TO (TREE_TYPE (t)) = t;
> +  /* The following reconstructs the pointer chains
> +  of the new pointed-to type if we are a main variant.  We do
> +  not stream those so they are broken before fixup.
> +  Don't add it if despite being main variant it has
> +  attributes (then it was created with build_distinct_type_copy).
> +  Similarly don't add TYPE_REF_IS_RVALUE REFERENCE_TYPEs.
> +  Don't add it if there is something in the chain already.  */
> +  tree *p = NULL;
> +  if (TREE_CODE (t) == POINTER_TYPE)
> + p = _POINTER_TO (TREE_TYPE (t));
> +  else if (TREE_CODE (t) == REFERENCE_TYPE && !TYPE_REF_IS_RVALUE (t))
> + p = _REFERENCE_TO (TREE_TYPE (t));
> +  if (p)
> + {
> +   tree t2;
> +   for (t2 = *p; t2; t2 = TYPE_NEXT_PTR_TO (t2))
> + if (TYPE_MODE (t2) == TYPE_MODE (t)
> + && TYPE_REF_CAN_ALIAS_ALL (t2) == TYPE_REF_CAN_ALIAS_ALL (t))
> +   break;

Can we elide this searching please?  Having duplicated should be harmless
unless proved otherwise.

OK with that change.

Richard.

> +   if (t2 == NULL_TREE)
> + {
> +   if (TREE_CODE (t) == POINTER_TYPE)
> + TYPE_NEXT_PTR_TO (t) = *p;
> +   else
> + TYPE_NEXT_REF_TO (t) = *p;
> +   *p = t;
> + }
> + }
>  }
>  }
>  
> 
> 
>   Jakub
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


Re: [PATCH 1/2]middle-end: Fix wrong overmatching of div-bitmask by using new optabs [PR108583]

2023-02-28 Thread Richard Sandiford via Gcc-patches
Tamar Christina  writes:
>> -Original Message-
>> From: Richard Sandiford 
>> Sent: Tuesday, February 28, 2023 11:09 AM
>> To: Tamar Christina 
>> Cc: Tamar Christina via Gcc-patches ; nd
>> ; rguent...@suse.de; j...@ventanamicro.com
>> Subject: Re: [PATCH 1/2]middle-end: Fix wrong overmatching of div-bitmask
>> by using new optabs [PR108583]
>> 
>> Tamar Christina  writes:
>> >> -Original Message-
>> >> From: Richard Sandiford 
>> >> Sent: Monday, February 27, 2023 9:33 PM
>> >> To: Tamar Christina via Gcc-patches 
>> >> Cc: Tamar Christina ; nd ;
>> >> rguent...@suse.de; j...@ventanamicro.com
>> >> Subject: Re: [PATCH 1/2]middle-end: Fix wrong overmatching of
>> >> div-bitmask by using new optabs [PR108583]
>> >>
>> >> Tamar Christina via Gcc-patches  writes:
>> >> >> -Original Message-
>> >> >> From: Richard Sandiford 
>> >> >> Sent: Monday, February 27, 2023 12:12 PM
>> >> >> To: Tamar Christina 
>> >> >> Cc: Tamar Christina via Gcc-patches ; nd
>> >> >> ; rguent...@suse.de; j...@ventanamicro.com
>> >> >> Subject: Re: [PATCH 1/2]middle-end: Fix wrong overmatching of
>> >> >> div-bitmask by using new optabs [PR108583]
>> >> >>
>> >> >> Tamar Christina  writes:
>> >> >> > Hi,
>> >> >> >
>> >> >> >> > I avoided open coding it with add and shift because it
>> >> >> >> > creates a
>> >> >> >> > 4 instructions (and shifts which are typically slow)
>> >> >> >> > dependency chain instead of a load and multiply.  This
>> >> >> >> > change, unless the target is known to optimize it further is
>> >> >> >> > unlikely to be beneficial.  And by the time we get to costing
>> >> >> >> > the only alternative is to undo the existing pattern and
>> >> >> >> so you lose the general shift optimization.
>> >> >> >> >
>> >> >> >> > So it seemed unwise to open code as shifts, given the codegen
>> >> >> >> > out of the vectorizer would be degenerate for most targets or
>> >> >> >> > one needs the more complicated route of costing during
>> >> >> >> > pattern
>> >> matching already.
>> >> >> >>
>> >> >> >> Hmm, OK.  That seems like a cost-model thing though, rather
>> >> >> >> than something that should be exposed through optabs.  And I
>> >> >> >> imagine the open-coded version would still be better than
>> >> >> >> nothing on targets without
>> >> >> highpart multiply.
>> >> >> >>
>> >> >> >> So how about replacing the hook with one that simply asks
>> >> >> >> whether division through highpart multiplication is preferred
>> >> >> >> over the add/shift
>> >> >> sequence?
>> >> >> >> (Unfortunately it's not going to be possible to work that out
>> >> >> >> from existing
>> >> >> >> information.)
>> >> >> >
>> >> >> > So this doesn't work for SVE.  For SVE the multiplication
>> >> >> > widening pass introduces FMAs at gimple level.  So in the cases
>> >> >> > where the operation is fed from a widening multiplication we end
>> >> >> > up generating
>> >> FMA.
>> >> >> If that was it I could have matched FMA.
>> >> >> >
>> >> >> > But it also pushes the multiplication in the second operand
>> >> >> > because it no longer has a mul to share the results with.
>> >> >> >
>> >> >> > In any case, the gimple code is transformed into
>> >> >> >
>> >> >> > vect__3.8_122 = .MASK_LOAD (_29, 8B, loop_mask_121);
>> >> >> > vect_patt_57.9_123 = (vector([8,8]) unsigned short)
>> >> >> > vect__3.8_122;
>> >> >> > vect_patt_64.11_127 = .FMA (vect_patt_57.9_123, vect_cst__124, {
>> >> >> > 257, ... });
>> >> >> > vect_patt_65.12_128 = vect_patt_64.11_127 >> 8;
>> >> >> > vect_patt_66.13_129 = .FMA (vect_patt_57.9_123, vect_cst__124,
>> >> >> > vect_patt_65.12_128);
>> >> >> > vect_patt_62.14_130 = vect_patt_66.13_129 >> 8;
>> >> >> > vect_patt_68.15_131 = (vector([8,8]) unsigned charD.21)
>> >> >> > vect_patt_62.14_130;
>> >> >> >
>> >> >> > This transformation is much worse than the original code, it
>> >> >> > extended the dependency chain with another expensive
>> >> >> > instruction. I can try to correct this in RTL by matching FMA
>> >> >> > and shift and splitting into MUL +
>> >> >> ADDHNB and hope CSE takes care of the extra mul.
>> >> >> >
>> >> >> > But this seems like a hack, and it's basically undoing the
>> >> >> > earlier transformation.  It seems to me that the open coding is a bad
>> idea.
>> >> >>
>> >> >> Could you post the patch that gives this result?  I'll have a poke 
>> >> >> around.
>> >> >
>> >> > Sure, I'll post the new series, it needs all of them.
>> >>
>> >> Thanks.  Which testcase did you use to get the above?
>> >>
>> >
>> > #include 
>> >
>> > #define N 16
>> > #define TYPE uint8_t
>> >
>> > void fun3(TYPE* restrict pixel, TYPE level, int n) {
>> >   for (int i = 0; i < (n & -16); i+=1)
>> > pixel[i] = (pixel[i] * level) / 0xff; }
>> 
>> Thanks.  In that testcase, isn't the FMA handling an anti-optimisation in its
>> own right though?  It's duplicating a multiplication into two points on a
>> dependency chain.
>
> Most definitely, that's what I meant above. The "optimization" doesn't 

Re: [PATCH] lto: Fix up lto_fixup_prevailing_type [PR108910]

2023-02-28 Thread Jakub Jelinek via Gcc-patches
On Tue, Feb 28, 2023 at 09:43:37AM +, Richard Biener wrote:
> We try to make sure to put all built types into the type merging
> machinery, so I think it shouldn't happen - but then I cannot rule
> it out.  I'm also not sure whether duplicates would really pose
> a problem here (other than waste).  If we want to make sure we should
> probably enhance verify_types to verify the TYPE_POINTER_TO chain ...
> 
> > I'll try to debug why those user aligned types become TYPE_MAIN_VARIANT
> > though...

Ok, so this happens already in the FEs when trying to add the attribute:
#0  build_distinct_type_copy (type=) at 
../../gcc/tree.cc:5735
#1  0x00c2327b in build_type_attribute_qual_variant 
(otype=, attribute=, 
quals=0) at ../../gcc/attribs.cc:1298
#2  0x00c245b0 in build_type_attribute_variant (ttype=, attribute=) at 
../../gcc/attribs.cc:1591
#3  0x00c22325 in decl_attributes (node=0x7fffd008, 
attributes=, flags=1, last_decl=) at 
../../gcc/attribs.cc:964

So, perhaps the !TYPE_QUALS (t) could be just an assert, but maybe next to
the !TYPE_USER_ALIGN (t) (or just instead of?) we need !TYPE_ATTRIBUTES (t).
Because addition of attributes (but anything else that causes
build_distinct_type_copy rather than build_variant_type_copy) will create
new TYPE_MAIN_VARIANTS.
Looking around, TYPE_REF_IS_RVALUE references also create distinct types,
and while the C++ FE sticks them into the TYPE_REFERENCE_TO chain, it
ensures they go after the corresponding !TYPE_REF_IS_RVALUE entry, so
perhaps LTO should !TYPE_REF_IS_RVALUE for REFERENCE_TYPEs.
Other uses of build_distinct_type_copy in the FEs are mostly related to
ARRAY_TYPEs (in C FE as well as c-common).  Asan uses it solely on integral
types, etc.  For attributes, big question is if it when we set
*no_addr_attrs = true we still tweak some things on the type (not in place)
or not.

So, here are two possible variant patches which fix the ICE on the
testcase too.

2023-02-28  Jakub Jelinek  

PR target/108910
* lto-common.cc (lto_fixup_prevailing_type): Don't add t to
TYPE_POINTER_TO or TYPE_REFERENCE_TO chain if it has 
TYPE_ATTRIBUTES, or is TYPE_REF_IS_RVALUE, or some other type
with the same TYPE_MODE and TYPE_REF_CAN_ALIAS_ALL flag is already
present.

--- gcc/lto/lto-common.cc.jj2023-01-16 11:52:16.165732856 +0100
+++ gcc/lto/lto-common.cc   2023-02-28 12:30:37.014471255 +0100
@@ -984,21 +984,36 @@ lto_fixup_prevailing_type (tree t)
   TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (mv);
   TYPE_NEXT_VARIANT (mv) = t;
 }
-
-  /* The following reconstructs the pointer chains
- of the new pointed-to type if we are a main variant.  We do
- not stream those so they are broken before fixup.  */
-  if (TREE_CODE (t) == POINTER_TYPE
-  && TYPE_MAIN_VARIANT (t) == t)
-{
-  TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t));
-  TYPE_POINTER_TO (TREE_TYPE (t)) = t;
-}
-  else if (TREE_CODE (t) == REFERENCE_TYPE
-  && TYPE_MAIN_VARIANT (t) == t)
+  else if (!TYPE_ATTRIBUTES (t))
 {
-  TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t));
-  TYPE_REFERENCE_TO (TREE_TYPE (t)) = t;
+  /* The following reconstructs the pointer chains
+of the new pointed-to type if we are a main variant.  We do
+not stream those so they are broken before fixup.
+Don't add it if despite being main variant it has
+attributes (then it was created with build_distinct_type_copy).
+Similarly don't add TYPE_REF_IS_RVALUE REFERENCE_TYPEs.
+Don't add it if there is something in the chain already.  */
+  tree *p = NULL;
+  if (TREE_CODE (t) == POINTER_TYPE)
+   p = _POINTER_TO (TREE_TYPE (t));
+  else if (TREE_CODE (t) == REFERENCE_TYPE && !TYPE_REF_IS_RVALUE (t))
+   p = _REFERENCE_TO (TREE_TYPE (t));
+  if (p)
+   {
+ tree t2;
+ for (t2 = *p; t2; t2 = TYPE_NEXT_PTR_TO (t2))
+   if (TYPE_MODE (t2) == TYPE_MODE (t)
+   && TYPE_REF_CAN_ALIAS_ALL (t2) == TYPE_REF_CAN_ALIAS_ALL (t))
+ break;
+ if (t2 == NULL_TREE)
+   {
+ if (TREE_CODE (t) == POINTER_TYPE)
+   TYPE_NEXT_PTR_TO (t) = *p;
+ else
+   TYPE_NEXT_REF_TO (t) = *p;
+ *p = t;
+   }
+   }
 }
 }
 


Jakub
2023-02-28  Jakub Jelinek  

PR target/108910
* lto-common.cc (lto_fixup_prevailing_type): Don't add t to
TYPE_POINTER_TO or TYPE_REFERENCE_TO chain if it has 
TYPE_ATTRIBUTES or is TYPE_REF_IS_RVALUE.

--- gcc/lto/lto-common.cc.jj2023-01-16 11:52:16.165732856 +0100
+++ gcc/lto/lto-common.cc   2023-02-28 12:34:33.698038478 +0100
@@ -984,21 +984,25 @@ lto_fixup_prevailing_type (tree t)
   TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (mv);
   TYPE_NEXT_VARIANT (mv) = t;
 }
-
-  /* The following reconstructs the pointer chains
- of the 

RE: [PATCH 1/2]middle-end: Fix wrong overmatching of div-bitmask by using new optabs [PR108583]

2023-02-28 Thread Tamar Christina via Gcc-patches
> -Original Message-
> From: Richard Sandiford 
> Sent: Tuesday, February 28, 2023 11:09 AM
> To: Tamar Christina 
> Cc: Tamar Christina via Gcc-patches ; nd
> ; rguent...@suse.de; j...@ventanamicro.com
> Subject: Re: [PATCH 1/2]middle-end: Fix wrong overmatching of div-bitmask
> by using new optabs [PR108583]
> 
> Tamar Christina  writes:
> >> -Original Message-
> >> From: Richard Sandiford 
> >> Sent: Monday, February 27, 2023 9:33 PM
> >> To: Tamar Christina via Gcc-patches 
> >> Cc: Tamar Christina ; nd ;
> >> rguent...@suse.de; j...@ventanamicro.com
> >> Subject: Re: [PATCH 1/2]middle-end: Fix wrong overmatching of
> >> div-bitmask by using new optabs [PR108583]
> >>
> >> Tamar Christina via Gcc-patches  writes:
> >> >> -Original Message-
> >> >> From: Richard Sandiford 
> >> >> Sent: Monday, February 27, 2023 12:12 PM
> >> >> To: Tamar Christina 
> >> >> Cc: Tamar Christina via Gcc-patches ; nd
> >> >> ; rguent...@suse.de; j...@ventanamicro.com
> >> >> Subject: Re: [PATCH 1/2]middle-end: Fix wrong overmatching of
> >> >> div-bitmask by using new optabs [PR108583]
> >> >>
> >> >> Tamar Christina  writes:
> >> >> > Hi,
> >> >> >
> >> >> >> > I avoided open coding it with add and shift because it
> >> >> >> > creates a
> >> >> >> > 4 instructions (and shifts which are typically slow)
> >> >> >> > dependency chain instead of a load and multiply.  This
> >> >> >> > change, unless the target is known to optimize it further is
> >> >> >> > unlikely to be beneficial.  And by the time we get to costing
> >> >> >> > the only alternative is to undo the existing pattern and
> >> >> >> so you lose the general shift optimization.
> >> >> >> >
> >> >> >> > So it seemed unwise to open code as shifts, given the codegen
> >> >> >> > out of the vectorizer would be degenerate for most targets or
> >> >> >> > one needs the more complicated route of costing during
> >> >> >> > pattern
> >> matching already.
> >> >> >>
> >> >> >> Hmm, OK.  That seems like a cost-model thing though, rather
> >> >> >> than something that should be exposed through optabs.  And I
> >> >> >> imagine the open-coded version would still be better than
> >> >> >> nothing on targets without
> >> >> highpart multiply.
> >> >> >>
> >> >> >> So how about replacing the hook with one that simply asks
> >> >> >> whether division through highpart multiplication is preferred
> >> >> >> over the add/shift
> >> >> sequence?
> >> >> >> (Unfortunately it's not going to be possible to work that out
> >> >> >> from existing
> >> >> >> information.)
> >> >> >
> >> >> > So this doesn't work for SVE.  For SVE the multiplication
> >> >> > widening pass introduces FMAs at gimple level.  So in the cases
> >> >> > where the operation is fed from a widening multiplication we end
> >> >> > up generating
> >> FMA.
> >> >> If that was it I could have matched FMA.
> >> >> >
> >> >> > But it also pushes the multiplication in the second operand
> >> >> > because it no longer has a mul to share the results with.
> >> >> >
> >> >> > In any case, the gimple code is transformed into
> >> >> >
> >> >> > vect__3.8_122 = .MASK_LOAD (_29, 8B, loop_mask_121);
> >> >> > vect_patt_57.9_123 = (vector([8,8]) unsigned short)
> >> >> > vect__3.8_122;
> >> >> > vect_patt_64.11_127 = .FMA (vect_patt_57.9_123, vect_cst__124, {
> >> >> > 257, ... });
> >> >> > vect_patt_65.12_128 = vect_patt_64.11_127 >> 8;
> >> >> > vect_patt_66.13_129 = .FMA (vect_patt_57.9_123, vect_cst__124,
> >> >> > vect_patt_65.12_128);
> >> >> > vect_patt_62.14_130 = vect_patt_66.13_129 >> 8;
> >> >> > vect_patt_68.15_131 = (vector([8,8]) unsigned charD.21)
> >> >> > vect_patt_62.14_130;
> >> >> >
> >> >> > This transformation is much worse than the original code, it
> >> >> > extended the dependency chain with another expensive
> >> >> > instruction. I can try to correct this in RTL by matching FMA
> >> >> > and shift and splitting into MUL +
> >> >> ADDHNB and hope CSE takes care of the extra mul.
> >> >> >
> >> >> > But this seems like a hack, and it's basically undoing the
> >> >> > earlier transformation.  It seems to me that the open coding is a bad
> idea.
> >> >>
> >> >> Could you post the patch that gives this result?  I'll have a poke 
> >> >> around.
> >> >
> >> > Sure, I'll post the new series, it needs all of them.
> >>
> >> Thanks.  Which testcase did you use to get the above?
> >>
> >
> > #include 
> >
> > #define N 16
> > #define TYPE uint8_t
> >
> > void fun3(TYPE* restrict pixel, TYPE level, int n) {
> >   for (int i = 0; i < (n & -16); i+=1)
> > pixel[i] = (pixel[i] * level) / 0xff; }
> 
> Thanks.  In that testcase, isn't the FMA handling an anti-optimisation in its
> own right though?  It's duplicating a multiplication into two points on a
> dependency chain.

Most definitely, that's what I meant above. The "optimization" doesn't take into
account the effect on the rest of the chain.

> 
> E.g. for:
> 
> unsigned int
> f1 (unsigned int a, unsigned int b, unsigned int c) {

Re: [PATCH 1/2]middle-end: Fix wrong overmatching of div-bitmask by using new optabs [PR108583]

2023-02-28 Thread Richard Sandiford via Gcc-patches
Tamar Christina  writes:
>> -Original Message-
>> From: Richard Sandiford 
>> Sent: Monday, February 27, 2023 9:33 PM
>> To: Tamar Christina via Gcc-patches 
>> Cc: Tamar Christina ; nd ;
>> rguent...@suse.de; j...@ventanamicro.com
>> Subject: Re: [PATCH 1/2]middle-end: Fix wrong overmatching of div-bitmask
>> by using new optabs [PR108583]
>> 
>> Tamar Christina via Gcc-patches  writes:
>> >> -Original Message-
>> >> From: Richard Sandiford 
>> >> Sent: Monday, February 27, 2023 12:12 PM
>> >> To: Tamar Christina 
>> >> Cc: Tamar Christina via Gcc-patches ; nd
>> >> ; rguent...@suse.de; j...@ventanamicro.com
>> >> Subject: Re: [PATCH 1/2]middle-end: Fix wrong overmatching of
>> >> div-bitmask by using new optabs [PR108583]
>> >>
>> >> Tamar Christina  writes:
>> >> > Hi,
>> >> >
>> >> >> > I avoided open coding it with add and shift because it creates a
>> >> >> > 4 instructions (and shifts which are typically slow) dependency
>> >> >> > chain instead of a load and multiply.  This change, unless the
>> >> >> > target is known to optimize it further is unlikely to be
>> >> >> > beneficial.  And by the time we get to costing the only
>> >> >> > alternative is to undo the existing pattern and
>> >> >> so you lose the general shift optimization.
>> >> >> >
>> >> >> > So it seemed unwise to open code as shifts, given the codegen
>> >> >> > out of the vectorizer would be degenerate for most targets or
>> >> >> > one needs the more complicated route of costing during pattern
>> matching already.
>> >> >>
>> >> >> Hmm, OK.  That seems like a cost-model thing though, rather than
>> >> >> something that should be exposed through optabs.  And I imagine
>> >> >> the open-coded version would still be better than nothing on
>> >> >> targets without
>> >> highpart multiply.
>> >> >>
>> >> >> So how about replacing the hook with one that simply asks whether
>> >> >> division through highpart multiplication is preferred over the
>> >> >> add/shift
>> >> sequence?
>> >> >> (Unfortunately it's not going to be possible to work that out from
>> >> >> existing
>> >> >> information.)
>> >> >
>> >> > So this doesn't work for SVE.  For SVE the multiplication widening
>> >> > pass introduces FMAs at gimple level.  So in the cases where the
>> >> > operation is fed from a widening multiplication we end up generating
>> FMA.
>> >> If that was it I could have matched FMA.
>> >> >
>> >> > But it also pushes the multiplication in the second operand because
>> >> > it no longer has a mul to share the results with.
>> >> >
>> >> > In any case, the gimple code is transformed into
>> >> >
>> >> > vect__3.8_122 = .MASK_LOAD (_29, 8B, loop_mask_121);
>> >> > vect_patt_57.9_123 = (vector([8,8]) unsigned short) vect__3.8_122;
>> >> > vect_patt_64.11_127 = .FMA (vect_patt_57.9_123, vect_cst__124, {
>> >> > 257, ... });
>> >> > vect_patt_65.12_128 = vect_patt_64.11_127 >> 8;
>> >> > vect_patt_66.13_129 = .FMA (vect_patt_57.9_123, vect_cst__124,
>> >> > vect_patt_65.12_128);
>> >> > vect_patt_62.14_130 = vect_patt_66.13_129 >> 8;
>> >> > vect_patt_68.15_131 = (vector([8,8]) unsigned charD.21)
>> >> > vect_patt_62.14_130;
>> >> >
>> >> > This transformation is much worse than the original code, it
>> >> > extended the dependency chain with another expensive instruction. I
>> >> > can try to correct this in RTL by matching FMA and shift and
>> >> > splitting into MUL +
>> >> ADDHNB and hope CSE takes care of the extra mul.
>> >> >
>> >> > But this seems like a hack, and it's basically undoing the earlier
>> >> > transformation.  It seems to me that the open coding is a bad idea.
>> >>
>> >> Could you post the patch that gives this result?  I'll have a poke around.
>> >
>> > Sure, I'll post the new series, it needs all of them.
>> 
>> Thanks.  Which testcase did you use to get the above?
>> 
>
> #include 
>
> #define N 16
> #define TYPE uint8_t
>
> void fun3(TYPE* restrict pixel, TYPE level, int n)
> {
>   for (int i = 0; i < (n & -16); i+=1)
> pixel[i] = (pixel[i] * level) / 0xff;
> }

Thanks.  In that testcase, isn't the FMA handling an anti-optimisation
in its own right though?  It's duplicating a multiplication into two
points on a dependency chain.

E.g. for:

unsigned int
f1 (unsigned int a, unsigned int b, unsigned int c)
{
  unsigned int d = a * b;
  return d + ((c + d) >> 1);
}
unsigned int
g1 (unsigned int a, unsigned int b, unsigned int c)
{
  return a * b + c;
}

__Uint32x4_t
f2 (__Uint32x4_t a, __Uint32x4_t b, __Uint32x4_t c)
{
  __Uint32x4_t d = a * b;
  return d + ((c + d) >> 1);
}
__Uint32x4_t
g2 (__Uint32x4_t a, __Uint32x4_t b, __Uint32x4_t c)
{
  return a * b + c;
}

typedef unsigned int vec __attribute__((vector_size(32)));
vec
f3 (vec a, vec b, vec c)
{
  vec d = a * b;
  return d + ((c + d) >> 1);
}
vec
g3 (vec a, vec b, vec c)
{
  return a * b + c;
}

compiled with -O2 -msve-vector-bits=256 -march=armv8.2-a+sve,
all the g functions use multiply-add (as expected), but the
f functions are:

f1:
mul

Re: [Patch] OpenMP: Handle descriptors in target's firstprivate [PR104949]

2023-02-28 Thread Thomas Schwinge
Hi!

I'm currently reviewing 'gomp_copy_host2dev', 'ephemeral' in a different
context, and a question came up here;
commit r13-706-g49d1a2f91325fa8cc011149e27e5093a988b3a49
"OpenMP: Handle descriptors in target's firstprivate [PR104949]":

On 2022-05-11T19:33:00+0200, Tobias Burnus  wrote:
> this patch handles (for target regions)
>firstprivate(array_descriptor)
> by not only firstprivatizing the descriptor but also the data
> it points to. This is done by turning it in omp-low.cc the clause
> into
>firstprivate(descr) firstprivate(descr.data)
> and then attaching the latter to the former. That works by
> adding an 'attach' after the last firstprivate (and checking
> for it in libgomp). The attached-to device address for a
> previous (here: the first) firstprivate is obtained by returning
> the device address inside the hostaddrs[i] alias omp_arr array,
> i.e. the compiler generates:
>omp_arr.1 =   /* firstprivate */
>omp_arr.2 = descr.data;  /* firstprivate */
>omp_arr.3 = _arr.1;  /* attach; bias:  */
> and libgomp then knows that the device address is in the
> pointer.

> Note: The code is not active for OpenACC. The existing code uses, e.g.,
> 'goto oacc_firstprivate' – thus, the new code would be
> partially active. I went for making it completely inactive for OpenACC
> by adding one '!is_gimple_omp_oacc'.

ACK.

> I bet that a deep copy would be
> also useful for OpenACC, but I have neither checked what the current
> code does nor what the OpenACC spec says about this.

Instead of adding corresponding handling to the OpenACC 'firstprivate'
special code paths later on, I suggest that we first address known issues
with OpenACC 'firstprivate' -- which probably may largely be achieved by
in fact removing those 'goto oacc_firstprivate's and other special code
paths?  For example, see 
"OpenACC 'firstprivate' clause: initial value".

That means, the following code currently isn't active for OpenACC, and
given that OpenMP 'target' doesn't do asynchronous device execution
(meaning: not in the way/implementation of OpenACC 'async'), it thus
doesn't care about the 'ephemeral' argument to 'gomp_copy_host2dev', but
still, for correctness (and once that code gets used for OpenACC):

> OpenMP: Handle descriptors in target's firstprivate [PR104949]
>
> For allocatable/pointer arrays, a firstprivate to a device
> not only needs to privatize the descriptor but also the actual
> data. This is implemented as:
>   firstprivate(x) firstprivate(x.data) attach(x [bias: )
> where the address of x in device memory is saved in hostaddrs[i]
> by libgomp and the middle end actually passes hostaddrs[i]' to
> attach.

> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -1350,7 +1350,24 @@ gomp_map_vars_internal (struct gomp_device_descr 
> *devicep,
>   gomp_copy_host2dev (devicep, aq,
>   (void *) (tgt->tgt_start + tgt_size),
>   (void *) hostaddrs[i], len, false, cbufp);

Here, passing 'ephemeral <- false' is correct, as 'h <- hostaddrs[i]'
points to non-ephemeral data.

> + /* Save device address in hostaddr to permit latter availablity
> +when doing a deep-firstprivate with pointer attach.  */
> + hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);

Here, we modify 'hostaddrs[i]' (itself -- *not* the data that the
original 'hostaddrs[i]' points to), so the above 'gomp_copy_host2dev'
with 'ephemeral <- false' is still correct, right?

>   tgt_size += len;
> +
> + /* If followed by GOMP_MAP_ATTACH, pointer assign this
> +firstprivate to hostaddrs[i+1], which is assumed to contain a
> +device address.  */
> + if (i + 1 < mapnum
> + && (GOMP_MAP_ATTACH
> + == (typemask & get_kind (short_mapkind, kinds, i+1
> +   {
> + uintptr_t target = (uintptr_t) hostaddrs[i];
> + void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
> + gomp_copy_host2dev (devicep, aq, devptr, ,
> + sizeof (void *), false, cbufp);

However, 'h <- ' here points to data in the local frame
('target'), which potentially goes out of scope before an asynchronous
'gomp_copy_host2dev' has completed.  Thus, don't we have to pass here
'ephemeral <- true' instead of 'ephemeral <- false'?  Or, actually
instead of '', pass '[i]', which then again points to
non-ephemeral data?  Is the latter safe to do, or are we potentially
further down the line modifying the data that '[i]' points to?
(I got a bit lost in the use of 'hostaddrs[i]' here.)

> + ++i;
> +   }
>   continue;
> case GOMP_MAP_FIRSTPRIVATE_INT:
> case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
> @@ -2517,6 +2534,11 @@ copy_firstprivate_data (char *tgt, size_t mapnum, void 
> **hostaddrs,
>  

Re: [Patch] gcc.dg/overflow-warn-9.c: exclude from LLP64

2023-02-28 Thread Jonathan Yong via Gcc-patches

On 2/28/23 03:06, Hans-Peter Nilsson wrote:


On Mon, 27 Feb 2023, Jonathan Yong via Gcc-patches wrote:


This test is for LP64 only, exclude LLP64 too.
Patch OK?


I may be confused, but you're not making use of the "llp64"
effective target, there instead excluding/including lp64 /
ilp32 in sets that not obviously mean "exclude LLP64".

To wit, how is "! ilp32" -> "lp64" and "ilp32" -> "! lp64"
expressing "! llp64"?

brgds, H-P


Attached new version, hopefully it is clearer.
From 91102d00dc701a65dfac5820a2bc57e1e4bed5b2 Mon Sep 17 00:00:00 2001
From: Jonathan Yong <10wa...@gmail.com>
Date: Mon, 27 Feb 2023 09:49:31 +
Subject: [PATCH 5/7] gcc.dg/overflow-warn-9.c: Fix LLP64

gcc/testsuite/ChangeLog:

	* gcc.dg/overflow-warn-9.c: Add LLP64 case.

Signed-off-by: Jonathan Yong <10wa...@gmail.com>
---
 gcc/testsuite/gcc.dg/overflow-warn-9.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/overflow-warn-9.c b/gcc/testsuite/gcc.dg/overflow-warn-9.c
index 57c0f17bc91..ae588bd8491 100644
--- a/gcc/testsuite/gcc.dg/overflow-warn-9.c
+++ b/gcc/testsuite/gcc.dg/overflow-warn-9.c
@@ -59,7 +59,8 @@ const struct Types t1 = {
   .ui = UINT_MAX + 1L,  /* { dg-warning "signed conversion from .long int. to .unsigned int. changes value from .4294967296. to .0." "lp64" { target lp64 } } */
   .ui = UINT_MAX + 1LU, /* { dg-warning "conversion from .long unsigned int. to .unsigned int. changes value from .4294967296. to .0." "lp64" { target lp64 } } */
 
-  .sl = LONG_MAX + 1LU, /* { dg-warning "signed conversion from .long unsigned int. to .long int. changes value from .9223372036854775808. to .-9223372036854775808." "not-ilp32" { target { ! ilp32 } } } */
+  .sl = LONG_MAX + 1LU, /* { dg-warning "signed conversion from .long unsigned int. to .long int. changes value from .9223372036854775808. to .-9223372036854775808." "lp64" { target lp64 } } */
   /* { dg-warning "signed conversion from .long unsigned int. to .long int. changes value from .2147483648. to .-2147483648." "ilp32" { target ilp32 } .-1 } */
+  /* { dg-warning "signed conversion from .long unsigned int. to .long int. changes value from .2147483648. to .-2147483648." "llp64" { target llp64 } .-2 } */
   .ul = ULONG_MAX + 1LU /* there should be some warning here */
 };
-- 
2.39.2



[PATCH] MIPS: Bugfix for fix Dejagnu issues with RTL checking enabled.

2023-02-28 Thread Xin Liu
From: Robert Suchanek 

gcc/ChangeLog:

   * config/mips/mips.cc (mips_set_text_contents_type): Modified parameter 
   * config/mips/mips-protos.h (mips_set_text_contents_type): Likewise

Signed-off-by: Xin Liu 

---
 gcc/config/mips/mips-protos.h | 2 +-
 gcc/config/mips/mips.c| 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 75432677da2..fae71fe776c 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -272,7 +272,7 @@ extern void mips_declare_object (FILE *, const char *, 
const char *,
 extern void mips_declare_object_name (FILE *, const char *, tree);
 extern void mips_finish_declare_object (FILE *, tree, int, int);
 extern void mips_set_text_contents_type (FILE *, const char *,
-unsigned long, bool);
+unsigned HOST_WIDE_INT, bool);
 
 extern bool mips_small_data_pattern_p (rtx);
 extern rtx mips_rewrite_small_data (rtx);
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index fb903a2a630..2d87d4f3627 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -11090,7 +11090,7 @@ mips_finish_declare_object (FILE *stream, tree decl, 
int top_level, int at_end)
 void
 mips_set_text_contents_type (FILE *file ATTRIBUTE_UNUSED,
 const char *prefix ATTRIBUTE_UNUSED,
-unsigned long num ATTRIBUTE_UNUSED,
+unsigned HOST_WIDE_INT num ATTRIBUTE_UNUSED,
 bool function_p ATTRIBUTE_UNUSED)
 {
 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
@@ -11099,7 +11099,7 @@ mips_set_text_contents_type (FILE *file 
ATTRIBUTE_UNUSED,
   char *sname;
   rtx symbol;
 
-  sprintf (buf, "%lu", num);
+  sprintf (buf, HOST_WIDE_INT_PRINT_UNSIGNED, num);
   symbol = XEXP (DECL_RTL (current_function_decl), 0);
   fnname = targetm.strip_name_encoding (XSTR (symbol, 0));
   sname = ACONCAT ((prefix, fnname, "_", buf, NULL));
-- 
2.30.2


[PATCH] c++: Emit fundamental tinfos for all _Float*/decltype(0.0bf16) types unconditionally [PR108883]

2023-02-28 Thread Jakub Jelinek via Gcc-patches
Hi!

On Tue, Feb 28, 2023 at 12:51:06AM +0100, Jakub Jelinek via Gcc-patches wrote:
> And then there is a question whether we want to emit rtti for
> _Float{16,32,64,128}, _Float{32,64,128}x and decltype(0.0bf16) regardless
> of whether the target supports them at all or not.

If the answer to that is yes, we want them all, then the following patch
arranges that (had to force immediate emit_tinfo_decl to get rid of the
fallback_* stuff), these days it doesn't mean it will be emitted right away
anyway, just it will be registered with varpool and have initializers
created.

If the answer is yes, except _Float128x, we could in the patch just move
that _type_node to the other initializer.

If the answer is no, then I think we need a target hook which would tell us
which fundamentals should be forced this way, could have e.g. a form
of returning a vector of global tree addresses to be treated that way in
addition to the standard ones (dfloat*), or a hook that would take
a callback pointer and be called with _support_tinfo_1 and the backend
would call the callback on any types it wants to handle that way.

2023-02-28  Jakub Jelinek  

PR target/108883
* cp-tree.h (enum cp_tree_index): Remove CPTI_FALLBACK_DFLOAT*_TYPE
enumerators.
(fallback_dfloat32_type, fallback_dfloat64_type,
fallback_dfloat128_type): Remove.
* rtti.cc (emit_support_tinfo_1): If not emitted already, call
emit_tinfo_decl and remove from unemitted_tinfo_decls right away.
(emit_support_tinfos): Move *_type_node, _node
and [0-9]*_type_node from fundamentals array into new
fundamentals_with_fallback array.  Call emit_support_tinfo_1
on elements of that array too, with the difference that if
the type is NULL, use a fallback REAL_TYPE for it temporarily.
Drop the !targetm.decimal_float_supported_p () handling.
* mangle.cc (write_builtin_type): Remove references to
fallback_dfloat*_type.

--- gcc/cp/cp-tree.h.jj 2023-02-18 12:38:30.910023526 +0100
+++ gcc/cp/cp-tree.h2023-02-28 10:39:22.377379948 +0100
@@ -235,10 +235,6 @@ enum cp_tree_index
 
 CPTI_PSEUDO_CONTRACT_VIOLATION,
 
-CPTI_FALLBACK_DFLOAT32_TYPE,
-CPTI_FALLBACK_DFLOAT64_TYPE,
-CPTI_FALLBACK_DFLOAT128_TYPE,
-
 CPTI_MAX
 };
 
@@ -397,13 +393,6 @@ extern GTY(()) tree cp_global_trees[CPTI
access nodes in tree.h.  */
 
 #define access_default_nodenull_node
-
-/* Variant of dfloat{32,64,128}_type_node only used for fundamental
-   rtti purposes if DFP is disabled.  */
-#define fallback_dfloat32_type 
cp_global_trees[CPTI_FALLBACK_DFLOAT32_TYPE]
-#define fallback_dfloat64_type 
cp_global_trees[CPTI_FALLBACK_DFLOAT64_TYPE]
-#define fallback_dfloat128_type
cp_global_trees[CPTI_FALLBACK_DFLOAT128_TYPE]
-
 
 #include "name-lookup.h"
 
--- gcc/cp/rtti.cc.jj   2023-02-22 15:58:50.137998090 +0100
+++ gcc/cp/rtti.cc  2023-02-28 10:31:53.693893021 +0100
@@ -1577,6 +1577,15 @@ emit_support_tinfo_1 (tree bltn)
  gcc_assert (TREE_PUBLIC (tinfo) && !DECL_COMDAT (tinfo));
  DECL_INTERFACE_KNOWN (tinfo) = 1;
}
+
+  /* Emit it right away if not emitted already.  */
+  if (DECL_INITIAL (tinfo) == NULL_TREE)
+   {
+ gcc_assert (unemitted_tinfo_decls->last () == tinfo);
+ bool ok = emit_tinfo_decl (tinfo);
+ gcc_assert (ok);
+ unemitted_tinfo_decls->pop ();
+   }
 }
 }
 
@@ -1602,10 +1611,17 @@ emit_support_tinfos (void)
 _integer_type_node, _unsigned_type_node,
 _long_integer_type_node, _long_unsigned_type_node,
 _type_node, _type_node, _double_type_node,
+_type_node,
+0
+  };
+  /* Similar, but for floating point types only which should get type info
+ regardless whether they are non-NULL or NULL.  */
+  static tree *const fundamentals_with_fallback[] =
+  {
 _type_node, _type_node, _type_node,
 _type_node, _type_node, _type_node,
 _type_node, _type_node, _type_node,
-_type_node, _type_node, _type_node,
+_type_node, _type_node,
 0
   };
   int ix;
@@ -1627,8 +1643,20 @@ emit_support_tinfos (void)
   location_t saved_loc = input_location;
   input_location = BUILTINS_LOCATION;
   doing_runtime = 1;
+  tree fallback = NULL_TREE;
   for (ix = 0; fundamentals[ix]; ix++)
 emit_support_tinfo_1 (*fundamentals[ix]);
+  for (ix = 0; fundamentals_with_fallback[ix]; ix++)
+if (*fundamentals_with_fallback[ix])
+  emit_support_tinfo_1 (*fundamentals_with_fallback[ix]);
+else
+  {
+   if (fallback == NULL_TREE)
+ fallback = make_node (REAL_TYPE);
+   *fundamentals_with_fallback[ix] = fallback;
+   emit_support_tinfo_1 (fallback);
+   *fundamentals_with_fallback[ix] = NULL_TREE;
+  }
   for (ix = 0; ix < NUM_INT_N_ENTS; ix ++)
 if (int_n_enabled_p[ix])
   {
@@ -1637,20 +1665,6 @@ emit_support_tinfos (void)
   }
   for (tree t = registered_builtin_types; 

Re: Fwd: [V2][PATCH] Fixing PR107411

2023-02-28 Thread Richard Biener via Gcc-patches
On Mon, 27 Feb 2023, Qing Zhao wrote:

> Ping.

OK.

Thanks,
Richard.

> Qing
> 
> Begin forwarded message:
> 
> From: Qing Zhao mailto:qing.z...@oracle.com>>
> Subject: [V2][PATCH] Fixing PR107411
> Date: February 21, 2023 at 9:46:04 AM EST
> To: ja...@redhat.com, 
> rguent...@suse.de
> Cc: gcc-patches@gcc.gnu.org, Qing Zhao 
> mailto:qing.z...@oracle.com>>
> 
> This is the 2nd version of the patch.
> compared to the first version, the major change is:
> 
> use sprintf to replace xasprintf per Jacub's suggestion.
> 
> bootstrapped and regression tested on both x86 and aarch64.
> 
> Okay for committing?
> 
> thanks.
> 
> Qing
> 
> ===
> 
> 
> This is a bug in tree-ssa-uninit.cc.
> When doing the following:
> 
>  /* Ignore the call to .DEFERRED_INIT that define the original
> var itself as the following case:
>   temp = .DEFERRED_INIT (4, 2, “alt_reloc");
>   alt_reloc = temp;
> In order to avoid generating warning for the fake usage
> at alt_reloc = temp.
>  */
> 
> We need to compare the var name inside the .DEFERRED_INIT call
> (the 3rd argument) and the name for the LHS variable. if they are the same,
> we will NOT report the warning.
> 
> There is one issue when we get the name for the LHS variable. when the
> variable doesn't have a DECL_NAME (it's not a user declared variable,
> which is the case for this bug):
> 
>  _1 = .DEFERRED_INIT (4, 2, &"D.2389"[0]);
>  D.2389 = _1;
> 
> The current checking just ignores this case, and still report the warning.
> 
> The fix is very simple, when getting the name for the LHS variable, we should
> consider this case and come up with the name the same way as we construct the
> 3rd argument for the call to .DEFERRED_INIT (please refer to the routine
> "gimple_add_init_for_auto_var")
> 
> PR middle-end/107411
> 
> gcc/ChangeLog:
> 
> PR middle-end/107411
> * gimplify.cc (gimple_add_init_for_auto_var): Use sprintf 
> to replace
> xasprintf.
> * tree-ssa-uninit.cc (warn_uninit): Handle the 
> case when the
> LHS varaible of a .DEFERRED_INIT call doesn't have a DECL_NAME.
> 
> gcc/testsuite/ChangeLog:
> 
> PR middle-end/107411
> * g++.dg/pr107411.C: New test.
> ---
> gcc/gimplify.cc |  4 ++--
> gcc/testsuite/g++.dg/pr107411.C | 10 ++
> gcc/tree-ssa-uninit.cc  | 23 
> ---
> 3 files changed, 28 insertions(+), 9 deletions(-)
> create mode 100644 gcc/testsuite/g++.dg/pr107411.C
> 
> diff --git a/gcc/gimplify.cc 
> b/gcc/gimplify.cc
> index 96845154a92..35d1ea22623 100644
> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -1775,9 +1775,9 @@ gimple_add_init_for_auto_var (tree decl,
> 
>   else
> {
> -  char *decl_name_anonymous = xasprintf ("D.%u", DECL_UID (decl));
> +  char decl_name_anonymous[3 + (HOST_BITS_PER_INT + 2) / 3];
> +  sprintf (decl_name_anonymous, "D.%u", DECL_UID (decl));
>   decl_name = build_string_literal (decl_name_anonymous);
> -  free (decl_name_anonymous);
> }
> 
>   tree call = build_call_expr_internal_loc (loc, IFN_DEFERRED_INIT,
> diff --git a/gcc/testsuite/g++.dg/pr107411.C b/gcc/testsuite/g++.dg/pr107411.C
> new file mode 100644
> index 000..7eefecae4f3
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr107411.C
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Werror=uninitialized -ftrivial-auto-var-init=zero"  } */
> +int t();
> +void f(int);
> +
> +void j()
> +{
> +  const int& e = t();
> +  f(e);
> +}
> diff --git a/gcc/tree-ssa-uninit.cc 
> b/gcc/tree-ssa-uninit.cc
> index c555cf5cd50..9f720ae1f4f 100644
> --- a/gcc/tree-ssa-uninit.cc
> +++ b/gcc/tree-ssa-uninit.cc
> @@ -224,8 +224,6 @@ warn_uninit (opt_code opt, tree t, tree var, gimple 
> *context,
> at alt_reloc = temp.
>  */
>  tree lhs_var = NULL_TREE;
> -  tree lhs_var_name = NULL_TREE;
> -  const char *lhs_var_name_str = NULL;
> 
>  /* Get the variable name from the 3rd argument of call.  */
>  tree var_name = gimple_call_arg (var_def_stmt, 2);
> @@ -239,11 +237,22 @@ warn_uninit (opt_code opt, tree t, tree var, gimple 
> *context,
>  else if (TREE_CODE (gimple_assign_lhs (context)) == SSA_NAME)
> lhs_var = SSA_NAME_VAR (gimple_assign_lhs (context));
>}
> -  if (lhs_var
> -  && (lhs_var_name = DECL_NAME (lhs_var))
> -  && (lhs_var_name_str = IDENTIFIER_POINTER (lhs_var_name))
> -  && (strcmp (lhs_var_name_str, var_name_str) == 0))
> -return;
> +  if (lhs_var)
> +{
> +  /* Get the name string for the LHS_VAR.
> + Refer to routine gimple_add_init_for_auto_var.  */
> +  if (DECL_NAME (lhs_var)
> +  && (strcmp 

Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-02-28 Thread 盼 李 via Gcc-patches
Understood, thanks for the explanations and suggestions. Let me have a try and 
keep you posted.

Pan

From: Richard Sandiford 
Sent: Tuesday, February 28, 2023 17:50
To: Li, Pan2 
Cc: 盼 李 ; incarnation.p.lee--- via Gcc-patches 
; juzhe.zh...@rivai.ai ; 
kito.ch...@sifive.com ; rguent...@suse.de 

Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

"Li, Pan2"  writes:
> Hi Richard Sandiford,
>
> After some investigation, I am not sure if it is possible to make it general 
> without any changes to exact_div. We can add one method like below to get the 
> unit poly for all possible N.
>
> template
> inline POLY_CONST_RESULT (N, Ca, Ca)
> normalize_to_unit (const poly_int_pod )
> {
>   typedef POLY_CONST_COEFF (Ca, Ca) C;
>
>   poly_int normalized = a;
>
>   if (normalized.is_constant())
> normalized.coeffs[0] = 1;
>   else
> for (unsigned int i = 0; i < N; i++)
>   POLY_SET_COEFF (C, normalized, i, 1);
>
>   return normalized;
> }
>
> And then adjust the genmodes like below to consume the unit poly.
>
>   printf ("poly_uint16 unit_poly = "
>  "normalize_to_unit (mode_precision[E_%smode]);\n", m->name);
>   printf ("if (known_lt (mode_precision[E_%smode], "
>  "unit_poly * BITS_PER_UNIT))\n", m->name);
>   printf ("  mode_size[E_%smode] = unit_poly;\n", m->name);
>
> I am not sure if it is a good idea to introduce above normalize code into 
> exact_div. Given the comment of the exact_div indicates that “/* Return A / 
> B, given that A is known to be a multiple of B. */”.

My point was that we have multiple ways of dividing poly_ints:

- exact_div, for when the caller knows that the result is always exact
- can_div_trunc_p, for truncating division (round towards 0)
- can_div_away_from_zero_p, for rounding away from 0
- ...

This is like how we have multiple division *_EXPRs on trees.

Until now, exact_div was the correct choice for modes because vector
modes didn't have padding.  We're now changing that, so my suggestion
in the review was to change the division operation that we use.
Rather than use exact_div, we should now use can_div_away_from_zero_p,
which would have the effect of rounding the quotient up.

Something like:

  if (!can_div_away_from_zero_p (mode_precision[E_%smode], BITS_PER_UNIT,
 _size[E_%smode]))
gcc_unreachable ();

But this will require a new overload of can_div_away_from_zero_p, since
the existing one is for constant quotients rather than constant divisors.

Thanks,
Richard

>
> Could you please help to share your opinion about this from the expert’s 
> perspective ? Thank you!
>
> Pan
>
> From: 盼 李 
> Sent: Monday, February 27, 2023 11:13 PM
> To: Richard Sandiford ; incarnation.p.lee--- via 
> Gcc-patches 
> Cc: juzhe.zh...@rivai.ai; kito.ch...@sifive.com; rguent...@suse.de; Li, Pan2 
> 
> Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
>
> Never mind, wish you have a good holiday.
>
> Thanks for pointing this out, the if part cannot take care of poly_int with N 
> > 2. As I understand, we need to make it general for all the N of poly_int.
>
> Thus I would like to double confirm with you about how to make it general. I 
> suppose there will be a new function can_div_away_from_zero_p to replace the 
> if (known_lt(,)) part in genmodes.cc, and leave exact_div unchanged(consider 
> the word exact, I suppose we should not touch here), right? Then we still 
> need one poly_int with all 1 for N as the return if can_div_away_from_zero_p 
> is true.
>
> Thanks again for your professional suggestion, have a nice day, !
>
> Pan
> 
> From: Richard Sandiford 
> mailto:richard.sandif...@arm.com>>
> Sent: Monday, February 27, 2023 22:24
> To: incarnation.p.lee--- via Gcc-patches 
> mailto:gcc-patches@gcc.gnu.org>>
> Cc: incarnation.p@outlook.com 
> mailto:incarnation.p@outlook.com>>; 
> juzhe.zh...@rivai.ai 
> mailto:juzhe.zh...@rivai.ai>>; 
> kito.ch...@sifive.com 
> mailto:kito.ch...@sifive.com>>; 
> rguent...@suse.de 
> mailto:rguent...@suse.de>>; 
> pan2...@intel.com 
> mailto:pan2...@intel.com>>
> Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
>
> Sorry for the slow reply, been away for a couple of weeks.
>
> "incarnation.p.lee--- via Gcc-patches" 
> mailto:gcc-patches@gcc.gnu.org>> writes:
>> From: Pan Li mailto:pan2...@intel.com>>
>>
>>Fix the bug of the rvv bool mode precision with the adjustment.
>>The bits size of vbool*_t will be adjusted to
>>[1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>>adjusted mode precison of vbool*_t will help underlying pass to
>>make the right decision for both the correctness and optimization.
>>
>> 

[committed] libstdc++: Do not use memmove for 1-element ranges [PR108846]

2023-02-28 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

This avoids overwriting tail padding when algorithms like std::copy are
used to write a single value through a pointer to a base subobject.

The pointer arithmetic on a Base* is valid for N==1, but the copy/move
operation needs to be done using assignment, not a memmove or memcpy of
sizeof(Base) bytes.

Instead of putting a check for N==1 in all of copy, copy_n, move etc.
this adds it to the __copy_move and __copy_move_backward partial
specializations used for trivially copyable types. When N==1 those
partial specializations dispatch to new static member functions of the
partial specializations for non-trivial types, so that a copy/move
assignment is done appropriately for the _IsMove constant.

libstdc++-v3/ChangeLog:

PR libstdc++/108846
* include/bits/stl_algobase.h (__copy_move)
Add __assign_one static member function.
(__copy_move): Likewise.
(__copy_move): Do not use memmove for a single
value.
(__copy_move_backward): Likewise.
* testsuite/25_algorithms/copy/108846.cc: New test.
* testsuite/25_algorithms/copy_backward/108846.cc: New test.
* testsuite/25_algorithms/copy_n/108846.cc: New test.
* testsuite/25_algorithms/move/108846.cc: New test.
* testsuite/25_algorithms/move_backward/108846.cc: New test.
---
 libstdc++-v3/include/bits/stl_algobase.h  | 46 ---
 .../testsuite/25_algorithms/copy/108846.cc| 58 +++
 .../25_algorithms/copy_backward/108846.cc | 58 +++
 .../testsuite/25_algorithms/copy_n/108846.cc  | 58 +++
 .../testsuite/25_algorithms/move/108846.cc| 58 +++
 .../25_algorithms/move_backward/108846.cc | 58 +++
 6 files changed, 314 insertions(+), 22 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/copy/108846.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/copy_backward/108846.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/copy_n/108846.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/move/108846.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/move_backward/108846.cc

diff --git a/libstdc++-v3/include/bits/stl_algobase.h 
b/libstdc++-v3/include/bits/stl_algobase.h
index a30fc299d26..4a6f8195d98 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -391,6 +391,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
  return __result;
}
+
+  template
+   static void
+   __assign_one(_Tp* __to, _Up* __from)
+   { *__to = *__from; }
 };
 
 #if __cplusplus >= 201103L
@@ -411,27 +416,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
  return __result;
}
+
+  template
+   static void
+   __assign_one(_Tp* __to, _Up* __from)
+   { *__to = std::move(*__from); }
 };
 #endif
 
   template
 struct __copy_move<_IsMove, true, random_access_iterator_tag>
 {
-  template
+  template
_GLIBCXX20_CONSTEXPR
-   static _Tp*
-   __copy_m(const _Tp* __first, const _Tp* __last, _Tp* __result)
+   static _Up*
+   __copy_m(_Tp* __first, _Tp* __last, _Up* __result)
{
-#if __cplusplus >= 201103L
- using __assignable = __conditional_t<_IsMove,
-  is_move_assignable<_Tp>,
-  is_copy_assignable<_Tp>>;
- // trivial types can have deleted assignment
- static_assert( __assignable::value, "type must be assignable" );
-#endif
  const ptrdiff_t _Num = __last - __first;
- if (_Num)
+ if (__builtin_expect(_Num > 1, true))
__builtin_memmove(__result, __first, sizeof(_Tp) * _Num);
+ else if (_Num == 1)
+   std::__copy_move<_IsMove, false, random_access_iterator_tag>::
+ __assign_one(__result, __first);
  return __result + _Num;
}
 };
@@ -732,21 +738,17 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
   template
 struct __copy_move_backward<_IsMove, true, random_access_iterator_tag>
 {
-  template
+  template
_GLIBCXX20_CONSTEXPR
-   static _Tp*
-   __copy_move_b(const _Tp* __first, const _Tp* __last, _Tp* __result)
+   static _Up*
+   __copy_move_b(_Tp* __first, _Tp* __last, _Up* __result)
{
-#if __cplusplus >= 201103L
- using __assignable = __conditional_t<_IsMove,
-  is_move_assignable<_Tp>,
-  is_copy_assignable<_Tp>>;
- // trivial types can have deleted assignment
- static_assert( __assignable::value, "type must be assignable" );
-#endif
  const ptrdiff_t _Num = __last - __first;
- if (_Num)
+ if (__builtin_expect(_Num > 1, true))
__builtin_memmove(__result - _Num, 

[committed] libstdc++: Add likely/unlikely attributes to implementation

2023-02-28 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

For the common case of converting valid text this improves performance
significantly.

libstdc++-v3/ChangeLog:

* src/c++11/codecvt.cc: Add [[likely]] and [[unlikely]]
attributes.
---
 libstdc++-v3/src/c++11/codecvt.cc | 92 +++
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/libstdc++-v3/src/c++11/codecvt.cc 
b/libstdc++-v3/src/c++11/codecvt.cc
index e333e795f48..02f05752de8 100644
--- a/libstdc++-v3/src/c++11/codecvt.cc
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -256,19 +256,19 @@ namespace
   return incomplete_mb_character;
 char32_t c1 = (unsigned char) from[0];
 // https://en.wikipedia.org/wiki/UTF-8#Sample_code
-if (c1 < 0x80)
+if (c1 < 0x80) [[likely]]
 {
   ++from;
   return c1;
 }
-else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
+else if (c1 < 0xC2) [[unlikely]] // continuation or overlong 2-byte 
sequence
   return invalid_mb_sequence;
 else if (c1 < 0xE0) // 2-byte sequence
 {
-  if (avail < 2)
+  if (avail < 2) [[unlikely]]
return incomplete_mb_character;
   char32_t c2 = (unsigned char) from[1];
-  if ((c2 & 0xC0) != 0x80)
+  if ((c2 & 0xC0) != 0x80) [[unlikely]]
return invalid_mb_sequence;
   char32_t c = (c1 << 6) + c2 - 0x3080;
   if (c <= maxcode)
@@ -277,17 +277,17 @@ namespace
 }
 else if (c1 < 0xF0) // 3-byte sequence
 {
-  if (avail < 2)
+  if (avail < 2) [[unlikely]]
return incomplete_mb_character;
   char32_t c2 = (unsigned char) from[1];
-  if ((c2 & 0xC0) != 0x80)
+  if ((c2 & 0xC0) != 0x80) [[unlikely]]
return invalid_mb_sequence;
-  if (c1 == 0xE0 && c2 < 0xA0) // overlong
+  if (c1 == 0xE0 && c2 < 0xA0) [[unlikely]] // overlong
return invalid_mb_sequence;
-  if (avail < 3)
+  if (avail < 3) [[unlikely]]
return incomplete_mb_character;
   char32_t c3 = (unsigned char) from[2];
-  if ((c3 & 0xC0) != 0x80)
+  if ((c3 & 0xC0) != 0x80) [[unlikely]]
return invalid_mb_sequence;
   char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
   if (c <= maxcode)
@@ -296,31 +296,31 @@ namespace
 }
 else if (c1 < 0xF5 && maxcode > 0x) // 4-byte sequence
 {
-  if (avail < 2)
+  if (avail < 2) [[unlikely]]
return incomplete_mb_character;
   char32_t c2 = (unsigned char) from[1];
-  if ((c2 & 0xC0) != 0x80)
+  if ((c2 & 0xC0) != 0x80) [[unlikely]]
return invalid_mb_sequence;
-  if (c1 == 0xF0 && c2 < 0x90) // overlong
+  if (c1 == 0xF0 && c2 < 0x90) [[unlikely]] // overlong
return invalid_mb_sequence;
-  if (c1 == 0xF4 && c2 >= 0x90) // > U+10
+  if (c1 == 0xF4 && c2 >= 0x90) [[unlikely]] // > U+10
return invalid_mb_sequence;
-  if (avail < 3)
+  if (avail < 3) [[unlikely]]
return incomplete_mb_character;
   char32_t c3 = (unsigned char) from[2];
-  if ((c3 & 0xC0) != 0x80)
+  if ((c3 & 0xC0) != 0x80) [[unlikely]]
return invalid_mb_sequence;
-  if (avail < 4)
+  if (avail < 4) [[unlikely]]
return incomplete_mb_character;
   char32_t c4 = (unsigned char) from[3];
-  if ((c4 & 0xC0) != 0x80)
+  if ((c4 & 0xC0) != 0x80) [[unlikely]]
return invalid_mb_sequence;
   char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
   if (c <= maxcode)
from += 4;
   return c;
 }
-else // > U+10
+else [[unlikely]] // > U+10
   return invalid_mb_sequence;
   }
 
@@ -330,20 +330,20 @@ namespace
   {
 if (code_point < 0x80)
   {
-   if (to.size() < 1)
+   if (to.size() < 1) [[unlikely]]
  return false;
to = code_point;
   }
 else if (code_point <= 0x7FF)
   {
-   if (to.size() < 2)
+   if (to.size() < 2) [[unlikely]]
  return false;
to = (code_point >> 6) + 0xC0;
to = (code_point & 0x3F) + 0x80;
   }
 else if (code_point <= 0x)
   {
-   if (to.size() < 3)
+   if (to.size() < 3) [[unlikely]]
  return false;
to = (code_point >> 12) + 0xE0;
to = ((code_point >> 6) & 0x3F) + 0x80;
@@ -351,14 +351,14 @@ namespace
   }
 else if (code_point <= 0x10)
   {
-   if (to.size() < 4)
+   if (to.size() < 4) [[unlikely]]
  return false;
to = (code_point >> 18) + 0xF0;
to = ((code_point >> 12) & 0x3F) + 0x80;
to = ((code_point >> 6) & 0x3F) + 0x80;
to = (code_point & 0x3F) + 0x80;
   }
-else
+else [[unlikely]]
   return false;
 return true;
   }
@@ -403,16 +403,16 @@ namespace
  unsigned long maxcode, codecvt_mode mode)
 {
   const size_t avail = from.size();
-  if (avail == 0)
+  if (avail == 0) [[unlikely]]
return 

[committed] libstdc++: Fix uses_allocator_construction_args for pair [PR108952]

2023-02-28 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

This implements LWG 3527 which fixes the handling of pair in
std::uses_allocator_construction_args.

libstdc++-v3/ChangeLog:

PR libstdc++/108952
* include/bits/uses_allocator_args.h
(uses_allocator_construction_args): Implement LWG 3527.
* testsuite/20_util/pair/astuple/get-2.cc: New test.
* testsuite/20_util/scoped_allocator/108952.cc: New test.
* testsuite/20_util/uses_allocator/lwg3527.cc: New test.
---
 .../include/bits/uses_allocator_args.h| 11 +--
 .../testsuite/20_util/pair/astuple/get-2.cc   | 68 +++
 .../20_util/scoped_allocator/108952.cc| 23 +++
 .../20_util/uses_allocator/lwg3527.cc | 22 ++
 4 files changed, 120 insertions(+), 4 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/20_util/pair/astuple/get-2.cc
 create mode 100644 libstdc++-v3/testsuite/20_util/scoped_allocator/108952.cc
 create mode 100644 libstdc++-v3/testsuite/20_util/uses_allocator/lwg3527.cc

diff --git a/libstdc++-v3/include/bits/uses_allocator_args.h 
b/libstdc++-v3/include/bits/uses_allocator_args.h
index e7849abfc91..bc038f03458 100644
--- a/libstdc++-v3/include/bits/uses_allocator_args.h
+++ b/libstdc++-v3/include/bits/uses_allocator_args.h
@@ -185,11 +185,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using _Tp1 = typename _Tp::first_type;
   using _Tp2 = typename _Tp::second_type;
 
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 3527. uses_allocator_construction_args handles rvalue pairs
+  // of rvalue references incorrectly
   return std::make_tuple(piecewise_construct,
  std::uses_allocator_construction_args<_Tp1>(__a,
-   std::move(__pr).first),
+   std::get<0>(std::move(__pr))),
  std::uses_allocator_construction_args<_Tp2>(__a,
-   std::move(__pr).second));
+   std::get<1>(std::move(__pr;
 }
 
 #if __cplusplus > 202002L
@@ -216,9 +219,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   return std::make_tuple(piecewise_construct,
  std::uses_allocator_construction_args<_Tp1>(__a,
-   std::move(__pr).first),
+   std::get<0>(std::move(__pr))),
  std::uses_allocator_construction_args<_Tp2>(__a,
-   std::move(__pr).second));
+   std::get<1>(std::move(__pr;
 }
 #endif // C++23
 
diff --git a/libstdc++-v3/testsuite/20_util/pair/astuple/get-2.cc 
b/libstdc++-v3/testsuite/20_util/pair/astuple/get-2.cc
new file mode 100644
index 000..573d239effa
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/pair/astuple/get-2.cc
@@ -0,0 +1,68 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+template
+constexpr bool
+check()
+{
+  return std::is_same(std::declval())), T>::value;
+}
+
+void
+test_value_category()
+{
+  using P = std::pair;
+  static_assert( check<0, int&, P&>(),
+"get<0>(pair&)" );
+  static_assert( check<1, long&, P&>(),
+"get<1>(pair&)" );
+  static_assert( check<0, int&&, P&&>(),
+"get<0>(pair&&)" );
+  static_assert( check<1, long&&, P&&>(),
+"get<1>(pair&&)" );
+  static_assert( check<0, const int&, const P&>(),
+"get<0>(const pair&)" );
+  static_assert( check<1, const long&, const P&>(),
+"get<1>(const pair&)" );
+  static_assert( check<0, const int&&, const P&&>(),
+"get<0>(const pair&&)" );
+  static_assert( check<1, const long&&, const P&&>(),
+"get<1>(const pair&&)" );
+
+  using PL = std::pair;
+  static_assert( check<0, int&, PL&>(),
+"get<0>(pair&)" );
+  static_assert( check<1, long&, PL&>(),
+"get<1>(pair&)" );
+  static_assert( check<0, int&, PL&&>(),
+"get<0>(pair&&)" );
+  static_assert( check<1, long&, PL&&>(),
+"get<1>(pair&&)" );
+  static_assert( check<0, int&, const PL&>(),
+"get<0>(const pair&)" );
+  static_assert( check<1, long&, const PL&>(),
+"get<1>(const pair&)" );
+  static_assert( check<0, int&, const PL&&>(),
+"get<0>(const pair&&)" );
+  static_assert( check<1, long&, const PL&&>(),
+"get<1>(const pair&&)" );
+
+  using PR = std::pair;
+  static_assert( check<0, int&, P&>(),
+"get<0>(pair&)" );
+  static_assert( check<1, long&, P&>(),
+"get<1>(pair&)" );
+  static_assert( check<0, int&&, PR&&>(),
+"get<0>(pair&&)" );
+  static_assert( check<1, long&&, PR&&>(),
+"get<1>(pair&&)" );
+  static_assert( check<0, int&, const PR&>(),
+"get<0>(const pair&)" );
+  static_assert( check<1, long&, const PR&>(),
+"get<1>(const pair&)" );
+  static_assert( check<0, int&&, const PR&&>(),
+"get<0>(const pair&&)" );
+  static_assert( check<1, long&&, const PR&&>(),
+"get<1>(const pair&&)" );
+}
diff --git 

Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-02-28 Thread Richard Sandiford via Gcc-patches
"Li, Pan2"  writes:
> Hi Richard Sandiford,
>
> After some investigation, I am not sure if it is possible to make it general 
> without any changes to exact_div. We can add one method like below to get the 
> unit poly for all possible N.
>
> template
> inline POLY_CONST_RESULT (N, Ca, Ca)
> normalize_to_unit (const poly_int_pod )
> {
>   typedef POLY_CONST_COEFF (Ca, Ca) C;
>
>   poly_int normalized = a;
>
>   if (normalized.is_constant())
> normalized.coeffs[0] = 1;
>   else
> for (unsigned int i = 0; i < N; i++)
>   POLY_SET_COEFF (C, normalized, i, 1);
>
>   return normalized;
> }
>
> And then adjust the genmodes like below to consume the unit poly.
>
>   printf ("poly_uint16 unit_poly = "
>  "normalize_to_unit (mode_precision[E_%smode]);\n", m->name);
>   printf ("if (known_lt (mode_precision[E_%smode], "
>  "unit_poly * BITS_PER_UNIT))\n", m->name);
>   printf ("  mode_size[E_%smode] = unit_poly;\n", m->name);
>
> I am not sure if it is a good idea to introduce above normalize code into 
> exact_div. Given the comment of the exact_div indicates that “/* Return A / 
> B, given that A is known to be a multiple of B. */”.

My point was that we have multiple ways of dividing poly_ints:

- exact_div, for when the caller knows that the result is always exact
- can_div_trunc_p, for truncating division (round towards 0)
- can_div_away_from_zero_p, for rounding away from 0
- ...

This is like how we have multiple division *_EXPRs on trees.

Until now, exact_div was the correct choice for modes because vector
modes didn't have padding.  We're now changing that, so my suggestion
in the review was to change the division operation that we use.
Rather than use exact_div, we should now use can_div_away_from_zero_p,
which would have the effect of rounding the quotient up.

Something like:

  if (!can_div_away_from_zero_p (mode_precision[E_%smode], BITS_PER_UNIT,
 _size[E_%smode]))
gcc_unreachable ();

But this will require a new overload of can_div_away_from_zero_p, since
the existing one is for constant quotients rather than constant divisors.

Thanks,
Richard

>
> Could you please help to share your opinion about this from the expert’s 
> perspective ? Thank you!
>
> Pan
>
> From: 盼 李 
> Sent: Monday, February 27, 2023 11:13 PM
> To: Richard Sandiford ; incarnation.p.lee--- via 
> Gcc-patches 
> Cc: juzhe.zh...@rivai.ai; kito.ch...@sifive.com; rguent...@suse.de; Li, Pan2 
> 
> Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
>
> Never mind, wish you have a good holiday.
>
> Thanks for pointing this out, the if part cannot take care of poly_int with N 
> > 2. As I understand, we need to make it general for all the N of poly_int.
>
> Thus I would like to double confirm with you about how to make it general. I 
> suppose there will be a new function can_div_away_from_zero_p to replace the 
> if (known_lt(,)) part in genmodes.cc, and leave exact_div unchanged(consider 
> the word exact, I suppose we should not touch here), right? Then we still 
> need one poly_int with all 1 for N as the return if can_div_away_from_zero_p 
> is true.
>
> Thanks again for your professional suggestion, have a nice day, !
>
> Pan
> 
> From: Richard Sandiford 
> mailto:richard.sandif...@arm.com>>
> Sent: Monday, February 27, 2023 22:24
> To: incarnation.p.lee--- via Gcc-patches 
> mailto:gcc-patches@gcc.gnu.org>>
> Cc: incarnation.p@outlook.com 
> mailto:incarnation.p@outlook.com>>; 
> juzhe.zh...@rivai.ai 
> mailto:juzhe.zh...@rivai.ai>>; 
> kito.ch...@sifive.com 
> mailto:kito.ch...@sifive.com>>; 
> rguent...@suse.de 
> mailto:rguent...@suse.de>>; 
> pan2...@intel.com 
> mailto:pan2...@intel.com>>
> Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
>
> Sorry for the slow reply, been away for a couple of weeks.
>
> "incarnation.p.lee--- via Gcc-patches" 
> mailto:gcc-patches@gcc.gnu.org>> writes:
>> From: Pan Li mailto:pan2...@intel.com>>
>>
>>Fix the bug of the rvv bool mode precision with the adjustment.
>>The bits size of vbool*_t will be adjusted to
>>[1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>>adjusted mode precison of vbool*_t will help underlying pass to
>>make the right decision for both the correctness and optimization.
>>
>>Given below sample code:
>>void test_1(int8_t * restrict in, int8_t * restrict out)
>>{
>>  vbool8_t v2 = *(vbool8_t*)in;
>>  vbool16_t v5 = *(vbool16_t*)in;
>>  *(vbool16_t*)(out + 200) = v5;
>>  *(vbool8_t*)(out + 100) = v2;
>>}
>>
>>Before the precision adjustment:
>>addia4,a1,100
>>vsetvli a5,zero,e8,m1,ta,ma

Re: [PATCH] lto: Fix up lto_fixup_prevailing_type [PR108910]

2023-02-28 Thread Richard Biener via Gcc-patches
On Tue, 28 Feb 2023, Jakub Jelinek wrote:

> On Tue, Feb 28, 2023 at 08:58:20AM +, Richard Biener wrote:
> > > Without LTO, TYPE_POINTER_TO/TYPE_REFERENCE_TO chains are only maintained
> > > inside of build_{pointer,reference}_type_for_mode and those routines
> > > ensure that the pointer/reference type added to the chain is really
> > > unqualified (including address space), without extra user alignment
> > > and has just one entry for each of the TYPE_MODE/TYPE_REF_CAN_ALIAS_ALL
> > > pair (unless something would modify the types in place, but that would
> > > be wrong).
> > 
> > Is that so?  I can't find any code verifying that (verify_type?).
> > Of course build_{pointer,reference}_type_for_mode will always build
> > unqualified pointers, but then the LTO code does
> > 
> >   /* The following reconstructs the pointer chains
> >  of the new pointed-to type if we are a main variant.  We do
> >  not stream those so they are broken before fixup.  */
> >   if (TREE_CODE (t) == POINTER_TYPE
> >   && TYPE_MAIN_VARIANT (t) == t)
> > {
> >   TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t));
> >   TYPE_POINTER_TO (TREE_TYPE (t)) = t;
> > }
> >   else if (TREE_CODE (t) == REFERENCE_TYPE
> >&& TYPE_MAIN_VARIANT (t) == t)
> > {
> >   TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t));
> >   TYPE_REFERENCE_TO (TREE_TYPE (t)) = t;
> > }
> > 
> > which was supposed to ensure only putting unqualified pointers
> > (not pointed to types!) to the chain.  So to me the question is
> > rather why a type with TYPE_USER_ALIGN is a the main variant - that's
> > what looks wrong here?
> 
> First of all, it is unclear how the above ensures that type isn't
> added to those chains even when it already is there (or some similar type).
> Say lto1 during initialization needs build_pointer_type (float_type_node)
> and later on lto_fixup_prevailing_type is called on some float *.

We try to make sure to put all built types into the type merging
machinery, so I think it shouldn't happen - but then I cannot rule
it out.  I'm also not sure whether duplicates would really pose
a problem here (other than waste).  If we want to make sure we should
probably enhance verify_types to verify the TYPE_POINTER_TO chain ...

> I'll try to debug why those user aligned types become TYPE_MAIN_VARIANT
> though...
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


Re: [PATCH] Fixup possible VR_ANTI_RANGE value_range issues

2023-02-28 Thread Richard Biener via Gcc-patches
On Mon, 27 Feb 2023, Aldy Hernandez wrote:

> 
> 
> On 2/27/23 14:58, Richard Biener wrote:
> > After fixing PR107561 the following avoids looking at VR_ANTI_RANGE
> > ranges where it doesn't seem obvious the code does the correct
> > thing here (lower_bound and upper_bound do not work as expected).
> 
> I do realize there's some confusion here, and some of it is my fault. This has
> become obvious in my upcoming work removing all of legacy.
> 
> What's going on is that ultimately min/max are broken with (non-legacy)
> iranges.  Or at the very least inconsistent between legacy and non-legacy.
> These are left over from the legacy world, and have been marked DEPRECATED for
> a few releases, but the middle end warnings continued to use them and even
> added new uses after they were obsoleted.
> 
> min/max have different meanings depending on kind(), which is also deprecated,
> btw.  They are the underlying min/max fields from the legacy implementation,
> and thus somewhat leak the implementation details. Unfortunately, they are
> being called from non-legacy code which is ignoring the kind() field.
> 
> In retrospect I should've converted everything away from min/max/kind years
> ago, or at the very least converted min/max to work with non-legacy more
> consistently.
> 
> For the record:
> 
>   enum value_range_kind kind () const;// DEPRECATED
>   tree min () const;  // DEPRECATED
>   tree max () const;  // DEPRECATED
>   bool symbolic_p () const;   // DEPRECATED
>   bool constant_p () const;   // DEPRECATED
>   void normalize_symbolics ();// DEPRECATED
>   void normalize_addresses ();// DEPRECATED
>   bool may_contain_p (tree) const;// DEPRECATED
>   bool legacy_verbose_union_ (const class irange *);  // DEPRECATED
>   bool legacy_verbose_intersect (const irange *); // DEPRECATED
> 
> In my local branch I tried converting all the middle-end legacy API uses to
> the new API, but a bunch of tests started failing, and lots more false
> positives started showing up in correct code.  I suspect that's part of the
> reason legacy continued to be used in these passes :-/.  As you point out in
> the PR, the tests seem designed to test the current (at times broken)
> implementation.
> 
> That being said, the 5 fixes in your patch are not wrong to begin with,
> because all uses guard lower_bound() and upper_bound() which work correctly.
> They return the lowest and uppermost possible bound for the range (ignoring
> the underlying implementation).  So, the lower bound of a signed non-zero is
> -INF because ~[0,0] is really [-INF,-1][1,+INF]. In the min/max legacy code,
> min() of signed non-zero (~[0,0]) is 0.  The new implementation has no concept
> of anti-ranges, and we don't leak any of that out.
> 
> Any uses of min/max without looking at kind() are definitely broken. OTOH uses
> of lower/upper_bound are fine and should work with both legacy and non-legacy.
> 
> Unrelated, but one place where I haven't been able to convince myself that the
> use is correct is bounds_of_var_in_loop:
> 
> > /* Check if init + nit * step overflows.  Though we checked
> >scev {init, step}_loop doesn't wrap, it is not enough
> >because the loop may exit immediately.  Overflow could
> >happen in the plus expression in this case.  */
> > if ((dir == EV_DIR_DECREASES
> >  && compare_values (maxvr.min (), initvr.min ()) != -1)
> > || (dir == EV_DIR_GROWS
> >  && compare_values (maxvr.max (), initvr.max ()) != 1))
> 
> Ughh...this is all slated to go away, and I have patches removing all of
> legacy and the old API.
> 
> Does this help?  Do you still think lower and upper bound are not working as
> expected?

lower_bound/upper_bound work as expected, 
tree_lower_bound/tree_upper_bound do not.  I've checked and all uses
I "fixed" use lower_bound/upper_boud.

tree_lower_bound & friends are 'protected', but in the above light the
comment

  // potential promotion to public?

looks dangerous (I was considering using them ...).

I'm dropping the patch, it's probably time to work on getting rid of
'value_range' uses (aka legacy_mode_p ()) and/or replace raw accesses
to the min/max for that mode with m_base accesses?  At least
that there's tree_{upper,lower}_bound for pair != 0 suggests this
function is used with differing semantics internally which is a
source of confusion (to me at least).  tree_{lower,upper}_bound
should be able to assert it's not a legacy mode range and
min/max should be able to assert that it is.

Btw, legacy_upper_bound looks wrong:

  if (m_kind == VR_ANTI_RANGE)
{
  tree typ = type (), t;
  if (pair == 1 || vrp_val_is_min (min ()))
t = vrp_val_max (typ);
  else
t = wide_int_to_tree (typ, wi::to_wide (min ()) - 1);

not sure what the pair == 1 case is about, but the upper bound
of an 

Re: [PATCH] RISC-V: Fix wrong partial subreg check for bsetidisi

2023-02-28 Thread Philipp Tomsich
On Tue, 28 Feb 2023 at 06:00, Lin Sinan  wrote:
>
> From: Lin Sinan 
>
> The partial subreg check should be for subreg operand(operand 1) instead of
> the immediate operand(operand 2). This change also fix pr68648.c in zbs.

Good catch.
Reviewed-by: 


[PATCH v2] MIPS: Add buildtime option to set msa default

2023-02-28 Thread Junxian Zhu
From: Junxian Zhu 

Add buildtime option to decide whether will compiler build with `-mmsa` option 
default.

gcc/ChangeLog:
* config.gcc: add -with-{no-}msa build option.
* config/mips/mips.h: Likewise.
* doc/install.texi: Likewise.

Signed-off-by: Junxian Zhu 
---
 gcc/config.gcc | 19 +--
 gcc/config/mips/mips.h |  3 ++-
 gcc/doc/install.texi   |  8 
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index c070e6ecd2e..da3a6d3ba1f 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4709,7 +4709,7 @@ case "${target}" in
;;
 
mips*-*-*)
-   supported_defaults="abi arch arch_32 arch_64 float fpu nan 
fp_32 odd_spreg_32 tune tune_32 tune_64 divide llsc mips-plt synci lxc1-sxc1 
madd4 compact-branches"
+   supported_defaults="abi arch arch_32 arch_64 float fpu nan 
fp_32 odd_spreg_32 tune tune_32 tune_64 divide llsc mips-plt synci lxc1-sxc1 
madd4 compact-branches msa"
 
case ${with_float} in
"" | soft | hard)
@@ -4871,6 +4871,21 @@ case "${target}" in
exit 1
;;
esac
+
+   case ${with_msa} in
+   yes)
+   with_msa=msa
+   ;;
+   no)
+   with_msa=no-msa
+   ;;
+   "")
+   ;;
+   *)
+   echo "Unknown msa type used in --with-msa" 1>&2
+   exit 1
+   ;;
+   esac
;;
 
loongarch*-*-*)
@@ -5815,7 +5830,7 @@ case ${target} in
 esac
 
 t=
-all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 
schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls 
lxc1-sxc1 madd4 isa_spec compact-branches"
+all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 
schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls 
lxc1-sxc1 madd4 isa_spec compact-branches msa"
 for option in $all_defaults
 do
eval "val=\$with_"`echo $option | sed s/-/_/g`
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index fbb4372864f..13bc193b752 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -916,7 +916,8 @@ struct mips_cpu_info {
   {"synci", "%{!msynci:%{!mno-synci:-m%(VALUE)}}" },   \
   {"lxc1-sxc1", "%{!mlxc1-sxc1:%{!mno-lxc1-sxc1:-m%(VALUE)}}" }, \
   {"madd4", "%{!mmadd4:%{!mno-madd4:-m%(VALUE)}}" }, \
-  {"compact-branches", "%{!mcompact-branches=*:-mcompact-branches=%(VALUE)}" } 
\
+  {"compact-branches", "%{!mcompact-branches=*:-mcompact-branches=%(VALUE)}" 
}, \
+  {"msa", "%{!mmsa:%{!mno-msa:-m%(VALUE)}}" } \
 
 /* A spec that infers the:
-mnan=2008 setting from a -mips argument,
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 8ef5c1414da..718f48fbaeb 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1653,6 +1653,14 @@ unfused is normally expected).  Disabling these 
instructions is the
 only way to ensure compatible code is generated; this will incur
 a performance penalty.
 
+@item --with-msa
+On MIPS targets, make @option{-mmsa} the default when no
+@option{-mno-msa} option is passed.
+
+@item --without-msa
+On MIPS targets, make @option{-mno-msa} the default when no
+@option{-mmsa} option is passed. This is the default.
+
 @item --with-mips-plt
 On MIPS targets, make use of copy relocations and PLTs.
 These features are extensions to the traditional
-- 
2.39.2


Re: [PATCH] lto: Fix up lto_fixup_prevailing_type [PR108910]

2023-02-28 Thread Jakub Jelinek via Gcc-patches
On Tue, Feb 28, 2023 at 08:58:20AM +, Richard Biener wrote:
> > Without LTO, TYPE_POINTER_TO/TYPE_REFERENCE_TO chains are only maintained
> > inside of build_{pointer,reference}_type_for_mode and those routines
> > ensure that the pointer/reference type added to the chain is really
> > unqualified (including address space), without extra user alignment
> > and has just one entry for each of the TYPE_MODE/TYPE_REF_CAN_ALIAS_ALL
> > pair (unless something would modify the types in place, but that would
> > be wrong).
> 
> Is that so?  I can't find any code verifying that (verify_type?).
> Of course build_{pointer,reference}_type_for_mode will always build
> unqualified pointers, but then the LTO code does
> 
>   /* The following reconstructs the pointer chains
>  of the new pointed-to type if we are a main variant.  We do
>  not stream those so they are broken before fixup.  */
>   if (TREE_CODE (t) == POINTER_TYPE
>   && TYPE_MAIN_VARIANT (t) == t)
> {
>   TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t));
>   TYPE_POINTER_TO (TREE_TYPE (t)) = t;
> }
>   else if (TREE_CODE (t) == REFERENCE_TYPE
>&& TYPE_MAIN_VARIANT (t) == t)
> {
>   TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t));
>   TYPE_REFERENCE_TO (TREE_TYPE (t)) = t;
> }
> 
> which was supposed to ensure only putting unqualified pointers
> (not pointed to types!) to the chain.  So to me the question is
> rather why a type with TYPE_USER_ALIGN is a the main variant - that's
> what looks wrong here?

First of all, it is unclear how the above ensures that type isn't
added to those chains even when it already is there (or some similar type).
Say lto1 during initialization needs build_pointer_type (float_type_node)
and later on lto_fixup_prevailing_type is called on some float *.

I'll try to debug why those user aligned types become TYPE_MAIN_VARIANT
though...

Jakub



Re: [PATCH] ubsan: Honor -fstrict-flex-arrays= in -fsanitize=bounds [PR108894]

2023-02-28 Thread Jakub Jelinek via Gcc-patches
On Tue, Feb 28, 2023 at 09:02:47AM +, Richard Biener wrote:
> > While this isn't really a regression, the -fstrict-flex-arrays*
> > option is new in GCC 13 and so I think we should make -fsanitize=bounds
> > play with it well from the beginning.
> > 
> > The current behavior is that -fsanitize=bounds considers all trailing
> > arrays as flexible member-like arrays and both -fsanitize=bounds and
> > -fsanitize=bounds-strict because of a bug don't even instrument
> > [0] arrays at all, not as trailing nor when followed by other members.
> > 
> > I think -fstrict-flex-arrays* options can be considered as language
> > mode changing options, by default flexible member-like arrays are
> > handled like flexible arrays, but that option can change the set of
> > the arrays which are treated like that.  So, -fsanitize=bounds should
> > change with that on what is considered acceptable and what isn't.
> > While -fsanitize=bounds-strict should reject them all always to
> > continue previous behavior.
> > 
> > The following patch implements that.  To support [0] array instrumentation,
> > I had to change the meaning of the bounds argument to .UBSAN_BOUNDS,
> > previously it was the TYPE_MAX_VALUE of the domain unless ignore_off_by_one
> > (used for taking address of the array element rather than accessing it;
> > in that case 1 is added to the bound argument) and the later lowered checks
> > were if (index > bound) report_failure ().
> > The problem with that is that for [0] arrays where (at least for C++)
> > the max value is all ones, for accesses that condition will be never true;
> > for addresses of elements it was working (in C++) correctly before.
> > This patch changes it to add 1 + ignore_off_by_one, so -1 becomes 0 or
> > 1 for _ref and changing the lowering to be if (index >= bound)
> > report_failure ().  Furthermore, as C represents the [0] arrays with
> > NULL TYPE_MAX_VALUE, I treated those like the C++ ones.
> 
> LGTM.  Btw, what does -fsanitize=bounds do for C++ code which lacks
> flexible arrays?  Does it treat all trailing arrays as fixed?

As -fstrict-flex-arrays* options and strict_flex_array attribute are
basically ignored right now for C++, I've kept the previous behavior
for C++ (except for fixing handling of [0] arrays), which is that it like
the rest of the compiler treats all trailing arrays as flexible member like
(ok, there is a loop looking for nested references, so
struct S { int a; struct T { int b; int c[1]; } d; int e; };
that the e member results in c not being treated flexible member-like).
If/when -fstrict-flex-arrays* support is added for C++, we'll need to
drop one of the !c_dialect_cxx () guards there even in c-ubsan.cc.

Jakub



Re: [PATCH] ubsan: Honor -fstrict-flex-arrays= in -fsanitize=bounds [PR108894]

2023-02-28 Thread Richard Biener via Gcc-patches
On Tue, 28 Feb 2023, Jakub Jelinek wrote:

> Hi!
> 
> While this isn't really a regression, the -fstrict-flex-arrays*
> option is new in GCC 13 and so I think we should make -fsanitize=bounds
> play with it well from the beginning.
> 
> The current behavior is that -fsanitize=bounds considers all trailing
> arrays as flexible member-like arrays and both -fsanitize=bounds and
> -fsanitize=bounds-strict because of a bug don't even instrument
> [0] arrays at all, not as trailing nor when followed by other members.
> 
> I think -fstrict-flex-arrays* options can be considered as language
> mode changing options, by default flexible member-like arrays are
> handled like flexible arrays, but that option can change the set of
> the arrays which are treated like that.  So, -fsanitize=bounds should
> change with that on what is considered acceptable and what isn't.
> While -fsanitize=bounds-strict should reject them all always to
> continue previous behavior.
> 
> The following patch implements that.  To support [0] array instrumentation,
> I had to change the meaning of the bounds argument to .UBSAN_BOUNDS,
> previously it was the TYPE_MAX_VALUE of the domain unless ignore_off_by_one
> (used for taking address of the array element rather than accessing it;
> in that case 1 is added to the bound argument) and the later lowered checks
> were if (index > bound) report_failure ().
> The problem with that is that for [0] arrays where (at least for C++)
> the max value is all ones, for accesses that condition will be never true;
> for addresses of elements it was working (in C++) correctly before.
> This patch changes it to add 1 + ignore_off_by_one, so -1 becomes 0 or
> 1 for _ref and changing the lowering to be if (index >= bound)
> report_failure ().  Furthermore, as C represents the [0] arrays with
> NULL TYPE_MAX_VALUE, I treated those like the C++ ones.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

LGTM.  Btw, what does -fsanitize=bounds do for C++ code which lacks
flexible arrays?  Does it treat all trailing arrays as fixed?

Thanks,
Richard.

> 2023-02-28  Jakub Jelinek  
> 
>   PR sanitizer/108894
> gcc/
>   * ubsan.cc (ubsan_expand_bounds_ifn): Emit index >= bound
>   comparison rather than index > bound.
>   * gimple-fold.cc (gimple_fold_call): Use tree_int_cst_lt
>   rather than tree_int_cst_le for IFN_UBSAN_BOUND comparison.
>   * doc/invoke.texi (-fsanitize=bounds): Document that whether
>   flexible array member-like arrays are instrumented or not depends
>   on -fstrict-flex-arrays* options of strict_flex_array attributes.
>   (-fsanitize=bounds-strict): Document that flexible array members
>   are not instrumented.
> gcc/c-family/
>   * c-common.h (c_strict_flex_array_level_of): Declare.
>   * c-common.cc (c_strict_flex_array_level_of): New function,
>   moved and renamed from c-decl.cc's strict_flex_array_level_of.
>   * c-ubsan.cc (ubsan_instrument_bounds): Fix comment typo.  For
>   C check c_strict_flex_array_level_of whether a trailing array
>   should be treated as flexible member like.  Handle C [0] arrays.
>   Add 1 + index_off_by_one rather than index_off_by_one to bounds
>   and use tree_int_cst_lt rather than tree_int_cst_le for idx vs.
>   bounds comparison.
> gcc/c/
>   * c-decl.cc (strict_flex_array_level_of): Move to c-common.cc
>   and rename to c_strict_flex_array_level_of.
>   (is_flexible_array_member_p): Adjust caller.
> gcc/testsuite/
>   * gcc.dg/ubsan/bounds-4.c: New test.
>   * gcc.dg/ubsan/bounds-4a.c: New test.
>   * gcc.dg/ubsan/bounds-4b.c: New test.
>   * gcc.dg/ubsan/bounds-4c.c: New test.
>   * gcc.dg/ubsan/bounds-4d.c: New test.
>   * g++.dg/ubsan/bounds-1.C: New test.
> 
> --- gcc/ubsan.cc.jj   2023-02-06 09:05:49.001628881 +0100
> +++ gcc/ubsan.cc  2023-02-27 16:46:13.452122423 +0100
> @@ -721,7 +721,7 @@ ubsan_expand_bounds_ifn (gimple_stmt_ite
>  
>gimple_stmt_iterator gsi_orig = *gsi;
>  
> -  /* Create condition "if (index > bound)".  */
> +  /* Create condition "if (index >= bound)".  */
>basic_block then_bb, fallthru_bb;
>gimple_stmt_iterator cond_insert_point
>  = create_cond_insert_point (gsi, false, false, true,
> @@ -730,7 +730,7 @@ ubsan_expand_bounds_ifn (gimple_stmt_ite
>index = force_gimple_operand_gsi (_insert_point, index,
>   true, NULL_TREE,
>   false, GSI_NEW_STMT);
> -  gimple *g = gimple_build_cond (GT_EXPR, index, bound, NULL_TREE, 
> NULL_TREE);
> +  gimple *g = gimple_build_cond (GE_EXPR, index, bound, NULL_TREE, 
> NULL_TREE);
>gimple_set_location (g, loc);
>gsi_insert_after (_insert_point, g, GSI_NEW_STMT);
>  
> --- gcc/gimple-fold.cc.jj 2023-02-08 11:57:29.726582589 +0100
> +++ gcc/gimple-fold.cc2023-02-27 16:45:00.895160752 +0100
> @@ -5624,7 +5624,7 @@ gimple_fold_call (gimple_stmt_iterator *
>   

Re: [PATCH] lto: Fix up lto_fixup_prevailing_type [PR108910]

2023-02-28 Thread Richard Biener via Gcc-patches
On Tue, 28 Feb 2023, Jakub Jelinek wrote:

> Hi!
> 
> Without LTO, TYPE_POINTER_TO/TYPE_REFERENCE_TO chains are only maintained
> inside of build_{pointer,reference}_type_for_mode and those routines
> ensure that the pointer/reference type added to the chain is really
> unqualified (including address space), without extra user alignment
> and has just one entry for each of the TYPE_MODE/TYPE_REF_CAN_ALIAS_ALL
> pair (unless something would modify the types in place, but that would
> be wrong).

Is that so?  I can't find any code verifying that (verify_type?).
Of course build_{pointer,reference}_type_for_mode will always build
unqualified pointers, but then the LTO code does

  /* The following reconstructs the pointer chains
 of the new pointed-to type if we are a main variant.  We do
 not stream those so they are broken before fixup.  */
  if (TREE_CODE (t) == POINTER_TYPE
  && TYPE_MAIN_VARIANT (t) == t)
{
  TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t));
  TYPE_POINTER_TO (TREE_TYPE (t)) = t;
}
  else if (TREE_CODE (t) == REFERENCE_TYPE
   && TYPE_MAIN_VARIANT (t) == t)
{
  TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t));
  TYPE_REFERENCE_TO (TREE_TYPE (t)) = t;
}

which was supposed to ensure only putting unqualified pointers
(not pointed to types!) to the chain.  So to me the question is
rather why a type with TYPE_USER_ALIGN is a the main variant - that's
what looks wrong here?

Richard.

> Now, LTO adds stuff to these chains in lto_fixup_prevailing_type but
> doesn't guarantee that.  The testcase in the PR (which I'm not including
> for testsuite because when (I hope) the aarch64 backend bug will be fixed,
> the testcase would work either way) shows a case where user has
> TYPE_USER_ALIGN type with very high alignment, as there aren't enough
> pointers to float in the code left that one becomes the prevailing one,
> lto_fixup_prevailing_type puts it into the TYPE_POINTER_TO chain of
> float and later on during expansion of __builtin_cexpif expander
> uses build_pointer_type (float_type_node) to emit a sincosf call and
> instead of getting a normal pointer type gets this non-standard one.
> 
> The following patch fixes that by not adding into those chains
> qualified or user aligned types and by making sure that some type
> for the TYPE_MODE/TYPE_REF_CAN_ALIAS_ALL combination (e.g. from lto1
> initialization) isn't there already before adding a new one.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2023-02-28  Jakub Jelinek  
> 
>   PR target/108910
>   * lto-common.cc (lto_fixup_prevailing_type): Don't add t to
>   TYPE_POINTER_TO or TYPE_REFERENCE_TO chain if it has non-zero
>   TYPE_QUALS, or TYPE_USER_ALIGN or some other type with the
>   same TYPE_MODE and TYPE_REF_CAN_ALIAS_ALL flag is already
>   present.
> 
> --- gcc/lto/lto-common.cc.jj  2023-01-16 11:52:16.165732856 +0100
> +++ gcc/lto/lto-common.cc 2023-02-28 01:42:51.006764018 +0100
> @@ -984,21 +984,35 @@ lto_fixup_prevailing_type (tree t)
>TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (mv);
>TYPE_NEXT_VARIANT (mv) = t;
>  }
> -
> -  /* The following reconstructs the pointer chains
> - of the new pointed-to type if we are a main variant.  We do
> - not stream those so they are broken before fixup.  */
> -  if (TREE_CODE (t) == POINTER_TYPE
> -  && TYPE_MAIN_VARIANT (t) == t)
> -{
> -  TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t));
> -  TYPE_POINTER_TO (TREE_TYPE (t)) = t;
> -}
> -  else if (TREE_CODE (t) == REFERENCE_TYPE
> -&& TYPE_MAIN_VARIANT (t) == t)
> +  else if (!TYPE_QUALS (t) && !TYPE_USER_ALIGN (t))
>  {
> -  TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t));
> -  TYPE_REFERENCE_TO (TREE_TYPE (t)) = t;
> +  /* The following reconstructs the pointer chains
> +  of the new pointed-to type if we are a main variant.  We do
> +  not stream those so they are broken before fixup.
> +  Don't add it if despite being main variant it is
> +  qualified or user aligned type.  Don't add it if there
> +  is something in the chain already.  */
> +  tree *p = NULL;
> +  if (TREE_CODE (t) == POINTER_TYPE)
> + p = _POINTER_TO (TREE_TYPE (t));
> +  else if (TREE_CODE (t) == REFERENCE_TYPE)
> + p = _REFERENCE_TO (TREE_TYPE (t));
> +  if (p)
> + {
> +   tree t2;
> +   for (t2 = *p; t2; t2 = TYPE_NEXT_PTR_TO (t2))
> + if (TYPE_MODE (t2) == TYPE_MODE (t)
> + && TYPE_REF_CAN_ALIAS_ALL (t2) == TYPE_REF_CAN_ALIAS_ALL (t))
> +   break;
> +   if (t2 == NULL_TREE)
> + {
> +   if (TREE_CODE (t) == POINTER_TYPE)
> + TYPE_NEXT_PTR_TO (t) = *p;
> +   else
> + TYPE_NEXT_REF_TO (t) = *p;
> +   *p = t;
> + }
> + }
>  }
>  }
>  
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions 

[PATCH] ubsan: Honor -fstrict-flex-arrays= in -fsanitize=bounds [PR108894]

2023-02-28 Thread Jakub Jelinek via Gcc-patches
Hi!

While this isn't really a regression, the -fstrict-flex-arrays*
option is new in GCC 13 and so I think we should make -fsanitize=bounds
play with it well from the beginning.

The current behavior is that -fsanitize=bounds considers all trailing
arrays as flexible member-like arrays and both -fsanitize=bounds and
-fsanitize=bounds-strict because of a bug don't even instrument
[0] arrays at all, not as trailing nor when followed by other members.

I think -fstrict-flex-arrays* options can be considered as language
mode changing options, by default flexible member-like arrays are
handled like flexible arrays, but that option can change the set of
the arrays which are treated like that.  So, -fsanitize=bounds should
change with that on what is considered acceptable and what isn't.
While -fsanitize=bounds-strict should reject them all always to
continue previous behavior.

The following patch implements that.  To support [0] array instrumentation,
I had to change the meaning of the bounds argument to .UBSAN_BOUNDS,
previously it was the TYPE_MAX_VALUE of the domain unless ignore_off_by_one
(used for taking address of the array element rather than accessing it;
in that case 1 is added to the bound argument) and the later lowered checks
were if (index > bound) report_failure ().
The problem with that is that for [0] arrays where (at least for C++)
the max value is all ones, for accesses that condition will be never true;
for addresses of elements it was working (in C++) correctly before.
This patch changes it to add 1 + ignore_off_by_one, so -1 becomes 0 or
1 for _ref and changing the lowering to be if (index >= bound)
report_failure ().  Furthermore, as C represents the [0] arrays with
NULL TYPE_MAX_VALUE, I treated those like the C++ ones.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-02-28  Jakub Jelinek  

PR sanitizer/108894
gcc/
* ubsan.cc (ubsan_expand_bounds_ifn): Emit index >= bound
comparison rather than index > bound.
* gimple-fold.cc (gimple_fold_call): Use tree_int_cst_lt
rather than tree_int_cst_le for IFN_UBSAN_BOUND comparison.
* doc/invoke.texi (-fsanitize=bounds): Document that whether
flexible array member-like arrays are instrumented or not depends
on -fstrict-flex-arrays* options of strict_flex_array attributes.
(-fsanitize=bounds-strict): Document that flexible array members
are not instrumented.
gcc/c-family/
* c-common.h (c_strict_flex_array_level_of): Declare.
* c-common.cc (c_strict_flex_array_level_of): New function,
moved and renamed from c-decl.cc's strict_flex_array_level_of.
* c-ubsan.cc (ubsan_instrument_bounds): Fix comment typo.  For
C check c_strict_flex_array_level_of whether a trailing array
should be treated as flexible member like.  Handle C [0] arrays.
Add 1 + index_off_by_one rather than index_off_by_one to bounds
and use tree_int_cst_lt rather than tree_int_cst_le for idx vs.
bounds comparison.
gcc/c/
* c-decl.cc (strict_flex_array_level_of): Move to c-common.cc
and rename to c_strict_flex_array_level_of.
(is_flexible_array_member_p): Adjust caller.
gcc/testsuite/
* gcc.dg/ubsan/bounds-4.c: New test.
* gcc.dg/ubsan/bounds-4a.c: New test.
* gcc.dg/ubsan/bounds-4b.c: New test.
* gcc.dg/ubsan/bounds-4c.c: New test.
* gcc.dg/ubsan/bounds-4d.c: New test.
* g++.dg/ubsan/bounds-1.C: New test.

--- gcc/ubsan.cc.jj 2023-02-06 09:05:49.001628881 +0100
+++ gcc/ubsan.cc2023-02-27 16:46:13.452122423 +0100
@@ -721,7 +721,7 @@ ubsan_expand_bounds_ifn (gimple_stmt_ite
 
   gimple_stmt_iterator gsi_orig = *gsi;
 
-  /* Create condition "if (index > bound)".  */
+  /* Create condition "if (index >= bound)".  */
   basic_block then_bb, fallthru_bb;
   gimple_stmt_iterator cond_insert_point
 = create_cond_insert_point (gsi, false, false, true,
@@ -730,7 +730,7 @@ ubsan_expand_bounds_ifn (gimple_stmt_ite
   index = force_gimple_operand_gsi (_insert_point, index,
true, NULL_TREE,
false, GSI_NEW_STMT);
-  gimple *g = gimple_build_cond (GT_EXPR, index, bound, NULL_TREE, NULL_TREE);
+  gimple *g = gimple_build_cond (GE_EXPR, index, bound, NULL_TREE, NULL_TREE);
   gimple_set_location (g, loc);
   gsi_insert_after (_insert_point, g, GSI_NEW_STMT);
 
--- gcc/gimple-fold.cc.jj   2023-02-08 11:57:29.726582589 +0100
+++ gcc/gimple-fold.cc  2023-02-27 16:45:00.895160752 +0100
@@ -5624,7 +5624,7 @@ gimple_fold_call (gimple_stmt_iterator *
  {
index = fold_convert (TREE_TYPE (bound), index);
if (TREE_CODE (index) == INTEGER_CST
-   && tree_int_cst_le (index, bound))
+   && tree_int_cst_lt (index, bound))
  {
replace_call_with_value (gsi, NULL_TREE);