[gcc r15-336] [ranger] Force buffer alignment in Value_Range [PR114912]

2024-05-08 Thread Aldy Hernandez via Gcc-cvs
https://gcc.gnu.org/g:d7ff8ae5313bea755f5960786b33a7b151e7b663

commit r15-336-gd7ff8ae5313bea755f5960786b33a7b151e7b663
Author: Aldy Hernandez 
Date:   Fri May 3 11:17:32 2024 +0200

[ranger] Force buffer alignment in Value_Range [PR114912]

gcc/ChangeLog:

PR tree-optimization/114912
* value-range.h (class Value_Range): Use a union.

Diff:
---
 gcc/value-range.h | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/gcc/value-range.h b/gcc/value-range.h
index 6e24874c0a25..44cdbd717f4c 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -800,10 +800,14 @@ private:
   void init (const vrange &);
 
   vrange *m_vrange;
-  // The buffer must be at least the size of the largest range.
-  static_assert (sizeof (int_range_max) > sizeof (frange), "");
-  static_assert (sizeof (int_range_max) > sizeof (prange), "");
-  char m_buffer[sizeof (int_range_max)];
+  union buffer_type {
+int_range_max ints;
+frange floats;
+unsupported_range unsupported;
+prange pointers;
+buffer_type () { }
+~buffer_type () { }
+  } m_buffer;
 };
 
 // The default constructor is uninitialized and must be initialized
@@ -811,6 +815,7 @@ private:
 
 inline
 Value_Range::Value_Range ()
+  : m_buffer ()
 {
   m_vrange = NULL;
 }
@@ -877,13 +882,13 @@ Value_Range::init (tree type)
   gcc_checking_assert (TYPE_P (type));
 
   if (irange::supports_p (type))
-m_vrange = new (&m_buffer) int_range_max ();
+m_vrange = new (&m_buffer.ints) int_range_max ();
   else if (prange::supports_p (type))
-m_vrange = new (&m_buffer) prange ();
+m_vrange = new (&m_buffer.pointers) prange ();
   else if (frange::supports_p (type))
-m_vrange = new (&m_buffer) frange ();
+m_vrange = new (&m_buffer.floats) frange ();
   else
-m_vrange = new (&m_buffer) unsupported_range ();
+m_vrange = new (&m_buffer.unsupported) unsupported_range ();
 }
 
 // Initialize object with a copy of R.
@@ -892,13 +897,14 @@ inline void
 Value_Range::init (const vrange &r)
 {
   if (is_a <irange> (r))
-m_vrange = new (&m_buffer) int_range_max (as_a <irange> (r));
+m_vrange = new (&m_buffer.ints) int_range_max (as_a <irange> (r));
   else if (is_a <prange> (r))
-m_vrange = new (&m_buffer) prange (as_a <prange> (r));
+m_vrange = new (&m_buffer.pointers) prange (as_a <prange> (r));
   else if (is_a <frange> (r))
-m_vrange = new (&m_buffer) frange (as_a <frange> (r));
+m_vrange = new (&m_buffer.floats) frange (as_a <frange> (r));
   else
-m_vrange = new (&m_buffer) unsupported_range (as_a <unsupported_range> (r));
+m_vrange = new (&m_buffer.unsupported)
+  unsupported_range (as_a <unsupported_range> (r));
 }
 
 // Assignment operator.  Copying incompatible types is allowed.  That


[gcc r15-335] [prange] Reword dispatch error message

2024-05-08 Thread Aldy Hernandez via Gcc-cvs
https://gcc.gnu.org/g:be3df704ce7de417682d57bc3e819dfcf0fdd501

commit r15-335-gbe3df704ce7de417682d57bc3e819dfcf0fdd501
Author: Aldy Hernandez 
Date:   Wed May 8 22:50:22 2024 +0200

[prange] Reword dispatch error message

After reading the ICE for the PR, it's obvious the error message is
rather cryptic.  This makes it less so.

gcc/ChangeLog:

* range-op.cc (range_op_handler::discriminator_fail): Reword error
message.

Diff:
---
 gcc/range-op.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 245385fe4876..e00136479a6d 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -197,7 +197,8 @@ range_op_handler::discriminator_fail (const vrange &r1,
   gcc_checking_assert (r1.m_discriminator < sizeof (name) - 1);
   gcc_checking_assert (r2.m_discriminator < sizeof (name) - 1);
   gcc_checking_assert (r3.m_discriminator < sizeof (name) - 1);
-  fprintf (stderr, "DISCRIMINATOR FAIL.  Dispatch > RO_%c%c%c <\n",
+  fprintf (stderr,
+  "Unsupported operand combination in dispatch: RO_%c%c%c\n",
   name[r1.m_discriminator],
   name[r2.m_discriminator],
   name[r3.m_discriminator]);


[gcc r13-8734] expansion: Use __trunchfbf2 calls rather than __extendhfbf2 [PR114907]

2024-05-08 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:cad27df08915ead8db3c7d512cfcc1866e7ece69

commit r13-8734-gcad27df08915ead8db3c7d512cfcc1866e7ece69
Author: Jakub Jelinek 
Date:   Tue May 7 21:30:21 2024 +0200

expansion: Use __trunchfbf2 calls rather than __extendhfbf2 [PR114907]

The HF and BF modes have the same size/precision and neither is
a subset nor superset of the other.
So, using either __extendhfbf2 or __trunchfbf2 is weird.
The expansion apparently emits __extendhfbf2, but on the libgcc side
we apparently have __trunchfbf2 implemented.

I think it is easier to switch to using what is available rather than
adding new entrypoints to libgcc, even alias, because this is backportable.

2024-05-07  Jakub Jelinek  

PR middle-end/114907
* expr.cc (convert_mode_scalar): Use trunc_optab rather than
sext_optab for HF->BF conversions.
* optabs-libfuncs.cc (gen_trunc_conv_libfunc): Likewise.

* gcc.dg/pr114907.c: New test.

(cherry picked from commit 28ee13db2e9d995bd3728c4ff3a3545e24b39cd2)

Diff:
---
 gcc/expr.cc | 12 ++--
 gcc/optabs-libfuncs.cc  |  4 +++-
 gcc/testsuite/gcc.dg/pr114907.c | 27 +++
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 5dac06fa94b5..705d5b34eed6 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -351,8 +351,16 @@ convert_mode_scalar (rtx to, rtx from, int unsignedp)
  && REAL_MODE_FORMAT (from_mode) == &ieee_half_format));
 
   if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
-   /* Conversion between decimal float and binary float, same size.  */
-   tab = DECIMAL_FLOAT_MODE_P (from_mode) ? trunc_optab : sext_optab;
+   {
+ if (REAL_MODE_FORMAT (to_mode) == &arm_bfloat_half_format
+ && REAL_MODE_FORMAT (from_mode) == &ieee_half_format)
+   /* libgcc implements just __trunchfbf2, not __extendhfbf2.  */
+   tab = trunc_optab;
+ else
+   /* Conversion between decimal float and binary float, same
+  size.  */
+   tab = DECIMAL_FLOAT_MODE_P (from_mode) ? trunc_optab : sext_optab;
+   }
   else if (GET_MODE_PRECISION (from_mode) < GET_MODE_PRECISION (to_mode))
tab = sext_optab;
   else
diff --git a/gcc/optabs-libfuncs.cc b/gcc/optabs-libfuncs.cc
index f1abe6916d34..4eb98be794b7 100644
--- a/gcc/optabs-libfuncs.cc
+++ b/gcc/optabs-libfuncs.cc
@@ -589,7 +589,9 @@ gen_trunc_conv_libfunc (convert_optab tab,
   if (GET_MODE_CLASS (float_tmode) != GET_MODE_CLASS (float_fmode))
 gen_interclass_conv_libfunc (tab, opname, float_tmode, float_fmode);
 
-  if (GET_MODE_PRECISION (float_fmode) <= GET_MODE_PRECISION (float_tmode))
+  if (GET_MODE_PRECISION (float_fmode) <= GET_MODE_PRECISION (float_tmode)
+  && (REAL_MODE_FORMAT (float_tmode) != &arm_bfloat_half_format
+ || REAL_MODE_FORMAT (float_fmode) != &ieee_half_format))
 return;
 
   if (GET_MODE_CLASS (float_tmode) == GET_MODE_CLASS (float_fmode))
diff --git a/gcc/testsuite/gcc.dg/pr114907.c b/gcc/testsuite/gcc.dg/pr114907.c
new file mode 100644
index ..628746e1f8c1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114907.c
@@ -0,0 +1,27 @@
+/* PR middle-end/114907 */
+/* { dg-do run } */
+/* { dg-options "" } */
+/* { dg-add-options float16 } */
+/* { dg-require-effective-target float16_runtime } */
+/* { dg-add-options bfloat16 } */
+/* { dg-require-effective-target bfloat16_runtime } */
+
+__attribute__((noipa)) _Float16
+foo (__bf16 x)
+{
+  return (_Float16) x;
+}
+
+__attribute__((noipa)) __bf16
+bar (_Float16 x)
+{
+  return (__bf16) x;
+}
+
+int
+main ()
+{
+  if (foo (11.125bf16) != 11.125f16
+  || bar (11.125f16) != 11.125bf16)
+__builtin_abort ();
+}


[gcc r13-8733] tree-inline: Remove .ASAN_MARK calls when inlining functions into no_sanitize callers [PR114956]

2024-05-08 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:d1ec7bc9cb0639a212422710ba647dc1aaea6eaa

commit r13-8733-gd1ec7bc9cb0639a212422710ba647dc1aaea6eaa
Author: Jakub Jelinek 
Date:   Tue May 7 21:29:14 2024 +0200

tree-inline: Remove .ASAN_MARK calls when inlining functions into 
no_sanitize callers [PR114956]

In r9-5742 we started allowing always_inline functions to be inlined into
functions which have disabled e.g. address sanitization, even when the
always_inline function is implicitly sanitized by command-line options.

This mostly works fine because most of the asan instrumentation is done only
late after ipa, but as the following testcase shows, the .ASAN_MARK ifn calls
that the gimplifier adds can result in ICEs.

Fixed by dropping those during inlining, similarly to how we drop
.TSAN_FUNC_EXIT calls.

2024-05-07  Jakub Jelinek  

PR sanitizer/114956
* tree-inline.cc: Include asan.h.
(copy_bb): Remove also .ASAN_MARK calls if id->dst_fn has 
asan/hwasan
sanitization disabled.

* gcc.dg/asan/pr114956.c: New test.

(cherry picked from commit d4e25cf4f7c1f51a8824cc62bbb85a81a41b829a)

Diff:
---
 gcc/testsuite/gcc.dg/asan/pr114956.c | 26 ++
 gcc/tree-inline.cc   | 28 +---
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/asan/pr114956.c 
b/gcc/testsuite/gcc.dg/asan/pr114956.c
new file mode 100644
index ..fb87d514f255
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asan/pr114956.c
@@ -0,0 +1,26 @@
+/* PR sanitizer/114956 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsanitize=address,null" } */
+
+int **a;
+void qux (int *);
+
+__attribute__((always_inline)) static inline int *
+foo (void)
+{
+  int b[1];
+  qux (b);
+  return a[1];
+}
+
+__attribute__((no_sanitize_address)) void
+bar (void)
+{
+  *a = foo ();
+}
+
+void
+baz (void)
+{
+  bar ();
+}
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
index 72a80c0c74da..73d5a9fadef3 100644
--- a/gcc/tree-inline.cc
+++ b/gcc/tree-inline.cc
@@ -65,6 +65,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "symbol-summary.h"
 #include "symtab-thunks.h"
 #include "symtab-clones.h"
+#include "asan.h"
 
 /* I'm not real happy about this, but we need to handle gimple and
non-gimple trees.  */
@@ -2198,13 +2199,26 @@ copy_bb (copy_body_data *id, basic_block bb,
}
  else if (call_stmt
   && id->call_stmt
-  && gimple_call_internal_p (stmt)
-  && gimple_call_internal_fn (stmt) == IFN_TSAN_FUNC_EXIT)
-   {
- /* Drop TSAN_FUNC_EXIT () internal calls during inlining.  */
- gsi_remove (&copy_gsi, false);
- continue;
-   }
+  && gimple_call_internal_p (stmt))
+   switch (gimple_call_internal_fn (stmt))
+ {
+ case IFN_TSAN_FUNC_EXIT:
+   /* Drop .TSAN_FUNC_EXIT () internal calls during inlining.  */
+   gsi_remove (&copy_gsi, false);
+   continue;
+ case IFN_ASAN_MARK:
+   /* Drop .ASAN_MARK internal calls during inlining into
+  no_sanitize functions.  */
+   if (!sanitize_flags_p (SANITIZE_ADDRESS, id->dst_fn)
+   && !sanitize_flags_p (SANITIZE_HWADDRESS, id->dst_fn))
+ {
+   gsi_remove (&copy_gsi, false);
+   continue;
+ }
+   break;
+ default:
+   break;
+ }
 
  /* Statements produced by inlining can be unfolded, especially
 when we constant propagated some operands.  We can't fold


[gcc r13-8735] reassoc: Fix up optimize_range_tests_to_bit_test [PR114965]

2024-05-08 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:44d84db11ab724c34a8b1f8c0e06da1cc78439a2

commit r13-8735-g44d84db11ab724c34a8b1f8c0e06da1cc78439a2
Author: Jakub Jelinek 
Date:   Wed May 8 10:17:32 2024 +0200

reassoc: Fix up optimize_range_tests_to_bit_test [PR114965]

The optimize_range_tests_to_bit_test optimization normally emits a range
test first:
  if (entry_test_needed)
{
  tem = build_range_check (loc, optype, unshare_expr (exp),
   false, lowi, high);
  if (tem == NULL_TREE || is_gimple_val (tem))
continue;
}
so during the bit test we already know that exp is in the [lowi, high]
range, but skips it if we have range info which tells us this isn't
necessary.
Also, normally it emits shifts by exp - lowi counter, but has an
optimization to use just exp counter if the mask isn't a more expensive
constant in that case and lowi is > 0 and high is smaller than prec.

The following testcase is miscompiled because the two abnormal cases
are triggered.  The range of exp is [43, 43][48, 48][95, 95], so on a
64-bit arch we decide we don't need the entry test, because 95 - 43 < 64.
And we also decide to use just exp as counter, because the range test
tests just for exp == 43 || exp == 48, so high is smaller than 64 too.
Because 95 is in the exp range, we can't do that, we'd either need to
do a range test first, i.e.
if (exp - 43U <= 48U - 43U) if ((1UL << exp) & mask1)
or need to subtract lowi from the shift counter, i.e.
if ((1UL << (exp - 43)) & mask2)
but can't do both unless r.upper_bound () is < prec.

The following patch ensures that.

2024-05-08  Jakub Jelinek  

PR tree-optimization/114965
* tree-ssa-reassoc.cc (optimize_range_tests_to_bit_test): Don't try 
to
optimize away exp - lowi subtraction from shift count unless entry
test is emitted or unless r.upper_bound () is smaller than prec.

* gcc.c-torture/execute/pr114965.c: New test.

(cherry picked from commit 9adec2d91e62a479474ae79df5b455fd4b8463ba)

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr114965.c | 30 ++
 gcc/tree-ssa-reassoc.cc|  3 ++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr114965.c 
b/gcc/testsuite/gcc.c-torture/execute/pr114965.c
new file mode 100644
index ..89d68e187015
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr114965.c
@@ -0,0 +1,30 @@
+/* PR tree-optimization/114965 */
+
+static void
+foo (const char *x)
+{
+
+  char a = '0';
+  while (1)
+{
+  switch (*x)
+   {
+   case '_':
+   case '+':
+ a = *x;
+ x++;
+ continue;
+   default:
+ break;
+   }
+  break;
+}
+  if (a == '0' || a == '+')
+__builtin_abort ();
+}
+
+int
+main ()
+{
+  foo ("_");
+}
diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
index c5020465c2b3..d8f5471951af 100644
--- a/gcc/tree-ssa-reassoc.cc
+++ b/gcc/tree-ssa-reassoc.cc
@@ -3411,7 +3411,8 @@ optimize_range_tests_to_bit_test (enum tree_code opcode, 
int first, int length,
 We can avoid then subtraction of the minimum value, but the
 mask constant could be perhaps more expensive.  */
  if (compare_tree_int (lowi, 0) > 0
- && compare_tree_int (high, prec) < 0)
+ && compare_tree_int (high, prec) < 0
+ && (entry_test_needed || wi::ltu_p (r.upper_bound (), prec)))
{
  int cost_diff;
  HOST_WIDE_INT m = tree_to_uhwi (lowi);


[gcc r13-8732] gimple-ssa-sprintf: Use [0, 1] range for %lc with (wint_t) 0 argument [PR114876]

2024-05-08 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:e07df053031e109c50387c92d689950de1d193ab

commit r13-8732-ge07df053031e109c50387c92d689950de1d193ab
Author: Jakub Jelinek 
Date:   Tue Apr 30 11:22:32 2024 +0200

gimple-ssa-sprintf: Use [0, 1] range for %lc with (wint_t) 0 argument 
[PR114876]

It seems that when Martin S. implemented this, he coded a strict reading
of the standard, which said that %lc with (wint_t) 0 argument is handled
as wchar_t[2] temp = { arg, 0 }; %ls with temp arg and so shouldn't print
any values.  But, most of the libc implementations actually handled that
case like %c with '\0' argument, adding a single NUL character, the only
known exception is musl.
Recently, C23 changed this in response to GB-141 and POSIX in
https://austingroupbugs.net/view.php?id=1647
so that it should have the same behavior as %c with '\0'.

Because there is implementation divergence, the following patch uses
a range rather than hardcoding it to all 1s (i.e. the %c behavior),
though the likely case is still 1 (forward looking plus most of
implementations).
The res.knownrange = true; assignment removed is redundant due to
the same assignment done unconditionally before the if statement,
rest is formatting fixes.

I don't think the min >= 0 && min < 128 case is right either, I'd think
it should be min >= 0 && max < 128, otherwise it is just some possible
inputs are (maybe) ASCII and there can be others, but this code is a total
mess anyway, with the min, max, likely (somewhere in [min, max]?) and then
unlikely possibly larger than max, dunno, perhaps for at least some chars
in the ASCII range the likely case could be for the ascii case; so perhaps
just the one_2_one_ascii shouldn't set max to 1 and mayfail should be true
for max >= 128.  Anyway, didn't feel I should touch that right now.

2024-04-30  Jakub Jelinek  

PR tree-optimization/114876
* gimple-ssa-sprintf.cc (format_character): For min == 0 && max == 
0,
set max, likely and unlikely members to 1 rather than 0.  Remove
useless res.knownrange = true;.  Formatting fixes.

* gcc.dg/pr114876.c: New test.
* gcc.dg/tree-ssa/builtin-sprintf-warn-1.c: Adjust expected
diagnostics.

(cherry picked from commit 6c6b70f07208ca14ba783933988c04c6fc2fff42)

Diff:
---
 gcc/gimple-ssa-sprintf.cc  | 20 +++--
 gcc/testsuite/gcc.dg/pr114876.c| 34 ++
 .../gcc.dg/tree-ssa/builtin-sprintf-warn-1.c   | 12 
 3 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/gcc/gimple-ssa-sprintf.cc b/gcc/gimple-ssa-sprintf.cc
index 18975708d2c2..e02977f0ac39 100644
--- a/gcc/gimple-ssa-sprintf.cc
+++ b/gcc/gimple-ssa-sprintf.cc
@@ -2170,8 +2170,7 @@ format_character (const directive &dir, tree arg, 
pointer_query &ptr_qry)
 
   res.knownrange = true;
 
-  if (dir.specifier == 'C'
-  || dir.modifier == FMT_LEN_l)
+  if (dir.specifier == 'C' || dir.modifier == FMT_LEN_l)
 {
   /* A wide character can result in as few as zero bytes.  */
   res.range.min = 0;
@@ -2182,10 +2181,13 @@ format_character (const directive &dir, tree arg, 
pointer_query &ptr_qry)
{
  if (min == 0 && max == 0)
{
- /* The NUL wide character results in no bytes.  */
- res.range.max = 0;
- res.range.likely = 0;
- res.range.unlikely = 0;
+ /* In strict reading of older ISO C or POSIX, this required
+no characters to be emitted.  ISO C23 changes that, so
+does POSIX, to match what has been implemented in most of the
+implementations, namely emitting a single NUL character.
+Let's use 0 for minimum and 1 for all the other values.  */
+ res.range.max = 1;
+ res.range.likely = res.range.unlikely = 1;
}
  else if (min >= 0 && min < 128)
{
@@ -2193,11 +2195,12 @@ format_character (const directive &dir, tree arg, 
pointer_query &ptr_qry)
 is not a 1-to-1 mapping to the source character set or
 if the source set is not ASCII.  */
  bool one_2_one_ascii
-   = (target_to_host_charmap[0] == 1 && target_to_host ('a') == 
97);
+   = (target_to_host_charmap[0] == 1
+  && target_to_host ('a') == 97);
 
  /* A wide character in the ASCII range most likely results
 in a single byte, and only unlikely in up to MB_LEN_MAX.  */
- res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
+ res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();
  res.range.likely = 1;
  res.range.unlikely = target_mb_len_max ();
  res.mayfail = !one_2_one_ascii;
@@ -2228,7 +2231,6 @@ 

[gcc r13-8731] c++: Fix constexpr evaluation of parameters passed by invisible reference [PR111284]

2024-05-08 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:6f1b3f9c97e17aa717ae61bc70afa27adcb7ef44

commit r13-8731-g6f1b3f9c97e17aa717ae61bc70afa27adcb7ef44
Author: Jakub Jelinek 
Date:   Thu Apr 25 20:45:04 2024 +0200

c++: Fix constexpr evaluation of parameters passed by invisible reference 
[PR111284]

My r9-6136 changes to make a copy of constexpr function bodies before
genericization modifies it broke the constant evaluation of non-POD
arguments passed by value.
In the callers such arguments are passed as reference to usually a
TARGET_EXPR, but on the callee side until genericization they are just
direct uses of a PARM_DECL with some class type.
In cxx_bind_parameters_in_call I've used convert_from_reference to
pretend it is passed by value and then cxx_eval_constant_expression
is called there and evaluates that as an rvalue, followed by
adjust_temp_type if the types don't match exactly (e.g. const Foo
argument and passing to it reference to Foo TARGET_EXPR).

The reason this doesn't work is that when the TARGET_EXPR in the caller
is constant initialized, this for it is the address of the TARGET_EXPR_SLOT,
but if the code later on pretends the PARM_DECL is just initialized to the
rvalue of the constant evaluation of the TARGET_EXPR, it is as if there
is a bitwise copy of the TARGET_EXPR to the callee, so this in the callee
is then address of the PARM_DECL in the callee.

The following patch attempts to fix that by constexpr evaluation of such
arguments in the caller as an lvalue instead of rvalue, and on the callee
side when seeing such a PARM_DECL, if we want an lvalue, lookup the value
(lvalue) saved in ctx->globals (if any), and if wanting an rvalue,
recursing with vc_prvalue on the looked up value (because it is there
as an lvalue, not an rvalue).

adjust_temp_type doesn't work for lvalues of non-scalarish types, for
such types it relies on changing the type of a CONSTRUCTOR, but on the
other side we know what we pass to the argument is addressable, so
the patch on type mismatch takes address of the argument value, casts
to reference to the desired type and dereferences it.

2024-04-25  Jakub Jelinek  

PR c++/111284
* constexpr.cc (cxx_bind_parameters_in_call): For PARM_DECLs with
TREE_ADDRESSABLE types use vc_glvalue rather than vc_prvalue for
cxx_eval_constant_expression and if it doesn't have the same
type as it should, cast the reference type to reference to type
before convert_from_reference and instead of adjust_temp_type
take address of the arg, cast to reference to type and then
convert_from_reference.
(cxx_eval_constant_expression) <case PARM_DECL>: For lval case
on parameters with TREE_ADDRESSABLE types lookup result in
ctx->globals if possible.  Otherwise if lookup in ctx->globals
was successful for parameter with TREE_ADDRESSABLE type,
recurse with vc_prvalue on the returned value.

* g++.dg/cpp1z/constexpr-111284.C: New test.

(cherry picked from commit f541757ba4632e204169dd08a5f10c782199af42)

Diff:
---
 gcc/cp/constexpr.cc   | 44 +--
 gcc/testsuite/g++.dg/cpp1z/constexpr-111284.C | 19 
 2 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 216b98122007..acb5496085bb 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -1800,13 +1800,18 @@ cxx_bind_parameters_in_call (const constexpr_ctx *ctx, 
tree t, tree fun,
  x = build_address (x);
}
   if (TREE_ADDRESSABLE (type))
-   /* Undo convert_for_arg_passing work here.  */
-   x = convert_from_reference (x);
-  /* Normally we would strip a TARGET_EXPR in an initialization context
-such as this, but here we do the elision differently: we keep the
-TARGET_EXPR, and use its CONSTRUCTOR as the value of the parm.  */
-  arg = cxx_eval_constant_expression (ctx, x, vc_prvalue,
- non_constant_p, overflow_p);
+   {
+ /* Undo convert_for_arg_passing work here.  */
+ x = convert_from_reference (x);
+ arg = cxx_eval_constant_expression (ctx, x, vc_glvalue,
+ non_constant_p, overflow_p);
+   }
+  else
+   /* Normally we would strip a TARGET_EXPR in an initialization context
+  such as this, but here we do the elision differently: we keep the
+  TARGET_EXPR, and use its CONSTRUCTOR as the value of the parm.  */
+   arg = cxx_eval_constant_expression (ctx, x, vc_prvalue,
+   non_constant_p, overflow_p);
   /* Don't VERIFY_CONSTANT here.  */
   if (*non_constant_p && ctx->quiet)
break;
@@ -1818,7 +1823,16 @@ cxx_

[gcc r13-8730] openmp: Copy DECL_LANG_SPECIFIC and DECL_LANG_FLAG_? to tree-nested decl copy [PR114825]

2024-05-08 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:6d30cfc3fc88976151d0d10e73e10111ccb71ee0

commit r13-8730-g6d30cfc3fc88976151d0d10e73e10111ccb71ee0
Author: Jakub Jelinek 
Date:   Thu Apr 25 20:09:35 2024 +0200

openmp: Copy DECL_LANG_SPECIFIC and DECL_LANG_FLAG_? to tree-nested decl 
copy [PR114825]

tree-nested.cc creates in 2 spots artificial VAR_DECLs, one of them is used
both for debug info and OpenMP/OpenACC lowering purposes, the other solely 
for
OpenMP/OpenACC lowering purposes.
When the decls are used in OpenMP/OpenACC lowering, the OMP langhooks 
(mostly
Fortran, C just a little and C++ doesn't have nested functions) then inspect
the flags on the vars and based on that decide how to lower the 
corresponding
clauses.

Unfortunately we weren't copying DECL_LANG_SPECIFIC and DECL_LANG_FLAG_?, so
the langhooks made decisions on the default flags on those instead.
As the original decl isn't necessarily a VAR_DECL, could be e.g. PARM_DECL,
using copy_node wouldn't work properly, so this patch just copies those
flags in addition to other flags it was copying already.  And I've removed
code duplication by introducing a helper function which does copying common
to both uses.

2024-04-25  Jakub Jelinek  

PR fortran/114825
* tree-nested.cc (get_debug_decl): New function.
(get_nonlocal_debug_decl): Use it.
(get_local_debug_decl): Likewise.

* gfortran.dg/gomp/pr114825.f90: New test.

(cherry picked from commit 14d48516e588ad2b35e2007b3970bdcb1b3f145c)

Diff:
---
 gcc/testsuite/gfortran.dg/gomp/pr114825.f90 | 16 
 gcc/tree-nested.cc  | 61 -
 2 files changed, 49 insertions(+), 28 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/gomp/pr114825.f90 
b/gcc/testsuite/gfortran.dg/gomp/pr114825.f90
new file mode 100644
index ..b635476af61e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/pr114825.f90
@@ -0,0 +1,16 @@
+! PR fortran/114825
+
+subroutine pr114825(b)
+  type t
+real, allocatable :: m(:)
+  end type t
+  type(t), allocatable, target :: b(:)
+  type(t), pointer :: d
+  !$omp parallel private(d)
+  d => b(1)
+  !$omp end parallel
+contains
+  subroutine sub
+d => b(1)
+  end subroutine sub
+end subroutine pr114825
diff --git a/gcc/tree-nested.cc b/gcc/tree-nested.cc
index 1418e1f7f562..0f44b3dc735e 100644
--- a/gcc/tree-nested.cc
+++ b/gcc/tree-nested.cc
@@ -1039,6 +1039,37 @@ get_frame_field (struct nesting_info *info, tree 
target_context,
 
 static void note_nonlocal_vla_type (struct nesting_info *info, tree type);
 
+/* Helper for get_nonlocal_debug_decl and get_local_debug_decl.  */
+
+static tree
+get_debug_decl (tree decl)
+{
+  tree new_decl
+= build_decl (DECL_SOURCE_LOCATION (decl),
+ VAR_DECL, DECL_NAME (decl), TREE_TYPE (decl));
+  DECL_ARTIFICIAL (new_decl) = DECL_ARTIFICIAL (decl);
+  DECL_IGNORED_P (new_decl) = DECL_IGNORED_P (decl);
+  TREE_THIS_VOLATILE (new_decl) = TREE_THIS_VOLATILE (decl);
+  TREE_SIDE_EFFECTS (new_decl) = TREE_SIDE_EFFECTS (decl);
+  TREE_READONLY (new_decl) = TREE_READONLY (decl);
+  TREE_ADDRESSABLE (new_decl) = TREE_ADDRESSABLE (decl);
+  DECL_SEEN_IN_BIND_EXPR_P (new_decl) = 1;
+  if ((TREE_CODE (decl) == PARM_DECL
+   || TREE_CODE (decl) == RESULT_DECL
+   || VAR_P (decl))
+  && DECL_BY_REFERENCE (decl))
+DECL_BY_REFERENCE (new_decl) = 1;
+  /* Copy DECL_LANG_SPECIFIC and DECL_LANG_FLAG_* for OpenMP langhook
+ purposes.  */
+  DECL_LANG_SPECIFIC (new_decl) = DECL_LANG_SPECIFIC (decl);
+#define COPY_DLF(n) DECL_LANG_FLAG_##n (new_decl) = DECL_LANG_FLAG_##n (decl)
+  COPY_DLF (0); COPY_DLF (1); COPY_DLF (2); COPY_DLF (3);
+  COPY_DLF (4); COPY_DLF (5); COPY_DLF (6); COPY_DLF (7);
+  COPY_DLF (8);
+#undef COPY_DLF
+  return new_decl;
+}
+
 /* A subroutine of convert_nonlocal_reference_op.  Create a local variable
in the nested function with DECL_VALUE_EXPR set to reference the true
variable in the parent function.  This is used both for debug info
@@ -1086,21 +1117,8 @@ get_nonlocal_debug_decl (struct nesting_info *info, tree 
decl)
 x = build_simple_mem_ref_notrap (x);
 
   /* ??? We should be remapping types as well, surely.  */
-  new_decl = build_decl (DECL_SOURCE_LOCATION (decl),
-VAR_DECL, DECL_NAME (decl), TREE_TYPE (decl));
+  new_decl = get_debug_decl (decl);
   DECL_CONTEXT (new_decl) = info->context;
-  DECL_ARTIFICIAL (new_decl) = DECL_ARTIFICIAL (decl);
-  DECL_IGNORED_P (new_decl) = DECL_IGNORED_P (decl);
-  TREE_THIS_VOLATILE (new_decl) = TREE_THIS_VOLATILE (decl);
-  TREE_SIDE_EFFECTS (new_decl) = TREE_SIDE_EFFECTS (decl);
-  TREE_READONLY (new_decl) = TREE_READONLY (decl);
-  TREE_ADDRESSABLE (new_decl) = TREE_ADDRESSABLE (decl);
-  DECL_SEEN_IN_BIND_EXPR_P (new_decl) = 1;
-  if ((TREE_CODE (decl) == PARM_DECL
-   || TREE_CODE (decl) == RESULT_DECL
-   || VAR_

[gcc r13-8729] libstdc++: Workaround kernel-headers on s390x-linux

2024-05-08 Thread Jakub Jelinek via Libstdc++-cvs
https://gcc.gnu.org/g:f1b1d515aa5836844cdb45e8bb2b941784f78fd2

commit r13-8729-gf1b1d515aa5836844cdb45e8bb2b941784f78fd2
Author: Jakub Jelinek 
Date:   Mon Apr 22 18:00:06 2024 +0200

libstdc++: Workaround kernel-headers on s390x-linux

We see
FAIL: 17_intro/headers/c++1998/all_attributes.cc   (test for excess errors)
FAIL: 17_intro/headers/c++2011/all_attributes.cc   (test for excess errors)
FAIL: 17_intro/headers/c++2014/all_attributes.cc   (test for excess errors)
FAIL: 17_intro/headers/c++2017/all_attributes.cc   (test for excess errors)
FAIL: 17_intro/headers/c++2020/all_attributes.cc   (test for excess errors)
FAIL: 17_intro/names.cc  -std=gnu++17 (test for excess errors)
on s390x-linux.
The first 5 are due to kernel-headers not using uglified attribute names,
where  contains
__attribute__((packed, aligned(4)))
I've filed a downstream bugreport for this in
https://bugzilla.redhat.com/show_bug.cgi?id=2276084
(not really sure where to report kernel-headers issues upstream), while the
last one is due to  from glibc containing:
  #ifdef __USE_MISC
  # define __ctx(fld) fld
  #else
  # define __ctx(fld) __ ## fld
  #endif
  ...
  typedef union
{
  double  __ctx(d);
  float   __ctx(f);
} fpreg_t;
and g++ predefining -D_GNU_SOURCE which implies define __USE_MISC.

The following patch adds a workaround for this on the libstdc++ testsuite
side.

2024-04-22  Jakub Jelinek  

* testsuite/17_intro/names.cc (d, f): Undefine on s390*-linux*.
* testsuite/17_intro/headers/c++1998/all_attributes.cc (packed): 
Don't
define on s390.
* testsuite/17_intro/headers/c++2011/all_attributes.cc (packed):
Likewise.
* testsuite/17_intro/headers/c++2014/all_attributes.cc (packed):
Likewise.
* testsuite/17_intro/headers/c++2017/all_attributes.cc (packed):
Likewise.
* testsuite/17_intro/headers/c++2020/all_attributes.cc (packed):
Likewise.

(cherry picked from commit cf5f7791056b3ed993bc8024be767a86157514a9)

Diff:
---
 libstdc++-v3/testsuite/17_intro/headers/c++1998/all_attributes.cc | 4 
 libstdc++-v3/testsuite/17_intro/headers/c++2011/all_attributes.cc | 4 
 libstdc++-v3/testsuite/17_intro/headers/c++2014/all_attributes.cc | 4 
 libstdc++-v3/testsuite/17_intro/headers/c++2017/all_attributes.cc | 4 
 libstdc++-v3/testsuite/17_intro/headers/c++2020/all_attributes.cc | 4 
 libstdc++-v3/testsuite/17_intro/names.cc  | 6 ++
 6 files changed, 26 insertions(+)

diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++1998/all_attributes.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++1998/all_attributes.cc
index 74268b6a482f..658063bd0a4e 100644
--- a/libstdc++-v3/testsuite/17_intro/headers/c++1998/all_attributes.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++1998/all_attributes.cc
@@ -29,7 +29,11 @@
 # define noreturn 1
 # define visibility 1
 #endif
+#ifndef __s390__
+// kernel-headers  uses __attribute__((packed,aligned(4))) on
+// S390.
 #define packed 1
+#endif
 #define pure 1
 // glibc's sysdeps/unix/sysv/linux/arm/sys/ucontext.h uses this on ARM.
 #ifndef __arm__
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2011/all_attributes.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2011/all_attributes.cc
index 5d0c5fe81776..f1bcc1fbbc81 100644
--- a/libstdc++-v3/testsuite/17_intro/headers/c++2011/all_attributes.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2011/all_attributes.cc
@@ -29,7 +29,11 @@
 # define visibility 1
 #endif
 #define no_unique_address 1
+#ifndef __s390__
+// kernel-headers  uses __attribute__((packed,aligned(4))) on
+// S390.
 #define packed 1
+#endif
 #define pure 1
 // glibc's sysdeps/unix/sysv/linux/arm/sys/ucontext.h uses this on ARM.
 #ifndef __arm__
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2014/all_attributes.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2014/all_attributes.cc
index 3cac2190ec77..48e7ef64afbe 100644
--- a/libstdc++-v3/testsuite/17_intro/headers/c++2014/all_attributes.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2014/all_attributes.cc
@@ -29,7 +29,11 @@
 # define visibility 1
 #endif
 #define no_unique_address 1
+#ifndef __s390__
+// kernel-headers  uses __attribute__((packed,aligned(4))) on
+// S390.
 #define packed 1
+#endif
 #define pure 1
 // glibc's sysdeps/unix/sysv/linux/arm/sys/ucontext.h uses this on ARM.
 #ifndef __arm__
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2017/all_attributes.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2017/all_attributes.cc
index f607532aa90d..03e4e23c6865 100644
--- a/libstdc++-v3/testsuite/17_intro/headers/c++2017/all_attributes.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2017/all_attributes.cc
@@ -28,7 +28,11 @@
 # define visibility 1
 #endif
 #def

[gcc r15-334] i386: fix ix86_hardreg_mov_ok with lra_in_progress

2024-05-08 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:baf1a677955a4dcfffe8d93966900af96600d642

commit r15-334-gbaf1a677955a4dcfffe8d93966900af96600d642
Author: konglin1 
Date:   Thu May 9 09:48:56 2024 +0800

i386: fix ix86_hardreg_mov_ok with lra_in_progress

Originally eliminate_regs_in_insn will transform
(parallel [
  (set (reg:QI 130)
(plus:QI (subreg:QI (reg:DI 19 frame) 0)
  (const_int 96)))
  (clobber (reg:CC 17 flag))]) {*addqi_1}
to
(set (reg:QI 130)
 (subreg:QI (reg:DI 19 frame) 0)) {*movqi_internal}
when verify_changes.

But with No Flags add, it transforms
(set (reg:QI 5 di)
  (plus:QI (subreg:QI (reg:DI 19 frame) 0)
   (const_int 96))) {*addqi_1_nf}
to
(set (reg:QI 5 di)
 (subreg:QI (reg:DI 19 frame) 0)) {*addqi_1_nf}.
There are no extra clobbers at the end, and
its dest reg is just a hardreg. For this case ix86_hardreg_mov_ok
returns false. So it fails to update the insn and causes
an ICE when transforming to movqi_internal.

But actually it is ok and safe for ix86_hardreg_mov_ok
when lra_in_progress.

Also tested with SPEC2017; the performance was not affected.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_hardreg_mov_ok): Relax
hard reg mov restriction when lra in progress.

Diff:
---
 gcc/config/i386/i386.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index c2df4ab91ee9..54c6c445bf14 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20355,7 +20355,8 @@ ix86_hardreg_mov_ok (rtx dst, rtx src)
   ? standard_sse_constant_p (src, GET_MODE (dst))
   : x86_64_immediate_operand (src, GET_MODE (dst)))
   && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
-  && !reload_completed)
+  && !reload_completed
+  && !lra_in_progress)
 return false;
   return true;
 }


[gcc r15-333] [PATCH v1 1/1] RISC-V: Nan-box the result of movbf on soft-bf16

2024-05-08 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ce51e6727c9d69bbab0e766c449e60fd41f5f2f9

commit r15-333-gce51e6727c9d69bbab0e766c449e60fd41f5f2f9
Author: Xiao Zeng 
Date:   Wed May 8 14:00:58 2024 -0600

[PATCH v1 1/1] RISC-V: Nan-box the result of movbf on soft-bf16

1 This patch implements the Nan-box of bf16.

2 Please refer to the Nan-box implementation of hf16 in:



3 The discussion about Nan-box can be found on the website:



4 The tests below pass for this patch
* The riscv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_legitimize_move): Expand movbf
with Nan-boxing value.
* config/riscv/riscv.md (*movbf_softfloat_boxing): New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/_Bfloat16-nanboxing.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc  | 52 --
 gcc/config/riscv/riscv.md  | 12 -
 .../gcc.target/riscv/_Bfloat16-nanboxing.c | 38 
 3 files changed, 77 insertions(+), 25 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 633b55f9707a..2eac67b0ce0a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3130,35 +3130,39 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx 
src)
 }
 
   /* In order to fit NaN boxing, expand
- (set FP_REG (reg:HF src))
+ (set FP_REG (reg:HF/BF src))
  to
  (set (reg:SI/DI mask) (const_int -65536)
- (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF src) 0)))
+ (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF/BF src) 0)))
  (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp)))
- (set (reg:HF dest) (unspec:HF [ (reg:SI/DI temp) ] UNSPEC_FMV_SFP16_X))
+ (set (reg:HF/BF dest) (unspec:HF/BF[ (reg:SI/DI temp) ]
+   UNSPEC_FMV_SFP16_X/UNSPEC_FMV_SBF16_X))
  */
 
- if (TARGET_HARD_FLOAT
- && !TARGET_ZFHMIN && mode == HFmode
- && REG_P (dest) && FP_REG_P (REGNO (dest))
- && REG_P (src) && !FP_REG_P (REGNO (src))
- && can_create_pseudo_p ())
-   {
- rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
- rtx temp = gen_reg_rtx (word_mode);
- emit_insn (gen_extend_insn (temp,
-simplify_gen_subreg (HImode, src, mode, 0),
-word_mode, HImode, 1));
- if (word_mode == SImode)
-   emit_insn (gen_iorsi3 (temp, mask, temp));
- else
-   emit_insn (gen_iordi3 (temp, mask, temp));
-
- riscv_emit_move (dest, gen_rtx_UNSPEC (HFmode, gen_rtvec (1, temp),
-   UNSPEC_FMV_SFP16_X));
-
- return true;
-   }
+  if (TARGET_HARD_FLOAT
+  && ((!TARGET_ZFHMIN && mode == HFmode)
+ || (!TARGET_ZFBFMIN && mode == BFmode))
+  && REG_P (dest) && FP_REG_P (REGNO (dest))
+  && REG_P (src) && !FP_REG_P (REGNO (src))
+  && can_create_pseudo_p ())
+{
+  rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
+  rtx temp = gen_reg_rtx (word_mode);
+  emit_insn (gen_extend_insn (temp,
+ simplify_gen_subreg (HImode, src, mode, 0),
+ word_mode, HImode, 1));
+  if (word_mode == SImode)
+   emit_insn (gen_iorsi3 (temp, mask, temp));
+  else
+   emit_insn (gen_iordi3 (temp, mask, temp));
+
+  riscv_emit_move (dest,
+  gen_rtx_UNSPEC (mode, gen_rtvec (1, temp),
+  mode == HFmode ? UNSPEC_FMV_SFP16_X
+ : UNSPEC_FMV_SBF16_X));
+
+  return true;
+}
 
   /* We need to deal with constants that would be legitimate
  immediate_operands but aren't legitimate move_operands.  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 620a1b3bd32f..4d6de9925572 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -86,8 +86,9 @@
   ;; String unspecs
   UNSPEC_STRLEN
 
-  ;; Workaround for HFmode without hardware extension
+  ;; Workaround for HFmode and BFmode without hardware extension
   UNSPEC_FMV_SFP16_X
+  UNSPEC_FMV_SBF16_X
 
   ;; XTheadFmv moves
   UNSPEC_XTHEADFMV
@@ -1926,6 +1927,15 @@
   [(set_attr "type" "fmove")
(set_attr "mode" "SF")])
 
+(define_insn "*movbf_softfloat_boxing"
+  [(set (match_operand:BF 0 "register_operand"   "=f")
+   (unspec:BF [(match_operand:X 1 "register_operand" " r")]
+UNSPEC_FMV_SBF16_X))]
+  "!TARGET_ZFBFMIN"
+  "fmv.w.x\t%0,%1"
+  [(set_attr "type" "fmove")
+   (set_attr "mode" "SF")])
+
 ;;
 ;;  
 ;;
diff --g

[gcc r15-332] [RISC-V][V2] Fix incorrect if-then-else nesting of Zbs usage in constant synthesis

2024-05-08 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:1c234097487927a4388ddcc690b63597bb3a90dc

commit r15-332-g1c234097487927a4388ddcc690b63597bb3a90dc
Author: Jeff Law 
Date:   Wed May 8 13:44:00 2024 -0600

[RISC-V][V2] Fix incorrect if-then-else nesting of Zbs usage in constant 
synthesis

Reposting without the patch that ignores whitespace.  The CI system doesn't
like including both patches, that'll generate a failure to apply and none of
the tests actually get run.

So I managed to goof the if-then-else level of the bseti bits last week.  
They
were supposed to be a last ditch effort to improve the result, but ended up
inside a conditional where they don't really belong.  I almost always use 
Zba,
Zbb and Zbs together, so it slipped by.

So it's NFC if you always test with Zbb and Zbs enabled together.  But if 
you
enabled Zbs without Zbb you'd see a failure to use bseti.

gcc/
* config/riscv/riscv.cc (riscv_build_integer_1): Fix incorrect
if-then-else nesting of Zbs code.

Diff:
---
 gcc/config/riscv/riscv.cc | 81 ---
 1 file changed, 41 insertions(+), 40 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 62207b6b2273..633b55f9707a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -878,50 +878,51 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
  codes[1].use_uw = false;
  cost = 2;
}
-  /* Final cases, particularly focused on bseti.  */
-  else if (cost > 2 && TARGET_ZBS)
-   {
- int i = 0;
+}
 
- /* First handle any bits set by LUI.  Be careful of the
-SImode sign bit!.  */
- if (value & 0x7800)
-   {
- alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
- alt_codes[i].value = value & 0x7800;
- alt_codes[i].use_uw = false;
- value &= ~0x7800;
- i++;
-   }
+  /* Final cases, particularly focused on bseti.  */
+  if (cost > 2 && TARGET_ZBS)
+{
+  int i = 0;
 
- /* Next, any bits we can handle with addi.  */
- if (value & 0x7ff)
-   {
- alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
- alt_codes[i].value = value & 0x7ff;
- alt_codes[i].use_uw = false;
- value &= ~0x7ff;
- i++;
-   }
+  /* First handle any bits set by LUI.  Be careful of the
+SImode sign bit!.  */
+  if (value & 0x7800)
+   {
+ alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
+ alt_codes[i].value = value & 0x7800;
+ alt_codes[i].use_uw = false;
+ value &= ~0x7800;
+  i++;
+   }
 
- /* And any residuals with bseti.  */
- while (i < cost && value)
-   {
- HOST_WIDE_INT bit = ctz_hwi (value);
- alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
- alt_codes[i].value = 1UL << bit;
- alt_codes[i].use_uw = false;
- value &= ~(1ULL << bit);
- i++;
-   }
+  /* Next, any bits we can handle with addi.  */
+  if (value & 0x7ff)
+   {
+ alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
+ alt_codes[i].value = value & 0x7ff;
+ alt_codes[i].use_uw = false;
+ value &= ~0x7ff;
+ i++;
+   }
 
- /* If LUI+ADDI+BSETI resulted in a more efficient
-sequence, then use it.  */
- if (i < cost)
-   {
- memcpy (codes, alt_codes, sizeof (alt_codes));
- cost = i;
-   }
+  /* And any residuals with bseti.  */
+  while (i < cost && value)
+   {
+ HOST_WIDE_INT bit = ctz_hwi (value);
+ alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
+ alt_codes[i].value = 1UL << bit;
+ alt_codes[i].use_uw = false;
+ value &= ~(1ULL << bit);
+ i++;
+   }
+
+  /* If LUI+ADDI+BSETI resulted in a more efficient
+sequence, then use it.  */
+  if (i < cost)
+   {
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = i;
}
 }


[gcc r14-10185] Objective-C, NeXT, v2: Correct a regression in code-gen.

2024-05-08 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:7e8fae89f356bad9c62c8625a36b631cf0a74568

commit r14-10185-g7e8fae89f356bad9c62c8625a36b631cf0a74568
Author: Iain Sandoe 
Date:   Tue Apr 30 15:11:56 2024 +0100

Objective-C, NeXT, v2: Correct a regression in code-gen.

There have been several changes in the ABI of Objective-C which
depend on the OS version targeted.  In this case Protocols and
LabelProtocols should be made weak/hidden/extern from macOS 10.7
however there was a mistake in the code causing this to occur
from macOS 10.6.  Fixed thus.

gcc/objc/ChangeLog:

* objc-next-runtime-abi-02.cc (WEAK_PROTOCOLS_AFTER): New.
(next_runtime_abi_02_protocol_decl): Use WEAK_PROTOCOLS_AFTER
to determine this ABI change.
(build_v2_protocol_list_address_table): Likewise.

Signed-off-by: Iain Sandoe 
(cherry picked from commit 9b5c0be59d0f94df0517820f00b4520b5abddd8c)

Diff:
---
 gcc/objc/objc-next-runtime-abi-02.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/objc/objc-next-runtime-abi-02.cc 
b/gcc/objc/objc-next-runtime-abi-02.cc
index cdf559b9bea8..248ef641281f 100644
--- a/gcc/objc/objc-next-runtime-abi-02.cc
+++ b/gcc/objc/objc-next-runtime-abi-02.cc
@@ -72,6 +72,7 @@ along with GCC; see the file COPYING3.  If not see
 #define TAG_MSGSENDSUPER_STRET "objc_msgSendSuper2_stret"
 
 #define USE_FIXUP_BEFORE   100600
+#define WEAK_PROTOCOLS_AFTER   100700
 #define TAG_FIXUP  "_fixup"
 
 
@@ -1025,7 +1026,7 @@ next_runtime_abi_02_protocol_decl (tree p)
   /* static struct _objc_protocol _OBJC_Protocol_; */
   snprintf (buf, BUFSIZE, "_OBJC_Protocol_%s",
IDENTIFIER_POINTER (PROTOCOL_NAME (p)));
-  if (flag_next_runtime >= USE_FIXUP_BEFORE)
+  if (flag_next_runtime >= WEAK_PROTOCOLS_AFTER)
 {
   decl = create_hidden_decl (objc_v2_protocol_template, buf);
   DECL_WEAK (decl) = true;
@@ -2315,7 +2316,7 @@ build_v2_protocol_list_address_table (void)
   gcc_assert (ref->id && TREE_CODE (ref->id) == PROTOCOL_INTERFACE_TYPE);
   snprintf (buf, BUFSIZE, "_OBJC_LabelProtocol_%s",
IDENTIFIER_POINTER (PROTOCOL_NAME (ref->id)));
-  if (flag_next_runtime >= USE_FIXUP_BEFORE)
+  if (flag_next_runtime >= WEAK_PROTOCOLS_AFTER)
{
  decl = create_hidden_decl (objc_protocol_type, buf, /*is def=*/true);
  DECL_WEAK (decl) = true;


[gcc r12-10433] testsuite: fix Wmismatched-new-delete-8.C with -m32

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:58d11bfc27d5412619c0919738158a4c05cca2cf

commit r12-10433-g58d11bfc27d5412619c0919738158a4c05cca2cf
Author: Marek Polacek 
Date:   Thu Feb 22 18:52:32 2024 -0500

testsuite: fix Wmismatched-new-delete-8.C with -m32

This fixes
error: 'operator new' takes type 'size_t' ('unsigned int') as first 
parameter [-fpermissive]

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wmismatched-new-delete-8.C: Use __SIZE_TYPE__.

(cherry picked from commit d34d7c74d51d365a3a4ddcd4383fc7c9f29020a1)

Diff:
---
 gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C 
b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
index 0ddc056c6df2..e8fd7a85b8c9 100644
--- a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
+++ b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
@@ -11,7 +11,7 @@ static inline T * construct_at(void *at, ARGS && args)
  struct Placeable : T
  {
   Placeable(ARGS && args) : T(args) { }
-  void * operator new (long unsigned int, void *ptr) { return ptr; }
+  void * operator new (__SIZE_TYPE__, void *ptr) { return ptr; }
   void operator delete (void *, void *) { }
  };
  return new (at) Placeable(static_cast(args));


[gcc r12-10432] warn-access: Fix handling of unnamed types [PR109804]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:16319f8fba6c049d743046488588f40da2349048

commit r12-10432-g16319f8fba6c049d743046488588f40da2349048
Author: Andrew Pinski 
Date:   Wed Feb 21 20:12:21 2024 -0800

warn-access: Fix handling of unnamed types [PR109804]

This looks like an oversight of handling DEMANGLE_COMPONENT_UNNAMED_TYPE.
DEMANGLE_COMPONENT_UNNAMED_TYPE only has the u.s_number.number set while
the code expected newc.u.s_binary.left would be valid.
So this treats DEMANGLE_COMPONENT_UNNAMED_TYPE like we treat function
parameters (DEMANGLE_COMPONENT_FUNCTION_PARAM) and template parameters
(DEMANGLE_COMPONENT_TEMPLATE_PARAM).

Note the code in the demangler does this when it sets 
DEMANGLE_COMPONENT_UNNAMED_TYPE:
  ret->type = DEMANGLE_COMPONENT_UNNAMED_TYPE;
  ret->u.s_number.number = num;

Committed as obvious after bootstrap/test on x86_64-linux-gnu

PR tree-optimization/109804

gcc/ChangeLog:

* gimple-ssa-warn-access.cc (new_delete_mismatch_p): Handle
DEMANGLE_COMPONENT_UNNAMED_TYPE.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wmismatched-new-delete-8.C: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 1076ffda6ce5e6d5fc9577deaf8233e549e5787a)

Diff:
---
 gcc/gimple-ssa-warn-access.cc  |  1 +
 .../g++.dg/warn/Wmismatched-new-delete-8.C | 42 ++
 2 files changed, 43 insertions(+)

diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
index 8d088ad33f2f..e70a6f1fb877 100644
--- a/gcc/gimple-ssa-warn-access.cc
+++ b/gcc/gimple-ssa-warn-access.cc
@@ -1688,6 +1688,7 @@ new_delete_mismatch_p (const demangle_component &newc,
 
 case DEMANGLE_COMPONENT_FUNCTION_PARAM:
 case DEMANGLE_COMPONENT_TEMPLATE_PARAM:
+case DEMANGLE_COMPONENT_UNNAMED_TYPE:
   return newc.u.s_number.number != delc.u.s_number.number;
 
 case DEMANGLE_COMPONENT_CHARACTER:
diff --git a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C 
b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
new file mode 100644
index ..0ddc056c6df2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-8.C
@@ -0,0 +1,42 @@
+/* PR tree-optimization/109804 */
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-Wall" } */
+
+/* Here we used to ICE in new_delete_mismatch_p because
+   we didn't handle unnamed types from the demangler 
(DEMANGLE_COMPONENT_UNNAMED_TYPE). */
+
+template 
+static inline T * construct_at(void *at, ARGS && args)
+{
+ struct Placeable : T
+ {
+  Placeable(ARGS && args) : T(args) { }
+  void * operator new (long unsigned int, void *ptr) { return ptr; }
+  void operator delete (void *, void *) { }
+ };
+ return new (at) Placeable(static_cast(args));
+}
+template 
+struct Reconstructible
+{
+  char _space[sizeof(MT)];
+  Reconstructible() { }
+};
+template 
+struct Constructible : Reconstructible
+{
+ Constructible(){}
+};
+struct A { };
+struct B
+{
+ Constructible a { };
+ B(int) { }
+};
+Constructible b { };
+void f()
+{
+  enum { ENUM_A = 1 };
+  enum { ENUM_B = 1 };
+  construct_at(b._space, ENUM_B);
+}


[gcc r15-331] AVR: target/114981 - Support __builtin_powi[l] / __powidf2.

2024-05-08 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:de4eea7d7ea86e54843507c68d6672eca9d8c7bb

commit r15-331-gde4eea7d7ea86e54843507c68d6672eca9d8c7bb
Author: Georg-Johann Lay 
Date:   Wed May 8 17:56:05 2024 +0200

AVR: target/114981 - Support __builtin_powi[l] / __powidf2.

This supports __powidf2 by means of a double wrapper for already
existing f7_powi (renamed to __f7_powi by f7-renames.h).
It tweaks the implementation so that it does not perform trivial
multiplications with 1.0 any more, but instead uses a move.
It also fixes the last statement of f7_powi, which was wrong.
Notice that f7_powi was unused until now.

PR target/114981
libgcc/config/avr/libf7/
* libf7-common.mk (F7_ASM_PARTS): Add D_powi
* libf7-asm.sx (F7MOD_D_powi_, __powidf2): New module and function.
* libf7.c (f7_powi): Fix last (wrong) statement.
Tweak trivial multiplications with 1.0.

gcc/testsuite/
* gcc.target/avr/pr114981-powil.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/avr/pr114981-powil.c | 33 +++
 libgcc/config/avr/libf7/libf7-asm.sx  | 12 ++
 libgcc/config/avr/libf7/libf7-common.mk   |  2 +-
 libgcc/config/avr/libf7/libf7.c   | 29 +--
 4 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/avr/pr114981-powil.c 
b/gcc/testsuite/gcc.target/avr/pr114981-powil.c
new file mode 100644
index ..70f8e796c654
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/pr114981-powil.c
@@ -0,0 +1,33 @@
+/* { dg-do run { target { ! avr_tiny } } } */
+/* { dg-additional-options "-Os" } */
+
+const long double vals[] =
+  {
+0.0625L, -0.125L, 0.25L, -0.5L,
+1.0L,
+-2.0L, 4.0L, -8.0L, 16.0L
+  };
+
+#define ARRAY_SIZE(X) ((int) (sizeof(X) / sizeof(*X)))
+
+__attribute__((noinline,noclone))
+void test1 (long double x)
+{
+  int i;
+
+  for (i = 0; i < ARRAY_SIZE (vals); ++i)
+{
+  long double val0 = vals[i];
+  long double val1 = __builtin_powil (x, i - 4);
+  __asm ("" : "+r" (val0));
+
+  if (val0 != val1)
+   __builtin_exit (__LINE__);
+}
+}
+
+int main (void)
+{
+  test1 (-2.0L);
+  return 0;
+}
diff --git a/libgcc/config/avr/libf7/libf7-asm.sx 
b/libgcc/config/avr/libf7/libf7-asm.sx
index 1ab91270cb2b..1f8f60ab2826 100644
--- a/libgcc/config/avr/libf7/libf7-asm.sx
+++ b/libgcc/config/avr/libf7/libf7-asm.sx
@@ -1877,4 +1877,16 @@ ENDF call_ddd
 
 #include "f7-wraps.h"
 
+;;; Some additional, singular wraps that don't match any pattern.
+
+;; double __powidf2 (double, int)  ; __builtin_powi
+#ifdef F7MOD_D_powi_
+_DEFUN __powidf2
+.global F7_NAME(powi)
+ldi ZH, hi8(gs(F7_NAME(powi)))
+ldi ZL, lo8(gs(F7_NAME(powi)))
+F7jmp   call_ddx
+_ENDF __powidf2
+#endif /* F7MOD_D_powi_ */
+
 #endif /* !AVR_TINY */
diff --git a/libgcc/config/avr/libf7/libf7-common.mk 
b/libgcc/config/avr/libf7/libf7-common.mk
index d541b48ff3ca..5d411071c8e4 100644
--- a/libgcc/config/avr/libf7/libf7-common.mk
+++ b/libgcc/config/avr/libf7/libf7-common.mk
@@ -22,7 +22,7 @@ F7_ASM_PARTS += addsub_mant_scaled store load
 F7_ASM_PARTS += to_integer to_unsigned clz normalize_with_carry normalize
 F7_ASM_PARTS += store_expo sqrt16 sqrt_approx div
 
-F7_ASM_PARTS += D_class D_fma
+F7_ASM_PARTS += D_class D_fma D_powi
 F7_ASM_PARTS += D_isnan D_isinf D_isfinite D_signbit D_copysign D_neg D_fabs
 
 F7_ASM_PARTS += call_dd call_ddd
diff --git a/libgcc/config/avr/libf7/libf7.c b/libgcc/config/avr/libf7/libf7.c
index 369dbe241039..375becb854c1 100644
--- a/libgcc/config/avr/libf7/libf7.c
+++ b/libgcc/config/avr/libf7/libf7.c
@@ -1752,20 +1752,33 @@ void f7_powi (f7_t *cc, const f7_t *aa, int ii)
 {
   uint16_t u16 = ii;
   f7_t xx27, *xx2 = &xx27;
+  bool cc_is_one = true;
+  bool expo_is_neg = false;
 
   if (ii < 0)
-u16 = -u16;
+{
+  u16 = -u16;
+  expo_is_neg = true;
+}
 
   f7_copy (xx2, aa);
 
-  f7_set_u16 (cc, 1);
-
   while (1)
 {
   if (u16 & 1)
-   f7_Imul (cc, xx2);
+   {
+ if (cc_is_one)
+   {
+ // C *= X2 simplifies to C = X2.
+ f7_copy (cc, xx2);
+ cc_is_one = false;
+   }
+ else
+   f7_Imul (cc, xx2);
+   }
 
-  if (! f7_is_nonzero (cc))
+  if (! cc_is_one
+ && ! f7_is_nonzero (cc))
break;
 
   u16 >>= 1;
@@ -1774,8 +1787,10 @@ void f7_powi (f7_t *cc, const f7_t *aa, int ii)
   f7_Isquare (xx2);
 }
 
-  if (ii < 0)
-f7_div1 (xx2, aa);
+  if (cc_is_one)
+f7_set_u16 (cc, 1);
+  else if (expo_is_neg)
+f7_div1 (cc, cc);
 }
 #endif // F7MOD_powi_


[gcc r15-330] [PR114810][LRA]: Recognize alternatives with lack of available registers for insn and demote them.

2024-05-08 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:2f00e6caca1a14dfe26e94f608e9d79a787ebe08

commit r15-330-g2f00e6caca1a14dfe26e94f608e9d79a787ebe08
Author: Vladimir N. Makarov 
Date:   Wed May 8 10:39:04 2024 -0400

[PR114810][LRA]: Recognize alternatives with lack of available registers 
for insn and demote them.

  PR114810 was fixed in a machine-dependent way.  This patch is a fix of
the PR on the LRA side.  LRA chose an alternative with constraints `&r,r,ro`
on i686 when all operands are of DImode and there are only 6 available
general regs.  The patch recognizes such a case and significantly
increases the alternative cost.  It does not reject the alternative
completely.  So the fix is safe, but it might not work for all
potentially possible cases of register shortage, as register classes can
have any relations including subsets and intersections.

gcc/ChangeLog:

PR target/114810
* lra-constraints.cc (process_alt_operands): Calculate union reg
class for the alternative, peak matched regs and required reload
regs.  Recognize alternatives with lack of available registers and
make them costly.  Add debug print about this case.

Diff:
---
 gcc/lra-constraints.cc | 43 +--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 10e3d4e40977..5b78fd0b7e5c 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -2127,6 +2127,8 @@ process_alt_operands (int only_alternative)
   /* Numbers of operands which are early clobber registers.  */
   int early_clobbered_nops[MAX_RECOG_OPERANDS];
   enum reg_class curr_alt[MAX_RECOG_OPERANDS];
+  enum reg_class all_this_alternative;
+  int all_used_nregs, all_reload_nregs;
   HARD_REG_SET curr_alt_set[MAX_RECOG_OPERANDS];
   HARD_REG_SET curr_alt_exclude_start_hard_regs[MAX_RECOG_OPERANDS];
   bool curr_alt_match_win[MAX_RECOG_OPERANDS];
@@ -2229,7 +2231,8 @@ process_alt_operands (int only_alternative)
   curr_alt_out_sp_reload_p = false;
   curr_reuse_alt_p = true;
   curr_alt_class_change_p = false;
-  
+  all_this_alternative = NO_REGS;
+  all_used_nregs = all_reload_nregs = 0;
   for (nop = 0; nop < n_operands; nop++)
{
  const char *p;
@@ -2660,6 +2663,15 @@ process_alt_operands (int only_alternative)
  /* Record which operands fit this alternative.  */
  if (win)
{
+ if (early_clobber_p
+ || curr_static_id->operand[nop].type != OP_OUT)
+   {
+ all_used_nregs
+   += ira_reg_class_min_nregs[this_alternative][mode];
+ all_this_alternative
+   = (reg_class_subunion
+  [all_this_alternative][this_alternative]);
+   }
  this_alternative_win = true;
  if (class_change_p)
{
@@ -2781,7 +2793,19 @@ process_alt_operands (int only_alternative)
   & ~((ira_prohibited_class_mode_regs
[this_alternative][mode])
   | lra_no_alloc_regs));
- if (hard_reg_set_empty_p (available_regs))
+ if (!hard_reg_set_empty_p (available_regs))
+   {
+ if (early_clobber_p
+ || curr_static_id->operand[nop].type != OP_OUT)
+   {
+ all_reload_nregs
+   += ira_reg_class_min_nregs[this_alternative][mode];
+ all_this_alternative
+   = (reg_class_subunion
+  [all_this_alternative][this_alternative]);
+   }
+   }
+ else
{
  /* There are no hard regs holding a value of given
 mode.  */
@@ -3217,6 +3241,21 @@ process_alt_operands (int only_alternative)
 "Cycle danger: overall += LRA_MAX_REJECT\n");
  overall += LRA_MAX_REJECT;
}
+  if (all_this_alternative != NO_REGS
+ && all_used_nregs != 0 && all_reload_nregs != 0
+ && (all_used_nregs + all_reload_nregs + 1
+ >= ira_class_hard_regs_num[all_this_alternative]))
+   {
+ if (lra_dump_file != NULL)
+   fprintf
+ (lra_dump_file,
+  "Register starvation: overall += LRA_MAX_REJECT"
+  "(class=%s,avail=%d,used=%d,reload=%d)\n",
+  reg_class_names[all_this_alternative],
+  ira_class_hard_regs_num[all_this_alternative],
+  all_used_nregs, all_reload_nregs);
+ overall += LRA_MAX_REJECT;
+   }
   ok_p = true;
   curr_alt_dont_inherit_ops_num = 0;
   for (nop = 0; nop < early_clobbered_regs_num; nop++)


[gcc r12-10431] Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:39d56b96996dd8336652ceac97983c26fd8de4c6

commit r12-10431-g39d56b96996dd8336652ceac97983c26fd8de4c6
Author: Andrew Pinski 
Date:   Thu Sep 7 22:13:31 2023 -0700

Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

The problem here is that after r6-7425-ga9fee7cdc3c62d0e51730,
the comparison to see if the transformation could be done was using the
wrong value. Instead of checking whether the inner value was LE (for MIN,
and GE for MAX) the outer value, it was comparing the inner value to the
value used in the comparison, which was wrong.

Committed to GCC 13 branch after bootstrapped and tested on 
x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111331
* tree-ssa-phiopt.cc (minmax_replacement):
Fix the LE/GE comparison for the
`(a CMP CST1) ? max : a` optimization.

gcc/testsuite/ChangeLog:

PR tree-optimization/111331
* gcc.c-torture/execute/pr111331-1.c: New test.
* gcc.c-torture/execute/pr111331-2.c: New test.
* gcc.c-torture/execute/pr111331-3.c: New test.

(cherry picked from commit 30e6ee074588bacefd2dfe745b188bb20c81fe5e)

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr111331-1.c | 17 +
 gcc/testsuite/gcc.c-torture/execute/pr111331-2.c | 19 +++
 gcc/testsuite/gcc.c-torture/execute/pr111331-3.c | 15 +++
 gcc/tree-ssa-phiopt.cc   |  8 
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
new file mode 100644
index ..4c7f4fdbaa9d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
@@ -0,0 +1,17 @@
+int a;
+int b;
+int c(int d, int e, int f) {
+  if (d < e)
+return e;
+  if (d > f)
+return f;
+  return d;
+}
+int main() {
+  int g = -1;
+  a = c(b + 30, 29, g + 29);
+  volatile t = a;
+  if (t != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
new file mode 100644
index ..5c677f2caa9f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
@@ -0,0 +1,19 @@
+
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+if (d < 29)
+  t =  29;
+else
+  t = (d > 28) ? 28 : d;
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
new file mode 100644
index ..213d9bdd539d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
@@ -0,0 +1,15 @@
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+t = d < 29 ? 29 : ((d > 28) ? 28 : d);
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index c56d0b9ff151..e2dba56383b4 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -2014,7 +2014,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= LARGER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_false)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_false, smaller)
@@ -2045,7 +2045,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_false)))
return false;
}
  else
@@ -2085,7 +2085,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= LARGER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_true)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_true, smaller)
@@ -2112,7 +2112,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_true)))
return false;
}
  else


[gcc r11-11421] Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:16e27b6d03756bf1fae22607fa93107787a7b9cb

commit r11-11421-g16e27b6d03756bf1fae22607fa93107787a7b9cb
Author: Andrew Pinski 
Date:   Thu Sep 7 22:13:31 2023 -0700

Fix PR 111331: wrong code for `a > 28 ? MIN : 29`

The problem here is that after r6-7425-ga9fee7cdc3c62d0e51730,
the comparison to see if the transformation could be done was using the
wrong value. Instead of checking whether the inner value was LE (for MIN,
and GE for MAX) the outer value, it was comparing the inner value to the
value used in the comparison, which was wrong.

Committed to GCC 13 branch after bootstrapped and tested on 
x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/111331
* tree-ssa-phiopt.c (minmax_replacement):
Fix the LE/GE comparison for the
`(a CMP CST1) ? max : a` optimization.

gcc/testsuite/ChangeLog:

PR tree-optimization/111331
* gcc.c-torture/execute/pr111331-1.c: New test.
* gcc.c-torture/execute/pr111331-2.c: New test.
* gcc.c-torture/execute/pr111331-3.c: New test.

(cherry picked from commit 30e6ee074588bacefd2dfe745b188bb20c81fe5e)

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr111331-1.c | 17 +
 gcc/testsuite/gcc.c-torture/execute/pr111331-2.c | 19 +++
 gcc/testsuite/gcc.c-torture/execute/pr111331-3.c | 15 +++
 gcc/tree-ssa-phiopt.c|  8 
 4 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
new file mode 100644
index ..4c7f4fdbaa9d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-1.c
@@ -0,0 +1,17 @@
+int a;
+int b;
+int c(int d, int e, int f) {
+  if (d < e)
+return e;
+  if (d > f)
+return f;
+  return d;
+}
+int main() {
+  int g = -1;
+  a = c(b + 30, 29, g + 29);
+  volatile t = a;
+  if (t != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
new file mode 100644
index ..5c677f2caa9f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-2.c
@@ -0,0 +1,19 @@
+
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+if (d < 29)
+  t =  29;
+else
+  t = (d > 28) ? 28 : d;
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c 
b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
new file mode 100644
index ..213d9bdd539d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr111331-3.c
@@ -0,0 +1,15 @@
+int a;
+int b;
+
+int main() {
+  int d = b+30;
+  {
+int t;
+t = d < 29 ? 29 : ((d > 28) ? 28 : d);
+a = t;
+  }
+  volatile int t = a;
+  if (a != 28)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 5831a7764a49..d26d7889d952 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1676,7 +1676,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= LARGER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_false)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_false, smaller)
@@ -1707,7 +1707,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_false)))
return false;
}
  else
@@ -1747,7 +1747,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND >= LARGER.  */
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
- bound, larger)))
+ bound, arg_true)))
return false;
}
  else if (operand_equal_for_phi_arg_p (arg_true, smaller)
@@ -1774,7 +1774,7 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 
  /* We need BOUND <= SMALLER.  */
  if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
- bound, smaller)))
+ bound, arg_true)))
return false;
}
  else


[gcc(refs/users/meissner/heads/work165-tar)] Update ChangeLog.*

2024-05-08 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:270b6fcfb2327764ddcac74d4c3eaa7888a5933b

commit 270b6fcfb2327764ddcac74d4c3eaa7888a5933b
Author: Michael Meissner 
Date:   Wed May 8 12:27:17 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.tar | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar
index 20a4ff5e6043..3e4684c22019 100644
--- a/gcc/ChangeLog.tar
+++ b/gcc/ChangeLog.tar
@@ -1,3 +1,12 @@
+ Branch work165-tar, patch #203 
+
+Limit SPR registers to hold only DImode/SImode.
+
+2024-05-08  Michael Meissner  
+
+   * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Limit
+   SPR registers to only hold SImode/DImode.
+
  Branch work165-tar, patch #202 
 
 Add -mfspr option.


[gcc(refs/users/meissner/heads/work165-tar)] Limit SPR registers to hold only DImode/SImode.

2024-05-08 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:caf6d1a19dbd292a6d8efafc4997d37d395789ca

commit caf6d1a19dbd292a6d8efafc4997d37d395789ca
Author: Michael Meissner 
Date:   Wed May 8 12:26:28 2024 -0400

Limit SPR registers to hold only DImode/SImode.

2024-05-08  Michael Meissner  

* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): 
Limit
SPR registers to only hold SImode/DImode.

Diff:
---
 gcc/config/rs6000/rs6000.cc | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index bfc207fb5f55..6bfcfdfb22c5 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1848,6 +1848,7 @@ static int
 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
 {
   int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
+  machine_mode orig_mode = mode;
 
   if (COMPLEX_MODE_P (mode))
 mode = GET_MODE_INNER (mode);
@@ -1929,8 +1930,21 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
   if (CR_REGNO_P (regno))
 return GET_MODE_CLASS (mode) == MODE_CC;
 
-  if (CA_REGNO_P (regno))
-return mode == Pmode || mode == SImode;
+  /* Limit SPR registers to integer modes that can fit in a single register.
+ Do not allow complex modes or modes that need sign/zero extension.  */
+  switch (regno)
+{
+case LR_REGNO:
+case CTR_REGNO:
+case TAR_REGNO:
+case VRSAVE_REGNO:
+case VSCR_REGNO:
+case CA_REGNO:
+  return (orig_mode == Pmode || orig_mode == SImode);
+
+default:
+  break;
+}
 
   /* AltiVec only in AldyVec registers.  */
   if (ALTIVEC_REGNO_P (regno))


[gcc r15-329] c++: #pragma doesn't disable -Wunused-label [PR113582]

2024-05-08 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:d9318caed3bbff8136d13e00dcfc020a59d10f78

commit r15-329-gd9318caed3bbff8136d13e00dcfc020a59d10f78
Author: Marek Polacek 
Date:   Wed Jan 24 18:06:48 2024 -0500

c++: #pragma doesn't disable -Wunused-label [PR113582]

The PR complains that

  void do_something(){
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-label"
start:;
#pragma GCC diagnostic pop
  } #1

doesn't work.  That's because we call warn_for_unused_label only while we're
in finish_function, meaning we're at #1, where we're outside the #pragma
region.  We can use suppress_warning + warning_suppressed_p to fix this.

Note that I'm not using TREE_USED.  Propagating it in tsubst_stmt/LABEL_EXPR
from decl to label would mean that we don't warn in do_something2, but
I think we want the warning there: we're in a template and the goto is
a discarded statement.

PR c++/113582

gcc/c-family/ChangeLog:

* c-warn.cc (warn_for_unused_label): Don't warn if -Wunused-label 
has
been suppressed for the label.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_label_for_labeled_statement): 
suppress_warning
if it's not enabled at input_location.
* pt.cc (tsubst_stmt): Call copy_warning.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wunused-label-4.C: New test.

Diff:
---
 gcc/c-family/c-warn.cc  |  4 +++-
 gcc/cp/parser.cc|  6 +-
 gcc/cp/pt.cc|  9 +
 gcc/testsuite/g++.dg/warn/Wunused-label-4.C | 29 +
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/gcc/c-family/c-warn.cc b/gcc/c-family/c-warn.cc
index bff87be05ae3..5b2d6805c790 100644
--- a/gcc/c-family/c-warn.cc
+++ b/gcc/c-family/c-warn.cc
@@ -2185,7 +2185,9 @@ warn_for_unused_label (tree label)
 {
   if (!TREE_USED (label))
 {
-  if (DECL_INITIAL (label))
+  if (warning_suppressed_p (label, OPT_Wunused_label))
+   /* Don't warn.  */;
+  else if (DECL_INITIAL (label))
warning (OPT_Wunused_label, "label %q+D defined but not used", label);
   else
warning (OPT_Wunused_label, "label %q+D declared but not defined", 
label);
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index c4191200291d..7306ce9a8a8b 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -13108,7 +13108,11 @@ cp_parser_label_for_labeled_statement (cp_parser* 
parser, tree attributes)
   /* Anything else must be an ordinary label.  */
   label = finish_label_stmt (cp_parser_identifier (parser));
   if (label && TREE_CODE (label) == LABEL_DECL)
-   FALLTHROUGH_LABEL_P (label) = fallthrough_p;
+   {
+ FALLTHROUGH_LABEL_P (label) = fallthrough_p;
+ if (!warning_enabled_at (input_location, OPT_Wunused_label))
+   suppress_warning (label, OPT_Wunused_label);
+   }
   break;
 }
 
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 1816bfd1f401..8787eabb9fdb 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -18837,11 +18837,12 @@ tsubst_stmt (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
 case LABEL_EXPR:
   {
tree decl = LABEL_EXPR_LABEL (t);
-   tree label;
-
-   label = finish_label_stmt (DECL_NAME (decl));
+   tree label = finish_label_stmt (DECL_NAME (decl));
if (TREE_CODE (label) == LABEL_DECL)
- FALLTHROUGH_LABEL_P (label) = FALLTHROUGH_LABEL_P (decl);
+ {
+   FALLTHROUGH_LABEL_P (label) = FALLTHROUGH_LABEL_P (decl);
+   copy_warning (label, decl);
+ }
if (DECL_ATTRIBUTES (decl) != NULL_TREE)
  cplus_decl_attributes (&label, DECL_ATTRIBUTES (decl), 0);
   }
diff --git a/gcc/testsuite/g++.dg/warn/Wunused-label-4.C 
b/gcc/testsuite/g++.dg/warn/Wunused-label-4.C
new file mode 100644
index ..d194f043d215
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wunused-label-4.C
@@ -0,0 +1,29 @@
+// PR c++/113582
+// { dg-do compile { target c++17 } }
+// { dg-options "-Wunused-label" }
+
+template void
+do_something ()
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-label"
+start:
+  if constexpr(B)
+goto start;
+#pragma GCC diagnostic pop
+}
+
+template void
+do_something2 ()
+{
+start: // { dg-warning "defined but not used" }
+  if constexpr(B)
+goto start;
+}
+
+void
+g ()
+{
+  do_something<0>();
+  do_something2<0>();
+}


[gcc r11-11420] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6c00c3245e688d00dae3e928f0d03f530640caae

commit r11-11420-g6c00c3245e688d00dae3e928f0d03f530640caae
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the type of the
operands of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.c (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 31ce2e993d09dcad1ce139a2848a28de5931056d)

Diff:
---
 gcc/fold-const.c|  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index f4fd980dbbc8..97f77da5b93f 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -6171,7 +6171,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6187,6 +6186,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index ..d2dbff350663
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


[gcc r12-10430] Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:d88fe8210e4edc2f4ddf722ba788924452c6f6a0

commit r12-10430-gd88fe8210e4edc2f4ddf722ba788924452c6f6a0
Author: Andrew Pinski 
Date:   Sun Mar 10 22:17:09 2024 +

Fold: Fix up merge_truthop_with_opposite_arm for NaNs [PR95351]

The problem here is that merge_truthop_with_opposite_arm would
use the type of the result of the comparison rather than the type of the
operands of the comparison to figure out if we are honoring NaNs.
This fixes that oversight and now we get the correct results in this
case.

Committed as obvious after a bootstrap/test on x86_64-linux-gnu.

PR middle-end/95351

gcc/ChangeLog:

* fold-const.cc (merge_truthop_with_opposite_arm): Use
the type of the operands of the comparison and not the type
of the comparison.

gcc/testsuite/ChangeLog:

* gcc.dg/float_opposite_arm-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 31ce2e993d09dcad1ce139a2848a28de5931056d)

Diff:
---
 gcc/fold-const.cc   |  3 ++-
 gcc/testsuite/gcc.dg/float_opposite_arm-1.c | 17 +
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index cd410e50d779..da96ed34a4c3 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -6188,7 +6188,6 @@ static tree
 merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
 bool rhs_only)
 {
-  tree type = TREE_TYPE (cmpop);
   enum tree_code code = TREE_CODE (cmpop);
   enum tree_code truthop_code = TREE_CODE (op);
   tree lhs = TREE_OPERAND (op, 0);
@@ -6204,6 +6203,8 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, 
tree cmpop,
   if (TREE_CODE_CLASS (code) != tcc_comparison)
 return NULL_TREE;
 
+  tree type = TREE_TYPE (TREE_OPERAND (cmpop, 0));
+
   if (rhs_code == truthop_code)
 {
   tree newrhs = merge_truthop_with_opposite_arm (loc, rhs, cmpop, 
rhs_only);
diff --git a/gcc/testsuite/gcc.dg/float_opposite_arm-1.c 
b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
new file mode 100644
index ..d2dbff350663
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/float_opposite_arm-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-original -fdump-tree-optimized" } */
+/* { dg-add-options ieee } */
+/* PR middle-end/95351 */
+
+int Foo(double possiblyNAN, double b, double c)
+{
+return (possiblyNAN <= 2.0) || ((possiblyNAN  > 2.0) && (b > c));
+}
+
+/* Make sure we don't remove either >/<=  */
+
+/* { dg-final { scan-tree-dump "possiblyNAN > 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. > 2.0e.0" "optimized" 
} } */
+
+/* { dg-final { scan-tree-dump "possiblyNAN <= 2.0e.0" "original" } } */
+/* { dg-final { scan-tree-dump "possiblyNAN_\[0-9\]+.D. <= 2.0e.0" "optimized" 
} } */


[gcc r12-10427] libstdc++: Add include guard to simd-internal header

2024-05-08 Thread Matthias Kretz via Libstdc++-cvs
https://gcc.gnu.org/g:c06311303bb61ce530f6eb05c6e838bffe2a6ae4

commit r12-10427-gc06311303bb61ce530f6eb05c6e838bffe2a6ae4
Author: Matthias Kretz 
Date:   Wed Apr 17 10:12:42 2024 +0200

libstdc++: Add include guard to simd-internal header

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/numeric_traits.h: Add include guard.

(cherry picked from commit 3cfe94ad28102618c14a91c0a83d9e5cc7df69d7)

Diff:
---
 libstdc++-v3/include/experimental/bits/numeric_traits.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/libstdc++-v3/include/experimental/bits/numeric_traits.h 
b/libstdc++-v3/include/experimental/bits/numeric_traits.h
index 84e97da7d6b3..85f189e276f9 100644
--- a/libstdc++-v3/include/experimental/bits/numeric_traits.h
+++ b/libstdc++-v3/include/experimental/bits/numeric_traits.h
@@ -22,6 +22,9 @@
 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 // .
 
+#ifndef _GLIBCXX_EXPERIMENTAL_BITS_NUMERIC_TRAITS_H
+#define _GLIBCXX_EXPERIMENTAL_BITS_NUMERIC_TRAITS_H
+
 #include 
 
 namespace std {
@@ -565,3 +568,4 @@ template <>
 #endif // __FINITE_MATH_ONLY__
 
 } // namespace std
+#endif  // _GLIBCXX_EXPERIMENTAL_BITS_NUMERIC_TRAITS_H


[gcc r12-10429] libstdc++: Fix conversion of simd to vector builtin

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:0ab30fb865e6465156f7db10506b56ef2df19648

commit r12-10429-g0ab30fb865e6465156f7db10506b56ef2df19648
Author: Matthias Kretz 
Date:   Mon Apr 22 16:12:34 2024 +0200

libstdc++: Fix conversion of simd to vector builtin

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/114803
* include/experimental/bits/simd_builtin.h
(_SimdBase2::operator __vector_type_t): There is no __builtin()
function in _SimdWrapper, instead use its conversion operator.
* testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc: New
test.

(cherry picked from commit 7ef139146a8923a8719873ca3fdae175668e8d63)

Diff:
---
 .../include/experimental/bits/simd_builtin.h   |   2 +-
 .../experimental/simd/pr114803_vecbuiltin_cvt.cc   | 105 +
 2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index fc8937a6e024..57a5640643da 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -841,7 +841,7 @@ template 
 
   _GLIBCXX_SIMD_ALWAYS_INLINE explicit
   operator __vector_type_t<_Tp, _Np>() const
-  { return static_cast*>(this)->_M_data.__builtin(); 
}
+  { return __data(*static_cast*>(this)); }
 };
 
 struct _SimdBase1
diff --git 
a/libstdc++-v3/testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc
new file mode 100644
index ..103dd19394c1
--- /dev/null
+++ b/libstdc++-v3/testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc
@@ -0,0 +1,105 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do compile { target c++17 } }
+
+#include 
+
+template 
+  void
+  maybe_test()
+  {
+using V = std::experimental::simd>;
+if constexpr (std::is_destructible_v)
+  {
+   using V2 [[gnu::vector_size(16)]] = T;
+   V x = {};
+   V2 x2 = static_cast(x);
+   x = static_cast(x2);
+   for (unsigned i = 0; i < V::size(); ++i)
+ {
+   if (x2[i] != 0)
+ __builtin_abort();
+ }
+#ifdef __SSE__
+   if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__m128>(x));
+   else if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__m128d>(x));
+   else if constexpr (std::is_integral_v)
+ x = static_cast(static_cast<__m128i>(x));
+#elif __ALTIVEC__
+   if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__vector float>(x));
+#ifdef __VSX__
+   else if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__vector double>(x));
+#endif
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(signed 
char)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed char>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(signed 
char))
+ x = static_cast(static_cast<__vector unsigned char>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(short)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed short>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(short))
+ x = static_cast(static_cast<__vector unsigned short>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(int)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed int>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(int))
+ x = static_cast(static_cast<__vector unsigned int>(x));
+#ifdef __VSX__
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(long 
long)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed long long>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(long 
long))
+ x = static_cast(static_cast<__vector unsigned long long>(x));
+#endif
+#elif __ARM_NEON
+   if constexpr (std::is_same_v)
+ x = static_cast(static_cast(x));
+#ifdef __aarch64__
+   else if constexpr (std::is_same_v)
+ x = static_cast(static_cast(x));
+#endif
+   else if constexpr (std::is_integral_v && sizeof(T) == 1 && 
std::is_signed_v)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 1)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 2 && 
std::is_signed_v)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 2)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 4 && 
std::is_signed_v)
+ x

[gcc r12-10428] libstdc++: Silence irrelevant warnings in <experimental/simd>

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:79aa69663cfbac30b76c85a8e4856b0865749ef7

commit r12-10428-g79aa69663cfbac30b76c85a8e4856b0865749ef7
Author: Matthias Kretz 
Date:   Wed Apr 17 10:35:47 2024 +0200

libstdc++: Silence irrelevant warnings in <experimental/simd>

Avoid
-Wnarrowing in C code;
-Wtautological-compare in unconditional static_assert (necessary for
faking a dependency on a template parameter)

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h: Ignore -Wnarrowing for
arm_neon.h.
(__int_for_sizeof): Replace tautological compare with checking
for invalid template parameter value.
* include/experimental/bits/simd_builtin.h (__extract_part):
Remove tautological compare by combining two static_assert.

(cherry picked from commit e7a3ad29c9c832b6ae999cbfb0af89e121959030)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 8 +++-
 libstdc++-v3/include/experimental/bits/simd_builtin.h | 3 +--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 6f7418a840ea..439545869be8 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -43,7 +43,12 @@
 #if _GLIBCXX_SIMD_X86INTRIN
 #include 
 #elif _GLIBCXX_SIMD_HAVE_NEON
+#pragma GCC diagnostic push
+// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned 
int'} to
+//   'int64x1_t' {aka 'long long int'} [-Wnarrowing]
+#pragma GCC diagnostic ignored "-Wnarrowing"
 #include 
+#pragma GCC diagnostic pop
 #endif
 
 /** @ingroup ts_simd
@@ -576,6 +581,7 @@ template 
   constexpr auto
   __int_for_sizeof()
   {
+static_assert(_Bytes > 0);
 if constexpr (_Bytes == sizeof(int))
   return int();
   #ifdef __clang__
@@ -641,7 +647,7 @@ template 
return _Ip{};
   }
 else
-  static_assert(_Bytes != _Bytes, "this should be unreachable");
+  static_assert(_Bytes == 0, "this should be unreachable");
   }
 #pragma GCC diagnostic pop
 
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index e060816c6eac..fc8937a6e024 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -278,8 +278,7 @@ template 
   __extract_part(const _SimdWrapper __x)
   {
 static_assert(_Combine == 1, "_Combine != 1 not implemented");
-static_assert(__have_avx512f && _Np == _Np);
-static_assert(_Total >= 2 && _Index + _Combine <= _Total && _Index >= 0);
+static_assert(__have_avx512f && _Total >= 2 && _Index + _Combine <= _Total 
&& _Index >= 0);
 return __x._M_data >> (_Index * _Np / _Total);
   }


[gcc r12-10426] libstdc++: Avoid ill-formed types on ARM

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:b3097e01cd5d32e9825b7b87dff12b3247bd9819

commit r12-10426-gb3097e01cd5d32e9825b7b87dff12b3247bd9819
Author: Matthias Kretz 
Date:   Wed Apr 17 09:11:25 2024 +0200

libstdc++: Avoid ill-formed types on ARM

This resolves failing tests in check-simd.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/114750
* include/experimental/bits/simd_builtin.h
(_SimdImplBuiltin::_S_load, _S_store): Fall back to copying
scalars if the memory type cannot be vectorized for the target.

(cherry picked from commit 0fc7f3c6adc8543f55ec35b309016d9d9c4ddd35)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_builtin.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 51034fec6931..e060816c6eac 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1464,7 +1464,7 @@ template 
   [&](auto __i) constexpr {
 return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
   });
-   else if constexpr (sizeof(_Up) > 8)
+   else if constexpr (sizeof(_Up) > 8 or __vectorized_sizeof<_Up>() <= 
sizeof(_Up))
  return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>(
   [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
@@ -1536,7 +1536,7 @@ template 
for (size_t __i = 0; __i < _Np; ++__i)
  __mem[__i] = __v[__i];
  }
-   else if constexpr (sizeof(_Up) > 8)
+   else if constexpr (sizeof(_Up) > 8 or __vectorized_sizeof<_Up>() <= 
sizeof(_Up))
  __execute_n_times<_Np>([&](auto __i) constexpr 
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
__mem[__i] = __v[__i];
  });


[gcc r12-10424] libstdc++: Fix call signature of builtins from masked ++/--

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:7e40721ebd07317aa5e8704549401ba4bcde2e2f

commit r12-10424-g7e40721ebd07317aa5e8704549401ba4bcde2e2f
Author: Matthias Kretz 
Date:   Wed Mar 27 08:49:43 2024 +0100

libstdc++: Fix call signature of builtins from masked ++/--

This resolves failures in the "expensive" where-* test of check-simd
when targeting AVX-512.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_S_masked_unary): Call
the 4- and 8-byte variants of __builtin_ia32_subp[ds] without
rounding direction argument.

(cherry picked from commit 0ac2c0f0687b321ab54de271d788b4e0a287b4e2)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 03febe7044c9..90b7fca26e2b 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -3538,17 +3538,23 @@ template 
  }
else
  {
-#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+#define _GLIBCXX_SIMD_MASK_SUB_512(_Sizeof, _Width, _Instr)
\
   if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
 return __builtin_ia32_##_Instr##_mask( 
\
 __v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
 __k._M_data, _MM_FROUND_CUR_DIRECTION)
-   _GLIBCXX_SIMD_MASK_SUB(4, 64, subps512);
+#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
+return __builtin_ia32_##_Instr##_mask( 
\
+__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
+__k._M_data)
+   _GLIBCXX_SIMD_MASK_SUB_512(4, 64, subps512);
_GLIBCXX_SIMD_MASK_SUB(4, 32, subps256);
_GLIBCXX_SIMD_MASK_SUB(4, 16, subps128);
-   _GLIBCXX_SIMD_MASK_SUB(8, 64, subpd512);
+   _GLIBCXX_SIMD_MASK_SUB_512(8, 64, subpd512);
_GLIBCXX_SIMD_MASK_SUB(8, 32, subpd256);
_GLIBCXX_SIMD_MASK_SUB(8, 16, subpd128);
+#undef _GLIBCXX_SIMD_MASK_SUB_512
 #undef _GLIBCXX_SIMD_MASK_SUB
  }
 #endif // __clang__


[gcc r12-10425] libstdc++: Add masked ++/-- implementation for sizeof < 16

2024-05-08 Thread Matthias Kretz via Libstdc++-cvs
https://gcc.gnu.org/g:5cf14c6f219f2bb18bbbe7964576eeebb8cee84b

commit r12-10425-g5cf14c6f219f2bb18bbbe7964576eeebb8cee84b
Author: Matthias Kretz 
Date:   Wed Mar 27 13:41:25 2024 +0100

libstdc++: Add masked ++/-- implementation for sizeof < 16

This resolves further failures (-Wreturn-type warnings) and test
failures for where-* tests targeting AVX-512.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_S_masked_unary):
Cast inputs < 16 bytes to 16 byte vectors before calling the
right subtraction builtin. Before returning, truncate to the
return vector type.

(cherry picked from commit a6c630c314b099f64d79055964d88b257459cf13)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 24 +--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 90b7fca26e2b..7cda7f7d0e07 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -3508,6 +3508,9 @@ template 
 #ifdef __clang__
return __movm<_Np, _Tp>(__k._M_data) ? __v._M_data - __pm_one : 
__v._M_data;
 #else // __clang__
+   using _TV = __vector_type_t<_Tp, _Np>;
+   constexpr size_t __bytes = sizeof(__v) < 16 ? 16 : sizeof(__v);
+   constexpr size_t __width = __bytes / sizeof(_Tp);
if constexpr (is_integral_v<_Tp>)
  {
constexpr bool __lp64 = sizeof(long) == sizeof(long long);
@@ -3517,11 +3520,11 @@ template 
  std::conditional_t<__lp64, long long, int>,
  std::conditional_t<
std::is_same_v<_Ip, signed char>, char, _Ip>>;
-   const auto __value = __vector_bitcast<_Up>(__v._M_data);
+   const auto __value = __intrin_bitcast<__vector_type_t<_Up, 
__width>>(__v._M_data);
 #define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
-  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
-return __vector_bitcast<_Tp>(__builtin_ia32_##_Instr##_mask(__value,   
\
-__vector_broadcast<_Np>(_Up(__pm_one)), __value, __k._M_data))
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width)   
\
+return __intrin_bitcast<_TV>(__builtin_ia32_##_Instr##_mask(__value,   
\
+__vector_broadcast<__width>(_Up(__pm_one)), __value, __k._M_data))
_GLIBCXX_SIMD_MASK_SUB(1, 64, psubb512);
_GLIBCXX_SIMD_MASK_SUB(1, 32, psubb256);
_GLIBCXX_SIMD_MASK_SUB(1, 16, psubb128);
@@ -3538,16 +3541,17 @@ template 
  }
else
  {
+   const auto __value = __intrin_bitcast<__vector_type_t<_Tp, 
__width>>(__v._M_data);
 #define _GLIBCXX_SIMD_MASK_SUB_512(_Sizeof, _Width, _Instr)
\
-  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width)   
\
 return __builtin_ia32_##_Instr##_mask( 
\
-__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
+__value, __vector_broadcast<__width>(_Tp(__pm_one)), __value, \
 __k._M_data, _MM_FROUND_CUR_DIRECTION)
 #define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
-  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
-return __builtin_ia32_##_Instr##_mask( 
\
-__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
-__k._M_data)
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width)   
\
+return __intrin_bitcast<_TV>(__builtin_ia32_##_Instr##_mask(   
\
+__value, __vector_broadcast<__width>(_Tp(__pm_one)), __value, \
+__k._M_data))
_GLIBCXX_SIMD_MASK_SUB_512(4, 64, subps512);
_GLIBCXX_SIMD_MASK_SUB(4, 32, subps256);
_GLIBCXX_SIMD_MASK_SUB(4, 16, subps128);


[gcc r12-10423] libstdc++: Avoid vector casts while still avoiding PR90424

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:44df51e5826747d089a08fa1a1378454a8d2d0de

commit r12-10423-g44df51e5826747d089a08fa1a1378454a8d2d0de
Author: Matthias Kretz 
Date:   Fri Jun 2 21:33:04 2023 +0200

libstdc++: Avoid vector casts while still avoiding PR90424

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109822
* include/experimental/bits/simd_builtin.h (_S_store): Rewrite
to avoid casts to other vector types. Implement store as
succession of power-of-2 sized memcpy to avoid PR90424.

(cherry picked from commit 9165ede56ababd6471e7a2ce4eab30f3d5129e14)

Diff:
---
 .../include/experimental/bits/simd_builtin.h   | 40 --
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 8923a82da39e..51034fec6931 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1295,6 +1295,18 @@ struct _CommonImplBuiltin
 
   // }}}
   // _S_store {{{
+  template 
+_GLIBCXX_SIMD_INTRINSIC static void
+_S_memcpy(char* __dst, const char* __src)
+{
+  if constexpr (_Bytes > 0)
+   {
+ constexpr size_t _Ns = std::__bit_floor(_Bytes);
+ __builtin_memcpy(__dst, __src, _Ns);
+ _S_memcpy<_Bytes - _Ns>(__dst + _Ns, __src + _Ns);
+   }
+}
+
   template 
 _GLIBCXX_SIMD_INTRINSIC static void
 _S_store(_TV __x, void* __addr)
@@ -1302,33 +1314,11 @@ struct _CommonImplBuiltin
   constexpr size_t _Bytes = _ReqBytes == 0 ? sizeof(__x) : _ReqBytes;
   static_assert(sizeof(__x) >= _Bytes);
 
+#if !defined __clang__ && _GLIBCXX_SIMD_WORKAROUND_PR90424
   if constexpr (__is_vector_type_v<_TV>)
-   {
- using _Tp = typename _VectorTraits<_TV>::value_type;
- constexpr size_t _Np = _Bytes / sizeof(_Tp);
- static_assert(_Np * sizeof(_Tp) == _Bytes);
-
-#ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
- using _Up = conditional_t<
-   (is_integral_v<_Tp> || _Bytes < 4),
-   conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>,
-   float>;
- const auto __v = __vector_bitcast<_Up>(__x);
-#else // _GLIBCXX_SIMD_WORKAROUND_PR90424
- const __vector_type_t<_Tp, _Np> __v = __x;
-#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
-
- if constexpr ((_Bytes & (_Bytes - 1)) != 0)
-   {
- constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes);
- alignas(decltype(__v)) char __tmp[_MoreBytes];
- __builtin_memcpy(__tmp, &__v, _MoreBytes);
- __builtin_memcpy(__addr, __tmp, _Bytes);
-   }
- else
-   __builtin_memcpy(__addr, &__v, _Bytes);
-   }
+   _S_memcpy<_Bytes>(reinterpret_cast(__addr), 
reinterpret_cast(&__x));
   else
+#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
__builtin_memcpy(__addr, &__x, _Bytes);
 }


[gcc r12-10422] libstdc++: Replace use of incorrect non-temporal store

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:e60ec9b0e02e8647c289d204342e992e91750011

commit r12-10422-ge60ec9b0e02e8647c289d204342e992e91750011
Author: Matthias Kretz 
Date:   Fri Jun 2 13:44:22 2023 +0200

libstdc++: Replace use of incorrect non-temporal store

The call to the base implementation sometimes didn't find a matching
signature because the _Abi parameter of _SimdImpl* was "wrong" after
conversion. It has to call into the deduced ABI tag's _SimdImpl
(_UAbi::_SimdImpl) instead of the current ABI tag's _SimdImpl. This also
reduces the number of possible template instantiations.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/110054
* include/experimental/bits/simd_builtin.h (_S_masked_store):
Call into deduced ABI's SimdImpl after conversion.
* include/experimental/bits/simd_x86.h (_S_masked_store_nocvt):
Don't use _mm_maskmoveu_si128. Use the generic fall-back
implementation. Also fix masked stores without SSE2, which
were not doing anything before.

(cherry picked from commit 27e45b7597d6fb1a71927d658a0294797b720c0a)

Diff:
---
 .../include/experimental/bits/simd_builtin.h   |  6 ++--
 libstdc++-v3/include/experimental/bits/simd_x86.h  | 38 +++---
 2 files changed, 7 insertions(+), 37 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 9ea6259bfda2..8923a82da39e 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1628,7 +1628,7 @@ template 
if constexpr (_UW_size == _TV_size) // one convert+store
  {
const _UW __converted = __convert<_UW>(__v);
-   _SuperImpl::_S_masked_store_nocvt(
+   _UAbi::_SimdImpl::_S_masked_store_nocvt(
  __converted, __mem,
  _UAbi::_MaskImpl::template _S_convert<
__int_for_sizeof_t<_Up>>(__k));
@@ -1643,7 +1643,7 @@ template 
const array<_UV, _NAllStores> __converted
  = __convert_all<_UV, _NAllStores>(__v);
__execute_n_times<_NFullStores>([&](auto __i) 
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
- _SuperImpl::_S_masked_store_nocvt(
+ _UAbi::_SimdImpl::_S_masked_store_nocvt(
_UW(__converted[__i]), __mem + __i * _UW_size,
_UAbi::_MaskImpl::template _S_convert<
  __int_for_sizeof_t<_Up>>(
@@ -1651,7 +1651,7 @@ template 
});
if constexpr (_NAllStores
  > _NFullStores) // one partial at the end
- _SuperImpl::_S_masked_store_nocvt(
+ _UAbi::_SimdImpl::_S_masked_store_nocvt(
_UW(__converted[_NFullStores]),
__mem + _NFullStores * _UW_size,
_UAbi::_MaskImpl::template _S_convert<
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 557462893964..03febe7044c9 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1106,31 +1106,6 @@ template 
else
  _mm512_mask_storeu_pd(__mem, __k, __vi);
  }
-#if 0 // with KNL either sizeof(_Tp) >= 4 or sizeof(_vi) <= 32
-  // with Skylake-AVX512, __have_avx512bw is true
- else if constexpr (__have_sse2)
-   {
- using _M   = __vector_type_t<_Tp, _Np>;
- using _MVT = _VectorTraits<_M>;
- _mm_maskmoveu_si128(__auto_bitcast(__extract<0, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template 
_S_convert<_Tp, _Np>(__k._M_data)),
- reinterpret_cast(__mem));
- _mm_maskmoveu_si128(__auto_bitcast(__extract<1, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template 
_S_convert<_Tp, _Np>(
-   __k._M_data >> 1 * _MVT::_S_full_size)),
- reinterpret_cast(__mem) + 1 * 16);
- _mm_maskmoveu_si128(__auto_bitcast(__extract<2, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template 
_S_convert<_Tp, _Np>(
-   __k._M_data >> 2 * _MVT::_S_full_size)),
- reinterpret_cast(__mem) + 2 * 16);
- if constexpr (_Np > 48 / sizeof(_Tp))
-   _mm_maskmoveu_si128(
- __auto_bitcast(__extract<3, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template _S_convert<_Tp, _Np>(
-   __k._M_data >> 3 * _MVT::_S_full_size)),
- reinterpret_cast(__mem) + 3 * 16);
-   }
-#endif
else
  __ass

[gcc r12-10421] libstdc++: Protect against macros

2024-05-08 Thread Matthias Kretz via Libstdc++-cvs
https://gcc.gnu.org/g:81830862ee431e93c4008c87da8ab352dc79770d

commit r12-10421-g81830862ee431e93c4008c87da8ab352dc79770d
Author: Matthias Kretz 
Date:   Fri Jun 2 21:21:36 2023 +0200

libstdc++: Protect against macros

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h (__bit_cast): Use
__gnu__::__vector_size__ instead of gnu::vector_size.

(cherry picked from commit ce2188e4320cbb46d6246bd3f478ba20440c62f3)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index b18ce9d34a27..6f7418a840ea 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1743,18 +1743,18 @@ template 
   return reinterpret_cast<_To>(__x);
 else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
   {
-   using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
+   using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
return reinterpret_cast<_To>(_FV{__x});
   }
 else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
   {
-   using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
-   using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
+   using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
+   using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
return reinterpret_cast<_TV>(_FV{__x})[0];
   }
 else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
   {
-   using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
+   using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
return reinterpret_cast<_TV>(__x)[0];
   }
 else


[gcc r13-8728] Fix PR 110066: crash with -pg -static on riscv

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:929b0fffe4d3d836e07e5a398a8e176e65f8b2c2

commit r13-8728-g929b0fffe4d3d836e07e5a398a8e176e65f8b2c2
Author: Andrew Pinski 
Date:   Sat Jul 22 08:52:42 2023 -0700

Fix PR 110066: crash with -pg -static on riscv

The problem is that -fasynchronous-unwind-tables is on by default for
riscv linux. We need to turn it off for crt*.o because it would make
__EH_FRAME_BEGIN__ point to .eh_frame data from crtbeginT.o instead of
the user-defined object during static linking.

This turns it off.

OK?

libgcc/ChangeLog:

* config.host (riscv*-*-linux*): Add t-crtstuff to tmake_file.
(riscv*-*-freebsd*): Likewise.
* config/riscv/t-crtstuff: New file.

(cherry picked from commit bbc1a102735c72e3c5a4dede8ab382813d12b058)

Diff:
---
 libgcc/config.host | 4 ++--
 libgcc/config/riscv/t-crtstuff | 5 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index 9d7212028d06..c94d69d84b7c 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1304,12 +1304,12 @@ pru-*-*)
tm_file="$tm_file pru/pru-abi.h"
;;
 riscv*-*-linux*)
-   tmake_file="${tmake_file} riscv/t-softfp${host_address} t-softfp 
riscv/t-elf riscv/t-elf${host_address} t-slibgcc-libgcc"
+   tmake_file="${tmake_file} riscv/t-crtstuff 
riscv/t-softfp${host_address} t-softfp riscv/t-elf riscv/t-elf${host_address} 
t-slibgcc-libgcc"
extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o crtendS.o 
crtbeginT.o"
md_unwind_header=riscv/linux-unwind.h
;;
 riscv*-*-freebsd*)
-   tmake_file="${tmake_file} riscv/t-softfp${host_address} t-softfp 
riscv/t-elf riscv/t-elf${host_address} t-slibgcc-libgcc"
+   tmake_file="${tmake_file} riscv/t-crtstuff 
riscv/t-softfp${host_address} t-softfp riscv/t-elf riscv/t-elf${host_address} 
t-slibgcc-libgcc"
extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o crtendS.o 
crtbeginT.o"
;;
 riscv*-*-*)
diff --git a/libgcc/config/riscv/t-crtstuff b/libgcc/config/riscv/t-crtstuff
new file mode 100644
index ..685d11b3e66d
--- /dev/null
+++ b/libgcc/config/riscv/t-crtstuff
@@ -0,0 +1,5 @@
+# -fasynchronous-unwind-tables -funwind-tables is on by default for riscv linux
+# We turn it off for crt*.o because it would make __EH_FRAME_BEGIN__ point
+# to .eh_frame data from crtbeginT.o instead of the user-defined object
+# during static linking.
+CRTSTUFF_T_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables


[gcc r15-328] match: `a CMP nonnegative ? a : ABS` simplified to just `ABS` [PR112392]

2024-05-08 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:5726de79e2154a16d8a045567d2cfad035f7ed19

commit r15-328-g5726de79e2154a16d8a045567d2cfad035f7ed19
Author: Andrew Pinski 
Date:   Mon May 6 23:53:41 2024 -0700

match: `a CMP nonnegative ? a : ABS` simplified to just `ABS` 
[PR112392]

We can optimize `a == nonnegative ? a : ABS`, `a > nonnegative ? a : ABS`
and `a >= nonnegative ? a : ABS` into `ABS`. This allows removal of
some extra comparisons and extra conditional moves in some cases.
I don't remember where I found this, but it is simple to add, so
let's add it.

Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Note I have a secondary pattern for the equal case, as either a or
nonnegative could be used.

PR tree-optimization/112392

gcc/ChangeLog:

* match.pd (`x CMP nonnegative ? x : ABS`): New pattern;
where CMP is ==, > and >=.
(`x CMP nonnegative@y ? y : ABS`): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-41.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd   | 15 +
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c | 34 ++
 2 files changed, 49 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 03a03c31233c..07e743ae464b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5876,6 +5876,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (convert (absu:utype @0)))
 @3
 
+/* X >  Positive ? X : ABS(X) -> ABS(X) */
+/* X >= Positive ? X : ABS(X) -> ABS(X) */
+/* X == Positive ? X : ABS(X) -> ABS(X) */
+(for cmp (eq gt ge)
+ (simplify
+  (cond (cmp:c @0 tree_expr_nonnegative_p@1) @0 (abs@3 @0))
+  (if (INTEGRAL_TYPE_P (type))
+   @3)))
+
+/* X == Positive ? Positive : ABS(X) -> ABS(X) */
+(simplify
+ (cond (eq:c @0 tree_expr_nonnegative_p@1) @1 (abs@3 @0))
+ (if (INTEGRAL_TYPE_P (type))
+  @3))
+
 /* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.  */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c
new file mode 100644
index ..9774e283a7ba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-41.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-phiopt1" } */
+/* PR tree-optimization/112392 */
+
+int feq_1(int a, unsigned char b)
+{
+  int absb = b;
+  if (a == absb)  return absb;
+  return a > 0 ? a : -a;
+}
+int feq_2(int a, unsigned char b)
+{
+  int absb = b;
+  if (a == absb)  return a;
+  return a > 0 ? a : -a;
+}
+
+int fgt(int a, unsigned char b)
+{
+  int absb = b;
+  if (a > absb)  return a;
+  return a > 0 ? a : -a;
+}
+
+int fge(int a, unsigned char b)
+{
+  int absb = b;
+  if (a >= absb)  return a;
+  return a > 0 ? a : -a;
+}
+
+
+/* { dg-final { scan-tree-dump-not "if " "phiopt1" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR <" 4 "phiopt1" } } */


[gcc r15-327] tree-ssa-sink: Improve code sinking pass

2024-05-08 Thread Ajit Kumar Agarwal via Gcc-cvs
https://gcc.gnu.org/g:f4b86ab09dfe258c4780fcc7567ca8a275c96e7a

commit r15-327-gf4b86ab09dfe258c4780fcc7567ca8a275c96e7a
Author: Ajit Kumar Agarwal 
Date:   Wed May 8 09:21:27 2024 -0500

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code to the use points in loops having
the same nesting depth. The following patch improves code sinking by
placing the sunk code at the beginning of the block, after the labels.

2024-05-08  Ajit Kumar Agarwal  

gcc/ChangeLog:

PR tree-optimization/81953
* tree-ssa-sink.cc (statement_sink_location): Sink statements at
the beginning of the basic block after labels.

gcc/testsuite/ChangeLog:

PR tree-optimization/81953
* gcc.dg/tree-ssa/ssa-sink-21.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 15 +++
 gcc/tree-ssa-sink.cc|  5 +
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index ..d3b79ca58031
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index 880d6f70a80a..2f90acb7ef48 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -439,10 +439,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
  if (sinkbb == frombb)
return false;
 
- if (sinkbb == gimple_bb (use))
-   *togsi = gsi_for_stmt (use);
- else
-   *togsi = gsi_after_labels (sinkbb);
+ *togsi = gsi_after_labels (sinkbb);
 
  return true;
}


[gcc r15-323] RISC-V: Add test for sraiw-31 special case

2024-05-08 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:dd388198b8be52ab378c935fc517a269e0ba741c

commit r15-323-gdd388198b8be52ab378c935fc517a269e0ba741c
Author: Christoph Müllner 
Date:   Tue May 7 22:59:44 2024 +0200

RISC-V: Add test for sraiw-31 special case

We already optimize a sign-extension of a right-shift by 31 in
si3_extend.  Let's add a test for that (similar to
zero-extend-1.c).

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sign-extend-1.c: New test.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sign-extend-1.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sign-extend-1.c 
b/gcc/testsuite/gcc.target/riscv/sign-extend-1.c
new file mode 100644
index ..e9056ec0d424
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sign-extend-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { riscv64*-*-* } } } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+
+signed long
+foo1 (int i)
+{
+  return i >> 31;
+}
+/* { dg-final { scan-assembler "sraiw\ta\[0-9\],a\[0-9\],31" } } */
+
+/* { dg-final { scan-assembler-not "srai\t" } } */
+/* { dg-final { scan-assembler-not "srli\t" } } */
+/* { dg-final { scan-assembler-not "srliw\t" } } */


[gcc r13-8727] tree-optimization/114375 - disallow SLP discovery of permuted mask loads

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:4f2a35a76cca503749c696e7772d2e8eadc77ba5

commit r13-8727-g4f2a35a76cca503749c696e7772d2e8eadc77ba5
Author: Richard Biener 
Date:   Mon Mar 18 12:39:03 2024 +0100

tree-optimization/114375 - disallow SLP discovery of permuted mask loads

We cannot currently handle permutations of mask loads in code generation
or permute optimization.  But we simply drop any permutation on the
floor, so the following instead rejects the SLP build rather than
producing wrong-code.  I've also made sure to reject them in
vectorizable_load for completeness.

PR tree-optimization/114375
* tree-vect-slp.cc (vect_build_slp_tree_2): Compute the
load permutation for masked loads but reject it when any
such is necessary.
* tree-vect-stmts.cc (vectorizable_load): Reject masked
VMAT_ELEMENTWISE and VMAT_STRIDED_SLP as those are not
supported.

* gcc.dg/vect/vect-pr114375.c: New testcase.

(cherry picked from commit 94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a)

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-pr114375.c | 44 +++
 gcc/tree-vect-slp.cc  | 34 +++-
 gcc/tree-vect-stmts.cc|  8 ++
 3 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr114375.c 
b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
new file mode 100644
index ..1e1cb0123d07
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
@@ -0,0 +1,44 @@
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int a[512];
+int b[512];
+int c[512];
+
+void __attribute__((noipa))
+foo(int * __restrict p)
+{
+  for (int i = 0; i < 64; ++i)
+{
+  int tem = 2, tem2 = 2;
+  if (a[4*i + 1])
+tem = p[4*i];
+  if (a[4*i])
+tem2 = p[4*i + 2];
+  b[2*i] = tem2;
+  b[2*i+1] = tem;
+  if (a[4*i + 2])
+tem = p[4*i + 1];
+  if (a[4*i + 3])
+tem2 = p[4*i + 3];
+  c[2*i] = tem2;
+  c[2*i+1] = tem;
+}
+}
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 512; ++i)
+a[i] = (i >> 1) & 1;
+
+  foo (a);
+
+  if (c[0] != 1 || c[1] != 0 || c[2] != 1 || c[3] != 0
+  || b[0] != 2 || b[1] != 2 || b[2] != 2 || b[3] != 2)
+abort ();
+
+  return 0;
+}
+
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index bbc05fac65ec..c01dc02afff6 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1780,10 +1780,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
   && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
 {
-  if (gcall *stmt = dyn_cast  (stmt_info->stmt))
-   gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
-   || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
-   || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+   gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
   else
{
  *max_nunits = this_max_nunits;
@@ -1799,15 +1797,37 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
  load_permutation.create (group_size);
  stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
+ bool any_permute = false;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
{
  int load_place = vect_get_place_in_interleaving_chain
  (load_info, first_stmt_info);
  gcc_assert (load_place != -1);
- load_permutation.safe_push (load_place);
+ any_permute |= load_place != j;
+ load_permutation.quick_push (load_place);
+   }
+
+ if (gcall *stmt = dyn_cast  (stmt_info->stmt))
+   {
+ gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
+ || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
+ || gimple_call_internal_p (stmt, 
IFN_MASK_GATHER_LOAD));
+ load_permutation.release ();
+ /* We cannot handle permuted masked loads, see PR114375.  */
+ if (any_permute
+ || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ && DR_GROUP_SIZE (first_stmt_info) != group_size)
+ || STMT_VINFO_STRIDED_P (stmt_info))
+   {
+ matches[0] = false;
+ return NULL;
+   }
+   }
+ else
+   {
+ SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
+ return node;
}
- SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
- return node;
}
 }
   else if (gimple_assign_single_p (stmt_info->stmt)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-

[gcc r13-8726] cfgrtl: Fix MEM_EXPR update in duplicate_insn_chain [PR114924]

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:c63704a2d840436797f54e175a2af0cb029889d2

commit r13-8726-gc63704a2d840436797f54e175a2af0cb029889d2
Author: Alex Coplan 
Date:   Fri May 3 09:23:59 2024 +0100

cfgrtl: Fix MEM_EXPR update in duplicate_insn_chain [PR114924]

The PR shows that when cfgrtl.cc:duplicate_insn_chain attempts to
update the MR_DEPENDENCE_CLIQUE information for a MEM_EXPR we can end up
accidentally dropping (e.g.) an ARRAY_REF from the MEM_EXPR and end up
replacing it with the underlying MEM_REF.  This leads to an
inconsistency in the MEM_EXPR information, and could lead to wrong code.

While the walk down to the MEM_REF is necessary to update
MR_DEPENDENCE_CLIQUE, we should use the outer tree expression for the
MEM_EXPR.  This patch does that.

gcc/ChangeLog:

PR rtl-optimization/114924
* cfgrtl.cc (duplicate_insn_chain): When updating MEM_EXPRs,
don't strip (e.g.) ARRAY_REFs from the final MEM_EXPR.

(cherry picked from commit fe40d525619eee9c2821126390df75068df4773a)

Diff:
---
 gcc/cfgrtl.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cfgrtl.cc b/gcc/cfgrtl.cc
index 149131c2693f..4cb32e4d9bf3 100644
--- a/gcc/cfgrtl.cc
+++ b/gcc/cfgrtl.cc
@@ -4407,12 +4407,13 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to,
   since MEM_EXPR is shared so make a copy and
   walk to the subtree again.  */
tree new_expr = unshare_expr (MEM_EXPR (*iter));
+   tree orig_new_expr = new_expr;
if (TREE_CODE (new_expr) == WITH_SIZE_EXPR)
  new_expr = TREE_OPERAND (new_expr, 0);
while (handled_component_p (new_expr))
  new_expr = TREE_OPERAND (new_expr, 0);
MR_DEPENDENCE_CLIQUE (new_expr) = newc;
-   set_mem_expr (const_cast  (*iter), new_expr);
+   set_mem_expr (const_cast  (*iter), orig_new_expr);
  }
  }
}


[gcc r13-8725] middle-end: Fix ICE in poly-int.h due to SLP.

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:b0632c06a7f61d3b42170d51aa5b88237a722ae0

commit r13-8725-gb0632c06a7f61d3b42170d51aa5b88237a722ae0
Author: Richard Ball 
Date:   Thu Feb 1 17:18:28 2024 +

middle-end: Fix ICE in poly-int.h due to SLP.

Adds a check to ensure that the input vector arguments
to a function are not variable length. Previously, only the
output vector of a function was checked.

The ICE in question is within the neon-sve-bridge.c test,
and is related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111268

gcc/ChangeLog:
PR tree-optimization/111268
* tree-vect-slp.cc (vectorizable_slp_permutation_1):
Add variable-length check for vector input arguments
to a function.

(cherry picked from commit 4571b4d413a4ba5f1e2d429a2623180ad1c73c0f)

Diff:
---
 gcc/tree-vect-slp.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 570edf38ca40..bbc05fac65ec 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8574,7 +8574,8 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, 
gimple_stmt_iterator *gsi,
 {
   /* Calculate every element of every permute mask vector explicitly,
 instead of relying on the pattern described above.  */
-  if (!nunits.is_constant (&npatterns))
+  if (!nunits.is_constant (&npatterns)
+ || !TYPE_VECTOR_SUBPARTS (op_vectype).is_constant ())
return -1;
   nelts_per_pattern = ncopies = 1;
   if (loop_vec_info linfo = dyn_cast  (vinfo))


[gcc r13-8724] [Committed] Avoid FAIL of gcc.target/i386/pr110792.c

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:3367f78ff92971ac21c67c5d82df988863605f84

commit r13-8724-g3367f78ff92971ac21c67c5d82df988863605f84
Author: Roger Sayle 
Date:   Sun Aug 6 23:19:10 2023 +0100

[Committed] Avoid FAIL of gcc.target/i386/pr110792.c

My apologies (again), I managed to mess up the 64-bit version of the
test case for PR 110792.  Unlike the 32-bit version, the 64-bit case
contains exactly the same load instructions, just in a different order
making the correct and incorrect behaviours impossible to distinguish
with a scan-assembler-not.  Somewhere between checking that this test
failed in a clean tree without the patch, and getting the escaping
correct, I'd failed to notice that this also FAILs in the patched tree.
Doh!  Instead of removing the test completely, I've left it as a
compilation test.

The original fix is tested by the 32-bit test case.

Committed to mainline as obvious.  Sorry for the inconvenience.

2023-08-06  Roger Sayle  

gcc/testsuite/ChangeLog
PR target/110792
* gcc.target/i386/pr110792.c: Remove dg-final scan-assembler-not.

(cherry picked from commit 529909f9e92dd3b0ed0383f45a44d2b5f8a58958)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr110792.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr110792.c 
b/gcc/testsuite/gcc.target/i386/pr110792.c
index b65125c48b62..eea4e1877dbb 100644
--- a/gcc/testsuite/gcc.target/i386/pr110792.c
+++ b/gcc/testsuite/gcc.target/i386/pr110792.c
@@ -15,4 +15,3 @@ unsigned __int128 whirl(unsigned char x0)
asm("":::"memory");
return tt;
 }
-/* { dg-final { scan-assembler-not "movq\tWHIRL_S\\+8\\(%rdi\\), %rdi" } } */


[gcc r13-8723] PR target/110792: Early clobber issues with rot32di2_doubleword on i386.

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:3658dafc65a2b64989a0aa3b4007356d638f1bfa

commit r13-8723-g3658dafc65a2b64989a0aa3b4007356d638f1bfa
Author: Roger Sayle 
Date:   Thu Aug 3 07:12:04 2023 +0100

PR target/110792: Early clobber issues with rot32di2_doubleword on i386.

This patch is a conservative fix for PR target/110792, a wrong-code
regression affecting doubleword rotations by BITS_PER_WORD, which
effectively swaps the highpart and lowpart words, when the source to be
rotated resides in memory. The issue is that if the register used to
hold the lowpart of the destination is mentioned in the address of
the memory operand, the current define_insn_and_split unintentionally
clobbers it before reading the highpart.

Hence, for the testcase, the incorrectly generated code looks like:

salq    $4, %rdi                // calculate address
movq    WHIRL_S+8(%rdi), %rdi   // accidentally clobber addr
movq    WHIRL_S(%rdi), %rbp     // load (wrong) lowpart

Traditionally, the textbook way to fix this would be to add an
explicit early clobber to the instruction's constraints.

 (define_insn_and_split "32di2_doubleword"
- [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,&r")
(any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o")
   (const_int 32)))]

but unfortunately this currently generates significantly worse code,
due to a strange choice of reloads (effectively memcpy), which ends up
looking like:

salq    $4, %rdi                // calculate address
movdqa  WHIRL_S(%rdi), %xmm0    // load the double word in SSE reg.
movaps  %xmm0, -16(%rsp)        // store the SSE reg back to the stack
movq    -8(%rsp), %rdi          // load highpart
movq    -16(%rsp), %rbp         // load lowpart

Note that reload's "&" doesn't distinguish between the memory being
early clobbered, vs the registers used in an addressing mode being
early clobbered.

The fix proposed in this patch is to remove the third alternative, that
allowed offsetable memory as an operand, forcing reload to place the
operand into a register before the rotation.  This results in:

salq    $4, %rdi
movq    WHIRL_S(%rdi), %rax
movq    WHIRL_S+8(%rdi), %rdi
movq    %rax, %rbp

I believe there's a more advanced solution, by swapping the order of
the loads (if first destination register is mentioned in the address),
or inserting a lea insn (if both destination registers are mentioned
in the address), but this fix is a minimal "safe" solution, that
should hopefully be suitable for backporting.

2023-08-03  Roger Sayle  

gcc/ChangeLog
PR target/110792
* config/i386/i386.md (ti3): For rotations by 64 bits
place operand in a register before gen_64ti2_doubleword.
(di3): Likewise, for rotations by 32 bits, place
operand in a register before gen_32di2_doubleword.
(32di2_doubleword): Constrain operand to be in register.
(64ti2_doubleword): Likewise.

gcc/testsuite/ChangeLog
PR target/110792
* g++.target/i386/pr110792.C: New 32-bit C++ test case.
* gcc.target/i386/pr110792.c: New 64-bit C test case.

(cherry picked from commit 790c1f60a5662b16eb19eb4b81922995863c7571)

Diff:
---
 gcc/config/i386/i386.md  | 18 --
 gcc/testsuite/g++.target/i386/pr110792.C | 16 
 gcc/testsuite/gcc.target/i386/pr110792.c | 18 ++
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index cebb235bfc04..b4b39865d677 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14368,7 +14368,10 @@
 emit_insn (gen_ix86_ti3_doubleword
(operands[0], operands[1], operands[2]));
   else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64)
-emit_insn (gen_64ti2_doubleword (operands[0], operands[1]));
+{
+  operands[1] = force_reg (TImode, operands[1]);
+  emit_insn (gen_64ti2_doubleword (operands[0], operands[1]));
+}
   else
 {
   rtx amount = force_reg (QImode, operands[2]);
@@ -14403,7 +14406,10 @@
 emit_insn (gen_ix86_di3_doubleword
(operands[0], operands[1], operands[2]));
   else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32)
-emit_insn (gen_32di2_doubleword (operands[0], operands[1]));
+{
+  operands[1] = force_reg (DImode, operands[1]);
+  emit_insn (gen_32di2_doubleword (operands[0], operands[1]));
+}
   else
 FAIL;
 
@@ -14571,8 +14577,8 @@
 })
 
 (define_insn_and_split "32di2_doubleword"
- [(set (match_operand:

[gcc r13-8722] c++: Add testcase for this PR [PR97990]

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a0f59eb899d151727ee965f900d57dc0395b47df

commit r13-8722-ga0f59eb899d151727ee965f900d57dc0395b47df
Author: Andrew Pinski 
Date:   Fri Feb 16 10:55:43 2024 -0800

c++: Add testcase for this PR [PR97990]

This testcase was fixed by r14-5934-gf26d68d5d128c8 but we should add
one to make sure it does not regress again.

Committed as obvious after a quick test on the testcase.

PR c++/97990

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-struct-1.C: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 5f1438db419c9eb8901d1d1d7f98fb69082aec8e)

Diff:
---
 gcc/testsuite/g++.dg/torture/vector-struct-1.C | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/testsuite/g++.dg/torture/vector-struct-1.C 
b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
new file mode 100644
index ..e2747417e2d5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
@@ -0,0 +1,18 @@
+/* PR c++/97990 */
+/* This used to crash with lto and strict aliasing enabled as the
+   vector type variant still had TYPE_ALIAS_SET set on it. */
+
+typedef __attribute__((__vector_size__(sizeof(short short TSimd;
+TSimd hh(int);
+struct y6
+{
+  TSimd VALUE;
+  ~y6();
+};
+template 
+auto f2(T1 p1, T2){
+  return hh(p1) <= 0;
+}
+void f1(){
+  f2(0, y6{});
+}


[gcc r13-8721] middle-end/112732 - stray TYPE_ALIAS_SET in type variant

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:664ab9c6e8a5d031031596100997e025e5334e86

commit r13-8721-g664ab9c6e8a5d031031596100997e025e5334e86
Author: Richard Biener 
Date:   Tue Nov 28 12:36:21 2023 +0100

middle-end/112732 - stray TYPE_ALIAS_SET in type variant

The following fixes a stray TYPE_ALIAS_SET in a type variant built
by build_opaque_vector_type which is diagnosed by type checking
enabled with -flto.

PR middle-end/112732
* tree.cc (build_opaque_vector_type): Reset TYPE_ALIAS_SET
of the newly built type.

(cherry picked from commit f26d68d5d128c86faaceeb81b1e8f22254ad53df)

Diff:
---
 gcc/tree.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/tree.cc b/gcc/tree.cc
index 1d1c240b2573..12dea81a5f39 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -10182,6 +10182,8 @@ build_opaque_vector_type (tree innertype, poly_int64 
nunits)
   TYPE_NEXT_VARIANT (cand) = TYPE_NEXT_VARIANT (t);
   TYPE_NEXT_VARIANT (t) = cand;
   TYPE_MAIN_VARIANT (cand) = TYPE_MAIN_VARIANT (t);
+  /* Type variants have no alias set defined.  */
+  TYPE_ALIAS_SET (cand) = -1;
   return cand;
 }


[gcc r15-321] Fix non-grouped SLP load/store accounting in alignment peeling

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:b6822bf3e3f3ff37d64be700f139c8fce3a9bf44

commit r15-321-gb6822bf3e3f3ff37d64be700f139c8fce3a9bf44
Author: Richard Biener 
Date:   Tue Mar 5 16:07:41 2024 +0100

Fix non-grouped SLP load/store accounting in alignment peeling

When we have a non-grouped access we bogusly multiply by zero.
This shows most with single-lane SLP but also happens with
the multi-lane splat case.

* tree-vect-data-refs.cc (vect_enhance_data_refs_alignment):
Properly guard DR_GROUP_SIZE access with STMT_VINFO_GROUPED_ACCESS.

Diff:
---
 gcc/tree-vect-data-refs.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index c531079d3bbf..ae237407672c 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -2290,8 +2290,11 @@ vect_enhance_data_refs_alignment (loop_vec_info 
loop_vinfo)
   if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
{
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- nscalars = (STMT_SLP_TYPE (stmt_info)
- ? vf * DR_GROUP_SIZE (stmt_info) : vf);
+ unsigned group_size = 1;
+ if (STMT_SLP_TYPE (stmt_info)
+ && STMT_VINFO_GROUPED_ACCESS (stmt_info))
+   group_size = DR_GROUP_SIZE (stmt_info);
+ nscalars = vf * group_size;
}
 
  /* Save info about DR in the hash table.  Also include peeling


[gcc r15-322] Fix SLP reduction initial value for pointer reductions

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:b65cc09f9c78633a4f29d458a0549f36627c1259

commit r15-322-gb65cc09f9c78633a4f29d458a0549f36627c1259
Author: Richard Biener 
Date:   Fri Feb 23 11:21:20 2024 +0100

Fix SLP reduction initial value for pointer reductions

For pointer reductions we need to convert the initial value to
the vector component integer type.

* tree-vect-loop.cc (get_initial_defs_for_reduction): Convert
initial value to the vector component type.

Diff:
---
 gcc/tree-vect-loop.cc | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 29c03c246d45..704df7bdcc73 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5618,7 +5618,14 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
   if (i >= initial_values.length () || (j > i && neutral_op))
op = neutral_op;
   else
-   op = initial_values[i];
+   {
+ if (!useless_type_conversion_p (TREE_TYPE (vector_type),
+ TREE_TYPE (initial_values[i])))
+   initial_values[i] = gimple_convert (&ctor_seq,
+   TREE_TYPE (vector_type),
+   initial_values[i]);
+ op = initial_values[i];
+   }
 
   /* Create 'vect_ = {op0,op1,...,opn}'.  */
   number_of_places_left_in_vector--;


[gcc r13-8720] tree-optimization/112281 - loop distribution and zero dependence distances

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:0b409fc34e9b1ff7f90456e73128e38970c1532b

commit r13-8720-g0b409fc34e9b1ff7f90456e73128e38970c1532b
Author: Richard Biener 
Date:   Mon Nov 20 13:39:52 2023 +0100

tree-optimization/112281 - loop distribution and zero dependence distances

The following fixes an omission in dependence testing for loop
distribution.  When the overall dependence distance is not zero but
the dependence direction in the innermost common loop is = there is
a conflict between the partitions and we have to merge them.

PR tree-optimization/112281
* tree-loop-distribution.cc
(loop_distribution::pg_add_dependence_edges): For = in the
innermost common loop record a partition conflict.

* gcc.dg/torture/pr112281-1.c: New testcase.
* gcc.dg/torture/pr112281-2.c: Likewise.

(cherry picked from commit 3b34902417259031823bff7f853f615a60464bbd)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr112281-1.c | 18 ++
 gcc/testsuite/gcc.dg/torture/pr112281-2.c | 18 ++
 gcc/tree-loop-distribution.cc | 18 ++
 3 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr112281-1.c 
b/gcc/testsuite/gcc.dg/torture/pr112281-1.c
new file mode 100644
index ..711f5663195c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr112281-1.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-loop-distribution" } */
+
+struct {
+  int : 8;
+  int a;
+} b, d[4] = {{0}, {0}, {0}, {5}};
+int c, e;
+int main() {
+  for (c = 2; c; c--)
+for (e = 0; e < 2; e++) {
+  d[c] = b = d[c + 1];
+  d[c + 1].a = 0;
+}
+  if (b.a != 0)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr112281-2.c 
b/gcc/testsuite/gcc.dg/torture/pr112281-2.c
new file mode 100644
index ..d7671e3322b4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr112281-2.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-loop-distribution" } */
+
+struct {
+  int : 8;
+  int a;
+} b, d[4] = {{5}, {0}, {0}, {0}};
+int c, e;
+int main() {
+  for (c = 0; c < 2; c++)
+for (e = 0; e < 2; e++) {
+  d[c + 1] = b = d[c];
+  d[c].a = 0;
+}
+  if (b.a != 0)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index c674da97b02f..3d92d1c73b5f 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -2152,9 +2152,6 @@ loop_distribution::pg_add_dependence_edges (struct graph 
*rdg, int dir,
}
  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
{
- if (DDR_REVERSED_P (ddr))
-   this_dir = -this_dir;
-
  /* Known dependences can still be unordered througout the
 iteration space, see gcc.dg/tree-ssa/ldist-16.c and
 gcc.dg/tree-ssa/pr94969.c.  */
@@ -2167,7 +2164,20 @@ loop_distribution::pg_add_dependence_edges (struct graph 
*rdg, int dir,
  /* Else as the distance vector is lexicographic positive swap
 the dependence direction.  */
  else
-   this_dir = -this_dir;
+   {
+ if (DDR_REVERSED_P (ddr))
+   this_dir = -this_dir;
+ this_dir = -this_dir;
+
+ /* When then dependence distance of the innermost common
+loop of the DRs is zero we have a conflict.  */
+ auto l1 = gimple_bb (DR_STMT (dr1))->loop_father;
+ auto l2 = gimple_bb (DR_STMT (dr2))->loop_father;
+ int idx = index_in_loop_nest (find_common_loop (l1, l2)->num,
+   DDR_LOOP_NEST (ddr));
+ if (DDR_DIST_VECT (ddr, 0)[idx] == 0)
+   this_dir = 2;
+   }
}
  else
this_dir = 0;


[gcc r13-8719] tree-optimization/112991 - re-do PR112961 fix

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:25add4b65a3bac262685d290a4dc93884a022576

commit r13-8719-g25add4b65a3bac262685d290a4dc93884a022576
Author: Richard Biener 
Date:   Wed Dec 13 08:45:58 2023 +0100

tree-optimization/112991 - re-do PR112961 fix

The following does away with the fake edge adding as in the original
PR112961 fix and instead exposes handling of entry PHIs as additional
parameter of the region VN run.

PR tree-optimization/112991
PR tree-optimization/112961
* tree-ssa-sccvn.h (do_rpo_vn): Add skip_entry_phis argument.
* tree-ssa-sccvn.cc (do_rpo_vn): Likewise.
(do_rpo_vn_1): Likewise, merge with auto-processing.
(run_rpo_vn): Adjust.
(pass_fre::execute): Likewise.
* tree-if-conv.cc (tree_if_conversion): Revert last change.
Value-number latch block but disable value-numbering of
entry PHIs.
* tree-ssa-uninit.cc (execute_early_warn_uninitialized): Adjust.

* gcc.dg/torture/pr112991.c: New testcase.
* g++.dg/vect/pr112961.cc: Likewise.

(cherry picked from commit 93db32a4146afd2a6d90410691351a56768167c9)

Diff:
---
 gcc/testsuite/g++.dg/vect/pr112961.cc   | 17 +
 gcc/testsuite/gcc.dg/torture/pr112991.c | 21 +
 gcc/tree-if-conv.cc | 18 +-
 gcc/tree-ssa-sccvn.cc   | 24 +++-
 gcc/tree-ssa-sccvn.h|  1 +
 gcc/tree-ssa-uninit.cc  |  2 +-
 6 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/gcc/testsuite/g++.dg/vect/pr112961.cc 
b/gcc/testsuite/g++.dg/vect/pr112961.cc
new file mode 100644
index ..52759e180fbe
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr112961.cc
@@ -0,0 +1,17 @@
+// { dg-do compile }
+// { dg-require-effective-target vect_int }
+
+inline const int& maxx (const int& a, const int &b)
+{
+  return a > b ? a : b;
+}
+
+int foo(int *a)
+{
+  int max = 0;
+  for (int i = 0; i < 1024; ++i)
+max = maxx(max, a[i]);
+  return max;
+}
+
+// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail 
vect_no_int_min_max } } }
diff --git a/gcc/testsuite/gcc.dg/torture/pr112991.c 
b/gcc/testsuite/gcc.dg/torture/pr112991.c
new file mode 100644
index ..aace98545997
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr112991.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+typedef struct {
+  unsigned links[2];
+} RMF_unit;
+long RMF_recurseListsBound_count;
+int RMF_recurseListsBound_tbl, RMF_recurseListsBound_list_head_1;
+unsigned RMF_recurseListsBound_list_head_0;
+void RMF_recurseListsBound() {
+  int list_count = RMF_recurseListsBound_list_head_1;
+  long link = RMF_recurseListsBound_list_head_0;
+  for (; RMF_recurseListsBound_count;) {
+long next_link =
+((RMF_unit *)&RMF_recurseListsBound_tbl)[link >> 2].links[0];
+if (link)
+  --RMF_recurseListsBound_count;
+link = next_link;
+  }
+  while (list_count)
+;
+}
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index fddc4a890c69..b1dbb8706ed6 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -3697,21 +3697,21 @@ tree_if_conversion (class loop *loop, vec 
*preds)
   combine_blocks (loop);
 }
 
-  /* Perform local CSE, this esp. helps the vectorizer analysis if loads
- and stores are involved.  CSE only the loop body, not the entry
- PHIs, those are to be kept in sync with the non-if-converted copy.
- ???  We'll still keep dead stores though.  */
-  exit_bbs = BITMAP_ALLOC (NULL);
-  bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
-  bitmap_set_bit (exit_bbs, loop->latch->index);
-
   std::pair  *name_pair;
   unsigned ssa_names_idx;
   FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
 replace_uses_by (name_pair->first, name_pair->second);
   redundant_ssa_names.release ();
 
-  todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
+  /* Perform local CSE, this esp. helps the vectorizer analysis if loads
+ and stores are involved.  CSE only the loop body, not the entry
+ PHIs, those are to be kept in sync with the non-if-converted copy.
+ ???  We'll still keep dead stores though.  */
+  exit_bbs = BITMAP_ALLOC (NULL);
+  for (edge exit : get_loop_exit_edges (loop))
+bitmap_set_bit (exit_bbs, exit->dest->index);
+  todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs,
+false, true, true);
 
   /* Delete dead predicate computations.  */
   ifcvt_local_dce (loop);
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index fe4fc256deaf..fa1d8d9214ee 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -7535,12 +7535,13 @@ eliminate_with_rpo_vn (bitmap inserted_exprs)
 
 static unsigned
 do_rpo_vn_1 (function *fn, edge entry, bitmap exit_bbs,
-bool iterate, bool eliminate, vn_lookup_kind kind);
+bool ite

[gcc r13-8718] middle-end/113396 - int128 array index and value-ranges

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:7baefcb0a358a47a7e2340432d49f29db798a200

commit r13-8718-g7baefcb0a358a47a7e2340432d49f29db798a200
Author: Richard Biener 
Date:   Tue Mar 19 15:25:16 2024 +0100

middle-end/113396 - int128 array index and value-ranges

The following fixes bogus truncation of a value-range for an int128
array index when computing the maximum extent for a variable array
reference.  Instead of possibly slowing things down by using
widest_int the following makes sure the range bounds fit within
the constraints offset_int were designed for.

PR middle-end/113396
* tree-dfa.cc (get_ref_base_and_extent): Use index range
bounds only if they fit within the address-range constraints
of offset_int.

* gcc.dg/torture/pr113396.c: New testcase.

(cherry picked from commit 6a55e39bdb1fdb570730c08413ebbe744e493411)

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr113396.c | 19 +++
 gcc/tree-dfa.cc |  6 --
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr113396.c 
b/gcc/testsuite/gcc.dg/torture/pr113396.c
new file mode 100644
index ..585f717bddab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr113396.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int128 } */
+
+unsigned char m[] = {5, 79, 79, 79, 79};
+__int128 p;
+int main()
+{
+  int g1 = 0;
+  p = 0;
+  for (int aj = 0; aj < 256; aj++)
+   {
+  m[0] = -4;
+  for (; p >= 0; p -= 1) {
+g1 = m[p];
+  }
+  }
+  if (g1 != 0xfc)
+__builtin_abort();
+}
diff --git a/gcc/tree-dfa.cc b/gcc/tree-dfa.cc
index 82803a8ccb1d..eced5c61ca54 100644
--- a/gcc/tree-dfa.cc
+++ b/gcc/tree-dfa.cc
@@ -550,7 +550,8 @@ get_ref_base_and_extent (tree exp, poly_int64_pod *poffset,
/* Try to constrain maxsize with range information.  */
offset_int omax
  = offset_int::from (max, TYPE_SIGN (TREE_TYPE (index)));
-   if (known_lt (lbound, omax))
+   if (wi::get_precision (max) <= ADDR_MAX_BITSIZE
+   && known_lt (lbound, omax))
  {
poly_offset_int rmaxsize;
rmaxsize = (omax - lbound + 1)
@@ -568,7 +569,8 @@ get_ref_base_and_extent (tree exp, poly_int64_pod *poffset,
/* Try to adjust bit_offset with range information.  */
offset_int omin
  = offset_int::from (min, TYPE_SIGN (TREE_TYPE (index)));
-   if (known_le (lbound, omin))
+   if (wi::get_precision (min) <= ADDR_MAX_BITSIZE
+   && known_le (lbound, omin))
  {
poly_offset_int woffset
  = wi::sext (omin - lbound,


[gcc r15-320] aarch64: Fix typo in aarch64-ldp-fusion.cc:combine_reg_notes [PR114936]

2024-05-08 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:73c8e24b692e691c665d0f1f5424432837bd8c06

commit r15-320-g73c8e24b692e691c665d0f1f5424432837bd8c06
Author: Alex Coplan 
Date:   Fri May 3 14:12:32 2024 +

aarch64: Fix typo in aarch64-ldp-fusion.cc:combine_reg_notes [PR114936]

This fixes a typo in combine_reg_notes in the load/store pair fusion
pass.  As it stands, the calls to filter_notes store any
REG_FRAME_RELATED_EXPR to fr_expr with the following association:

 - i2 -> fr_expr[0]
 - i1 -> fr_expr[1]

but then the checks inside the following if statement expect the
opposite (more natural) association, i.e.:

 - i2 -> fr_expr[1]
 - i1 -> fr_expr[0]

this patch fixes the oversight by swapping the fr_expr indices in the
calls to filter_notes.

In hindsight it would probably have been less confusing / error-prone to
have combine_reg_notes take an array of two insns, then we wouldn't have
to mix 1-based and 0-based indexing as well as remembering to call
filter_notes in reverse program order.  This however is a minimal fix
for backporting purposes.

gcc/ChangeLog:

PR target/114936
* config/aarch64/aarch64-ldp-fusion.cc (combine_reg_notes):
Ensure insn iN has its REG_FRAME_RELATED_EXPR (if any) stored in
FR_EXPR[N-1], thus matching the correspondence expected by the
copy_rtx calls.

Diff:
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 437f0aeb7877..1d9caeab05d4 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -1085,9 +1085,9 @@ combine_reg_notes (insn_info *i1, insn_info *i2, bool 
load_p)
   bool found_eh_region = false;
   rtx result = NULL_RTX;
   result = filter_notes (REG_NOTES (i2->rtl ()), result,
-&found_eh_region, fr_expr);
-  result = filter_notes (REG_NOTES (i1->rtl ()), result,
 &found_eh_region, fr_expr + 1);
+  result = filter_notes (REG_NOTES (i1->rtl ()), result,
+&found_eh_region, fr_expr);
 
   if (!load_p)
 {


[gcc r15-319] tree-ssa-loop-prefetch.cc: Honour -fno-unroll-loops

2024-05-08 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:e755f478c24c3e99409936af545ac83d35d27ad9

commit r15-319-ge755f478c24c3e99409936af545ac83d35d27ad9
Author: Stefan Schulze Frielinghaus 
Date:   Wed May 8 10:48:45 2024 +0200

tree-ssa-loop-prefetch.cc: Honour -fno-unroll-loops

This fixes a couple of tests (gcc.dg/vect/pr109011-*.c) on s390 where
loops are unrolled although -fno-unroll-loops is specified.

gcc/ChangeLog:

* tree-ssa-loop-prefetch.cc (determine_unroll_factor): Honour
-fno-unroll-loops.

Diff:
---
 gcc/tree-ssa-loop-prefetch.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/tree-ssa-loop-prefetch.cc b/gcc/tree-ssa-loop-prefetch.cc
index 70073cc4fe46..bb5d5dec7795 100644
--- a/gcc/tree-ssa-loop-prefetch.cc
+++ b/gcc/tree-ssa-loop-prefetch.cc
@@ -1401,6 +1401,10 @@ determine_unroll_factor (class loop *loop, struct 
mem_ref_group *refs,
   struct mem_ref_group *agp;
   struct mem_ref *ref;
 
+  /* Bail out early in case we must not unroll loops.  */
+  if (!flag_unroll_loops)
+return 1;
+
   /* First check whether the loop is not too large to unroll.  We ignore
  PARAM_MAX_UNROLL_TIMES, because for small loops, it prevented us
  from unrolling them enough to make exactly one cache line covered by each


[gcc r15-318] AVR: target/114975 - Add combine-pattern for __parityqi2.

2024-05-08 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:41bc359c322d45ec1adfb51f7a45c7ef02ce6ca9

commit r15-318-g41bc359c322d45ec1adfb51f7a45c7ef02ce6ca9
Author: Georg-Johann Lay 
Date:   Tue May 7 19:42:30 2024 +0200

AVR: target/114975 - Add combine-pattern for __parityqi2.

PR target/114975
gcc/
* config/avr/avr.md: Add combine pattern for
8-bit parity detection.

gcc/testsuite/
* gcc.target/avr/pr114975-parity.c: New test.

Diff:
---
 gcc/config/avr/avr.md  | 17 -
 gcc/testsuite/gcc.target/avr/pr114975-parity.c | 17 +
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 36fe384828f9..d4fcff46123b 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -8418,7 +8418,22 @@
(set (match_dup 0)
 (reg:HI 24))])
 
-(define_insn_and_split "*parityqihi2"
+(define_insn_and_split "*parityqihi2.1"
+  [(set (match_operand:HI 0 "register_operand""=r")
+(zero_extend:HI
+ (parity:QI (match_operand:QI 1 "register_operand" "r"
+   (clobber (reg:HI 24))]
+  "!reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:QI 24)
+(match_dup 1))
+   (set (reg:HI 24)
+(zero_extend:HI (parity:QI (reg:QI 24
+   (set (match_dup 0)
+(reg:HI 24))])
+
+(define_insn_and_split "*parityqihi2.2"
   [(set (match_operand:HI 0 "register_operand"   "=r")
 (parity:HI (match_operand:QI 1 "register_operand" "r")))
(clobber (reg:HI 24))]
diff --git a/gcc/testsuite/gcc.target/avr/pr114975-parity.c 
b/gcc/testsuite/gcc.target/avr/pr114975-parity.c
new file mode 100644
index ..767ced0a464d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/pr114975-parity.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Os" } */
+
+typedef __UINT8_TYPE__ uint8_t;
+
+uint8_t use_pary1 (int y, uint8_t x)
+{
+return 1 + __builtin_parity (x);
+}
+
+uint8_t use_pary2 (uint8_t x)
+{
+   x += 1;
+return 1 - __builtin_parity (x);
+}
+
+/* { dg-final { scan-assembler-times "__parityqi2" 2 } } */


[gcc r15-317] AVR: target/114975 - Add combine-pattern for __popcountqi2.

2024-05-08 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:c8f4bbb824fafecf021a802324cd79e64b03b947

commit r15-317-gc8f4bbb824fafecf021a802324cd79e64b03b947
Author: Georg-Johann Lay 
Date:   Tue May 7 19:39:55 2024 +0200

AVR: target/114975 - Add combine-pattern for __popcountqi2.

PR target/114975
gcc/
* config/avr/avr.md: Add combine pattern for
8-bit popcount detection.

gcc/testsuite/
* gcc.target/avr/pr114975-popcount.c: New test.

Diff:
---
 gcc/config/avr/avr.md| 13 +
 gcc/testsuite/gcc.target/avr/pr114975-popcount.c | 17 +
 2 files changed, 30 insertions(+)

diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 97f42be7729c..36fe384828f9 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -8527,6 +8527,19 @@
 operands[2] = gen_reg_rtx (HImode);
   })
 
+(define_insn_and_split "*popcounthi2.split8"
+  [(set (reg:HI 24)
+(zero_extend:HI (popcount:QI (match_operand:QI 0 
"register_operand"]
+  "! reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:QI 24)
+(match_dup 0))
+   (set (reg:QI 24)
+(popcount:QI (reg:QI 24)))
+   (set (reg:QI 25)
+(const_int 0))])
+
 (define_insn_and_split "*popcounthi2.libgcc_split"
   [(set (reg:HI 24)
 (popcount:HI (reg:HI 24)))]
diff --git a/gcc/testsuite/gcc.target/avr/pr114975-popcount.c 
b/gcc/testsuite/gcc.target/avr/pr114975-popcount.c
new file mode 100644
index ..87eb56b56c5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/pr114975-popcount.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Os" } */
+
+typedef __UINT8_TYPE__ uint8_t;
+
+uint8_t use_pop1 (int y, uint8_t x)
+{
+return 1 + __builtin_popcount (x);
+}
+
+uint8_t use_pop2 (uint8_t x)
+{
+   x += 1;
+return 1 - __builtin_popcount (x);
+}
+
+/* { dg-final { scan-assembler-times "__popcountqi2" 2 } } */


[gcc r15-316] Fix and speedup IDF pruning by dominator

2024-05-08 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:245a6d478aba6499d1f649e4d35df1e858c5967c

commit r15-316-g245a6d478aba6499d1f649e4d35df1e858c5967c
Author: Richard Biener 
Date:   Thu Apr 4 15:18:06 2024 +0200

Fix and speedup IDF pruning by dominator

When insert_updated_phi_nodes_for tries to skip pruning the IDF to
blocks dominated by the nearest common dominator of the set of
definition blocks it compares against ENTRY_BLOCK but that's never
going to be the common dominator.  In fact, if it ever were, the code
would fail to copy IDF to PRUNED_IDF, leading to wrong code.

The following fixes that by avoiding the copy and pruning from the
IDF in-place as well as using the more appropriate check against
the single successor of the ENTRY_BLOCK.

* tree-into-ssa.cc (insert_updated_phi_nodes_for): Skip
pruning when the nearest common dominator is the successor
of ENTRY_BLOCK.  Do not copy IDF but prune it directly.

Diff:
---
 gcc/tree-into-ssa.cc | 47 +--
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-into-ssa.cc b/gcc/tree-into-ssa.cc
index 705e4119ba3b..3732c269ca3d 100644
--- a/gcc/tree-into-ssa.cc
+++ b/gcc/tree-into-ssa.cc
@@ -3233,7 +3233,7 @@ insert_updated_phi_nodes_for (tree var, bitmap_head *dfs,
 {
   basic_block entry;
   def_blocks *db;
-  bitmap idf, pruned_idf;
+  bitmap pruned_idf;
   bitmap_iterator bi;
   unsigned i;
 
@@ -3250,8 +3250,7 @@ insert_updated_phi_nodes_for (tree var, bitmap_head *dfs,
 return;
 
   /* Compute the initial iterated dominance frontier.  */
-  idf = compute_idf (db->def_blocks, dfs);
-  pruned_idf = BITMAP_ALLOC (NULL);
+  pruned_idf = compute_idf (db->def_blocks, dfs);
 
   if (TREE_CODE (var) == SSA_NAME)
 {
@@ -3262,27 +3261,32 @@ insert_updated_phi_nodes_for (tree var, bitmap_head 
*dfs,
 common dominator of all the definition blocks.  */
  entry = nearest_common_dominator_for_set (CDI_DOMINATORS,
db->def_blocks);
- if (entry != ENTRY_BLOCK_PTR_FOR_FN (cfun))
-   EXECUTE_IF_SET_IN_BITMAP (idf, 0, i, bi)
- if (BASIC_BLOCK_FOR_FN (cfun, i) != entry
- && dominated_by_p (CDI_DOMINATORS,
-BASIC_BLOCK_FOR_FN (cfun, i), entry))
-   bitmap_set_bit (pruned_idf, i);
+ if (entry != single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
+   {
+ unsigned to_remove = ~0U;
+ EXECUTE_IF_SET_IN_BITMAP (pruned_idf, 0, i, bi)
+   {
+ if (to_remove != ~0U)
+   {
+ bitmap_clear_bit (pruned_idf, to_remove);
+ to_remove = ~0U;
+   }
+ if (BASIC_BLOCK_FOR_FN (cfun, i) == entry
+ || !dominated_by_p (CDI_DOMINATORS,
+ BASIC_BLOCK_FOR_FN (cfun, i), entry))
+   to_remove = i;
+   }
+ if (to_remove != ~0U)
+   bitmap_clear_bit (pruned_idf, to_remove);
+   }
}
   else
-   {
- /* Otherwise, do not prune the IDF for VAR.  */
- gcc_checking_assert (update_flags == TODO_update_ssa_full_phi);
- bitmap_copy (pruned_idf, idf);
-   }
-}
-  else
-{
-  /* Otherwise, VAR is a symbol that needs to be put into SSA form
-for the first time, so we need to compute the full IDF for
-it.  */
-  bitmap_copy (pruned_idf, idf);
+   /* Otherwise, do not prune the IDF for VAR.  */
+   gcc_checking_assert (update_flags == TODO_update_ssa_full_phi);
 }
+  /* Otherwise, VAR is a symbol that needs to be put into SSA form
+ for the first time, so we need to compute the full IDF for
+ it.  */
 
   if (!bitmap_empty_p (pruned_idf))
 {
@@ -3309,7 +3313,6 @@ insert_updated_phi_nodes_for (tree var, bitmap_head *dfs,
 }
 
   BITMAP_FREE (pruned_idf);
-  BITMAP_FREE (idf);
 }
 
 /* Sort symbols_to_rename after their DECL_UID.  */


[gcc/redhat/heads/gcc-14-branch] (31 commits) Merge commit 'r14-10184-gd54151df3ba0ee3203e0b8cb8f8fcd168a

2024-05-08 Thread Jakub Jelinek via Gcc-cvs
The branch 'redhat/heads/gcc-14-branch' was updated to point to:

 9ae733ecd3cd... Merge commit 'r14-10184-gd54151df3ba0ee3203e0b8cb8f8fcd168a

It previously pointed to:

 748fd0ecf84c... Merge commit 'r14-10154-g7a00c459cbb913ac165a39d344a48fc278

Diff:

Summary of changes (added commits):
---

  9ae733e... Merge commit 'r14-10184-gd54151df3ba0ee3203e0b8cb8f8fcd168a
  d54151d... reassoc: Fix up optimize_range_tests_to_bit_test [PR114965] (*)
  cacc480... c++/c-common: Fix convert_vector_to_array_for_subscript for (*)
  61a095b... c++/modules: Stream unmergeable temporaries by value again  (*)
  f43f346... expansion: Use __trunchfbf2 calls rather than __extendhfbf2 (*)
  aca573e... tree-inline: Remove .ASAN_MARK calls when inlining function (*)
  07dab3f... [PR modula2/113768][PR modula2/114133] bugfix constants mus (*)
  23cf010... libgomp: Add gfx90c, 1036 and 1103 declare variant tests (*)
  a1c8ae1... gimple-ssa-sprintf: Use [0, 1] range for %lc with (wint_t)  (*)
  390bd23... c++/modules: imported spec befriending class tmpl [PR114889 (*)
  c6141ad... AVR: ipa/92606 - Don't optimize PROGMEM data against non-PR (*)
  43b730b... Bump BASE-VER (*)
  cd0059a... Update ChangeLog and version files for release (*)
  4f12e06... Update gennews for GCC 14. (*)
  308a39c... Daily bump. (*)
  c7b4305... testsuite: c++: Skip g++.dg/analyzer on Solaris [PR111475] (*)
  765ddff... Daily bump. (*)
  43b7e2f... Daily bump. (*)
  532d775... Daily bump. (*)
  d811080... [PATCH] PR modula2/114929 for loop fails to iterate down to (*)
  3b4d6b6... c++: initializer_list and EH [PR114935] (*)
  db447ec... Revert "tree-optimization/114921 - _Float16 -> __bf16 isn't (*)
  d7c06a8... libstdc++: Update powerpc-linux-gnu baseline_symbols (*)
  7963194... RISC-V: Add testcase for pr114734 (*)
  5c42872... middle-end/114734 - wrong code with expand_call_mem_ref (*)
  242fbc0... cfgrtl: Fix MEM_EXPR update in duplicate_insn_chain [PR1149 (*)
  fa7e05d... tree-optimization/114921 - _Float16 -> __bf16 isn't noop (*)
  f86f197... Daily bump. (*)
  8e39d4f... Regenerate gcc.pot (*)
  590a065... Daily bump. (*)
  9ccb16d... Daily bump. (*)

(*) This commit already exists in another branch.
Because the reference `refs/vendors/redhat/heads/gcc-14-branch' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.


[gcc(refs/vendors/redhat/heads/gcc-14-branch)] Merge commit 'r14-10184-gd54151df3ba0ee3203e0b8cb8f8fcd168a766c51' into redhat/gcc-14-branch

2024-05-08 Thread Jakub Jelinek via Libstdc++-cvs
https://gcc.gnu.org/g:9ae733ecd3cd76d7e2f99385520783b01ab7e5dd

commit 9ae733ecd3cd76d7e2f99385520783b01ab7e5dd
Merge: 748fd0ecf84c d54151df3ba0
Author: Jakub Jelinek 
Date:   Wed May 8 10:21:32 2024 +0200

Merge commit 'r14-10184-gd54151df3ba0ee3203e0b8cb8f8fcd168a766c51' into 
redhat/gcc-14-branch

Diff:

 ChangeLog  |4 +
 c++tools/ChangeLog |4 +
 config/ChangeLog   |4 +
 contrib/ChangeLog  |4 +
 contrib/gennews|1 +
 contrib/header-tools/ChangeLog |4 +
 contrib/reghunt/ChangeLog  |4 +
 contrib/regression/ChangeLog   |4 +
 fixincludes/ChangeLog  |4 +
 gcc/BASE-VER   |2 +-
 gcc/ChangeLog  |   53 +
 gcc/DATESTAMP  |2 +-
 gcc/DEV-PHASE  |1 -
 gcc/ada/ChangeLog  |4 +
 gcc/analyzer/ChangeLog |4 +
 gcc/c-family/ChangeLog |4 +
 gcc/c-family/c-common.cc   |7 +-
 gcc/c/ChangeLog|4 +
 gcc/cfgrtl.cc  |3 +-
 gcc/config/avr/avr.cc  |6 +
 gcc/cp/ChangeLog   |   26 +
 gcc/cp/call.cc |3 +
 gcc/cp/constexpr.cc|3 +-
 gcc/cp/cp-gimplify.cc  |   18 +-
 gcc/cp/cp-tree.h   |1 +
 gcc/cp/init.cc |2 +-
 gcc/cp/module.cc   |2 +
 gcc/cp/typeck2.cc  |   34 +-
 gcc/d/ChangeLog|4 +
 gcc/expr.cc|   12 +-
 gcc/fortran/ChangeLog  |   18 +
 gcc/gimple-ssa-sprintf.cc  |   20 +-
 gcc/go/ChangeLog   |4 +
 gcc/internal-fn.cc |6 +-
 gcc/jit/ChangeLog  |4 +
 gcc/lto/ChangeLog  |4 +
 gcc/m2/ChangeLog   |   19 +
 gcc/m2/gm2-compiler/M2Quads.mod|  191 +-
 gcc/objc/ChangeLog |4 +
 gcc/objcp/ChangeLog|4 +
 gcc/optabs-libfuncs.cc |4 +-
 gcc/po/ChangeLog   |8 +
 gcc/po/gcc.pot | 2840 ++--
 gcc/rust/ChangeLog |4 +
 gcc/testsuite/ChangeLog|   61 +
 gcc/testsuite/g++.dg/analyzer/analyzer.exp |5 +
 gcc/testsuite/g++.dg/cpp0x/initlist-eh1.C  |   25 +
 gcc/testsuite/g++.dg/modules/friend-8_a.H  |   23 +
 gcc/testsuite/g++.dg/modules/friend-8_b.C  |9 +
 gcc/testsuite/g++.dg/modules/pr114856.h|   12 +
 gcc/testsuite/g++.dg/modules/pr114856_a.H  |5 +
 gcc/testsuite/g++.dg/modules/pr114856_b.C  |5 +
 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C  |   23 +
 gcc/testsuite/gcc.c-torture/execute/pr114965.c |   30 +
 gcc/testsuite/gcc.dg/asan/pr114956.c   |   26 +
 gcc/testsuite/gcc.dg/pr114876.c|   34 +
 gcc/testsuite/gcc.dg/pr114907.c|   27 +
 gcc/testsuite/gcc.dg/pr83415.c |2 +-
 .../gcc.dg/tree-ssa/builtin-sprintf-warn-1.c   |   12 +-
 gcc/testsuite/gcc.target/avr/torture/pr92606.c |   57 +
 .../gcc.target/riscv/rvv/autovec/pr114734.c|   25 +
 .../gm2/extensions/run/pass/callingc10.mod |6 +-
 .../gm2/extensions/run/pass/callingc11.mod |6 +-
 gcc/testsuite/gm2/extensions/run/pass/vararg2.mod  |6 +-
 gcc/testsuite/gm2/iso/run/pass/packed.mod  |2 +
 gcc/testsuite/gm2/pim/run/pass/testforloopzero.mod |   33 +
 .../gm2/pim/run/pass/testforloopzero2.mod  |   35 +
 .../gm2/pim/run/pass/testforloopzero3.mod  |   32 +
 .../gm2/pim/run/pass/testforloopzero4.mod  |   32 +
 gcc/tree-inline.cc |   28 +-
 gcc/tree-ssa-reassoc.cc|3 +-
 gnattools/ChangeLog|4 +
 gotools/ChangeLog  |4 +
 include/ChangeLog  |4 +
 libada/ChangeLog   |4 +
 libatomic/ChangeLog|4 +
 libbacktra

[gcc r14-10184] reassoc: Fix up optimize_range_tests_to_bit_test [PR114965]

2024-05-08 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:d54151df3ba0ee3203e0b8cb8f8fcd168a766c51

commit r14-10184-gd54151df3ba0ee3203e0b8cb8f8fcd168a766c51
Author: Jakub Jelinek 
Date:   Wed May 8 10:17:32 2024 +0200

reassoc: Fix up optimize_range_tests_to_bit_test [PR114965]

The optimize_range_tests_to_bit_test optimization normally emits a range
test first:
  if (entry_test_needed)
{
  tem = build_range_check (loc, optype, unshare_expr (exp),
   false, lowi, high);
  if (tem == NULL_TREE || is_gimple_val (tem))
continue;
}
so during the bit test we already know that exp is in the [lowi, high]
range, but it skips that entry test if we have range info which tells
us it isn't necessary.
Also, normally it emits shifts by exp - lowi counter, but has an
optimization to use just exp counter if the mask isn't a more expensive
constant in that case and lowi is > 0 and high is smaller than prec.

The following testcase is miscompiled because the two abnormal cases
are triggered.  The range of exp is [43, 43][48, 48][95, 95], so on a
64-bit arch we decide we don't need the entry test, because 95 - 43 < 64.
And we also decide to use just exp as counter, because the range test
tests just for exp == 43 || exp == 48, so high is smaller than 64 too.
Because 95 is in the exp range, we can't do that, we'd either need to
do a range test first, i.e.
if (exp - 43U <= 48U - 43U) if ((1UL << exp) & mask1)
or need to subtract lowi from the shift counter, i.e.
if ((1UL << (exp - 43)) & mask2)
but can't do both unless r.upper_bound () is < prec.

The following patch ensures that.

2024-05-08  Jakub Jelinek  

PR tree-optimization/114965
* tree-ssa-reassoc.cc (optimize_range_tests_to_bit_test): Don't try 
to
optimize away exp - lowi subtraction from shift count unless entry
test is emitted or unless r.upper_bound () is smaller than prec.

* gcc.c-torture/execute/pr114965.c: New test.

(cherry picked from commit 9adec2d91e62a479474ae79df5b455fd4b8463ba)

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr114965.c | 30 ++
 gcc/tree-ssa-reassoc.cc|  3 ++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr114965.c 
b/gcc/testsuite/gcc.c-torture/execute/pr114965.c
new file mode 100644
index ..89d68e187015
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr114965.c
@@ -0,0 +1,30 @@
+/* PR tree-optimization/114965 */
+
+static void
+foo (const char *x)
+{
+
+  char a = '0';
+  while (1)
+{
+  switch (*x)
+   {
+   case '_':
+   case '+':
+ a = *x;
+ x++;
+ continue;
+   default:
+ break;
+   }
+  break;
+}
+  if (a == '0' || a == '+')
+__builtin_abort ();
+}
+
+int
+main ()
+{
+  foo ("_");
+}
diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
index 61f54f07b577..556ecdebe2d7 100644
--- a/gcc/tree-ssa-reassoc.cc
+++ b/gcc/tree-ssa-reassoc.cc
@@ -3418,7 +3418,8 @@ optimize_range_tests_to_bit_test (enum tree_code opcode, 
int first, int length,
 We can avoid then subtraction of the minimum value, but the
 mask constant could be perhaps more expensive.  */
  if (compare_tree_int (lowi, 0) > 0
- && compare_tree_int (high, prec) < 0)
+ && compare_tree_int (high, prec) < 0
+ && (entry_test_needed || wi::ltu_p (r.upper_bound (), prec)))
{
  int cost_diff;
  HOST_WIDE_INT m = tree_to_uhwi (lowi);


[gcc r15-315] reassoc: Fix up optimize_range_tests_to_bit_test [PR114965]

2024-05-08 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:9adec2d91e62a479474ae79df5b455fd4b8463ba

commit r15-315-g9adec2d91e62a479474ae79df5b455fd4b8463ba
Author: Jakub Jelinek 
Date:   Wed May 8 10:17:32 2024 +0200

reassoc: Fix up optimize_range_tests_to_bit_test [PR114965]

The optimize_range_tests_to_bit_test optimization normally emits a range
test first:
  if (entry_test_needed)
{
  tem = build_range_check (loc, optype, unshare_expr (exp),
   false, lowi, high);
  if (tem == NULL_TREE || is_gimple_val (tem))
continue;
}
so during the bit test we already know that exp is in the [lowi, high]
range, but skips it if we have range info which tells us this isn't
necessary.
Also, normally it emits shifts by exp - lowi counter, but has an
optimization to use just exp counter if the mask isn't a more expensive
constant in that case and lowi is > 0 and high is smaller than prec.

The following testcase is miscompiled because the two abnormal cases
are triggered.  The range of exp is [43, 43][48, 48][95, 95], so we on
64-bit arch decide we don't need the entry test, because 95 - 43 < 64.
And we also decide to use just exp as counter, because the range test
tests just for exp == 43 || exp == 48, so high is smaller than 64 too.
Because 95 is in the exp range, we can't do that, we'd either need to
do a range test first, i.e.
if (exp - 43U <= 48U - 43U) if ((1UL << exp) & mask1)
or need to subtract lowi from the shift counter, i.e.
if ((1UL << (exp - 43)) & mask2)
but can't do both unless r.upper_bound () is < prec.

The following patch ensures that.

2024-05-08  Jakub Jelinek  

PR tree-optimization/114965
* tree-ssa-reassoc.cc (optimize_range_tests_to_bit_test): Don't try 
to
optimize away exp - lowi subtraction from shift count unless entry
test is emitted or unless r.upper_bound () is smaller than prec.

* gcc.c-torture/execute/pr114965.c: New test.

Diff:
---
 gcc/testsuite/gcc.c-torture/execute/pr114965.c | 30 ++
 gcc/tree-ssa-reassoc.cc|  3 ++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr114965.c 
b/gcc/testsuite/gcc.c-torture/execute/pr114965.c
new file mode 100644
index ..89d68e187015
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr114965.c
@@ -0,0 +1,30 @@
+/* PR tree-optimization/114965 */
+
+static void
+foo (const char *x)
+{
+
+  char a = '0';
+  while (1)
+{
+  switch (*x)
+   {
+   case '_':
+   case '+':
+ a = *x;
+ x++;
+ continue;
+   default:
+ break;
+   }
+  break;
+}
+  if (a == '0' || a == '+')
+__builtin_abort ();
+}
+
+int
+main ()
+{
+  foo ("_");
+}
diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
index 61f54f07b577..556ecdebe2d7 100644
--- a/gcc/tree-ssa-reassoc.cc
+++ b/gcc/tree-ssa-reassoc.cc
@@ -3418,7 +3418,8 @@ optimize_range_tests_to_bit_test (enum tree_code opcode, 
int first, int length,
 We can avoid then subtraction of the minimum value, but the
 mask constant could be perhaps more expensive.  */
  if (compare_tree_int (lowi, 0) > 0
- && compare_tree_int (high, prec) < 0)
+ && compare_tree_int (high, prec) < 0
+ && (entry_test_needed || wi::ltu_p (r.upper_bound (), prec)))
{
  int cost_diff;
  HOST_WIDE_INT m = tree_to_uhwi (lowi);


[gcc r13-8717] Fortran: Generate new charlens for shared symbol typespecs [PR89462]

2024-05-08 Thread Paul Thomas via Gcc-cvs
https://gcc.gnu.org/g:ff78ffe8f912bc8d2d355f22d32e1ddb9a1169aa

commit r13-8717-gff78ffe8f912bc8d2d355f22d32e1ddb9a1169aa
Author: Paul Thomas 
Date:   Thu Apr 25 06:52:31 2024 +0100

Fortran: Generate new charlens for shared symbol typespecs [PR89462]

2024-04-25  Paul Thomas  
Jakub Jelinek  

gcc/fortran
PR fortran/89462
* decl.cc (build_sym): Add an extra argument 'elem'. If 'elem'
is greater than 1, gfc_new_charlen is called to generate a new
charlen, registered in the symbol namespace.
(variable_decl, enumerator_decl): Set the new argument in the
calls to build_sym.

gcc/testsuite/
PR fortran/89462
* gfortran.dg/pr89462.f90: New test.

(cherry picked from commit 1fd5a07444776d76cdd6a2eee7df0478201197a5)

Diff:
---
 gcc/fortran/decl.cc   | 11 +++
 gcc/testsuite/gfortran.dg/pr89462.f90 | 13 +
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/gcc/fortran/decl.cc b/gcc/fortran/decl.cc
index 527e84ad7637..19321685e550 100644
--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -1708,7 +1708,7 @@ gfc_verify_c_interop_param (gfc_symbol *sym)
 /* Function called by variable_decl() that adds a name to the symbol table.  */
 
 static bool
-build_sym (const char *name, gfc_charlen *cl, bool cl_deferred,
+build_sym (const char *name, int elem, gfc_charlen *cl, bool cl_deferred,
   gfc_array_spec **as, locus *var_locus)
 {
   symbol_attribute attr;
@@ -1773,7 +1773,10 @@ build_sym (const char *name, gfc_charlen *cl, bool 
cl_deferred,
 
   if (sym->ts.type == BT_CHARACTER)
 {
-  sym->ts.u.cl = cl;
+  if (elem > 1)
+   sym->ts.u.cl = gfc_new_charlen (sym->ns, cl);
+  else
+   sym->ts.u.cl = cl;
   sym->ts.deferred = cl_deferred;
 }
 
@@ -2955,7 +2958,7 @@ variable_decl (int elem)
  create a symbol for those yet.  If we fail to create the symbol,
  bail out.  */
   if (!gfc_comp_struct (gfc_current_state ())
-  && !build_sym (name, cl, cl_deferred, &as, &var_locus))
+  && !build_sym (name, elem, cl, cl_deferred, &as, &var_locus))
 {
   m = MATCH_ERROR;
   goto cleanup;
@@ -10903,7 +10906,7 @@ enumerator_decl (void)
   /* OK, we've successfully matched the declaration.  Now put the
  symbol in the current namespace. If we fail to create the symbol,
  bail out.  */
-  if (!build_sym (name, NULL, false, &as, &var_locus))
+  if (!build_sym (name, 1, NULL, false, &as, &var_locus))
 {
   m = MATCH_ERROR;
   goto cleanup;
diff --git a/gcc/testsuite/gfortran.dg/pr89462.f90 
b/gcc/testsuite/gfortran.dg/pr89462.f90
new file mode 100644
index ..b2a4912fcc85
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr89462.f90
@@ -0,0 +1,13 @@
+! { dg-do compile }
+! { dg-options "-pedantic-errors" }
+! Test the fix for PR89462 in which the shared 'cl' field of the typespec
+! shared between 'test', 'TR' and 'aTP' caused the compiler to go into an
+! infinite loop.
+! Contributed by Sergei Trofimovich  
+  CHARACTER*1 FUNCTION test(H) ! { dg-warning "Old-style character length" }
+ CHARACTER*1 test2,TR,aTP  ! { dg-warning "Old-style character length" }
+ ENTRY test2(L)
+ CALL ttest3(aTP)
+ test = TR
+ RETURN
+  END


[gcc r13-8716] Fortran: Fix ICE in gfc_trans_create_temp_array from bad type [PR93678]

2024-05-08 Thread Paul Thomas via Gcc-cvs
https://gcc.gnu.org/g:80bb0bda4a96da7e690cb4df572fcb9604f511f8

commit r13-8716-g80bb0bda4a96da7e690cb4df572fcb9604f511f8
Author: Paul Thomas 
Date:   Thu Apr 25 06:56:10 2024 +0100

Fortran: Fix ICE in gfc_trans_create_temp_array from bad type [PR93678]

2024-04-25  Paul Thomas  

gcc/fortran
PR fortran/93678
* trans-expr.cc (gfc_conv_procedure_call): Use the interface,
where possible, to obtain the type of character procedure
pointers of class entities.

gcc/testsuite/
PR fortran/93678
* gfortran.dg/pr93678.f90: New test.

(cherry picked from commit c058105bc47a0701e157d1028e60f48554561f9f)

Diff:
---
 gcc/fortran/trans-expr.cc | 10 --
 gcc/testsuite/gfortran.dg/pr93678.f90 | 32 
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 5c5fabf5f5ae..cfe03252582c 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -7626,8 +7626,14 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
{
  gcc_assert (se->loop && info);
 
- /* Set the type of the array.  */
- tmp = gfc_typenode_for_spec (&comp->ts);
+ /* Set the type of the array. vtable charlens are not always reliable.
+Use the interface, if possible.  */
+ if (comp->ts.type == BT_CHARACTER
+ && expr->symtree->n.sym->ts.type == BT_CLASS
+ && comp->ts.interface && comp->ts.interface->result)
+   tmp = gfc_typenode_for_spec (&comp->ts.interface->result->ts);
+ else
+   tmp = gfc_typenode_for_spec (&comp->ts);
  gcc_assert (se->ss->dimen == se->loop->dimen);
 
  /* Evaluate the bounds of the result, if known.  */
diff --git a/gcc/testsuite/gfortran.dg/pr93678.f90 
b/gcc/testsuite/gfortran.dg/pr93678.f90
new file mode 100644
index ..403bedd0c4fd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr93678.f90
@@ -0,0 +1,32 @@
+! { dg-do compile }
+! Test the fix for PR93678 in which the charlen for the 'unpackbytes'
+! vtable field was incomplete and caused the ICE as indicated.
+! Contributed by Luis Kornblueh  
+!
+! The testcase was reduced by various gfortran regulars.
+module mo_a
+  implicit none
+  type t_b
+integer :: i
+  contains
+procedure :: unpackbytes => b_unpackbytes
+  end type t_b
+contains
+  function b_unpackbytes (me) result (res)
+class(t_b), intent(inout) :: me
+character :: res(1)
+res = char (me%i)
+  end function b_unpackbytes
+  subroutine b_unpackint (me, c)
+class(t_b), intent(inout) :: me
+character, intent(in) :: c
+!   print *, b_unpackbytes (me) ! ok
+if (any (me% unpackbytes () .ne. c)) stop 1 ! ICEd here
+  end subroutine b_unpackint
+end module mo_a
+
+  use mo_a
+  class(t_b), allocatable :: z
+  allocate (z, source = t_b(97))
+  call b_unpackint (z, "a")
+end


[gcc r15-314] Minor tweaks to code computing modular multiplicative inverse

2024-05-08 Thread Eric Botcazou via Gcc-cvs
https://gcc.gnu.org/g:10e34aa5b1d23e1517f0ca5cfae3cac3b51a7a53

commit r15-314-g10e34aa5b1d23e1517f0ca5cfae3cac3b51a7a53
Author: Eric Botcazou 
Date:   Mon Apr 29 17:46:20 2024 +0200

Minor tweaks to code computing modular multiplicative inverse

This removes the last parameter of choose_multiplier, which is unused, adds
another assertion and more details to the description and various comments.
Likewise to the closely related invert_mod2n, except for the last parameter.

[changelog]
* expmed.h (choose_multiplier): Tweak description and remove last
parameter.
* expmed.cc (choose_multiplier): Likewise.  Add assertion for the
third parameter and add details to various comments.
(invert_mod2n): Tweak description and add assertion for the first
parameter.
(expand_divmod): Adjust calls to choose_multiplier.
* tree-vect-generic.cc (expand_vector_divmod): Likewise.
* tree-vect-patterns.cc (vect_recog_divmod_pattern): Likewise.

Diff:
---
 gcc/expmed.cc | 95 ++-
 gcc/expmed.h  |  9 +++--
 gcc/tree-vect-generic.cc  | 13 +++
 gcc/tree-vect-patterns.cc | 14 +++
 4 files changed, 71 insertions(+), 60 deletions(-)

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 20f3a36f38cc..248940fe4147 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -3695,50 +3695,62 @@ expand_widening_mult (machine_mode mode, rtx op0, rtx 
op1, rtx target,
   unsignedp, OPTAB_LIB_WIDEN);
 }
 
-/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
-   replace division by D, and put the least significant N bits of the result
-   in *MULTIPLIER_PTR and return the most significant bit.
+/* Choose a minimal N + 1 bit approximation to 2**K / D that can be used to
+   replace division by D, put the least significant N bits of the result in
+   *MULTIPLIER_PTR, the value K - N in *POST_SHIFT_PTR, and return the most
+   significant bit.
 
The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
-   needed precision is in PRECISION (should be <= N).
+   needed precision is PRECISION (should be <= N).
 
-   PRECISION should be as small as possible so this function can choose
-   multiplier more freely.
+   PRECISION should be as small as possible so this function can choose the
+   multiplier more freely.  If PRECISION is <= N - 1, the most significant
+   bit returned by the function will be zero.
 
-   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
-   is to be used for a final right shift is placed in *POST_SHIFT_PTR.
-
-   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
-   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
+   Using this function, x / D is equal to (x*m) / 2**N >> (*POST_SHIFT_PTR),
+   where m is the full N + 1 bit multiplier.  */
 
 unsigned HOST_WIDE_INT
 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
   unsigned HOST_WIDE_INT *multiplier_ptr,
-  int *post_shift_ptr, int *lgup_ptr)
+  int *post_shift_ptr)
 {
   int lgup, post_shift;
-  int pow, pow2;
+  int pow1, pow2;
 
-  /* lgup = ceil(log2(divisor)); */
+  /* lgup = ceil(log2(d)) */
+  /* Assuming d > 1, we have d >= 2^(lgup-1) + 1 */
   lgup = ceil_log2 (d);
 
   gcc_assert (lgup <= n);
+  gcc_assert (lgup <= precision);
 
-  pow = n + lgup;
+  pow1 = n + lgup;
   pow2 = n + lgup - precision;
 
-  /* mlow = 2^(N + lgup)/d */
-  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
+  /* mlow = 2^(n + lgup)/d */
+  /* Trivially from above we have mlow < 2^(n+1) */
+  wide_int val = wi::set_bit_in_zero (pow1, HOST_BITS_PER_DOUBLE_INT);
   wide_int mlow = wi::udiv_trunc (val, d);
 
-  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
+  /* mhigh = (2^(n + lgup) + 2^(n + lgup - precision))/d */
+  /* From above we have mhigh < 2^(n+1) assuming lgup <= precision */
+  /* From precision <= n, the difference between the numerators of mhigh and
+ mlow is >= 2^lgup >= d.  Therefore the difference of the quotients in
+ the Euclidean division by d is at least 1, so we have mlow < mhigh and
+ the exact value of 2^(n + lgup)/d lies in the interval [mlow; mhigh).  */
   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
   wide_int mhigh = wi::udiv_trunc (val, d);
 
-  /* If precision == N, then mlow, mhigh exceed 2^N
- (but they do not exceed 2^(N+1)).  */
-
   /* Reduce to lowest terms.  */
+  /* If precision <= n - 1, then the difference between the numerators of
+ mhigh and mlow is >= 2^(lgup + 1) >= 2 * 2^lgup >= 2 * d.  Therefore
+ the difference of the quotients in the Euclidean division by d is at
+ least 2, which means that mhigh and mlow differ by at least one bit
+ not in the last place.  The conclusion is that the fir

[gcc r13-8715] Fortran: Fix ICE in gfc_trans_pointer_assignment [PR113956]

2024-05-08 Thread Paul Thomas via Gcc-cvs
https://gcc.gnu.org/g:102d52967bde164d6b99037465688b62d57ae560

commit r13-8715-g102d52967bde164d6b99037465688b62d57ae560
Author: Paul Thomas 
Date:   Tue Apr 9 15:23:46 2024 +0100

Fortran: Fix ICE in gfc_trans_pointer_assignment [PR113956]

2024-04-09  Paul Thomas  

gcc/fortran
PR fortran/113956
* trans-expr.cc (gfc_trans_pointer_assignment): Remove assert
causing the ICE since it was unnecessary.

gcc/testsuite/
PR fortran/113956
* gfortran.dg/pr113956.f90: New test.

(cherry picked from commit 88aea122a7ee639230bf17a9eda4bf8a5eb7e282)

Diff:
---
 gcc/fortran/trans-expr.cc  |  9 +++--
 gcc/testsuite/gfortran.dg/pr113956.f90 | 21 +
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index c7ec591e279d..5c5fabf5f5ae 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -10257,12 +10257,9 @@ gfc_trans_pointer_assignment (gfc_expr * expr1, 
gfc_expr * expr2)
{
  gfc_symbol *psym = expr1->symtree->n.sym;
  tmp = NULL_TREE;
- if (psym->ts.type == BT_CHARACTER)
-   {
- gcc_assert (psym->ts.u.cl->backend_decl
- && VAR_P (psym->ts.u.cl->backend_decl));
- tmp = psym->ts.u.cl->backend_decl;
-   }
+ if (psym->ts.type == BT_CHARACTER
+ && psym->ts.u.cl->backend_decl)
+   tmp = psym->ts.u.cl->backend_decl;
  else if (expr1->ts.u.cl->backend_decl
   && VAR_P (expr1->ts.u.cl->backend_decl))
tmp = expr1->ts.u.cl->backend_decl;
diff --git a/gcc/testsuite/gfortran.dg/pr113956.f90 
b/gcc/testsuite/gfortran.dg/pr113956.f90
new file mode 100644
index ..229e891f847b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr113956.f90
@@ -0,0 +1,21 @@
+! { dg-do run }
+! Test the fix for PR113956
+! Contributed by David Binderman  
+module m
+contains
+  subroutine test_array_char(p, x)
+character(*), target  :: x(:)
+character(:), pointer :: p(:)
+p => x   ! ICE
+  end subroutine
+end module
+
+  use m
+  character(:), allocatable, target :: chr(:)
+  character(:), pointer :: p(:)
+  chr = ["ab","cd"]
+  call test_array_char (p, chr)
+  if (loc (chr) .ne. loc (p)) stop 1
+  if (len (p) .ne. 2) stop 2
+  if (any (p .ne. chr)) stop 3
+end


[gcc r13-8714] Fortran: Fix ICE in trans-stmt.cc(gfc_trans_call) [PR114535]

2024-05-08 Thread Paul Thomas via Gcc-cvs
https://gcc.gnu.org/g:9fd6f7e912415f426382099d6aa182fd2b8ebb82

commit r13-8714-g9fd6f7e912415f426382099d6aa182fd2b8ebb82
Author: Paul Thomas 
Date:   Tue Apr 9 15:27:28 2024 +0100

Fortran: Fix ICE in trans-stmt.cc(gfc_trans_call) [PR114535]

2024-04-09  Paul Thomas  

gcc/fortran
PR fortran/114535
* resolve.cc (resolve_symbol): Remove last chunk that checked
for finalization of unreferenced symbols.

gcc/testsuite/
PR fortran/114535
* gfortran.dg/pr114535d.f90: New test.
* gfortran.dg/pr114535iv.f90: Additional source.

(cherry picked from commit de82b0cf981e49a0bda957c0ac31146b17407e23)

Diff:
---
 gcc/fortran/resolve.cc   |  9 ---
 gcc/testsuite/gfortran.dg/pr114535d.f90  | 42 
 gcc/testsuite/gfortran.dg/pr114535iv.f90 | 18 ++
 3 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 388209d28329..453dd90b5fbc 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -16700,15 +16700,6 @@ resolve_symbol (gfc_symbol *sym)
 
   if (sym->param_list)
 resolve_pdt (sym);
-
-  if (!sym->attr.referenced
-  && (sym->ts.type == BT_CLASS || sym->ts.type == BT_DERIVED))
-{
-  gfc_expr *final_expr = gfc_lval_expr_from_sym (sym);
-  if (gfc_is_finalizable (final_expr->ts.u.derived, NULL))
-   gfc_set_sym_referenced (sym);
-  gfc_free_expr (final_expr);
-}
 }
 
 
diff --git a/gcc/testsuite/gfortran.dg/pr114535d.f90 
b/gcc/testsuite/gfortran.dg/pr114535d.f90
new file mode 100644
index ..7ce178a1e303
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr114535d.f90
@@ -0,0 +1,42 @@
+! { dg-do compile }
+! { dg-compile-aux-modules "pr114535iv.f90" }
+! Contributed by Andrew Benson  
+!
+module d
+  implicit none
+contains
+  function en() result(dd)
+use :: iv
+implicit none
+type(vs) :: dd
+dd%i = 1
+  end function en
+end module d
+
+! Delete line 1 and all brands complain that 'vs' is an undefined type.
+! Delete lines 1 and line 2 recreates the original problem.
+module ni
+  implicit none
+contains
+  subroutine iss1()
+!use :: iv! line 1
+use :: d
+implicit none
+!type(vs) :: ans; ans = en(); ! line 2
+  end subroutine iss1
+  subroutine iss2()
+use :: d
+implicit none
+  end subroutine iss2
+end module ni ! Used to give an ICE: in gfc_trans_call, at 
fortran/trans-stmt.cc:400
+
+  use ni
+  use iv
+  type(vs) :: x
+  call iss1()
+  call iss1()
+  if ((ctr .eq. 0) .or. (ctr .ne. 6)) stop 1  ! Depends whether lines 1 & 2 
are present
+  call iss2()
+  x = vs(42)
+  if ((ctr .eq. 1) .or. (ctr .ne. 7)) stop 2  ! Make sure destructor available 
here
+end
diff --git a/gcc/testsuite/gfortran.dg/pr114535iv.f90 
b/gcc/testsuite/gfortran.dg/pr114535iv.f90
new file mode 100644
index ..be629991023e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr114535iv.f90
@@ -0,0 +1,18 @@
+! Compiled with pr114535d.f90
+! Contributed by Andrew Benson  
+!
+module iv
+  type, public :: vs
+ integer :: i
+   contains
+ final :: destructor
+  end type vs
+  integer :: ctr = 0
+contains
+  impure elemental subroutine destructor(s)
+type(vs), intent(inout) :: s
+s%i = 0
+ctr = ctr + 1
+  end subroutine destructor
+end module iv
+


[gcc r15-313] x86: Fix cmov cost model issue [PR109549]

2024-05-08 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:d826f7945609046f922732b138fb90795d5b1985

commit r15-313-gd826f7945609046f922732b138fb90795d5b1985
Author: konglin1 
Date:   Wed May 8 15:46:10 2024 +0800

x86: Fix cmov cost model issue [PR109549]

(if_then_else:SI (eq (reg:CCZ 17 flags)
(const_int 0 [0]))
(reg/v:SI 101 [ e ])
(reg:SI 102))
The cost is 8 for the rtx; the cost for
(eq (reg:CCZ 17 flags) (const_int 0 [0])) is 4,
but this is just an operator, so its cost does not need to be computed for cmov.

gcc/ChangeLog:

PR target/109549
* config/i386/i386.cc (ix86_rtx_costs): The XEXP (x, 0) for cmov
is an operator, so its cost does not need to be computed.

gcc/testsuite/ChangeLog:

* gcc.target/i386/cmov6.c: Fixed.

Diff:
---
 gcc/config/i386/i386.cc   | 2 +-
 gcc/testsuite/gcc.target/i386/cmov6.c | 5 +
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e58335adc8f0..c2df4ab91ee9 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22237,7 +22237,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
{
  /* cmov.  */
  *total = COSTS_N_INSNS (1);
- if (!REG_P (XEXP (x, 0)))
+ if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
*total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
  if (!REG_P (XEXP (x, 1)))
*total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
diff --git a/gcc/testsuite/gcc.target/i386/cmov6.c 
b/gcc/testsuite/gcc.target/i386/cmov6.c
index 5111c8a90995..535326e4c2a3 100644
--- a/gcc/testsuite/gcc.target/i386/cmov6.c
+++ b/gcc/testsuite/gcc.target/i386/cmov6.c
@@ -1,9 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=k8" } */
-/* if-converting this sequence would require two cmov
-   instructions and seems to always cost more independent
-   of the TUNE_ONE_IF_CONV setting.  */
-/* { dg-final { scan-assembler-not "cmov\[^6\]" } } */
+/* { dg-final { scan-assembler "cmov\[^6\]" } } */
 
 /* Verify that blocks are converted to conditional moves.  */
 extern int bar (int, int);