[gcc r14-9350] match.pd: Optimize a * !a to 0 [PR114009]

2024-03-06 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:95b6ee96348041eaee9133f082b57f3e57ef0b11

commit r14-9350-g95b6ee96348041eaee9133f082b57f3e57ef0b11
Author: Jakub Jelinek 
Date:   Thu Mar 7 08:43:16 2024 +0100

match.pd: Optimize a * !a to 0 [PR114009]

The following patch attempts to fix an optimization regression through
adding a simple simplification.  We already have the
/* (m1 CMP m2) * d -> (m1 CMP m2) ? d : 0  */
(if (!canonicalize_math_p ())
 (for cmp (tcc_comparison)
  (simplify
   (mult:c (convert (cmp@0 @1 @2)) @3)
   (if (INTEGRAL_TYPE_P (type)
&& INTEGRAL_TYPE_P (TREE_TYPE (@0)))
 (cond @0 @3 { build_zero_cst (type); })))
optimization which otherwise triggers during the a * !a multiplication,
but that is done only late and we aren't yet able to optimize it through
range assumptions anyway.

The patch adds a specific simplification for it.
If a is zero, then a * !a will be 0 * 1 (or for signed 1-bit 0 * -1)
and so 0.
If a is non-zero, then a * !a will be a * 0 and so again 0.
The pattern is valid for scalar integers, complex integers and vector types,
but I think will actually trigger only for the scalar integers.  For
vector types I've added other two with VEC_COND_EXPR in it, for complex
there are different GENERIC trees to match and it is something that likely
would be never matched in GIMPLE, so I didn't handle that.

2024-03-07  Jakub Jelinek  

PR tree-optimization/114009
* genmatch.cc (decision_tree::gen): Emit ARG_UNUSED for captures
argument even for GENERIC, not just for GIMPLE.
* match.pd (a * !a -> 0): New simplifications.

* gcc.dg/tree-ssa/pr114009.c: New test.

Diff:
---
 gcc/genmatch.cc  |  2 +-
 gcc/match.pd | 11 +++
 gcc/testsuite/gcc.dg/tree-ssa/pr114009.c | 33 
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index 61c4c8c0294..c982c95b70f 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -4071,7 +4071,7 @@ decision_tree::gen (vec  , bool gimple)
  for (unsigned i = 0;
   i < as_a (s->s->s->match)->ops.length (); ++i)
fp_decl (f, " tree ARG_UNUSED (_p%d),", i);
- fp_decl (f, " tree *captures");
+ fp_decl (f, " tree *ARG_UNUSED (captures)");
}
   for (unsigned i = 0; i < s->s->s->for_subst_vec.length (); ++i)
{
diff --git a/gcc/match.pd b/gcc/match.pd
index 4edba7c84fb..9ce313323a3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1219,6 +1219,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& tree_nop_conversion_p (type, TREE_TYPE (@1)))
(lshift @0 @2)))
 
+/* Fold a * !a into 0.  */
+(simplify
+ (mult:c @0 (convert? (eq @0 integer_zerop)))
+  { build_zero_cst (type); })
+(simplify
+ (mult:c @0 (vec_cond (eq @0 integer_zerop) @1 integer_zerop))
+  { build_zero_cst (type); })
+(simplify
+ (mult:c @0 (vec_cond (ne @0 integer_zerop) integer_zerop @1))
+  { build_zero_cst (type); })
+
 /* Shifts by precision or greater result in zero.  */
 (for shift (lshift rshift)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114009.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr114009.c
new file mode 100644
index 000..3b0486e16ad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114009.c
@@ -0,0 +1,33 @@
+/* PR tree-optimization/114009 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1" } */
+/* { dg-final { scan-tree-dump-times "  return 0;" 3 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "  (?:return| =) { 0, 0, 0, 0 };" 
1 "forwprop1" } } */
+
+int
+foo (int x)
+{
+  x = (x / 2) * 2;
+  return (!x) * x;
+}
+
+int
+bar (int x, int y)
+{
+  (void) x;
+  return y * !y;
+}
+
+unsigned long long
+baz (unsigned long long x)
+{
+  return (!x) * x;
+}
+
+typedef int V __attribute__((vector_size (4 * sizeof (int))));
+
+V
+qux (V x)
+{
+  return x * (x == 0);
+}


[gcc r14-9349] RISC-V: Refactor expand_vec_cmp [NFC]

2024-03-06 Thread Demin Han via Gcc-cvs
https://gcc.gnu.org/g:1cd8254ebad7b73993d2acee80a7caf37c21878a

commit r14-9349-g1cd8254ebad7b73993d2acee80a7caf37c21878a
Author: demin.han 
Date:   Mon Feb 26 14:50:15 2024 +0800

RISC-V: Refactor expand_vec_cmp [NFC]

There are two expand_vec_cmp functions.
They have same structure and similar code.
We can use default arguments instead of overloading.

Tested on RV32 and RV64.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (expand_vec_cmp): Change proto
* config/riscv/riscv-v.cc (expand_vec_cmp): Use default arguments
(expand_vec_cmp_float): Adapt arguments

Signed-off-by: demin.han 

Diff:
---
 gcc/config/riscv/riscv-protos.h |  2 +-
 gcc/config/riscv/riscv-v.cc | 44 +
 2 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 80efdf2b7e5..b8735593805 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -603,7 +603,7 @@ bool simm5_p (rtx);
 bool neg_simm5_p (rtx);
 #ifdef RTX_CODE
 bool has_vi_variant_p (rtx_code, rtx);
-void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
+void expand_vec_cmp (rtx, rtx_code, rtx, rtx, rtx = nullptr, rtx = nullptr);
 bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void expand_cond_len_unop (unsigned, rtx *);
 void expand_cond_len_binop (unsigned, rtx *);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 2d32db06dd1..967f4e38287 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2775,7 +2775,8 @@ vectorize_related_mode (machine_mode vector_mode, 
scalar_mode element_mode,
 /* Expand an RVV comparison.  */
 
 void
-expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1)
+expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
+   rtx maskoff)
 {
   machine_mode mask_mode = GET_MODE (target);
   machine_mode data_mode = GET_MODE (op0);
@@ -2785,8 +2786,8 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx 
op1)
 {
   rtx lt = gen_reg_rtx (mask_mode);
   rtx gt = gen_reg_rtx (mask_mode);
-  expand_vec_cmp (lt, LT, op0, op1);
-  expand_vec_cmp (gt, GT, op0, op1);
+  expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
+  expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
   icode = code_for_pred (IOR, mask_mode);
   rtx ops[] = {target, lt, gt};
   emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
@@ -2794,33 +2795,16 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx 
op1)
 }
 
   rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
-  rtx ops[] = {target, cmp, op0, op1};
-  emit_vlmax_insn (icode, COMPARE_OP, ops);
-}
-
-void
-expand_vec_cmp (rtx target, rtx_code code, rtx mask, rtx maskoff, rtx op0,
-   rtx op1)
-{
-  machine_mode mask_mode = GET_MODE (target);
-  machine_mode data_mode = GET_MODE (op0);
-  insn_code icode = get_cmp_insn_code (code, data_mode);
-
-  if (code == LTGT)
+  if (!mask && !maskoff)
 {
-  rtx lt = gen_reg_rtx (mask_mode);
-  rtx gt = gen_reg_rtx (mask_mode);
-  expand_vec_cmp (lt, LT, mask, maskoff, op0, op1);
-  expand_vec_cmp (gt, GT, mask, maskoff, op0, op1);
-  icode = code_for_pred (IOR, mask_mode);
-  rtx ops[] = {target, lt, gt};
-  emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
-  return;
+  rtx ops[] = {target, cmp, op0, op1};
+  emit_vlmax_insn (icode, COMPARE_OP, ops);
+}
+  else
+{
+  rtx ops[] = {target, mask, maskoff, cmp, op0, op1};
+  emit_vlmax_insn (icode, COMPARE_OP_MU, ops);
 }
-
-  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
-  rtx ops[] = {target, mask, maskoff, cmp, op0, op1};
-  emit_vlmax_insn (icode, COMPARE_OP_MU, ops);
 }
 
 /* Expand an RVV floating-point comparison:
@@ -2898,7 +2882,7 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, 
rtx op1,
   else
{
  /* vmfeq.vvv0, vb, vb, v0.t  */
- expand_vec_cmp (eq0, EQ, eq0, eq0, op1, op1);
+ expand_vec_cmp (eq0, EQ, op1, op1, eq0, eq0);
}
   break;
 default:
@@ -2916,7 +2900,7 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, 
rtx op1,
   if (code == ORDERED)
 emit_move_insn (target, eq0);
   else
-expand_vec_cmp (eq0, code, eq0, eq0, op0, op1);
+expand_vec_cmp (eq0, code, op0, op1, eq0, eq0);
 
   if (can_invert_p)
 {


[gcc r14-9348] Fortran: Fix issue with using snprintf function.

2024-03-06 Thread Jerry DeLisle via Gcc-cvs
https://gcc.gnu.org/g:03932d3203bce244edd812b81921c2f16ea18d86

commit r14-9348-g03932d3203bce244edd812b81921c2f16ea18d86
Author: Jerry DeLisle 
Date:   Wed Mar 6 19:46:04 2024 -0800

Fortran: Fix issue with using snprintf function.

The previous patch used snprintf to set the message
string. The message string is not a formatted string
and the snprintf will interpret '%' related characters
as format specifiers when there are no associated
output variables. A segfault ensues.

This change replaces snprintf with a fortran string copy
function and null terminates the message string.

PR libfortran/105456

libgfortran/ChangeLog:

* io/list_read.c (list_formatted_read_scalar): Use fstrcpy
from libgfortran/runtime/string.c to replace snprintf.
(nml_read_obj): Likewise.
* io/transfer.c (unformatted_read): Likewise.
(unformatted_write): Likewise.
(formatted_transfer_scalar_read): Likewise.
(formatted_transfer_scalar_write): Likewise.
* io/write.c (list_formatted_write_scalar): Likewise.
(nml_write_obj): Likewise.

gcc/testsuite/ChangeLog:

* gfortran.dg/pr105456.f90: Revise using '%' characters
in users error message.

Diff:
---
 gcc/testsuite/gfortran.dg/pr105456.f90 |  4 ++--
 libgfortran/io/list_read.c | 10 ++
 libgfortran/io/transfer.c  | 20 
 libgfortran/io/write.c | 10 ++
 4 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/pr105456.f90 
b/gcc/testsuite/gfortran.dg/pr105456.f90
index 188323847a7..60cd3b6f3e8 100644
--- a/gcc/testsuite/gfortran.dg/pr105456.f90
+++ b/gcc/testsuite/gfortran.dg/pr105456.f90
@@ -19,7 +19,7 @@ contains
 character :: ch
 read (unit,fmt='(A1)', advance="no", iostat=piostat, iomsg=piomsg) ch
 piostat = 42
-piomsg="The users message"
+piomsg="The users message containing % and %% and %s and other stuff"
 dtv%ch = ch
   end subroutine read_formatted
 end module sk1
@@ -35,4 +35,4 @@ program skip1
   write (*,'(10(A))') "Read: '",x%ch,"'"
 end program skip1
 ! { dg-output ".*(unit = 10, file = .*)" }
-! { dg-output "Fortran runtime error: The users message" }
+! { dg-output "Fortran runtime error: The users message containing % and %% 
and %s and other stuff" }
diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c
index 707afaeb8dc..e38e9a84976 100644
--- a/libgfortran/io/list_read.c
+++ b/libgfortran/io/list_read.c
@@ -2268,9 +2268,10 @@ list_formatted_read_scalar (st_parameter_dt *dtp, bt 
type, void *p,
  !(dtp->common.flags & IOPARM_HAS_IOSTAT))
{
  char message[IOMSG_LEN + 1];
- child_iomsg_len = string_len_trim (IOMSG_LEN, child_iomsg) + 1;
+ child_iomsg_len = string_len_trim (IOMSG_LEN, child_iomsg);
  free_line (dtp);
- snprintf (message, child_iomsg_len, child_iomsg);
+ fstrcpy (message, child_iomsg_len, child_iomsg, child_iomsg_len);
+ message[child_iomsg_len] = '\0';
  generate_error (&dtp->common, dtp->u.p.child_saved_iostat,
  message);
}
@@ -3082,8 +3083,9 @@ nml_read_obj (st_parameter_dt *dtp, namelist_info *nl, 
index_type offset,
!(dtp->common.flags & IOPARM_HAS_IOSTAT))
  {
char message[IOMSG_LEN + 1];
-   child_iomsg_len = string_len_trim (IOMSG_LEN, child_iomsg) 
+ 1;
-   snprintf (message, child_iomsg_len, child_iomsg);
+   child_iomsg_len = string_len_trim (IOMSG_LEN, child_iomsg);
+   fstrcpy (message, child_iomsg_len, child_iomsg, 
child_iomsg_len);
+   message[child_iomsg_len] = '\0';
generate_error (&dtp->common, dtp->u.p.child_saved_iostat,
message);
goto nml_err_ret;
diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c
index 9523a14c4bf..a86099d46f5 100644
--- a/libgfortran/io/transfer.c
+++ b/libgfortran/io/transfer.c
@@ -1128,8 +1128,9 @@ unformatted_read (st_parameter_dt *dtp, bt type,
  !(dtp->common.flags & IOPARM_HAS_IOSTAT))
{
  char message[IOMSG_LEN + 1];
- child_iomsg_len = string_len_trim (IOMSG_LEN, child_iomsg) + 1;
- snprintf (message, child_iomsg_len, child_iomsg);
+ child_iomsg_len = string_len_trim (IOMSG_LEN, child_iomsg);
+ fstrcpy (message, child_iomsg_len, child_iomsg, child_iomsg_len);
+ message[child_iomsg_len] = '\0';
  generate_error (&dtp->common, dtp->u.p.child_saved_iostat,
  message);
}
@@ -1271,8 +1272,9 @@ unformatted_write (st_parameter_dt *dtp, bt type,

[gcc r13-8407] Fortran: do not evaluate polymorphic functions twice in assignment [PR114012]

2024-03-06 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:1f5787e4b803a4294eeb80e048f56ccdb99c1b3b

commit r13-8407-g1f5787e4b803a4294eeb80e048f56ccdb99c1b3b
Author: Harald Anlauf 
Date:   Sun Feb 25 21:18:23 2024 +0100

Fortran: do not evaluate polymorphic functions twice in assignment 
[PR114012]

PR fortran/114012

gcc/fortran/ChangeLog:

* trans-expr.cc (gfc_conv_procedure_call): Evaluate non-trivial
arguments just once before assigning to an unlimited polymorphic
dummy variable.

gcc/testsuite/ChangeLog:

* gfortran.dg/pr114012.f90: New test.

(cherry picked from commit 2f71e801ad0bb1f620334aadbd7c99cc4efe6309)

Diff:
---
 gcc/fortran/trans-expr.cc  |  4 ++
 gcc/testsuite/gfortran.dg/pr114012.f90 | 81 ++
 2 files changed, 85 insertions(+)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 48af30740fe..316ad684a64 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -6518,6 +6518,10 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
{
  tree efield;
 
+ /* Evaluate arguments just once.  */
+ if (e->expr_type != EXPR_VARIABLE)
+   parmse.expr = save_expr (parmse.expr);
+
  /* Set the _data field.  */
  tmp = gfc_class_data_get (var);
  efield = fold_convert (TREE_TYPE (tmp),
diff --git a/gcc/testsuite/gfortran.dg/pr114012.f90 
b/gcc/testsuite/gfortran.dg/pr114012.f90
new file mode 100644
index 000..9dbb031c664
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr114012.f90
@@ -0,0 +1,81 @@
+! { dg-do run }
+! PR fortran/114012
+!
+! Polymorphic functions were evaluated twice in assignment
+
+program test
+  implicit none
+
+  type :: custom_int
+ integer :: val = 2
+  end type
+
+  interface assignment(=)
+ procedure assign
+  end interface
+  interface operator(-)
+ procedure neg
+  end interface
+
+  type(custom_int) :: i
+  integer  :: count_assign, count_neg
+
+  count_assign = 0
+  count_neg= 0
+
+  i = 1
+  if (count_assign /= 1 .or. count_neg /= 0) stop 1
+
+  i = -i
+  if (count_assign /= 2 .or. count_neg /= 1) stop 2
+  if (i% val /= -1) stop 3
+
+  i = neg(i)
+  if (count_assign /= 3 .or. count_neg /= 2) stop 4
+  if (i% val /=  1) stop 5
+
+  i = (neg(i))
+  if (count_assign /= 4 .or. count_neg /= 3) stop 6
+  if (i% val /= -1) stop 7
+
+  i = - neg(i)
+  if (count_assign /= 5 .or. count_neg /= 5) stop 8
+  if (i% val /= -1) stop 9
+
+contains
+
+  subroutine assign (field, val)
+type(custom_int), intent(out) :: field
+class(*), intent(in) :: val
+
+count_assign = count_assign + 1
+
+select type (val)
+type is (integer)
+!  print *, " in assign(integer)", field%val, val
+   field%val = val
+type is (custom_int)
+!  print *, " in assign(custom)", field%val, val%val
+   field%val = val%val
+class default
+   error stop
+end select
+
+  end subroutine assign
+
+  function neg (input_field) result(output_field)
+type(custom_int), intent(in), target :: input_field
+class(custom_int), allocatable :: output_field
+allocate (custom_int :: output_field)
+
+count_neg = count_neg + 1
+
+select type (output_field)
+type is (custom_int)
+!  print *, " in neg", output_field%val, input_field%val
+   output_field%val = -input_field%val
+class default
+   error stop
+end select
+  end function neg
+end program test


[gcc r13-8406] Fortran: ALLOCATE statement, SOURCE/MOLD expressions with subrefs [PR114024]

2024-03-06 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:77cf842869ddda8cfcdbb7db6e65ecfb9ac432fc

commit r13-8406-g77cf842869ddda8cfcdbb7db6e65ecfb9ac432fc
Author: Steve Kargl 
Date:   Fri Feb 23 22:05:04 2024 +0100

Fortran: ALLOCATE statement, SOURCE/MOLD expressions with subrefs [PR114024]

PR fortran/114024

gcc/fortran/ChangeLog:

* trans-stmt.cc (gfc_trans_allocate): When a source expression has
substring references, part-refs, or %re/%im inquiries, wrap the
entity in parentheses to force evaluation of the expression.

gcc/testsuite/ChangeLog:

* gfortran.dg/allocate_with_source_27.f90: New test.
* gfortran.dg/allocate_with_source_28.f90: New test.

Co-Authored-By: Harald Anlauf 
(cherry picked from commit 80d126ba99f4b9bc64d4861b3c4bae666497f2d4)

Diff:
---
 gcc/fortran/trans-stmt.cc  | 10 ++-
 .../gfortran.dg/allocate_with_source_27.f90| 20 +
 .../gfortran.dg/allocate_with_source_28.f90| 90 ++
 3 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc
index 776f98d08d9..35eb1880539 100644
--- a/gcc/fortran/trans-stmt.cc
+++ b/gcc/fortran/trans-stmt.cc
@@ -6318,8 +6318,14 @@ gfc_trans_allocate (gfc_code * code)
vtab_needed = (al->expr->ts.type == BT_CLASS);
 
   gfc_init_se (&se, NULL);
-  /* When expr3 is a variable, i.e., a very simple expression,
-then convert it once here.  */
+  /* When expr3 is a variable, i.e., a very simple expression, then
+convert it once here.  If one has a source expression that has
+substring references, part-refs, or %re/%im inquiries, wrap the
+entity in parentheses to force evaluation of the expression.  */
+  if (code->expr3->expr_type == EXPR_VARIABLE
+ && is_subref_array (code->expr3))
+   code->expr3 = gfc_get_parentheses (code->expr3);
+
   if (code->expr3->expr_type == EXPR_VARIABLE
  || code->expr3->expr_type == EXPR_ARRAY
  || code->expr3->expr_type == EXPR_CONSTANT)
diff --git a/gcc/testsuite/gfortran.dg/allocate_with_source_27.f90 
b/gcc/testsuite/gfortran.dg/allocate_with_source_27.f90
new file mode 100644
index 000..d0f0f3c4a84
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/allocate_with_source_27.f90
@@ -0,0 +1,20 @@
+!
+! { dg-do run }
+!
+! fortran/PR114024
+! https://github.com/fujitsu/compiler-test-suite
+! Modified from Fortran/0093/0093_0130.f90
+!
+program foo
+   implicit none
+   complex :: cmp(3)
+   real, allocatable :: xx(:), yy(:), zz(:)
+   cmp = (3., 6.78)
+   allocate(xx, source = cmp%re)  ! This caused an ICE.
+   allocate(yy, source = cmp(1:3)%re) ! This caused an ICE.
+   allocate(zz, source = (cmp%re))
+   if (any(xx /= [3., 3., 3.])) stop 1
+   if (any(yy /= [3., 3., 3.])) stop 2
+   if (any(zz /= [3., 3., 3.])) stop 3
+end program foo
+
diff --git a/gcc/testsuite/gfortran.dg/allocate_with_source_28.f90 
b/gcc/testsuite/gfortran.dg/allocate_with_source_28.f90
new file mode 100644
index 000..8548ccb34e2
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/allocate_with_source_28.f90
@@ -0,0 +1,90 @@
+! { dg-do run }
+!
+! PR fortran/114024
+
+program foo
+  implicit none
+  complex :: cmp(3) = (3.,4.)
+  type ci   ! pseudo "complex integer" type
+ integer :: re
+ integer :: im
+  end type ci
+  type cr   ! pseudo "complex" type
+ real :: re
+ real :: im
+  end type cr
+  type u
+ type(ci) :: ii(3)
+ type(cr) :: rr(3)
+  end type u
+  type(u) :: cc
+
+  cc% ii% re = nint (cmp% re)
+  cc% ii% im = nint (cmp% im)
+  cc% rr% re = cmp% re
+  cc% rr% im = cmp% im
+ 
+  call test_substring ()
+  call test_int_real ()
+  call test_poly ()
+
+contains
+
+  subroutine test_substring ()
+character(4)  :: str(3) = ["abcd","efgh","ijkl"]
+character(:), allocatable :: ac(:)
+allocate (ac, source=str(1::2)(2:4))
+if (size (ac) /= 2 .or. len (ac) /= 3) stop 11
+if (ac(2) /= "jkl")stop 12
+deallocate (ac)
+allocate (ac, mold=str(1::2)(2:4))
+if (size (ac) /= 2 .or. len (ac) /= 3) stop 13
+deallocate (ac)
+  end
+
+  subroutine test_int_real ()
+integer, allocatable  :: aa(:)
+real, pointer :: pp(:)
+allocate (aa, source = cc% ii% im)
+if (size (aa) /= 3)  stop 21
+if (any (aa /= cmp% im)) stop 22
+allocate (pp, source = cc% rr% re)
+if (size (pp) /= 3)  stop 23
+if (any (pp /= cmp% re)) stop 24
+deallocate (aa, pp)
+  end
+
+  subroutine test_poly ()
+class(*), allocatable :: uu(:), vv(:)
+allocate (uu, source = cc% ii% im)
+allocate (vv, source = cc% rr% re)
+if (size (uu) /= 3) stop 31
+if (size (vv) /= 3) stop 32
+call check (uu)
+call check (vv)
+deallocate (uu, vv)
+allocate (uu, mold = cc% ii% im)
+allocate (vv, mold = cc% rr% re)
+if 

[gcc r14-9346] i386: Fix and improve insn constraint for V2QI arithmetic/shift insns

2024-03-06 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:74e8cc28eda9b1d75588fcd4017a735911b9d2b4

commit r14-9346-g74e8cc28eda9b1d75588fcd4017a735911b9d2b4
Author: Uros Bizjak 
Date:   Wed Mar 6 20:53:50 2024 +0100

i386: Fix and improve insn constraint for V2QI arithmetic/shift insns

optimize_function_for_size_p predicate is not stable during optab selection,
because it also depends on node->count/node->frequency of the current 
function,
which are updated during IPA, so they may change between early opts and
late opts.  Use optimize_size instead - optimize_size implies
optimize_function_for_size_p (cfun), so if a named pattern uses
"&& optimize_size" and the insn it splits into uses
optimize_function_for_size_p (cfun), it shouldn't fail.

PR target/114232

gcc/ChangeLog:

* config/i386/mmx.md (negv2qi2): Enable for optimize_size instead
of optimize_function_for_size_p.  Explicitly enable for TARGET_SSE2.
(negv2qi SSE reg splitter): Enable for TARGET_SSE2 only.
(v2qi3): Enable for optimize_size instead
of optimize_function_for_size_p.  Explicitly enable for TARGET_SSE2.
(v2qi SSE reg splitter): Enable for TARGET_SSE2 
only.
(v2qi3): Enable for optimize_size instead
of optimize_function_for_size_p.

Diff:
---
 gcc/config/i386/mmx.md | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2856ae6ffef..9a8d6030d8b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2874,11 +2874,18 @@
 (neg:V2QI
  (match_operand:V2QI 1 "register_operand" "0,Yw")))
(clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "!TARGET_PARTIAL_REG_STALL || optimize_size || TARGET_SSE2"
   "#"
   [(set_attr "isa" "*,sse2")
(set_attr "type" "multi")
-   (set_attr "mode" "QI,TI")])
+   (set_attr "mode" "QI,TI")
+   (set (attr "enabled")
+   (cond [(and (eq_attr "alternative" "0")
+   (and (match_test "TARGET_PARTIAL_REG_STALL")
+(not (match_test "optimize_function_for_size_p 
(cfun)"
+   (symbol_ref "false")
+ ]
+ (const_string "*")))])
 
 (define_split
   [(set (match_operand:V2QI 0 "general_reg_operand")
@@ -2912,8 +2919,7 @@
 (neg:V2QI
  (match_operand:V2QI 1 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && TARGET_SSE2 && reload_completed"
+  "TARGET_SSE2 && reload_completed"
   [(set (match_dup 0) (match_dup 2))
(set (match_dup 0)
(minus:V16QI (match_dup 0) (match_dup 1)))]
@@ -2975,11 +2981,18 @@
  (match_operand:V2QI 1 "register_operand" "0,0,Yw")
  (match_operand:V2QI 2 "register_operand" "Q,x,Yw")))
(clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "!TARGET_PARTIAL_REG_STALL || optimize_size || TARGET_SSE2"
   "#"
   [(set_attr "isa" "*,sse2_noavx,avx")
(set_attr "type" "multi,sseadd,sseadd")
-   (set_attr "mode" "QI,TI,TI")])
+   (set_attr "mode" "QI,TI,TI")
+   (set (attr "enabled")
+   (cond [(and (eq_attr "alternative" "0")
+   (and (match_test "TARGET_PARTIAL_REG_STALL")
+(not (match_test "optimize_function_for_size_p 
(cfun)"
+   (symbol_ref "false")
+ ]
+ (const_string "*")))])
 
 (define_split
   [(set (match_operand:V2QI 0 "general_reg_operand")
@@ -3021,8 +3034,7 @@
  (match_operand:V2QI 1 "sse_reg_operand")
  (match_operand:V2QI 2 "sse_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && TARGET_SSE2 && reload_completed"
+  "TARGET_SSE2 && reload_completed"
   [(set (match_dup 0)
 (plusminus:V16QI (match_dup 1) (match_dup 2)))]
 {
@@ -3684,9 +3696,10 @@
  (match_operand:V2QI 1 "register_operand" "0")
  (match_operand:QI 2 "nonmemory_operand" "cI")))
(clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "!TARGET_PARTIAL_REG_STALL || optimize_size"
   "#"
-  "&& reload_completed"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && reload_completed"
   [(parallel
  [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8))
   (subreg:HI


[gcc r14-9345] RISC-V: Use vmv1r.v instead of vmv.v.v for fma output reloads [PR114200].

2024-03-06 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:59554a50be8ebbd52e8a6348a92110af182e1874

commit r14-9345-g59554a50be8ebbd52e8a6348a92110af182e1874
Author: Robin Dapp 
Date:   Wed Mar 6 12:15:40 2024 +0100

RISC-V: Use vmv1r.v instead of vmv.v.v for fma output reloads [PR114200].

Three-operand instructions like vmacc are modeled with an implicit
output reload when the output does not match one of the operands.  For
this we use vmv.v.v which is subject to length masking.

In a situation where the current vl is less than the full vlenb
and the fma's result value is used as input for a vector reduction
(which is never length masked) we effectively only reduce vl
elements.  The masked-out elements are relevant for the
reduction, though, leading to a wrong result.

This patch replaces the vmv reloads by full-register reloads.

gcc/ChangeLog:

PR target/114200
PR target/114202

* config/riscv/vector.md: Use vmv[1248]r.v instead of vmv.v.v.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr114200.c: New test.
* gcc.target/riscv/rvv/autovec/pr114202.c: New test.

Diff:
---
 gcc/config/riscv/vector.md | 96 +++---
 .../gcc.target/riscv/rvv/autovec/pr114200.c| 18 
 .../gcc.target/riscv/rvv/autovec/pr114202.c| 20 +
 3 files changed, 86 insertions(+), 48 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index f89f9c2fa86..8b1c24c5d79 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -5351,10 +5351,10 @@
   "@
vmadd.vv\t%0,%4,%5%p1
vmacc.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%4\;vmacc.vv\t%0,%3,%4%p1
+   vmv%m4r.v\t%0,%4\;vmacc.vv\t%0,%3,%4%p1
vmadd.vv\t%0,%4,%5%p1
vmacc.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%5\;vmacc.vv\t%0,%3,%4%p1"
+   vmv%m5r.v\t%0,%5\;vmacc.vv\t%0,%3,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")])
 
@@ -5378,9 +5378,9 @@
   "TARGET_VECTOR"
   "@
vmadd.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1
+   vmv%m2r.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1
vmadd.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1"
+   vmv%m2r.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "2")
@@ -5409,9 +5409,9 @@
   "TARGET_VECTOR"
   "@
vmacc.vv\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vv\t%0,%2,%3%p1
+   vmv%m4r.v\t%0,%4;vmacc.vv\t%0,%2,%3%p1
vmacc.vv\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vv\t%0,%2,%3%p1"
+   vmv%m4r.v\t%0,%4\;vmacc.vv\t%0,%2,%3%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "4")
@@ -5462,9 +5462,9 @@
   "TARGET_VECTOR"
   "@
vmadd.vx\t%0,%2,%4%p1
-   vmv.v.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1
+   vmv%m3r.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1
vmadd.vx\t%0,%2,%4%p1
-   vmv.v.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1"
+   vmv%m3r.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "3")
@@ -5494,9 +5494,9 @@
   "TARGET_VECTOR"
   "@
vmacc.vx\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1
+   vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1
vmacc.vx\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1"
+   vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "4")
@@ -5562,9 +5562,9 @@
   "TARGET_VECTOR && !TARGET_64BIT"
   "@
vmadd.vx\t%0,%2,%4%p1
-   vmv.v.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1
+   vmv%m2r.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1
vmadd.vx\t%0,%2,%4%p1
-   vmv.v.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1"
+   vmv%m2r.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "3")
@@ -5595,9 +5595,9 @@
   "TARGET_VECTOR && !TARGET_64BIT"
   "@
vmacc.vx\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1
+   vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1
vmacc.vx\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1"
+   vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "4")
@@ -5649,10 +5649,10 @@
   "@
vnmsub.vv\t%0,%4,%5%p1
vnmsac.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1
+   vmv%m3r.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1
vnmsub.vv\t%0,%4,%5%p1
vnmsac.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1"
+   vmv%m3r.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")])
 
@@ -5676,9 +5676,9 @@
   "TARGET_VECTOR"
   "@
vnmsub.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1
+   vmv%m2r.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1
vnmsub.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1"
+   vmv%m2r.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "2")
@@ -5707,9 +5707,9 

[gcc r14-9344] RISC-V: Adjust vec unit-stride load/store costs.

2024-03-06 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:9ae83078fe45d093bbaa02b8348f2407fe0c62d6

commit r14-9344-g9ae83078fe45d093bbaa02b8348f2407fe0c62d6
Author: Robin Dapp 
Date:   Mon Jan 15 17:34:58 2024 +0100

RISC-V: Adjust vec unit-stride load/store costs.

Scalar loads provide offset addressing while unit-stride vector
instructions cannot.  The offset must be loaded into a general-purpose
register before it can be used.  In order to account for this, this
patch adds an address arithmetic heuristic that keeps track of data
reference operands.  If we haven't seen the operand before we add the
cost of a scalar statement.

This helps to get rid of an lbm regression when vectorizing (roughly
0.5% fewer dynamic instructions).  gcc5 improves by 0.2% and deepsjeng
by 0.25%.  wrf and nab degrade by 0.1%.  This is because we now adjust
the cost of SLP as well as loop-vectorized instructions, whereas
previously we would only adjust loop-vectorized instructions.
Considering higher scalar_to_vec costs (3 vs 1) for all vectorization
types causes some snippets not to get vectorized anymore.  Given these
costs the decision looks correct but appears worse when just counting
dynamic instructions.

In total SPECint 2017 has 4 bln dynamic instructions less and SPECfp 0.7
bln.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (adjust_stmt_cost): Move...
(costs::adjust_stmt_cost): ... to here and add vec_load/vec_store
offset handling.
(costs::add_stmt_cost): Also adjust cost for statements without
stmt_info.
* config/riscv/riscv-vector-costs.h: Define zero constant.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/vse-slp-1.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vse-slp-2.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc | 86 +++---
 gcc/config/riscv/riscv-vector-costs.h  | 10 +++
 .../gcc.dg/vect/costmodel/riscv/rvv/vse-slp-1.c| 51 +
 .../gcc.dg/vect/costmodel/riscv/rvv/vse-slp-2.c| 51 +
 4 files changed, 188 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 7c9840df4e9..adf9c197df5 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "backend.h"
 #include "tree-data-ref.h"
 #include "tree-ssa-loop-niter.h"
+#include "tree-hash-traits.h"
 
 /* This file should be included last.  */
 #include "riscv-vector-costs.h"
@@ -1047,18 +1048,81 @@ costs::better_main_loop_than_p (const vector_costs 
*uncast_other) const
top of riscv_builtin_vectorization_cost handling which doesn't have any
information on statement operation codes etc.  */
 
-static unsigned
-adjust_stmt_cost (enum vect_cost_for_stmt kind, tree vectype, int stmt_cost)
+unsigned
+costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
+stmt_vec_info stmt_info,
+slp_tree, tree vectype, int stmt_cost)
 {
   const cpu_vector_cost *costs = get_vector_costs ();
   switch (kind)
 {
 case scalar_to_vec:
-  return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
- : costs->regmove->GR2VR);
+  stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
+   : costs->regmove->GR2VR);
+  break;
 case vec_to_scalar:
-  return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
- : costs->regmove->VR2GR);
+  stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
+   : costs->regmove->VR2GR);
+  break;
+case vector_load:
+case vector_store:
+   {
+ /* Unit-stride vector loads and stores do not have offset addressing
+as opposed to scalar loads and stores.
+If the address depends on a variable we need an additional
+add/sub for each load/store in the worst case.  */
+ if (stmt_info && stmt_info->stmt)
+   {
+ data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+ class loop *father = stmt_info->stmt->bb->loop_father;
+ if (!loop && father && !father->inner && father->superloops)
+   {
+ tree ref;
+ if (TREE_CODE (dr->ref) != MEM_REF
+ || !(ref = TREE_OPERAND (dr->ref, 0))
+ || TREE_CODE (ref) != SSA_NAME)
+   break;
+
+ if (SSA_NAME_IS_DEFAULT_DEF (ref))
+   break;
+
+ if (memrefs.contains ({ref, cst0}))
+   break;
+
+ memrefs.add ({ref, cst0});
+
+  

[gcc r14-9343] ARM: Fix conditional execution [PR113915]

2024-03-06 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:b575f37a342cebb954aa85fa45df0604bfa1ada9

commit r14-9343-gb575f37a342cebb954aa85fa45df0604bfa1ada9
Author: Wilco Dijkstra 
Date:   Wed Mar 6 17:35:16 2024 +

ARM: Fix conditional execution [PR113915]

By default most patterns can be conditionalized on Arm targets.  However
Thumb-2 predication requires the "predicable" attribute be explicitly
set to "yes".  Most patterns are shared between Arm and Thumb(-2) and are
marked with "predicable".  Given this sharing, it does not make sense to
use a different default for Arm.  So only consider conditional execution
of instructions that have the predicable attribute set to yes.  This ensures
that patterns not explicitly marked as such are never conditionally 
executed.

gcc/ChangeLog:
PR target/113915
* config/arm/arm.md (NOCOND): Improve comment.
(arm_rev*): Add predicable.
* config/arm/arm.cc (arm_final_prescan_insn): Add check for
PREDICABLE_YES.

gcc/testsuite/ChangeLog:
PR target/113915
* gcc.target/arm/builtin-bswap-1.c: Fix test to allow conditional
execution both for Arm and Thumb-2.

Diff:
---
 gcc/config/arm/arm.cc  |  5 +++--
 gcc/config/arm/arm.md  |  6 ++
 gcc/testsuite/gcc.target/arm/builtin-bswap-1.c | 15 ++-
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 1cd69268ee9..6a35fe44138 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -25613,11 +25613,12 @@ arm_final_prescan_insn (rtx_insn *insn)
  break;
 
case INSN:
- /* Instructions using or affecting the condition codes make it
-fail.  */
+ /* Check the instruction is explicitly marked as predicable.
+Instructions using or affecting the condition codes are not.  
*/
  scanbody = PATTERN (this_insn);
  if (!(GET_CODE (scanbody) == SET
|| GET_CODE (scanbody) == PARALLEL)
+ || get_attr_predicable (this_insn) != PREDICABLE_YES
  || get_attr_conds (this_insn) != CONDS_NOCOND)
fail = TRUE;
  break;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 814e871acea..1fd00146ca9 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -319,6 +319,8 @@
 ;
 ; NOCOND means that the instruction does not use or alter the condition
 ;   codes but can be converted into a conditionally exectuted instruction.
+;   Given that NOCOND is the default for most instructions if omitted,
+;   the attribute predicable must be set to yes as well.
 
 (define_attr "conds" "use,set,clob,unconditional,nocond"
(if_then_else
@@ -12559,6 +12561,7 @@
   revsh%?\t%0, %1"
   [(set_attr "arch" "t1,t2,32")
(set_attr "length" "2,2,4")
+   (set_attr "predicable" "no,yes,yes")
(set_attr "type" "rev")]
 )
 
@@ -12572,6 +12575,7 @@
rev16%?\t%0, %1"
   [(set_attr "arch" "t1,t2,32")
(set_attr "length" "2,2,4")
+   (set_attr "predicable" "no,yes,yes")
(set_attr "type" "rev")]
 )
 
@@ -12596,6 +12600,7 @@
rev16%?\t%0, %1"
   [(set_attr "arch" "t1,t2,32")
(set_attr "length" "2,2,4")
+   (set_attr "predicable" "no,yes,yes")
(set_attr "type" "rev")]
 )
 
@@ -12616,6 +12621,7 @@
rev16%?\t%0, %1"
   [(set_attr "arch" "t1,t2,32")
(set_attr "length" "2,2,4")
+   (set_attr "predicable" "no,yes,yes")
(set_attr "type" "rev")]
 )
 
diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c 
b/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
index c1e7740d14d..1a311a6a5af 100644
--- a/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
+++ b/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
@@ -5,14 +5,11 @@
of the instructions.  Add an -mtune option known to facilitate that.  */
 /* { dg-additional-options "-O2 -mtune=cortex-a53" } */
 /* { dg-final { scan-assembler-not "orr\[ \t\]" } } */
-/* { dg-final { scan-assembler-times "revsh\\t" 1 { target { arm_nothumb } } } 
}  */
-/* { dg-final { scan-assembler-times "revshne\\t" 1 { target { arm_nothumb } } 
} }  */
-/* { dg-final { scan-assembler-times "revsh\\t" 2 { target { ! arm_nothumb } } 
} }  */
-/* { dg-final { scan-assembler-times "rev16\\t" 1 { target { arm_nothumb } } } 
}  */
-/* { dg-final { scan-assembler-times "rev16ne\\t" 1 { target { arm_nothumb } } 
} }  */
-/* { dg-final { scan-assembler-times "rev16\\t" 2 { target { ! arm_nothumb } } 
} }  */
-/* { dg-final { scan-assembler-times "rev\\t" 2 { target { arm_nothumb } } } } 
 */
-/* { dg-final { scan-assembler-times "revne\\t" 2 { target { arm_nothumb } } } 
}  */
-/* { dg-final { scan-assembler-times "rev\\t" 4 { target { ! arm_nothumb } } } 
}  */
+/* { dg-final { scan-assembler-times "revsh\\t" 1 } }  */
+/* { dg-final { scan-assembler-times "revshne\\t" 1 } }  */
+/* { dg-final 

[gcc r14-9342] Revert "Set num_threads to 50 on 32-bit hppa in two libgomp loop tests"

2024-03-06 Thread John David Anglin via Gcc-cvs
https://gcc.gnu.org/g:49c3f24552ee550f78416b6470b22af9be8bea72

commit r14-9342-g49c3f24552ee550f78416b6470b22af9be8bea72
Author: John David Anglin 
Date:   Wed Mar 6 17:01:59 2024 +

Revert "Set num_threads to 50 on 32-bit hppa in two libgomp loop tests"

This reverts commit b14209715e659f6d3ca0f9eef9a4851e7bd6e373.

Diff:
---
 libgomp/testsuite/libgomp.c++/loop-3.C   | 8 +---
 libgomp/testsuite/libgomp.c/omp-loop03.c | 8 +---
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/libgomp/testsuite/libgomp.c++/loop-3.C 
b/libgomp/testsuite/libgomp.c++/loop-3.C
index 3f460f114bf..fa50f099f3f 100644
--- a/libgomp/testsuite/libgomp.c++/loop-3.C
+++ b/libgomp/testsuite/libgomp.c++/loop-3.C
@@ -1,9 +1,3 @@
-#if defined(__hppa__) && !defined(__LP64__)
-#define NUM_THREADS 50
-#else
-#define NUM_THREADS 64
-#endif
-
 extern "C" void abort (void);
 int a;
 
@@ -25,7 +19,7 @@ foo ()
 int
 main (void)
 {
-#pragma omp parallel num_threads (NUM_THREADS)
+#pragma omp parallel num_threads (64)
   foo ();
 
   return 0;
diff --git a/libgomp/testsuite/libgomp.c/omp-loop03.c 
b/libgomp/testsuite/libgomp.c/omp-loop03.c
index 9879981cf4a..7bb9a194331 100644
--- a/libgomp/testsuite/libgomp.c/omp-loop03.c
+++ b/libgomp/testsuite/libgomp.c/omp-loop03.c
@@ -1,9 +1,3 @@
-#if defined(__hppa__) && !defined(__LP64__)
-#define NUM_THREADS 50
-#else
-#define NUM_THREADS 64
-#endif
-
 extern void abort (void);
 int a;
 
@@ -25,7 +19,7 @@ foo ()
 int
 main (void)
 {
-#pragma omp parallel num_threads (NUM_THREADS)
+#pragma omp parallel num_threads (64)
   foo ();
 
   return 0;


[gcc r14-9341] [PR target/113001] Fix incorrect operand swapping in conditional move

2024-03-06 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:10cbfcd60f9e5bdbe486e1c0192e0f168d899b77

commit r14-9341-g10cbfcd60f9e5bdbe486e1c0192e0f168d899b77
Author: Jeff Law 
Date:   Wed Mar 6 09:50:44 2024 -0700

[PR target/113001] Fix incorrect operand swapping in conditional move

This bug totally fell off my radar.  Sorry about that.

We have some special casing in the conditional move expander to simplify a
conditional move when comparing a register against zero and that same 
register
is one of the arms.

Specifically a (eq (reg) (const_int 0)) where reg is also the true arm or 
(ne
(reg) (const_int 0)) where reg is the false arm need not use the fully
generalized conditional move, thus saving an instruction for those cases.

In the NE case we swapped the operands, but didn't swap the condition, which
led to the ICE due to an unrecognized pattern.  The backend actually has
distinct patterns for those two cases.  So swapping the operands is neither
needed nor advisable.

Regression tested on rv64gc and verified the new tests pass.

Pushing to the trunk.

PR target/113001
PR target/112871
gcc/
* config/riscv/riscv.cc (expand_conditional_move): Do not swap
operands when the comparison operand is the same as the false
arm for a NE test.

gcc/testsuite
* gcc.target/riscv/zicond-ice-3.c: New test.
* gcc.target/riscv/zicond-ice-4.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc |  2 --
 gcc/testsuite/gcc.target/riscv/zicond-ice-3.c | 15 +++
 gcc/testsuite/gcc.target/riscv/zicond-ice-4.c | 22 ++
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 691d967de29..680c4a728e9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4633,8 +4633,6 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx 
cons, rtx alt)
   || (code == NE && rtx_equal_p (alt, op0)))
{
  rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- if (!rtx_equal_p (cons, op0))
-   std::swap (alt, cons);
  alt = force_reg (mode, alt);
  emit_insn (gen_rtx_SET (dest,
  gen_rtx_IF_THEN_ELSE (mode, cond,
diff --git a/gcc/testsuite/gcc.target/riscv/zicond-ice-3.c 
b/gcc/testsuite/gcc.target/riscv/zicond-ice-3.c
new file mode 100644
index 000..650986825ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zicond-ice-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zicond -mabi=lp64d" { target { rv64 } } } */
+/* { dg-options "-march=rv32gc_zicond -mabi=ilp32d" { target { rv32 } } } */
+
+long a, b;
+int c, d;
+void e(long *f) {
+  (b = *f) && --b;
+  for (; c;)
+;
+}
+void g() {
+  for (; d; d--)
+e();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/zicond-ice-4.c 
b/gcc/testsuite/gcc.target/riscv/zicond-ice-4.c
new file mode 100644
index 000..2be02c78a08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zicond-ice-4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zicond -mabi=lp64d" { target { rv64 } } } */
+/* { dg-options "-march=rv32gc_zicond -mabi=ilp32d" { target { rv32 } } } */
+
+short a, c;
+int b, d, i;
+volatile char e;
+static int f[] = {1, 1};
+long g;
+int volatile h;
+short(j)() { return b ? a : 0; }
+void k() {
+l:
+  h;
+  g = 0;
+  for (; g <= 2; g++) {
+d | ((i || j() & (0 == f[g])) ^ i) && e;
+if (c)
+  goto l;
+  }
+}
+


[gcc r14-9340] Fortran: error recovery while simplifying expressions [PR103707, PR106987]

2024-03-06 Thread Harald Anlauf via Gcc-cvs
https://gcc.gnu.org/g:93e1d4d24ed014387da97e2ce11556d68fe98e66

commit r14-9340-g93e1d4d24ed014387da97e2ce11556d68fe98e66
Author: Harald Anlauf 
Date:   Tue Mar 5 21:54:26 2024 +0100

Fortran: error recovery while simplifying expressions [PR103707,PR106987]

When an exception is encountered during simplification of arithmetic
expressions, the result may depend on whether range-checking is active
(-frange-check) or not.  However, the code path in the front-end should
stay the same for "soft" errors for which the exception is triggered by the
check, while "hard" errors should always terminate the simplification, so
that error recovery is independent of the flag.  Separation of arithmetic
error codes into "hard" and "soft" errors shall be done consistently via
is_hard_arith_error().

PR fortran/103707
PR fortran/106987

gcc/fortran/ChangeLog:

* arith.cc (is_hard_arith_error): New helper function to determine
whether an arithmetic error is "hard" or not.
(check_result): Use it.
(gfc_arith_divide): Set "Division by zero" only for regular
numerators of real and complex divisions.
(reduce_unary): Use is_hard_arith_error to determine whether a hard
or (recoverable) soft error was encountered.  Terminate immediately
on hard error, otherwise remember code of first soft error.
(reduce_binary_ac): Likewise.
(reduce_binary_ca): Likewise.
(reduce_binary_aa): Likewise.

gcc/testsuite/ChangeLog:

* gfortran.dg/pr99350.f90:
* gfortran.dg/arithmetic_overflow_3.f90: New test.

Diff:
---
 gcc/fortran/arith.cc   | 134 +++--
 .../gfortran.dg/arithmetic_overflow_3.f90  |  48 
 gcc/testsuite/gfortran.dg/pr99350.f90  |   2 +-
 3 files changed, 143 insertions(+), 41 deletions(-)

diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
index d17d1aaa1d9..b373c25e5e1 100644
--- a/gcc/fortran/arith.cc
+++ b/gcc/fortran/arith.cc
@@ -130,6 +130,30 @@ gfc_arith_error (arith code)
 }
 
 
+/* Check if a certain arithmetic error code is severe enough to prevent
+   further simplification, as opposed to errors thrown by the range check
+   (e.g. overflow) or arithmetic exceptions that are tolerated with
+   -fno-range-check.  */
+
+static bool
+is_hard_arith_error (arith code)
+{
+  switch (code)
+{
+case ARITH_OK:
+case ARITH_OVERFLOW:
+case ARITH_UNDERFLOW:
+case ARITH_NAN:
+case ARITH_DIV0:
+case ARITH_ASYMMETRIC:
+  return false;
+
+default:
+  return true;
+}
+}
+
+
 /* Get things ready to do math.  */
 
 void
@@ -579,10 +603,10 @@ check_result (arith rc, gfc_expr *x, gfc_expr *r, 
gfc_expr **rp)
   val = ARITH_OK;
 }
 
-  if (val == ARITH_OK || val == ARITH_OVERFLOW)
-*rp = r;
-  else
+  if (is_hard_arith_error (val))
 gfc_free_expr (r);
+  else
+*rp = r;
 
   return val;
 }
@@ -792,23 +816,26 @@ gfc_arith_divide (gfc_expr *op1, gfc_expr *op2, gfc_expr 
**resultp)
   break;
 
 case BT_REAL:
-  if (mpfr_sgn (op2->value.real) == 0 && flag_range_check == 1)
-   {
- rc = ARITH_DIV0;
- break;
-   }
+  /* Set "Division by zero" only for regular numerator.  */
+  if (flag_range_check == 1
+ && mpfr_zero_p (op2->value.real)
+ && mpfr_regular_p (op1->value.real))
+   rc = ARITH_DIV0;
 
   mpfr_div (result->value.real, op1->value.real, op2->value.real,
   GFC_RND_MODE);
   break;
 
 case BT_COMPLEX:
-  if (mpc_cmp_si_si (op2->value.complex, 0, 0) == 0
- && flag_range_check == 1)
-   {
- rc = ARITH_DIV0;
- break;
-   }
+  /* Set "Division by zero" only for regular numerator.  */
+  if (flag_range_check == 1
+ && mpfr_zero_p (mpc_realref (op2->value.complex))
+ && mpfr_zero_p (mpc_imagref (op2->value.complex))
+ && ((mpfr_regular_p (mpc_realref (op1->value.complex))
+  && mpfr_number_p (mpc_imagref (op1->value.complex)))
+ || (mpfr_regular_p (mpc_imagref (op1->value.complex))
+ && mpfr_number_p (mpc_realref (op1->value.complex)
+   rc = ARITH_DIV0;
 
   gfc_set_model (mpc_realref (op1->value.complex));
   if (mpc_cmp_si_si (op2->value.complex, 0, 0) == 0)
@@ -1323,7 +1350,6 @@ reduce_unary (arith (*eval) (gfc_expr *, gfc_expr **), 
gfc_expr *op,
   gfc_constructor *c;
   gfc_expr *r;
   arith rc;
-  bool ov = false;
 
   if (op->expr_type == EXPR_CONSTANT)
 return eval (op, result);
@@ -1335,19 +1361,22 @@ reduce_unary (arith (*eval) (gfc_expr *, gfc_expr **), 
gfc_expr *op,
   head = gfc_constructor_copy (op->value.constructor);
   for (c = gfc_constructor_first (head); c; c = gfc_constructor_next (c))
 {
-  rc = reduce_unary (eval, c->expr, &r);
+  

[gcc r14-9339] c++: ICE with noexcept and local specialization [PR114114]

2024-03-06 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:dc6c3bfb59baab28b998e18396c06087b6d9b0ed

commit r14-9339-gdc6c3bfb59baab28b998e18396c06087b6d9b0ed
Author: Marek Polacek 
Date:   Tue Mar 5 13:33:10 2024 -0500

c++: ICE with noexcept and local specialization [PR114114]

Here we ICE because we call register_local_specialization while
local_specializations is null, so

  local_specializations->put ();

crashes on null this.  It's null since maybe_instantiate_noexcept calls
push_to_top_level which creates a new scope.  Normally, I would have
guessed that we need a new local_specialization_stack.  But here we're
dealing with an operand of a noexcept, which is an unevaluated operand,
and those aren't registered in the hash map.  maybe_instantiate_noexcept
wasn't signalling that it's substituting an unevaluated operand though.

PR c++/114114

gcc/cp/ChangeLog:

* pt.cc (maybe_instantiate_noexcept): Save/restore
cp_unevaluated_operand, c_inhibit_evaluation_warnings, and
cp_noexcept_operand around the tsubst_expr call.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept84.C: New test.

Diff:
---
 gcc/cp/pt.cc|  6 ++
 gcc/testsuite/g++.dg/cpp0x/noexcept84.C | 32 
 2 files changed, 38 insertions(+)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index a6e6c804130..d73f6d93485 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -26879,10 +26879,16 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
  if (orig_fn)
++processing_template_decl;
 
+ ++cp_unevaluated_operand;
+ ++c_inhibit_evaluation_warnings;
+ ++cp_noexcept_operand;
  /* Do deferred instantiation of the noexcept-specifier.  */
  noex = tsubst_expr (DEFERRED_NOEXCEPT_PATTERN (noex),
  DEFERRED_NOEXCEPT_ARGS (noex),
  tf_warning_or_error, fn);
+ --cp_unevaluated_operand;
+ --c_inhibit_evaluation_warnings;
+ --cp_noexcept_operand;
 
  /* Build up the noexcept-specification.  */
  spec = build_noexcept_spec (noex, tf_warning_or_error);
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept84.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept84.C
new file mode 100644
index 000..06f33264f77
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept84.C
@@ -0,0 +1,32 @@
+// PR c++/114114
+// { dg-do compile { target c++11 } }
+
+template
+constexpr void
+test ()
+{
+  constexpr bool is_yes = B;
+  struct S {
+constexpr S() noexcept(is_yes) { }
+  };
+  S s;
+}
+
+constexpr bool foo() { return true; }
+
+template
+constexpr void
+test2 ()
+{
+  constexpr T (*pfn)() = &foo;
+  struct S {
+constexpr S() noexcept(pfn()) { }
+  };
+  S s;
+}
+
+int main()
+{
+  test();
+  test2();
+}


[gcc r14-9338] i386: Eliminate common code from x86_32 TARGET_MACHO part in ix86_expand_move

2024-03-06 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:e772c0c05c36d0b0539effb4256be67bbedd77fb

commit r14-9338-ge772c0c05c36d0b0539effb4256be67bbedd77fb
Author: Uros Bizjak 
Date:   Wed Mar 6 17:08:25 2024 +0100

i386: Eliminate common code from x86_32 TARGET_MACHO part in 
ix86_expand_move

Eliminate common code from x86_32 TARGET_MACHO part in ix86_expand_move and
use generic code instead.

No functional changes.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_expand_move) [TARGET_MACHO]:
Eliminate common code and use generic code instead.

Diff:
---
 gcc/config/i386/i386-expand.cc | 37 +++--
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 3b1685ae448..2210e6f7cc8 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -471,9 +471,9 @@ ix86_expand_move (machine_mode mode, rtx operands[])
   if ((flag_pic || MACHOPIC_INDIRECT)
   && symbolic_operand (op1, mode))
 {
+#if TARGET_MACHO
   if (TARGET_MACHO && !TARGET_64BIT)
{
-#if TARGET_MACHO
  /* dynamic-no-pic */
  if (MACHOPIC_INDIRECT)
{
@@ -490,33 +490,18 @@ ix86_expand_move (machine_mode mode, rtx operands[])
  emit_insn (insn);
  return;
}
- if (GET_CODE (op0) == MEM)
-   op1 = force_reg (Pmode, op1);
- else
-   {
- rtx temp = op0;
- if (GET_CODE (temp) != REG)
-   temp = gen_reg_rtx (Pmode);
- temp = legitimize_pic_address (op1, temp);
- if (temp == op0)
-   return;
- op1 = temp;
-   }
-  /* dynamic-no-pic */
-#endif
}
-  else
+#endif
+
+  if (MEM_P (op0))
+   op1 = force_reg (mode, op1);
+  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
{
- if (MEM_P (op0))
-   op1 = force_reg (mode, op1);
- else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
-   {
- rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
- op1 = legitimize_pic_address (op1, reg);
- if (op0 == op1)
-   return;
- op1 = convert_to_mode (mode, op1, 1);
-   }
+ rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
+ op1 = legitimize_pic_address (op1, reg);
+ if (op0 == op1)
+   return;
+ op1 = convert_to_mode (mode, op1, 1);
}
 }
   else


[gcc(refs/users/rguenth/heads/vect-force-slp)] Avoid bogus SLP outer loop vectorization

2024-03-06 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:d1b89155e4bc4f42a06a36b64208216e5d37e779

commit d1b89155e4bc4f42a06a36b64208216e5d37e779
Author: Richard Biener 
Date:   Wed Mar 6 15:13:05 2024 +0100

Avoid bogus SLP outer loop vectorization

This fixes the check for multiple types, which goes wrong, I think,
because of bogus pointer IV increments when there are multiple
copies of vector stmts in the inner loop.

* tree-vect-stmts.cc (vectorizable_load): Avoid outer loop
SLP vectorization with multi-copy vector stmts in the inner
loop.
(vectorizable_store): Likewise.

Diff:
---
 gcc/tree-vect-stmts.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 8766ef220c2..2bee550f4a9 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8176,7 +8176,9 @@ vectorizable_store (vec_info *vinfo,
   gcc_assert (ncopies >= 1);
 
   /* FORNOW.  This restriction should be relaxed.  */
-  if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
+  if (loop
+  && nested_in_vect_loop_p (loop, stmt_info)
+  && (ncopies > 1 || (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)))
 {
   if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -9917,7 +9919,8 @@ vectorizable_load (vec_info *vinfo,
   gcc_assert (ncopies >= 1);
 
   /* FORNOW. This restriction should be relaxed.  */
-  if (nested_in_vect_loop && ncopies > 1)
+  if (nested_in_vect_loop
+  && (ncopies > 1 || (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)))
 {
   if (dump_enabled_p ())
 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,


[gcc r14-9337] amdgcn: additional gfx1030/gfx1100 support: adjust test cases

2024-03-06 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:71aad5231447484046b45e6c8381f8096d3c287d

commit r14-9337-g71aad5231447484046b45e6c8381f8096d3c287d
Author: Thomas Schwinge 
Date:   Mon Mar 4 10:40:39 2024 +0100

amdgcn: additional gfx1030/gfx1100 support: adjust test cases

The "SDWA" changes in commit 99890e15527f1f04caef95ecdd135c9f1a077f08
"amdgcn: additional gfx1030/gfx1100 support" caused a few regressions:

PASS: gcc.target/gcn/sram-ecc-3.c (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.target/gcn/sram-ecc-3.c scan-assembler 
zero_extendv64qiv64si2

PASS: gcc.target/gcn/sram-ecc-4.c (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.target/gcn/sram-ecc-4.c scan-assembler 
zero_extendv64hiv64si2

PASS: gcc.target/gcn/sram-ecc-7.c (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.target/gcn/sram-ecc-7.c scan-assembler 
zero_extendv64qiv64si2

PASS: gcc.target/gcn/sram-ecc-8.c (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.target/gcn/sram-ecc-8.c scan-assembler 
zero_extendv64hiv64si2

Those test cases need corresponding adjustment.

gcc/testsuite/
* gcc.target/gcn/sram-ecc-3.c: Adjust.
* gcc.target/gcn/sram-ecc-4.c: Likewise.
* gcc.target/gcn/sram-ecc-7.c: Likewise.
* gcc.target/gcn/sram-ecc-8.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/gcn/sram-ecc-3.c | 2 +-
 gcc/testsuite/gcc.target/gcn/sram-ecc-4.c | 2 +-
 gcc/testsuite/gcc.target/gcn/sram-ecc-7.c | 2 +-
 gcc/testsuite/gcc.target/gcn/sram-ecc-8.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c 
b/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c
index 692d4578b66..bc89e3542d2 100644
--- a/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c
@@ -18,4 +18,4 @@ f ()
 a[n] = b[n];
 }
 
-/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */
+/* { dg-final { scan-assembler 
"(\\\*zero_extendv64qiv64si_sdwa|\\\*zero_extendv64qiv64si_shift)" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c 
b/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c
index 61b8d552759..ff7e2d0bda5 100644
--- a/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c
@@ -18,4 +18,4 @@ f ()
 a[n] = b[n];
 }
 
-/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */
+/* { dg-final { scan-assembler 
"(\\\*zero_extendv64hiv64si_sdwa|\\\*zero_extendv64hiv64si_shift)" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c 
b/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c
index 9d0ce6f6b5a..8d363970ffb 100644
--- a/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c
@@ -18,4 +18,4 @@ f ()
 a[n] = b[n];
 }
 
-/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */
+/* { dg-final { scan-assembler 
"(\\\*zero_extendv64qiv64si_sdwa|\\\*zero_extendv64qiv64si_shift)" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c 
b/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c
index 76e02882798..a2b25076ed1 100644
--- a/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c
@@ -18,4 +18,4 @@ f ()
 a[n] = b[n];
 }
 
-/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */
+/* { dg-final { scan-assembler 
"(\\\*zero_extendv64hiv64si_sdwa|\\\*zero_extendv64hiv64si_shift)" } } */


[gcc r14-9335] tree-optimization/114239 - rework reduction epilogue driving

2024-03-06 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:89c443a7e9a0780a52a698fb02d4f5173e025918

commit r14-9335-g89c443a7e9a0780a52a698fb02d4f5173e025918
Author: Richard Biener 
Date:   Wed Mar 6 10:31:02 2024 +0100

tree-optimization/114239 - rework reduction epilogue driving

The following reworks vectorizable_live_operation to pass the
live stmt to vect_create_epilog_for_reduction also for early breaks
and a peeled main exit.  This is to be able to figure the scalar
definition to replace.  This reverts the PR114192 fix as it is
subsumed by this cleanup.

PR tree-optimization/114239
* tree-vect-loop.cc (vect_get_vect_def): Remove.
(vect_create_epilog_for_reduction): The passed in stmt_info
should now be the live stmt that produces the scalar reduction
result.  Revert PR114192 fix.  Base reduction info off
info_for_reduction.  Remove special handling of
early-break/peeled, restore original vector def gathering.
Make sure to pick the correct exit PHIs.
(vectorizable_live_operation): Pass in the proper stmt_info
for early break exits.

* gcc.dg/vect/vect-early-break_122-pr114239.c: New testcase.

Diff:
---
 .../gcc.dg/vect/vect-early-break_122-pr114239.c|  29 ++
 gcc/tree-vect-loop.cc  | 105 +
 2 files changed, 53 insertions(+), 81 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_122-pr114239.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_122-pr114239.c
new file mode 100644
index 000..7bf4db14209
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_122-pr114239.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+
+int ip4_getbit_a, ip4_getbit_pos, ip4_clrbit_pos;
+void ip4_clrbit(int *a) { *a &= ip4_clrbit_pos; }
+typedef struct {
+  char pxlen;
+  int prefix;
+} net_addr_ip4;
+void fib_get_chain();
+int trie_match_longest_ip4();
+int trie_match_next_longest_ip4(net_addr_ip4 *n) {
+  int __trans_tmp_1;
+  while (n->pxlen) {
+n->pxlen--;
+ip4_clrbit(&n->prefix);
+__trans_tmp_1 = ip4_getbit_a >> ip4_getbit_pos;
+if (__trans_tmp_1)
+  return 1;
+  }
+  return 0;
+}
+void net_roa_check_ip4_trie_tab() {
+  net_addr_ip4 px0;
+  for (int _n = trie_match_longest_ip4(); _n;
+   _n = trie_match_next_longest_ip4())
+fib_get_chain();
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 761cdc67570..20ee0aad932 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5897,35 +5897,6 @@ vect_create_partial_epilog (tree vec_def, tree vectype, 
code_helper code,
   return new_temp;
 }
 
-/* Retrieves the definining statement to be used for a reduction.
-   For LAST_VAL_REDUC_P we use the current VEC_STMTs which correspond to the
-   final value after vectorization and otherwise we look at the reduction
-   definitions to get the first.  */
-
-tree
-vect_get_vect_def (stmt_vec_info reduc_info, slp_tree slp_node,
-  slp_instance slp_node_instance, bool last_val_reduc_p,
-  unsigned i, vec  _stmts)
-{
-  tree def;
-
-  if (slp_node)
-{
-  if (!last_val_reduc_p)
-slp_node = slp_node_instance->reduc_phis;
-  def = vect_get_slp_vect_def (slp_node, i);
-}
-  else
-{
-  if (!last_val_reduc_p)
-   reduc_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (reduc_info));
-  vec_stmts = STMT_VINFO_VEC_STMTS (reduc_info);
-  def = gimple_get_lhs (vec_stmts[0]);
-}
-
-  return def;
-}
-
 /* Function vect_create_epilog_for_reduction
 
Create code at the loop-epilog to finalize the result of a reduction
@@ -5989,8 +5960,6 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
  loop-closed PHI of the inner loop which we remember as
  def for the reduction PHI generation.  */
   bool double_reduc = false;
-  bool last_val_reduc_p = LOOP_VINFO_IV_EXIT (loop_vinfo) == loop_exit
- && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
   stmt_vec_info rdef_info = stmt_info;
   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
 {
@@ -6000,8 +5969,6 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
(stmt_info->stmt, 0));
   stmt_info = vect_stmt_to_vectorize (stmt_info);
 }
-  gphi *reduc_def_stmt
-= as_a  (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
   code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
   internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
   tree vectype;
@@ -6066,33 +6033,9 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
 
   stmt_vec_info single_live_out_stmt[] = { stmt_info };
   array_slice live_out_stmts = single_live_out_stmt;
-  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
-  && loop_exit != LOOP_VINFO_IV_EXIT 

[gcc r14-9334] LoongArch: testsuite: Rewrite {x, }vfcmp-{d, f}.c to avoid named registers

2024-03-06 Thread Xi Ruoyao via Gcc-cvs
https://gcc.gnu.org/g:7719b9be2daa55edf336d721839300e62a7abbdc

commit r14-9334-g7719b9be2daa55edf336d721839300e62a7abbdc
Author: Xi Ruoyao 
Date:   Tue Mar 5 20:46:57 2024 +0800

LoongArch: testsuite: Rewrite {x,}vfcmp-{d,f}.c to avoid named registers

Loops on named vector register are not vectorized (see comment 11 of
PR113622), so these test cases have been failing for a while.
Rewrite them using check-function-bodies to remove hard coding register
names.  A barrier is needed to always load the first operand before the
second operand.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vfcmp-f.c: Rewrite to avoid named
registers.
* gcc.target/loongarch/vfcmp-d.c: Likewise.
* gcc.target/loongarch/xvfcmp-f.c: Likewise.
* gcc.target/loongarch/xvfcmp-d.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/loongarch/vfcmp-d.c  | 202 +--
 gcc/testsuite/gcc.target/loongarch/vfcmp-f.c  | 347 --
 gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c | 202 +--
 gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c | 204 +--
 4 files changed, 816 insertions(+), 139 deletions(-)

diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c 
b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
index 8b870ef38a0..87e4ed19e96 100644
--- a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
+++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
@@ -1,28 +1,188 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 
-fno-vect-cost-model" } */
+/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
+/* { dg-final { check-function-bodies "**" "" } } */
 
 #define F double
 #define I long long
 
 #include "vfcmp-f.c"
 
-/* { dg-final { scan-assembler 
"compare_quiet_equal:.*\tvfcmp\\.ceq\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_not_equal:.*\tvfcmp\\.cune\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_signaling_greater:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_signaling_less:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_signaling_less_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_signaling_not_greater:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_signaling_not_less:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_less:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_less_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_greater:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_not_less:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_not_greater:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_unordered:.*\tvfcmp\\.cun\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n"
 } } */
-/* { dg-final { scan-assembler 
"compare_quiet_ordered:.*\tvfcmp\\.cor\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n"
 } } */
+/*
+** compare_quiet_equal:
+** vld (\$vr[0-9]+),\$r4,0
+** vld (\$vr[0-9]+),\$r5,0
+** vfcmp.ceq.d (\$vr[0-9]+),(\1,\2|\2,\1)
+** vst \3,\$r6,0
+** jr  \$r1
+*/
+
+/*
+** compare_quiet_not_equal:
+** vld (\$vr[0-9]+),\$r4,0
+** vld 

[gcc r14-9333] aarch64: Define out-of-class static constants

2024-03-06 Thread Richard Sandiford via Gcc-cvs
https://gcc.gnu.org/g:c7a9883663a888617b6e3584233aa756b30519f8

commit r14-9333-gc7a9883663a888617b6e3584233aa756b30519f8
Author: Richard Sandiford 
Date:   Wed Mar 6 10:04:56 2024 +

aarch64: Define out-of-class static constants

While reworking the aarch64 feature descriptions, I forgot
to add out-of-class definitions of some static constants.
This could lead to a build failure with some compilers.

This was seen with some WIP to increase the number of extensions
beyond 64.  It's latent on trunk though, and a regression from
before the rework.

gcc/
* config/aarch64/aarch64-feature-deps.h (feature_deps::info): Add
out-of-class definitions of static constants.

Diff:
---
 gcc/config/aarch64/aarch64-feature-deps.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-feature-deps.h 
b/gcc/config/aarch64/aarch64-feature-deps.h
index a1b81f9070b..3641badb82f 100644
--- a/gcc/config/aarch64/aarch64-feature-deps.h
+++ b/gcc/config/aarch64/aarch64-feature-deps.h
@@ -71,6 +71,9 @@ template struct info;
 static constexpr auto enable = flag | get_enable REQUIRES; \
 static constexpr auto explicit_on = enable | get_enable EXPLICIT_ON; \
   };   \
+  const aarch64_feature_flags info::flag;  \
+  const aarch64_feature_flags info::enable;\
+  const aarch64_feature_flags info::explicit_on; \
   constexpr info IDENT ()  \
   {\
 return info ();\


[gcc r14-9332] c++: Fix template deduction for conversion operators with xobj parameters [PR113629]

2024-03-06 Thread Nathaniel Shead via Gcc-cvs
https://gcc.gnu.org/g:49d83e963aa453600088380aebd507e172eb80ad

commit r14-9332-g49d83e963aa453600088380aebd507e172eb80ad
Author: Nathaniel Shead 
Date:   Wed Mar 6 00:43:22 2024 +1100

c++: Fix template deduction for conversion operators with xobj parameters 
[PR113629]

Unification for conversion operators (DEDUCE_CONV) doesn't perform
transformations like handling forwarding references. This is correct in
general, but not for xobj parameters, which should be handled "normally"
for the purposes of deduction: [temp.deduct.conv] only applies to the
return type of the conversion function.

PR c++/113629

gcc/cp/ChangeLog:

* pt.cc (type_unification_real): Only use DEDUCE_CONV for the
return type of a conversion function.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/explicit-obj-conv-op.C: New test.

Signed-off-by: Nathaniel Shead 
Reviewed-by: Jason Merrill 

Diff:
---
 gcc/cp/pt.cc  | 12 +-
 gcc/testsuite/g++.dg/cpp23/explicit-obj-conv-op.C | 49 +++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index c4bc54a8fdb..a6e6c804130 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -23312,10 +23312,18 @@ type_unification_real (tree tparms,
   parameter pack is a non-deduced context.  */
continue;
 
+  /* [temp.deduct.conv] only applies to the deduction of the return
+type, which is always the first argument here.  Other arguments
+(notably, explicit object parameters) should undergo normal
+call-like unification.  */
+  unification_kind_t kind = strict;
+  if (strict == DEDUCE_CONV && ia > 0)
+   kind = DEDUCE_CALL;
+
   arg = args[ia];
   ++ia;
 
-  if (unify_one_argument (tparms, full_targs, parm, arg, subr, strict,
+  if (unify_one_argument (tparms, full_targs, parm, arg, subr, kind,
  explain_p))
return 1;
 }
@@ -23324,6 +23332,8 @@ type_unification_real (tree tparms,
   && parms != void_list_node
   && TREE_CODE (TREE_VALUE (parms)) == TYPE_PACK_EXPANSION)
 {
+  gcc_assert (strict != DEDUCE_CONV);
+
   /* Unify the remaining arguments with the pack expansion type.  */
   tree argvec;
   tree parmvec = make_tree_vec (1);
diff --git a/gcc/testsuite/g++.dg/cpp23/explicit-obj-conv-op.C 
b/gcc/testsuite/g++.dg/cpp23/explicit-obj-conv-op.C
new file mode 100644
index 000..a6ae4ea1dda
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/explicit-obj-conv-op.C
@@ -0,0 +1,49 @@
+// PR c++/113629
+// { dg-do compile { target c++23 } }
+
+template  constexpr bool is_lvalue = false;
+template  constexpr bool is_lvalue = true;
+
+struct A {
+  constexpr operator bool(this auto&& self) {
+return is_lvalue;
+  }
+};
+
+constexpr A a;
+static_assert(static_cast(a));
+static_assert((bool)a);
+static_assert(!static_cast(A{}));
+static_assert(!(bool)A{});
+
+struct B : A {};
+
+constexpr B b;
+static_assert(static_cast(b));
+static_assert((bool)b);
+static_assert(!static_cast(B{}));
+static_assert(!(bool)B{});
+
+struct C {
+  template 
+  explicit constexpr operator R(this T&&) {
+return is_lvalue;
+  }
+};
+
+constexpr C c;
+static_assert(static_cast(c));
+static_assert((bool)c);
+static_assert(!static_cast(C{}));
+static_assert(!(bool)C{});
+
+struct D {
+  explicit constexpr operator bool(this const D&) { return true; }
+  explicit constexpr operator bool(this const D&&) { return false; }
+};
+
+constexpr D d;
+static_assert(static_cast(d));
+static_assert((bool)d);
+static_assert(!static_cast(D{}));
+static_assert(!(bool)D{});


[gcc r14-9331] tree-optimization/114249 - ICE with BB reduction vectorization

2024-03-06 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:3a910114fdb2aa76495c4c748acf6b9c7fbecc89

commit r14-9331-g3a910114fdb2aa76495c4c748acf6b9c7fbecc89
Author: Richard Biener 
Date:   Wed Mar 6 09:25:15 2024 +0100

tree-optimization/114249 - ICE with BB reduction vectorization

When we scrap the last def of an odd lane numbered BB reduction
we can end up recording a pattern def which will later wreck
code generation.  The following puts this logic where it better
belongs, avoiding this issue.

PR tree-optimization/114249
* tree-vect-slp.cc (vect_build_slp_instance): Move making
a BB reduction lane number even ...
(vect_slp_check_for_roots): ... here to avoid leaking
pattern defs.

* gcc.dg/vect/bb-slp-pr114249.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c | 20 
 gcc/tree-vect-slp.cc| 20 ++--
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c
new file mode 100644
index 000..64c93cd9a2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114249.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+
+enum { SEG_THIN_POOL } read_only;
+struct {
+  unsigned skip_block_zeroing;
+  unsigned ignore_discard;
+  unsigned no_discard_passdown;
+  unsigned error_if_no_space;
+} _thin_pool_emit_segment_line_seg;
+void dm_snprintf();
+void _emit_segment()
+{
+  int features =
+  (_thin_pool_emit_segment_line_seg.error_if_no_space ? 1 : 0) +
+  (read_only ? 1 : 0) +
+  (_thin_pool_emit_segment_line_seg.ignore_discard ? 1 : 0) +
+  (_thin_pool_emit_segment_line_seg.no_discard_passdown ? 1 : 0) +
+  (_thin_pool_emit_segment_line_seg.skip_block_zeroing ? 1 : 0);
+  dm_snprintf(features);
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 324400db19e..527b06c9f9c 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3288,15 +3288,6 @@ vect_build_slp_instance (vec_info *vinfo,
 "  %G", scalar_stmts[i]->stmt);
 }
 
-  /* When a BB reduction doesn't have an even number of lanes
- strip it down, treating the remaining lane as scalar.
- ???  Selecting the optimal set of lanes to vectorize would be nice
- but SLP build for all lanes will fail quickly because we think
- we're going to need unrolling.  */
-  if (kind == slp_inst_kind_bb_reduc
-  && (scalar_stmts.length () & 1))
-remain.safe_insert (0, gimple_get_lhs (scalar_stmts.pop ()->stmt));
-
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
   bool *matches = XALLOCAVEC (bool, group_size);
@@ -7549,6 +7540,7 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
  /* ???  For now do not allow mixing ops or externs/constants.  */
  bool invalid = false;
  unsigned remain_cnt = 0;
+ unsigned last_idx = 0;
  for (unsigned i = 0; i < chain.length (); ++i)
{
  if (chain[i].code != code)
@@ -7563,7 +7555,13 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
  (chain[i].op)->stmt)
  != chain[i].op))
remain_cnt++;
+ else
+   last_idx = i;
}
+ /* Make sure to have an even number of lanes as we later do
+all-or-nothing discovery, not trying to split further.  */
+ if ((chain.length () - remain_cnt) & 1)
+   remain_cnt++;
  if (!invalid && chain.length () - remain_cnt > 1)
{
  vec stmts;
@@ -7576,7 +7574,9 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
  stmt_vec_info stmt_info;
  if (chain[i].dt == vect_internal_def
  && ((stmt_info = bb_vinfo->lookup_def (chain[i].op)),
- gimple_get_lhs (stmt_info->stmt) == chain[i].op))
+ gimple_get_lhs (stmt_info->stmt) == chain[i].op)
+ && (i != last_idx
+ || (stmts.length () & 1)))
stmts.quick_push (stmt_info);
  else
remain.quick_push (chain[i].op);


[gcc r14-9330] tree-optimization/114246 - invalid call argument from DSE

2024-03-06 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:0249744a9fe0775c2c895727aeebec4c59fd5f95

commit r14-9330-g0249744a9fe0775c2c895727aeebec4c59fd5f95
Author: Richard Biener 
Date:   Wed Mar 6 09:02:31 2024 +0100

tree-optimization/114246 - invalid call argument from DSE

The following makes sure to strip type conversions added by
build_fold_addr_expr before placing the result in a call argument.

PR tree-optimization/114246
* tree-ssa-dse.cc (increment_start_addr): Strip useless
type conversions from the adjusted address.

* gcc.dg/torture/pr114246.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr114246.c | 11 +++
 gcc/tree-ssa-dse.cc |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/torture/pr114246.c 
b/gcc/testsuite/gcc.dg/torture/pr114246.c
new file mode 100644
index 000..eb20db594cd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr114246.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-w" } */
+
+int a, b;
+
+void
+foo (void)
+{
+  __builtin_memcpy (, (char *) - 1, 2);
+  __builtin_memcpy (, , 1);
+}
diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
index 7c348516ddf..fce4fc76a56 100644
--- a/gcc/tree-ssa-dse.cc
+++ b/gcc/tree-ssa-dse.cc
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgloop.h"
 #include "tree-data-ref.h"
 #include "internal-fn.h"
+#include "tree-ssa.h"
 
 /* This file implements dead store elimination.
 
@@ -658,6 +659,7 @@ increment_start_addr (gimple *stmt, tree *where, int 
increment)
  *where,
  build_int_cst (ptr_type_node,
 increment)));
+  STRIP_USELESS_TYPE_CONVERSION (*where);
 }
 
 /* STMT is builtin call that writes bytes in bitmap ORIG, some bytes are dead


[gcc r14-9329] i386: Fix up the vzeroupper REG_DEAD/REG_UNUSED note workaround [PR114190]

2024-03-06 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:1157d5de35b41eabe5ee51d532224864173c37bd

commit r14-9329-g1157d5de35b41eabe5ee51d532224864173c37bd
Author: Jakub Jelinek 
Date:   Wed Mar 6 09:35:37 2024 +0100

i386: Fix up the vzeroupper REG_DEAD/REG_UNUSED note workaround [PR114190]

When writing the rest_of_handle_insert_vzeroupper workaround to manually
remove all the REG_DEAD/REG_UNUSED notes from the IL, I missed that
there is a df_analyze () call right after it and that the problems added
earlier in the pass, like df_note_add_problem () done during mode switching,
don't affect just the next df_analyze () call right after it, but all
other df_analyze () calls until the end of the current pass where
df_finish_pass removes the optional problems.

So, as can be seen on the following patch, the workaround doesn't actually
work there, because while rest_of_handle_insert_vzeroupper carefully removes
all REG_DEAD/REG_UNUSED notes, the df_analyze () call at the end of the
function immediately adds them in again (so, I must say I have no idea
why the workaround worked on the earlier testcases).

Now, I could move the df_analyze () call just before the REG_DEAD/REG_UNUSED
note removal loop, but I think the following patch is better, because
the df_analyze () call doesn't have to recompute the problem when we don't
care about it and will actively strip all traces of it away.

2024-03-06  Jakub Jelinek  

PR rtl-optimization/114190
* config/i386/i386-features.cc (rest_of_handle_insert_vzeroupper):
Call df_remove_problem for df_note before calling df_analyze.

* gcc.target/i386/avx-pr114190.c: New test.

Diff:
---
 gcc/config/i386/i386-features.cc |  1 +
 gcc/testsuite/gcc.target/i386/avx-pr114190.c | 27 +++
 2 files changed, 28 insertions(+)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index d3b9ae81025..1de2a07ed75 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -2690,6 +2690,7 @@ rest_of_handle_insert_vzeroupper (void)
}
}
 
+  df_remove_problem (df_note);
   df_analyze ();
   return 0;
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx-pr114190.c 
b/gcc/testsuite/gcc.target/i386/avx-pr114190.c
new file mode 100644
index 000..fc5b2615de2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-pr114190.c
@@ -0,0 +1,27 @@
+/* PR rtl-optimization/114190 */
+/* { dg-do run { target avx } } */
+/* { dg-options "-O2 -fno-dce -fharden-compares -mavx 
--param=max-rtl-if-conversion-unpredictable-cost=136 -mno-avx512f -Wno-psabi" } 
*/
+
+#include "avx-check.h"
+
+typedef unsigned char U __attribute__((vector_size (64)));
+typedef unsigned int V __attribute__((vector_size (64)));
+U u;
+
+V
+foo (V a, V b)
+{
+  u[0] = __builtin_sub_overflow (0, (int) a[0], [b[7] & 5]) ? -u[1] : -b[3];
+  b ^= 0 != b;
+  return (V) u + (V) a + (V) b;
+}
+
+static void
+avx_test (void)
+{
+  V x = foo ((V) { 1 }, (V) { 0, 0, 0, 1 });
+  if (x[0] != -1U)
+__builtin_abort ();
+  if (x[3] != -2U)
+__builtin_abort ();
+}