Re: [PATCH 1/3] Fix printing COMPOUND_EXPR in .original [PR23872]

2024-05-03 Thread Richard Biener
On Thu, May 2, 2024 at 11:40 PM Andrew Pinski  wrote:
>
> Starting with the merge of the openmp branch into the trunk
> (r0-73077-g953ff28998b59b), COMPOUND_EXPR started to be printed
> as `expr; , expr` which is wrong. This was due to the wrong
> conversion of dumping_stmts into `!(flags & TDF_SLIM)`. That is wrong
> as we are not dumping stmts at this point (`!(flags & TDF_SLIM)` was always
> true for this case as TDF_SLIM case was handled before hand). So switch it
> to be always false.
>
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

> gcc/ChangeLog:
>
> PR middle-end/23872
> * tree-pretty-print.cc (dump_generic_node <case COMPOUND_EXPR>): Fix
> calls to dump_generic_node and also remove unreachable code that is 
> testing
> `flags & TDF_SLIM`.
>
> gcc/testsuite/ChangeLog:
>
> * gfortran.dg/gomp/atomic-21.f90: Update testcase for the removal of 
> `;`.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/testsuite/gfortran.dg/gomp/atomic-21.f90 |  4 ++--
>  gcc/tree-pretty-print.cc | 24 ++--
>  2 files changed, 9 insertions(+), 19 deletions(-)
>
> diff --git a/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90 
> b/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90
> index febcdbbacfb..35099294d7a 100644
> --- a/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90
> +++ b/gcc/testsuite/gfortran.dg/gomp/atomic-21.f90
> @@ -56,7 +56,7 @@ subroutine foobar()
>  endif
>
>  !  TARGET_EXPR  = #pragma omp atomic capture acq_rel
> -!TARGET_EXPR  = NON_LVALUE_EXPR  = 
> *TARGET_EXPR  == oo> ? pp : *TARGET_EXPR ;, if 
> (TARGET_EXPR )
> +!TARGET_EXPR  = NON_LVALUE_EXPR  = 
> *TARGET_EXPR  == oo> ? pp : *TARGET_EXPR , if 
> (TARGET_EXPR )
>  !{
>  !  <<< Unknown tree: void_cst >>>
>  !}
> @@ -66,7 +66,7 @@ subroutine foobar()
>  !};
>  !
>  ! { dg-final { scan-tree-dump-times "TARGET_EXPR  = #pragma 
> omp atomic capture acq_rel" 1 "original" } }
> -! { dg-final { scan-tree-dump-times "TARGET_EXPR  = 
> NON_LVALUE_EXPR  = \\*TARGET_EXPR  > == oo> \\? pp : \\*TARGET_EXPR ;, if \\(TARGET_EXPR 
> \\)" 1 "original" } }
> +! { dg-final { scan-tree-dump-times "TARGET_EXPR  = 
> NON_LVALUE_EXPR  = \\*TARGET_EXPR  > == oo> \\? pp : \\*TARGET_EXPR , if \\(TARGET_EXPR 
> \\)" 1 "original" } }
>  ! { dg-final { scan-tree-dump-times "<<< Unknown tree: void_cst >>>" 1 
> "original" } }
>  ! { dg-final { scan-tree-dump-times "qq = TARGET_EXPR ;" 1 
> "original" } }
>
> diff --git a/gcc/tree-pretty-print.cc b/gcc/tree-pretty-print.cc
> index c935a7da7d1..f9ad8562078 100644
> --- a/gcc/tree-pretty-print.cc
> +++ b/gcc/tree-pretty-print.cc
> @@ -2856,31 +2856,21 @@ dump_generic_node (pretty_printer *pp, tree node, int 
> spc, dump_flags_t flags,
>   }
>
> dump_generic_node (pp, TREE_OPERAND (node, 0),
> -  spc, flags, !(flags & TDF_SLIM));
> -   if (flags & TDF_SLIM)
> - newline_and_indent (pp, spc);
> -   else
> - {
> -   pp_comma (pp);
> -   pp_space (pp);
> - }
> +  spc, flags, false);
> +   pp_comma (pp);
> +   pp_space (pp);
>
> for (tp = &TREE_OPERAND (node, 1);
>  TREE_CODE (*tp) == COMPOUND_EXPR;
>  tp = &TREE_OPERAND (*tp, 1))
>   {
> dump_generic_node (pp, TREE_OPERAND (*tp, 0),
> -  spc, flags, !(flags & TDF_SLIM));
> -   if (flags & TDF_SLIM)
> - newline_and_indent (pp, spc);
> -   else
> - {
> -   pp_comma (pp);
> -   pp_space (pp);
> - }
> +  spc, flags, false);
> +   pp_comma (pp);
> +   pp_space (pp);
>   }
>
> -   dump_generic_node (pp, *tp, spc, flags, !(flags & TDF_SLIM));
> +   dump_generic_node (pp, *tp, spc, flags, false);
>}
>break;
>
> --
> 2.43.0
>


[PATCH] Add default bitmap obstack allocation check

2024-05-03 Thread Richard Biener
The following adds a check that the global bitmap obstack is initialized
when allocating a bitmap from it.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* bitmap.cc (bitmap_alloc): When using the global bitmap obstack
assert that it is initialized.
---
 gcc/bitmap.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/bitmap.cc b/gcc/bitmap.cc
index 459e32c1ad1..0905cde696f 100644
--- a/gcc/bitmap.cc
+++ b/gcc/bitmap.cc
@@ -781,7 +781,10 @@ bitmap_alloc (bitmap_obstack *bit_obstack MEM_STAT_DECL)
   bitmap map;
 
   if (!bit_obstack)
-bit_obstack = &bitmap_default_obstack;
+{
+  gcc_assert (bitmap_default_obstack_depth > 0);
+  bit_obstack = &bitmap_default_obstack;
+}
   map = bit_obstack->heads;
   if (map)
 bit_obstack->heads = (class bitmap_head *) map->first;
-- 
2.35.3


Re: [PATCH v2] Silence two instances of -Wcalloc-transposed-args

2024-05-03 Thread Richard Biener
On Fri, May 3, 2024 at 10:03 AM Peter Damianov  wrote:
>
> libgcc/
> * libgcov-util.c (tag_counters): Swap order of arguments to xcalloc.
> (topn_to_memory_representation): Likewise.

Thanks, I pushed this for you.

Richard.

> Signed-off-by: Peter Damianov 
> ---
>  libgcc/libgcov-util.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
> index ba4b90a480d..f443408c4ab 100644
> --- a/libgcc/libgcov-util.c
> +++ b/libgcc/libgcov-util.c
> @@ -211,8 +211,8 @@ tag_counters (unsigned tag, int length)
>gcc_assert (k_ctrs[tag_ix].num == 0);
>k_ctrs[tag_ix].num = n_counts;
>
> -  k_ctrs[tag_ix].values = values = (gcov_type *) xcalloc (sizeof (gcov_type),
> - n_counts);
> +  k_ctrs[tag_ix].values = values = (gcov_type *) xcalloc (n_counts,
> + sizeof (gcov_type));
>gcc_assert (values);
>
>if (length > 0)
> @@ -526,7 +526,7 @@ topn_to_memory_representation (struct gcov_ctr_info *info)
>if (n > 0)
> {
>   struct gcov_kvp *tuples
> -   = (struct gcov_kvp *)xcalloc (sizeof (struct gcov_kvp), n);
> +   = (struct gcov_kvp *)xcalloc (n, sizeof (struct gcov_kvp));
>   for (unsigned i = 0; i < n - 1; i++)
> tuples[i].next = &tuples[i + 1];
>   for (unsigned i = 0; i < n; i++)
> --
> 2.39.2
>


Re: [PATCH] libstdc++: Update powerpc-linux-gnu baseline_symbols

2024-05-03 Thread Richard Biener
On Fri, May 3, 2024 at 12:55 PM Jonathan Wakely  wrote:
>
> On Fri, 3 May 2024 at 11:51, Jonathan Wakely  wrote:
> >
> > On Fri, 3 May 2024 at 10:30, Andreas Schwab wrote:
> > >
> > > * config/abi/post/powerpc-linux-gnu/baseline_symbols.txt: Update.
> > > * config/abi/post/powerpc64-linux-gnu/32/baseline_symbols.txt: 
> > > Update.
> >
> > OK thanks
>
> Also OK for gcc-14 as far as I'm concerned, but it needs RM approval
> for that now.

OK for the branch as well.

Richard.

>
> >
> >
> > > ---
> > >  .../powerpc-linux-gnu/baseline_symbols.txt| 98 +++
> > >  .../32/baseline_symbols.txt   | 98 +++
> > >  2 files changed, 196 insertions(+)
> > >
> > > diff --git 
> > > a/libstdc++-v3/config/abi/post/powerpc-linux-gnu/baseline_symbols.txt 
> > > b/libstdc++-v3/config/abi/post/powerpc-linux-gnu/baseline_symbols.txt
> > > index 0209003243a..c38386543b6 100644
> > > --- a/libstdc++-v3/config/abi/post/powerpc-linux-gnu/baseline_symbols.txt
> > > +++ b/libstdc++-v3/config/abi/post/powerpc-linux-gnu/baseline_symbols.txt
> > > @@ -497,7 +497,12 @@ FUNC:_ZNKSt11__timepunctIwE7_M_daysEPPKw@@GLIBCXX_3.4
> > >  FUNC:_ZNKSt11__timepunctIwE8_M_am_pmEPPKw@@GLIBCXX_3.4
> > >  FUNC:_ZNKSt11__timepunctIwE9_M_monthsEPPKw@@GLIBCXX_3.4
> > >  FUNC:_ZNKSt11logic_error4whatEv@@GLIBCXX_3.4
> > > +FUNC:_ZNKSt12__basic_fileIcE13native_handleEv@@GLIBCXX_3.4.33
> > >  FUNC:_ZNKSt12__basic_fileIcE7is_openEv@@GLIBCXX_3.4
> > > +FUNC:_ZNKSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
> > > +FUNC:_ZNKSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
> > > +FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
> > > +FUNC:_ZNKSt12__shared_ptrINSt10filesystem7__cxx114_DirELN9__gnu_cxx12_Lock_policyE2EEcvbEv@@GLIBCXX_3.4.31
> > >  FUNC:_ZNKSt12bad_weak_ptr4whatEv@@GLIBCXX_3.4.15
> > >  FUNC:_ZNKSt12future_error4whatEv@@GLIBCXX_3.4.14
> > >  FUNC:_ZNKSt12strstreambuf6pcountEv@@GLIBCXX_3.4
> > > @@ -810,6 +815,13 @@ FUNC:_ZNKSt5ctypeIwE8do_widenEPKcS2_Pw@@GLIBCXX_3.4
> > >  FUNC:_ZNKSt5ctypeIwE8do_widenEc@@GLIBCXX_3.4
> > >  FUNC:_ZNKSt5ctypeIwE9do_narrowEPKwS2_cPc@@GLIBCXX_3.4
> > >  FUNC:_ZNKSt5ctypeIwE9do_narrowEwc@@GLIBCXX_3.4
> > > +FUNC:_ZNKSt6chrono4tzdb11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE@@GLIBCXX_3.4.31
> > > +FUNC:_ZNKSt6chrono4tzdb12current_zoneEv@@GLIBCXX_3.4.31
> > > +FUNC:_ZNKSt6chrono9time_zone15_M_get_sys_infoENS_10time_pointINS_3_V212system_clockENS_8durationIxSt5ratioILx1ELx1EE@@GLIBCXX_3.4.31
> > > +FUNC:_ZNKSt6chrono9time_zone17_M_get_local_infoENS_10time_pointINS_7local_tENS_8durationIxSt5ratioILx1ELx1EE@@GLIBCXX_3.4.31
> > > +FUNC:_ZNKSt6chrono9tzdb_list14const_iteratordeEv@@GLIBCXX_3.4.31
> > > +FUNC:_ZNKSt6chrono9tzdb_list5beginEv@@GLIBCXX_3.4.31
> > > +FUNC:_ZNKSt6chrono9tzdb_list5frontEv@@GLIBCXX_3.4.31
> > >  FUNC:_ZNKSt6locale2id5_M_idEv@@GLIBCXX_3.4
> > >  FUNC:_ZNKSt6locale4nameB5cxx11Ev@@GLIBCXX_3.4.21
> > >  FUNC:_ZNKSt6locale4nameEv@@GLIBCXX_3.4
> > > @@ -3285,9 +3297,18 @@ 
> > > FUNC:_ZNSt6__norm15_List_node_base7_M_hookEPS0_@@GLIBCXX_3.4.14
> > >  FUNC:_ZNSt6__norm15_List_node_base7reverseEv@@GLIBCXX_3.4.9
> > >  FUNC:_ZNSt6__norm15_List_node_base8transferEPS0_S1_@@GLIBCXX_3.4.9
> > >  FUNC:_ZNSt6__norm15_List_node_base9_M_unhookEv@@GLIBCXX_3.4.14
> > > +FUNC:_ZNSt6chrono11locate_zoneESt17basic_string_viewIcSt11char_traitsIcEE@@GLIBCXX_3.4.31
> > > +FUNC:_ZNSt6chrono11reload_tzdbEv@@GLIBCXX_3.4.31
> > > +FUNC:_ZNSt6chrono12current_zoneEv@@GLIBCXX_3.4.31
> > >  FUNC:_ZNSt6chrono12system_clock3nowEv@@GLIBCXX_3.4.11
> > > +FUNC:_ZNSt6chrono13get_tzdb_listEv@@GLIBCXX_3.4.31
> > > +FUNC:_ZNSt6chrono14remote_versionB5cxx11Ev@@GLIBCXX_3.4.31
> > >  FUNC:_ZNSt6chrono3_V212steady_clock3nowEv@@GLIBCXX_3.4.19
> > >  FUNC:_ZNSt6chrono3_V212system_clock3nowEv@@GLIBCXX_3.4.19
> > > +FUNC:_ZNSt6chrono8get_tzdbEv@@GLIBCXX_3.4.31
> > > +FUNC:_ZNSt6chrono9tzdb_list11erase_afterENS0_14const_iteratorE@@GLIBCXX_3.4.31
> > > +FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEi@@GLIBCXX_3.4.31
> > > +FUNC:_ZNSt6chrono9tzdb_list14const_iteratorppEv@@GLIBCXX_3.4.31
> > >  FUNC:_ZNSt6gslice8_IndexerC1EjRKSt8valarrayIjES4_@@GLIBCXX_3.4
> > >  FUNC:_ZNSt6gslice8_IndexerC2EjRKSt8valarrayIjES4_@@GLIBCXX_3.4
> > >  FUNC:_ZNSt6locale11_M_coalesceERKS_S1_i@@GLIBCXX_3.4
> > > @@ -3384,6 +3405,7 @@ 
> > > FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_disposeEv@@GLIBCX
> > >  
> > > FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEjjPKcj@@GLIBCXX_3.4.21
> > >  
> > > FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_S_compareEjj@@GLIBCXX_3.4.21
> > >  
> > > FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_capacityEj@@GLIBCXX_3.4.21
> > > 

[PATCH] Avoid changing type in the type_hash_canon hash

2024-05-03 Thread Richard Biener
When building a type and type_hash_canon returns an existing type
avoid changing it, in particular its TYPE_CANONICAL.

Bootstrapped and tested on x86_64-unknown-linux-gnu for all languages.

OK for trunk?

Thanks,
Richard.

PR middle-end/114931
* tree.cc (build_array_type_1): Return early when type_hash_canon
returned an older existing type.
(build_function_type): Likewise.
(build_method_type_directly): Likewise.
(build_offset_type): Likewise.
---
 gcc/tree.cc | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/tree.cc b/gcc/tree.cc
index 83f3bf306af..780662549fe 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -7352,7 +7352,10 @@ build_array_type_1 (tree elt_type, tree index_type, bool 
typeless_storage,
   if (shared)
 {
   hashval_t hash = type_hash_canon_hash (t);
+  tree probe_type = t;
   t = type_hash_canon (hash, t);
+  if (t != probe_type)
+   return t;
 }
 
   if (TYPE_CANONICAL (t) == t && set_canonical)
@@ -7509,7 +7512,10 @@ build_function_type (tree value_type, tree arg_types,
 
   /* If we already have such a type, use the old one.  */
   hashval_t hash = type_hash_canon_hash (t);
+  tree probe_type = t;
   t = type_hash_canon (hash, t);
+  if (t != probe_type)
+return t;
 
   /* Set up the canonical type. */
   any_structural_p   = TYPE_STRUCTURAL_EQUALITY_P (value_type);
@@ -7663,7 +7669,10 @@ build_method_type_directly (tree basetype,
 
   /* If we already have such a type, use the old one.  */
   hashval_t hash = type_hash_canon_hash (t);
+  tree probe_type = t;
   t = type_hash_canon (hash, t);
+  if (t != probe_type)
+return t;
 
   /* Set up the canonical type. */
   any_structural_p
@@ -7720,7 +7729,10 @@ build_offset_type (tree basetype, tree type)
 
   /* If we already have such a type, use the old one.  */
   hashval_t hash = type_hash_canon_hash (t);
+  tree probe_type = t;
   t = type_hash_canon (hash, t);
+  if (t != probe_type)
+return t;
 
   if (!COMPLETE_TYPE_P (t))
 layout_type (t);
-- 
2.35.3


Re: [PATCH] PR middle-end/111701: signbit(x*x) vs -fsignaling-nans

2024-05-03 Thread Richard Biener
On Thu, May 2, 2024 at 3:48 PM Roger Sayle  wrote:
>
>
> > From: Richard Biener 
> > On Thu, May 2, 2024 at 11:34 AM Roger Sayle 
> > wrote:
> > >
> > >
> > > > From: Richard Biener  On Fri, Apr 26,
> > > > 2024 at 10:19 AM Roger Sayle 
> > > > wrote:
> > > > >
> > > > > This patch addresses PR middle-end/111701 where optimization of
> > > > > signbit(x*x) using tree_nonnegative_p incorrectly eliminates a
> > > > > floating point multiplication when the operands may potentially be
> > > > > signaling
> > > > NaNs.
> > > > >
> > > > > The above bug fix also provides a solution or work-around to the
> > > > > tricky issue in PR middle-end/111701, that the results of IEEE
> > > > > operations on NaNs are specified to return a NaN result, but fail
> > > > > to
> > > > > (precisely) specify the exact NaN representation of this result.
> > > > > Hence for the operation "-NaN*-NaN" different hardware
> > > > > implementations
> > > > > (targets) return different results.  Ultimately knowing what the
> > > > > resulting NaN "payload" of an operation is can only be known by
> > > > > executing that operation at run-time, and I'd suggest that GCC's
> > > > > -fsignaling-nans provides a mechanism for handling code that uses
> > > > > NaN representations for communication/signaling (which is a
> > > > > different but related
> > > > concept to IEEE's sNaN).
> > > > >
> > > > > One nice thing about this patch, which may or may not be a P2
> > > > > regression fix, is that it only affects (improves) code compiled
> > > > > with -fsignaling-nans so should be extremely safe even for this point 
> > > > > in stage
> > 3.
> > > > >
> > > > > This patch has been tested on x86_64-pc-linux-gnu with make
> > > > > bootstrap and make -k check, both with and without
> > > > > --target_board=unix{-m32} with no new failures.  Ok for mainline?
> > > >
> > > > Hmm, but the bugreports are not about sNaN but about the fact that
> > > > the sign of the NaN produced by 0/0 or by -NaN*-NaN is not well-defined.
> > > > So I don't think this is the correct approach to fix this.  We'd
> > > > instead have to use tree_expr_maybe_nan_p () - and if NaN*NaN cannot
> > > > be -NaN (is that at least
> > > > specified?) then the RECURSE path should still work as well.
> > >
> > > If we ignore the bugzilla PR for now, can we agree that if x is a
> > > signaling NaN, that we shouldn't be eliminating x*x?  i.e. that this
> > > patch fixes a real bug, but perhaps not (precisely) the one described in 
> > > PR
> > middle-end/111701.
> >
> > This might or might not be covered by -fdelete-dead-exceptions - at least 
> > in the
> > past we were OK with removing traps like for -ftrapv (-ftrapv makes signed
> > overflow no longer invoke undefined behavior) or when deleting loads that 
> > might
> > trap (but those would invoke undefined behavior).
> >
> > I bet the C standard doesn't say anything about sNaNs or how an operation 
> > with
> > it has to behave in the abstract machine.  We do document though that it
> > "disables optimizations that may change the number of exceptions visible 
> > with
> > signaling NaNs" which suggests that with -fsignaling-nans we have to 
> > preserve all
> > such traps but I am very sure DCE will simply elide unused ops here (also 
> > for other
> > FP operations with -ftrapping-math - but there we do not document that we
> > preserve all traps).
> >
> > With the patch the multiplication is only preserved because 
> > __builtin_signbit still
> > uses it.  A plain
> >
> > void foo(double x)
> > {
> >x*x;
> > }
> >
> > has the multiplication elided during gimplification already (even at -O0).
>
> void foo(double x)
> {
>   double t = x*x;
> }
>
> when compiled with -fsignaling-nans -fexceptions -fnon-call-exceptions
> doesn't exhibit the above bug.  Perhaps this short-coming of gimplification
> deserves its own Bugzilla PR?

With optimization you need -fno-delete-dead-exceptions to preserve the
multiply.  Btw, the observable trap is there even without -fnon-call-exceptions
and a trap isn't an exception.

So what I do not necessarily ag

Re: [PATCH] tree-inline: Add __builtin_stack_{save,restore} pair about inline calls with calls to alloca [PR113596]

2024-05-03 Thread Richard Biener
On Fri, 3 May 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following patch adds save_NNN = __builtin_stack_save (); ...
> __builtin_stack_restore (save_NNN);
> pair around inline calls which call alloca (alloca calls because of
> VLA vars are ignored in that decision).
> The patch doesn't change anything on whether we try to inline such calls or
> not, it just fixes the behavior when we inline them despite those checks.
> The stack save/restore restores the behavior that alloca acquired regions
> are freed at the end of the containing call.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2024-05-03  Jakub Jelinek  
> 
>   PR middle-end/113596
>   * tree-inline.cc (expand_call_inline): Emit __builtin_stack_save
>   and __builtin_stack_restore calls around inlined functions which
>   call alloca.
> 
>   * gcc.dg/pr113596.c: New test.
>   * gcc.dg/tree-ssa/pr113596.c: New test.
> 
> --- gcc/tree-inline.cc.jj 2024-04-11 11:09:07.274670922 +0200
> +++ gcc/tree-inline.cc2024-05-02 19:05:06.963750322 +0200
> @@ -4794,6 +4794,7 @@ expand_call_inline (basic_block bb, gimp
>use_operand_p use;
>gimple *simtenter_stmt = NULL;
>vec<tree> *simtvars_save;
> +  tree save_stack = NULL_TREE;
>  
>/* The gimplifier uses input_location in too many places, such as
>   internal_get_tmp_var ().  */
> @@ -5042,6 +5043,28 @@ expand_call_inline (basic_block bb, gimp
>   GSI_NEW_STMT);
>  }
>  
> +  /* If function to be inlined calls alloca, wrap the inlined function
> + in between save_stack = __builtin_stack_save (); and
> + __builtin_stack_restore (save_stack); calls.  */
> +  if (id->src_cfun->calls_alloca && !gimple_call_noreturn_p (stmt))
> +/* Don't do this for VLA allocations though, just for user alloca
> +   calls.  */
> +for (struct cgraph_edge *e = id->src_node->callees; e; e = 
> e->next_callee)
> +  if (gimple_maybe_alloca_call_p (e->call_stmt)
> +   && !gimple_call_alloca_for_var_p (e->call_stmt))
> + {
> +   tree fn = builtin_decl_implicit (BUILT_IN_STACK_SAVE);
> +   gcall *call = gimple_build_call (fn, 0);
> +   save_stack = make_ssa_name (ptr_type_node);
> +   gimple_call_set_lhs (call, save_stack);
> +   gimple_stmt_iterator si = gsi_last_bb (bb);
> +   gsi_insert_after (&si, call, GSI_NEW_STMT);
> +   struct cgraph_node *dest = cgraph_node::get_create (fn);
> +   id->dst_node->create_edge (dest, call, bb->count)->inline_failed
> + = CIF_BODY_NOT_AVAILABLE;
> +   break;
> + }
> +
>if (DECL_INITIAL (fn))
>  {
>if (gimple_block (stmt))
> @@ -5165,6 +5188,17 @@ expand_call_inline (basic_block bb, gimp
>   }
>   }
>  
> +  if (save_stack)
> +{
> +  tree fn = builtin_decl_implicit (BUILT_IN_STACK_RESTORE);
> +  gcall *call = gimple_build_call (fn, 1, save_stack);
> +  gsi_insert_before (&stmt_gsi, call, GSI_SAME_STMT);
> +  struct cgraph_node *dest = cgraph_node::get_create (fn);
> +  id->dst_node->create_edge (dest, call,
> +  return_block->count)->inline_failed
> + = CIF_BODY_NOT_AVAILABLE;
> +}
> +
>/* Reset the escaped solution.  */
>if (cfun->gimple_df)
>  {
> --- gcc/testsuite/gcc.dg/pr113596.c.jj2024-05-02 15:05:25.048642302 
> +0200
> +++ gcc/testsuite/gcc.dg/pr113596.c   2024-05-02 15:05:25.048642302 +0200
> @@ -0,0 +1,24 @@
> +/* PR middle-end/113596 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +__attribute__((noipa)) void
> +bar (char *p, int n)
> +{
> +  p[0] = 1;
> +  p[n - 1] = 2;
> +}
> +
> +static inline __attribute__((always_inline)) void
> +foo (int n)
> +{
> +  char *p = __builtin_alloca (n);
> +  bar (p, n);
> +}
> +
> +int
> +main ()
> +{
> +  for (int i = 2; i < 8192; ++i)
> +foo (i);
> +}
> --- gcc/testsuite/gcc.dg/tree-ssa/pr113596.c.jj   2024-05-02 
> 19:10:29.218455257 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr113596.c  2024-05-02 19:11:11.211895559 
> +0200
> @@ -0,0 +1,37 @@
> +/* PR middle-end/113596 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-einline" } */
> +/* { dg-final { scan-tree-dump-times "__builtin_stack_save \\\(" 3 "einline" 
> } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_stack_restore \\\(" 3 
> "einline" } } */
> +
> +void baz (char *p, int n);
> +volatile int v;
> +
> +static inline __attribute__((always_inline)) void

Re: [Backport] ifcvt: Don't lower bitfields with non-constant offsets [PR 111882]

2024-05-03 Thread Richard Biener
On Fri, 3 May 2024, Richard Ball wrote:

> Hi,
> 
> Requesting permission to backport:
> https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=24cf1f600b8ad34c68a51f48884e72d01f729893
> to gcc-13 in order to fix:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111882
> 
> Applies cleanly and with no regressions.

Go ahead.

Thanks,
Richard.


Re: cfgrtl: Fix MEM_EXPR update in duplicate_insn_chain [PR114924]

2024-05-03 Thread Richard Biener
On Thu, 2 May 2024, Alex Coplan wrote:

> Hi,
> 
> The PR shows that when cfgrtl.cc:duplicate_insn_chain attempts to
> update the MR_DEPENDENCE_CLIQUE information for a MEM_EXPR we can end up
> accidentally dropping (e.g.) an ARRAY_REF from the MEM_EXPR and end up
> replacing it with the underlying MEM_REF.  This leads to an
> inconsistency in the MEM_EXPR information, and could lead to wrong code.
> 
> While the walk down to the MEM_REF is necessary to update
> MR_DEPENDENCE_CLIQUE, we should use the outer tree expression for the
> MEM_EXPR.  This patch does that.
> 
> Bootstrapped/regtested on aarch64-linux-gnu, no regressions.  OK for
> trunk?  What about backports?

OK for trunk and branches, including 14, I think this is quite safe.

Thanks,
Richard.

> Thanks,
> Alex
> 
> gcc/ChangeLog:
> 
>   PR rtl-optimization/114924
>   * cfgrtl.cc (duplicate_insn_chain): When updating MEM_EXPRs,
>   don't strip (e.g.) ARRAY_REFs from the final MEM_EXPR.
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] MATCH: Maybe expand (T)(A + C1) * C2 and (T)(A + C1) * C2 + C3 [PR109393]

2024-05-02 Thread Richard Biener
On Thu, 2 May 2024, Manolis Tsamis wrote:

> On Thu, May 2, 2024 at 4:00 PM Richard Biener  wrote:
> >
> > On Tue, 23 Apr 2024, Manolis Tsamis wrote:
> >
> > > The original motivation for this pattern was that the following function 
> > > does
> > > not fold to 'return 1':
> > >
> > > int foo(int *a, int j)
> > > {
> > >   int k = j - 1;
> > >   return a[j - 1] == a[k];
> > > }
> > >
> > > The expression ((unsigned long) (X +- C1) * C2) appears frequently as 
> > > part of
> > > address calculations (e.g. arrays). These patterns help fold and simplify 
> > > more
> > > expressions.
> > >
> > >   PR tree-optimization/109393
> > >
> > > gcc/ChangeLog:
> > >
> > >   * match.pd: Add new patterns for ((T)(A +- CST1)) * CST2 and
> > > ((T)(A +- CST1)) * CST2 + CST3.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   * gcc.dg/pr109393.c: New test.
> > >
> > > Signed-off-by: Manolis Tsamis 
> > > ---
> > >
> > >  gcc/match.pd| 30 ++
> > >  gcc/testsuite/gcc.dg/pr109393.c | 16 
> > >  2 files changed, 46 insertions(+)
> > >  create mode 100644 gcc/testsuite/gcc.dg/pr109393.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index d401e7503e6..13c828ba70d 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -3650,6 +3650,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > (plus (convert @0) (op @2 (convert @1))
> > >  #endif
> > >
> > > +/* ((T)(A + CST1)) * CST2 + CST3
> > > + -> ((T)(A) * CST2) + ((T)CST1 * CST2 + CST3)
> > > +   Where (A + CST1) doesn't need to have a single use.  */
> > > +#if GIMPLE
> > > +  (for op (plus minus)
> > > +   (simplify
> > > +(plus (mult (convert:s (op @0 INTEGER_CST@1)) INTEGER_CST@2) 
> > > INTEGER_CST@3)
> > > + (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
> > > +   && TREE_CODE (type) == INTEGER_TYPE
> > > +   && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> > > +   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> > > +   && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
> > > +   && TYPE_OVERFLOW_WRAPS (type))
> > > > +   (op (mult @2 (convert @0)) (plus (mult @2 (convert @1)) @3)))))
> > > +#endif
> > > +
> > > +/* ((T)(A + CST1)) * CST2 -> ((T)(A) * CST2) + ((T)CST1 * CST2)  */
> > > +#if GIMPLE
> > > +  (for op (plus minus)
> > > +   (simplify
> > > +(mult (convert:s (op:s @0 INTEGER_CST@1)) INTEGER_CST@2)
> > > + (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
> >
> > Please use INTEGRAL_TYPE_P
> >
> > > +   && TREE_CODE (type) == INTEGER_TYPE
> > > +   && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> > > +   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> > > +   && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
> > > +   && TYPE_OVERFLOW_WRAPS (type))
> > > > +   (op (mult @2 (convert @0)) (mult @2 (convert @1))))))
> >
> > (mult @2 (convert @0)) is non-canonical for no good reason if @0
> > isn't constant - constant should be 2nd, please swap operands here.
> >
> > > +#endif
> >
> > The first pattern is an extension of the second, why's the first
> > necessary at all?  The add of CST3 is unchanged (OK, you seem to
> > associate here, but that's again a different thing).
> >
> > I'd say the 2nd pattern is OK with the above changes but the first
> > looks redundant.
> >
> Hi Richard,
> 
> Thanks for the comments, I'll fix these.
> 
> The difference is that the second uses op:s while the first uses just op.
> In the second case if A + CST1 has other uses expanding the pattern
> may not be a good idea but in the first case it always is because we
> know + CST1 * CST2 will merge with + CST3.

I see.  But that pattern misses a :s on the multiplication result then, 
no?  Local pattern-matching isn't the best vehicle to handle multi-use
cases, introducing context dependent canonicalizations can lead to
SCEV analysis no longer matching up for related accesses and then
data dependence analysis failing.  It's been a trade-off here.

Richard.

> 
> Thanks,
> Manolis
> 
> > Thanks,
> > Richard.
> 

[PATCH][v3] tree-optimization/114921 - _Float16 -> __bf16 isn't noop

2024-05-02 Thread Richard Biener
The vectorizer handles a _Float16 to __bf16 conversion through
vectorizable_assignment, thinking it's a noop.  The following
fixes this by requiring the same vector component mode when
checking for CONVERT_EXPR_CODE_P, being stricter than for
VIEW_CONVERT_EXPR.

This variant splits the check for VIEW_CONVERT_EXPR and
CONVERT_EXPR_CODE_P since it otherwise regresses gcc.dg/vect/vect-120.c.

PR tree-optimization/114921
* tree-vect-stmts.cc (vectorizable_assignment): Require
same vector component modes for input and output for
CONVERT_EXPR_CODE_P.
---
 gcc/tree-vect-stmts.cc | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f8d8636b139..7e571968a59 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5955,14 +5955,17 @@ vectorizable_assignment (vec_info *vinfo,
   if (!vectype_in)
 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
 
-  /* We can handle NOP_EXPR conversions that do not change the number
- of elements or the vector size.  */
-  if ((CONVERT_EXPR_CODE_P (code)
-   || code == VIEW_CONVERT_EXPR)
-  && (!vectype_in
- || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
- || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
-  GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
+  /* We can handle VIEW_CONVERT conversions that do not change the number
+ of elements or the vector size or other conversions when the component
+ mode keeps the same.  */
+  if (!vectype_in
+  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
+  || (code == VIEW_CONVERT_EXPR
+ && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
+  GET_MODE_SIZE (TYPE_MODE (vectype_in))))
+  || (CONVERT_EXPR_CODE_P (code)
+ && (TYPE_MODE (TREE_TYPE (vectype))
+ != TYPE_MODE (TREE_TYPE (vectype_in)))))
 return false;
 
   if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
-- 
2.35.3


[PATCH] Improve SLP dump and graph

2024-05-02 Thread Richard Biener
The following notes which lanes are considered live and adds an overload
to produce a graphviz graph for multiple entries into an SLP graph.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

* tree-vect-slp.cc (vect_print_slp_tree): Mark live lanes.
(dot_slp_tree): New overload for multiple entries.
---
 gcc/tree-vect-slp.cc | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 133606fa6f3..3eb326d20b5 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2759,7 +2759,9 @@ vect_print_slp_tree (dump_flags_t dump_kind, 
dump_location_t loc,
 }
   if (SLP_TREE_SCALAR_STMTS (node).exists ())
 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
-  dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
+  dump_printf_loc (metadata, user_loc, "\t%sstmt %u %G",
+  STMT_VINFO_LIVE_P (stmt_info) ? "[l] " : "",
+  i, stmt_info->stmt);
   else
 {
   dump_printf_loc (metadata, user_loc, "\t{ ");
@@ -2840,6 +2842,23 @@ dot_slp_tree (const char *fname, slp_tree node)
   fclose (f);
 }
 
+DEBUG_FUNCTION void
+dot_slp_tree (const char *fname, const vec<slp_instance> &slp_instances)
+{
+  FILE *f = fopen (fname, "w");
+  fprintf (f, "digraph {\n");
+  fflush (f);
+{
+  debug_dump_context ctx (f);
+  hash_set<slp_tree> visited;
+  for (auto inst : slp_instances)
+   dot_slp_tree (f, SLP_INSTANCE_TREE (inst), visited);
+}
+  fflush (f);
+  fprintf (f, "}\n");
+  fclose (f);
+}
+
 /* Dump a slp tree NODE using flags specified in DUMP_KIND.  */
 
 static void
-- 
2.35.3


Re: [PATCH] MATCH: Maybe expand (T)(A + C1) * C2 and (T)(A + C1) * C2 + C3 [PR109393]

2024-05-02 Thread Richard Biener
On Tue, 23 Apr 2024, Manolis Tsamis wrote:

> The original motivation for this pattern was that the following function does
> not fold to 'return 1':
> 
> int foo(int *a, int j)
> {
>   int k = j - 1;
>   return a[j - 1] == a[k];
> }
> 
> The expression ((unsigned long) (X +- C1) * C2) appears frequently as part of
> address calculations (e.g. arrays). These patterns help fold and simplify more
> expressions.
> 
>   PR tree-optimization/109393
> 
> gcc/ChangeLog:
> 
>   * match.pd: Add new patterns for ((T)(A +- CST1)) * CST2 and
> ((T)(A +- CST1)) * CST2 + CST3.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/pr109393.c: New test.
> 
> Signed-off-by: Manolis Tsamis 
> ---
> 
>  gcc/match.pd                    | 30 ++++++++++++++++++++++++++++++
>  gcc/testsuite/gcc.dg/pr109393.c | 16 ++++++++++++++++
>  2 files changed, 46 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr109393.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d401e7503e6..13c828ba70d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3650,6 +3650,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (plus (convert @0) (op @2 (convert @1))
>  #endif
>  
> +/* ((T)(A + CST1)) * CST2 + CST3
> + -> ((T)(A) * CST2) + ((T)CST1 * CST2 + CST3)
> +   Where (A + CST1) doesn't need to have a single use.  */
> +#if GIMPLE
> +  (for op (plus minus)
> +   (simplify
> +(plus (mult (convert:s (op @0 INTEGER_CST@1)) INTEGER_CST@2) 
> INTEGER_CST@3)
> + (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
> +   && TREE_CODE (type) == INTEGER_TYPE
> +   && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> +   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> +   && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
> +   && TYPE_OVERFLOW_WRAPS (type))
> +   (op (mult @2 (convert @0)) (plus (mult @2 (convert @1)) @3)))))
> +#endif
> +
> +/* ((T)(A + CST1)) * CST2 -> ((T)(A) * CST2) + ((T)CST1 * CST2)  */
> +#if GIMPLE
> +  (for op (plus minus)
> +   (simplify
> +(mult (convert:s (op:s @0 INTEGER_CST@1)) INTEGER_CST@2)
> + (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE

Please use INTEGRAL_TYPE_P

> +   && TREE_CODE (type) == INTEGER_TYPE
> +   && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
> +   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> +   && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@0))
> +   && TYPE_OVERFLOW_WRAPS (type))
> +   (op (mult @2 (convert @0)) (mult @2 (convert @1))))))

(mult @2 (convert @0)) is non-canonical for no good reason if @0
isn't constant - constant should be 2nd, please swap operands here.

> +#endif

The first pattern is an extension of the second, why's the first
necessary at all?  The add of CST3 is unchanged (OK, you seem to
associate here, but that's again a different thing).

I'd say the 2nd pattern is OK with the above changes but the first
looks redundant.

Thanks,
Richard.

> +
>  /* (T)(A) +- (T)(B) -> (T)(A +- B) only when (A +- B) could be simplified
> to a simple value.  */
>(for op (plus minus)
> diff --git a/gcc/testsuite/gcc.dg/pr109393.c b/gcc/testsuite/gcc.dg/pr109393.c
> new file mode 100644
> index 00000000000..e9051273672
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr109393.c
> @@ -0,0 +1,16 @@
> +/* PR tree-optimization/109393 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "return 1;" 2 "optimized" } } */
> +
> +int foo(int *a, int j)
> +{
> +  int k = j - 1;
> +  return a[j - 1] == a[k];
> +}
> +
> +int bar(int *a, int j)
> +{
> +  int k = j - 1;
> +  return (&a[j + 1] - 2) == &a[k];
> +}
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [Backport] tree-optimization/114672 - WIDEN_MULT_PLUS_EXPR type mismatch

2024-05-02 Thread Richard Biener
On Thu, 2 May 2024, Richard Ball wrote:

> Hi,
> 
> Requesting permission to backport: 
> https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=912753cc5f18d786e334dd425469fa7f93155661
> to fix the issue listed here:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114672
> in gcc-12 and gcc-13.

I'm fine with backporting if you have bootstrapped/tested them.

Richard.

> Thanks,
> Richard
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


[PATCH] tree-optimization/114921 - _Float16 -> __bf16 isn't noop

2024-05-02 Thread Richard Biener
The vectorizer handles a _Float16 to __bf16 conversion through
vectorizable_assignment, thinking it's a noop.  The following
fixes this by making the same-size check stricter, requiring
the same vector component mode.

Posted again for the arm CI

PR tree-optimization/114921
* tree-vect-stmts.cc (vectorizable_assignment): Require
same vector component modes for input and output.
---
 gcc/tree-vect-stmts.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f8d8636b139..adb6ef53254 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5961,8 +5961,8 @@ vectorizable_assignment (vec_info *vinfo,
|| code == VIEW_CONVERT_EXPR)
   && (!vectype_in
  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
- || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
-  GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
+ || (TYPE_MODE (TREE_TYPE (vectype))
+ != TYPE_MODE (TREE_TYPE (vectype_in)))))
 return false;
 
   if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
-- 
2.35.3


Re: [PATCH] tree-optimization/114921 - _Float16 -> __bf16 isn't noop

2024-05-02 Thread Richard Biener
On Thu, 2 May 2024, Richard Biener wrote:

> The vectorizer handles a _Float16 to __bf16 conversion through
> vectorizable_assignment, thinking it's a noop.  The following
> fixes this by making the same-size check stricter, requiring
> the same vector component mode.
> 
> Bootstrap & regtest running on x86_64-unknown-linux-gnu.  I couldn't
> manage to produce a meaningful (runtime) testcase.
> 
>   PR tree-optimization/114921
>   * tree-vect-stmts.cc (vectorizable_assignment): Require
>   same vector component modes for input and output.
> ---
>  gcc/tree-vect-stmts.cc | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index f8d8636b139..5ec053755a2 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -5961,8 +5961,7 @@ vectorizable_assignment (vec_info *vinfo,
> || code == VIEW_CONVERT_EXPR)
>&& (!vectype_in
> || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
> -   || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
> -GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
> +   || TYPE_MODE (vectype) != TYPE_MODE (vectype_in)))

- || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
-  GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
+ || (TYPE_MODE (TREE_TYPE (vectype))
+ != TYPE_MODE (TREE_TYPE (vectype_in)))))

actually so it matches the comment in the commit message.  ISTR
we have v4si and v2six2 modes on some archs that we possibly want
to inter-operate with through vectorizable_assignment.

Richard.


[PATCH] tree-optimization/114921 - _Float16 -> __bf16 isn't noop

2024-05-02 Thread Richard Biener
The vectorizer handles a _Float16 to __bf16 conversion through
vectorizable_assignment, thinking it's a noop.  The following
fixes this by making the same-size check stricter, requiring
the same vector component mode.

Bootstrap & regtest running on x86_64-unknown-linux-gnu.  I couldn't
manage to produce a meaningful (runtime) testcase.

PR tree-optimization/114921
* tree-vect-stmts.cc (vectorizable_assignment): Require
same vector component modes for input and output.
---
 gcc/tree-vect-stmts.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f8d8636b139..5ec053755a2 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5961,8 +5961,7 @@ vectorizable_assignment (vec_info *vinfo,
|| code == VIEW_CONVERT_EXPR)
   && (!vectype_in
  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
- || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
-  GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
+ || TYPE_MODE (vectype) != TYPE_MODE (vectype_in)))
 return false;
 
   if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
-- 
2.35.3


Re: [PATCH] Silence two instances of -Wcalloc-transposed-args

2024-05-02 Thread Richard Biener
On Mon, Apr 29, 2024 at 1:48 AM Peter Damianov  wrote:
>
> Signed-off-by: Peter Damianov 
> ---
>
> Fixes these warnings:
>
> ../../gcc/gcc/../libgcc/libgcov-util.c: In function 'void 
> tag_counters(unsigned int, int)':
> ../../gcc/gcc/../libgcc/libgcov-util.c:214:59: warning: 'void* calloc(size_t, 
> size_t)' sizes specified with 'sizeof' in the earlier argument and not in the 
> later argument [-Wcalloc-transposed-args]
>   214 |   k_ctrs[tag_ix].values = values = (gcov_type *) xcalloc (sizeof 
> (gcov_type),
>   |   
> ^~
> ../../gcc/gcc/../libgcc/libgcov-util.c:214:59: note: earlier argument should 
> specify number of elements, later size of each element
>
> ../../gcc/gcc/../libgcc/libgcov-util.c: In function 'void 
> topn_to_memory_representation(gcov_ctr_info*)':
> ../../gcc/gcc/../libgcc/libgcov-util.c:529:43: warning: 'void* calloc(size_t, 
> size_t)' sizes specified with 'sizeof' in the earlier argument and not in the 
> later argument [-Wcalloc-transposed-args]
>   529 | = (struct gcov_kvp *)xcalloc (sizeof (struct gcov_kvp), 
> n);
>   |   ^~~~
> ../../gcc/gcc/../libgcc/libgcov-util.c:529:43: note: earlier argument should 
> specify number of elements, later size of each element
>
> I think this can be applied as obvious.

Agreed.  Note that patches need a specially formatted ChangeLog as
part of the commit message,
like for example

libgcc/
* libgcov-util.c (tag_counters): Swap xcalloc arguments.
(topn_to_memory_representation): Likewise.

with tabs to indent.

Richard.

>  libgcc/libgcov-util.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
> index ba4b90a480d..f443408c4ab 100644
> --- a/libgcc/libgcov-util.c
> +++ b/libgcc/libgcov-util.c
> @@ -211,8 +211,8 @@ tag_counters (unsigned tag, int length)
>gcc_assert (k_ctrs[tag_ix].num == 0);
>k_ctrs[tag_ix].num = n_counts;
>
> -  k_ctrs[tag_ix].values = values = (gcov_type *) xcalloc (sizeof (gcov_type),
> - n_counts);
> +  k_ctrs[tag_ix].values = values = (gcov_type *) xcalloc (n_counts,
> + sizeof (gcov_type));
>gcc_assert (values);
>
>if (length > 0)
> @@ -526,7 +526,7 @@ topn_to_memory_representation (struct gcov_ctr_info *info)
>if (n > 0)
> {
>   struct gcov_kvp *tuples
> -   = (struct gcov_kvp *)xcalloc (sizeof (struct gcov_kvp), n);
> +   = (struct gcov_kvp *)xcalloc (n, sizeof (struct gcov_kvp));
>   for (unsigned i = 0; i < n - 1; i++)
> tuples[i].next = &tuples[i + 1];
>   for (unsigned i = 0; i < n; i++)
> --
> 2.39.2
>


Re: [PATCH] PR middle-end/111701: signbit(x*x) vs -fsignaling-nans

2024-05-02 Thread Richard Biener
On Thu, May 2, 2024 at 11:34 AM Roger Sayle  wrote:
>
>
> > From: Richard Biener 
> > On Fri, Apr 26, 2024 at 10:19 AM Roger Sayle 
> > wrote:
> > >
> > > This patch addresses PR middle-end/111701 where optimization of
> > > signbit(x*x) using tree_nonnegative_p incorrectly eliminates a
> > > floating point multiplication when the operands may potentially be 
> > > signaling
> > NaNs.
> > >
> > > The above bug fix also provides a solution or work-around to the
> > > tricky issue in PR middle-end/111701, that the results of IEEE
> > > operations on NaNs are specified to return a NaN result, but fail to
> > > (precisely) specify the exact NaN representation of this result.
> > > Hence for the operation "-NaN*-NaN" different hardware implementations
> > > (targets) return different results.  Ultimately knowing what the
> > > resulting NaN "payload" of an operation is can only be known by
> > > executing that operation at run-time, and I'd suggest that GCC's
> > > -fsignaling-nans provides a mechanism for handling code that uses NaN
> > > representations for communication/signaling (which is a different but 
> > > related
> > concept to IEEE's sNaN).
> > >
> > > One nice thing about this patch, which may or may not be a P2
> > > regression fix, is that it only affects (improves) code compiled with
> > > -fsignaling-nans so should be extremely safe even for this point in stage 
> > > 3.
> > >
> > > This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> > > and make -k check, both with and without --target_board=unix{-m32}
> > > with no new failures.  Ok for mainline?
> >
> > Hmm, but the bugreports are not about sNaN but about the fact that the sign 
> > of
> > the NaN produced by 0/0 or by -NaN*-NaN is not well-defined.
> > So I don't think this is the correct approach to fix this.  We'd instead 
> > have to use
> > tree_expr_maybe_nan_p () - and if NaN*NaN cannot be -NaN (is that at least
> > specified?) then the RECURSE path should still work as well.
>
> If we ignore the bugzilla PR for now, can we agree that if x is a signaling 
> NaN,
> that we shouldn't be eliminating x*x?  i.e. that this patch fixes a real bug, 
> but
> perhaps not (precisely) the one described in PR middle-end/111701.

This might or might not be covered by -fdelete-dead-exceptions - at least in
the past we were OK with removing traps like for -ftrapv (-ftrapv makes
signed overflow no longer invoke undefined behavior) or when deleting
loads that might trap (but those would invoke undefined behavior).

I bet the C standard doesn't say anything about sNaNs or how an operation
with it has to behave in the abstract machine.  We do document though
that it "disables optimizations that may change the number of
exceptions visible with
signaling NaNs" which suggests that with -fsignaling-nans we have to preserve
all such traps but I am very sure DCE will simply elide unused ops here
(also for other FP operations with -ftrapping-math - but there we do
not document
that we preserve all traps).

With the patch the multiplication is only preserved because __builtin_signbit
still uses it.  A plain

void foo(double x)
{
   x*x;
}

has the multiplication elided during gimplification already (even at -O0).

So I don't think the patch is a meaningful improvement as to preserve
multiplications of sNaNs.

Richard.

> Once the signaling NaN case is correctly handled, the use of -fsignaling-nans
> can be used as a workaround for PR 111701, allowing it to perhaps be reduced
> from a P2 to a P3 regression (or even not a bug if the qNaN case is undefined 
> behavior).
> When I wrote this patch I was trying to help with GCC 14's stage 3.
>
> > > 2024-04-26  Roger Sayle  
> > >
> > > gcc/ChangeLog
> > > PR middle-end/111701
> > > * fold-const.cc (tree_binary_nonnegative_warnv_p) <case MULT_EXPR>:
> > > Split handling of floating point and integer types.  For equal
> > > floating point operands, avoid optimization if the operand may be
> > > a signaling NaN.
> > >
> > > gcc/testsuite/ChangeLog
> > > PR middle-end/111701
> > > * gcc.dg/pr111701-1.c: New test case.
> > > * gcc.dg/pr111701-2.c: Likewise.
> > >
>
>


Re: [C PATCH] PR c/109618: ICE-after-error from error_mark_node.

2024-05-02 Thread Richard Biener
On Tue, Apr 30, 2024 at 5:14 PM Roger Sayle  wrote:
>
> > On Tue, Apr 30, 2024 at 10:23 AM Roger Sayle 
> > wrote:
> > > Hi Richard,
> > > Thanks for looking into this.
> > >
> > > It’s not the call to size_binop_loc (for CEIL_DIV_EXPR) that's
> > > problematic, but the call to fold_convert_loc (loc, size_type_node, 
> > > value) on line
> > 4009 of c-common.cc.
> > > At this point, value is (NOP_EXPR:sizetype (VAR_DECL:error_mark_node)).
> >
> > I see.  Can we catch this when we build (NOP_EXPR:sizetype
> > (VAR_DECL:error_mark_node))
> > and instead have it "build" error_mark_node?
>
> That's the tricky part.  At the point the NOP_EXPR is built the VAR_DECL's 
> type
> is valid.  It's later when this variable gets redefined with a conflicting 
> type that
> the shared VAR_DECL gets modified, setting its type to error_mark_node.
> Mutating this shared node, then potentially introduces error_operand_p at
> arbitrary places deep within an expression.

Ugh, I see.  I wonder if we can avoid setting the VAR_DECL type to
error_mark_node,
it's bad to change existing IL to some random invalid state.  Can we
instead make
future name lookup on the then not merged decl fail?  I suppose we made it
error_mark_node to improve error recovery but as can be seen here the way we
do this leads to issues.

> Fortunately, we only have to
> worry about this in the unusual/exceptional case that seen_error() is true.

But unfortunately there might be many places this would be necessary, so
it doesn't look very maintainable to me.

Marek?

Thanks,
Richard.

>
> > > Ultimately, it's the code in match.pd /* Handle cases of two
> > > conversions in a row.  */ with the problematic line being (match.pd:4748):
> > >   unsigned int inside_prec = element_precision (inside_type);
> > >
> > > Here inside_type is error_mark_node, and so tree type checking in
> > > element_precision throws an internal_error.
> > >
> > > There doesn’t seem to be a good way to fix this in element_precision,
> > > and it's complicated to reorganize the logic in match.pd's "with
> > > clause" inside the (ocvt (icvt@1 @0)), but perhaps a (ocvt
> > (icvt:non_error_type@1 @0))?
> > >
> > > The last place/opportunity the front-end could sanitize this operand
> > > before passing the dubious tree to the middle-end is
> > > c_sizeof_or_alignof_type (which alas doesn't appear in the backtrace due 
> > > to
> > inlining).
> > >
> > > #5  0x0227b0e9 in internal_error (
> > > gmsgid=gmsgid@entry=0x249c7b8 "tree check: expected class %qs,
> > > have %qs (%s) in %s, at %s:%d") at ../../gcc/gcc/diagnostic.cc:2232
> > > #6  0x0081e32a in tree_class_check_failed (node=0x76c1ef30,
> > > cl=cl@entry=tcc_type, file=file@entry=0x2495f3f 
> > > "../../gcc/gcc/tree.cc",
> > > line=line@entry=6795, function=function@entry=0x24961fe
> > "element_precision")
> > > at ../../gcc/gcc/tree.cc:9005
> > > #7  0x0081ef4c in tree_class_check (__t=<optimized out>,
> > __class=tcc_type,
> > > __f=0x2495f3f "../../gcc/gcc/tree.cc", __l=6795,
> > > __g=0x24961fe "element_precision") at ../../gcc/gcc/tree.h:4067
> > > #8  element_precision (type=<optimized out>, type@entry=0x76c1ef30)
> > > at ../../gcc/gcc/tree.cc:6795
> > > #9  0x017f66a4 in generic_simplify_CONVERT_EXPR (loc=201632,
> > > code=<optimized out>, type=0x76c3e7e0, _p0=0x76dc95c0)
> > > at generic-match-6.cc:3386
> > > #10 0x00c1b18c in fold_unary_loc (loc=201632, code=NOP_EXPR,
> > > type=0x76c3e7e0, op0=0x76dc95c0) at
> > > ../../gcc/gcc/fold-const.cc:9523
> > > #11 0x00c1d94a in fold_build1_loc (loc=201632, code=NOP_EXPR,
> > > type=0x76c3e7e0, op0=0x76dc95c0) at
> > > ../../gcc/gcc/fold-const.cc:14165
> > > #12 0x0094068c in c_expr_sizeof_expr (loc=loc@entry=201632,
> > expr=...)
> > > at ../../gcc/gcc/tree.h:3771
> > > #13 0x0097f06c in c_parser_sizeof_expression (parser=<optimized out>)
> > > at ../../gcc/gcc/c/c-parser.cc:9932
> > >
> > >
> > > I hope this explains what's happening.  The size_binop_loc call is a
> > > bit of a red herring that returns the same tree it is given (as
> > > TYPE_PRECISION (char_type_node) == BITS_PER_UNIT), so it's the
> > > "TYPE_SIZE_UNIT (type)" which needs to be checked for the embedded
> > 

Re: [PATCH v3 1/2] Driver: Add new -truncate option

2024-05-02 Thread Richard Biener
On Mon, Apr 29, 2024 at 1:27 AM Peter0x44  wrote:
>
> 29 Apr 2024 12:16:26 am Peter Damianov :
>
> > This commit adds a new option to the driver that truncates one file
> > after
> > linking.
> >
> > Tested likeso:
> >
> > $ gcc hello.c -c
> > $ du -h hello.o
> > 4.0K  hello.o
> > $ gcc hello.o -truncate hello.o
> > $ ./a.out
> > Hello world
> > $ du -h hello.o
> > $ 0   hello.o
> >
> > $ gcc hello.o -truncate
> > gcc: error: missing filename after '-truncate'
> >
> > The motivation for adding this is PR110710. It is used by lto-wrapper
> > to
> > truncate files in a shell-independent manner.
> >
> > Signed-off-by: Peter Damianov 
> > ---
> > gcc/common.opt |  6 ++++++
> > gcc/gcc.cc     | 14 ++++++++++++++
> > 2 files changed, 20 insertions(+)
> >
> > diff --git a/gcc/common.opt b/gcc/common.opt
> > index ad348844775..40cab3cb36a 100644
> > --- a/gcc/common.opt
> > +++ b/gcc/common.opt
> > @@ -422,6 +422,12 @@ Display target specific command line options
> > (including assembler and linker opt
> > -time
> > Driver Alias(time)
> >
> > +;; Truncate the file specified after linking.
> > +;; This option is used by lto-wrapper to reduce the peak disk-usage
> > when
> > +;; linking with many .LTRANS units.
> > +truncate
> > +Driver Separate Undocumented MissingArgError(missing filename after
> > %qs)
> > +
> > -verbose
> > Driver Alias(v)
> >
> > diff --git a/gcc/gcc.cc b/gcc/gcc.cc
> > index 728332b8153..830a4700a87 100644
> > --- a/gcc/gcc.cc
> > +++ b/gcc/gcc.cc
> > @@ -2138,6 +2138,10 @@ static int have_E = 0;
> > /* Pointer to output file name passed in with -o. */
> > static const char *output_file = 0;
> >
> > +/* Pointer to input file name passed in with -truncate.
> > +   This file should be truncated after linking. */
> > +static const char *totruncate_file = 0;
> > +
> > /* This is the list of suffixes and codes (%g/%u/%U/%j) and the
> > associated
> > temp file.  If the HOST_BIT_BUCKET is used for %j, no entry is made
> > for
> > it here.  */
> > @@ -4538,6 +4542,11 @@ driver_handle_option (struct gcc_options *opts,
> >do_save = false;
> >break;
> >
> > +case OPT_truncate:
> > +  totruncate_file = arg;
> > +  do_save = false;
> > +  break;
> > +
> > >  case OPT____:
> >/* "-###"
> >  This is similar to -v except that there is no execution
> > @@ -9286,6 +9295,11 @@ driver::final_actions () const
> >  delete_failure_queue ();
> >delete_temp_files ();
> >
> > +  if (totruncate_file != NULL && !seen_error ())
> > +/* Truncate file specified by -truncate.
> > +   Used by lto-wrapper to reduce temporary disk-space usage. */
> > +truncate(totruncate_file, 0);
> > +
> >if (print_help_list)
> >  {
> >printf (("\nFor bug reporting instructions, please see:\n"));
> > --
> > 2.39.2
> I resubmitted the patch because the previous one had a mistake.
>
> It didn't set "do_save" to false, so it resulted in problems like this:
>
> ./gcc/xgcc -truncate
> xgcc: error: missing filename after ‘-truncate’
> xgcc: fatal error: no input files
>
> ./gcc/xgcc -truncate ??
> xgcc: error: unrecognized command-line option ‘-truncate’
> xgcc: fatal error: no input files
>
> Therefore regressing some tests, and not working properly.
> After fixing this, I ran all of the LTO tests again and observed no
> failures.
>
> I'm not sure how I ever observed it working before, but I'm reasonably
> confident this is correct now.

The series is still OK.  I suppose you do not have git write access so
I am testing the series with a LTO bootstrap and will push it once
that's successful.

Thanks,
Richard.


Re: [PATCH] PR tree-opt/113673: Avoid load merging from potentially trapping additions.

2024-05-02 Thread Richard Biener
On Sun, Apr 28, 2024 at 11:11 AM Roger Sayle  wrote:
>
>
> This patch fixes PR tree-optimization/113673, a P2 ice-on-valid regression
> caused by load merging of (ptr[0]<<8)+ptr[1] when -ftrapv has been
> specified.  When the operator is | or ^ this is safe, but for addition
> of signed integer types, a trap may be generated/required, so merging this
> idiom into a single non-trapping instruction is inappropriate, confusing
> the compiler by transforming a basic block with an exception edge into one
> without.  One fix is to be more selective for PLUS_EXPR than for
> BIT_IOR_EXPR or BIT_XOR_EXPR in gimple-ssa-store-merging.cc's
> find_bswap_or_nop_1 function.
>
> An alternate solution might be to notice that in this idiom the addition
> can't overflow, but that this detail wasn't apparent when exception edges
> were added to the CFG.  In which case, it's safe to remove (or mark for
> removal) the problematic exceptional edge.  Unfortunately updating the
> CFG is a part of the compiler that I'm less familiar with.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?

Instead of

+   case PLUS_EXPR:
+ /* Don't perform load merging if this addition can trap.  */
+ if (cfun->can_throw_non_call_exceptions
+ && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+ && TYPE_OVERFLOW_TRAPS (TREE_TYPE (rhs1)))
+   return NULL;

please check stmt_can_throw_internal (cfun, stmt) - the find_bswap_or_nop_load
call in the function suffers from the same issue, so this should probably
be checked before that call even.

Thanks,
Richard.

>
> 2024-04-28  Roger Sayle  
>
> gcc/ChangeLog
> PR tree-optimization/113673
> * gimple-ssa-store-merging.cc (find_bswap_or_nop_1) <case PLUS_EXPR>:
> Don't perform load merging if a signed addition may trap.
>
> gcc/testsuite/ChangeLog
> PR tree-optimization/113673
> * g++.dg/pr113673.C: New test case.
>
>
> Thanks in advance,
> Roger
> --
>


Re: [PATCH] Update libbid according to the latest Intel Decimal Floating-Point Math Library.

2024-05-02 Thread Richard Biener
On Sun, Apr 28, 2024 at 7:53 AM liuhongt  wrote:
>
> The Intel Decimal Floating-Point Math Library is available as open-source on 
> Netlib[1].
>
> [1] https://www.netlib.org/misc/intel/.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?

OK for trunk.

Thanks,
Richard.

> libgcc/config/libbid/ChangeLog:
>
> * bid128_fma.c (add_and_round): Fix bug: the result
> of (+5E+368)*(+10E-34)+(-10E+369) was returning
> -99E+336 instead of expected
> result -10E+337.
> (bid128_ext_fma): Ditto.
> (bid64qqq_fma): Ditto.
> * bid128_noncomp.c: Change return type of bid128_class from
> int to class_t.
> * bid128_round_integral.c: Add default case to avoid compiler
> warning.
> * bid128_string.c (bid128_to_string): Replace 0x30 with '0'
> for zero digit.
> (bid128_from_string): Ditto.
> * bid32_to_bid128.c (bid128_to_bid32): Fix Bug. In addition
> to the INEXACT flag, the UNDERFLOW flag needs to be set (and
> was not) when converting an input such as
> +6931674235302037148946035460357709E+1857 to +100E-101
> * bid32_to_bid64.c (bid64_to_bid32): fix Bug, In addition to
> the INEXACT flag, the UNDERFLOW flag needs to be set (and was
> not) when converting an input such as +9991E-111
> to +100E-101. Furthermore, significant bits of NaNs are
> set correctly now. For example,  0x7c3b9aca was
> returning 0x7c02 instead of 0x 7c000100.
> * bid64_noncomp.c: Change return type of bid64_class from int
> to class_t.
> * bid64_round_integral.c (bid64_round_integral_exact): Add
> default case to avoid compiler warning.
> * bid64_string.c (bid64_from_string): Fix bug for rounding
> up. The input string "1" was returning
> +1001E+1 instead of +1000E+1.
> * bid64_to_bid128.c (bid128_to_bid64): Fix bug, in addition to
> the INEXACT flag, the UNDERFLOW flag needs to be set (and was
> not) when converting an input such as
> +99E-417 to
> +1000E-398.
> * bid_binarydecimal.c (bid32_to_binary64): Fix bug for
> conversion between binary and bid types. For example,
> 0x7c0F4240 was returning 0x7FFFA120 instead of
> expected double precision 0x7FF8.
> (binary64_to_bid32): Ditto.
> (binary80_to_bid32): Ditto.
> (binary128_to_bid32): Ditto.
> (binary80_to_bid64): Ditto.
> (binary128_to_bid64): Ditto.
> * bid_conf.h (BID_HIGH_128W): New macro.
> (BID_LOW_128W): Ditto.
> * bid_functions.h (__ENABLE_BINARY80__): Ditto.
> (ALIGN): Ditto.
> * bid_inline_add.h (get_add128): Add default case to avoid compiler
> warning.
> * bid_internal.h (get_BID64): Ditto.
> (fast_get_BID64_check_OF): Ditto.
> (ALIGN): New macro.
>
> Co-authored-by: Anderson, Cristina S 
> Co-authored-by: Akkas, Ahmet 
> Co-authored-by: Cornea, Marius 
> ---
>  libgcc/config/libbid/bid128_fma.c| 188 ++-
>  libgcc/config/libbid/bid128_noncomp.c|   2 +-
>  libgcc/config/libbid/bid128_round_integral.c |   2 +
>  libgcc/config/libbid/bid128_string.c |   7 +-
>  libgcc/config/libbid/bid32_to_bid128.c   |   3 -
>  libgcc/config/libbid/bid32_to_bid64.c|  11 +-
>  libgcc/config/libbid/bid64_noncomp.c |   2 +-
>  libgcc/config/libbid/bid64_round_integral.c  |   2 +
>  libgcc/config/libbid/bid64_string.c  |  21 ++-
>  libgcc/config/libbid/bid64_to_bid128.c   |   3 -
>  libgcc/config/libbid/bid_binarydecimal.c | 167 ++--
>  libgcc/config/libbid/bid_conf.h  |   8 +
>  libgcc/config/libbid/bid_functions.h |  23 ++-
>  libgcc/config/libbid/bid_inline_add.h|   2 +
>  libgcc/config/libbid/bid_internal.h  |  17 +-
>  15 files changed, 220 insertions(+), 238 deletions(-)
>
> diff --git a/libgcc/config/libbid/bid128_fma.c 
> b/libgcc/config/libbid/bid128_fma.c
> index 67233193a42..cbcf225546f 100644
> --- a/libgcc/config/libbid/bid128_fma.c
> +++ b/libgcc/config/libbid/bid128_fma.c
> @@ -417,13 +417,12 @@ add_and_round (int q3,
>R128.w[1] = R256.w[1];
>R128.w[0] = R256.w[0];
>  }
> +if (e4 + x0 < expmin) { // for all rounding modes
> +  is_tiny = 1;
> +}
>  // the rounded result has p34 = 34 digits
>  e4 = e4 + x0 + incr_exp;
> -if (rnd_mode == ROUNDING_TO_NEAREST) {
> -  if (e4 < expmin) {
> -is_tiny = 1; // for other rounding modes apply correction
> -  }
> -} else {
> +if (rnd_mode != ROUNDING_TO_NEAREST) {
>// for RM, RP, RZ, RA apply 

Re: [PATCH] PR middle-end/111701: signbit(x*x) vs -fsignaling-nans

2024-05-02 Thread Richard Biener
On Fri, Apr 26, 2024 at 10:19 AM Roger Sayle  wrote:
>
>
> This patch addresses PR middle-end/111701 where optimization of signbit(x*x)
> using tree_nonnegative_p incorrectly eliminates a floating point
> multiplication when the operands may potentially be signaling NaNs.
>
> The above bug fix also provides a solution or work-around to the tricky
> issue in PR middle-end/111701, that the results of IEEE operations on NaNs
> are specified to return a NaN result, but fail to (precisely) specify
> the exact NaN representation of this result.  Hence for the operation
> "-NaN*-NaN" different hardware implementations (targets) return different
> results.  Ultimately knowing what the resulting NaN "payload" of an
> operation is can only be known by executing that operation at run-time,
> and I'd suggest that GCC's -fsignaling-nans provides a mechanism for
> handling code that uses NaN representations for communication/signaling
> (which is a different but related concept to IEEE's sNaN).
>
> One nice thing about this patch, which may or may not be a P2 regression
> fix, is that it only affects (improves) code compiled with -fsignaling-nans
> so should be extremely safe even for this point in stage 3.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?

Hmm, but the bugreports are not about sNaN but about the fact that
the sign of the NaN produced by 0/0 or by -NaN*-NaN is not well-defined.
So I don't think this is the correct approach to fix this.  We'd instead
have to use tree_expr_maybe_nan_p () - and if NaN*NaN cannot be
-NaN (is that at least specified?) then the RECURSE path should
still work as well.

Richard.

>
> 2024-04-26  Roger Sayle  
>
> gcc/ChangeLog
> PR middle-end/111701
> * fold-const.cc (tree_binary_nonnegative_warnv_p) <case MULT_EXPR>:
> Split handling of floating point and integer types.  For equal
> floating point operands, avoid optimization if the operand may be
> a signaling NaN.
>
> gcc/testsuite/ChangeLog
> PR middle-end/111701
> * gcc.dg/pr111701-1.c: New test case.
> * gcc.dg/pr111701-2.c: Likewise.
>
>
> Thanks in advance,
> Roger
> --
>


[PATCH] Make graph dumps use graphviz format

2024-05-02 Thread Richard Biener
SLP build eventually uses graphds graphs, the following makes its
dump use graphviz format so you can easily visualize it.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* graphds.cc (dump_graph): Dump in graphviz format.
---
 gcc/graphds.cc | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/gcc/graphds.cc b/gcc/graphds.cc
index 17d08966f53..ed5bfeb2808 100644
--- a/gcc/graphds.cc
+++ b/gcc/graphds.cc
@@ -31,22 +31,17 @@ dump_graph (FILE *f, struct graph *g)
   int i;
   struct graph_edge *e;
 
+  fprintf (f, "digraph {\n");
   for (i = 0; i < g->n_vertices; i++)
 {
-  if (!g->vertices[i].pred
- && !g->vertices[i].succ)
-   continue;
-
-  fprintf (f, "%d (%d)\t<-", i, g->vertices[i].component);
+  fprintf (f, "\"%d\" [label=\"%d (%d): %p\"];\n",
+  i, i, g->vertices[i].component, g->vertices[i].data);
   for (e = g->vertices[i].pred; e; e = e->pred_next)
-   fprintf (f, " %d", e->src);
-  fprintf (f, "\n");
-
-  fprintf (f, "\t->");
+   fprintf (f, "\"%d\" -> \"%d\" [label=\"%p\"];\n", e->src, e->dest, e->data);
   for (e = g->vertices[i].succ; e; e = e->succ_next)
-   fprintf (f, " %d", e->dest);
-  fprintf (f, "\n");
+   fprintf (f, "\"%d\" -> \"%d\";\n", e->src, e->dest);
 }
+  fprintf (f, "}\n");
 }
 
 /* Creates a new graph with N_VERTICES vertices.  */
-- 
2.35.3


Re: [PATCH] c++/c-common: Fix convert_vector_to_array_for_subscript for qualified vector types [PR89224]

2024-05-02 Thread Richard Biener
On Tue, Apr 30, 2024 at 9:13 PM Jason Merrill  wrote:
>
> On 4/30/24 12:04, Andrew Pinski wrote:
> > On Tue, Apr 30, 2024 at 11:54 AM Jason Merrill  wrote:
> >>
> >> On 2/20/24 19:06, Andrew Pinski wrote:
> >>> After r7-987-gf17a223de829cb, the access for the elements of a vector 
> >>> type would lose the qualifiers.
> >>> So if we had `constvector[0]`, the type of the element of the array would 
> >>> not have const on it.
> >>> This was due to a missing build_qualified_type for the inner type of the 
> >>> vector when building the array type.
> >>> We need to add back the call to build_qualified_type and now the access 
> >>> has the correct qualifiers. So the
> >>> overloads and even if it is a lvalue or rvalue is correctly done.
> >>>
> >>> Note we correctly now reject the testcase gcc.dg/pr83415.c which was 
> >>> incorrectly accepted after r7-987-gf17a223de829cb.
> >>>
> >>> Built and tested for aarch64-linux-gnu.
> >>>
> >>>PR c++/89224
> >>>
> >>> gcc/c-family/ChangeLog:
> >>>
> >>>* c-common.cc (convert_vector_to_array_for_subscript): Call 
> >>> build_qualified_type
> >>>for the inner type.
> >>>
> >>> gcc/cp/ChangeLog:
> >>>
> >>>* constexpr.cc (cxx_eval_array_reference): Compare main variants
> >>>for the vector/array types instead of the types directly.
> >>>
> >>> gcc/testsuite/ChangeLog:
> >>>
> >>>* g++.dg/torture/vector-subaccess-1.C: New test.
> >>>* gcc.dg/pr83415.c: Change warning to error.
> >>>
> >>> Signed-off-by: Andrew Pinski 
> >>> ---
> >>>gcc/c-family/c-common.cc  |  7 +-
> >>>gcc/cp/constexpr.cc   |  3 ++-
> >>>.../g++.dg/torture/vector-subaccess-1.C   | 23 +++
> >>>gcc/testsuite/gcc.dg/pr83415.c|  2 +-
> >>>4 files changed, 32 insertions(+), 3 deletions(-)
> >>>create mode 100644 gcc/testsuite/g++.dg/torture/vector-subaccess-1.C
> >>>
> >>> diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
> >>> index e15eff698df..884dd9043f9 100644
> >>> --- a/gcc/c-family/c-common.cc
> >>> +++ b/gcc/c-family/c-common.cc
> >>> @@ -8936,6 +8936,7 @@ convert_vector_to_array_for_subscript (location_t 
> >>> loc,
> >>>  if (gnu_vector_type_p (TREE_TYPE (*vecp)))
> >>>{
> >>>  tree type = TREE_TYPE (*vecp);
> >>> +  tree newitype;
> >>>
> >>>  ret = !lvalue_p (*vecp);
> >>>
> >>> @@ -8950,8 +8951,12 @@ convert_vector_to_array_for_subscript (location_t 
> >>> loc,
> >>> for function parameters.  */
> >>>  c_common_mark_addressable_vec (*vecp);
> >>>
> >>> +  /* Make sure qualifiers are copied from the vector type to the new 
> >>> element
> >>> +  of the array type.  */
> >>> +  newitype = build_qualified_type (TREE_TYPE (type), TYPE_QUALS 
> >>> (type));
> >>> +
> >>>  *vecp = build1 (VIEW_CONVERT_EXPR,
> >>> -   build_array_type_nelts (TREE_TYPE (type),
> >>> +   build_array_type_nelts (newitype,
> >>>  TYPE_VECTOR_SUBPARTS (type)),
> >>>  *vecp);
> >>>}
> >>> diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
> >>> index fa346fe01c9..1fe91d16e8e 100644
> >>> --- a/gcc/cp/constexpr.cc
> >>> +++ b/gcc/cp/constexpr.cc
> >>> @@ -4421,7 +4421,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, 
> >>> tree t,
> >>>  if (!lval
> >>>  && TREE_CODE (ary) == VIEW_CONVERT_EXPR
> >>>  && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (ary, 0)))
> >>> -  && TREE_TYPE (t) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 0
> >>> +  && TYPE_MAIN_VARIANT (TREE_TYPE (t))
> >>> +   == TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (TREE_OPERAND (ary, 
> >>> 0)
> >>
> >> Please add parens around the == expression so the formatting is stable.
> > ok, I will make that change.
> >
> >>
> >> With that change, OK for trunk and release branches.
> >
> > For the GCC 14 branch, should I wait until after the release due to
> > RC1 going out today and I am not sure this counts as a show stopper
> > issue.
>
> That's not my call ("all changes to the branch require a RM approval
> now") but I think it can wait for 14.2.

Yes, this should wait.

Richard.

> Jason
>


Re: [PATCH][Backport][GCC13] match.pd: Only merge truncation with conversion for -fno-signed-zeros

2024-04-30 Thread Richard Biener
On Mon, Apr 29, 2024 at 5:12 PM Joe Ramsay  wrote:
>
> This optimisation does not honour signed zeros, so should not be
> enabled except with -fno-signed-zeros.
>
> Cherry-pick of 7dd3b2b09cbeb6712ec680a0445cb0ad41070423.
>
> Applies cleanly on releases/gcc-13, regression-tested with no new
> failures.
>
> OK for backport to GCC 13? If so, please commit for me as I do not
> have commit rights in GCC.

I've pushed to 12 and 13.

Richard.

> Thanks,
> Joe
>
> gcc/ChangeLog:
>
> * match.pd: Fix truncation pattern for -fno-signed-zeros
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/no_merge_trunc_signed_zero.c: New test.
> ---
>  gcc/match.pd  |  1 +
>  .../aarch64/no_merge_trunc_signed_zero.c  | 24 +++
>  2 files changed, 25 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 47e48fa2ca5..dc34e7ead9f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4188,6 +4188,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (simplify
> (float (fix_trunc @0))
> (if (!flag_trapping_math
> +   && !HONOR_SIGNED_ZEROS (type)
> && types_match (type, TREE_TYPE (@0))
> && direct_internal_fn_supported_p (IFN_TRUNC, type,
>   OPTIMIZE_FOR_BOTH))
> diff --git a/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c 
> b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c
> new file mode 100644
> index 000..b2c93e55567
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/no_merge_trunc_signed_zero.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-trapping-math -fsigned-zeros" } */
> +
> +#include <math.h>
> +
> +float
> +f1 (float x)
> +{
> +  return (int) rintf(x);
> +}
> +
> +double
> +f2 (double x)
> +{
> +  return (long) rint(x);
> +}
> +
> +/* { dg-final { scan-assembler "frintx\\ts\[0-9\]+, s\[0-9\]+" } } */
> +/* { dg-final { scan-assembler "cvtzs\\ts\[0-9\]+, s\[0-9\]+" } } */
> +/* { dg-final { scan-assembler "scvtf\\ts\[0-9\]+, s\[0-9\]+" } } */
> +/* { dg-final { scan-assembler "frintx\\td\[0-9\]+, d\[0-9\]+" } } */
> +/* { dg-final { scan-assembler "cvtzs\\td\[0-9\]+, d\[0-9\]+" } } */
> +/* { dg-final { scan-assembler "scvtf\\td\[0-9\]+, d\[0-9\]+" } } */
> +
> --
> 2.27.0
>


Re: [C PATCH] PR c/109618: ICE-after-error from error_mark_node.

2024-04-30 Thread Richard Biener
On Tue, Apr 30, 2024 at 10:23 AM Roger Sayle  wrote:
>
>
> Hi Richard,
> Thanks for looking into this.
>
> It’s not the call to size_binop_loc (for CEIL_DIV_EXPR) that's problematic, 
> but the
> call to fold_convert_loc (loc, size_type_node, value) on line 4009 of 
> c-common.cc.
> At this point, value is (NOP_EXPR:sizetype (VAR_DECL:error_mark_node)).

I see.  Can we catch this when we build (NOP_EXPR:sizetype
(VAR_DECL:error_mark_node))
and instead have it "build" error_mark_node?

>
> Ultimately, it's the code in match.pd /* Handle cases of two conversions in a 
> row.  */
> with the problematic line being (match.pd:4748):
>   unsigned int inside_prec = element_precision (inside_type);
>
> Here inside_type is error_mark_node, and so tree type checking in 
> element_precision
> throws an internal_error.
>
> There doesn’t seem to be a good way to fix this in element_precision, and it's
> complicated to reorganize the logic in match.pd's "with clause" inside the
> (ocvt (icvt@1 @0)), but perhaps a (ocvt (icvt:non_error_type@1 @0))?
>
> The last place/opportunity the front-end could sanitize this operand before
> passing the dubious tree to the middle-end is c_sizeof_or_alignof_type (which
> alas doesn't appear in the backtrace due to inlining).
>
> #5  0x0227b0e9 in internal_error (
> gmsgid=gmsgid@entry=0x249c7b8 "tree check: expected class %qs, have %qs 
> (%s) in %s, at %s:%d") at ../../gcc/gcc/diagnostic.cc:2232
> #6  0x0081e32a in tree_class_check_failed (node=0x76c1ef30,
> cl=cl@entry=tcc_type, file=file@entry=0x2495f3f "../../gcc/gcc/tree.cc",
> line=line@entry=6795, function=function@entry=0x24961fe 
> "element_precision")
> at ../../gcc/gcc/tree.cc:9005
> #7  0x0081ef4c in tree_class_check (__t=<optimized out>, 
> __class=tcc_type,
> __f=0x2495f3f "../../gcc/gcc/tree.cc", __l=6795,
> __g=0x24961fe "element_precision") at ../../gcc/gcc/tree.h:4067
> #8  element_precision (type=<optimized out>, type@entry=0x76c1ef30)
> at ../../gcc/gcc/tree.cc:6795
> #9  0x017f66a4 in generic_simplify_CONVERT_EXPR (loc=201632,
> code=<optimized out>, type=0x76c3e7e0, _p0=0x76dc95c0)
> at generic-match-6.cc:3386
> #10 0x00c1b18c in fold_unary_loc (loc=201632, code=NOP_EXPR,
> type=0x76c3e7e0, op0=0x76dc95c0) at 
> ../../gcc/gcc/fold-const.cc:9523
> #11 0x00c1d94a in fold_build1_loc (loc=201632, code=NOP_EXPR,
> type=0x76c3e7e0, op0=0x76dc95c0) at 
> ../../gcc/gcc/fold-const.cc:14165
> #12 0x0094068c in c_expr_sizeof_expr (loc=loc@entry=201632, expr=...)
> at ../../gcc/gcc/tree.h:3771
> #13 0x0097f06c in c_parser_sizeof_expression (parser=<optimized out>)
> at ../../gcc/gcc/c/c-parser.cc:9932
>
>
> I hope this explains what's happening.  The size_binop_loc call is a bit of a 
> red
> herring that returns the same tree it is given (as TYPE_PRECISION 
> (char_type_node)
> == BITS_PER_UNIT), so it's the "TYPE_SIZE_UNIT (type)" which needs to be 
> checked
> for the embedded VAR_DECL with a TREE_TYPE of error_mark_node.
>
> As Andrew Pinski writes in comment #3, this one is trickier than average.
>
> A more comprehensive fix might be to write deep_error_operand_p which does
> more of a tree traversal checking error_operand_p within the unary and binary
> operators of an expression tree.
>
> Please let me know what you think/recommend.
> Best regards,
> Roger
> --
>
> > -Original Message-
> > From: Richard Biener 
> > Sent: 30 April 2024 08:38
> > To: Roger Sayle 
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [C PATCH] PR c/109618: ICE-after-error from error_mark_node.
> >
> > On Tue, Apr 30, 2024 at 1:06 AM Roger Sayle 
> > wrote:
> > >
> > >
> > > This patch solves another ICE-after-error problem in the C family
> > > front-ends.  Upon a conflicting type redeclaration, the ambiguous type
> > > is poisoned with an error_mark_node to indicate to the middle-end that
> > > the type is suspect, but care has to be taken by the front-end to
> > > avoid passing these malformed trees into the middle-end during error
> > > recovery. In this case, a var_decl with a poisoned type appears within
> > > a sizeof() expression (wrapped in NOP_EXPR) which causes problems.
> > >
> > > This revision of the patch tests seen_error() to avoid tree traversal
> > > (STRIP_NOPs) in the most common case that an error hasn't occurred.
> > > Both this version (and an earlier revision that didn't test
> > > seen_error) have survived bootstrap and regression testing on 

Re: [PATCH] vect: Adjust vect_transform_reduction assertion [PR114883]

2024-04-30 Thread Richard Biener
On Tue, 30 Apr 2024, Jakub Jelinek wrote:

> Hi!
> 
> The assertion doesn't allow IFN_COND_MIN/IFN_COND_MAX, which are
> commutative conditional binary operations like ADD/MUL/AND/IOR/XOR,
> and can be handled just fine.
> In particular, we emit
>   vminpd  %zmm3, %zmm5, %zmm0{%k2}
>   vminpd  %zmm0, %zmm3, %zmm5{%k1}
> and
>   vmaxpd  %zmm3, %zmm5, %zmm0{%k2}
>   vmaxpd  %zmm0, %zmm3, %zmm5{%k1}
> in the vectorized loops of the first and second subroutine.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
> 14.1?

OK for both.

Richard.

> 2024-04-30  Jakub Jelinek  
>   Hongtao Liu  
> 
>   PR tree-optimization/114883
>   * tree-vect-loop.cc (vect_transform_reduction): Allow IFN_COND_MIN and
>   IFN_COND_MAX in the assert.
> 
>   * gfortran.dg/pr114883.f90: New test.
> 
> --- gcc/tree-vect-loop.cc.jj  2024-04-17 11:34:02.465185397 +0200
> +++ gcc/tree-vect-loop.cc 2024-04-29 20:41:04.973723992 +0200
> @@ -8505,7 +8505,8 @@ vect_transform_reduction (loop_vec_info
>  {
>gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
> || code == IFN_COND_MUL || code == IFN_COND_AND
> -   || code == IFN_COND_IOR || code == IFN_COND_XOR);
> +   || code == IFN_COND_IOR || code == IFN_COND_XOR
> +   || code == IFN_COND_MIN || code == IFN_COND_MAX);
>gcc_assert (op.num_ops == 4
> && (op.ops[reduc_index]
> == op.ops[internal_fn_else_index ((internal_fn) code)]));
> --- gcc/testsuite/gfortran.dg/pr114883.f90.jj 2024-04-29 20:39:39.000871849 
> +0200
> +++ gcc/testsuite/gfortran.dg/pr114883.f902024-04-29 20:39:27.757021972 
> +0200
> @@ -0,0 +1,53 @@
> +! PR tree-optimization/114883
> +! { dg-do compile }
> +! { dg-options "-O2 -fvect-cost-model=cheap" }
> +! { dg-additional-options "-march=x86-64-v4" { target i?86-*-* x86_64-*-* } }
> +
> +subroutine pr114883_1(a, b, c, d, e, f, g, h, o)
> +  real(8) :: c(1011), d(1011), e(0:1011)
> +  real(8) :: p, q, f, r, g(1011), h(1011), b, bar
> +  integer :: o(100), a, t, u
> +  p = 0.0_8
> +  r = bar()
> +  u = 1
> +  do i = 1,a
> +do k = 1,1011
> +  km1 = max0(k-1,1)
> +  h(k) = c(k) * e(k-1) * d(km1)
> +  f = g(k) + h(k)
> +  if(f.gt.1.e-6)then
> +p = min(p,r)
> +  endif
> +end do
> +q = 0.9_8 * p
> +t = integer(b/q + 1)
> +if(t>100)then
> +  u = t
> +endif
> +o(u) = o(u) + 1
> +  end do
> +end subroutine pr114883_1
> +subroutine pr114883_2(a, b, c, d, e, f, g, h, o)
> +  real(8) :: c(1011), d(1011), e(0:1011)
> +  real(8) :: p, q, f, r, g(1011), h(1011), b, bar
> +  integer :: o(100), a, t, u
> +  p = 0.0_8
> +  r = bar()
> +  u = 1
> +  do i = 1,a
> +do k = 1,1011
> +  km1 = max0(k-1,1)
> +  h(k) = c(k) * e(k-1) * d(km1)
> +  f = g(k) + h(k)
> +  if(f.gt.1.e-6)then
> +p = max(p,r)
> +  endif
> +end do
> +q = 0.9_8 * p
> +t = integer(b/q + 1)
> +if(t>100)then
> +  u = t
> +endif
> +o(u) = o(u) + 1
> +  end do
> +end subroutine pr114883_2
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] gimple-ssa-sprintf: Use [0, 1] range for %lc with (wint_t) 0 argument [PR114876]

2024-04-30 Thread Richard Biener
   /* Bump up the byte counters if WIDTH is greater.  */
> --- gcc/testsuite/gcc.dg/pr114876.c.jj2024-04-29 12:26:45.774965158 
> +0200
> +++ gcc/testsuite/gcc.dg/pr114876.c   2024-04-29 12:51:37.863777055 +0200
> @@ -0,0 +1,34 @@
> +/* PR tree-optimization/114876 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-not "return \[01\];" "optimized" } } */
> +/* { dg-final { scan-tree-dump "return 3;" "optimized" } } */
> +/* { dg-final { scan-tree-dump "return 4;" "optimized" } } */
> +
> +int
> +foo (void)
> +{
> +  char buf[64];
> +  return __builtin_sprintf (buf, "%lc%lc%lc", (__WINT_TYPE__) 0, 
> (__WINT_TYPE__) 0, (__WINT_TYPE__) 0);
> +}
> +
> +int
> +bar (void)
> +{
> +  char buf[64];
> +  return __builtin_sprintf (buf, "%c%c%c", 0, 0, 0);
> +}
> +
> +int
> +baz (void)
> +{
> +  char buf[64];
> +  return __builtin_sprintf (buf, "%lc%lc%lca", (__WINT_TYPE__) 0, 
> (__WINT_TYPE__) 0, (__WINT_TYPE__) 0);
> +}
> +
> +int
> +qux (void)
> +{
> +  char buf[64];
> +  return __builtin_sprintf (buf, "%c%c%ca", 0, 0, 0);
> +}
> --- gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c.jj 2020-12-03 
> 10:04:35.888092988 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c2024-04-29 
> 12:49:14.452717581 +0200
> @@ -200,11 +200,11 @@ void test_sprintf_chk_c_const (void)
>T (3, "%c%c", '1', '2');
>  
>/* Wide characters.  */
> -  T (0, "%lc", (wint_t)0);   /* { dg-warning "nul past the end" } */
> -  T (1, "%lc", (wint_t)0);
> -  T (1, "%lc%lc",  (wint_t)0, (wint_t)0);
> +  T (0, "%lc", (wint_t)0);   /* { dg-warning ".%lc. directive writing up 
> to 1 bytes into a region of size 0" } */
> +  T (1, "%lc", (wint_t)0);   /* { dg-warning "nul past the end" } */
> +  T (1, "%lc%lc",  (wint_t)0, (wint_t)0);   /* { dg-warning ".%lc. directive 
> writing up to 1 bytes into a region of size between 0 and 1" } */
>T (2, "%lc", (wint_t)0);
> -  T (2, "%lc%lc",  (wint_t)0, (wint_t)0);
> +  T (2, "%lc%lc",  (wint_t)0, (wint_t)0);   /* { dg-warning "nul past the 
> end" } */
>  
>/* The following could result in as few as no bytes and in as many as
>   MB_CUR_MAX, but since the MB_CUR_MAX value is a runtime property
> @@ -1550,7 +1550,7 @@ void test_snprintf_c_const (char *d)
>  
>/* Wide characters.  */
>T (0, "%lc",  (wint_t)0);
> -  T (1, "%lc",  (wint_t)0);
> +  T (1, "%lc",  (wint_t)0);  /* { dg-warning "output may be truncated 
> before the last format character" } */
>T (2, "%lc",  (wint_t)0);
>  
>/* The following could result in as few as a single byte and in as many
> @@ -1603,7 +1603,7 @@ void test_snprintf_chk_c_const (void)
>  
>/* Wide characters.  */
>T (0, "%lc",  (wint_t)0);
> -  T (1, "%lc",  (wint_t)0);
> +  T (1, "%lc",  (wint_t)0);  /* { dg-warning "output may be truncated 
> before the last format character" } */
>T (2, "%lc",  (wint_t)0);
>  
>/* The following could result in as few as a single byte and in as many
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH 1/2] MATCH: change single_non_singleton_phi_for_edges for singleton phis

2024-04-30 Thread Richard Biener
On Sun, Apr 28, 2024 at 8:31 AM Andrew Pinski  wrote:
>
> I noticed that single_non_singleton_phi_for_edges could
> return a phi whose entries are all the same for the edge.
> This happens only if there was a single phi in the first place.
> Also, gimple_seq_singleton_p walks the sequence to see if it has only one
> element, so removing that check actually
> reduces the number of pointer walks needed.
>
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

Richard.

> gcc/ChangeLog:
>
> * tree-ssa-phiopt.cc (single_non_singleton_phi_for_edges):
> Remove the special case of gimple_seq_singleton_p.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/tree-ssa-phiopt.cc | 8 
>  1 file changed, 8 deletions(-)
>
> diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
> index d1746c4b468..f1e07502b02 100644
> --- a/gcc/tree-ssa-phiopt.cc
> +++ b/gcc/tree-ssa-phiopt.cc
> @@ -62,14 +62,6 @@ single_non_singleton_phi_for_edges (gimple_seq seq, edge 
> e0, edge e1)
>  {
>gimple_stmt_iterator i;
>gphi *phi = NULL;
> -  if (gimple_seq_singleton_p (seq))
> -{
> -  phi = as_a <gphi *> (gsi_stmt (gsi_start (seq)));
> -  /* Never return virtual phis.  */
> -  if (virtual_operand_p (gimple_phi_result (phi)))
> -   return NULL;
> -  return phi;
> -}
>for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
>  {
>gphi *p = as_a <gphi *> (gsi_stmt (i));
> --
> 2.43.0
>


Re: [PATCH 2/2] PHI-OPT: speed up value_replacement slightly

2024-04-30 Thread Richard Biener
On Sun, Apr 28, 2024 at 8:31 AM Andrew Pinski  wrote:
>
> This adds a few early outs to value_replacement that I noticed
> while rewriting this to use match-and-simplify but could be committed
> separately.
> * virtual operands won't change so return early for them
> * special case `A ? B : B` as that is already just `B`
>
> Also moves the check for NE/EQ earlier as calculating empty_or_with_defined_p
> is an IR walk for a BB and that might be big.
>
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

> gcc/ChangeLog:
>
> * tree-ssa-phiopt.cc (value_replacement): Move check for
> NE/EQ earlier.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/tree-ssa-phiopt.cc | 22 +++---
>  1 file changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
> index f1e07502b02..a2bdcb5eae8 100644
> --- a/gcc/tree-ssa-phiopt.cc
> +++ b/gcc/tree-ssa-phiopt.cc
> @@ -1131,6 +1131,21 @@ value_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>enum tree_code code;
>bool empty_or_with_defined_p = true;
>
> +  /* Virtual operands don't need to be handled. */
> +  if (virtual_operand_p (arg1))
> +return 0;
> +
> +  /* Special case A ? B : B as this will always simplify to B. */
> +  if (operand_equal_for_phi_arg_p (arg0, arg1))
> +return 0;
> +
> +  gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
> +  code = gimple_cond_code (cond);
> +
> +  /* This transformation is only valid for equality comparisons.  */
> +  if (code != NE_EXPR && code != EQ_EXPR)
> +return 0;
> +
>/* If the type says honor signed zeros we cannot do this
>   optimization.  */
>if (HONOR_SIGNED_ZEROS (arg1))
> @@ -1161,13 +1176,6 @@ value_replacement (basic_block cond_bb, basic_block 
> middle_bb,
> empty_or_with_defined_p = false;
>  }
>
> -  gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
> -  code = gimple_cond_code (cond);
> -
> -  /* This transformation is only valid for equality comparisons.  */
> -  if (code != NE_EXPR && code != EQ_EXPR)
> -return 0;
> -
>/* We need to know which is the true edge and which is the false
>edge so that we know if have abs or negative abs.  */
>extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
> --
> 2.43.0
>


Re: [PATCH 2/2] Remove support for nontemporal stores with ssa_names on lhs [PR112976]

2024-04-30 Thread Richard Biener
On Sat, Apr 27, 2024 at 1:04 AM Andrew Pinski  wrote:
>
> When cfgexpand was changed to support expanding from tuple gimple
> (r0-95521-g28ed065ef9f345), the code was added to support
> doing nontemporal stores with LHS of a SSA_NAME but that will
> never be a nontemporal store.
> This patch removes that and asserts that expanding with a LHS
> of a SSA_NAME is not a nontemporal store.
>
> Bootstrapped and tested on x86_64-linux-gnu.

OK.

> gcc/ChangeLog:
>
> PR middle-end/112976
> * cfgexpand.cc (expand_gimple_stmt_1): Remove
> support for expanding nontemporal "moves" with
> ssa names on the LHS.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/cfgexpand.cc | 11 ---
>  1 file changed, 4 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index eef565eddb5..cfc5291aa0c 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -4002,17 +4002,16 @@ expand_gimple_stmt_1 (gimple *stmt)
> else
>   {
> rtx target, temp;
> -   bool nontemporal = gimple_assign_nontemporal_move_p (assign_stmt);
> +   gcc_assert (!gimple_assign_nontemporal_move_p (assign_stmt));
> bool promoted = false;
>
> target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
>   promoted = true;
>
> -  /* If we want to use a nontemporal store, force the value to
> - register first.  If we store into a promoted register,
> - don't directly expand to target.  */
> -   temp = nontemporal || promoted ? NULL_RTX : target;
> +  /* If we store into a promoted register, don't directly
> + expand to target.  */
> +   temp = promoted ? NULL_RTX : target;
> temp = expand_expr_real_gassign (assign_stmt, temp,
>  GET_MODE (target), 
> EXPAND_NORMAL);
>
> @@ -4034,8 +4033,6 @@ expand_gimple_stmt_1 (gimple *stmt)
>
> convert_move (SUBREG_REG (target), temp, unsignedp);
>   }
> -   else if (nontemporal && emit_storent_insn (target, temp))
> - ;
> else
>   {
> temp = force_operand (temp, target);
> --
> 2.43.0
>


Re: [PATCH 1/2] Add verification of gimple_assign_nontemporal_move_p [PR112976]

2024-04-30 Thread Richard Biener
On Sat, Apr 27, 2024 at 1:04 AM Andrew Pinski  wrote:
>
> Currently the middle-end only knows how to support nontemporal stores
> (the undocumented storent optab) so let's verify that the only time
> we set nontemporal_move on an assign is if the lhs is not a
> gimple reg.
>
> Bootstrapped and tested on x86_64-linux-gnu no regressions.

OK.

> gcc/ChangeLog:
>
> PR middle-end/112976
> * tree-cfg.cc (verify_gimple_assign): Verify that
> nontemporal moves are stores.
> * gimple.h (struct gimple): Note that only
> nontemporal stores are supported.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/gimple.h|  3 ++-
>  gcc/tree-cfg.cc | 11 +++
>  2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/gimple.h b/gcc/gimple.h
> index 8a8ca109bbf..bd315ffc2dd 100644
> --- a/gcc/gimple.h
> +++ b/gcc/gimple.h
> @@ -236,7 +236,8 @@ struct GTY((desc ("gimple_statement_structure (&%h)"), 
> tag ("GSS_BASE"),
>   for clearing this bit before using it.  */
>unsigned int visited : 1;
>
> -  /* Nonzero if this tuple represents a non-temporal move.  */
> +  /* Nonzero if this tuple represents a non-temporal move; currently
> + only stores are supported.  */
>unsigned int nontemporal_move: 1;
>
>/* Pass local flags.  These flags are free for any pass to use as
> diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
> index b1ba33018fd..06a96f96be7 100644
> --- a/gcc/tree-cfg.cc
> +++ b/gcc/tree-cfg.cc
> @@ -4837,6 +4837,17 @@ verify_gimple_assign_single (gassign *stmt)
>  static bool
>  verify_gimple_assign (gassign *stmt)
>  {
> +  if (gimple_assign_nontemporal_move_p (stmt))
> +{
> +  tree lhs = gimple_assign_lhs (stmt);
> +  if (is_gimple_reg (lhs))
> +   {
> + error ("nontemporal store lhs cannot a gimple register");
> + debug_generic_stmt (lhs);
> + return true;
> +   }
> +}
> +
>switch (gimple_assign_rhs_class (stmt))
>  {
>  case GIMPLE_SINGLE_RHS:
> --
> 2.43.0
>


Re: [PATCH] rust: Do not link with libdl and libpthread unconditionally

2024-04-30 Thread Richard Biener
On Fri, Apr 19, 2024 at 11:49 AM Arthur Cohen  wrote:
>
> Hi everyone,
>
> This patch checks for the presence of dlopen and pthread_create in libc. If 
> that is not the
> case, we check for the existence of -ldl and -lpthread, as these libraries 
> are required to
> link the Rust runtime to our Rust frontend.
>
> If these libs are not present on the system, then we disable the Rust 
> frontend.
>
> This was tested on x86_64, in an environment with a recent GLIBC and in a 
> container with GLIBC
> 2.27.
>
> Apologies for sending it in so late.

For example GCC_ENABLE_PLUGINS simply does

 # Check -ldl
 saved_LIBS="$LIBS"
 AC_SEARCH_LIBS([dlopen], [dl])
 if test x"$ac_cv_search_dlopen" = x"-ldl"; then
   pluginlibs="$pluginlibs -ldl"
 fi
 LIBS="$saved_LIBS"

which I guess would also work for pthread_create?  This would simplify
the code a bit.

> ChangeLog:
>
> * Makefile.tpl: Add CRAB1_LIBS variable.
> * Makefile.in: Regenerate.
> * configure: Regenerate.
> * configure.ac: Check if -ldl and -lpthread are needed, and if so, add
> them to CRAB1_LIBS.
>
> gcc/rust/ChangeLog:
>
> * Make-lang.in: Remove overzealous LIBS = -ldl -lpthread line, link
> crab1 against CRAB1_LIBS.
> ---
>  Makefile.in   |   3 +
>  Makefile.tpl  |   3 +
>  configure | 157 ++
>  configure.ac  |  94 +
>  gcc/rust/Make-lang.in |   2 +-
>  5 files changed, 258 insertions(+), 1 deletion(-)
>
> diff --git a/Makefile.in b/Makefile.in
> index db4fa6c6260..34c5550beca 100644
> --- a/Makefile.in
> +++ b/Makefile.in
> @@ -197,6 +197,7 @@ HOST_EXPORTS = \
> $(BASE_EXPORTS) \
> CC="$(CC)"; export CC; \
> ADA_CFLAGS="$(ADA_CFLAGS)"; export ADA_CFLAGS; \
> +   CRAB1_LIBS="$(CRAB1_LIBS)"; export CRAB1_LIBS; \
> CFLAGS="$(CFLAGS)"; export CFLAGS; \
> CONFIG_SHELL="$(SHELL)"; export CONFIG_SHELL; \
> CXX="$(CXX)"; export CXX; \
> @@ -450,6 +451,8 @@ GOCFLAGS = $(CFLAGS)
>  GDCFLAGS = @GDCFLAGS@
>  GM2FLAGS = $(CFLAGS)
>
> +CRAB1_LIBS = @CRAB1_LIBS@
> +
>  PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
>
>  GUILE = guile
> diff --git a/Makefile.tpl b/Makefile.tpl
> index 1d5813cd569..8f4bf297918 100644
> --- a/Makefile.tpl
> +++ b/Makefile.tpl
> @@ -200,6 +200,7 @@ HOST_EXPORTS = \
> $(BASE_EXPORTS) \
> CC="$(CC)"; export CC; \
> ADA_CFLAGS="$(ADA_CFLAGS)"; export ADA_CFLAGS; \
> +   CRAB1_LIBS="$(CRAB1_LIBS)"; export CRAB1_LIBS; \
> CFLAGS="$(CFLAGS)"; export CFLAGS; \
> CONFIG_SHELL="$(SHELL)"; export CONFIG_SHELL; \
> CXX="$(CXX)"; export CXX; \
> @@ -453,6 +454,8 @@ GOCFLAGS = $(CFLAGS)
>  GDCFLAGS = @GDCFLAGS@
>  GM2FLAGS = $(CFLAGS)
>
> +CRAB1_LIBS = @CRAB1_LIBS@
> +
>  PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
>
>  GUILE = guile
> diff --git a/configure b/configure
> index 3b0abeb8b2e..75b489a5f57 100755
> --- a/configure
> +++ b/configure
> @@ -690,6 +690,7 @@ extra_host_zlib_configure_flags
>  extra_host_libiberty_configure_flags
>  stage1_languages
>  host_libs_picflag
> +CRAB1_LIBS
>  PICFLAG
>  host_shared
>  gcc_host_pie
> @@ -8875,6 +8876,142 @@ fi
>
>
>
> +# Rust requires -ldl and -lpthread if you are using an old glibc that does 
> not include them by
> +# default, so we check for them here
> +
> +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if libc includes libdl and 
> libpthread" >&5
> +$as_echo_n "checking if libc includes libdl and libpthread... " >&6; }
> +
> +ac_ext=c
> +ac_cpp='$CPP $CPPFLAGS'
> +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
> +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS 
> conftest.$ac_ext $LIBS >&5'
> +ac_compiler_gnu=$ac_cv_c_compiler_gnu
> +
> +
> +requires_ldl=no
> +requires_lpthread=no
> +missing_rust_dynlibs=none
> +
> +cat confdefs.h - <<_ACEOF >conftest.$ac_ext
> +/* end confdefs.h.  */
> +#include <dlfcn.h>
> +int
> +main ()
> +{
> +dlopen(0,0);
> +  ;
> +  return 0;
> +}
> +_ACEOF
> +if ac_fn_c_try_link "$LINENO"; then :
> +
> +else
> +  requires_ldl=yes
> +
> +fi
> +rm -f core conftest.err conftest.$ac_objext \
> +conftest$ac_exeext conftest.$ac_ext
> +
> +if test $requires_ldl = yes; then
> +tmp_LIBS=$LIBS
> +LIBS="$LIBS -ldl"
> +
> +cat confdefs.h - <<_ACEOF >conftest.$ac_ext
> +/* end confdefs.h.  */
> +#include <dlfcn.h>
> +int
> +main ()
> +{
> +dlopen(0,0);
> +  ;
> +  return 0;
> +}
> +_ACEOF
> +if ac_fn_c_try_link "$LINENO"; then :
> +  CRAB1_LIBS="$CRAB1_LIBS -ldl"
> +else
> +  missing_rust_dynlibs="libdl"
> +
> +fi
> +rm -f core conftest.err conftest.$ac_objext \
> +conftest$ac_exeext conftest.$ac_ext
> +
> +LIBS=$tmp_LIBS
> +fi
> +
> +cat confdefs.h - <<_ACEOF >conftest.$ac_ext
> +/* end confdefs.h.  */
> +#include <pthread.h>
> +int
> +main ()
> +{
> +pthread_create(NULL,NULL,NULL,NULL);
> +
> +  ;
> +  return 0;
> +}
> +_ACEOF
> +if ac_fn_c_try_link "$LINENO"; then :
> +
> +else
> +  

Re: [COMMITTED 03/16] Make some Value_Range's explicitly integer.

2024-04-30 Thread Richard Biener
On Sun, Apr 28, 2024 at 9:07 PM Aldy Hernandez  wrote:
>
> Fix some Value_Range's that we know ahead of time will be only
> integers.  This avoids using the polymorphic Value_Range unnecessarily

But isn't Value_Range a variable-size irange but int_range<2> doesn't
support more than two sub-ranges?

So it doesn't look obvious that this isn't actually a regression?

Richard.

> gcc/ChangeLog:
>
> * gimple-ssa-warn-access.cc (check_nul_terminated_array): Make 
> Value_Range an int_range.
> (memmodel_to_uhwi): Same.
> * tree-ssa-loop-niter.cc (refine_value_range_using_guard): Same.
> (determine_value_range): Same.
> (infer_loop_bounds_from_signedness): Same.
> (scev_var_range_cant_overflow): Same.
> ---
>  gcc/gimple-ssa-warn-access.cc |  4 ++--
>  gcc/tree-ssa-loop-niter.cc| 12 ++--
>  2 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
> index dedaae27b31..450c1caa765 100644
> --- a/gcc/gimple-ssa-warn-access.cc
> +++ b/gcc/gimple-ssa-warn-access.cc
> @@ -330,7 +330,7 @@ check_nul_terminated_array (GimpleOrTree expr, tree src, 
> tree bound)
>wide_int bndrng[2];
>if (bound)
>  {
> -  Value_Range r (TREE_TYPE (bound));
> +  int_range<2> r (TREE_TYPE (bound));
>
>get_range_query (cfun)->range_of_expr (r, bound);
>
> @@ -2816,7 +2816,7 @@ memmodel_to_uhwi (tree ord, gimple *stmt, unsigned 
> HOST_WIDE_INT *cstval)
>  {
>/* Use the range query to determine constant values in the absence
>  of constant propagation (such as at -O0).  */
> -  Value_Range rng (TREE_TYPE (ord));
> +  int_range<2> rng (TREE_TYPE (ord));
>if (!get_range_query (cfun)->range_of_expr (rng, ord, stmt)
>   || !rng.singleton_p ())
> return false;
> diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
> index c6d010f6d89..cbc9dbc5a1f 100644
> --- a/gcc/tree-ssa-loop-niter.cc
> +++ b/gcc/tree-ssa-loop-niter.cc
> @@ -214,7 +214,7 @@ refine_value_range_using_guard (tree type, tree var,
>get_type_static_bounds (type, mint, maxt);
>mpz_init (minc1);
>mpz_init (maxc1);
> -  Value_Range r (TREE_TYPE (varc1));
> +  int_range<2> r (TREE_TYPE (varc1));
>/* Setup range information for varc1.  */
>if (integer_zerop (varc1))
>  {
> @@ -368,7 +368,7 @@ determine_value_range (class loop *loop, tree type, tree 
> var, mpz_t off,
>gphi_iterator gsi;
>
>/* Either for VAR itself...  */
> -  Value_Range var_range (TREE_TYPE (var));
> +  int_range<2> var_range (TREE_TYPE (var));
>get_range_query (cfun)->range_of_expr (var_range, var);
>if (var_range.varying_p () || var_range.undefined_p ())
> rtype = VR_VARYING;
> @@ -382,7 +382,7 @@ determine_value_range (class loop *loop, tree type, tree 
> var, mpz_t off,
>
>/* Or for PHI results in loop->header where VAR is used as
>  PHI argument from the loop preheader edge.  */
> -  Value_Range phi_range (TREE_TYPE (var));
> +  int_range<2> phi_range (TREE_TYPE (var));
>for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next 
> ())
> {
>   gphi *phi = gsi.phi ();
> @@ -408,7 +408,7 @@ determine_value_range (class loop *loop, tree type, tree 
> var, mpz_t off,
>  involved.  */
>   if (wi::gt_p (minv, maxv, sgn))
> {
> - Value_Range vr (TREE_TYPE (var));
> + int_range<2> vr (TREE_TYPE (var));
>   get_range_query (cfun)->range_of_expr (vr, var);
>   if (vr.varying_p () || vr.undefined_p ())
> rtype = VR_VARYING;
> @@ -4367,7 +4367,7 @@ infer_loop_bounds_from_signedness (class loop *loop, 
> gimple *stmt)
>
>low = lower_bound_in_type (type, type);
>high = upper_bound_in_type (type, type);
> -  Value_Range r (TREE_TYPE (def));
> +  int_range<2> r (TREE_TYPE (def));
>get_range_query (cfun)->range_of_expr (r, def);
>if (!r.varying_p () && !r.undefined_p ())
>  {
> @@ -5426,7 +5426,7 @@ scev_var_range_cant_overflow (tree var, tree step, 
> class loop *loop)
>if (!def_bb || !dominated_by_p (CDI_DOMINATORS, loop->latch, def_bb))
>  return false;
>
> -  Value_Range r (TREE_TYPE (var));
> +  int_range<2> r (TREE_TYPE (var));
>get_range_query (cfun)->range_of_expr (r, var);
>if (r.varying_p () || r.undefined_p ())
>  return false;
> --
> 2.44.0
>


Re: [PATCH] make -freg-struct-return visibly a negative alias of -fpcc-struct-return

2024-04-30 Thread Richard Biener
On Sun, Apr 28, 2024 at 10:24 AM Alexandre Oliva  wrote:
>
>
> The fact that both options accept negative forms suggests that maybe
> they aren't negative forms of each other.  They are, but that isn't
> clear even by examining common.opt.  Use NegativeAlias to make it
> abundantly clear.
>
> The 'Optimization' keyword next to freg-struct-return was the only
> thing that caused flag_pcc_struct_return to be a per-function flag,
> and ipa-inline relied on that.  After making it an alias, the
> Optimization keyword was no longer operational.  I'm not sure it was
> sensible or desirable for flag_pcc_struct_return to be a per-function
> setting, but this patch does not intend to change behavior.
>
> Regstrapped on x86_64-linux-gnu and ppc64le-linux-gnu.  Ok to install?

OK.

Thanks,
Richard.

>
> for  gcc/ChangeLog
>
> * common.opt (freg-struct-return): Make it explicitly
> fpcc-struct-return's NegativeAlias.  Copy Optimization...
> (fpcc-struct-return): ... here.
> ---
>  gcc/common.opt |4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index ad3488447752b..12d93c76a1e63 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2406,7 +2406,7 @@ Common RejectNegative Joined UInteger Optimization
>  -fpack-struct= Set initial maximum structure member alignment.
>
>  fpcc-struct-return
> -Common Var(flag_pcc_struct_return,1) Init(DEFAULT_PCC_STRUCT_RETURN)
> +Common Var(flag_pcc_struct_return,1) Init(DEFAULT_PCC_STRUCT_RETURN) 
> Optimization
>  Return small aggregates in memory, not registers.
>
>  fpeel-loops
> @@ -2596,7 +2596,7 @@ Common Var(flag_record_gcc_switches)
>  Record gcc command line switches in the object file.
>
>  freg-struct-return
> -Common Var(flag_pcc_struct_return,0) Optimization
> +Common NegativeAlias Alias(fpcc_struct_return) Optimization
>  Return small aggregates in registers.
>
>  fregmove
>
> --
> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
>Free Software Activist   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [C PATCH] PR c/109618: ICE-after-error from error_mark_node.

2024-04-30 Thread Richard Biener
On Tue, Apr 30, 2024 at 1:06 AM Roger Sayle  wrote:
>
>
> This patch solves another ICE-after-error problem in the C family
> front-ends.  Upon a conflicting type redeclaration, the ambiguous
> type is poisoned with an error_mark_node to indicate to the middle-end
> that the type is suspect, but care has to be taken by the front-end to
> avoid passing these malformed trees into the middle-end during error
> recovery. In this case, a var_decl with a poisoned type appears within
> a sizeof() expression (wrapped in NOP_EXPR) which causes problems.
>
> This revision of the patch tests seen_error() to avoid tree traversal
> (STRIP_NOPs) in the most common case that an error hasn't occurred.
> Both this version (and an earlier revision that didn't test seen_error)
> have survived bootstrap and regression testing on x86_64-pc-linux-gnu.
> As a consolation, this code also contains a minor performance improvement,
> by avoiding trying to create (and folding away) a CEIL_DIV_EXPR in the
> common case that "char" is a single byte.  The current code relies on
> the middle-end's tree folding to recognize that CEIL_DIV_EXPR of
> integer_one_node is a no-op, that can be optimized away.
>
> Ok for mainline?

Where does it end up ICEing?  I see size_binop_loc guards against
error_mark_node operands already, maybe it should use
error_operand_p instead?

>
> 2024-04-30  Roger Sayle  
>
> gcc/c-family/ChangeLog
> PR c/109618
> * c-common.cc (c_sizeof_or_alignof_type): If seen_error() check
> whether value is (a VAR_DECL) of type error_mark_node, or a
> NOP_EXPR thereof.  Avoid folding CEIL_DIV_EXPR for the common
> case where char_type is a single byte.
>
> gcc/testsuite/ChangeLog
> PR c/109618
> * gcc.dg/pr109618.c: New test case.
>
>
> Thanks in advance,
> Roger
> --
>


Re: [PATCH] Don't assert for IFN_COND_{MIN, MAX} in vect_transform_reduction

2024-04-30 Thread Richard Biener
On Mon, Apr 29, 2024 at 5:30 PM H.J. Lu  wrote:
>
> On Mon, Apr 29, 2024 at 6:47 AM liuhongt  wrote:
> >
> > The Fortran standard does not specify what the result of the MAX
> > and MIN intrinsics are if one of the arguments is a NaN. So it
> > should be ok to transform reduction for IFN_COND_MIN with vectorized
> > COND_MIN and REDUC_MIN.
>
> The commit subject isn't very clear.   This patch isn't about "Don't assert
> for IFN_COND_{MIN,MAX}".  It allows IFN_COND_{MIN,MAX} in
> vect_transform_reduction.

Well, we allow it elsewhere, we just fail to enumerate all COND_* we allow
here correctly.

> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ok for trunk and backport to GCC14?

OK for trunk and branch.

Thanks,
Richard.

> >
> > gcc/ChangeLog:
> >
> > PR 114883
> > * tree-vect-loop.cc (vect_transform_reduction): Don't assert
> > for IFN_COND_{MIN, MAX}.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gfortran.dg/pr114883.f90: New test.
> > ---
> >  gcc/testsuite/gfortran.dg/pr114883.f90 | 191 +
> >  gcc/tree-vect-loop.cc  |   3 +-
> >  2 files changed, 193 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gfortran.dg/pr114883.f90
> >
> > diff --git a/gcc/testsuite/gfortran.dg/pr114883.f90 
> > b/gcc/testsuite/gfortran.dg/pr114883.f90
> > new file mode 100644
> > index 000..86b664a521e
> > --- /dev/null
> > +++ b/gcc/testsuite/gfortran.dg/pr114883.f90
> > @@ -0,0 +1,191 @@
> > +! { dg-do compile }
> > +! { dg-options "-O3" }
> > +! { dg-additional-options "-march=x86-64-v4" { target { x86_64-*-* 
> > i?86-*-* } } }
> > +
> > +module ndrop
> > +
> > +
> > +  implicit none
> > +
> > +  private
> > +  save
> > +
> > +  public dropmixnuc
> > +
> > +  real(8) :: npv(1011) ! number per volume concentration
> > +  real(8) :: alogsig(1011) ! natl log of geometric standard dev of aerosol
> > +
> > +  type qqcw_type
> > + real(8), pointer :: fldcw(:,:)
> > +  end type qqcw_type
> > +
> > +contains
> > +
> > +  subroutine dropmixnuc(lchnk, ncol, temp,  &
> > +   cldn,cldo, &
> > +   raer, dtmicro   &
> > +   )
> > +implicit none
> > +
> > +! input
> > +
> > +integer, intent(in) :: lchnk! chunk identifier
> > +integer, intent(in) :: ncol ! number of columns
> > +!  type(physics_state), intent(in) :: state  ! Physics state 
> > variables
> > +real(8), intent(in) :: dtmicro ! time step for 
> > microphysics (s)
> > +real(8), intent(in) :: temp(1,1011)! temperature (K)
> > +real(8), intent(in) :: cldo(1,1011)! cloud fraction on previous 
> > time step
> > +real(8), intent(in) :: cldn(1,1011)! cloud fraction
> > +real(8), intent(in) :: raer(1,1011,1011) ! aerosol mass, number mixing 
> > ratios
> > +
> > +
> > +type(qqcw_type) :: QQCW(1011)
> > +
> > +real(8) depvel(1,1011)! deposition velocity for droplets (m/s)
> > +real(8) wtke(1,1011) ! turbulent vertical velocity at base of 
> > layer k (m/s)
> > +real(8) wtke_cen(1,1011) ! turbulent vertical velocity at center of 
> > layer k (m/s)
> > +real(8) zn(1011) ! g/pdel (m2/g) for layer
> > +real(8) zs(1011) ! inverse of distance between levels (m)
> > +real(8), parameter :: zkmin=0.01_8,zkmax=100._8
> > +real(8) cs(1,1011)  ! air density (kg/m3)
> > +real(8) dz(1,1011)  ! geometric thickness of layers (m)
> > +real(8) zero
> > +
> > +real(8) wdiab   ! diabatic vertical velocity
> > +real(8), parameter :: wmixmin = 0.1 ! minimum turbulence vertical 
> > velocity (m/s)
> > +!   real(8), parameter :: wmixmin = 0.2 ! minimum turbulence 
> > vertical velocity (m/s)
> > +!  real(8), parameter :: wmixmin = 1.0 ! minimum turbulence 
> > vertical velocity (m/s)
> > +real(8) ekk(0:1011)   ! density*diffusivity for droplets (kg/m3 
> > m2/s)
> > +real(8), parameter :: sq2pi=2.5066283_8
> > +real(8) dtinv
> > +
> > +integer km1,kp1
> > +real(8) wbar,wmix,wmin,wmax
> > +real(8) dumc
> > +real(8) fac_srflx
> > +real(8) surfrate(1011) ! surface exchange rate (/s)
> > +real(8) surfratemax  ! max surfrate for all species treated here
> > +real(8) dtmin,tinv,dtt
> > +integer nsubmix,nsubmix_bnd
> > +integer i,k,m
> > +real(8) dtmix
> > +real(8) pi
> > +integer nnew,nsav,ntemp
> > +real(8) ekkp(1011),ekkm(1011) ! zn*zs*density*diffusivity
> > +integer count_submix(100)
> > +save count_submix
> > +real(8) nsource(1,1011)! droplet number source (#/kg/s)
> > +real(8) ndropmix(1,1011)   ! droplet number mixing (#/kg/s)
> > +real(8) ndropcol(1)   ! column droplet number (#/m2)
> > +
> > +real(8) na(1),va(1),hy(1)
> > +real(8) naermod(1011) ! (/m3)
> > +real(8) hygro(1011)  ! hygroscopicity of aerosol mode
> > +real(8) 

Re: [PATCH] PHIOPT: Value-replacement check undef

2024-04-30 Thread Richard Biener
On Mon, Apr 29, 2024 at 8:52 AM Andrew Pinski  wrote:
>
> While moving value replacement part of PHIOPT over
> to use match-and-simplify, I ran into the case where
> a conditional use of an undef would become
> unconditional. This prevents that. I can't remember at this
> point what the testcase was though.
>
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

Thanks,
Richard.

> gcc/ChangeLog:
>
> * tree-ssa-phiopt.cc (value_replacement): Reject undef variables
> so they don't become unconditionally used.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/tree-ssa-phiopt.cc | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
> index a2bdcb5eae8..f166c3132cb 100644
> --- a/gcc/tree-ssa-phiopt.cc
> +++ b/gcc/tree-ssa-phiopt.cc
> @@ -1146,6 +1146,13 @@ value_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>if (code != NE_EXPR && code != EQ_EXPR)
>  return 0;
>
> +  /* Do not make conditional undefs unconditional.  */
> +  if ((TREE_CODE (arg0) == SSA_NAME
> +   && ssa_name_maybe_undef_p (arg0))
> +  || (TREE_CODE (arg1) == SSA_NAME
> + && ssa_name_maybe_undef_p (arg1)))
> +return false;
> +
>/* If the type says honor signed zeros we cannot do this
>   optimization.  */
>if (HONOR_SIGNED_ZEROS (arg1))
> --
> 2.43.0
>


[PATCH] middle-end/114734 - wrong code with expand_call_mem_ref

2024-04-26 Thread Richard Biener

When expand_call_mem_ref looks at the definition of the address
argument to eventually expand a _MEM_REF argument together
with a masked load it fails to honor constraints imposed by SSA
coalescing decisions.  The following fixes this.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

PR middle-end/114734
* internal-fn.cc (expand_call_mem_ref): Use
get_gimple_for_ssa_name to get at the def stmt of the address
argument to honor SSA coalescing constraints.
---
 gcc/internal-fn.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 2c764441cde..0a7053c2286 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -53,6 +53,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl-iter.h"
 #include "gimple-range.h"
 #include "fold-const-call.h"
+#include "tree-ssa-live.h"
+#include "tree-outof-ssa.h"

 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -2964,8 +2966,8 @@ expand_call_mem_ref (tree type, gcall *stmt, int index)
   tree tmp = addr;
   if (TREE_CODE (tmp) == SSA_NAME)
 {
-  gimple *def = SSA_NAME_DEF_STMT (tmp);
-  if (gimple_assign_single_p (def))
+  gimple *def = get_gimple_for_ssa_name (tmp);
+  if (def && gimple_assign_single_p (def))
tmp = gimple_assign_rhs1 (def);
 }

--
2.25.1


Re: [PATCH] libgcc: Don't use weakrefs for glibc 2.34

2024-04-25 Thread Richard Biener



> Am 25.04.2024 um 20:24 schrieb Jakub Jelinek :
> 
> Hi!
> 
> glibc 2.34 and later doesn't have separate libpthread (libpthread.so.0 is a
> dummy shared library with just some symbol versions for compatibility, but
> all the pthread_* APIs are in libc.so.6).
> So, we don't need to do the .weakref dances to check whether a program
> has been linked with -lpthread or not, in dynamically linked apps those
> will be always true anyway.
> In -static linking, this fixes various issues people had when only linking
> some parts of libpthread.a and getting weird crashes.  A hack for that was
> what e.g. some Fedora glibcs used, where libpthread.a was a library
> containing just one giant *.o file which had all the normal libpthread.a
> *.o files linked with -r together.
> 
> libstdc++-v3 actually does something like this already since r10-10928,
> the following patch is meant to fix it even for libgfortran, libobjc and
> whatever else uses gthr.h.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux (with glibc 2.35), ok
> for trunk?

Ok

Richard 

> 2024-04-25  Jakub Jelinek  
> 
>* gthr.h (GTHREAD_USE_WEAK): Redefine to 0 for GLIBC 2.34 or later.
> 
> --- libgcc/gthr.h.jj2024-01-03 12:07:28.623363560 +0100
> +++ libgcc/gthr.h2024-04-25 12:09:39.708622613 +0200
> @@ -141,6 +141,15 @@ see the files COPYING3 and COPYING.RUNTI
> #define GTHREAD_USE_WEAK 0
> #endif
> 
> +#ifdef __GLIBC_PREREQ
> +#if __GLIBC_PREREQ(2, 34)
> +/* glibc 2.34 and later has all pthread_* APIs inside of libc,
> +   no need to link separately with -lpthread.  */
> +#undef GTHREAD_USE_WEAK
> +#define GTHREAD_USE_WEAK 0
> +#endif
> +#endif
> +
> #ifndef GTHREAD_USE_WEAK
> #define GTHREAD_USE_WEAK 1
> #endif
> 
>Jakub
> 


Re: [PATCH] wwwdocs: contribute.html: Update consensus on patch content.

2024-04-25 Thread Richard Biener



> Am 25.04.2024 um 17:44 schrieb Carlos O'Donell :
> 
> Discussion is here:
> https://inbox.sourceware.org/gcc/CAPS5khZeWkAD=v8ka9g5eecdnk3bdhfnzjumpvc+hedmkvj...@mail.gmail.com/
> 
> Rough consensus from Jakub Jelinek, Richard Biener and others is
> that maintainers are for the change.

Ok

Richard 


> This changes the contribution notes to allow it.
> ---
> htdocs/contribute.html | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/htdocs/contribute.html b/htdocs/contribute.html
> index 7c1ae323..e8137edc 100644
> --- a/htdocs/contribute.html
> +++ b/htdocs/contribute.html
> @@ -195,8 +195,9 @@ of your testing.
> 
> The patch itself
> 
> -Do not include generated files as part of the patch, just mention
> -them in the ChangeLog (e.g., "* configure: Regenerate.").
> +The patch should include everything you are changing (including
> +regenerated files which should be noted in the ChangeLog e.g.
> +"* configure: Regenerate.").
> 
> 
> 
> --
> 2.44.0
> 


[PATCH] tree-optimization/114792 - order loops to unloops in CH

2024-04-25 Thread Richard Biener

When we use unloop_loops we have to make sure to have loops ordered
inner to outer as otherwise we can wreck inner loop structure where
unlooping relies on that being intact.  The following re-sorts the
vector of loops to unloop after copy-header as that adds to the
vector in two places and in the wrong order.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/114792
* tree-ssa-loop-ch.cc (ch_order_loops): New function.
(ch_base::copy_headers): Sort loops to unloop inner-to-outer.

* gcc.dg/torture/pr114792.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr114792.c | 16 
 gcc/tree-ssa-loop-ch.cc | 25 +
 2 files changed, 41 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr114792.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr114792.c 
b/gcc/testsuite/gcc.dg/torture/pr114792.c
new file mode 100644
index 000..4990e76eb2a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr114792.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-ccp -fno-tree-copy-prop" } */
+
+int a;
+int main() {
+  int b = -1, c;
+  for (; b >= 0; b++) {
+for (c = 0; c; c++) {
+L:
+  while (a)
+if (a)
+  goto L;
+}
+  }
+  return 0;
+}
diff --git a/gcc/tree-ssa-loop-ch.cc b/gcc/tree-ssa-loop-ch.cc
index b7ef485c4cc..d7dd3e5459d 100644
--- a/gcc/tree-ssa-loop-ch.cc
+++ b/gcc/tree-ssa-loop-ch.cc
@@ -759,6 +759,21 @@ protected:
   bool process_loop_p (class loop *loop) final override;
 }; // class pass_ch_vect

+/* Sort comparator to order loops after the specified order.  */
+
+static int
+ch_order_loops (const void *a_, const void *b_, void *order_)
+{
+  int *order = (int *)order_;
+  const class loop *a = *(const class loop * const *)a_;
+  const class loop *b = *(const class loop * const *)b_;
+  if (a->num == b->num)
+return 0;
+  if (order[a->num] < order[b->num])
+return -1;
+  return 1;
+}
+
 /* For all loops, copy the condition at the end of the loop body in front
of the loop.  This is beneficial since it increases efficiency of
code motion optimizations.  It also saves one jump on entry to the loop.  */
@@ -1152,6 +1167,16 @@ ch_base::copy_headers (function *fun)
 }
   if (!loops_to_unloop.is_empty ())
 {
+  /* Make sure loops are ordered inner to outer for unlooping.  */
+  if (loops_to_unloop.length () != 1)
+   {
+ auto_vec<int> order;
+ order.safe_grow (number_of_loops (cfun), true);
+ int i = 0;
+ for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
+   order[loop->num] = i++;
+ loops_to_unloop.sort (ch_order_loops, order.address ());
+   }
   bool irred_invalidated;
   auto_bitmap lc_invalidated;
   auto_vec edges_to_remove;
--
2.25.1


[PATCH] tree-optimization/114787 - more careful loop update with CFG cleanup

2024-04-24 Thread Richard Biener

When CFG cleanup removes a backedge we have to be more careful with
loop update.  In particular we need to clear niter info and estimates
and if we remove the last backedge of a loop we have to also mark
it for removal to prevent a following basic block merging to associate
loop info with an unrelated header.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/114787
* tree-cfg.cc (remove_edge_and_dominated_blocks): When
removing a loop backedge clear niter info and when removing
the last backedge of a loop mark that loop for removal.

* gcc.dg/torture/pr114787.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr114787.c | 27 +
 gcc/tree-cfg.cc | 26 +---
 2 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr114787.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr114787.c 
b/gcc/testsuite/gcc.dg/torture/pr114787.c
new file mode 100644
index 000..1c7294bee7b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr114787.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+int a, b, c, d, e = -1, f, g, h, j, k, n, o, p;
+int main() {
+  int i, l = 2, m;
+  for (b = 0; b < 1; b++)
+l = 0;
+  for (; a >= 0; a--)
+for (m = 3; m; m--) {
+  k = g;
+  i = 0;
+  for (; i < 1; i++)
+for (; f < 1; f++)
+  h = g;
+  n = 2 & ((e ^ d) | 1) * j;
+  o = ~(e & n);
+q:
+  if (c <= e)
+return 0;
+  e = o;
+}
+  p = l;
+  l = 0;
+  if (p)
+goto q;
+  return 0;
+}
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index bdffc3b4ed2..b936df9866b 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -9004,10 +9004,30 @@ remove_edge_and_dominated_blocks (edge e)

   /* If we are removing a path inside a non-root loop that may change
  loop ownership of blocks or remove loops.  Mark loops for fixup.  */
+  class loop *src_loop = e->src->loop_father;
   if (current_loops
-  && loop_outer (e->src->loop_father) != NULL
-  && e->src->loop_father == e->dest->loop_father)
-loops_state_set (LOOPS_NEED_FIXUP);
+  && loop_outer (src_loop) != NULL
+  && src_loop == e->dest->loop_father)
+{
+  loops_state_set (LOOPS_NEED_FIXUP);
+  /* If we are removing a backedge clear the number of iterations
+and estimates.  */
+  class loop *dest_loop = e->dest->loop_father;
+  if (e->dest == src_loop->header
+ || (e->dest == dest_loop->header
+ && flow_loop_nested_p (dest_loop, src_loop)))
+   {
+ free_numbers_of_iterations_estimates (dest_loop);
+ /* If we removed the last backedge mark the loop for removal.  */
+ FOR_EACH_EDGE (f, ei, dest_loop->header->preds)
+   if (f != e
+   && (f->src->loop_father == dest_loop
+   || flow_loop_nested_p (dest_loop, f->src->loop_father)))
+ break;
+ if (!f)
+   mark_loop_for_removal (dest_loop);
+   }
+}

   if (!dom_info_available_p (CDI_DOMINATORS))
 {
--
2.25.1


[PATCH] tree-optimization/114832 - wrong dominator info with vect peeling

2024-04-23 Thread Richard Biener

When we update the dominator of the redirected exit after peeling
we check whether the immediate dominator was the loop header rather
than the exit source when we later want to just update it to the
new source.  The following fixes this oversight.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

PR tree-optimization/114832
* tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
Fix dominance check.

* gcc.dg/vect/pr114832.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr114832.c | 13 +
 gcc/tree-vect-loop-manip.cc  |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr114832.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr114832.c 
b/gcc/testsuite/gcc.dg/vect/pr114832.c
new file mode 100644
index 000..2de07ae22ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr114832.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-if-convert 
-fno-tree-loop-distribute-patterns" } */
+
+int a, b, c, d[3];
+void e() {
+  int f, g = 0;
+  for (; g < 3; g++) {
+if (f || a || b && c) {
+  int h, *i = &h, **j = &i;
+}
+d[g] = 0;
+  }
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 8d9b533d50f..43c7881c640 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1523,7 +1523,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,

   exit_dest = exit->dest;
   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
- exit_dest) == loop->header ?
+ exit_dest) == exit->src ?
 true : false);

   /* Also copy the pre-header, this avoids jumping through hoops to
--
2.25.1


[PATCH] tree-optimization/114799 - SLP and patterns

2024-04-23 Thread Richard Biener
The following plugs a hole with computing whether a SLP node has any
pattern stmts which is important to know when we want to replace it
by a CTOR from external defs.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/114799
* tree-vect-slp.cc (vect_get_and_check_slp_defs): Properly
update ->any_pattern when swapping operands.

* gcc.dg/vect/bb-slp-pr114799.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c | 16 
 gcc/tree-vect-slp.cc|  6 ++
 2 files changed, 22 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c
new file mode 100644
index 000..70572fe703b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr114799.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-forwprop" } */
+
+unsigned long x;
+unsigned char y;
+
+void
+foo (void)
+{
+  unsigned long tt = y;
+  tt+=255;
+  unsigned short t1 = tt;
+  t1 = 254 - t1;
+  tt += ((unsigned long)t1);
+  x = tt;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 109f318c7d6..3eb326d20b5 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -888,6 +888,12 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
 (*oprnds_info)[i+1]->def_stmts[stmt_num]);
  std::swap ((*oprnds_info)[i]->ops[stmt_num],
 (*oprnds_info)[i+1]->ops[stmt_num]);
+ /* After swapping some operands we lost track whether an
+operand has any pattern defs so be conservative here.  */
+ if ((*oprnds_info)[i]->any_pattern
+ || (*oprnds_info)[i+1]->any_pattern)
+   (*oprnds_info)[i]->any_pattern
+ = (*oprnds_info)[i+1]->any_pattern = true;
  swapped = true;
  continue;
}
-- 
2.35.3


Re: [PATCH] decay vect tests from run to link for pr95401

2024-04-22 Thread Richard Biener
On Mon, Apr 22, 2024 at 12:05 PM Alexandre Oliva  wrote:
>
> Ping?-ish for the full version of the RFC posted at
> https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566588.html
>
> On Mar 11, 2021, Richard Biener  wrote:
>
> > On Thu, Mar 11, 2021 at 9:03 AM Alexandre Oliva  wrote:
>
> >> So I'm leaning towards this proposed change, just extended to other
> >> platforms that also decay from run to compile rather than link, and thus
> >> run into this problem in g++.dg/vect/pr95401.cc.  Would this be
> >> acceptable?
>
> > I think that's OK.  It's probably difficult to make the test UNSUPPORTED
> > when dg-additional-sources is discovered with a dg-do compile test?
>
> Thanks, here's a completed version.
>
>
> When vect.exp finds our configuration disables altivec by default, it
> disables the execution of vectorization tests, assuming the test
> hardware doesn't support it.
>
> Tests become just compile tests, but compile tests won't work
> correctly when additional sources are named, e.g. pr95401.cc, because
> GCC refuses to compile multiple files into the same asm output.
>
> With this patch, the default for when execution is not possible
> becomes link.
>
> Regstrapped on x86_64-linux-gnu and ppc64el-linux-gnu.  Also tested with
> gcc-13 on ppc64-vx7r2 and ppc-vx7r2.  Ok to install?

That makes sense.  OK

Thanks,
Richard.

>
> for  gcc/testsuite/ChangeLog
>
> * lib/target-supports.exp (check_vect_support_and_set_flags):
> Decay to link rather than compile.
> ---
>  gcc/testsuite/lib/target-supports.exp |   18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index 3a5713d98691f..54a55585371b0 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -11625,7 +11625,7 @@ proc check_vect_support_and_set_flags { } {
>  if [check_750cl_hw_available] {
>  set dg-do-what-default run
>  } else {
> -set dg-do-what-default compile
> +set dg-do-what-default link
>  }
>  } elseif [istarget powerpc*-*-*] {
>  # Skip targets not supporting -maltivec.
> @@ -11655,14 +11655,14 @@ proc check_vect_support_and_set_flags { } {
>  # some other cpu type specified above.
> set DEFAULT_VECTCFLAGS [linsert $DEFAULT_VECTCFLAGS 0 
> "-mcpu=970"]
>  }
> -set dg-do-what-default compile
> +set dg-do-what-default link
>  }
>  } elseif { [istarget i?86-*-*] || [istarget x86_64-*-*] } {
>  lappend DEFAULT_VECTCFLAGS "-msse2"
>  if { [check_effective_target_sse2_runtime] } {
>  set dg-do-what-default run
>  } else {
> -set dg-do-what-default compile
> +set dg-do-what-default link
>  }
>  } elseif { [istarget mips*-*-*]
>&& [check_effective_target_nomips16] } {
> @@ -11681,7 +11681,7 @@ proc check_vect_support_and_set_flags { } {
>  if [check_effective_target_ultrasparc_hw] {
>  set dg-do-what-default run
>  } else {
> -set dg-do-what-default compile
> +set dg-do-what-default link
>  }
>  } elseif [istarget alpha*-*-*] {
>  # Alpha's vectorization capabilities are extremely limited.
> @@ -11694,7 +11694,7 @@ proc check_vect_support_and_set_flags { } {
>  if [check_alpha_max_hw_available] {
>  set dg-do-what-default run
>  } else {
> -set dg-do-what-default compile
> +set dg-do-what-default link
>  }
>  } elseif [istarget ia64-*-*] {
>  set dg-do-what-default run
> @@ -11707,7 +11707,7 @@ proc check_vect_support_and_set_flags { } {
>  if [is-effective-target arm_neon_hw] {
>  set dg-do-what-default run
>  } else {
> -set dg-do-what-default compile
> +set dg-do-what-default link
>  }
>  } elseif [istarget aarch64*-*-*] {
>  set dg-do-what-default run
> @@ -11731,7 +11731,7 @@ proc check_vect_support_and_set_flags { } {
>  set dg-do-what-default run
>  } else {
> lappend DEFAULT_VECTCFLAGS "-march=z14" "-mzarch"
> -set dg-do-what-default compile
> +set dg-do-what-default link
>  }
>  } elseif [istarget amdgcn-*-*] {
>  set dg-do-what-default run
> @@ -11742,7 +11742,7 @@ proc check_vect_support_and_set_flags { } 

Re: [PATCH] s390: testsuite: Fix forwprop-4{0,1}.c

2024-04-22 Thread Richard Biener
On Mon, Apr 22, 2024 at 10:47 AM Andreas Krebbel  wrote:
>
> Hi Stefan,
>
> due to that missed optimization we currently generate silly code for these 
> two tests and should
> really fix this (after gcc entering stage1). So just skipping it on s390x 
> would definitely be the
> wrong choice I think.
>
> I think our vectorize_vec_perm_const correctly rejects this permute pattern, 
> since it would require
> a load from literal pool. Question is why we do have to rely on this being 
> turned into a permute
> first to get rid of the obviously redundant assignments. Shouldn't fwprop be 
> able to handle this
> without it?

We do not detect "redundant" BIT_INSERT, but the match.pd pattern
could eventually detect
this case (ISTR we have one doing that but I may be mistaken).

> I'm ok with your patch, but please also open a BZ for it and perhaps mention 
> it in the comment close
> to the xfail.
>
> Thanks!
>
> Andreas
>
> On 4/22/24 08:23, Stefan Schulze Frielinghaus wrote:
> > The tests fail on s390 since can_vec_perm_const_p fails and therefore
> > the bit insert/ref survive which r14-3381-g27de9aa152141e aims for.
> > Strictly speaking, the tests only fail in case the target supports
> > vectors, i.e., for targets prior z13 or in case of -mesa the emulated
> > vector operations are optimized out.
> >
> > Easiest would be to skip the entire test for s390.  Another solution
> > would be to xfail in case of vector support hoping that eventually we
> > end up with an xpass for a future machine generation or if gcc advances.
> > That is implemented by this patch.  In order to do so I implemented a
> > new target test s390_mvx which tests whether vector support is available
> > or not.  Maybe this is already over-engineered for a simple test?  Any
> > thoughts?
> > ---
> >  gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c |  4 ++--
> >  gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c |  4 ++--
> >  gcc/testsuite/lib/target-supports.exp   | 14 ++
> >  3 files changed, 18 insertions(+), 4 deletions(-)
> >
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
> > index 7513497f552..b67e3e93a7f 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
> > @@ -10,5 +10,5 @@ vector int g(vector int a)
> >return a;
> >  }
> >
> > -/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 0 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 0 "optimized" { 
> > xfail s390_mvx } } } */
> > +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" { xfail 
> > s390_mvx } } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c
> > index b1e75797a90..0f119675207 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-41.c
> > @@ -11,6 +11,6 @@ vector int g(vector int a, int c)
> >return a;
> >  }
> >
> > -/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 1 "optimized" } } */
> > -/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 1 "optimized" { 
> > xfail s390_mvx } } } */
> > +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" { xfail 
> > s390_mvx } } } */
> >  /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "optimized" } } */
> > diff --git a/gcc/testsuite/lib/target-supports.exp 
> > b/gcc/testsuite/lib/target-supports.exp
> > index edce672c0e2..5a692baa8ef 100644
> > --- a/gcc/testsuite/lib/target-supports.exp
> > +++ b/gcc/testsuite/lib/target-supports.exp
> > @@ -12380,6 +12380,20 @@ proc check_effective_target_profile_update_atomic 
> > {} {
> >  } "-fprofile-update=atomic -fprofile-generate"]
> >  }
> >
> > +# Return 1 if the target has a vector facility.
> > +proc check_effective_target_s390_mvx { } {
> > +if ![istarget s390*-*-*] then {
> > + return 0;
> > +}
> > +
> > +return [check_no_compiler_messages_nocache s390_mvx assembly {
> > + #if !defined __VX__
> > + #error no vector facility.
> > + #endif
> > + int dummy;
> > +} [current_compiler_flags]]
> > +}
> > +
> >  # Return 1 if vector (va - vector add) instructions are understood by
> >  # the assembler and can be executed.  This also covers checking for
> >  # the VX kernel feature.  A kernel without that feature does not
>


Re: [PATCH]middle-end: refactory vect_recog_absolute_difference to simplify flow [PR114769]

2024-04-19 Thread Richard Biener
On Fri, Apr 19, 2024 at 3:29 PM Tamar Christina  wrote:
>
> Hi All,
>
> As the reporter in PR114769 points out the control flow for the abd detection
> is hard to follow.  This is because vect_recog_absolute_difference has two
> different ways it can return true.
>
> 1. It can return true when the widening operation is matched, in which case
>unprom is set, half_type is not NULL and diff_stmt is not set.
>
> 2. It can return true when the widening operation is not matched, but the stmt
>being checked is a minus.  In this case unprom is not set, half_type is set
>to NULL and diff_stmt is set.  This is because to get to diff_stmt you have to
>dig through the abs statement and any possible promotions.
>
> This however leads to complicated uses of the function at the call sites as 
> the
> exact semantic needs to be known to use it safely.
>
> vect_recog_absolute_difference has two callers:
>
> 1. vect_recog_sad_pattern where if you return true with unprom not set, then
>*half_type will be NULL.  The call to vect_supportable_direct_optab_p will
>always reject it since there's no vector mode for NULL.  Note that if 
> looking
>at the dump files, the convention in the dump files have always been that 
> we
>first indicate that a pattern could possibly be recognized and then check 
> that
>it's supported.
>
>This change somewhat incorrectly makes the diagnostic message get printed 
> for
>"invalid" patterns.
>
> 2. vect_recog_abd_pattern, where if half_type is NULL, it then uses diff_stmt 
> to
>set them.
>
> So while the note in the dump file is misleading, the code is safe.
>
> This refactors the code, it now only has 1 success condition, and diff_stmt is
> always set to the minus statement in the abs if there is one.
>
> The function now only returns success if the widening minus is found, in which
> case unprom and half_type are set.
>
> This then leaves it up to the caller to decide if they want to do anything 
> with
> diff_stmt.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/114769
> * tree-vect-patterns.cc:
> (vect_recog_absolute_difference): Have only one success condition.
> (vect_recog_abd_pattern): Handle further checks if
> vect_recog_absolute_difference fails.
>
> ---
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 
> 4f491c6b8336f8710c3519dec1fa7e0f49387d2b..87c2acff386d91d22a3b2d6e6443d1f2f2326ea6
>  100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -797,8 +797,7 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info 
> stmt2_info, tree new_rhs,
> HALF_TYPE and UNPROM will be set should the statement be found to
> be a widened operation.
> DIFF_STMT will be set to the MINUS_EXPR
> -   statement that precedes the ABS_STMT unless vect_widened_op_tree
> -   succeeds.
> +   statement that precedes the ABS_STMT if it is a MINUS_EXPR..
>   */
>  static bool
>  vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> @@ -843,6 +842,12 @@ vect_recog_absolute_difference (vec_info *vinfo, gassign 
> *abs_stmt,
>if (!diff_stmt_vinfo)
>  return false;
>
> +  gassign *diff = dyn_cast  (STMT_VINFO_STMT (diff_stmt_vinfo));
> +  if (diff_stmt && diff
> +  && gimple_assign_rhs_code (diff) == MINUS_EXPR
> +  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
> +*diff_stmt = diff;
> +
>/* FORNOW.  Can continue analyzing the def-use chain when this stmt in a 
> phi
>   inside the loop (in case we are analyzing an outer-loop).  */
>if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
> @@ -850,17 +855,6 @@ vect_recog_absolute_difference (vec_info *vinfo, gassign 
> *abs_stmt,
> false, 2, unprom, half_type))
>  return true;
>
> -  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  
> */
> -  gassign *diff = dyn_cast  (STMT_VINFO_STMT (diff_stmt_vinfo));
> -  if (diff_stmt && diff
> -  && gimple_assign_rhs_code (diff) == MINUS_EXPR
> -  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
> -{
> -  *diff_stmt = diff;
> -  *half_type = NULL_TREE;
> -  return true;
> -}
> -
>return false;
>  }
>
> @@ -1499,27 +1493,22 @@ vect_recog_abd_pattern (vec_info *vinfo,
>tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
>
>vect_unpromoted_value unprom[2];
> -  gassign *diff_stmt;
> -  tree half_type;
> -  if (!vect_recog_absolute_difference (vinfo, last_stmt, _type,
> +  gassign *diff_stmt = NULL;
> +  tree abd_in_type;
> +  if (!vect_recog_absolute_difference (vinfo, last_stmt, _in_type,
>unprom, _stmt))
> -return NULL;
> -
> -  tree abd_in_type, abd_out_type;
> -
> -  if (half_type)
> -{
> -  abd_in_type = half_type;
> -  abd_out_type = abd_in_type;
> -

Re: [PATCH] rtlanal: Fix set_noop_p for volatile loads or stores [PR114768]

2024-04-19 Thread Richard Biener
On Fri, 19 Apr 2024, Jakub Jelinek wrote:

> Hi!
> 
> On the following testcase, combine propagates the mem/v load into mem store
> with the same address and then removes it, because noop_move_p says it is a
> no-op move.  If it was the other way around, i.e. mem/v store and mem load,
> or both would be mem/v, it would be kept.
> The problem is that rtx_equal_p never checks any kind of flags on the rtxes
> (and I think it would be quite dangerous to change it at this point), and
> set_noop_p checks side_effects_p on just one of the operands, not both.
> In the MEM <- MEM set, it only checks it on the destination, in
> store to ZERO_EXTRACT only checks it on the source.
> 
> The following patch adds the missing side_effects_p checks.
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2024-04-19  Jakub Jelinek  
> 
>   PR rtl-optimization/114768
>   * rtlanal.cc (set_noop_p): Don't return true for MEM <- MEM
>   sets if src has side-effects or for stores into ZERO_EXTRACT
>   if ZERO_EXTRACT operand has side-effects.
> 
>   * gcc.dg/pr114768.c: New test.
> 
> --- gcc/rtlanal.cc.jj 2024-02-24 12:45:28.674249100 +0100
> +++ gcc/rtlanal.cc2024-04-18 15:09:55.199499083 +0200
> @@ -1637,12 +1637,15 @@ set_noop_p (const_rtx set)
>  return true;
>  
>if (MEM_P (dst) && MEM_P (src))
> -return rtx_equal_p (dst, src) && !side_effects_p (dst);
> +return (rtx_equal_p (dst, src)
> + && !side_effects_p (dst)
> + && !side_effects_p (src));
>  
>if (GET_CODE (dst) == ZERO_EXTRACT)
> -return rtx_equal_p (XEXP (dst, 0), src)
> -&& !BITS_BIG_ENDIAN && XEXP (dst, 2) == const0_rtx
> -&& !side_effects_p (src);
> +return (rtx_equal_p (XEXP (dst, 0), src)
> + && !BITS_BIG_ENDIAN && XEXP (dst, 2) == const0_rtx
> + && !side_effects_p (src)
> + && !side_effects_p (XEXP (dst, 0)));
>  
>if (GET_CODE (dst) == STRICT_LOW_PART)
>  dst = XEXP (dst, 0);
> --- gcc/testsuite/gcc.dg/pr114768.c.jj2024-04-18 15:37:49.139433678 
> +0200
> +++ gcc/testsuite/gcc.dg/pr114768.c   2024-04-18 15:43:30.389730365 +0200
> @@ -0,0 +1,10 @@
> +/* PR rtl-optimization/114768 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-rtl-final" } */
> +/* { dg-final { scan-rtl-dump "\\\(mem/v:" "final" { target { ! { nvptx*-*-* 
> } } } } } */
> +
> +void
> +foo (int *p)
> +{
> +  *p = *(volatile int *) p;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] libgcc: Another __divmodbitint4 bug fix [PR114762]

2024-04-19 Thread Richard Biener
On Fri, 19 Apr 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following testcase is miscompiled because the code to decrement
> vn on negative value with all ones in most significant limb (even partial)
> and 0 in most significant bit of the second most significant limb doesn't
> take into account the case where all bits below the most significant limb
> are zero.  This has been a problem both in the version before yesterday's
> commit where it has been done only if un was one shorter than vn before this
> decrement, and is now problem even more often when it is done earlier.
> When we decrement vn in such case and negate it, we end up with all 0s in
> the v2 value, so have both the problems with UB on __builtin_clz* and the
> expectations of the algorithm that the divisor has most significant bit set
> after shifting, plus when the decremented vn is 1 it can SIGFPE on division
> by zero even when it is not division by zero etc.  Other values shouldn't
> get 0 in the new most significant limb after negation, because the
> bitint_reduce_prec canonicalization should reduce prec if the second most
> significant limb is all ones and if that limb is all zeros, if at least
> one limb below it is non-zero, carry in will make it non-zero.
> 
> The following patch fixes it by checking if at least one bit below the
> most significant limb is non-zero, in that case it decrements, otherwise
> it will do nothing (but e.g. for the un < vn case that also means the
> divisor is large enough that the result should be q 0 r u).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2024-04-19  Jakub Jelinek  
> 
>   PR libgcc/114762
>   * libgcc2.c (__divmodbitint4): Don't decrement vn if all bits
>   below the most significant limb are zero.
> 
>   * gcc.dg/torture/bitint-70.c: New test.
> 
> --- libgcc/libgcc2.c.jj   2024-04-18 09:48:55.172538667 +0200
> +++ libgcc/libgcc2.c  2024-04-18 12:17:28.893616007 +0200
> @@ -1715,11 +1715,18 @@ __divmodbitint4 (UBILtype *q, SItype qpr
>&& vn > 1
>&& (Wtype) v[BITINT_END (1, vn - 2)] >= 0)
>  {
> -  vp = 0;
> -  --vn;
> +  /* Unless all bits below the most significant limb are zero.  */
> +  SItype vn2;
> +  for (vn2 = vn - 2; vn2 >= 0; --vn2)
> + if (v[BITINT_END (vn - 1 - vn2, vn2)])
> +   {
> + vp = 0;
> + --vn;
>  #if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
> -  ++v;
> + ++v;
>  #endif
> + break;
> +   }
>  }
>if (__builtin_expect (un < vn, 0))
>  {
> --- gcc/testsuite/gcc.dg/torture/bitint-70.c.jj   2024-04-18 
> 12:26:09.406383158 +0200
> +++ gcc/testsuite/gcc.dg/torture/bitint-70.c  2024-04-18 12:26:57.253718287 
> +0200
> @@ -0,0 +1,22 @@
> +/* PR libgcc/114762 */
> +/* { dg-do run { target bitint } } */
> +/* { dg-options "-std=c23" } */
> +/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
> +
> +#if __BITINT_MAXWIDTH__ >= 255
> +__attribute__((__noipa__)) signed _BitInt(255)
> +foo (signed _BitInt(255) a, signed _BitInt(65) b)
> +{
> +  return a / b;
> +}
> +#endif
> +
> +int
> +main ()
> +{
> +#if __BITINT_MAXWIDTH__ >= 255
> +  if (foo (1, -0xwb - 1wb))
> +__builtin_abort ();
> +#endif
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] libstdc++: Support link chains in std::chrono::tzdb::locate_zone [PR114770]

2024-04-19 Thread Richard Biener
On Thu, Apr 18, 2024 at 6:34 PM Jonathan Wakely  wrote:
>
> This would fix the bug, how do people feel about it this close to the
> gcc-14 release?

Guess we'll have to fix it anyway, so why not now ... (what could go wrong..)

Richard.

> Tested x86_64-linux.
>
> -- >8 --
>
> Since 2022 the TZif format defined in the zic(8) man page has said that
> links can refer to other links, rather than only referring to a zone.
> This isn't supported by the C++20 spec, which assumes that the target()
> for a chrono::time_zone_link always names a chrono::time_zone, not
> another chrono::time_zone_link.
>
> This hasn't been a problem until now, because there are no entries in
> the tzdata file that chain links together. However, Debian Sid has
> changed the target of the Asia/Chungking link from the Asia/Shanghai
> zone to the Asia/Chongqing link, creating a link chain. The libstdc++
> code is unable to handle this, so chrono::locate_zone("Asia/Chungking")
> will fail with the tzdata.zi file from Debian Sid.
>
> It seems likely that the C++ spec will need a change to allow link
> chains, so that the original structure of the IANA database can be fully
> represented by chrono::tzdb. The alternative would be for chrono::tzdb
> to flatten all chains when loading the data, so that a link's target is
> always a zone, but this means throwing away information present in the
> tzdata.zi input file.
>
> In anticipation of a change to the spec, this commit adds support for
> chained links to libstdc++. When a name is found to be a link, we try to
> find its target in the list of zones as before, but now if the target
> isn't the name of a zone we don't fail. Instead we look for another link
> with that name, and keep doing that until we reach the end of the chain
> of links, and then look up the last target as a zone.
>
> This new logic would get stuck in a loop if the tzdata.zi file is buggy
> and defines a link chain that contains a cycle, e.g. two links that
> refer to each other. To deal with that unlikely case, we use the
> tortoise and hare algorithm to detect cycles in link chains, and throw
> an exception if we detect a cycle. Cycles in links should never happen,
> and it is expected that link chains will be short (if they occur at all)
> and so the code is optimized for short chains without cycles. Longer
> chains (four or more links) and cycles will do more work, but won't fail
> to resolve a chain or get stuck in a loop.
>
> libstdc++-v3/ChangeLog:
>
> PR libstdc++/114770
> * src/c++20/tzdb.cc (do_locate_zone): Support links that have
> another link as their target.
> * testsuite/std/time/tzdb/links.cc: New test.
> ---
>  libstdc++-v3/src/c++20/tzdb.cc|  57 -
>  libstdc++-v3/testsuite/std/time/tzdb/links.cc | 215 ++
>  2 files changed, 268 insertions(+), 4 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/std/time/tzdb/links.cc
>
> diff --git a/libstdc++-v3/src/c++20/tzdb.cc b/libstdc++-v3/src/c++20/tzdb.cc
> index 639d1c440ba..c7c7cc9deee 100644
> --- a/libstdc++-v3/src/c++20/tzdb.cc
> +++ b/libstdc++-v3/src/c++20/tzdb.cc
> @@ -1599,7 +1599,7 @@ namespace std::chrono
>  const time_zone*
>  do_locate_zone(const vector& zones,
>const vector& links,
> -  string_view tz_name) noexcept
> +  string_view tz_name)
>  {
>// Lambda mangling changed between -fabi-version=2 and -fabi-version=18
>auto search = [](const Vec& v, string_view name) {
> @@ -1610,13 +1610,62 @@ namespace std::chrono
> return ptr;
>};
>
> +  // Search zones first.
>if (auto tz = search(zones, tz_name))
> return tz;
>
> +  // Search links second.
>if (auto tz_l = search(links, tz_name))
> -   return search(zones, tz_l->target());
> +   {
> + // Handle the common case of a link that has a zone as the target.
> + if (auto tz = search(zones, tz_l->target())) [[likely]]
> +   return tz;
>
> -  return nullptr;
> + // Either tz_l->target() doesn't exist, or we have a chain of links.
> + // Use Floyd's cycle-finding algorithm to avoid infinite loops,
> + // at the cost of extra lookups. In the common case we expect a
> + // chain of links to be short so the loop won't run many times.
> + // In particular, the duplicate lookups to move the tortoise
> + // never happen unless the chain has four or more links.
> + // When a chain contains a cycle we do multiple duplicate lookups,
> + // but that case should never happen with correct tzdata.zi,
> + // so there's no need to optimize cycle detection.
> +
> + const time_zone_link* tortoise = tz_l;
> + const time_zone_link* hare = search(links, tz_l->target());
> + while (hare)
> +   {
> + // Chains should be short, so first check if it ends here:
> + if 

Re: [PATCH] libgcc: Fix up __divmodbitint4 [PR114755]

2024-04-18 Thread Richard Biener
INT_ORDER__ == __ORDER_BIG_ENDIAN__
> -   r += rn - 1;
> -   u += un - 1;
> -#endif
> -   if (up)
> - --un;
> -   if (rn < un)
> - un = rn;
> -   for (rn -= un; un; --un)
> - {
> -   *r = *u;
> -   r += BITINT_INC;
> -   u += BITINT_INC;
> - }
> -   if (!rn)
> - return;
> -   if (up)
> - {
> -   if (uprec > 0)
> - *r = *u & (((UWtype) 1 << up) - 1);
> -   else
> - *r = *u | ((UWtype) -1 << up);
> -   r += BITINT_INC;
> -   if (!--rn)
> - return;
> - }
> -   UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0;
> -   for (; rn; --rn)
> - {
> -   *r = c;
> -   r += BITINT_INC;
> - }
> -   return;
>   }
> +  UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0;
> +  for (; rn; --rn)
> + {
> +   *r = c;
> +   r += BITINT_INC;
> + }
> +  return;
>  }
>USItype qn2 = un - vn + 1;
>if (qn >= qn2)
> --- gcc/testsuite/gcc.dg/torture/bitint-69.c.jj   2024-04-17 
> 19:09:34.165521448 +0200
> +++ gcc/testsuite/gcc.dg/torture/bitint-69.c  2024-04-17 19:10:25.343814139 
> +0200
> @@ -0,0 +1,26 @@
> +/* PR libgcc/114755 */
> +/* { dg-do run { target bitint } } */
> +/* { dg-options "-std=c23" } */
> +/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
> +
> +#if __BITINT_MAXWIDTH__ >= 255
> +_BitInt(65)
> +foo (void)
> +{
> +  _BitInt(255) a = 0x040404040404040404040404wb;
> +  _BitInt(65) b = -0xwb;
> +  _BitInt(65) r = a % b;
> +  return r;
> +}
> +#endif
> +
> +int
> +main ()
> +{
> +#if __BITINT_MAXWIDTH__ >= 255
> +  _BitInt(65) x = foo ();
> +  if (x != 0x0404040408080808wb)
> +__builtin_abort ();
> +#endif
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] internal-fn: Temporarily disable flag_trapv during .{ADD,SUB,MUL}_OVERFLOW etc. expansion [PR114753]

2024-04-18 Thread Richard Biener
t; +++ gcc/testsuite/gcc.dg/pr114753.c   2024-04-17 13:54:14.035352376 +0200
> @@ -0,0 +1,14 @@
> +/* PR middle-end/114753 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -ftrapv" } */
> +
> +int
> +main ()
> +{
> +  volatile long long i = __LONG_LONG_MAX__;
> +  volatile long long j = 2;
> +  long long k;
> +  if (!__builtin_mul_overflow (i, j, ) || k != -2LL)
> +__builtin_abort ();
> +  return 0;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH v2 2/2] lto-wrapper: Truncate files using -truncate driver option [PR110710]

2024-04-18 Thread Richard Biener
On Thu, Apr 18, 2024 at 6:12 AM Peter Damianov  wrote:
>
> This commit changes the Makefiles generated by lto-wrapper to no longer use
> the "mv" and "touch" shell commands. These don't exist on Windows, so when the
> Makefile attempts to call them, it results in errors like:
> The system cannot find the file specified.
>
> This problem only manifested when calling gcc from cmd.exe, and having no
> sh.exe present on the PATH. The Windows port of GNU Make searches the PATH for
> an sh.exe, and uses it if present.
>
> I have tested this in environments with and without sh.exe on the PATH and
> confirmed it works as expected.

OK.

Thanks,
Richard.

> Signed-off-by: Peter Damianov 
> ---
>  gcc/lto-wrapper.cc | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
> index 02579951569..a36a59e6fb8 100644
> --- a/gcc/lto-wrapper.cc
> +++ b/gcc/lto-wrapper.cc
> @@ -2023,14 +2023,12 @@ cont:
>   fprintf (mstream, "%s:\n\t@%s ", output_name, new_argv[0]);
>   for (j = 1; new_argv[j] != NULL; ++j)
> fprintf (mstream, " '%s'", new_argv[j]);
> - fprintf (mstream, "\n");
>   /* If we are not preserving the ltrans input files then
>  truncate them as soon as we have processed it.  This
>  reduces temporary disk-space usage.  */
>   if (! save_temps)
> -   fprintf (mstream, "\t@-touch -r \"%s\" \"%s.tem\" > /dev/null 
> "
> -"2>&1 && mv \"%s.tem\" \"%s\"\n",
> -input_name, input_name, input_name, input_name);
> +   fprintf (mstream, " '-truncate' '%s'", input_name);
> + fprintf (mstream, "\n");
> }
>   else
> {
> --
> 2.39.2
>


Re: [PATCH v2 1/2] Driver: Add new -truncate option

2024-04-18 Thread Richard Biener
On Thu, Apr 18, 2024 at 6:12 AM Peter Damianov  wrote:
>
> This commit adds a new option to the driver that truncates one file after
> linking.
>
> Tested like so:
>
> $ gcc hello.c -c
> $ du -h hello.o
> 4.0K  hello.o
> $ gcc hello.o -truncate hello
> $ ./a.out
> Hello world
> $ du -h hello.o
> $ 0   hello.o

I suppose it should have been

$ gcc hello.o -truncate hello.o

in the example.

> $ gcc hello.o -truncate
> gcc: error: missing filename after '-truncate'
>
> The motivation for adding this is PR110710. It is used by lto-wrapper to
> truncate files in a shell-independent manner.

This looks good to me.

Thanks,
Richard.

> Signed-off-by: Peter Damianov 
> ---
> v2: moved truncation to driver::final_actions
> v2: moved handling of OPT_truncate to be in alphabetic order
>
>  gcc/common.opt |  6 ++
>  gcc/gcc.cc | 13 +
>  2 files changed, 19 insertions(+)
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index ad348844775..40cab3cb36a 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -422,6 +422,12 @@ Display target specific command line options (including 
> assembler and linker opt
>  -time
>  Driver Alias(time)
>
> +;; Truncate the file specified after linking.
> +;; This option is used by lto-wrapper to reduce the peak disk-usage when
> +;; linking with many .LTRANS units.
> +truncate
> +Driver Separate Undocumented MissingArgError(missing filename after %qs)
> +
>  -verbose
>  Driver Alias(v)
>
> diff --git a/gcc/gcc.cc b/gcc/gcc.cc
> index 728332b8153..b4169bbd3be 100644
> --- a/gcc/gcc.cc
> +++ b/gcc/gcc.cc
> @@ -2138,6 +2138,10 @@ static int have_E = 0;
>  /* Pointer to output file name passed in with -o. */
>  static const char *output_file = 0;
>
> +/* Pointer to input file name passed in with -truncate.
> +   This file should be truncated after linking. */
> +static const char *totruncate_file = 0;
> +
>  /* This is the list of suffixes and codes (%g/%u/%U/%j) and the associated
> temp file.  If the HOST_BIT_BUCKET is used for %j, no entry is made for
> it here.  */
> @@ -4538,6 +4542,10 @@ driver_handle_option (struct gcc_options *opts,
>do_save = false;
>break;
>
> +case OPT_truncate:
> +  totruncate_file = arg;
> +  break;
> +
>  case OPT:
>/* "-###"
>  This is similar to -v except that there is no execution
> @@ -9286,6 +9294,11 @@ driver::final_actions () const
>  delete_failure_queue ();
>delete_temp_files ();
>
> +  if (totruncate_file != NULL && !seen_error ())
> +/* Truncate file specified by -truncate.
> +   Used by lto-wrapper to reduce temporary disk-space usage. */
> +truncate(totruncate_file, 0);
> +
>if (print_help_list)
>  {
>printf (("\nFor bug reporting instructions, please see:\n"));
> --
> 2.39.2
>


Re: [PATCH] DOCUMENTATION_ROOT_URL vs. release branches [PR114738]

2024-04-17 Thread Richard Biener
On Wed, Apr 17, 2024 at 1:17 PM Jakub Jelinek  wrote:
>
> Hi!
>
> Starting with GCC 14 we have the nice URLification of the options printed
> in diagnostics, for example in
> test.c:4:23: warning: format ‘%d’ expects argument of type ‘int’, but 
> argument 2 has type ‘long int’ [-Wformat=]
> the -Wformat= is underlined in some terminals and hovering on it shows
> https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wformat
> link.
>
> This works nicely on the GCC trunk, where the online documentation is
> regenerated every day from a cron job and more importantly, people rarely
> use the trunk snapshots for too long, so it is unlikely that further changes
> in the documentation will make too many links stale, because users will
> simply regularly update to newer snapshots.
>
> I think it doesn't work properly on release branches though.
> Some users only use the released versions (i.e. MAJOR.MINOR.0) from tarballs
> but can use them for a couple of years, others use snapshots from the
> release branches, but again they could be in use for months or years and
> the above mentioned online docs which represent just the GCC trunk might
> diverge significantly.
>
> Now, for the releases we always publish also online docs for the release,
> which unlike the trunk online docs will not change further, under
> e.g.
> https://gcc.gnu.org/onlinedocs/gcc-14.1.0/gcc/Warning-Options.html#index-Wformat
> or
> https://gcc.gnu.org/onlinedocs/gcc-14.2.0/gcc/Warning-Options.html#index-Wformat
> etc.
>
> So, I think at least for the MAJOR.MINOR.0 releases we want to use
> URLs like above rather than the trunk ones and we can use the same process
> of updating *.opt.urls as well for that.
>
> For the snapshots from release branches, we don't have such docs.
> One option (implemented in the patch below for the URL printing side) is to
> point to the MAJOR.MINOR.0 docs even for MAJOR.MINOR.1 snapshots.
> Most of the links will work fine; options newly added on the release
> branches (a rare thing, but it still happens) would have no URLs until the
> next release and get them with the next point release.
> The question is what to do about make regenerate-opt-urls for the release
> branch snapshots.  Either just document that users shouldn't
> make regenerate-opt-urls on release branches (and filter out *.opt.urls
> changes from their commits), make the regenerate-opt-urls task an RM
> responsibility before making the first release candidate from a branch and
> adjust the autoregen CI to know about that.  Or add a separate goal
> which instead of relying on make html created files would download
> copy of the html files from the last release from web (kind of web
> mirroring the https://gcc.gnu.org/onlinedocs/gcc-14.1.0/ subtree locally)
> and doing regenerate-opt-urls on top of that?  But how to catch the
> point when first release candidate is made and we want to update to
> what will be the URLs once the release is made (but will be stale URLs
> for a week or so)?
>
> Another option would be to add to cron daily regeneration of the online
> docs for the release branches.  I don't think that is a good idea though,
> because as I wrote earlier, not all users update to the latest snapshot
> frequently, so there can be users that use gcc 13.1.1 20230525 for months
> or years, and other users which use gcc 13.1.1 20230615 for years etc.
>
> Another question is what is most sensible for users who want to override
> the default root and use the --with-documentation-root-url= configure
> option.  Do we expect them to grab the whole onlinedocs tree or for release
> branches at least include gcc-14.1.0/ subdirectory under the root?
> If so, the patch below deals with that.  Or should we just change the
> default documentation root url, so if user doesn't specify
> --with-documentation-root-url= and we are on a release branch, default that
> to https://gcc.gnu.org/onlinedocs/gcc-14.1.0/ or
> https://gcc.gnu.org/onlinedocs/gcc-14.2.0/ etc. and don't add any infix in
> get_option_url/make_doc_url, but when people supply their own, let them
> point to the root of the tree which contains the right docs?
> Then such changes would go into gcc/configure.ac, some case based on
> "$gcc_version", from that decide if it is a release branch or trunk.

I think this patch is sensible and an improvement over the current situation.
I guess we'll have to see how things evolve on the branch and adapt.

So, OK.

Thanks,
Richard.

> 2024-04-17  Jakub Jelinek  
>
> PR other/114738
> * opts.cc (get_option_url): On release branches append
> gcc-MAJOR.MINOR.0/ after DOCUMENTATION_ROOT_URL.
> * gcc-urlifier.cc (gcc_urlifier::make_doc_url): Likewise.
>
> --- gcc/opts.cc.jj  2024-01-05 08:35:13.600828652 +0100
> +++ gcc/opts.cc 2024-04-17 12:03:10.961525141 +0200
> @@ -3761,7 +3761,19 @@ get_option_url (const diagnostic_context
>  {
>label_text url_suffix = get_option_url_suffix (option_index, 
> lang_mask);
>   

[PATCH] Support {CEIL, FLOOR, ROUND}_{DIV, MOD}_EXPR and EXACT_DIV_EXPR in GIMPLE FE

2024-04-17 Thread Richard Biener
The following adds support for the various division and modulo operators
to the GIMPLE frontend via __{CEIL,FLOOR,ROUND}_{DIV,MOD} and
__EXACT_DIV operators.

Bootstrapped and tested on x86_64-unknown-linux-gnu, queued for stage1.

Richard.

gcc/c/
* gimple-parser.cc (c_parser_gimple_binary_expression):
Parse __{CEIL,FLOOR,ROUND}_{DIV,MOD} and __EXACT_DIV.

gcc/testsuite/
* gcc.dg/gimplefe-53.c: New testcase.
---
 gcc/c/gimple-parser.cc | 35 ++
 gcc/testsuite/gcc.dg/gimplefe-53.c | 16 ++
 2 files changed, 51 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/gimplefe-53.c

diff --git a/gcc/c/gimple-parser.cc b/gcc/c/gimple-parser.cc
index 2dac41a335a..d156d83cd37 100644
--- a/gcc/c/gimple-parser.cc
+++ b/gcc/c/gimple-parser.cc
@@ -1055,6 +1055,41 @@ c_parser_gimple_binary_expression (gimple_parser 
&parser, tree ret_type)
code = LTGT_EXPR;
break;
  }
+   else if (strcmp (IDENTIFIER_POINTER (id), "__FLOOR_DIV") == 0)
+ {
+   code = FLOOR_DIV_EXPR;
+   break;
+ }
+   else if (strcmp (IDENTIFIER_POINTER (id), "__ROUND_DIV") == 0)
+ {
+   code = ROUND_DIV_EXPR;
+   break;
+ }
+   else if (strcmp (IDENTIFIER_POINTER (id), "__EXACT_DIV") == 0)
+ {
+   code = EXACT_DIV_EXPR;
+   break;
+ }
+   else if (strcmp (IDENTIFIER_POINTER (id), "__CEIL_DIV") == 0)
+ {
+   code = CEIL_DIV_EXPR;
+   break;
+ }
+   else if (strcmp (IDENTIFIER_POINTER (id), "__FLOOR_MOD") == 0)
+ {
+   code = FLOOR_MOD_EXPR;
+   break;
+ }
+   else if (strcmp (IDENTIFIER_POINTER (id), "__ROUND_MOD") == 0)
+ {
+   code = ROUND_MOD_EXPR;
+   break;
+ }
+   else if (strcmp (IDENTIFIER_POINTER (id), "__CEIL_MOD") == 0)
+ {
+   code = CEIL_MOD_EXPR;
+   break;
+ }
   }
   /* Fallthru.  */
 default:
diff --git a/gcc/testsuite/gcc.dg/gimplefe-53.c 
b/gcc/testsuite/gcc.dg/gimplefe-53.c
new file mode 100644
index 000..926c77c74d4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gimplefe-53.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-fgimple" } */
+
+int __GIMPLE
+foo (int a, int b)
+{
+  int tem;
+  tem = a __EXACT_DIV b;
+  tem = tem __CEIL_DIV b;
+  tem = tem __FLOOR_DIV b;
+  tem = tem __ROUND_DIV b;
+  tem = tem __FLOOR_MOD b;
+  tem = tem __CEIL_MOD b;
+  tem = tem __ROUND_MOD b;
+  return tem;
+}
-- 
2.35.3


[PATCH] tree-optimization/114749 - reset partial vector decision for no-SLP retry

2024-04-17 Thread Richard Biener
The following makes sure to reset LOOP_VINFO_USING_PARTIAL_VECTORS_P
to its default of false when re-trying without SLP as otherwise
analysis may run into bogus asserts.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

PR tree-optimization/114749
* tree-vect-loop.cc (vect_analyze_loop_2): Reset
LOOP_VINFO_USING_PARTIAL_VECTORS_P when re-trying without SLP.
---
 gcc/tree-vect-loop.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 431b3e9492c..a6cf0a5546c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3400,6 +3400,7 @@ again:
   LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
 = saved_can_use_partial_vectors_p;
+  LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
 
   goto start_over;
 }
-- 
2.35.3


Re: [PATCH] asan: Don't instrument .ABNORMAL_DISPATCHER [PR114743]

2024-04-17 Thread Richard Biener
On Wed, 17 Apr 2024, Jakub Jelinek wrote:

> Hi!
> 
> .ABNORMAL_DISPATCHER is currently the only internal function with
> ECF_NORETURN, and asan likes to instrument ECF_NORETURN calls by adding
> some builtin call before them, which breaks the .ABNORMAL_DISPATCHER
> discovery added in gsi_safe_*.
> 
> The following patch fixes asan not to instrument .ABNORMAL_DISPATCHER
> calls, like it doesn't instrument a couple of specific builtin calls
> as well.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK

> 2024-04-17  Jakub Jelinek  
> 
>   PR sanitizer/114743
>   * asan.cc (maybe_instrument_call): Don't instrument calls to
>   .ABNORMAL_DISPATCHER.
> 
>   * gcc.dg/asan/pr112709-2.c (freddy): New function from
>   gcc.dg/ubsan/pr112709-2.c version of the test.
> 
> --- gcc/asan.cc.jj2024-04-11 11:12:03.756191961 +0200
> +++ gcc/asan.cc   2024-04-16 17:32:14.304098386 +0200
> @@ -3030,6 +3030,9 @@ maybe_instrument_call (gimple_stmt_itera
> break;
>   }
>   }
> +  if (gimple_call_internal_p (stmt, IFN_ABNORMAL_DISPATCHER))
> + /* Don't instrument this.  */
> + return false;
>/* If a function does not return, then we must handle clearing up the
>shadow stack accordingly.  For ASAN we can simply set the entire stack
>to "valid" for accesses by setting the shadow space to 0 and all
> --- gcc/testsuite/gcc.dg/asan/pr112709-2.c.jj 2024-03-13 09:18:58.000925135 
> +0100
> +++ gcc/testsuite/gcc.dg/asan/pr112709-2.c2024-04-16 17:34:26.084301656 
> +0200
> @@ -48,3 +48,15 @@ l3:
>if (x < 4)
>  goto *q[x & 3];
>  }
> +
> +void
> +freddy (int x, int *y, struct S *p)
> +{
> +  bar (*p);
> +  ++p;
> +  if (x == 25)
> +x = foo (2);
> +  else if (x == 42)
> +x = foo (foo (3));
> +  *y = bar (*p);
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


[PATCH] middle-end/13421 - -ftrapv vs. POINTER_DIFF_EXPR

2024-04-16 Thread Richard Biener
Currently we expand POINTER_DIFF_EXPR using subv_optab when -ftrapv
(but -fsanitize=undefined does nothing).  That's not consistent
with the behavior of POINTER_PLUS_EXPR which never uses addv_optab
with -ftrapv.  Both are because of the way we select whether to use
the trapping or the non-trapping optab - we look at the result type
of the expression and check

  trapv = INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type);

The bug report correctly complains that -ftrapv affects pointer
subtraction (there's no -ftrapv-pointer).  Now that we have
POINTER_DIFF_EXPR we can honor that appropriately.

The patch moves both POINTER_DIFF_EXPR and POINTER_PLUS_EXPR
handling so they will never consider trapping (or saturating)
optabs.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK for stage1?

Thanks,
Richard.

PR middle-end/13421
* optabs-tree.cc (optab_for_tree_code): Do not consider
{add,sub}v or {us,ss}{add,sub} optabs for POINTER_DIFF_EXPR
or POINTER_PLUS_EXPR.
---
 gcc/optabs-tree.cc | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc
index e7bd0d10892..b69a5bc3676 100644
--- a/gcc/optabs-tree.cc
+++ b/gcc/optabs-tree.cc
@@ -135,6 +135,12 @@ optab_for_tree_code (enum tree_code code, const_tree type,
 case MIN_EXPR:
   return TYPE_UNSIGNED (type) ? umin_optab : smin_optab;
 
+case POINTER_PLUS_EXPR:
+  return add_optab;
+
+case POINTER_DIFF_EXPR:
+  return sub_optab;
+
 case REALIGN_LOAD_EXPR:
   return vec_realign_load_optab;
 
@@ -249,13 +255,11 @@ optab_for_tree_code (enum tree_code code, const_tree type,
   trapv = INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type);
   switch (code)
 {
-case POINTER_PLUS_EXPR:
 case PLUS_EXPR:
   if (TYPE_SATURATING (type))
return TYPE_UNSIGNED (type) ? usadd_optab : ssadd_optab;
   return trapv ? addv_optab : add_optab;
 
-case POINTER_DIFF_EXPR:
 case MINUS_EXPR:
   if (TYPE_SATURATING (type))
return TYPE_UNSIGNED (type) ? ussub_optab : sssub_optab;
-- 
2.35.3


[PATCH] tree-optimization/114736 - SLP DFS walk issue

2024-04-16 Thread Richard Biener
The following fixes a DFS walk issue when identifying latch edges to be
ignored.  We have (bogus) SLP_TREE_REPRESENTATIVEs for VEC_PERM
nodes so those have to be explicitly ignored as possibly being PHIs.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

PR tree-optimization/114736
* tree-vect-slp.cc (vect_optimize_slp_pass::is_cfg_latch_edge):
Do not consider VEC_PERM_EXPRs as PHI use.

* gfortran.dg/vect/pr114736.f90: New testcase.
---
 gcc/testsuite/gfortran.dg/vect/pr114736.f90 | 14 ++
 gcc/tree-vect-slp.cc|  3 ++-
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/vect/pr114736.f90

diff --git a/gcc/testsuite/gfortran.dg/vect/pr114736.f90 
b/gcc/testsuite/gfortran.dg/vect/pr114736.f90
new file mode 100644
index 000..cdbfb6f415a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr114736.f90
@@ -0,0 +1,14 @@
+! { dg-do compile }
+! { dg-additional-options "-O3" }
+
+SUBROUTINE MY_ROUTINE (N, A, B )
+IMPLICIT NONE
+INTEGER,   INTENT(IN):: N
+COMPLEX,   INTENT(IN):: A(N)
+COMPLEX,   INTENT(OUT)   :: B(N)
+INTEGER  :: II
+B(:) = (1.,0.)
+DO II = 1, N-1
+B(II) = A(N-II+1) / A(N-II)
+ENDDO
+END SUBROUTINE MY_ROUTINE
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 2e5481acbc7..109f318c7d6 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4328,7 +4328,8 @@ vect_optimize_slp_pass::is_cfg_latch_edge (graph_edge *ud)
 {
   slp_tree use = m_vertices[ud->src].node;
   slp_tree def = m_vertices[ud->dest].node;
-  if (SLP_TREE_DEF_TYPE (use) != vect_internal_def
+  if ((SLP_TREE_DEF_TYPE (use) != vect_internal_def
+   || SLP_TREE_CODE (use) == VEC_PERM_EXPR)
   || SLP_TREE_DEF_TYPE (def) != vect_internal_def)
 return false;
 
-- 
2.35.3


[PATCH] tree-optimization/114733 - neg induction fails for 1 element vectors

2024-04-16 Thread Richard Biener
The neg induction vectorization code isn't prepared to deal with
single element vectors.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

PR tree-optimization/114733
* tree-vect-loop.cc (vectorizable_nonlinear_induction): Reject
neg induction vectorization of single element vectors.

* gcc.dg/vect/pr114733.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr114733.c | 20 
 gcc/tree-vect-loop.cc|  2 ++
 2 files changed, 22 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr114733.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr114733.c 
b/gcc/testsuite/gcc.dg/vect/pr114733.c
new file mode 100644
index 000..219cbf20469
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr114733.c
@@ -0,0 +1,20 @@
+/* { dg-additional-options "-O3" } */
+
+#include "tree-vect.h"
+
+long b = 1;
+signed char c;
+int d[25];
+
+int main()
+{
+  check_vect ();
+  for (signed char g = 0; g < 8; g += 1)
+for (short h = 0; h < 25; h += 2) {
+  b *= -1;
+  c ^= d[h];
+}
+  if (b != 1)
+abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 025319e0cb1..431b3e9492c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -9685,6 +9685,8 @@ vectorizable_nonlinear_induction (loop_vec_info 
loop_vinfo,
   switch (induction_type)
 {
 case vect_step_op_neg:
+  if (maybe_eq (TYPE_VECTOR_SUBPARTS (vectype), 1u))
+   return false;
   if (TREE_CODE (init_expr) != INTEGER_CST
  && TREE_CODE (init_expr) != REAL_CST)
{
-- 
2.35.3


Re: [PATCH] Fix some comment nits

2024-04-16 Thread Richard Biener
On Tue, 16 Apr 2024, Jakub Jelinek wrote:

> Hi!
> 
> While studying the TYPE_CANONICAL/TYPE_STRUCTURAL_EQUALITY_P stuff,
> I've noticed some nits in comments, the following patch fixes them.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK (IMO obvious).

Richard.

> 2024-04-16  Jakub Jelinek  
> 
>   * tree.cc (array_type_nelts): Ensure 2 spaces after . in comment
>   instead of just one.
>   (build_variant_type_copy): Likewise.
>   (tree_check_failed): Likewise.
>   (build_atomic_base): Likewise.
>   * ipa-free-lang-data.cc (fld_incomplete_type_of): Use an indefinite
>   article rather than a.
> 
> --- gcc/tree.cc.jj2024-04-15 14:25:21.721813546 +0200
> +++ gcc/tree.cc   2024-04-15 17:49:25.923935286 +0200
> @@ -3689,7 +3689,7 @@ int_byte_position (const_tree field)
>  }
>  
>  /* Return, as a tree node, the number of elements for TYPE (which is an
> -   ARRAY_TYPE) minus one. This counts only elements of the top array.  */
> +   ARRAY_TYPE) minus one.  This counts only elements of the top array.  */
>  
>  tree
>  array_type_nelts (const_tree type)
> @@ -5757,7 +5757,7 @@ build_variant_type_copy (tree type MEM_S
>t = build_distinct_type_copy (type PASS_MEM_STAT);
>  
>/* Since we're building a variant, assume that it is a non-semantic
> - variant. This also propagates TYPE_STRUCTURAL_EQUALITY_P. */
> + variant.  This also propagates TYPE_STRUCTURAL_EQUALITY_P. */
>TYPE_CANONICAL (t) = TYPE_CANONICAL (type);
>/* Type variants have no alias set defined.  */
>TYPE_ALIAS_SET (t) = -1;
> @@ -8915,7 +8915,7 @@ get_file_function_name (const char *type
>  #if defined ENABLE_TREE_CHECKING && (GCC_VERSION >= 2007)
>  
>  /* Complain that the tree code of NODE does not match the expected 0
> -   terminated list of trailing codes. The trailing code list can be
> +   terminated list of trailing codes.  The trailing code list can be
> empty, for a more vague error message.  FILE, LINE, and FUNCTION
> are of the caller.  */
>  
> @@ -9332,7 +9332,7 @@ make_or_reuse_accum_type (unsigned size,
>  
>  /* Create an atomic variant node for TYPE.  This routine is called
> during initialization of data types to create the 5 basic atomic
> -   types. The generic build_variant_type function requires these to
> +   types.  The generic build_variant_type function requires these to
> already be set up in order to function properly, so cannot be
> called from there.  If ALIGN is non-zero, then ensure alignment is
> overridden to this value.  */
> --- gcc/ipa-free-lang-data.cc.jj  2024-04-15 14:25:21.668814259 +0200
> +++ gcc/ipa-free-lang-data.cc 2024-04-15 17:49:47.303647823 +0200
> @@ -234,7 +234,7 @@ fld_decl_context (tree ctx)
>return ctx;
>  }
>  
> -/* For T being aggregate type try to turn it into a incomplete variant.
> +/* For T being aggregate type try to turn it into an incomplete variant.
> Return T if no simplification is possible.  */
>  
>  static tree
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] [testsuite] [i386] require fpic for pr111497.C

2024-04-16 Thread Richard Biener
On Tue, Apr 16, 2024 at 5:43 AM Alexandre Oliva  wrote:
>
>
> Fix another test that uses -fPIC without requiring fpic support.
>
> Regstrapped on x86_64-linux-gnu.  Also tested with gcc-13 on arm-,
> aarch64-, x86- and x86_64-vxworks7r2.  Ok to install?

OK.

> PS: This is neither the first nor the last such patch.  Maybe the test
> harness could detect -fPIC et al in compile options and react
> intelligently to them, whether by warning if dg-require-effective-target
> fpic is missing, or adding it implicitly.  We could have more such
> smarts in the testsuite machinery.  WDYT?

Might be a possibility I guess.  There are other options like
-fprofile-* which have matching effective targets.

>
> for  gcc/testsuite/ChangeLog
>
> * g++.target/i386/pr111497.C: Require fpic support.
> ---
>  gcc/testsuite/g++.target/i386/pr111497.C |1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/gcc/testsuite/g++.target/i386/pr111497.C 
> b/gcc/testsuite/g++.target/i386/pr111497.C
> index a645bb95907ee..30e2e0409ad0e 100644
> --- a/gcc/testsuite/g++.target/i386/pr111497.C
> +++ b/gcc/testsuite/g++.target/i386/pr111497.C
> @@ -1,5 +1,6 @@
>  // { dg-do compile { target ia32 } }
>  // { dg-options "-march=i686 -mtune=generic -fPIC -O2 -g" }
> +// { dg-require-effective-target fpic }
>
>  class A;
>  struct B { const char *b1; int b2; };
>
> --
> Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
>    Free Software Activist   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving "normal" is *not* inclusive


Re: [PATCH] [testsuite] xfail pr103798-2 in C++ on vxworks too [PR113706]

2024-04-16 Thread Richard Biener
On Tue, Apr 16, 2024 at 5:31 AM Alexandre Oliva  wrote:
>
>
> pr103798-2.c fails in C++ on targets that provide a ISO C++-compliant
> declaration of memchr, because it mismatches the C-compatible builtin,
> as per PR113706.  Expect the C++ test to fail on vxworks as well.
>
> Regstrapped on x86_64-linux-gnu.  Also tested with gcc-13 on arm-,
> aarch64-, x86- and x86_64-vxworks7r2.  Ok to install?

OK

>
> for  gcc/testsuite/ChangeLog
>
> PR testsuite/113706
> * c-c++-common/pr103798-2.c: XFAIL in C++ on vxworks too.
> ---
>  gcc/testsuite/c-c++-common/pr103798-2.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c 
> b/gcc/testsuite/c-c++-common/pr103798-2.c
> index bc126c205e1e3..83cdfaa1660bb 100644
> --- a/gcc/testsuite/c-c++-common/pr103798-2.c
> +++ b/gcc/testsuite/c-c++-common/pr103798-2.c
> @@ -28,4 +28,4 @@ main ()
>  }
>
>  /* See PR c++/113706 for the xfail.  */
> -/* { dg-final { scan-assembler-not "memchr" { xfail { c++ && *-*-solaris2* } 
> } } } */
> +/* { dg-final { scan-assembler-not "memchr" { xfail { c++ && { *-*-solaris2* 
> *-*-vxworks* } } } } } */
>
> --
> Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
>    Free Software Activist   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving "normal" is *not* inclusive


Re: [PATCH] [testsuite] [analyzer] include sys/select.h if available

2024-04-16 Thread Richard Biener
On Tue, Apr 16, 2024 at 5:29 AM Alexandre Oliva  wrote:
>
>
> Test that calls select fails on vxworks because select is only
> declared in sys/select.h.  Include that header if it's present.
>
> Regstrapped on x86_64-linux-gnu.  Also tested with gcc-13 on arm-,
> aarch64-, x86- and x86_64-vxworks7r2.  Ok to install?

OK

>
> for  gcc/testsuite/ChangeLog
>
> * gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c:
> Include sys/select.h if present.
> ---
>  .../fd-glibc-byte-stream-connection-server.c   |3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git 
> a/gcc/testsuite/gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c 
> b/gcc/testsuite/gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c
> index fcbcc740170e6..f922a52238f90 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/fd-glibc-byte-stream-connection-server.c
> @@ -8,6 +8,9 @@
>  #include 
>  #include 
>  #include 
> +#if __has_include(<sys/select.h>)
> +#include <sys/select.h>
> +#endif
>  #include 
>  #include 
>  #include 
>
> --
> Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
>    Free Software Activist   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving "normal" is *not* inclusive


Re: [PATCH] [testsuite] [analyzer] require fork where used

2024-04-16 Thread Richard Biener
On Tue, Apr 16, 2024 at 5:27 AM Alexandre Oliva  wrote:
>
>
> Mark tests that fail due to the lack of fork, as in vxworks kernel
> mode, as requiring fork.
>
> Regstrapped on x86_64-linux-gnu.  Also tested with gcc-13 on arm-,
> aarch64-, x86- and x86_64-vxworks7r2.  Ok to install?
>

OK

> for  gcc/testsuite/ChangeLog
>
> * gcc.dg/analyzer/pipe-glibc.c: Require fork.
> * gcc.dg/analyzer/pipe-manpages.c: Likewise.
> ---
>  gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c|5 +++--
>  gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c |2 ++
>  2 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c 
> b/gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c
> index 60558a870b9d7..fe38ddef3959a 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c
> @@ -1,6 +1,7 @@
> -/* Example of pipe usage from glibc manual.  */
> -
>  /* { dg-skip-if "" { "avr-*-*" } } */
> +/* { dg-require-fork "" } */
> +
> +/* Example of pipe usage from glibc manual.  */
>
>  #include 
>  #include 
> diff --git a/gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c 
> b/gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c
> index 6b9ae4d260281..ac5805fdba092 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/pipe-manpages.c
> @@ -1,3 +1,5 @@
> +/* { dg-require-fork "" } */
> +
>  /* Example of "pipe" from release 5.13 of the Linux man-pages project.
>
>  Copyright (C) 2005, 2008, Michael Kerrisk 
>
> --
> Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
>    Free Software Activist   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving "normal" is *not* inclusive


Re: [PATCH] [testsuite] [analyzer] avoid vxworks libc mode_t

2024-04-16 Thread Richard Biener
On Tue, Apr 16, 2024 at 5:25 AM Alexandre Oliva  wrote:
>
>
> Define macro that prevents mode_t from being defined by vxworks'
> headers as well.
>
> Regstrapped on x86_64-linux-gnu.  Also tested with gcc-13 on arm-,
> aarch64-, x86- and x86_64-vxworks7r2.  Ok to install?

OK

>
> for  gcc/testsuite/ChangeLog
>
> * gcc.dg/analyzer/fd-4.c: Define macro to avoid mode_t on
> vxworks.
> ---
>  gcc/testsuite/gcc.dg/analyzer/fd-4.c |1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/gcc/testsuite/gcc.dg/analyzer/fd-4.c 
> b/gcc/testsuite/gcc.dg/analyzer/fd-4.c
> index 880de3d789607..d104bfdad547f 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/fd-4.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/fd-4.c
> @@ -1,4 +1,5 @@
>  /* { dg-additional-options "-D_MODE_T_DECLARED=1" { target newlib } } */
> +/* { dg-additional-options "-D_DEFINED_mode_t" { target *-*-vxworks* } } */
>  #if defined(_AIX) || defined(__hpux)
>  #define _MODE_T
>  #endif
>
> --
> Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
>    Free Software Activist   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving "normal" is *not* inclusive


Re: [PATCH] [testsuite] [analyzer] skip access-mode: O_ACCMODE on vxworks

2024-04-16 Thread Richard Biener
On Tue, Apr 16, 2024 at 5:26 AM Alexandre Oliva  wrote:
>
>
> O_ACCMODE is not defined on vxworks, and the test is meaningless and
> failing without it, so skip it.
>
> Regstrapped on x86_64-linux-gnu.  Also tested with gcc-13 on arm-,
> aarch64-, x86- and x86_64-vxworks7r2.  Ok to install?

OK

>
> for  gcc/testsuite/ChangeLog
>
> * gcc.dg/analyzer/fd-access-mode-target-headers.c: Skip on
> vxworks as well.
> ---
>  .../analyzer/fd-access-mode-target-headers.c   |3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.dg/analyzer/fd-access-mode-target-headers.c 
> b/gcc/testsuite/gcc.dg/analyzer/fd-access-mode-target-headers.c
> index b57b9fa2279c2..9fc32638a3de4 100644
> --- a/gcc/testsuite/gcc.dg/analyzer/fd-access-mode-target-headers.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/fd-access-mode-target-headers.c
> @@ -1,5 +1,4 @@
> -/* { dg-skip-if "" { powerpc*-*-aix* || newlib } } */
> -/* { dg-skip-if "" { avr-*-* } } */
> +/* { dg-skip-if "" { { powerpc*-*-aix* avr-*-* *-*-vxworks* } || newlib } } 
> */
>
>  #include 
>  #include 
>
> --
> Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
>    Free Software Activist   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving "normal" is *not* inclusive


Re: [PATCH] [testsuite] introduce strndup effective target

2024-04-16 Thread Richard Biener
On Tue, Apr 16, 2024 at 5:23 AM Alexandre Oliva  wrote:
>
>
> A number of tests that call strndup fail on vxworks, where there's no
> strndup.  Some of them already had workarounds to skip the strndup
> parts of the tests on platforms that don't offer it.  I've changed
> them to rely on a strndup effective target instead, and extended the
> logic to other tests that were otherwise skipped entirely.
>
> Regstrapped on x86_64-linux-gnu.  Also tested with gcc-13 on arm-,
> aarch64-, x86- and x86_64-vxworks7r2.  Ok to install?

OK

>
> for  gcc/ChangeLog
>
> * doc/sourcebuild.texi (strndup): Add effective target.
>
> for  gcc/testsuite/ChangeLog
>
> * lib/target-supports.exp (check_effective_target_strndup): New.
> * gcc.dg/builtin-dynamic-object-size-0.c: Skip strndup tests
> when the function is not available.
> * gcc.dg/builtin-dynamic-object-size-1.c: Likewise.
> * gcc.dg/builtin-dynamic-object-size-2.c: Likewise.
> * gcc.dg/builtin-dynamic-object-size-3.c: Likewise.
> * gcc.dg/builtin-dynamic-object-size-4.c: Likewise.
> * gcc.dg/builtin-object-size-1.c: Likewise.
> * gcc.dg/builtin-object-size-2.c: Likewise.
> * gcc.dg/builtin-object-size-3.c: Likewise.
> * gcc.dg/builtin-object-size-4.c: Likewise.
> ---
>  gcc/doc/sourcebuild.texi   |3 +++
>  .../gcc.dg/builtin-dynamic-object-size-0.c |   10 +-
>  gcc/testsuite/gcc.dg/builtin-object-size-1.c   |7 ---
>  gcc/testsuite/gcc.dg/builtin-object-size-2.c   |7 ---
>  gcc/testsuite/gcc.dg/builtin-object-size-3.c   |7 ---
>  gcc/testsuite/gcc.dg/builtin-object-size-4.c   |7 ---
>  gcc/testsuite/lib/target-supports.exp  |   11 +++
>  7 files changed, 39 insertions(+), 13 deletions(-)
>
> diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> index 7c0df90e82236..8e4e59ac44c74 100644
> --- a/gcc/doc/sourcebuild.texi
> +++ b/gcc/doc/sourcebuild.texi
> @@ -2837,6 +2837,9 @@ can be included without error when 
> @option{-mbig-endian} is passed.
>  @item stpcpy
>  Target provides @code{stpcpy} function.
>
> +@item strndup
> +Target provides @code{strndup} function.
> +
>  @item sysconf
>  Target supports @code{sysconf}.
>
> diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c 
> b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
> index 173e7c755f4c9..d02e37f79d95f 100644
> --- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
> +++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
> @@ -1,7 +1,7 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2" } */
>  /* { dg-require-effective-target size20plus } */
> -/* { dg-skip-if "no strndup" { hppa*-*-hpux* } } */
> +/* { dg-additional-options "-DSKIP_STRNDUP" { target { ! strndup } } } */
>
>  #include "builtin-object-size-common.h"
>
> @@ -567,6 +567,7 @@ test_strdup (const char *in)
>return sz;
>  }
>
> +#ifndef SKIP_STRNDUP
>  size_t
>  __attribute__ ((noinline))
>  test_strndup (const char *in, size_t bound)
> @@ -577,6 +578,7 @@ test_strndup (const char *in, size_t bound)
>__builtin_free (res);
>return sz;
>  }
> +#endif
>
>  size_t
>  __attribute__ ((noinline))
> @@ -589,6 +591,7 @@ test_strdup_min (const char *in)
>return sz;
>  }
>
> +#ifndef SKIP_STRNDUP
>  size_t
>  __attribute__ ((noinline))
>  test_strndup_min (const char *in, size_t bound)
> @@ -599,6 +602,7 @@ test_strndup_min (const char *in, size_t bound)
>__builtin_free (res);
>return sz;
>  }
> +#endif
>
>  /* Other tests.  */
>
> @@ -788,12 +792,16 @@ main (int argc, char **argv)
>const char *str = "hello world";
>if (test_strdup (str) != __builtin_strlen (str) + 1)
>  FAIL ();
> +#ifndef SKIP_STRNDUP
>if (test_strndup (str, 4) != 5)
>  FAIL ();
> +#endif
>if (test_strdup_min (str) != __builtin_strlen (str) + 1)
>  FAIL ();
> +#ifndef SKIP_STRNDUP
>if (test_strndup_min (str, 4) != 1)
>  FAIL ();
> +#endif
>
>DONE ();
>  }
> diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-1.c 
> b/gcc/testsuite/gcc.dg/builtin-object-size-1.c
> index 4f7d4c0b370f5..d6d13c5ef7a29 100644
> --- a/gcc/testsuite/gcc.dg/builtin-object-size-1.c
> +++ b/gcc/testsuite/gcc.dg/builtin-object-size-1.c
> @@ -1,6 +1,7 @@
>  /* { dg-do run } */
>  /* { dg-options "-O2 -Wno-stringop-overread" } */
>  /* { dg-require-effective-target alloca } */
> +/* { dg-additional-options "-DSKIP_STRNDUP" { target { ! strndup } } } */
>
>  #include "builtin-object-size-common.h"
>
> @@ -621,7 +622,7 @@ test10 (void)
>  }
>  }
>
> -#if !defined(__AVR__) && !defined(__hpux__) /* avr and hpux have no strndup 
> */
> +#ifndef SKIP_STRNDUP
>  /* Tests for strdup/strndup.  */
>  size_t
>  __attribute__ ((noinline))
> @@ -709,7 +710,7 @@ test11 (void)
>  FAIL ();
>free (res);
>  }
> -#endif /* avr */
> +#endif
>
>  int
>  main (void)
> @@ -726,7 +727,7 @@ main (void)
>test8 ();
>

Re: [PATCH] Document that vector_size works with typedefs [PR92880]

2024-04-16 Thread Richard Biener
On Tue, Apr 16, 2024 at 2:26 AM Andrew Pinski  wrote:
>
> This just adds a clause to make it more obvious that the vector_size
> attribute extension works with typedefs.
> Note this whole section needs a rewrite to be a similar format as other
> extensions. But that is for another day.
>
> OK?

OK

>
> gcc/ChangeLog:
>
> PR c/92880
> * doc/extend.texi (Using Vector Instructions): Add that
> the base_types could be a typedef of them.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/doc/extend.texi | 13 +++--
>  1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 7b54a241a7b..e290265d68d 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -12901,12 +12901,13 @@ typedef int v4si __attribute__ ((vector_size (16)));
>  @end smallexample
>
>  @noindent
> -The @code{int} type specifies the @dfn{base type}, while the attribute 
> specifies
> -the vector size for the variable, measured in bytes.  For example, the
> -declaration above causes the compiler to set the mode for the @code{v4si}
> -type to be 16 bytes wide and divided into @code{int} sized units.  For
> -a 32-bit @code{int} this means a vector of 4 units of 4 bytes, and the
> -corresponding mode of @code{foo} is @acronym{V4SI}.
> +The @code{int} type specifies the @dfn{base type} (which can be a
> +@code{typedef}), while the attribute specifies the vector size for the
> +variable, measured in bytes. For example, the declaration above causes
> +the compiler to set the mode for the @code{v4si} type to be 16 bytes wide
> +and divided into @code{int} sized units.  For a 32-bit @code{int} this
> +means a vector of 4 units of 4 bytes, and the corresponding mode of
> +@code{foo} is @acronym{V4SI}.
>
>  The @code{vector_size} attribute is only applicable to integral and
>  floating scalars, although arrays, pointers, and function return values
> --
> 2.43.0
>


Re: [PATCH]middle-end: skip vectorization check on ilp32 on vect-early-break_124-pr114403.c

2024-04-16 Thread Richard Biener
On Tue, 16 Apr 2024, Tamar Christina wrote:

> Hi all,
> 
> The testcase seems to fail vectorization on -m32 since the access pattern is
> determined as too complex.  This skips the vectorization check on ilp32 
> systems
> as I couldn't find a better proxy for being able to do strided 64-bit loads 
> and
> I suspect it would fail on all 32-bit targets.

You could try having Val aligned to 64bits in the structure (likely
32bit targets have it not aligned).

> Regtested on x86_64-pc-linux-gnu with -m32 and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/114403
>   * gcc.dg/vect/vect-early-break_124-pr114403.c: Skip in ilp32.
> 
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
> index 
> 1751296ab813fe85eaab1f58dc674bac10f6eb7a..db8e00556f116ca81c5a6558ec6ecd3b222ec93d
>  100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
> @@ -2,11 +2,11 @@
>  /* { dg-require-effective-target vect_early_break_hw } */
>  /* { dg-require-effective-target vect_long_long } */
>  
> -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { ! ilp32 } 
> } } } */
>  
>  #include "tree-vect.h"
>  
> -typedef unsigned long PV;
> +typedef unsigned long long PV;
>  typedef struct _buff_t {
>  int foo;
>  PV Val;
> 
> 
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: docs: document early break support and pragma novector

2024-04-16 Thread Richard Biener
On Tue, 16 Apr 2024, Tamar Christina wrote:

> docs: document early break support and pragma novector

OK.

> ---
> diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
> index 
> b4c602a523717c1d64333e44aefb60ba0ed02e7a..aceecb86f17443cfae637e90987427b98c42f6eb
>  100644
> --- a/htdocs/gcc-14/changes.html
> +++ b/htdocs/gcc-14/changes.html
> @@ -200,6 +200,34 @@ a work-in-progress.
>  for indicating parameters that are expected to be null-terminated
>  strings.
>
> +  
> +The vectorizer now supports vectorizing loops which contain any number 
> of early breaks.
> +This means loops such as:
> +
> + int z[100], y[100], x[100];
> + int foo (int n)
> + {
> +   int res = 0;
> +   for (int i = 0; i < n; i++)
> + {
> +y[i] = x[i] * 2;
> +res += x[i] + y[i];
> +
> +if (x[i] > 5)
> +  break;
> +
> +if (z[i] > 5)
> +  break;
> +
> + }
> +   return res;
> + }
> +
> +can now be vectorized on a number of targets.  In this first version any
> +input data sources must either have a statically known size at compile 
> time
> +or the vectorizer must be able to determine based on auxiliary 
> information
> +that the accesses are aligned.
> +  
>  
>  
>  New Languages and Language specific improvements
> @@ -231,6 +259,9 @@ a work-in-progress.
>previous options -std=c2x, -std=gnu2x
>and -Wc11-c2x-compat, which are deprecated but remain
>supported.
> +  GCC supports a new pragma #pragma GCC novector to
> +  indicate to the vectorizer not to vectorize the loop annotated with the
> +  pragma.
>  
>  
>  C++
> @@ -400,6 +431,9 @@ a work-in-progress.
>warnings are enabled for C++ as well
>The DR 2237 code no longer gives an error, it emits
>a -Wtemplate-id-cdtor warning instead
> +  GCC supports a new pragma #pragma GCC novector to
> +  indicate to the vectorizer not to vectorize the loop annotated with the
> +  pragma.
>  
>  
>  Runtime Library (libstdc++)
> 
> 
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] Guard longjmp in test to not inf loop [PR114720]

2024-04-15 Thread Richard Biener
On Mon, Apr 15, 2024 at 2:35 PM Jørgen Kvalsvik  wrote:
>
> Guard the longjmp to not infinitely loop. The longjmp (jump) function is
> called unconditionally to make test flow simpler, but the jump
> destination would return to a point in main that would call longjmp
> again. The longjmp is really there to exercise the then-branch of
> setjmp, to verify coverage is accurately counted in the presence of
> complex edges.

OK

> PR gcov-profile/114720
>
> gcc/testsuite/ChangeLog:
>
> * gcc.misc-tests/gcov-22.c: Guard longjmp to not loop.
> ---
>  gcc/testsuite/gcc.misc-tests/gcov-22.c | 14 +-
>  1 file changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/gcc.misc-tests/gcov-22.c 
> b/gcc/testsuite/gcc.misc-tests/gcov-22.c
> index 641791a7223..7ca78467ca3 100644
> --- a/gcc/testsuite/gcc.misc-tests/gcov-22.c
> +++ b/gcc/testsuite/gcc.misc-tests/gcov-22.c
> @@ -87,7 +87,19 @@ setdest ()
>  void
>  jump ()
>  {
> -longjmp (dest, 1);
> +/* Protect the longjmp so it will only be done once.  The whole purpose 
> of
> +   this function is to help test conditions and instrumentation around
> +   setjmp and its complex edges, as both branches should count towards
> +   coverage, even when one is taken through longjmp.  If the jump is not
> +   guarded it can cause an infinite loop as setdest returns to a point in
> +   main before jump (), leading to an infinite loop.  See PR
> +   gcov-profile/114720.  */
> +static int called_once = 0;
> +if (!called_once) /* conditions(suppress) */
> +{
> +   called_once = 1;
> +   longjmp (dest, 1);
> +}
>  }
>
>  int
> --
> 2.30.2
>


Re: [wwwdocs] gcc-14/changes.html (AMD GCN): Mention gfx1036 support

2024-04-15 Thread Richard Biener
On Mon, Apr 15, 2024 at 12:04 PM Tobias Burnus  wrote:
>
> I experimented with some variants to make clearer that each of RDNA2 and
> RDNA3 applies to two card types, but at the end I settled on the
> fewest-word version.
>
> Comments, remarks, suggestions? (To this change or in general?)
>
> Current version: https://gcc.gnu.org/gcc-14/changes.html#amdgcn
>
> Compiler flags, listing the gfx* cards:
> https://gcc.gnu.org/onlinedocs/gcc/AMD-GCN-Options.html
>
> Tobias
>
> PS: On the compiler side, I am looking forward to a .def file which
> reduces the number of files to change when adding a new gfx* card, given
> that we have doubled the number of entries. [Well, 1 missing but I know
> of one WIP addition.]

I do wonder whether hot-patching the ELF header from the libgomp plugin
with the actual micro-subarch would be possible to make the driver happy.
We do query the device ISA when initializing the device so we should
be able to massage the ELF header of the object in GOMP_OFFLOAD_load_image
at least within some constraints (ideally we'd mark the ELF object as to
be matched with a device in some group).

Richard.


Re: [PATCH] c, v3: Fix ICE with -g and -std=c23 related to incomplete types [PR114361]

2024-04-15 Thread Richard Biener
On Mon, 15 Apr 2024, Jakub Jelinek wrote:

> On Mon, Apr 15, 2024 at 10:05:58AM +0200, Jakub Jelinek wrote:
> > On Mon, Apr 15, 2024 at 10:02:25AM +0200, Richard Biener wrote:
> > > > Though, haven't managed to reproduce it with -O2 -flto -std=c23
> > > > struct S;
> > > > typedef struct S **V[10];
> > > > V **foo (int x) { return 0; }
> > > > struct S { int s; };
> > > > either.
> > > > So, maybe let's drop the ipa-free-lang-data.cc part?
> > > > Seems fld_incomplete_type_of uses fld_type_variant which should
> > > > copy over TYPE_CANONICAL.
> > > 
> > > If you have a testcase that still triggers it would be nice to see it.
> > 
> > I don't, that is why I'm now suggesting to just drop that hunk.
> 
> Actually no, I've just screwed up something in my testing.
> One can reproduce it easily with -O2 -flto 20021205-1.c -std=c23
> if the ipa-free-lang-data.cc hunk is removed.
> This happens when fld_incomplete_type_of is called on a POINTER_TYPE
> to RECORD_TYPE x, where the RECORD_TYPE x is not the TYPE_MAIN_VARIANT,
> but another variant created by set_underlying_type.  The
> c_update_type_canonical didn't touch TYPE_CANONICAL in those, I was too
> afraid I don't know what TYPE_CANONICAL should be for all variant types,
> so that TREE_TYPE (t) had TYPE_CANONICAL NULL.  But when we call
> fld_incomplete_type_of on that TREE_TYPE (t), it sees it isn't
> TYPE_MAIN_VARIANT, so calls
>   return (fld_type_variant
>   (fld_incomplete_type_of (TYPE_MAIN_VARIANT (t), fld), t, fld));
> but TYPE_MAIN_VARIANT (t) has already TYPE_CANONICAL (TYPE_MAIN_VARIANT (t))
> == TYPE_MAIN_VARIANT (t), that one has been completed on finish_struct.
> And so we trigger the assertion, because
> TYPE_CANONICAL (t2) == TYPE_CANONICAL (TREE_TYPE (t))
> is no longer true, the former is non-NULL, the latter is NULL.
> 
> But looking at all the build_variant_type_copy callers and the call itself,
> the call itself sets TYPE_CANONICAL to the TYPE_CANONICAL of the type on
> which it is called and the only caller I can find that changes
> TYPE_CANONICAL sometimes is build_qualified_type.
> So, I'd hope that normally all variant types of an aggregate type (or
> pointer type) have the same TYPE_CANONICAL if they have the same TYPE_QUALS
> and if they have it different, they have TYPE_CANONICAL of
> build_qualified_type of the base TYPE_CANONICAL.

The middle-end assumes that TYPE_CANONICAL of all variant types are
the same, for TBAA purposes it immediately "puns" to
TYPE_CANONICAL (TYPE_MAIN_VARIANT (..)).  It also assumes that
the canonical type is not a variant type.  Note we never "honor"
TYPE_STRUCTURAL_EQUALITY_P on a variant type (because we don't look
at it, we only look at whether the main variant has
TYPE_STRUCTURAL_EQUALITY_P).

Thus, TYPE_CANONICAL of variant types in principle doesn't need to be
set (but not all places might go the extra step looking at the main
variant before accessing TYPE_CANONICAL).

Richard.

> With the following updated patch (ipa-free-lang-data.cc hunk removed,
> c_update_type_canonical function updated, plus removed trailing whitespace
> from tests),
> make check-gcc RUNTESTFLAGS="--target_board=unix/-std=gnu23 
> compile.exp='20021205-1.c 20040214-2.c 20060109-1.c pr113623.c pr46866.c 
> pta-1.c' execute.exp='pr33870-1.c pr33870.c'"
> no longer ICEs (have just expected FAILs on 20040214-2.c which isn't
> compatible with C23) and make check-gcc -j32 doesn't regress compared
> to the unpatched one.
> 
> Is this ok for trunk if it passes full bootstrap/regtest?
> 
> 2024-04-15  Martin Uecker  
>   Jakub Jelinek  
> 
>   PR lto/114574
>   PR c/114361
> gcc/c/
>   * c-decl.cc (shadow_tag_warned): For flag_isoc23 and code not
>   ENUMERAL_TYPE use SET_TYPE_STRUCTURAL_EQUALITY.
>   (parser_xref_tag): Likewise.
>   (start_struct): For flag_isoc23 use SET_TYPE_STRUCTURAL_EQUALITY.
>   (c_update_type_canonical): New function.
>   (finish_struct): Put NULL as second == operand rather than first.
>   Assert TYPE_STRUCTURAL_EQUALITY_P.  Call c_update_type_canonical.
>   * c-typeck.cc (composite_type_internal): Use
>   SET_TYPE_STRUCTURAL_EQUALITY.  Formatting fix.
> gcc/testsuite/
>   * gcc.dg/pr114574-1.c: New test.
>   * gcc.dg/pr114574-2.c: New test.
>   * gcc.dg/pr114361.c: New test.
>   * gcc.dg/c23-tag-incomplete-1.c: New test.
>   * gcc.dg/c23-tag-incomplete-2.c: New test.
> 
> --- gcc/c/c-decl.cc.jj2024-04-09 09:29:04.824520299 +0200
> +++ gcc/c/c-decl.cc   2024-04-15 12:26:43.000790475 +0200
> @@ -5051,6 +5051,8 @@ shadow_tag_warned (const struct c_declsp
>

[PATCH] gcov-profile/114715 - missing coverage for switch

2024-04-15 Thread Richard Biener
The following avoids missing coverage for the line of a switch statement
which happens when gimplification emits a BIND_EXPR wrapping the switch
as that prevents us from setting locations on the containing statements
via annotate_all_with_location.  Instead set the location of the GIMPLE
switch directly.

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK for trunk?

Thanks,
Richard.

PR gcov-profile/114715
* gimplify.cc (gimplify_switch_expr): Set the location of the
GIMPLE switch.

* gcc.misc-tests/gcov-24.c: New testcase.
---
 gcc/gimplify.cc|  1 +
 gcc/testsuite/gcc.misc-tests/gcov-24.c | 30 ++
 2 files changed, 31 insertions(+)
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-24.c

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 3df58b962f3..26e96ada4c7 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -3017,6 +3017,7 @@ gimplify_switch_expr (tree *expr_p, gimple_seq *pre_p)
 
   switch_stmt = gimple_build_switch (SWITCH_COND (switch_expr),
 default_case, labels);
+  gimple_set_location (switch_stmt, EXPR_LOCATION (switch_expr));
   /* For the benefit of -Wimplicit-fallthrough, if switch_body_seq
 ends with a GIMPLE_LABEL holding SWITCH_BREAK_LABEL_P LABEL_DECL,
 wrap the GIMPLE_SWITCH up to that GIMPLE_LABEL into a GIMPLE_BIND,
diff --git a/gcc/testsuite/gcc.misc-tests/gcov-24.c 
b/gcc/testsuite/gcc.misc-tests/gcov-24.c
new file mode 100644
index 000..395099bd7ae
--- /dev/null
+++ b/gcc/testsuite/gcc.misc-tests/gcov-24.c
@@ -0,0 +1,30 @@
+/* { dg-options "-fprofile-arcs -ftest-coverage" } */
+/* { dg-do run { target native } } */
+
+int main()
+{
+  int a = 1;
+  int b = 2;
+  int c = -3;
+  switch(a) /* count(1) */
+{
+case 1: /* count(1) */
+c = 3;
+switch(b) { /* count(1) */
+  case 1: /* count(#) */
+  c = 4;
+  break;
+  case 2: /* count(1) */
+  c = 5;
+  break;
+}
+break;
+case 2: /* count(#) */
+c = 6;
+break;
+default: /* count(#) */
+break;
+}
+}
+
+/* { dg-final { run-gcov gcov-24.c } } */
-- 
2.35.3


Re: [PATCH] attribs: Don't crash on NULL TREE_TYPE in diag_attr_exclusions [PR114634]

2024-04-15 Thread Richard Biener
On Mon, 15 Apr 2024, Jakub Jelinek wrote:

> Hi!
> 
> The enumerator still doesn't have TREE_TYPE set but diag_attr_exclusions
> assumes that all decls must have types.
> I think it is better in something as unimportant as diag_attr_exclusions
> to be more robust, if there is no type, it can just diagnose exclusions
> on the DECL_ATTRIBUTES, like for types it only diagnoses it on
> TYPE_ATTRIBUTES.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK, but can you add a comment?

Thanks,
Richard.

> 2024-04-15  Jakub Jelinek  
> 
>   PR c++/114634
>   * attribs.cc (diag_attr_exclusions): Set attrs[1] to NULL_TREE for
>   decls with NULL TREE_TYPE.
> 
>   * g++.dg/ext/attrib68.C: New test.
> 
> --- gcc/attribs.cc.jj 2024-02-12 20:44:52.409074876 +0100
> +++ gcc/attribs.cc2024-04-12 18:29:52.000381917 +0200
> @@ -468,7 +468,10 @@ diag_attr_exclusions (tree last_decl, tr
>if (DECL_P (node))
>  {
>attrs[0] = DECL_ATTRIBUTES (node);
> -  attrs[1] = TYPE_ATTRIBUTES (TREE_TYPE (node));
> +  if (TREE_TYPE (node))
> + attrs[1] = TYPE_ATTRIBUTES (TREE_TYPE (node));
> +  else
> + attrs[1] = NULL_TREE;
>  }
>else
>  {
> --- gcc/testsuite/g++.dg/ext/attrib68.C.jj2024-04-12 18:31:38.100968098 
> +0200
> +++ gcc/testsuite/g++.dg/ext/attrib68.C   2024-04-12 18:30:57.011515625 
> +0200
> @@ -0,0 +1,8 @@
> +// PR c++/114634
> +// { dg-do compile }
> +
> +template 
> +struct A
> +{
> +  enum { e __attribute__ ((aligned (16))) }; // { dg-error "alignment may 
> not be specified for 'e'" }
> +};
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [C PATCH, v2] Fix ICE with -g and -std=c23 related to incomplete types [PR114361]

2024-04-15 Thread Richard Biener
On Mon, 15 Apr 2024, Jakub Jelinek wrote:

> On Mon, Apr 15, 2024 at 09:38:29AM +0200, Jakub Jelinek wrote:
> > I had this spot instrumented to log the different cases (before adding the
> > code to fix up also pointer types in c_update_type_canonical) and the only 
> > thing
> > that triggered was that the 2 TYPE_CANONICALs weren't equal if
> > TYPE_STRUCTURAL_EQUALITY_P (TREE_TYPE (t)), the other was just in case.
> > gcc.c-torture/compile/20021205-1.c
> > gcc.c-torture/compile/20040214-2.c
> > gcc.c-torture/compile/20060109-1.c
> > gcc.c-torture/compile/pr113623.c
> > gcc.c-torture/compile/pr46866.c
> > gcc.c-torture/compile/pta-1.c
> > gcc.c-torture/execute/pr33870-1.c
> > gcc.c-torture/execute/pr33870.c
> > gcc.dg/torture/pr57478.c
> > tests were affected in make check-gcc.
> > I thought it would be a clear consequence of the choice we've discussed on
> > IRC, that build_pointer_type_for_mode and other tree.cc functions which
> > lookup/create derived types don't try to fill in TYPE_CANONICAL for
> > types derived from something which initially had TYPE_STRUCTURAL_EQUALITY_P
> > but later changed to non-TYPE_STRUCTURAL_EQUALITY_P.  The patch updates
> > it solely for qualified types/related pointer types, but doesn't do that
> > for array types, pointer to array types, function types, ...
> > So, I think the assertion could still trigger if we have something like
> > -O2 -flto -std=c23
> > struct S;
> > typedef struct S *T;
> > typedef T U[10];
> > typedef U *V;
> > V foo (int x) { return 0; }
> > struct S { int s; };
> > (but doesn't, dunno what I'm missing; though here certainly V and U have
> > TYPE_STRUCTURAL_EQUALITY_P, even T has because it is a typedef, not
> > something actually normally returned by build_pointer_type).
> 
> Though, haven't managed to reproduce it with -O2 -flto -std=c23
> struct S;
> typedef struct S **V[10];
> V **foo (int x) { return 0; }
> struct S { int s; };
> either.
> So, maybe let's drop the ipa-free-lang-data.cc part?
> Seems fld_incomplete_type_of uses fld_type_variant which should
> copy over TYPE_CANONICAL.

If you have a testcase that still triggers it would be nice to see it.

Richard.


Re: [Backport 1/2] tree-profile: Disable indirect call profiling for IFUNC resolvers

2024-04-15 Thread Richard Biener
On Mon, 15 Apr 2024, Richard Biener wrote:

> On Sun, 14 Apr 2024, H.J. Lu wrote:
> 
> > We can't profile indirect calls to IFUNC resolvers nor their callees as
> > it requires TLS which hasn't been set up yet when the dynamic linker is
> > resolving IFUNC symbols.
> > 
> > Add an IFUNC resolver caller marker to cgraph_node and set it if the
> > function is called by an IFUNC resolver.  Disable indirect call profiling
> > for IFUNC resolvers and their callees.
> > 
> > Tested with profiledbootstrap on Fedora 39/x86-64.
> > 
> > gcc/ChangeLog:
> > 
> > PR tree-optimization/114115
> > * cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes.
> > (cgraph_node): Add called_by_ifunc_resolver.
> > * cgraphunit.cc (symbol_table::compile): Call
> > symtab_node::check_ifunc_callee_symtab_nodes.
> > * symtab.cc (check_ifunc_resolver): New.
> > (ifunc_ref_map): Likewise.
> > (is_caller_ifunc_resolver): Likewise.
> > (symtab_node::check_ifunc_callee_symtab_nodes): Likewise.
> > * tree-profile.cc (gimple_gen_ic_func_profiler): Disable indirect
> > call profiling for IFUNC resolvers and their callees.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > PR tree-optimization/114115
> > * gcc.dg/pr114115.c: New test.
> > 
> > (cherry picked from commit cab32bacaea268ec062b1fb4fc662d90c9d1cfce)
> > ---
> >  gcc/cgraph.h|  6 +++
> >  gcc/cgraphunit.cc   |  2 +
> >  gcc/symtab.cc   | 89 +
> >  gcc/testsuite/gcc.dg/pr114115.c | 24 +
> >  gcc/tree-profile.cc |  8 ++-
> >  5 files changed, 128 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/pr114115.c
> > 
> > diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> > index c1a3691b6f5..430c87d8bb7 100644
> > --- a/gcc/cgraph.h
> > +++ b/gcc/cgraph.h
> > @@ -479,6 +479,9 @@ public:
> >   Return NULL if there's no such node.  */
> >static symtab_node *get_for_asmname (const_tree asmname);
> >  
> > +  /* Check symbol table for callees of IFUNC resolvers.  */
> > +  static void check_ifunc_callee_symtab_nodes (void);
> > +
> >/* Verify symbol table for internal consistency.  */
> >static DEBUG_FUNCTION void verify_symtab_nodes (void);
> >  
> > @@ -896,6 +899,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
> > public symtab_node
> >redefined_extern_inline (false), tm_may_enter_irr (false),
> >ipcp_clone (false), declare_variant_alt (false),
> >calls_declare_variant_alt (false), gc_candidate (false),
> > +  called_by_ifunc_resolver (false),
> >m_uid (uid), m_summary_id (-1)
> >{}
> >  
> > @@ -1491,6 +1495,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
> > public symtab_node
> >   is set for local SIMD clones when they are created and cleared if the
> >   vectorizer uses them.  */
> >unsigned gc_candidate : 1;
> > +  /* Set if the function is called by an IFUNC resolver.  */
> > +  unsigned called_by_ifunc_resolver : 1;
> >  
> >  private:
> >/* Unique id of the node.  */
> > diff --git a/gcc/cgraphunit.cc b/gcc/cgraphunit.cc
> > index bccd2f2abb5..40dcceccca5 100644
> > --- a/gcc/cgraphunit.cc
> > +++ b/gcc/cgraphunit.cc
> > @@ -2313,6 +2313,8 @@ symbol_table::compile (void)
> >  
> >symtab_node::checking_verify_symtab_nodes ();
> >  
> > +  symtab_node::check_ifunc_callee_symtab_nodes ();
> > +
> >timevar_push (TV_CGRAPHOPT);
> >if (pre_ipa_mem_report)
> >  dump_memory_report ("Memory consumption before IPA");
> > diff --git a/gcc/symtab.cc b/gcc/symtab.cc
> > index 0470509a98d..df09def81e9 100644
> > --- a/gcc/symtab.cc
> > +++ b/gcc/symtab.cc
> > @@ -1369,6 +1369,95 @@ symtab_node::verify (void)
> >timevar_pop (TV_CGRAPH_VERIFY);
> >  }
> >  
> > +/* Return true and set *DATA to true if NODE is an ifunc resolver.  */
> > +
> > +static bool
> > +check_ifunc_resolver (cgraph_node *node, void *data)
> > +{
> > +  if (node->ifunc_resolver)
> > +{
> > +  bool *is_ifunc_resolver = (bool *) data;
> > +  *is_ifunc_resolver = true;
> > +  return true;
> > +}
> > +  return false;
> > +}
> > +
> > +static auto_bitmap ifunc_ref_map;
> 
> Please don't use static auto_bitmap, that isn't constructed
> properly.
> 
>

Re: [Backport 1/2] tree-profile: Disable indirect call profiling for IFUNC resolvers

2024-04-15 Thread Richard Biener
l.  */
> +  if (e->caller == node)
> + continue;
> +
> +  /* Skip if it has been visited.  */
> +  unsigned int uid = e->caller->get_uid ();
> +  if (bitmap_bit_p (ifunc_ref_map, uid))
> + continue;
> +  bitmap_set_bit (ifunc_ref_map, uid);
> +
> +  if (is_caller_ifunc_resolver (e->caller))
> + {
> +   /* Return true if caller is an IFUNC resolver.  */
> +   e->caller->called_by_ifunc_resolver = true;
> +   return true;
> + }
> +
> +  /* Check if caller's alias is an IFUNC resolver.  */
> +  e->caller->call_for_symbol_and_aliases (check_ifunc_resolver,
> +   &is_ifunc_resolver,
> +   true);
> +  if (is_ifunc_resolver)
> + {
> +   /* Return true if caller's alias is an IFUNC resolver.  */
> +   e->caller->called_by_ifunc_resolver = true;
> +   return true;
> + }
> +}
> +
> +  return false;
> +}
> +
> +/* Check symbol table for callees of IFUNC resolvers.  */
> +
> +void
> +symtab_node::check_ifunc_callee_symtab_nodes (void)
> +{
> +  symtab_node *node;
> +
> +  FOR_EACH_SYMBOL (node)
> +{
> +  cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
> +  if (!cnode)
> + continue;
> +
> +  unsigned int uid = cnode->get_uid ();
> +  if (bitmap_bit_p (ifunc_ref_map, uid))
> + continue;
> +  bitmap_set_bit (ifunc_ref_map, uid);
> +
> +  bool is_ifunc_resolver = false;
> +  cnode->call_for_symbol_and_aliases (check_ifunc_resolver,
> +   &is_ifunc_resolver, true);
> +  if (is_ifunc_resolver || is_caller_ifunc_resolver (cnode))
> + cnode->called_by_ifunc_resolver = true;
> +}
> +
> +  bitmap_clear (ifunc_ref_map);
> +}
> +
>  /* Verify symbol table for internal consistency.  */
>  
>  DEBUG_FUNCTION void
> diff --git a/gcc/testsuite/gcc.dg/pr114115.c b/gcc/testsuite/gcc.dg/pr114115.c
> new file mode 100644
> index 000..2629f591877
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr114115.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -fprofile-generate -fdump-tree-optimized" } */
> +/* { dg-require-profiling "-fprofile-generate" } */
> +/* { dg-require-ifunc "" } */
> +
> +void *foo_ifunc2() __attribute__((ifunc("foo_resolver")));
> +
> +void bar(void)
> +{
> +}
> +
> +static int f3()
> +{
> +  bar ();
> +  return 5;
> +}
> +
> +void (*foo_resolver(void))(void)
> +{
> +  f3();
> +  return bar;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "__gcov_indirect_call_profiler_v" 
> "optimized" } } */
> diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
> index da300d5f9e8..b5de0fb914f 100644
> --- a/gcc/tree-profile.cc
> +++ b/gcc/tree-profile.cc
> @@ -418,7 +418,13 @@ gimple_gen_ic_func_profiler (void)
>gcall *stmt1;
>tree tree_uid, cur_func, void0;
>  
> -  if (c_node->only_called_directly_p ())
> +  /* Disable indirect call profiling for an IFUNC resolver and its
> + callees since it requires TLS which hasn't been set up yet when
> + the dynamic linker is resolving IFUNC symbols.  See
> + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115
> +   */
> +  if (c_node->only_called_directly_p ()
> +  || c_node->called_by_ifunc_resolver)
>  return;
>  
>gimple_init_gcov_profiler ();
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [C PATCH, v2] Fix ICE with -g and -std=c23 related to incomplete types [PR114361]

2024-04-15 Thread Richard Biener
c/testsuite/gcc.dg/pr114361.c b/gcc/testsuite/gcc.dg/pr114361.c
> new file mode 100644
> index 000..0f3feb53566
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr114361.c
> @@ -0,0 +1,11 @@
> +/* PR c/114361 */
> +/* { dg-do compile } */
> +/* { dg-options "-std=gnu23 -g" } */
> +
> +void f()
> +{
> +typedef struct foo bar;
> +typedef __typeof( ({ (struct foo { bar *x; }){ }; }) ) wuz;
> +struct foo { wuz *x; };
> +}
> +
> diff --git a/gcc/testsuite/gcc.dg/pr114574-1.c 
> b/gcc/testsuite/gcc.dg/pr114574-1.c
> new file mode 100644
> index 000..060dcdbe73e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr114574-1.c
> @@ -0,0 +1,10 @@
> +/* PR lto/114574
> + * { dg-do compile }
> + * { dg-options "-flto" } */
> +
> +const struct S * x;
> +struct S {};
> +void f(const struct S **);
> +
> +
> +
> diff --git a/gcc/testsuite/gcc.dg/pr114574-2.c 
> b/gcc/testsuite/gcc.dg/pr114574-2.c
> new file mode 100644
> index 000..723291e2211
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr114574-2.c
> @@ -0,0 +1,10 @@
> +/* PR lto/114574
> + * { dg-do compile }
> + * { dg-options "-flto -std=c23" } */
> +
> +const struct S * x;
> +struct S {};
> +void f(const struct S **);
> +
> +
> +
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH]middle-end: adjust loop upper bounds when peeling for gaps and early break [PR114403].

2024-04-12 Thread Richard Biener
nfo) ? 1 : 0;
   int bias_for_assumed = bias_for_lowest;
   int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
   if (alignment_npeels && LOOP_VINFO_USING_PARTIAL_VECTORS_P 
(loop_vinfo))


> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/114403
>   * tree-vect-loop.cc (vect_transform_loop): Adjust upper bounds for when
>   peeling for gaps and early break.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/114403
>   * gcc.dg/vect/vect-early-break_124-pr114403.c: New test.
> 
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
> new file mode 100644
> index 
> ..ae5e53efc45e7bef89c5a72abd6afa48292668db
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
> @@ -0,0 +1,74 @@
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break_hw } */
> +/* { dg-require-effective-target vect_long_long } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +#include "tree-vect.h"
> +
> +typedef unsigned long PV;
> +typedef struct _buff_t {
> +int foo;
> +PV Val;
> +} buff_t;
> +
> +#define NUM 9
> +#define SZ NUM * sizeof (PV)
> +char buffer[SZ];
> +
> +__attribute__ ((noipa))
> +buff_t *copy (buff_t *first, buff_t *last)
> +{
> +  char *buffer_ptr = buffer;
> +  char *const buffer_end = &buffer[SZ-1];
> +  int store_size = sizeof(first->Val);
> +  while (first != last && (buffer_ptr + store_size) <= buffer_end)
> +{
> +  const char *value_data = (const char *)(&first->Val);
> +  __builtin_memcpy(buffer_ptr, value_data, store_size);
> +  buffer_ptr += store_size;
> +  ++first;
> +}
> +
> +  if (first == last)
> +return 0;
> +
> +  return first;
> +}
> +
> +int main ()
> +{
> +  /* Copy an ascii buffer.  We need to trigger the loop to exit from
> + the condition where we have more data to copy but not enough space.
> + For this test that means that OVL must be > SZ.  */
> +#define OVL NUM*2
> +  char str[OVL]="abcdefghiabcdefgh\0";
> +  buff_t tmp[OVL];
> +
> +#pragma GCC novector
> +  for (int i = 0; i < OVL; i++)
> +tmp[i].Val = str[i];
> +
> +  buff_t *start = &tmp[0];
> +  buff_t *last = &tmp[OVL-1];
> +  buff_t *res = 0;
> +
> +  /* This copy should exit on the early exit, in which case we know
> + that start != last as we had more data to copy but the buffer
> + was full.  */
> +  if (!(res = copy (start, last)))
> +__builtin_abort ();
> +
> +  /* Check if we have the right reduction value.  */
> +  if (res != &tmp[NUM-1])
> +__builtin_abort ();
> +
> +  int store_size = sizeof(PV);
> +#pragma GCC novector
> +  for (int i = 0; i < NUM - 1; i+=store_size)
> +if (0 != __builtin_memcmp (buffer+i, (char*)&tmp[i].Val, store_size))
> +  __builtin_abort ();
> +
> +  return 0;
> +}
> +
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 
> 4375ebdcb493a90fd0501cbb4b07466077b525c3..024a24a305c4727f97eb022247f4dca791c52dfe
>  100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -12144,6 +12144,12 @@ vect_transform_loop (loop_vec_info loop_vinfo, 
> gimple *loop_vectorized_call)
>   -min_epilogue_iters to remove iterations that cannot be performed
> by the vector code.  */
>int bias_for_lowest = 1 - min_epilogue_iters;
> +  /* For an early break we must always assume that the vector loop can be
> + executed partially.  In this definition a partial iteration means that 
> we
> + take an exit before the IV exit.  */
> +  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> +bias_for_lowest = 1;
> +
>int bias_for_assumed = bias_for_lowest;
>int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
>if (alignment_npeels && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
> 
> 
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] tree-cfg: Make the verifier returns_twice message translatable

2024-04-12 Thread Richard Biener



> Am 12.04.2024 um 09:58 schrieb Jakub Jelinek :
> 
> Hi!
> 
> While translation of the verifier messages is questionable, that case is
> something that ideally should never happen except to gcc developers
> and so presumably English should be fine, we use error etc. APIs and
> those imply translations and some translators translate it.
> The following patch adjusts the code such that we don't emit
> appel returns_twice est not first dans le bloc de base 33
> in French (i.e. 2 English words in the middle of a French message).
> Similarly Swedish or Ukrainian.
> Note, the German translator did differentiate between these verifier
> messages vs. normal user facing and translated it to:
> "Interner Fehler: returns_twice call is %s in basic block %d"
> so just a German prefix before English message.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok

Richard 

> 2024-04-12  Jakub Jelinek  
> 
>* tree-cfg.cc (gimple_verify_flow_info): Make the misplaced
>returns_twice diagnostics translatable.
> 
> --- gcc/tree-cfg.cc.jj2024-04-10 10:19:04.237471564 +0200
> +++ gcc/tree-cfg.cc2024-04-11 17:18:57.962672110 +0200
> @@ -5818,7 +5818,7 @@ gimple_verify_flow_info (void)
>  if (gimple_code (stmt) == GIMPLE_CALL
>  && gimple_call_flags (stmt) & ECF_RETURNS_TWICE)
>{
> -  const char *misplaced = NULL;
> +  bool misplaced = false;
>  /* TM is an exception: it points abnormal edges just after the
> call that starts a transaction, i.e. it must end the BB.  */
>  if (gimple_call_builtin_p (stmt, BUILT_IN_TM_START))
> @@ -5826,18 +5826,23 @@ gimple_verify_flow_info (void)
>  if (single_succ_p (bb)
>  && bb_has_abnormal_pred (single_succ (bb))
>  && !gsi_one_nondebug_before_end_p (gsi))
> -misplaced = "not last";
> +{
> +  error ("returns_twice call is not last in basic block "
> + "%d", bb->index);
> +  misplaced = true;
> +}
>}
>  else
>{
> -  if (seen_nondebug_stmt
> -  && bb_has_abnormal_pred (bb))
> -misplaced = "not first";
> +  if (seen_nondebug_stmt && bb_has_abnormal_pred (bb))
> +{
> +  error ("returns_twice call is not first in basic block "
> + "%d", bb->index);
> +  misplaced = true;
> +}
>}
>  if (misplaced)
>{
> -  error ("returns_twice call is %s in basic block %d",
> - misplaced, bb->index);
>  print_gimple_stmt (stderr, stmt, 0, TDF_SLIM);
>  err = true;
>}
> 
>Jakub
> 


Re: [PATCH] Limit special asan/ubsan/bitint returns_twice handling to calls in bbs with abnormal pred [PR114687]

2024-04-12 Thread Richard Biener



> Am 12.04.2024 um 09:50 schrieb Jakub Jelinek :
> 
> Hi!
> 
> The tree-cfg.cc verifier only diagnoses returns_twice calls preceded
> by non-label/debug stmts if it is in a bb with abnormal predecessor.
> The following testcase shows that if a user lies in the attributes
> (a function which never returns can't be pure, and can't return
> twice when it doesn't ever return at all), when we figure it out,
> we can remove the abnormal edges to the "returns_twice" call and perhaps
> whole .ABNORMAL_DISPATCHER etc.
> edge_before_returns_twice_call then ICEs because it can't find such
> an edge.
> 
> The following patch limits the special handling to calls in bbs where
> the verifier requires that.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok

Richard 

> 2024-04-12  Jakub Jelinek  
> 
>PR sanitizer/114687
>* gimple-iterator.cc (gsi_safe_insert_before): Only use
>edge_before_returns_twice_call if bb_has_abnormal_pred.
>(gsi_safe_insert_seq_before): Likewise.
>* gimple-lower-bitint.cc (bitint_large_huge::lower_call): Only
>push to m_returns_twice_calls if bb_has_abnormal_pred.
> 
>* gcc.dg/asan/pr114687.c: New test.
> 
> --- gcc/gimple-iterator.cc.jj2024-03-14 09:57:09.024966285 +0100
> +++ gcc/gimple-iterator.cc2024-04-11 17:05:06.267081433 +0200
> @@ -1049,7 +1049,8 @@ gsi_safe_insert_before (gimple_stmt_iter
>   gimple *stmt = gsi_stmt (*iter);
>   if (stmt
>   && is_gimple_call (stmt)
> -  && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
> +  && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0
> +  && bb_has_abnormal_pred (gsi_bb (*iter)))
> {
>   edge e = edge_before_returns_twice_call (gsi_bb (*iter));
>   basic_block new_bb = gsi_insert_on_edge_immediate (e, g);
> @@ -1072,7 +1073,8 @@ gsi_safe_insert_seq_before (gimple_stmt_
>   gimple *stmt = gsi_stmt (*iter);
>   if (stmt
>   && is_gimple_call (stmt)
> -  && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
> +  && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0
> +  && bb_has_abnormal_pred (gsi_bb (*iter)))
> {
>   edge e = edge_before_returns_twice_call (gsi_bb (*iter));
>   gimple *f = gimple_seq_first_stmt (seq);
> --- gcc/gimple-lower-bitint.cc.jj2024-04-09 09:28:21.261123664 +0200
> +++ gcc/gimple-lower-bitint.cc2024-04-11 17:06:58.033548199 +0200
> @@ -5320,7 +5320,7 @@ bitint_large_huge::lower_call (tree obj,
>  arg = make_ssa_name (TREE_TYPE (arg));
>  gimple *g = gimple_build_assign (arg, v);
>  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> -  if (returns_twice)
> +  if (returns_twice && bb_has_abnormal_pred (gimple_bb (stmt)))
>{
>  m_returns_twice_calls.safe_push (stmt);
>  returns_twice = false;
> --- gcc/testsuite/gcc.dg/asan/pr114687.c.jj2024-04-11 17:09:54.518127165 
> +0200
> +++ gcc/testsuite/gcc.dg/asan/pr114687.c2024-04-11 17:09:22.699563654 
> +0200
> @@ -0,0 +1,22 @@
> +/* PR sanitizer/114687 */
> +/* { dg-do compile } */
> +
> +int a;
> +int foo (int);
> +
> +__attribute__((pure, returns_twice)) int
> +bar (void)
> +{
> +  a = 1;
> +  while (a)
> +a = 2;
> +  return a;
> +}
> +
> +int
> +baz (void)
> +{
> +  int d = bar ();
> +  foo (d);
> +  return 0;
> +}
> 
>Jakub
> 


Re: [PATCH] match: Fix `!a?b:c` and `a?~t:t` patterns for signed 1 bit types [PR114666]

2024-04-12 Thread Richard Biener
On Fri, Apr 12, 2024 at 1:25 AM Andrew Pinski (QUIC)
 wrote:
>
> > -Original Message-
> > From: Richard Biener 
> > Sent: Thursday, April 11, 2024 2:31 AM
> > To: Andrew Pinski (QUIC) 
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH] match: Fix `!a?b:c` and `a?~t:t` patterns for signed 1 
> > bit
> > types [PR114666]
> >
> > On Thu, Apr 11, 2024 at 10:43 AM Andrew Pinski
> >  wrote:
> > >
> > > The issue here is that the `a?~t:t` pattern assumed (maybe correctly)
> > > that a here was always going to be a unsigned boolean type. This fixes
> > > the problem in both patterns to cast the operand to boolean type first.
> > >
> > > I should note that VRP seems to be keep on wanting to produce `a ==
> > > 0?1:-2` from `((int)a) ^ 1` is a bit odd and partly is the cause of
> > > the issue and there seems to be some disconnect on what should be the
> > > canonical form. That will be something to look at for GCC 15.
> > >
> > > Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> > >
> > > PR tree-optimization/114666
> > >
> > > gcc/ChangeLog:
> > >
> > > * match.pd (`!a?b:c`): Cast `a` to boolean type for cond for
> > > gimple.
> > > (`a?~t:t`): Cast `a` to boolean type before casting it
> > > to the type.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.c-torture/execute/bitfld-signed1-1.c: New test.
> > >
> > > Signed-off-by: Andrew Pinski 
> > > ---
> > >  gcc/match.pd| 10 +++---
> > >  .../gcc.c-torture/execute/bitfld-signed1-1.c| 13 +
> > >  2 files changed, 20 insertions(+), 3 deletions(-)  create mode 100644
> > > gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd index
> > > 15a1e7350d4..ffc928b656a 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -5895,7 +5895,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >   /* !A ? B : C -> A ? C : B.  */
> > >   (simplify
> > >(cnd (logical_inverted_value truth_valued_p@0) @1 @2)
> > > -  (cnd @0 @2 @1)))
> > > +  /* For gimple, make sure the operand to COND is a boolean type,
> > > + truth_valued_p will match 1bit integers too. */  (if (GIMPLE &&
> > > + cnd == COND_EXPR)
> > > +   (cnd (convert:boolean_type_node @0) @2 @1)
> > > +   (cnd @0 @2 @1
> >
> > This looks "wrong" for GENERIC still?
>
> I tried without the GIMPLE check and ran into the testcase 
> gcc.dg/torture/builtins-isinf-sign-1.c failing. Because the extra convert was 
> blocking seeing both sides of an equal was the same (I didn't look into it 
> further than that). So I decided to limit it to GIMPLE only.
>
> > But this is not really part of the fix but deciding we should not have
> > signed:1 as
> > cond operand?  I'll note that truth_valued_p allows signed:1.
> >
> > Maybe as minimal surgery add a TYPE_UNSIGNED (TREE_TYPE (@0)) check here
> > instead?
>
> That might work, let me try.
>
> >
> > >  /* abs/negative simplifications moved from
> > fold_cond_expr_with_comparison.
> > >
> > > @@ -7099,8 +7103,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > && (!wascmp || TYPE_PRECISION (type) == 1))
> > > (if ((!TYPE_UNSIGNED (type) && TREE_CODE (type) == BOOLEAN_TYPE)
> > > || TYPE_PRECISION (type) == 1)
> > > -(bit_xor (convert:type @0) @2)
> > > -(bit_xor (negate (convert:type @0)) @2)
> > > +(bit_xor (convert:type (convert:boolean_type_node @0)) @2)
> > > +(bit_xor (negate (convert:type (convert:boolean_type_node @0)))
> > > + @2)
> > >  #endif
> >
> > This looks OK, but then testing TYPE_UNSIGNED (TREE_TYPE (@0)) might be
> > better?
> >
>
> Let me do that just like the other pattern.
>
> > Does this all just go downhill from what VRP creates?  That is, would IL
> > checking have had a chance detecting it if we say signed:1 are not valid as
> > condition?
>
> Yes. So what VRP produces in the testcase is:
> `_2 == 0 ? 1 : -2u` (where _2 is the signed 1bit integer).
> Now maybe the COND_EXPR should be the canonical form for constants (but that 
> is for a different patch I think, I added it to the list of things I should 
> look into for GCC 15).

Ah OK, so th

Re: [PATCH v2] match: Fix `!a?b:c` and `a?~t:t` patterns for signed 1 bit types [PR114666]

2024-04-12 Thread Richard Biener
On Fri, Apr 12, 2024 at 6:53 AM Andrew Pinski  wrote:
>
> The problem is `!a?b:c` pattern will create a COND_EXPR with an 1bit signed 
> integer
> which breaks patterns like `a?~t:t`. This rejects when we have a signed 
> operand for
> both patterns.
>
> Note for GCC 15, I am going to look at the canonicalization of `a?~t:t` where 
> t
> was a constant since I think keeping it a COND_EXPR might be more canonical 
> and
> is what VRP produces from the same IR; if anything expand should handle which 
> one
> is better.
>
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

> PR tree-optimization/114666
>
> gcc/ChangeLog:
>
> * match.pd (`!a?b:c`): Reject signed types for the condition.
> (`a?~t:t`): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.c-torture/execute/bitfld-signed1-1.c: New test.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/match.pd|  6 +-
>  .../gcc.c-torture/execute/bitfld-signed1-1.c| 13 +
>  2 files changed, 18 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 15a1e7350d4..d401e7503e6 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5895,7 +5895,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   /* !A ? B : C -> A ? C : B.  */
>   (simplify
>(cnd (logical_inverted_value truth_valued_p@0) @1 @2)
> -  (cnd @0 @2 @1)))
> +  /* For CONDs, don't handle signed values here. */
> +  (if (cnd == VEC_COND_EXPR
> +   || TYPE_UNSIGNED (TREE_TYPE (@0)))
> +   (cnd @0 @2 @1
>
>  /* abs/negative simplifications moved from fold_cond_expr_with_comparison.
>
> @@ -7095,6 +7098,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (cond @0 @1 @2)
>   (with { bool wascmp; }
>(if (INTEGRAL_TYPE_P (type)
> +   && TYPE_UNSIGNED (TREE_TYPE (@0))
> && bitwise_inverted_equal_p (@1, @2, wascmp)
> && (!wascmp || TYPE_PRECISION (type) == 1))
> (if ((!TYPE_UNSIGNED (type) && TREE_CODE (type) == BOOLEAN_TYPE)
> diff --git a/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c 
> b/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
> new file mode 100644
> index 000..b0ff120ea51
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
> @@ -0,0 +1,13 @@
> +/* PR tree-optimization/114666 */
> +/* We used to miscompile this to be always aborting
> +   due to the use of the signed 1bit into the COND_EXPR. */
> +
> +struct {
> +  signed a : 1;
> +} b = {-1};
> +char c;
> +int main()
> +{
> +  if ((b.a ^ 1UL) < 3)
> +__builtin_abort();
> +}
> --
> 2.43.0
>


Re: [PATCH, OpenACC 2.7] Connect readonly modifier to points-to analysis

2024-04-12 Thread Richard Biener
On Thu, 11 Apr 2024, Thomas Schwinge wrote:

> Hi Chung-Lin, Richard!
> 
> From me just a few mechanical pieces, see below.  Richard, are you able
> to again comment on Chung-Lin's general strategy, as I'm not at all
> familiar with those parts of the code?

I've queued all stage1 material and will be only able to slowly look
at it after we released.

> On 2024-04-03T19:50:55+0800, Chung-Lin Tang  
> wrote:
> > On 2023/10/30 8:46 PM, Richard Biener wrote:
> >>>
> >>> What Chung-Lin's first patch does is mark the OMP clause for 'x' (not the
> >>> 'x' decl itself!) as 'readonly', via a new 'OMP_CLAUSE_MAP_READONLY'
> >>> flag.
> >>>
> >>> The actual optimization then is done in this second patch.  Chung-Lin
> >>> found that he could use 'SSA_NAME_POINTS_TO_READONLY_MEMORY' for that.
> >>> I don't have much experience with most of the following generic code, so
> >>> would appreciate a helping hand, whether that conceptually makes sense as
> >>> well as from the implementation point of view:
> >
> > First of all, I have removed all of the gimplify-stage scanning and setting 
> > of
> > DECL_POINTS_TO_READONLY and SSA_NAME_POINTS_TO_READONLY_MEMORY (so no 
> > changes to
> > gimplify.cc now)
> >
> > I remember this code was an artifact of earlier attempts to allow 
> > struct-member
> > pointer mappings to also work (e.g. map(readonly:rec.ptr[:N])), but failed 
> > anyways.
> > I think the omp_data_* member accesses when building child function side
> > receiver_refs is blocking points-to analysis from working (didn't try 
> > digging deeper)
> >
> > Also during gimplify, VAR_DECLs appeared to be reused (at least in some 
> > cases) for map
> > clause decl reference building, so hoping that the variables "happen to be" 
> > single-use and
> > DECL_POINTS_TO_READONLY relaying into SSA_NAME_POINTS_TO_READONLY_MEMORY 
> > does appear to be
> > a little risky.
> >
> > However, for firstprivate pointers processed during omp-low, it appears to 
> > be somewhat different.
> > (see below description)
> >
> >> No, I don't think you can use that flag on non-default-defs, nor
> >> preserve it on copying.  So
> >> it also doesn't nicely extend to DECLs as done by the patch.  We
> >> currently _only_ use it
> >> for incoming parameters.  When used on arbitrary code you can get to for 
> >> example
> >> 
> >> ptr1(points-to-readony-memory) = >x;
> >> ... access via ptr1 ...
> >> ptr2 = >x;
> >> ... access via ptr2 ...
> >> 
> >> where both are your OMP regions differently constrained (the constrain is 
> >> on the
> >> code in the region, _not_ on the actual protections of the pointed to
> >> data, much like
> >> for the fortran case).  But now CSE comes along and happily replaces all 
> >> ptr2
> >> with ptr2 in the second region and ... oops!
> >
> > Richard, I assume what you meant was "happily replaces all ptr2 with ptr1 
> > in the second region"?
> >
> > That doesn't happen, because during omp-lower/expand, OMP target regions 
> > (which is all that
> > this applies currently) is separated into different individual child 
> > functions.
> >
> > (Currently, the only "effective" use of DECL_POINTS_TO_READONLY is during 
> > omp-lower, when
> > for firstprivate pointers (i.e. 'a' here) we set this bit when constructing 
> > the first load
> > of this pointer)
> >
> >   #pragma acc parallel copyin(readonly: a[:32]) copyout(r)
> >   {
> > foo (a, a[8]);
> > r = a[8];
> >   }
> >   #pragma acc parallel copyin(readonly: a[:32]) copyout(r)
> >   {
> > foo (a, a[12]);
> > r = a[12];
> >   }
> >
> > After omp-expand (before SSA):
> >
> > __attribute__((oacc parallel, omp target entrypoint, noclone))
> > void main._omp_fn.1 (const struct .omp_data_t.3 & restrict .omp_data_i)
> > {
> >  ...
> >:
> >   D.2962 = .omp_data_i->D.2947;
> >   a.8 = D.2962;
> >   r.1 = (*a.8)[12];
> >   foo (a.8, r.1);
> >   r.1 = (*a.8)[12];
> >   D.2965 = .omp_data_i->r;
> >   *D.2965 = r.1;
> >   return;
> > }
> >
> > __attribute__((oacc parallel, omp target entrypoint, noclone))
> > void main._omp_fn.0 (const struct .omp_data_t.2 & restrict .omp_data_i)
> > {
> >   ...
> >:
> >   D.2968 = .omp_data_i->D.2939;

Re: [r14-9912 Regression] FAIL: gcc.dg/guality/pr54693-2.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects -DPREVENT_OPTIMIZATION line 21 z == 30 - 3 * i on Linux/x86_64

2024-04-12 Thread Richard Biener
On Fri, 12 Apr 2024, haochen.jiang wrote:

> On Linux/x86_64,
> 
> c7e8a8d814229fd6fc4c16c2452f15dddc613479 is the first bad commit
> commit c7e8a8d814229fd6fc4c16c2452f15dddc613479
> Author: Richard Biener 
> Date:   Thu Apr 11 11:08:07 2024 +0200
> 
> tree-optimization/109596 - wrong debug stmt move by copyheader
> 
> caused
> 
> FAIL: gcc.dg/guality/pr43051-1.c   -O3 -fomit-frame-pointer -funroll-loops 
> -fpeel-loops -ftracer -finline-functions  -DPREVENT_OPTIMIZATION  line 34 c 
> == [0]
> FAIL: gcc.dg/guality/pr43051-1.c   -O3 -fomit-frame-pointer -funroll-loops 
> -fpeel-loops -ftracer -finline-functions  -DPREVENT_OPTIMIZATION  line 39 c 
> == [0]
> FAIL: gcc.dg/guality/pr54693-2.c   -O2 -flto -fuse-linker-plugin 
> -fno-fat-lto-objects  -DPREVENT_OPTIMIZATION line 21 x == 10 - i
> FAIL: gcc.dg/guality/pr54693-2.c   -O2 -flto -fuse-linker-plugin 
> -fno-fat-lto-objects  -DPREVENT_OPTIMIZATION line 21 y == 20 - 2 * i
> FAIL: gcc.dg/guality/pr54693-2.c   -O2 -flto -fuse-linker-plugin 
> -fno-fat-lto-objects  -DPREVENT_OPTIMIZATION line 21 z == 30 - 3 * i

Just FYI these are the FAILs as they were present before the regression
this change fixed.


Re: Combine patch ping

2024-04-11 Thread Richard Biener



> Am 11.04.2024 um 16:03 schrieb Segher Boessenkool 
> :
> 
> On Wed, Apr 10, 2024 at 08:32:39PM +0200, Uros Bizjak wrote:
>>> On Wed, Apr 10, 2024 at 7:56 PM Segher Boessenkool
>>>  wrote:
>>> This is never okay.  You cannot commit a patch without approval, *ever*.
> 
> This is the biggest issue, to start with.  It is fundamental.

I have approved the patch as you might have noticed.

Richard 

>>> That patch is also obvious -- obviously *wrong*, that is.  There are
>>> big assumptions everywhere in the compiler how a CC reg can be used.
>>> This violates that, as explained elsewhere.
>> 
>> Can you please elaborate what is wrong with this concrete patch.
> 
> The explanation of the patch is contradictory to how RTL works at all,
> so it is just wrong.  It might even do something sane, but I didn't get
> that far at all!
> 
> Write good email explanations, and a good proposed commit message.
> Please.  It is the only one people can judge a patch.  Well, apart
> from doing everything myself from first principles, ignoring everything
> you said, just looking at the patch itself, but that is a hundred times
> more work.  I don't do that.
> 
>> The
>> part that the patch touches has several wrong assumptions, and the
>> fixed "???" comment just emphasizes that. I don't see what is wrong
>> with:
>> 
>> (define_insn "@pushfl<mode>2"
>>  [(set (match_operand:W 0 "push_operand" "=<")
>>(unspec:W [(match_operand 1 "flags_reg_operand")]
>>  UNSPEC_PUSHFL))]
>>  "GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_CC"
>>  "pushf{<imodesuffix>}"
>>  [(set_attr "type" "push")
>>   (set_attr "mode" "<MODE>")])
> 
> What does it even mean?  What is a flags:CC?  You always always always
> need to say what is *in* the flags, if you want to use it as input
> (which is what unspec does).  CC is weird like this.  Most targets do
> not have distinct physical flags for every condition, only a few
> conditions are "alive" at any point in the program!
> 
>> it is just a push of the flags reg to the stack. If the push can't be
>> described in this way, then it is the middle end at fault, we can't
>> just change modes at will.
> 
> But that is not what this describes: it operates on the flags register
> in some unspecified way, and pushes the result of *that* to the stack.
> 
> (Stack pointer modification is not described here btw, should it be?  Is
> that magically implemented by the backend some way, via type=push
> perhaps?)
> 
> 
> Segher


Re: [PATCH] match: Fix `!a?b:c` and `a?~t:t` patterns for signed 1 bit types [PR114666]

2024-04-11 Thread Richard Biener
On Thu, Apr 11, 2024 at 10:43 AM Andrew Pinski  wrote:
>
> The issue here is that the `a?~t:t` pattern assumed (maybe correctly) that a
> here was always going to be a unsigned boolean type. This fixes the problem
> in both patterns to cast the operand to boolean type first.
>
> I should note that VRP seems to keep wanting to produce `a == 0?1:-2`
> from `((int)a) ^ 1`, which is a bit odd and partly the cause of the issue;
> there seems to be some disconnect on what should be the canonical form. That
> will be something to look at for GCC 15.
>
> Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> PR tree-optimization/114666
>
> gcc/ChangeLog:
>
> * match.pd (`!a?b:c`): Cast `a` to boolean type for cond for
> gimple.
> (`a?~t:t`): Cast `a` to boolean type before casting it
> to the type.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.c-torture/execute/bitfld-signed1-1.c: New test.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/match.pd| 10 +++---
>  .../gcc.c-torture/execute/bitfld-signed1-1.c| 13 +
>  2 files changed, 20 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 15a1e7350d4..ffc928b656a 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5895,7 +5895,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   /* !A ? B : C -> A ? C : B.  */
>   (simplify
>(cnd (logical_inverted_value truth_valued_p@0) @1 @2)
> -  (cnd @0 @2 @1)))
> +  /* For gimple, make sure the operand to COND is a boolean type,
> + truth_valued_p will match 1bit integers too. */
> +  (if (GIMPLE && cnd == COND_EXPR)
> +   (cnd (convert:boolean_type_node @0) @2 @1)
> +   (cnd @0 @2 @1

This looks "wrong" for GENERIC still?

But this is not really part of the fix but deciding we should not have
signed:1 as
cond operand?  I'll note that truth_valued_p allows signed:1.

Maybe as minimal surgery add a TYPE_UNSIGNED (TREE_TYPE (@0)) check here
instead?

>  /* abs/negative simplifications moved from fold_cond_expr_with_comparison.
>
> @@ -7099,8 +7103,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> && (!wascmp || TYPE_PRECISION (type) == 1))
> (if ((!TYPE_UNSIGNED (type) && TREE_CODE (type) == BOOLEAN_TYPE)
> || TYPE_PRECISION (type) == 1)
> -(bit_xor (convert:type @0) @2)
> -(bit_xor (negate (convert:type @0)) @2)
> +(bit_xor (convert:type (convert:boolean_type_node @0)) @2)
> +(bit_xor (negate (convert:type (convert:boolean_type_node @0))) @2)
>  #endif

This looks OK, but then testing TYPE_UNSIGNED (TREE_TYPE (@0)) might be
better?

Does this all just go downhill from what VRP creates?  That is, would
IL checking
have had a chance detecting it if we say signed:1 are not valid as condition?

That said, the latter pattern definitely needs guarding/adjustment, I'm not sure
the former is wrong?  Semantically [VEC_]COND_EXPR is op0 != 0 ? ... : ...

Richard.

>  /* Simplify pointer equality compares using PTA.  */
> diff --git a/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c 
> b/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
> new file mode 100644
> index 000..b0ff120ea51
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/bitfld-signed1-1.c
> @@ -0,0 +1,13 @@
> +/* PR tree-optimization/114666 */
> +/* We used to miscompile this to be always aborting
> +   due to the use of the signed 1bit into the COND_EXPR. */
> +
> +struct {
> +  signed a : 1;
> +} b = {-1};
> +char c;
> +int main()
> +{
> +  if ((b.a ^ 1UL) < 3)
> +__builtin_abort();
> +}
> --
> 2.43.0
>


[PATCH] tree-optimization/109596 - wrong debug stmt move by copyheader

2024-04-11 Thread Richard Biener
The following fixes an omission in r14-162-gcda246f8b421ba causing
wrong-debug and a bunch of guality regressions.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/109596
* tree-ssa-loop-ch.cc (ch_base::copy_headers): Propagate
debug stmts to nonexit->dest rather than exit->dest.
---
 gcc/tree-ssa-loop-ch.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-loop-ch.cc b/gcc/tree-ssa-loop-ch.cc
index 1f0033be4c4..b7ef485c4cc 100644
--- a/gcc/tree-ssa-loop-ch.cc
+++ b/gcc/tree-ssa-loop-ch.cc
@@ -957,7 +957,7 @@ ch_base::copy_headers (function *fun)
 
   edge entry = loop_preheader_edge (loop);
 
-  propagate_threaded_block_debug_into (exit->dest, entry->dest);
+  propagate_threaded_block_debug_into (nonexit->dest, entry->dest);
   if (!gimple_duplicate_seme_region (entry, exit, bbs, n_bbs, copied_bbs,
 true))
{
-- 
2.35.3


[PATCH] middle-end/114681 - condition coverage and inlining

2024-04-11 Thread Richard Biener
When inlining a gcond it can map to multiple stmts, esp. with
non-call EH.  The following makes sure to pick up the remapped
condition when dealing with condition coverage.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR middle-end/114681
* tree-inline.cc (copy_bb): Key on the remapped stmt
to identify gconds to have condition coverage data remapped.

* gcc.misc-tests/gcov-pr114681.c: New testcase.
---
 gcc/testsuite/gcc.misc-tests/gcov-pr114681.c | 18 ++
 gcc/tree-inline.cc   |  2 +-
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-pr114681.c

diff --git a/gcc/testsuite/gcc.misc-tests/gcov-pr114681.c 
b/gcc/testsuite/gcc.misc-tests/gcov-pr114681.c
new file mode 100644
index 000..a8dc666a452
--- /dev/null
+++ b/gcc/testsuite/gcc.misc-tests/gcov-pr114681.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fnon-call-exceptions -fno-exceptions 
-fcondition-coverage" } */
+
+float f, g;
+
+static void
+bar ()
+{
+  if (g < f)
+for (;;)
+  ;
+}
+
+void
+foo ()
+{
+  bar ();
+}
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
index 5f852885e7f..238afb7de80 100644
--- a/gcc/tree-inline.cc
+++ b/gcc/tree-inline.cc
@@ -2090,7 +2090,7 @@ copy_bb (copy_body_data *id, basic_block bb,
  /* If -fcondition-coverage is used, register the inlined conditions
 in the cond->expression mapping of the caller.  The expression tag
 is shifted conditions from the two bodies are not mixed.  */
- if (id->src_cfun->cond_uids && is_a <gcond*> (orig_stmt))
+ if (id->src_cfun->cond_uids && is_a <gcond*> (stmt))
{
  gcond *orig_cond = as_a <gcond*> (orig_stmt);
  gcond *cond = as_a <gcond*> (stmt);
-- 
2.35.3


Re: [PATCH] asan, v3: Fix up handling of > 32 byte aligned variables with -fsanitize=address -fstack-protector* [PR110027]

2024-04-11 Thread Richard Biener
  prev_offset = frame_offset.to_constant ();
> }
> to -ASAN_RED_ZONE_SIZE.  The asan_emit_stack_protection code wasn't
> taking this into account though, so essentially assumed in the
> __asan_stack_malloc_N allocated memory it needs to align it such that
> pointer corresponding to offsets[0] is alignb aligned.  But that isn't
> correct if alignb > ASAN_RED_ZONE_SIZE, in that case it needs to ensure that
> pointer corresponding to frame offset 0 is alignb aligned.
> 
> The following patch fixes that.  Unlike the previous case where
> we knew that asan_frame_size + base_align_bias falls into the same bucket
> as asan_frame_size, this isn't in some cases true anymore, so the patch
> recomputes which bucket to use and if going to bucket 11 (because there is
> no __asan_stack_malloc_11 function in the library) disables the after return
> sanitization.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

LGTM.

Thanks,
Richard.

> 2024-04-11  Jakub Jelinek  
> 
>   PR middle-end/110027
>   * asan.cc (asan_emit_stack_protection): Assert offsets[0] is
>   zero if there is no stack protect guard, otherwise
>   -ASAN_RED_ZONE_SIZE.  If alignb > ASAN_RED_ZONE_SIZE and there is
>   stack pointer guard, take the ASAN_RED_ZONE_SIZE bytes allocated at
>   the top of the stack into account when computing base_align_bias.
>   Recompute use_after_return_class from asan_frame_size + base_align_bias
>   and set to -1 if that would overflow to 11.
> 
>   * gcc.dg/asan/pr110027.c: New test.
> 
> --- gcc/asan.cc.jj2024-04-10 09:54:39.661231059 +0200
> +++ gcc/asan.cc   2024-04-10 12:12:11.337978004 +0200
> @@ -1911,19 +1911,39 @@ asan_emit_stack_protection (rtx base, rt
>  }
>str_cst = asan_pp_string (&asan_pp);
>  
> +  gcc_checking_assert (offsets[0] == (crtl->stack_protect_guard
> +   ? -ASAN_RED_ZONE_SIZE : 0));
>/* Emit the prologue sequence.  */
>if (asan_frame_size > 32 && asan_frame_size <= 65536 && pbase
>&& param_asan_use_after_return)
>  {
> +  HOST_WIDE_INT adjusted_frame_size = asan_frame_size;
> +  /* The stack protector guard is allocated at the top of the frame
> +  and cfgexpand.cc then uses align_frame_offset (ASAN_RED_ZONE_SIZE);
> +  while in that case we can still use asan_frame_size, we need to take
> +  that into account when computing base_align_bias.  */
> +  if (alignb > ASAN_RED_ZONE_SIZE && crtl->stack_protect_guard)
> + adjusted_frame_size += ASAN_RED_ZONE_SIZE;
>use_after_return_class = floor_log2 (asan_frame_size - 1) - 5;
>/* __asan_stack_malloc_N guarantees alignment
>N < 6 ? (64 << N) : 4096 bytes.  */
>if (alignb > (use_after_return_class < 6
>   ? (64U << use_after_return_class) : 4096U))
>   use_after_return_class = -1;
> -  else if (alignb > ASAN_RED_ZONE_SIZE && (asan_frame_size & (alignb - 
> 1)))
> - base_align_bias = ((asan_frame_size + alignb - 1)
> -& ~(alignb - HOST_WIDE_INT_1)) - asan_frame_size;
> +  else if (alignb > ASAN_RED_ZONE_SIZE
> +&& (adjusted_frame_size & (alignb - 1)))
> + {
> +   base_align_bias
> + = ((adjusted_frame_size + alignb - 1)
> +& ~(alignb - HOST_WIDE_INT_1)) - adjusted_frame_size;
> +   use_after_return_class
> + = floor_log2 (asan_frame_size + base_align_bias - 1) - 5;
> +   if (use_after_return_class > 10)
> + {
> +   base_align_bias = 0;
> +   use_after_return_class = -1;
> + }
> + }
>  }
>  
>/* Align base if target is STRICT_ALIGNMENT.  */
> --- gcc/testsuite/gcc.dg/asan/pr110027.c.jj   2024-04-10 12:01:19.939768472 
> +0200
> +++ gcc/testsuite/gcc.dg/asan/pr110027.c  2024-04-10 12:11:52.728229147 
> +0200
> @@ -0,0 +1,50 @@
> +/* PR middle-end/110027 */
> +/* { dg-do run } */
> +/* { dg-additional-options "-fstack-protector-strong" { target 
> fstack_protector } } */
> +/* { dg-set-target-env-var ASAN_OPTIONS "detect_stack_use_after_return=1" } 
> */
> +
> +struct __attribute__((aligned (128))) S { char s[128]; };
> +struct __attribute__((aligned (64))) T { char s[192]; };
> +struct __attribute__((aligned (32))) U { char s[256]; };
> +struct __attribute__((aligned (64))) V { char s[320]; };
> +struct __attribute__((aligned (128))) W { char s[512]; };
> +
> +__attribute__((noipa)) void
> +foo (void *p, void *q, void *r, void *s)
> +{
> +  if (((__UINTPTR_TYPE__) p & 31) != 0
> +  || ((__UINTPTR_TYPE__) q & 127) != 0
> +  || ((__UINTPTR_TYPE__) r & 63) != 0)
> +__builtin_abort ();
> +  (void *) s;
> +}
> +
> +__attribute__((noipa)) int
> +bar (void)
> +{
> +  struct U u;
> +  struct S s;
> +  struct T t;
> +  char p[4];
> +  foo (&u, &s, &t, &p);
> +  return 42;
> +}
> +
> +__attribute__((noipa)) int
> +baz (void)
> +{
> +  struct W w;
> +  struct U u;
> +  struct V v;
> +  char p[4];
> +  foo (&u, &w, &v, &p);
> +  return 42;
> +}
> +
> +int
> +main ()
> +{
> +  bar ();
> +  baz ();
> +  return 0;
> +}
> 
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH] s390: testsuite: Fix loop-interchange-16.c

2024-04-11 Thread Richard Biener
On Thu, Apr 11, 2024 at 9:02 AM Stefan Schulze Frielinghaus
 wrote:
>
> Revert parameter max-completely-peel-times to 16, otherwise, the
> innermost loop is removed and we are left with no loop interchange which
> this test is all about.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/loop-interchange-16.c: Revert parameter
> max-completely-peel-times for s390.
> ---
>  Ok for mainline?

Can you check whether placing

#pragma GCC unroll 0

before the innermost loop works as well?  That'd be more to the point.

OK if that works.

thanks,
Richard.

>  gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-16.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-16.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-16.c
> index 781555e085d..2530ec84bc0 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-16.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-16.c
> @@ -1,6 +1,7 @@
>  /* PR/101280 */
>  /* { dg-do compile } */
>  /* { dg-options "-O3 -fdump-tree-linterchange-details" } */
> +/* { dg-additional-options "--param max-completely-peel-times=16" { target 
> s390*-*-* } } */
>
>  void dummy (double *, double *);
>  #define LEN_2D 32
> --
> 2.43.0
>


Re: Combine patch ping

2024-04-11 Thread Richard Biener
On Wed, 10 Apr 2024, Uros Bizjak wrote:

> On Wed, Apr 10, 2024 at 7:56 PM Segher Boessenkool
>  wrote:
> >
> > On Sun, Apr 07, 2024 at 08:31:38AM +0200, Uros Bizjak wrote:
> > > If there are no further comments, I plan to commit the referred patch
> > > to the mainline on Wednesday. The latest version can be considered an
> > > obvious patch that solves certain oversight in the original
> > > implementation.
> >
> > This is never okay.  You cannot commit a patch without approval, *ever*.
> >
> > That patch is also obvious -- obviously *wrong*, that is.  There are
> > big assumptions everywhere in the compiler how a CC reg can be used.
> > This violates that, as explained elsewhere.
> 
> Can you please elaborate what is wrong with this concrete patch.

Better show a correct patch.  The interchanges in the last months
have not been constructive at all.

Richard.

Re: [PATCH] c++/114409 - ANNOTATE_EXPR and templates

2024-04-10 Thread Richard Biener
On Wed, 10 Apr 2024, Jakub Jelinek wrote:

> On Wed, Apr 10, 2024 at 06:43:02PM +0200, Richard Biener wrote:
> > The following fixes a mismatch in COMPOUND_EXPR handling in
> > tsubst_expr vs tsubst_stmt where the latter allows a stmt in
> > operand zero but the former doesn't.  This makes a difference
> > for the case at hand because when the COMPOUND_EXPR is wrapped
> > inside an ANNOTATE_EXPR it gets handled by tsubst_expr and when
> > not, tsubst_stmt successfully handles it and the contained
> > DECL_EXPR in operand zero.
> > 
> > The following makes handling of COMPOUND_EXPR in tsubst_expr
> > consistent with that of tsubst_stmt for the operand that doesn't
> > specify the result and thus is not the reason we choose one or the
> > other for substituting.
> > 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
> > 
> > Thanks,
> > Richard.
> > 
> > PR c++/114409
> > gcc/cp/
> > * pt.cc (tsubst_expr): Recurse to COMPOUND_EXPR operand
> > zero using tsubst_stmt, when that returns NULL return
> > the subst operand one, mimicking what tsubst_stmt does.
> > 
> > gcc/testsuite/
> > * g++.dg/pr114409.C: New testcase.
> 
> I've posted https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114409#c16
> for this already and Jason agreed to that version, so I just have to test it
> tonight:
> https://gcc.gnu.org/pipermail/gcc-patches/2024-April/649165.html

Ah, I saw the bugzilla patches and wanted this version to be sent
because I think the COMPOUND_EXPR inconsistency is odd.  So Jason,
please still have a look, not necessarily because of the bug
which can be fixed in multiple ways but because of that COMPOUND_EXPR
handling oddity (there are already some cases in tsubst_expr that
explicitly recurse with tsubst_stmt).

Richard.

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


[PATCH] c++/114409 - ANNOTATE_EXPR and templates

2024-04-10 Thread Richard Biener
The following fixes a mismatch in COMPOUND_EXPR handling in
tsubst_expr vs tsubst_stmt where the latter allows a stmt in
operand zero but the former doesn't.  This makes a difference
for the case at hand because when the COMPOUND_EXPR is wrapped
inside an ANNOTATE_EXPR it gets handled by tsubst_expr and when
not, tsubst_stmt successfully handles it and the contained
DECL_EXPR in operand zero.

The following makes handling of COMPOUND_EXPR in tsubst_expr
consistent with that of tsubst_stmt for the operand that doesn't
specify the result and thus is not the reason we choose one or the
other for substituting.

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

PR c++/114409
gcc/cp/
* pt.cc (tsubst_expr): Recurse to COMPOUND_EXPR operand
zero using tsubst_stmt, when that returns NULL return
the subst operand one, mimicking what tsubst_stmt does.

gcc/testsuite/
* g++.dg/pr114409.C: New testcase.
---
 gcc/cp/pt.cc| 5 -
 gcc/testsuite/g++.dg/pr114409.C | 8 
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/pr114409.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index bf4b89d8413..dae423a751f 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -20635,8 +20635,11 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
 
 case COMPOUND_EXPR:
   {
-   tree op0 = tsubst_expr (TREE_OPERAND (t, 0), args,
+   tree op0 = tsubst_stmt (TREE_OPERAND (t, 0), args,
complain & ~tf_decltype, in_decl);
+   if (op0 == NULL_TREE)
+ /* If the first operand was a statement, we're done with it.  */
+ RETURN (RECUR (TREE_OPERAND (t, 1)));
RETURN (build_x_compound_expr (EXPR_LOCATION (t),
   op0,
   RECUR (TREE_OPERAND (t, 1)),
diff --git a/gcc/testsuite/g++.dg/pr114409.C b/gcc/testsuite/g++.dg/pr114409.C
new file mode 100644
index 000..6343fe8d9f3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr114409.C
@@ -0,0 +1,8 @@
+// { dg-do compile }
+
+template <int> int t() {
+#pragma GCC unroll 4
+while (int ThisEntry = 0) { } // { dg-bogus "ignoring loop annotation" "" 
{ xfail *-*-* } }
+return 0;
+}
+int tt = t<1>();
-- 
2.35.3


<    1   2   3   4   5   6   7   8   9   10   >