[PATCHv2] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple

2021-09-03 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This patch adds simple folding of __builtin_aarch64_im_lane_boundsi where
we are not going to error out. It fixes the problem by the removal
of the function from the IR.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

PR target/95969
* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin_lane_check):
New function.
(aarch64_general_fold_builtin): Handle AARCH64_SIMD_BUILTIN_LANE_CHECK.
(aarch64_general_gimple_fold_builtin): Likewise.

gcc/testsuite/ChangeLog:

PR target/95969
* gcc.target/aarch64/lane-bound-1.c: New test.
* gcc.target/aarch64/lane-bound-2.c: New test.
---
 gcc/config/aarch64/aarch64-builtins.c | 35 +++
 .../gcc.target/aarch64/lane-bound-1.c | 14 
 .../gcc.target/aarch64/lane-bound-2.c | 10 ++
 3 files changed, 59 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-2.c

diff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index eef9fc0f444..119f67d4e4c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -29,6 +29,7 @@
 #include "rtl.h"
 #include "tree.h"
 #include "gimple.h"
+#include "ssa.h"
 #include "memmodel.h"
 #include "tm_p.h"
 #include "expmed.h"
@@ -2333,6 +2334,27 @@ aarch64_general_builtin_rsqrt (unsigned int fn)
   return NULL_TREE;
 }
 
+/* Return true if the lane check can be removed as there is no
+   error going to be emitted.  */
+static bool
+aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
+{
+  if (TREE_CODE (arg0) != INTEGER_CST)
+return false;
+  if (TREE_CODE (arg1) != INTEGER_CST)
+return false;
+  if (TREE_CODE (arg2) != INTEGER_CST)
+return false;
+
+  auto totalsize = wi::to_widest (arg0);
+  auto elementsize = wi::to_widest (arg1);
+  if (totalsize == 0 || elementsize == 0)
+return false;
+  auto lane = wi::to_widest (arg2);
+  auto high = wi::udiv_trunc (totalsize, elementsize);
+  return wi::ltu_p (lane, high);
+}
+
 #undef VAR1
 #define VAR1(T, N, MAP, FLAG, A) \
   case AARCH64_SIMD_BUILTIN_##T##_##N##A:
@@ -2353,6 +2375,11 @@ aarch64_general_fold_builtin (unsigned int fcode, tree 
type,
   VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
   VAR1 (UNOP, floatv2di, 2, ALL, v2df)
return fold_build1 (FLOAT_EXPR, type, args[0]);
+  case AARCH64_SIMD_BUILTIN_LANE_CHECK:
+   gcc_assert (n_args == 3);
+   if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
+ return void_node;
+   break;
   default:
break;
 }
@@ -2440,6 +2467,14 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, 
gcall *stmt)
}
  break;
}
+case AARCH64_SIMD_BUILTIN_LANE_CHECK:
+  if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
+   {
+ unlink_stmt_vdef (stmt);
+ release_defs (stmt);
+ new_stmt = gimple_build_nop ();
+   }
+  break;
 default:
   break;
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c 
b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
new file mode 100644
index 000..bbbe679fd80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+#include 
+
+void
+f (float32x4_t **ptr)
+{
+  float32x4_t res = vsetq_lane_f32 (0.0f, **ptr, 0);
+  **ptr = res;
+}
+/* GCC should be able to remove the call to "__builtin_aarch64_im_lane_boundsi"
+   and optimize out the second load from *ptr.  */
+/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 
"optimized" } } */
+/* { dg-final { scan-tree-dump-times " = \\\*ptr_" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c 
b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
new file mode 100644
index 000..923c94687c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-original" } */
+void
+f (void)
+{
+  __builtin_aarch64_im_lane_boundsi (16, 4, 0);
+  __builtin_aarch64_im_lane_boundsi (8, 8, 0);
+}
+/* GCC should be able to optimize these out before gimplification. */
+/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 
"original" } } */
-- 
2.17.1



Re: [PATCH] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple

2021-09-03 Thread Andrew Pinski via Gcc-patches
On Fri, Sep 3, 2021 at 2:42 AM Richard Sandiford via Gcc-patches
 wrote:
>
> apinski--- via Gcc-patches  writes:
> > From: Andrew Pinski 
> >
> > This patch adds simple folding of __builtin_aarch64_im_lane_boundsi where
> > we are not going to error out. It fixes the problem by the removal
> > of the function from the IR.
> >
> > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
> >
> > gcc/ChangeLog:
> >
> >   * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin_lane_check):
> >   New function.
> >   (aarch64_general_fold_builtin): Handle 
> > AARCH64_SIMD_BUILTIN_LANE_CHECK.
> >   (aarch64_general_gimple_fold_builtin): Likewise.
> > ---
> >  gcc/config/aarch64/aarch64-builtins.c | 35 +++
> >  1 file changed, 35 insertions(+)
> >
> > diff --git a/gcc/config/aarch64/aarch64-builtins.c 
> > b/gcc/config/aarch64/aarch64-builtins.c
> > index f6b41d9c200..d4414373aa4 100644
> > --- a/gcc/config/aarch64/aarch64-builtins.c
> > +++ b/gcc/config/aarch64/aarch64-builtins.c
> > @@ -29,6 +29,7 @@
> >  #include "rtl.h"
> >  #include "tree.h"
> >  #include "gimple.h"
> > +#include "ssa.h"
> >  #include "memmodel.h"
> >  #include "tm_p.h"
> >  #include "expmed.h"
> > @@ -2333,6 +2334,27 @@ aarch64_general_builtin_rsqrt (unsigned int fn)
> >return NULL_TREE;
> >  }
> >
> > +/* Return true if the lane check can be removed as there is no
> > +   error going to be emitted.  */
> > +static bool
> > +aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
> > +{
> > +  if (TREE_CODE (arg0) != INTEGER_CST)
> > +return false;
> > +  if (TREE_CODE (arg1) != INTEGER_CST)
> > +return false;
> > +  if (TREE_CODE (arg2) != INTEGER_CST)
> > +return false;
> > +
> > +  auto totalsize = wi::to_widest (arg0);
> > +  auto elementsize = wi::to_widest (arg1);
> > +  if (totalsize == 0 || elementsize == 0)
> > +return false;
> > +  auto lane = wi::to_widest (arg2);
> > +  auto high = wi::udiv_trunc (totalsize, elementsize);
> > +  return wi::ltu_p (lane, high);
> > +}
> > +
> >  #undef VAR1
> >  #define VAR1(T, N, MAP, FLAG, A) \
> >case AARCH64_SIMD_BUILTIN_##T##_##N##A:
> > @@ -2353,6 +2375,11 @@ aarch64_general_fold_builtin (unsigned int fcode, 
> > tree type,
> >VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
> >VAR1 (UNOP, floatv2di, 2, ALL, v2df)
> >   return fold_build1 (FLOAT_EXPR, type, args[0]);
> > +  case AARCH64_SIMD_BUILTIN_LANE_CHECK:
> > + if (n_args == 3
>
> Do we need this check?  If it's safe to rely on frontend testing
> for aarch64_general_gimple_fold_builtin then hopefully it should
> be here too.  I think an assert would be better.

I added it just in case, I do find it interesting that it was passed
but never checked.
>
> (FTR, we have extra checking for SVE because the overloaded functions
> don't have definite prototypes.)
>
> > + && aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
> > +   return fold_convert (void_type_node, integer_zero_node);
>
> Could this just be void_node instead?  VOID_CST is the tree constant
> code for void_type_node.

Oh I had missed there was such a thing, definitely better using that
instead of the above.

>
> It would be good to add the testcase, as a -fdump-tree-optimized test
> that checks for a single instance of { = \*ptr}.

Yes I was thinking about adding one but I will definitely add one now.

Thanks,
Andrew Pinski

>
> Thanks,
> Richard


Re: Ping ^ 2: [PATCH] rs6000: Expand fmod and remainder when built with fast-math [PR97142]

2021-09-03 Thread Segher Boessenkool
Hi!

On Fri, Sep 03, 2021 at 10:31:24AM +0800, Xionghu Luo wrote:
> fmod/fmodf and remainder/remainderf could be expanded instead of library
> call when fast-math build, which is much faster.

Thank you very much for this patch.

Some trivial comments if you haven't commmitted it yet:

> +(define_expand "fmod3"
> +  [(use (match_operand:SFDF 0 "gpc_reg_operand"))
> + (use (match_operand:SFDF 1 "gpc_reg_operand"))
> + (use (match_operand:SFDF 2 "gpc_reg_operand"))]
> +  "TARGET_HARD_FLOAT
> +  && TARGET_FPRND
> +  && flag_unsafe_math_optimizations"

It should have one extra space before each && here:

  "TARGET_HARD_FLOAT
   && TARGET_FPRND
   && flag_unsafe_math_optimizations"

(so that everything inside of the string aligns).

> +(define_expand "remainder3"

(same here).

> +/* { dg-final { scan-assembler-not {\mbl fmod\M} } } */
> +/* { dg-final { scan-assembler-not {\mbl fmodf\M} } } */
> +/* { dg-final { scan-assembler-not {\mbl remainder\M} } } */
> +/* { dg-final { scan-assembler-not {\mbl remainderf\M} } } */

These are negative tests, so won't spuriously fail, but this does not
test for the function prefixes we can have.  See
gcc.target/powerpc/builtins-1.c for example.

Again, thank you, and thanks to everyone else for the patch review
action :-)


Segher


[PATCH] rs6000: Don't use r12 for CR save on ELFv2 (PR102107)

2021-09-03 Thread Segher Boessenkool
CR is saved and/or restored on some paths where GPR12 is already live
since it has a meaning in the calling convention in the ELFv2 ABI.

It is not completely clear to me that we can always use r11 here, but
it does seem save, there is checking code (to detect conflicts here),
and it is stage 1.  So here goes.


Segher


2021-09-03  Segher Boessenkool 

PR target/102107
* config/rs6000/rs6000-logue.c (rs6000_emit_prologue): On ELFv2 use r11
instead of r12 for CR save, in all cases.
---
 gcc/config/rs6000/rs6000-logue.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c
index 07337c4836a3..e363d56ecec0 100644
--- a/gcc/config/rs6000/rs6000-logue.c
+++ b/gcc/config/rs6000/rs6000-logue.c
@@ -3293,10 +3293,13 @@ rs6000_emit_prologue (void)
 
   /* If we need to save CR, put it into r12 or r11.  Choose r12 except when
  r12 will be needed by out-of-line gpr save.  */
-  cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
-  && !(strategy & (SAVE_INLINE_GPRS
-   | SAVE_NOINLINE_GPRS_SAVES_LR))
-  ? 11 : 12);
+  if (DEFAULT_ABI == ABI_AIX
+  && !(strategy & (SAVE_INLINE_GPRS | SAVE_NOINLINE_GPRS_SAVES_LR)))
+cr_save_regno = 11;
+  else if (DEFAULT_ABI == ABI_ELFv2)
+cr_save_regno = 11;
+  else
+cr_save_regno = 12;
   if (!WORLD_SAVE_P (info)
   && info->cr_save_p
   && REGNO (frame_reg_rtx) != cr_save_regno
-- 
1.8.3.1



Re: [PATCH 1/13] v2 [PATCH 1/13] Add support for per-location warning groups (PR 74765)

2021-09-03 Thread Thomas Schwinge
Hi!

Martin, thanks for your review.  Now need someone to formally approve the
third patch.

On 2021-09-01T18:14:46-0600, Martin Sebor  wrote:
> On 9/1/21 1:35 PM, Thomas Schwinge wrote:
>> On 2021-06-23T13:47:08-0600, Martin Sebor via Gcc-patches 
>>  wrote:
>>> On 6/22/21 5:28 PM, David Malcolm wrote:
 On Tue, 2021-06-22 at 19:18 -0400, David Malcolm wrote:
> On Fri, 2021-06-04 at 15:41 -0600, Martin Sebor wrote:
>> The attached patch introduces the suppress_warning(),
>> warning_suppressed(), and copy_no_warning() APIs [etc.]

>> I now had a bit of a deep dive into some aspects of this, in context of
>>  "gcc/sparseset.h:215:20: error: suggest
>> parentheses around assignment used as truth value [-Werror=parentheses]"
>> that I recently filed.  This seems difficult to reproduce, but I'm still
>> able to reliably reproduce it in one specific build
>> configuration/directory/machine/whatever.  Initially, we all quickly
>> assumed that it'd be some GC issue -- but "alas", it's not, at least not
>> directly.  (But I'll certainly assume that some GC aspects are involved
>> which make this issue come and go across different GCC sources revisions,
>> and difficult to reproduce.)

>> First, two pieces of cleanup:

ACKed by Martin, again attached for convenience.


>>> --- /dev/null
>>> +++ b/gcc/diagnostic-spec.h
>>
>>> +typedef location_t key_type_t;
>>> +typedef int_hash  xint_hash_t;

> By the way, it seems we should probably also use a manifest constant
> for Empty (probably UNKNOWN_LOCATION since we're reserving it).

Yes, that will be part of another patch here -- waiting for approval of
"Generalize 'gcc/input.h:struct location_hash'" posted elsewhere.


>> attached "Don't maintain a warning spec for
>> 'UNKNOWN_LOCATION'/'BUILTINS_LOCATION' [PR101574]".  OK to push?
>
> [...].  So I agree that it ought to be fixed.

>> I'm reasonably confident that my changes are doing the right things in
>> general, but please carefully review, especially here:
>>
>>- 'gcc/warning-control.cc:suppress_warning' functions: is it correct to
>>  conditionalize on '!RESERVED_LOCATION_P' the 'suppress_warning_at'
>>  calls and 'supp' update?  Or, should instead 'suppress_warning_at'
>>  handle the case of '!RESERVED_LOCATION_P'?  (How?)
>
> It seems like six of one vs half a dozen of the other.  I'd say go
> with whatever makes more sense to you here :)

OK, was just trying to make sure that I don't fail to see any non-obvious
intentions here.

>>- 'gcc/diagnostic-spec.c:copy_warning' and
>>  'gcc/warning-control.cc:copy_warning': is the rationale correct for
>>  the 'gcc_checking_assert (!from_spec)': "If we cannot set no-warning
>>  dispositions for 'to', ascertain that we don't have any for 'from'.
>>  Otherwise, we'd lose these."?  If the rationale is correct, then
>>  observing that in 'gcc/warning-control.cc:copy_warning' this
>>  currently "triggers during GCC build" is something to be looked into,
>>  later, I suppose, and otherwise, how should I change this code?
>
> copy_warning(location_t, location_t) is called [only] from
> gimple_set_location().  The middle end does clear the location of
> some statements for which it was previously valid (e.g., return
> statements).

What I observed was that the 'assert' never triggered for the
'location_t' variant "called [only] from gimple_set_location" -- but does
trigger for some other variant.  Anyway:

> So I wouldn't expect this assumption to be safe.  If
> that happens, we have no choice but to lose the per-warning detail
> and fall back on the no-warning bit.

ACK.  I'm thus clarifying that as follows:

--- gcc/diagnostic-spec.c
+++ gcc/diagnostic-spec.c
@@ -185,7 +185,5 @@ copy_warning (location_t to, location_t from)
   if (RESERVED_LOCATION_P (to))
-{
-  /* If we cannot set no-warning dispositions for 'to', ascertain that 
we
-don't have any for 'from'.  Otherwise, we'd lose these.  */
-  gcc_checking_assert (!from_spec);
-}
+/* We cannot set no-warning dispositions for 'to', so we have no 
chance but
+   lose those potentially set for 'from'.  */
+;
   else
--- gcc/warning-control.cc
+++ gcc/warning-control.cc
@@ -197,9 +197,5 @@ void copy_warning (ToType to, FromType from)
   if (RESERVED_LOCATION_P (to_loc))
-{
-#if 0 //TODO triggers during GCC build
-  /* If we cannot set no-warning dispositions for 'to', ascertain that 
we
-don't have any for 'from'.  Otherwise, we'd lose these.  */
-  gcc_checking_assert (!from_spec);
-#endif
-}
+/* We cannot set no-warning dispositions for 'to', so we have no 
chance but
+   lose those potentially set for 'from'.  */
+;
   else

> (Either David or a middle end maintainer will need to approve the last
> patch once it's final.)

As far as I'm concerned that would 

Re: [r12-3321 Regression] FAIL: gfortran.dg/PR100914.f90 -Os (test for excess errors) on Linux/x86_64

2021-09-03 Thread H.J. Lu via Gcc-patches
On Fri, Sep 3, 2021 at 10:25 AM Sandra Loosemore
 wrote:
>
> On 9/2/21 11:37 PM, Sandra Loosemore wrote:
> > On 9/2/21 10:18 PM, sunil.k.pandey wrote:
> >> On Linux/x86_64,
> >>
> >> 93b6b2f614eb692d1d8126ec6cb946984a9d01d7 is the first bad commit
> >> commit 93b6b2f614eb692d1d8126ec6cb946984a9d01d7
> >> Author: Sandra Loosemore 
> >> Date:   Wed Aug 18 07:22:03 2021 -0700
> >>
> >>  libgfortran: Further fixes for GFC/CFI descriptor conversions.
> >>
> >> caused
> >>
> >> FAIL: gfortran.dg/PR100914.f90   -O0  (test for excess errors)
> >> FAIL: gfortran.dg/PR100914.f90   -O1  (test for excess errors)
> >> FAIL: gfortran.dg/PR100914.f90   -O2  (test for excess errors)
> >> FAIL: gfortran.dg/PR100914.f90   -O3 -fomit-frame-pointer
> >> -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
> >> excess errors)
> >> FAIL: gfortran.dg/PR100914.f90   -O3 -g  (test for excess errors)
> >> FAIL: gfortran.dg/PR100914.f90   -Os  (test for excess errors)
> >
> > I did not see this failure in my own testing, but I also had this patch
> > for another known bug in my stack:
> >
> > https://gcc.gnu.org/pipermail/fortran/2021-August/056382.html
> >
> > I just pinged that earlier this evening as it has not been reviewed yet.
> >   I'll rebuild/retest without that patch and confirm that's what the
> > problem is.
>
> I'm still not seeing these FAILs in my test results.  Can you provide
> any details about the "excess errors" it is producing?  E.g. is it the
> same missing libquadmath.h error reported here?
>
> https://gcc.gnu.org/pipermail/fortran/2021-September/056467.html
>
> -Sandra

/export/gnu/import/git/gcc-test-master-intel64/bld/gcc/testsuite/gfortran2/../../gfortran
-B/export/gnu/import/git/gcc-test-master-intel64/bld/gcc/testsuite/gfortran2/../../
-B/export/gnu/import/git/gcc-test-master-intel64/bld/x86_64-pc-linux-gnu/./libgfortran/
/export/gnu/import/git/gcc-test-master-intel64/src-master/gcc/testsuite/gfortran.dg/PR100914.f90
-fdiagnostics-plain-output -fdiagnostics-plain-output -O0
-pedantic-errors
/export/gnu/import/git/gcc-test-master-intel64/src-master/gcc/testsuite/gfortran.dg/PR100914.c
-dumpbase  
-B/export/gnu/import/git/gcc-test-master-intel64/bld/x86_64-pc-linux-gnu/./libgfortran/.libs
-L/export/gnu/import/git/gcc-test-master-intel64/bld/x86_64-pc-linux-gnu/./libgfortran/.libs
-L/export/gnu/import/git/gcc-test-master-intel64/bld/x86_64-pc-linux-gnu/./libgfortran/.libs
-L/export/gnu/import/git/gcc-test-master-intel64/bld/x86_64-pc-linux-gnu/./libatomic/.libs
-B/export/gnu/import/git/gcc-test-master-intel64/bld/x86_64-pc-linux-gnu/./libquadmath/.libs
-L/export/gnu/import/git/gcc-test-master-intel64/bld/x86_64-pc-linux-gnu/./libquadmath/.libs
-L/export/gnu/import/git/gcc-test-master-intel64/bld/x86_64-pc-linux-gnu/./libquadmath/.libs
-lm -o ./PR100914.exe
/export/gnu/import/git/gcc-test-master-intel64/src-master/gcc/testsuite/gfortran.dg/PR100914.c:8:10:
fatal error: quadmath.h: No such file or directory
compilation terminated.
compiler exited with status 1
FAIL: gfortran.dg/PR100914.f90   -O0  (test for excess errors)
Excess errors:
/export/gnu/import/git/gcc-test-master-intel64/src-master/gcc/testsuite/gfortran.dg/PR100914.c:8:10:
fatal error: quadmath.h: No such file or directory
compilation terminated.

I think some -B/-I is missing.

-- 
H.J.


Re: [committed] analyzer: support "bifurcation"; reimplement realloc [PR99260]

2021-09-03 Thread Gerald Pfeifer
On Mon, 30 Aug 2021, David Malcolm via Gcc-patches wrote:
> gcc/analyzer/ChangeLog:
>   PR analyzer/99260
>   * analyzer.h (class custom_edge_info): New class, adapted from
>   exploded_edge::custom_info_t.  Make member functions const.
>   Make update_model return bool, converting edge param from
>   reference to a pointer, and adding a ctxt param.
>   (class path_context): New class.
>   * call-info.cc: New file.
>   * call-info.h: New file.
>   * engine.cc: Include "analyzer/call-info.h" and .
>   (impl_region_model_context::impl_region_model_context): Update for
>   new m_path_ctxt field.
>   (impl_region_model_context::bifurcate): New.
>   (impl_region_model_context::terminate_path): New.

I believe it is this change that is causing bootstrap to fail with

  In file included from /scratch/tmp/gerald/GCC-HEAD/gcc/analyzer/engine.cc:69:
  In file included from /usr/include/c++/v1/memory:653:
  /usr/include/c++/v1/typeinfo:346:5: error: no member named 'fancy_abort' 
in name space 'std::__1'; did you mean simply 'fancy_abort'?
_VSTD::abort();
^~~
  /usr/include/c++/v1/__config:782:15: note: expanded from macro '_VSTD'
  #define _VSTD std::_LIBCPP_ABI_NAMESPACE
^
  /scratch/tmp/gerald/GCC-HEAD/gcc/system.h:777:13: note: 'fancy_abort' 
declared here
  extern void fancy_abort (const char *, int, const char *)
  ^

This is on FreeBSD 11 with clang version 10.0.1 as system compiler and
libc++, which I assume is what triggers this?

Gerald


Re: [r12-3321 Regression] FAIL: gfortran.dg/PR100914.f90 -Os (test for excess errors) on Linux/x86_64

2021-09-03 Thread Sandra Loosemore

On 9/2/21 11:37 PM, Sandra Loosemore wrote:

On 9/2/21 10:18 PM, sunil.k.pandey wrote:

On Linux/x86_64,

93b6b2f614eb692d1d8126ec6cb946984a9d01d7 is the first bad commit
commit 93b6b2f614eb692d1d8126ec6cb946984a9d01d7
Author: Sandra Loosemore 
Date:   Wed Aug 18 07:22:03 2021 -0700

 libgfortran: Further fixes for GFC/CFI descriptor conversions.

caused

FAIL: gfortran.dg/PR100914.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/PR100914.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/PR100914.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/PR100914.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for 
excess errors)

FAIL: gfortran.dg/PR100914.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/PR100914.f90   -Os  (test for excess errors)


I did not see this failure in my own testing, but I also had this patch 
for another known bug in my stack:


https://gcc.gnu.org/pipermail/fortran/2021-August/056382.html

I just pinged that earlier this evening as it has not been reviewed yet. 
  I'll rebuild/retest without that patch and confirm that's what the 
problem is.


I'm still not seeing these FAILs in my test results.  Can you provide 
any details about the "excess errors" it is producing?  E.g. is it the 
same missing libquadmath.h error reported here?


https://gcc.gnu.org/pipermail/fortran/2021-September/056467.html

-Sandra


Re: [PATCH v3, Fortran] TS 29113 testsuite

2021-09-03 Thread Sandra Loosemore

On 9/3/21 3:14 AM, Tobias Burnus wrote:

If I read https://gcc.gnu.org/onlinedocs/gcc/Floating-Types.html 
correctly, we should use _Float128 for the following errors


"The _Float128 type is supported on all systems where __float128 is 
supported or where long double has the IEEE binary128 format."


Applies to:


For
typecodes-array-float128.f90
FAIL: gfortran.dg/c-interop/typecodes-array-float128.f90   -O0  (test 
for excess errors)

Excess errors:
/gcc/testsuite/gfortran.dg/c-interop/typecodes-array-float128-c.c:35:32: 
error: '__float128' undeclared (first use in this function); did you 
mean '_Float128'?

typecodes-sanity.f90
FAIL: gfortran.dg/c-interop/typecodes-sanity.f90   -O0  (test for 
excess errors)

Excess errors:
/gcc/testsuite/gfortran.dg/c-interop/typecodes-sanity-c.c:41:13: 
error: '__float128' undeclared here (not in a function); did you mean 
'_Float128'?

typecodes-scalar-float128.f90
FAIL: gfortran.dg/c-interop/typecodes-scalar-float128.f90   -O0  (test 
for excess errors)

Excess errors:
/gcc/testsuite/gfortran.dg/c-interop/typecodes-scalar-float128-c.c:34:32: 
error: '__float128' undeclared (first use in this function); did you 
mean '_Float128'?


Just so I'm clear on this, the situation with ARM/AArch64 is that it 
provides _Float128 but not __float128?


The GNU Fortran manual explicitly defines C_FLOAT128 as corresponding to 
__float128, not _Float128 or some other 128-bit floating-point type.


https://gcc.gnu.org/onlinedocs/gcc-11.2.0/gfortran/ISO_005fC_005fBINDING.html#ISO_005fC_005fBINDING

So the situation with ARM/AArch64 is that it provides _Float128 but not 
__float128?


I guess we could change the documentation and all the references in the 
implementation as well as the test cases to tie this kind to _Float128 
instead.  I think that is backward-compatible with all existing uses?



* * *

PR100914.f90
FAIL: gfortran.dg/PR100914.f90   -O0  (test for excess errors)
Excess errors:
/gcc/testsuite/gfortran.dg/PR100914.c:8:10: fatal error: quadmath.h: 
No such file or directory


Does ARM/Aarch64 provide _Float128 support without also providing 
libquadmath.h?  I'm trying to understand why it got that particular 
error.  :-S




  * * *

On a normal x86-64 system, I get XPASS for:

gfortran.dg/PR100914.f90

gfortran.dg/c-interop/typecodes-array-float128.f90

The ! { dg-do run { xfail { { x86_64*-*-* i?86*-*-* } && longdouble128 } 
} }

does not help as it just checks whether 'sizeof(long double)==16',
which seemingly passes also on x86-64 with 80bit long double.


I don't understand this, either.  I've been testing on an 
i686-pc-linux-gnu build with both -m32 and -m64.  The tests PASS on -m32 
and XFAIL on -m64.  The XFAIL is there because with -m64, sizeof (long 
double) == 16 and it can't be disambiguated from the true 128-bit 
floating point type __float128 which also has size 16, and the other 
patch I committed makes it choose the standard type over the GNU 
extension type.  With -m32, the 80-bit long double type has size 12 
instead so there is no conflict.


-Sandra



Re: [PATCH] Implement POINTER_DIFF_EXPR entry in range-op.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
Patches welcome :-).

On Fri, Sep 3, 2021, 18:42 Martin Sebor  wrote:

> On 9/3/21 8:41 AM, Aldy Hernandez via Gcc-patches wrote:
> > [Andrew, do you see any problem with using the minus relational code
> > here?  It seems like everything matches up.]
> >
> > I've seen cases in the upcoming jump threader enhancements where we see
> > a difference of two pointers that are known to be equivalent, and yet we
> > fail to return 0 for the range.  This is because we have no working
> > range-op entry for POINTER_DIFF_EXPR.  The entry we currently have is
> > a mere placeholder to avoid ignoring POINTER_DIFF_EXPR's so
> > adjust_pointer_diff_expr() could get a whack at it here:
> >
> > //def = __builtin_memchr (arg, 0, sz)
> > //n = def - arg
> > //
> > // The range for N can be narrowed to [0, PTRDIFF_MAX - 1].
>
> For the two statements above a tighter bound is possible: [0, sz).
>
> With no range info, sz can be assumed to be in [0, N], where N
> is one less than the smaller of PTRDIFF_MAX and the size of arg.
> (With PTRDIFF_MAX being only hypothetical.  max_object_size()
> should at some return the actual limit for the current target).
>
> The same (or even tighter) range can be derived from calls to
> other functions, including like mempcpy/stpcpy, or strchr, etc.
>
> Martin
>
> >
> > This patch adds the relational magic to range-op, which we can just
> > steal from the minus_expr code.
> >
> > Testing currently in progress.  I will commit pending tests.
> >
> > gcc/ChangeLog:
> >
> >   * range-op.cc (operator_minus::op1_op2_relation_effect): Abstract
> >   out to...
> >   (minus_op1_op2_relation_effect): ...here.
> >   (class operator_pointer_diff): New.
> >   (operator_pointer_diff::op1_op2_relation_effect): Call
> >   minus_op1_op2_relation_effect.
> >   (integral_table::integral_table): Add entry for POINTER_DIFF_EXPR.
> > ---
> >   gcc/range-op.cc | 45 ++---
> >   1 file changed, 38 insertions(+), 7 deletions(-)
> >
> > diff --git a/gcc/range-op.cc b/gcc/range-op.cc
> > index fee0e834c23..5e37133026d 100644
> > --- a/gcc/range-op.cc
> > +++ b/gcc/range-op.cc
> > @@ -1372,13 +1372,14 @@ operator_minus::wi_fold (irange , tree type,
> >   }
> >
> >   // Check to see if the relation REL between OP1 and OP2 has any effect
> on the
> > -// LHS of the expression.  If so, apply it to LHS_RANGE.
> > +// LHS of the expression.  If so, apply it to LHS_RANGE.  This is a
> helper
> > +// function for both MINUS_EXPR and POINTER_DIFF_EXPR.
> >
> > -bool
> > -operator_minus::op1_op2_relation_effect (irange _range, tree type,
> > -   const irange _range
> ATTRIBUTE_UNUSED,
> > -   const irange _range
> ATTRIBUTE_UNUSED,
> > -   relation_kind rel) const
> > +static bool
> > +minus_op1_op2_relation_effect (irange _range, tree type,
> > +const irange _range ATTRIBUTE_UNUSED,
> > +const irange _range ATTRIBUTE_UNUSED,
> > +relation_kind rel)
> >   {
> > if (rel == VREL_NONE)
> >   return false;
> > @@ -1440,6 +1441,16 @@ operator_minus::op1_op2_relation_effect (irange
> _range, tree type,
> > return true;
> >   }
> >
> > +bool
> > +operator_minus::op1_op2_relation_effect (irange _range, tree type,
> > +  const irange _range,
> > +  const irange _range,
> > +  relation_kind rel) const
> > +{
> > +  return minus_op1_op2_relation_effect (lhs_range, type, op1_range,
> op2_range,
> > + rel);
> > +}
> > +
> >   bool
> >   operator_minus::op1_range (irange , tree type,
> >  const irange ,
> > @@ -1459,6 +1470,26 @@ operator_minus::op2_range (irange , tree type,
> >   }
> >
> >
> > +class operator_pointer_diff : public range_operator
> > +{
> > +  virtual bool op1_op2_relation_effect (irange _range,
> > + tree type,
> > + const irange _range,
> > + const irange _range,
> > + relation_kind rel) const;
> > +} op_pointer_diff;
> > +
> > +bool
> > +operator_pointer_diff::op1_op2_relation_effect (irange _range, tree
> type,
> > + const irange _range,
> > + const irange _range,
> > + relation_kind rel) const
> > +{
> > +  return minus_op1_op2_relation_effect (lhs_range, type, op1_range,
> op2_range,
> > + rel);
> > +}
> > +
> > +
> >   class operator_min : public range_operator
> >   {
> >   public:
> > @@ -4018,7 +4049,7 @@ integral_table::integral_table ()
> > set (OBJ_TYPE_REF, op_identity);
> 

Re: [PATCH] Implement POINTER_DIFF_EXPR entry in range-op.

2021-09-03 Thread Martin Sebor via Gcc-patches

On 9/3/21 8:41 AM, Aldy Hernandez via Gcc-patches wrote:

[Andrew, do you see any problem with using the minus relational code
here?  It seems like everything matches up.]

I've seen cases in the upcoming jump threader enhancements where we see
a difference of two pointers that are known to be equivalent, and yet we
fail to return 0 for the range.  This is because we have no working
range-op entry for POINTER_DIFF_EXPR.  The entry we currently have is
a mere placeholder to avoid ignoring POINTER_DIFF_EXPR's so
adjust_pointer_diff_expr() could get a whack at it here:

//  def = __builtin_memchr (arg, 0, sz)
//  n = def - arg
//
// The range for N can be narrowed to [0, PTRDIFF_MAX - 1].


For the two statements above a tighter bound is possible: [0, sz).

With no range info, sz can be assumed to be in [0, N], where N
is one less than the smaller of PTRDIFF_MAX and the size of arg.
(With PTRDIFF_MAX being only hypothetical.  max_object_size()
should at some return the actual limit for the current target).

The same (or even tighter) range can be derived from calls to
other functions, including like mempcpy/stpcpy, or strchr, etc.

Martin



This patch adds the relational magic to range-op, which we can just
steal from the minus_expr code.

Testing currently in progress.  I will commit pending tests.

gcc/ChangeLog:

* range-op.cc (operator_minus::op1_op2_relation_effect): Abstract
out to...
(minus_op1_op2_relation_effect): ...here.
(class operator_pointer_diff): New.
(operator_pointer_diff::op1_op2_relation_effect): Call
minus_op1_op2_relation_effect.
(integral_table::integral_table): Add entry for POINTER_DIFF_EXPR.
---
  gcc/range-op.cc | 45 ++---
  1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index fee0e834c23..5e37133026d 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -1372,13 +1372,14 @@ operator_minus::wi_fold (irange , tree type,
  }
  
  // Check to see if the relation REL between OP1 and OP2 has any effect on the

-// LHS of the expression.  If so, apply it to LHS_RANGE.
+// LHS of the expression.  If so, apply it to LHS_RANGE.  This is a helper
+// function for both MINUS_EXPR and POINTER_DIFF_EXPR.
  
-bool

-operator_minus::op1_op2_relation_effect (irange _range, tree type,
- const irange _range ATTRIBUTE_UNUSED,
- const irange _range ATTRIBUTE_UNUSED,
- relation_kind rel) const
+static bool
+minus_op1_op2_relation_effect (irange _range, tree type,
+  const irange _range ATTRIBUTE_UNUSED,
+  const irange _range ATTRIBUTE_UNUSED,
+  relation_kind rel)
  {
if (rel == VREL_NONE)
  return false;
@@ -1440,6 +1441,16 @@ operator_minus::op1_op2_relation_effect (irange 
_range, tree type,
return true;
  }
  
+bool

+operator_minus::op1_op2_relation_effect (irange _range, tree type,
+const irange _range,
+const irange _range,
+relation_kind rel) const
+{
+  return minus_op1_op2_relation_effect (lhs_range, type, op1_range, op2_range,
+   rel);
+}
+
  bool
  operator_minus::op1_range (irange , tree type,
   const irange ,
@@ -1459,6 +1470,26 @@ operator_minus::op2_range (irange , tree type,
  }
  
  
+class operator_pointer_diff : public range_operator

+{
+  virtual bool op1_op2_relation_effect (irange _range,
+   tree type,
+   const irange _range,
+   const irange _range,
+   relation_kind rel) const;
+} op_pointer_diff;
+
+bool
+operator_pointer_diff::op1_op2_relation_effect (irange _range, tree type,
+   const irange _range,
+   const irange _range,
+   relation_kind rel) const
+{
+  return minus_op1_op2_relation_effect (lhs_range, type, op1_range, op2_range,
+   rel);
+}
+
+
  class operator_min : public range_operator
  {
  public:
@@ -4018,7 +4049,7 @@ integral_table::integral_table ()
set (OBJ_TYPE_REF, op_identity);
set (IMAGPART_EXPR, op_unknown);
set (REALPART_EXPR, op_unknown);
-  set (POINTER_DIFF_EXPR, op_unknown);
+  set (POINTER_DIFF_EXPR, op_pointer_diff);
set (ABS_EXPR, op_abs);
set (ABSU_EXPR, op_absu);
set (NEGATE_EXPR, op_negate);





Re: [PATCH] Implement POINTER_DIFF_EXPR entry in range-op.

2021-09-03 Thread Aldy Hernandez via Gcc-patches




On 9/3/21 5:41 PM, Jakub Jelinek wrote:

On Fri, Sep 03, 2021 at 09:09:59AM -0600, Jeff Law via Gcc-patches wrote:



On 9/3/2021 9:05 AM, Andrew MacLeod via Gcc-patches wrote:

On 9/3/21 10:41 AM, Aldy Hernandez wrote:

[Andrew, do you see any problem with using the minus relational code
here?  It seems like everything matches up.]

I've seen cases in the upcoming jump threader enhancements where we see
a difference of two pointers that are known to be equivalent, and yet we
fail to return 0 for the range.  This is because we have no working
range-op entry for POINTER_DIFF_EXPR.  The entry we currently have is
a mere placeholder to avoid ignoring POINTER_DIFF_EXPR's so
adjust_pointer_diff_expr() could get a whack at it here:

//    def = __builtin_memchr (arg, 0, sz)
//    n = def - arg
//
// The range for N can be narrowed to [0, PTRDIFF_MAX - 1].


In theory... but do the non-equality relations make sense?   ie ptr1 <
ptr2   ?   Perhaps there is no harm in those..  Probably undefined
behaviour if they are not related so we can do whatever...

I'm pretty sure they're in the realm of undefined behavior if the pointers
point to different objects.  There was some code that would have cared about
this, but I think it mostly got cleaned up in response to Martin S's
diagnostic work over the last couple years.


Yeah.  Just note, [0, PTRDIFF_MAX - 1] range will be only for builtins
like memchr/strchr/mempcpy etc. that return either the passed argument or that 
pointer
incremented some number of times.  Generally pointer arith can go in both
directions, so POINTER_DIFF_EXPR can be negative too.
If one of the pointers points into some VAR_DECL object, the range could be
narrowed from the size of that object though.


The [0, PTRDIFF_MAX - 1] stuff I spoke of, is the code in 
adjust_pointer_diff_expr().  It is meant to be used when chasing the 
defining statement of a POINTER_DIFF_EXPR is in a specific form. 
Currently it only applies to __builtin_memchr.  I believe the code came 
from vr-values (or tree-vrp) at some point ??.


Do you think the code there should be ehanced to include other built-ins 
other than __builtin_memchr?  Patches welcome :)).


BTW, my proposed patch only deals with relations between the operands. 
It has nothing to do with builtins or other such uses.  For example, if 
both operands to a POINTER_DIFF_EXPR are equal, we can deduce that the 
result will be 0.  I frankly only care about the == and != relationship 
presently, but it was easy enough to hijack the MINUS_EXPR relation code.


Thanks.
Aldy


char a[26];

long
foo (long i, char *q)
{
   char *p = [0] + i;
   return q - p;
}
The minimum result will be if q points to [0] and p to [26], i.e.
-26, maximum the other way, so [-26, 26] range.

Jakub





Re: [Committed] [PATCH 2/4] (v4) On-demand locations within string-literals

2021-09-03 Thread Thomas Schwinge
Hi!

On 2021-09-02T21:09:54+0200, I wrote:
> On 2021-09-02T15:59:14+0200, I wrote:
>> On 2016-08-05T14:16:58-0400, David Malcolm  wrote:
>>> Committed to trunk as r239175; I'm attaching the final version of the
>>> patch for reference.
>>
>> David, you've added here 'gcc/input.h:struct location_hash' (see quoted
>> below), which will be useful elsewhere, so:
>>
>>> --- a/gcc/input.c
>>> +++ b/gcc/input.c
>>
>>> +/* Internal function.  Canonicalize LOC into a form suitable for
>>> +   use as a key within the database, stripping away macro expansion,
>>> +   ad-hoc information, and range information, using the location of
>>> +   the start of LOC within an ordinary linemap.  */
>>> +
>>> +location_t
>>> +string_concat_db::get_key_loc (location_t loc)
>>> +{
>>> +  loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
>>> + NULL);
>>> +
>>> +  loc = get_range_from_loc (line_table, loc).m_start;
>>> +
>>> +  return loc;
>>> +}
>>
>> OK to push the attached
>> "Harden 'gcc/input.c:string_concat_db::get_key_loc'"?  (This fell out of
>> my analysis for development work elsewhere.)
>
> My suggested patch was:
>
> --- a/gcc/input.c
> +++ b/gcc/input.c
> @@ -1483,6 +1483,9 @@ string_concat_db::get_key_loc (location_t loc)
>
>loc = get_range_from_loc (line_table, loc).m_start;
>
> +  /* Ascertain that 'loc' is valid as a key in 'm_table'.  */
> +  gcc_checking_assert (!RESERVED_LOCATION_P (loc));
> +
>return loc;
>  }
>
> Uh, I should've looked at the correct test logs...  This change actually
> does regress 'c-c++-common/substring-location-PR-87721.c' and
> 'gcc.dg/plugin/diagnostic-test-string-literals-1.c': for these, we do see
> 'BUILTINS_LOCATION' (via 'string_concat_db::record_string_concatenation').
> Unless someone tell me that's unexpected (I'm completely lost in this
> code...)

I think I convinced myself that the current code doesn't have stable
behavior, so...

> I shall change/generalize my changes to provide both a
> 'location_hash' only using 'UNKNOWN_LOCATION' as a spare value for
> 'Empty' (as currently used here) and another variant additionally using
> 'BUILTINS_LOCATION' as spare value for 'Deleted'.

... I didn't do this, but instead would like to push the attached
"Don't record string concatenation data for 'RESERVED_LOCATION_P'"
(replacing "Harden 'gcc/input.c:string_concat_db::get_key_loc'" as
originally proposed).  OK?


... and then re:

>>> --- a/gcc/input.h
>>> +++ b/gcc/input.h
>>
>>> +struct location_hash : int_hash  { };
>>> +
>>> +class GTY(()) string_concat_db
>>> +{
>>> +[...]
>>> +  hash_map  *m_table;
>>> +};
>>
>> OK to push the attached
>> "Generalize 'gcc/input.h:struct location_hash'"?

Attached again.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 9f1066fcb770397d6e791aa0594f067a755e2ed6 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Fri, 3 Sep 2021 18:25:10 +0200
Subject: [PATCH] Don't record string concatenation data for
 'RESERVED_LOCATION_P'

'RESERVED_LOCATION_P' means 'UNKNOWN_LOCATION' or 'BUILTINS_LOCATION.
We're using 'UNKNOWN_LOCATION' as a spare value for 'Empty', so should
ascertain that we don't use it as a key additionally.  Similarly for
'BUILTINS_LOCATION' that we'd later like to use as a spare value for
'Deleted'.

As discussed in the source code comment added, for these we didn't have
stable behavior anyway.

Follow-up to r239175 (commit 88faa309e5d6c6171b957daaf2f800920869)
"On-demand locations within string-literals".

	gcc/
	* input.c (string_concat_db::record_string_concatenation)
	(string_concat_db::get_string_concatenation): Skip for
	'RESERVED_LOCATION_P'.
	gcc/testsuite/
	* gcc.dg/plugin/diagnostic-test-string-literals-1.c: Adjust
	expected error diagnostics.
---
 gcc/input.c  | 9 +
 .../gcc.dg/plugin/diagnostic-test-string-literals-1.c| 4 ++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/gcc/input.c b/gcc/input.c
index 4b809862e02..dd753decfa0 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -1437,6 +1437,11 @@ string_concat_db::record_string_concatenation (int num, location_t *locs)
   gcc_assert (locs);
 
   location_t key_loc = get_key_loc (locs[0]);
+  /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
+ any data now recorded under key 'key_loc' would be overwritten by a
+ subsequent call with the same key 'key_loc'.  */
+  if (RESERVED_LOCATION_P (key_loc))
+return;
 
   string_concat *concat
 = new (ggc_alloc  ()) string_concat (num, locs);
@@ -1460,6 +1465,10 @@ string_concat_db::get_string_concatenation (location_t loc,
   gcc_assert (out_locs);
 
   location_t key_loc = get_key_loc (loc);
+  /* We 

Re: [PATCH v3] rs6000: Add load density heuristic

2021-09-03 Thread Bill Schmidt via Gcc-patches

Hi Kewen,

Sorry that we lost track of this patch!  The heuristic approach looks 
good.  It is limited in scope and won't kick in often, and the case 
you're trying to account for is important.


At the time you submitted this, I think reliable P10 testing wasn't 
possible.  Now that it is, could you please do a quick sniff test to 
make sure there aren't any adjustments that need to be made for P10?  I 
doubt it, but worth checking.


Otherwise I have one comment below...

On 7/28/21 12:22 AM, Kewen.Lin wrote:

Hi,

v2: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571258.html

This v3 addressed William's review comments in
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/576154.html

It's mainly to deal with the bwaves_r degradation due to vector
construction fed by strided loads.

As Richi's comments [1], this follows the similar idea to over
price the vector construction fed by VMAT_ELEMENTWISE or
VMAT_STRIDED_SLP.  Instead of adding the extra cost on vector
construction costing immediately, it firstly records how many
loads and vectorized statements in the given loop, later in
rs6000_density_test (called by finish_cost) it computes the
load density ratio against all vectorized stmts, and check
with the corresponding thresholds DENSITY_LOAD_NUM_THRESHOLD
and DENSITY_LOAD_PCT_THRESHOLD, do the actual extra pricing
if both thresholds are exceeded.

Note that this new load density heuristic check is based on
some fields in target cost which are updated as needed when
scanning each add_stmt_cost entry, it's independent of the
current function rs6000_density_test which requires to scan
non_vect stmts.  Since it's checking the load stmts count
vs. all vectorized stmts, it's kind of density, so I put
it in function rs6000_density_test.  With the same reason to
keep it independent, I didn't put it as an else arm of the
current existing density threshold check hunk or before this
hunk.

In the investigation of -1.04% degradation from 526.blender_r
on Power8, I noticed that the extra penalized cost 320 on one
single vector construction with type V16QI is much exaggerated,
which makes the final body cost unreliable, so this patch adds
one maximum bound for the extra penalized cost for each vector
construction statement.

Bootstrapped & regtested *again* on powerpc64le-linux-gnu P9.

Full SPEC2017 performance evaluation on Power8/Power9 with
option combinations (with v2, as v3 is NFC against v2):
   * -O2 -ftree-vectorize {,-fvect-cost-model=very-cheap} {,-ffast-math}
   * {-O3, -Ofast} {,-funroll-loops}

bwaves_r degradations on P8/P9 have been fixed, nothing else
remarkable was observed.

Is it ok for trunk?

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570076.html

BR,
Kewen
-
gcc/ChangeLog:

* config/rs6000/rs6000.c (struct rs6000_cost_data): New members
nstmts, nloads and extra_ctor_cost.
(rs6000_density_test): Add load density related heuristics and the
checks, do extra costing on vector construction statements if need.
(rs6000_init_cost): Init new members.
(rs6000_update_target_cost_per_stmt): New function.
(rs6000_add_stmt_cost): Factor vect_nonmem hunk out to function
rs6000_update_target_cost_per_stmt and call it.



---
gcc/config/rs6000/rs6000.c | 119 ++---
1 file changed, 110 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 279f00cc648..4e9087683dc 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5254,6 +5254,12 @@ typedef struct _rs6000_cost_data
{
  struct loop *loop_info;
  unsigned cost[3];
+  /* Total number of vectorized stmts (loop only).  */
+  unsigned nstmts;
+  /* Total number of loads (loop only).  */
+  unsigned nloads;
+  /* Possible extra penalized cost on vector construction (loop only).  */
+  unsigned extra_ctor_cost;
  /* For each vectorized loop, this var holds TRUE iff a non-memory vector
 instruction is needed by the vectorization.  */
  bool vect_nonmem;
@@ -5315,9 +5321,45 @@ rs6000_density_test (rs6000_cost_data *data)
  if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
 "density %d%%, cost %d exceeds threshold, penalizing "
-"loop body cost by %d%%", density_pct,
+"loop body cost by %d%%\n", density_pct,
 vec_cost + not_vec_cost, DENSITY_PENALTY);
}
+
+  /* Check if we need to penalize the body cost for latency and
+ execution resources bound from strided or elementwise loads
+ into a vector.  */
+  if (data->extra_ctor_cost > 0)
+{
+  /* Threshold for load stmts percentage in all vectorized stmts.  */
+  const int DENSITY_LOAD_PCT_THRESHOLD = 45;
+  /* Threshold for total number of load stmts.  */
+  const int DENSITY_LOAD_NUM_THRESHOLD = 20;
+
+  gcc_assert (data->nloads <= data->nstmts);
+  unsigned int 

Re: [PATCH] Implement POINTER_DIFF_EXPR entry in range-op.

2021-09-03 Thread Jakub Jelinek via Gcc-patches
On Fri, Sep 03, 2021 at 09:09:59AM -0600, Jeff Law via Gcc-patches wrote:
> 
> 
> On 9/3/2021 9:05 AM, Andrew MacLeod via Gcc-patches wrote:
> > On 9/3/21 10:41 AM, Aldy Hernandez wrote:
> > > [Andrew, do you see any problem with using the minus relational code
> > > here?  It seems like everything matches up.]
> > > 
> > > I've seen cases in the upcoming jump threader enhancements where we see
> > > a difference of two pointers that are known to be equivalent, and yet we
> > > fail to return 0 for the range.  This is because we have no working
> > > range-op entry for POINTER_DIFF_EXPR.  The entry we currently have is
> > > a mere placeholder to avoid ignoring POINTER_DIFF_EXPR's so
> > > adjust_pointer_diff_expr() could get a whack at it here:
> > > 
> > > //    def = __builtin_memchr (arg, 0, sz)
> > > //    n = def - arg
> > > //
> > > // The range for N can be narrowed to [0, PTRDIFF_MAX - 1].
> > > 
> > In theory... but do the non-equality relations make sense?   ie ptr1 <
> > ptr2   ?   Perhaps there is no harm in those..  Probably undefined
> > behaviour if they are not related so we can do whatever...
> I'm pretty sure they're in the realm of undefined behavior if the pointers
> point to different objects.  There was some code that would have cared about
> this, but I think it mostly got cleaned up in response to Martin S's
> diagnostic work over the last couple years.

Yeah.  Just note, [0, PTRDIFF_MAX - 1] range will be only for builtins
like memchr/strchr/mempcpy etc. that return either the passed argument or that 
pointer
incremented some number of times.  Generally pointer arith can go in both
directions, so POINTER_DIFF_EXPR can be negative too.
If one of the pointers points into some VAR_DECL object, the range could be
narrowed from the size of that object though.
char a[26];

long
foo (long i, char *q)
{
  char *p = [0] + i;
  return q - p;
}
The minimum result will be if q points to [0] and p to [26], i.e.
-26, maximum the other way, so [-26, 26] range.

Jakub



Re: [PATCH] Implement POINTER_DIFF_EXPR entry in range-op.

2021-09-03 Thread Aldy Hernandez via Gcc-patches




On 9/3/21 5:05 PM, Andrew MacLeod wrote:

On 9/3/21 10:41 AM, Aldy Hernandez wrote:

[Andrew, do you see any problem with using the minus relational code
here?  It seems like everything matches up.]

I've seen cases in the upcoming jump threader enhancements where we see
a difference of two pointers that are known to be equivalent, and yet we
fail to return 0 for the range.  This is because we have no working
range-op entry for POINTER_DIFF_EXPR.  The entry we currently have is
a mere placeholder to avoid ignoring POINTER_DIFF_EXPR's so
adjust_pointer_diff_expr() could get a whack at it here:

//    def = __builtin_memchr (arg, 0, sz)
//    n = def - arg
//
// The range for N can be narrowed to [0, PTRDIFF_MAX - 1].

In theory... but do the non-equality relations make sense?   ie ptr1 < 
ptr2   ?   Perhaps there is no harm in those..  Probably undefined 
behaviour if they are not related so we can do whatever...


I originally just copy-pasted the stuff dealing with == and !=, but 
figured why not piggyback onto the existing minus code.


I'm ok with doing whatever for undefined behavior.  At least it's all 
encapsulated in a single place.


Will commit when tests finish.

Thanks.
Aldy



Re: [PATCH] c++: shortcut bad convs during overload resolution [PR101904]

2021-09-03 Thread Patrick Palka via Gcc-patches
On Thu, 2 Sep 2021, Jason Merrill wrote:

> On 8/31/21 3:15 PM, Patrick Palka wrote:
> > On Mon, 30 Aug 2021, Patrick Palka wrote:
> > 
> > > In the context of overload resolution we have the notion of a "bad"
> > > argument conversion, which is a conversion that "would be a permitted
> > > with a bending of the language standards", and we handle such bad
> > > conversions specially.  In particular, we rank a bad conversion as
> > > better than no conversion but worse than a good conversion, and a bad
> > > conversion doesn't necessarily make a candidate unviable.  With the
> > > flag -fpermissive, we permit the situation where overload resolution
> > > selects a candidate that contains a bad conversion (which we call a
> > > non-strictly viable candidate).  And without the flag we issue a
> > > distinct permerror in this situation instead.
> > > 
> > > One consequence of this defacto behavior is that in order to distinguish
> > > a non-strictly viable candidate from an unviable candidate, if we
> > > encounter a bad argument conversion during overload resolution we must
> > > keep converting subsequent arguments because a subsequent conversion
> > > could render the candidate unviable instead of just non-strictly viable.
> > > But checking subsequent arguments can force template instantiations and
> > > result in otherwise avoidable hard errors.  And in particular, all
> > > 'this' conversions are at worst bad, so this means the
> > > const/ref-qualifiers
> > > of a member function can't be used to prune a candidate quickly, which
> > > is the subject of the mentioned PR.
> > > 
> > > This patch tries to improve the situation without changing the defacto
> > > output of add_candidates.  Specifically, when considering a candidate
> > > during overload resolution this patch makes us shortcut argument
> > > conversion checking upon encountering the first bad conversion
> > > (tentatively marking the candidate as non-strictly viable, though it
> > > could ultimately be unviable) under the assumption that we'll eventually
> > > find a strictly viable candidate anyway (rendering the distinction
> > > between non-strictly viable and unviable moot, since both are worse
> > > than a strictly viable candidate).  If this assumption turns out to be
> > > false, we'll fully reconsider the candidate under the defacto behavior
> > > (without the shortcutting).
> > > 
> > > So in the best case (there's a strictly viable candidate), we avoid
> > > some argument conversions and/or template argument deduction that may
> > > cause a hard error.  In the worst case (there's no such candidate), we
> > > have to redundantly consider some candidates twice.  (In a previous
> > > version of the patch, to avoid this redundant checking I created a new
> > > "deferred" conversion type that represents a conversion that is yet to
> > > be performed, and instead of reconsidering a candidate I just realized
> > > its deferred conversions.  But it doesn't seem this redundancy is a
> > > significant performance issue to justify the added complexity of this
> > > other approach.)
> 
> OK, thanks.

Thanks a lot.  After retesting the patch, I ran into a libstdc++
testsuite failure involving reversed operator candidates that I somehow
missed earlier.  The failure is due to a bug in the last check below

  if (cand->viable == -1
  && shortcut_bad_convs
  && !cand->convs[cand->num_convs - 1]) // BUG
{
  /* This candidate has been tentatively marked non-strictly viable,
 and we didn't compute all argument conversions for it (having
 stopped at the first bad conversion).  Add the function to BAD_FNS
 to fully reconsider later if we don't find any strictly viable
 candidates.  */
  bad_fns = lookup_add (fn, bad_fns);
  *candidates = (*candidates)->next;
}

which assumes a shortcutted candidate must have a missing conversion
at the end of the conversion array (since argument conversions are
processed in left-to-right order).  But for a reversed candidate the
missing conversion would instead be at the front of the array, since we
reversed the order of its conversions in add_candidate.  So I changed
the problematic check to

  !cand->convs[cand->reversed () ? 0 : cand->num_convs - 1])

and committed the patch with this (presumably uncontroversial) change.

> 
> > > Lots of care was taken to preserve the defacto behavior w.r.t.
> > > non-strictly viable candidates, but I wonder how important this behavior
> > > is nowadays?  Can the notion of a non-strictly viable candidate be done
> > > away with, or is it here to stay?
> > 
> > To expand on this, as a concrete alternative to this optimistic shortcutting
> > trick we could maybe recognize non-strictly viable candidates only when
> > -fpermissive (and just mark them as unviable when not -fpermissive).  IIUC
> > this would be a backwards compatible change overall -- only diagnostics
> > would
> > 

Re: [PATCH V3 0/6] Initial support for AVX512FP16

2021-09-03 Thread Iain Sandoe



> On 3 Sep 2021, at 08:51, Iain Sandoe  wrote:
> 
> 
>> On 2 Sep 2021, at 21:03, Joseph Myers  wrote:
>> 
>> On Thu, 2 Sep 2021, Iain Sandoe via Gcc-patches wrote:
>> 
>>> diff --git a/libgcc/soft-fp/eqdf2.c b/libgcc/soft-fp/eqdf2.c
>>> index 2a44ee377ce..a3bb664f5f1 100644
>>> --- a/libgcc/soft-fp/eqdf2.c
>>> +++ b/libgcc/soft-fp/eqdf2.c
>>> @@ -28,6 +28,7 @@
>>>   License along with the GNU C Library; if not, see
>>>   .  */
>>> 
>>> +#define DarwinMode DF
>>> #include "soft-fp.h"
>>> #include "double.h"
>> 
>> All these files are supposed to be taken unmodified from glibc.  They 
>> shouldn't contain any OS-specific code, such as a define of DarwinMode.  
>> sfp-machine.h, however, is libgcc-local, hence putting the definition of 
>> strong_alias there.
> 
> OK, that makes sense.
>> 
>> So you need some other way to extract the argument type of name in order 
>> to use it in a declaration of aliasname.  E.g.
>> 
>> __typeof (_Generic (name,
>>   CMPtype (*) (HFtype, HFtype): (HFtype) 0,
>>   CMPtype (*) (SFtype, SFtype): (SFtype) 0,
>>   CMPtype (*) (DFtype, DFtype): (DFtype) 0,
>>   CMPtype (*) (TFtype, TFtype): (TFtype) 0))
> 
> thanks for the suggestion
> 
>> Now in fact I think the include ordering means none of the *type macros 
>> are defined here.  But if you do e.g.
>> 
>> typedef float alias_SFtype __attribute__ ((mode (SF)));
>> 
>> and similar, you could use alias_SFtype in the above.  And so keep the 
>> changes to the Darwin-specific parts of the libgcc-local sfp-machine.h.
> 
> this is what I’m testing - OK if it bootstraps on x86_64-darwin, linux?

(those bootstraps were sucessful)

given that:

a) this fixes Darwin x86-64 bootstrap which has been broken for more than 24h
b) the patch is now Darwin-local.

I’ve pushed the patch below to fix the bootstrap break - but if there are any 
futher 
recommendations I’m happy to apply a follow-on.  It seems that there will be 
more
changes for the half-float support anyway,

thanks
Iain

> 
> thanks
> Iain
> 
> 
> [PATCH] libgcc, soft-float: Fix strong_alias macro use for Darwin.
> 
> Darwin does not support strong symbol aliases and a work-
> around is provided in sfp-machine.h where a second function
> is created that simply calls the original.  However this
> needs the arguments to the synthesized function to track
> the mode of the original function.
> 
> So the fix here is to match known floating point modes from
> the incoming function and apply the one found to the new
> function args.
> 
> The matching is highly specific to the current set of modes
> and will need adjusting should more cases be added.
> 
> Signed-off-by: Iain Sandoe 
> 
> libgcc/ChangeLog:
> 
>   * config/i386/sfp-machine.h (alias_HFtype, alias_SFtype
>   alias_DFtype, alias_TFtype): New.
>   (ALIAS_SELECTOR): New.
>   (strong_alias): Use __typeof and a _Generic selector to
>   provide the type to the synthesized function.
> ---
> libgcc/config/i386/sfp-machine.h | 20 +---
> 1 file changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/libgcc/config/i386/sfp-machine.h 
> b/libgcc/config/i386/sfp-machine.h
> index f15d29d3755..172ebc70c8d 100644
> --- a/libgcc/config/i386/sfp-machine.h
> +++ b/libgcc/config/i386/sfp-machine.h
> @@ -75,10 +75,24 @@ void __sfp_handle_exceptions (int);
> 
> /* Define ALIASNAME as a strong alias for NAME.  */
> #if defined __MACH__
> -/* Mach-O doesn't support aliasing.  If these functions ever return
> -   anything but CMPtype we need to revisit this... */
> +/* Mach-O doesn't support aliasing, so we build a secondary function for
> +   the alias - we need to do a bit of a dance to find out what the type of
> +   the arguments is and then apply that to the secondary function.
> +   If these functions ever return anything but CMPtype we need to revisit
> +   this... */
> +typedef float alias_HFtype __attribute__ ((mode (HF)));
> +typedef float alias_SFtype __attribute__ ((mode (SF)));
> +typedef float alias_DFtype __attribute__ ((mode (DF)));
> +typedef float alias_TFtype __attribute__ ((mode (TF)));
> +#define ALIAS_SELECTOR \
> +  CMPtype (*) (alias_HFtype, alias_HFtype): (alias_HFtype) 0, \
> +  CMPtype (*) (alias_SFtype, alias_SFtype): (alias_SFtype) 0, \
> +  CMPtype (*) (alias_DFtype, alias_DFtype): (alias_DFtype) 0, \
> +  CMPtype (*) (alias_TFtype, alias_TFtype): (alias_TFtype) 0
> #define strong_alias(name, aliasname) \
> -  CMPtype aliasname (TFtype a, TFtype b) { return name(a, b); }
> +  CMPtype aliasname (__typeof (_Generic (name, ALIAS_SELECTOR)) a, \
> +  __typeof (_Generic (name, ALIAS_SELECTOR)) b) \
> + { return name (a, b); }
> #else
> # define strong_alias(name, aliasname) _strong_alias(name, aliasname)
> # define _strong_alias(name, aliasname) \
> -- 
> 



Re: [PATCH] Do not assume loop header threading in backward threader.

2021-09-03 Thread Aldy Hernandez via Gcc-patches




On 9/3/21 5:04 PM, Jeff Law wrote:



On 9/3/2021 7:57 AM, Aldy Hernandez wrote:

The registry's thread_through_all_blocks() has a may_peel_loop_headers
argument.  When refactoring the backward threader code, I removed this
argument for the local passthru method because it was always TRUE.  This
may not necessarily be true in the future, if the backward threader is
called from another context.  This patch removes the default definition,
in favor of an argument that is exactly the same as the identically
named function in tree-ssa-threadupdate.c.  I think this also makes it
less confusing when looking at both methods across the source base.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadbackward.c 
(back_threader::thread_through_all_blocks):

(back_threader_registry::thread_through_all_blocks):
(try_thread_blocks):
(pass_early_thread_jumps::execute):
Presumably this is preparing for addressing some of the issues we 
discussed via email a little while ago -- ie to avoid having the 
backwards threader mucking up loops and allowing the loop optimizer to 
make more decisions about things like peeling because the loop optimizer 
has a cost model for this stuff whereas the jump threaders to not.


Yes.

Currently the forward and the backward threaders have different 
profitability models.  The backward threader has one function to control 
it all, whereas the forward threader has ad-hoc code sprinkled 
throughout.  I would like to combine both models, though I'm not sure 
whether I'll be able to get everything done in this release.


Also, both threaders have different block duplication code.  The forward 
threader's seems more permissive than the other, so this should also be 
combined.  See for instance, EDGE_FSM_THREAD versus EDGE_*BLOCK.


Right now I'm trying to replace the forward threader with an enhanced 
version of the current backward threader, and I'm running into issues 
because the backward threader is handcuffed as to what it can do (see 
profitable_path_p).  It will take some untangling to figure out what to 
allow it to do, without it going ape-shit and threading iterations of 
loops, etc.


Anywhoo...it's a work in progress.  I have upcoming patches to both the 
forward threader clients (to rid them of their dependence on VRP/evrp), 
and to the backward threader to enhance it further so it can get pretty 
much everything the forward threader currently gets.


Gawwwd, this is so convoluted.  I hope most of it made sense.

Aldy



[pushed] c++: Avoid bogus -Wunused with recent change

2021-09-03 Thread Jason Merrill via Gcc-patches
My change to make limit_bad_template_recursion avoid instantiating members
of erroneous classes produced a bogus "used but not defined" warning for
23_containers/unordered_set/instantiation_neg.cc; it's not defined because
we decided not to instantiate it.  So we need to suppress that warning.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* pt.c (limit_bad_template_recursion): Suppress -Wunused for decls
we decide not to instantiate.
---
 gcc/cp/pt.c | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 72b22d8c487..1b81501386b 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -10890,15 +10890,27 @@ limit_bad_template_recursion (tree decl)
 return false;
 
   /* Avoid instantiating members of an ill-formed class.  */
-  if (DECL_CLASS_SCOPE_P (decl)
-  && CLASSTYPE_ERRONEOUS (DECL_CONTEXT (decl)))
-return true;
+  bool refuse
+= (DECL_CLASS_SCOPE_P (decl)
+   && CLASSTYPE_ERRONEOUS (DECL_CONTEXT (decl)));
 
-  for (; lev; lev = lev->next)
-if (neglectable_inst_p (lev->maybe_get_node ()))
-  break;
+  if (!refuse)
+{
+  for (; lev; lev = lev->next)
+   if (neglectable_inst_p (lev->maybe_get_node ()))
+ break;
+  refuse = (lev && errs > lev->errors);
+}
 
-  return (lev && errs > lev->errors);
+  if (refuse)
+{
+  /* Don't warn about it not being defined.  */
+  suppress_warning (decl, OPT_Wunused);
+  tree clone;
+  FOR_EACH_CLONE (clone, decl)
+   suppress_warning (clone, OPT_Wunused);
+}
+  return refuse;
 }
 
 static int tinst_depth;

base-commit: 943c65c4494145e993af43c821c8213c6375
-- 
2.27.0



Re: [PATCH] Implement POINTER_DIFF_EXPR entry in range-op.

2021-09-03 Thread Jeff Law via Gcc-patches




On 9/3/2021 9:05 AM, Andrew MacLeod via Gcc-patches wrote:

On 9/3/21 10:41 AM, Aldy Hernandez wrote:

[Andrew, do you see any problem with using the minus relational code
here?  It seems like everything matches up.]

I've seen cases in the upcoming jump threader enhancements where we see
a difference of two pointers that are known to be equivalent, and yet we
fail to return 0 for the range.  This is because we have no working
range-op entry for POINTER_DIFF_EXPR.  The entry we currently have is
a mere placeholder to avoid ignoring POINTER_DIFF_EXPR's so
adjust_pointer_diff_expr() could get a whack at it here:

//    def = __builtin_memchr (arg, 0, sz)
//    n = def - arg
//
// The range for N can be narrowed to [0, PTRDIFF_MAX - 1].

In theory... but do the non-equality relations make sense?   ie ptr1 < 
ptr2   ?   Perhaps there is no harm in those..  Probably undefined 
behaviour if they are not related so we can do whatever...
I'm pretty sure they're in the realm of undefined behavior if the 
pointers point to different objects.  There was some code that would 
have cared about this, but I think it mostly got cleaned up in response 
to Martin S's diagnostic work over the last couple years.


Jeff


Re: [PATCH 2/2] Get rid of all float-int special cases in validate_subreg.

2021-09-03 Thread Andreas Schwab
On Sep 02 2021, Segher Boessenkool wrote:

> On Tue, Aug 31, 2021 at 07:17:49PM +0800, liuhongt via Gcc-patches wrote:
>>  * emit-rtl.c (validate_subreg): Get rid of all float-int
>>  special cases.
>
> This caused various regressions on powerpc.  Please revert this until
> this can be done safely (the comment this patch deletes says why it can
> not be done yet).

This also breaks ada on riscv64.

s-fatgen.adb: In function 'System.Fat_Flt.Attr_Float.Scaling':
s-fatgen.adb:830:8: error: unable to find a register to spill
s-fatgen.adb:830:8: error: this is the insn:
(insn 215 321 216 26 (set (reg:SF 88 [ xx.26_39 ])
(mult:SF (reg:SF 190)
(subreg:SF (reg:DI 221 [164]) 0))) "s-fatgen.adb":821:25 17 {mulsf3}
 (expr_list:REG_DEAD (reg:DI 221 [164])
(expr_list:REG_DEAD (reg:SF 190)
(nil
during RTL pass: reload

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


Re: [PATCH] Implement POINTER_DIFF_EXPR entry in range-op.

2021-09-03 Thread Andrew MacLeod via Gcc-patches

On 9/3/21 10:41 AM, Aldy Hernandez wrote:

[Andrew, do you see any problem with using the minus relational code
here?  It seems like everything matches up.]

I've seen cases in the upcoming jump threader enhancements where we see
a difference of two pointers that are known to be equivalent, and yet we
fail to return 0 for the range.  This is because we have no working
range-op entry for POINTER_DIFF_EXPR.  The entry we currently have is
a mere placeholder to avoid ignoring POINTER_DIFF_EXPR's so
adjust_pointer_diff_expr() could get a whack at it here:

//  def = __builtin_memchr (arg, 0, sz)
//  n = def - arg
//
// The range for N can be narrowed to [0, PTRDIFF_MAX - 1].

In theory... but do the non-equality relations make sense?   ie ptr1 < 
ptr2   ?   Perhaps there is no harm in those..  Probably undefined 
behaviour if they are not related so we can do whatever...



Andrew



Re: [PATCH] Do not assume loop header threading in backward threader.

2021-09-03 Thread Jeff Law via Gcc-patches




On 9/3/2021 7:57 AM, Aldy Hernandez wrote:

The registry's thread_through_all_blocks() has a may_peel_loop_headers
argument.  When refactoring the backward threader code, I removed this
argument for the local passthru method because it was always TRUE.  This
may not necessarily be true in the future, if the backward threader is
called from another context.  This patch removes the default definition,
in favor of an argument that is exactly the same as the identically
named function in tree-ssa-threadupdate.c.  I think this also makes it
less confusing when looking at both methods across the source base.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::thread_through_all_blocks):
(back_threader_registry::thread_through_all_blocks):
(try_thread_blocks):
(pass_early_thread_jumps::execute):
Presumably this is preparing for addressing some of the issues we 
discussed via email a little while ago -- ie to avoid having the 
backwards threader mucking up loops and allowing the loop optimizer to 
make more decisions about things like peeling because the loop optimizer 
has a cost model for this stuff whereas the jump threaders to not.


OK.

jeff



Re: [PATCH] Abstract PHI and forwarder block checks in jump threader.

2021-09-03 Thread Jeff Law via Gcc-patches




On 9/3/2021 7:56 AM, Aldy Hernandez wrote:

This patch abstracts out a couple common idioms in the forward
threader that I found useful while navigating the code base.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadedge.c (has_phis_p): New.
(forwarder_block_p): New.
(potentially_threadable_block): Call forwarder_block_p.
(jump_threader::thread_around_empty_blocks): Call has_phis_p.
(jump_threader::thread_through_normal_block): Call
forwarder_block_p.

OK
jeff



Re: [PATCH] Improve backwards threader debugging dumps.

2021-09-03 Thread Jeff Law via Gcc-patches




On 9/3/2021 7:56 AM, Aldy Hernandez wrote:

This patch adds debugging helpers to the backwards threader.  I have
also noticed that profitable_path_p() can bail early on paths that
crosses loops and leave the dump of blocks incomplete.  Fixed as
well.

Unfortunately the new methods cannot be marked const, because we call
the solver's dump which is not const.  I believe this was because the
ranger dump calls m_cache.block_range().  This could probably use a
cleanup at a later time.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::dump): New.
(back_threader::debug): New.
(back_threader_profitability::profitable_path_p): Dump blocks
even if we are bailing early.

OK
jeff



Re: [PATCH] Dump reason why threads are being cancelled and abstract code.

2021-09-03 Thread Jeff Law via Gcc-patches




On 9/3/2021 7:55 AM, Aldy Hernandez wrote:

We are inconsistent on dumping out reasons why a thread was canceled.
This makes debugging jump threading problems harder because paths can be
canceled with no reason given.  This patch abstracts out the thread
canceling code and adds a reason for every cancellation.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadupdate.c (cancel_thread): New.
(jump_thread_path_registry::thread_block_1): Use cancel_thread.
(jump_thread_path_registry::mark_threaded_blocks): Same.
(jump_thread_path_registry::register_jump_thread): Same.

Probably should have been added long ago.  OK
jeff



Re: Ping ^ 2: [PATCH] rs6000: Expand fmod and remainder when built with fast-math [PR97142]

2021-09-03 Thread David Edelsohn via Gcc-patches
On Thu, Sep 2, 2021 at 10:31 PM Xionghu Luo  wrote:
>
> Resend the patch that addressed Will's comments.
>
>
> fmod/fmodf and remainder/remainderf could be expanded instead of library
> call when fast-math build, which is much faster.
>
> fmodf:
>  fdivs   f0,f1,f2
>  frizf0,f0
>  fnmsubs f1,f2,f0,f1
>
> remainderf:
>  fdivs   f0,f1,f2
>  frinf0,f0
>  fnmsubs f1,f2,f0,f1
>
> SPEC2017 Ofast P8LE: 511.povray_r +1.14%,  526.blender_r +1.72%
>
> gcc/ChangeLog:
>
> 2021-09-03  Xionghu Luo  
>
> PR target/97142
> * config/rs6000/rs6000.md (fmod3): New define_expand.
> (remainder3): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2021-09-03  Xionghu Luo  
>
> PR target/97142
> * gcc.target/powerpc/pr97142.c: New test.

Okay.

Thanks, David


Re: Ping ^ 2: [PATCH] rs6000: Expand fmod and remainder when built with fast-math [PR97142]

2021-09-03 Thread Bill Schmidt via Gcc-patches

Hi Xionghu,

This looks okay to me.  Recommend maintainers approve.

Thanks!
Bill

On 9/2/21 9:31 PM, Xionghu Luo wrote:

Resend the patch that addressed Will's comments.


fmod/fmodf and remainder/remainderf could be expanded instead of library
call when fast-math build, which is much faster.

fmodf:
  fdivs   f0,f1,f2
  frizf0,f0
  fnmsubs f1,f2,f0,f1

remainderf:
  fdivs   f0,f1,f2
  frinf0,f0
  fnmsubs f1,f2,f0,f1

SPEC2017 Ofast P8LE: 511.povray_r +1.14%,  526.blender_r +1.72%

gcc/ChangeLog:

2021-09-03  Xionghu Luo  

PR target/97142
* config/rs6000/rs6000.md (fmod3): New define_expand.
(remainder3): Likewise.

gcc/testsuite/ChangeLog:

2021-09-03  Xionghu Luo  

PR target/97142
* gcc.target/powerpc/pr97142.c: New test.
---
  gcc/config/rs6000/rs6000.md| 36 ++
  gcc/testsuite/gcc.target/powerpc/pr97142.c | 35 +
  2 files changed, 71 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr97142.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c8cdc42533c..84820d3b5cb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4932,6 +4932,42 @@ (define_insn "fre"
[(set_attr "type" "fp")
 (set_attr "isa" "*,")])
  
+(define_expand "fmod3"

+  [(use (match_operand:SFDF 0 "gpc_reg_operand"))
+   (use (match_operand:SFDF 1 "gpc_reg_operand"))
+   (use (match_operand:SFDF 2 "gpc_reg_operand"))]
+  "TARGET_HARD_FLOAT
+  && TARGET_FPRND
+  && flag_unsafe_math_optimizations"
+{
+  rtx div = gen_reg_rtx (mode);
+  emit_insn (gen_div3 (div, operands[1], operands[2]));
+
+  rtx friz = gen_reg_rtx (mode);
+  emit_insn (gen_btrunc2 (friz, div));
+
+  emit_insn (gen_nfms4 (operands[0], operands[2], friz, operands[1]));
+  DONE;
+ })
+
+(define_expand "remainder3"
+  [(use (match_operand:SFDF 0 "gpc_reg_operand"))
+   (use (match_operand:SFDF 1 "gpc_reg_operand"))
+   (use (match_operand:SFDF 2 "gpc_reg_operand"))]
+  "TARGET_HARD_FLOAT
+  && TARGET_FPRND
+  && flag_unsafe_math_optimizations"
+{
+  rtx div = gen_reg_rtx (mode);
+  emit_insn (gen_div3 (div, operands[1], operands[2]));
+
+  rtx frin = gen_reg_rtx (mode);
+  emit_insn (gen_round2 (frin, div));
+
+  emit_insn (gen_nfms4 (operands[0], operands[2], frin, operands[1]));
+  DONE;
+ })
+
  (define_insn "*rsqrt2"
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" ",wa")]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97142.c 
b/gcc/testsuite/gcc.target/powerpc/pr97142.c
new file mode 100644
index 000..e5306eb681b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97142.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+#include 
+
+float test1 (float x, float y)
+{
+  return fmodf (x, y);
+}
+
+double test2 (double x, double y)
+{
+  return fmod (x, y);
+}
+
+float test3 (float x, float y)
+{
+  return remainderf (x, y);
+}
+
+double test4 (double x, double y)
+{
+  return remainder (x, y);
+}
+
+/* { dg-final { scan-assembler-not {\mbl fmod\M} } } */
+/* { dg-final { scan-assembler-not {\mbl fmodf\M} } } */
+/* { dg-final { scan-assembler-not {\mbl remainder\M} } } */
+/* { dg-final { scan-assembler-not {\mbl remainderf\M} } } */
+/* { dg-final { scan-assembler-times {\mfdiv\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mfdivs\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mfnmsub\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mfnmsubs\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mfriz\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mfrin\M} 2 } } */


Re: [patch][version 8]add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-09-03 Thread Qing Zhao via Gcc-patches
Ping.

Qing

> On Aug 21, 2021, at 3:07 PM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> Hi,
> 
> This is the 8th version of the patch for the new security feature for GCC.
> I have tested it with bootstrap on both x86 and aarch64, regression testing 
> on both x86 and aarch64.
> Also tested it with the kernel testing case provided by Kees.
> Also compile CPU2017 (running is ongoing), without any issue.
> 
> Please take a look at this patch and let me know any issues.
> 
> thanks.
> 
> Qing
> 
> In this version:
> 
> *Fix all issues raised by Richard B on 8/9: (compared with version 7)
> 
>  1. in "handle_uninitialized_attribute", exclude globals with additional 
> checking and warning message. Update corresponding testing cases for this 
> change c-c++-common/auto-init-9/10.c
>  2. add more clarification on the new argument "for_auto_init" for 
> "BUILT_IN_CLEAR_PADDING" in the comments part.
>  3. use auto_var_p to replace "VAR_P && !DECL_EXTERNAL && !TREE_STATIC";
> add a new helper routine "is_var_need_auto_init" to check whether a 
> variable need to be initialized.
>  4. move the "gimple_add_padding_init_for_auto_var" inside the routine 
> "gimplify_init_constructor";
>  5. restrict gimple_add_padding_init_for_auto_var only for INIT_EXPR;
>  6. in "gimplify.c" phase, generate a GENERIC call to DEFERRED_INIT + 
> gimplfiy_assign  instead of a gimple call to fully gimplify the call to the 
> .DEFERRED_INIT to avoid some potential issues.
>  7. in "internal-fn.c" phase, use helper routines "lang_hooks.types, 
> type_for_mode", "can_native_interpret_type_p", "native_interpret_expr" to 
> simplify the implementation for pattern generation.
>  8. move "generating tree node for __BUILTIN_CLEAR_PADDING" in tree.c;
>  9. cleanup tree-cfg.c.
>  10. testing cases updates.
>  for address taken variable -Wuninitialized warnings, add "xfail" for C 
> testing cases; deleting C++ testing cases since "xfail" does not work. 
> (FIXME, add them back if something like "xfail" works for C++.)
> 
> **Known issue:
> 
>  1. -Wuninitialized warnings migh not be issued for address taken auto 
> variables.
> please see https://gcc.gnu.org/pipermail/gcc-patches/2021-August/577431.html
> for details.
> 
> Plan: add a separate patch to improve -Wuninitialized on address taken 
> auto variables.
> 
> **NOTE:
> 
>  1. Changes in tree-sra.c  have been reviewed and approved by Martin Jambor.
>  2. the code changes should be very clean now;
>  3. The testing cases might need more review, I need comments and suggestions 
> on testing cases part. (Not feel very comfortable especially on the testing 
> cases for checking pattern initializations).
> 
> **Please see version 7 at:
> https://gcc.gnu.org/pipermail/gcc-patches/2021-July/576341.html
> 
> **ChangeLog is:
> gcc/ChangeLog:
> 
> 2021-08-21  qing zhao  
> 
>* builtins.c (expand_builtin_memset): Make external visible.
>* builtins.h (expand_builtin_memset): Declare extern.
>* common.opt (ftrivial-auto-var-init=): New option.
>* doc/extend.texi: Document the uninitialized attribute.
>* doc/invoke.texi: Document -ftrivial-auto-var-init.
>* flag-types.h (enum auto_init_type): New enumerated type
>auto_init_type.
>* gimple-fold.c (clear_padding_type): Add one new parameter.
>(clear_padding_union): Likewise.
>(clear_padding_emit_loop): Likewise.
>(clear_type_padding_in_mask): Likewise.
>(gimple_fold_builtin_clear_padding): Handle this new parameter.
>* gimplify.c (gimple_add_init_for_auto_var): New function.
>(gimple_add_padding_init_for_auto_var): New function.
>(is_var_need_auto_init): New function.
>(gimplify_decl_expr): Add initialization to automatic variables per
>users' requests.
>(gimplify_call_expr): Add one new parameter for call to
>__builtin_clear_padding.
>(gimplify_init_constructor): Add padding initialization in the end.
>* internal-fn.c (INIT_PATTERN_VALUE): New macro.
>(expand_DEFERRED_INIT): New function.
>* internal-fn.def (DEFERRED_INIT): New internal function.
>* tree-cfg.c (verify_gimple_call): Verify calls to .DEFERRED_INIT.
>* tree-sra.c (generate_subtree_deferred_init): New function.
>(scan_function): Avoid setting cannot_scalarize_away_bitmap for
>calls to .DEFERRED_INIT.
>(sra_modify_deferred_init): New function.
>(sra_modify_function_body): Handle calls to DEFERRED_INIT specially.
>* tree-ssa-structalias.c (find_func_aliases_for_call): Likewise.
>* tree-ssa-uninit.c (warn_uninit): Handle calls to DEFERRED_INIT
>specially.
>(check_defs): Likewise.
>(warn_uninitialized_vars): Likewise.
>* tree-ssa.c (ssa_undefined_value_p): Likewise.
>* tree.c (build_common_builtin_nodes): Build tree node for
>BUILT_IN_CLEAR_PADDING when needed.
> 
> 

[PATCH] Implement POINTER_DIFF_EXPR entry in range-op.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
[Andrew, do you see any problem with using the minus relational code
here?  It seems like everything matches up.]

I've seen cases in the upcoming jump threader enhancements where we see
a difference of two pointers that are known to be equivalent, and yet we
fail to return 0 for the range.  This is because we have no working
range-op entry for POINTER_DIFF_EXPR.  The entry we currently have is
a mere placeholder to avoid ignoring POINTER_DIFF_EXPR's so
adjust_pointer_diff_expr() could get a whack at it here:

//  def = __builtin_memchr (arg, 0, sz)
//  n = def - arg
//
// The range for N can be narrowed to [0, PTRDIFF_MAX - 1].

This patch adds the relational magic to range-op, which we can just
steal from the minus_expr code.

Testing currently in progress.  I will commit pending tests.

gcc/ChangeLog:

* range-op.cc (operator_minus::op1_op2_relation_effect): Abstract
out to...
(minus_op1_op2_relation_effect): ...here.
(class operator_pointer_diff): New.
(operator_pointer_diff::op1_op2_relation_effect): Call
minus_op1_op2_relation_effect.
(integral_table::integral_table): Add entry for POINTER_DIFF_EXPR.
---
 gcc/range-op.cc | 45 ++---
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index fee0e834c23..5e37133026d 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -1372,13 +1372,14 @@ operator_minus::wi_fold (irange , tree type,
 }
 
 // Check to see if the relation REL between OP1 and OP2 has any effect on the
-// LHS of the expression.  If so, apply it to LHS_RANGE.
+// LHS of the expression.  If so, apply it to LHS_RANGE.  This is a helper
+// function for both MINUS_EXPR and POINTER_DIFF_EXPR.
 
-bool
-operator_minus::op1_op2_relation_effect (irange _range, tree type,
- const irange _range ATTRIBUTE_UNUSED,
- const irange _range ATTRIBUTE_UNUSED,
- relation_kind rel) const
+static bool
+minus_op1_op2_relation_effect (irange _range, tree type,
+  const irange _range ATTRIBUTE_UNUSED,
+  const irange _range ATTRIBUTE_UNUSED,
+  relation_kind rel)
 {
   if (rel == VREL_NONE)
 return false;
@@ -1440,6 +1441,16 @@ operator_minus::op1_op2_relation_effect (irange 
_range, tree type,
   return true;
 }
 
+bool
+operator_minus::op1_op2_relation_effect (irange _range, tree type,
+const irange _range,
+const irange _range,
+relation_kind rel) const
+{
+  return minus_op1_op2_relation_effect (lhs_range, type, op1_range, op2_range,
+   rel);
+}
+
 bool
 operator_minus::op1_range (irange , tree type,
   const irange ,
@@ -1459,6 +1470,26 @@ operator_minus::op2_range (irange , tree type,
 }
 
 
+class operator_pointer_diff : public range_operator
+{
+  virtual bool op1_op2_relation_effect (irange _range,
+   tree type,
+   const irange _range,
+   const irange _range,
+   relation_kind rel) const;
+} op_pointer_diff;
+
+bool
+operator_pointer_diff::op1_op2_relation_effect (irange _range, tree type,
+   const irange _range,
+   const irange _range,
+   relation_kind rel) const
+{
+  return minus_op1_op2_relation_effect (lhs_range, type, op1_range, op2_range,
+   rel);
+}
+
+
 class operator_min : public range_operator
 {
 public:
@@ -4018,7 +4049,7 @@ integral_table::integral_table ()
   set (OBJ_TYPE_REF, op_identity);
   set (IMAGPART_EXPR, op_unknown);
   set (REALPART_EXPR, op_unknown);
-  set (POINTER_DIFF_EXPR, op_unknown);
+  set (POINTER_DIFF_EXPR, op_pointer_diff);
   set (ABS_EXPR, op_abs);
   set (ABSU_EXPR, op_absu);
   set (NEGATE_EXPR, op_negate);
-- 
2.31.1



[PATCH] Abstract PHI and forwarder block checks in jump threader.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
This patch abstracts out a couple common idioms in the forward
threader that I found useful while navigating the code base.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadedge.c (has_phis_p): New.
(forwarder_block_p): New.
(potentially_threadable_block): Call forwarder_block_p.
(jump_threader::thread_around_empty_blocks): Call has_phis_p.
(jump_threader::thread_through_normal_block): Call
forwarder_block_p.
---
 gcc/tree-ssa-threadedge.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index e57f6d3e39c..3db54a199fd 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -95,6 +95,21 @@ jump_threader::thread_through_all_blocks (bool 
may_peel_loop_headers)
   return m_registry->thread_through_all_blocks (may_peel_loop_headers);
 }
 
+static inline bool
+has_phis_p (basic_block bb)
+{
+  return !gsi_end_p (gsi_start_phis (bb));
+}
+
+/* Return TRUE for a forwarder block which is defined as having PHIs
+   but no instructions.  */
+
+static bool
+forwarder_block_p (basic_block bb)
+{
+  return gsi_end_p (gsi_start_nondebug_bb (bb)) && has_phis_p (bb);
+}
+
 /* Return TRUE if we may be able to thread an incoming edge into
BB to an outgoing edge from BB.  Return FALSE otherwise.  */
 
@@ -107,9 +122,8 @@ potentially_threadable_block (basic_block bb)
  not optimized away because they forward from outside a loop
  to the loop header.   We want to thread through them as we can
  sometimes thread to the loop exit, which is obviously profitable.
- the interesting case here is when the block has PHIs.  */
-  if (gsi_end_p (gsi_start_nondebug_bb (bb))
-  && !gsi_end_p (gsi_start_phis (bb)))
+ The interesting case here is when the block has PHIs.  */
+  if (forwarder_block_p (bb))
 return true;
 
   /* If BB has a single successor or a single predecessor, then
@@ -854,7 +868,7 @@ jump_threader::thread_around_empty_blocks 
(vec *path,
   /* The key property of these blocks is that they need not be duplicated
  when threading.  Thus they cannot have visible side effects such
  as PHI nodes.  */
-  if (!gsi_end_p (gsi_start_phis (bb)))
+  if (has_phis_p (bb))
 return false;
 
   /* Skip over DEBUG statements at the start of the block.  */
@@ -994,8 +1008,7 @@ jump_threader::thread_through_normal_block 
(vec *path,
 {
   /* First case.  The statement simply doesn't have any instructions, but
 does have PHIs.  */
-  if (gsi_end_p (gsi_start_nondebug_bb (e->dest))
- && !gsi_end_p (gsi_start_phis (e->dest)))
+  if (forwarder_block_p (e->dest))
return 0;
 
   /* Second case.  */
-- 
2.31.1



Re: [PATCH 8/8] coroutines: Make the continue handle visible to debug.

2021-09-03 Thread Jason Merrill via Gcc-patches

On 9/1/21 6:56 AM, Iain Sandoe wrote:


When we have a suspend method that returns a coroutine handle
we transfer (hopefully symmetrically, i.e. with a tailcall) to
that new coroutine instead of returning to our resumer.

This adds the variable to the outer block for the actor function
which means that '_Coro_actor_continue' is visible to debug.

Contributory to PR 99215.


OK.


Signed-off-by: Iain Sandoe 

gcc/cp/ChangeLog:

* coroutines.cc (build_actor_fn): Make _Coro_actor_continue
visible to debug.
---
  gcc/cp/coroutines.cc | 1 +
  1 file changed, 1 insertion(+)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 395e5c488e5..b32c5dc5e55 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -2148,6 +2148,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
 NULL_TREE);

BIND_EXPR_VARS (actor_bind) = continuation;

+  BLOCK_VARS (top_block) = BIND_EXPR_VARS (actor_bind) ;
  
/* Link in the block associated with the outer scope of the re-written

   function body.  */





Re: [PATCH 7/8] coroutines: Make proxy vars for the function arg copies.

2021-09-03 Thread Iain Sandoe



> On 3 Sep 2021, at 15:07, Jason Merrill via Gcc-patches 
>  wrote:
> 
> On 9/1/21 6:56 AM, Iain Sandoe wrote:
>> This adds top level proxy variables for the coroutine frame
>> copies of the original function args.  These are then available
>> in the debugger to refer to the frame copies.  We rewrite the
>> function body to use the copies, since the original parms will
>> no longer be in scope when the coroutine is running.
>> Signed-off-by: Iain Sandoe 
>> gcc/cp/ChangeLog:
>>  * coroutines.cc (struct param_info): Add copy_var.
>>  (build_actor_fn): Use simplified param references.
>>  (register_param_uses): Likewise.
>>  (rewrite_param_uses): Likewise.
>>  (analyze_fn_parms): New function.
>>  (coro_rewrite_function_body): Add proxies for the fn
>>  parameters to the outer bind scope of the rewritten code.
>>  (morph_fn_to_coro): Use simplified version of param ref.
>> ---
>>  gcc/cp/coroutines.cc | 247 ---
>>  1 file changed, 117 insertions(+), 130 deletions(-)
>> diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
>> index aacf352f1c9..395e5c488e5 100644
>> --- a/gcc/cp/coroutines.cc
>> +++ b/gcc/cp/coroutines.cc
>> @@ -1964,6 +1964,7 @@ transform_await_wrapper (tree *stmt, int *do_subtree, 
>> void *d)
>>  struct param_info
>>  {
>>tree field_id; /* The name of the copy in the coroutine frame.  */
>> +  tree copy_var; /* The local var proxy for the frame copy.  */
>>vec *body_uses; /* Worklist of uses, void if there are none.  */
>>tree frame_type;   /* The type used to represent this parm in the frame.  
>> */
>>tree orig_type;/* The original type of the parm (not as passed).  */
>> @@ -2169,36 +2170,6 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
>> tree actor, tree fnbody,
>>/* Declare the continuation handle.  */
>>add_decl_expr (continuation);
>>  -  /* Re-write param references in the body, no code should be generated
>> - here.  */
>> -  if (DECL_ARGUMENTS (orig))
>> -{
>> -  tree arg;
>> -  for (arg = DECL_ARGUMENTS (orig); arg != NULL; arg = DECL_CHAIN (arg))
>> -{
>> -  bool existed;
>> -  param_info  = param_uses->get_or_insert (arg, );
>> -  if (!parm.body_uses)
>> -continue; /* Wasn't used in the original function body.  */
>> -
>> -  tree fld_ref = lookup_member (coro_frame_type, parm.field_id,
>> -/*protect=*/1, /*want_type=*/0,
>> -tf_warning_or_error);
>> -  tree fld_idx = build3_loc (loc, COMPONENT_REF, parm.frame_type,
>> - actor_frame, fld_ref, NULL_TREE);
>> -
>> -  /* We keep these in the frame as a regular pointer, so convert that
>> -   back to the type expected.  */
>> -  if (parm.pt_ref)
>> -fld_idx = build1_loc (loc, CONVERT_EXPR, TREE_TYPE (arg), fld_idx);
>> -
>> -  int i;
>> -  tree *puse;
>> -  FOR_EACH_VEC_ELT (*parm.body_uses, i, puse)
>> -*puse = fld_idx;
>> -}
>> -}
>> -
>>/* Re-write local vars, similarly.  */
>>local_vars_transform xform_vars_data
>>  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
>> @@ -3772,11 +3743,11 @@ struct param_frame_data
>>bool param_seen;
>>  };
>>  -/* A tree-walk callback that records the use of parameters (to allow for
>> -   optimizations where handling unused parameters may be omitted).  */
>> +/* A tree walk callback that rewrites each parm use to the local variable
>> +   that represents its copy in the frame.  */
>>static tree
>> -register_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d)
>> +rewrite_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d)
>>  {
>>param_frame_data *data = (param_frame_data *) d;
>>  @@ -3784,7 +3755,7 @@ register_param_uses (tree *stmt, int *do_subtree 
>> ATTRIBUTE_UNUSED, void *d)
>>if (TREE_CODE (*stmt) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (*stmt))
>>  {
>>tree t = DECL_VALUE_EXPR (*stmt);
>> -  return cp_walk_tree (, register_param_uses, d, NULL);
>> +  return cp_walk_tree (, rewrite_param_uses, d, NULL);
>>  }
>>  if (TREE_CODE (*stmt) != PARM_DECL)
>> @@ -3798,16 +3769,87 @@ register_param_uses (tree *stmt, int *do_subtree 
>> ATTRIBUTE_UNUSED, void *d)
>>param_info  = data->param_uses->get_or_insert (*stmt, );
>>gcc_checking_assert (existed);
>>  -  if (!parm.body_uses)
>> +  *stmt = parm.copy_var;
>> +  return NULL_TREE;
>> +}
>> +
>> +/* Build up a set of info that determines how each param copy will be
>> +   handled.  */
>> +
>> +static hash_map *analyze_fn_parms (tree orig)
> 
> Function name should be on a new line.

oops.
> 
>> +{
>> +  if (!DECL_ARGUMENTS (orig))
>> +return NULL;
>> +
>> +  hash_map *param_uses = new hash_map;
>> +
>> +  /* Build a hash map with an entry for each param.
>> + The key is the param tree.
>> + Then we have an 

Re: [PATCH 6/8] coroutines: Convert implementation variables to debug-friendly form.

2021-09-03 Thread Iain Sandoe



> On 3 Sep 2021, at 15:12, Jason Merrill  wrote:
> 
> On 9/3/21 9:56 AM, Iain Sandoe wrote:
>>> On 3 Sep 2021, at 14:52, Jason Merrill  wrote:
>>> 
>>> On 9/1/21 6:55 AM, Iain Sandoe wrote:
 The user might well wish to inspect some of the state that represents
 the implementation of the coroutine machine.
 In particular:
   The promise object.
   The function pointers for the resumer and destroyer.
   The current resume index (suspend point).
   The handle that represent this coroutine 'self handle'.
   Whether the coroutine frame is allocated and needs to be freed.
 These variables are given names that can be 'well-known' and advertised
 in debug documentation - they are placed in the implementation namespace
 and all begin with _Coro_.
>>> 
 Signed-off-by: Iain Sandoe 
 gcc/cp/ChangeLog:
* coroutines.cc (transform_await_expr): Use debug-friendly
names for coroutine implementation.
(build_actor_fn): Likewise.
(build_destroy_fn): Likewise.
(coro_rewrite_function_body): Likewise.
(morph_fn_to_coro): Likewise.
>>> 
>>> Hmm, this patch doesn't seem to match the description and ChangeLog entry 
>>> other than in the names of the functions changed.
>> with 20:20 hindsight I should have squashed the (several) patches related to 
>> the implementation symbols,
>> I’ll redo the description - essentially, this is just making use of the 
>> simplification available because we now have pre-defined values for the 
>> field names.
> 
> I can see how that describes a few lines in this patch, but not for instance 
> the change to transform_await_expr, which seems to have nothing to do with 
> names?

it’s indirect indeed - but the changes we’ve made to the variable handling mean 
that we no longer need to rewrite the
proxy vars into their frame->offset; that is handled by the DECL_VALUE_EXPR (as 
for user’s vars ) - the change to transform_await_expr is removing the now 
defunct substitution (and poorly described, sorry).

> But yes, moving the changed lines that just use the variables from the 
> previous patch into that commit sounds good.  I use rebase -i for that sort 
> of thing all the time.

yeah, me too -  I realised too late that this series could have had more 
squashing - if it would make things easier I could do that for the patches 
related to implemenation variables .. - which would include patch 8 (but not 
patch 7 which is related to parms only)


 ---
  gcc/cp/coroutines.cc | 214 +++
  1 file changed, 94 insertions(+), 120 deletions(-)
 diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
 index 3b46aac4dc5..aacf352f1c9 100644
 --- a/gcc/cp/coroutines.cc
 +++ b/gcc/cp/coroutines.cc
 @@ -1906,7 +1906,6 @@ transform_await_expr (tree await_expr, 
 await_xform_data *xform)
/* So, on entry, we have:
   in : CO_AWAIT_EXPR (a, e_proxy, o, awr_call_vector, mode)
  We no longer need a [it had diagnostic value, maybe?]
 -We need to replace the promise proxy in all elements
  We need to replace the e_proxy in the awr_call.  */
  tree coro_frame_type = TREE_TYPE (xform->actor_frame);
 @@ -1932,16 +1931,6 @@ transform_await_expr (tree await_expr, 
 await_xform_data *xform)
TREE_OPERAND (await_expr, 1) = as;
  }
  -  /* Now do the self_handle.  */
 -  data.from = xform->self_h_proxy;
 -  data.to = xform->real_self_h;
 -  cp_walk_tree (_expr, replace_proxy, , NULL);
 -
 -  /* Now do the promise.  */
 -  data.from = xform->promise_proxy;
 -  data.to = xform->real_promise;
 -  cp_walk_tree (_expr, replace_proxy, , NULL);
 -
return await_expr;
  }
  @@ -2128,10 +2117,9 @@ coro_get_frame_dtor (tree coro_fp, tree orig, tree 
 frame_size,
static void
  build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree 
 fnbody,
 -  tree orig, hash_map *param_uses,
 -  hash_map *local_var_uses,
 -  vec *param_dtor_list, tree resume_fn_field,
 -  tree resume_idx_field, unsigned body_count, tree frame_size)
 +  tree orig, hash_map *local_var_uses,
 +  vec *param_dtor_list,
 +  tree resume_idx_var, unsigned body_count, tree frame_size)
  {
verify_stmt_tree (fnbody);
/* Some things we inherit from the original function.  */
 @@ -2216,8 +2204,8 @@ build_actor_fn (location_t loc, tree 
 coro_frame_type, tree actor, tree fnbody,
  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
cp_walk_tree (, transform_local_var_uses, _vars_data, 
 NULL);
  -  tree rat_field = lookup_member (coro_frame_type, 
 coro_resume_index_field, 1, 0,
 -tf_warning_or_error);
 +  tree rat_field = lookup_member (coro_frame_type, 
 

Re: [PATCH 6/8] coroutines: Convert implementation variables to debug-friendly form.

2021-09-03 Thread Jason Merrill via Gcc-patches

On 9/3/21 9:56 AM, Iain Sandoe wrote:




On 3 Sep 2021, at 14:52, Jason Merrill  wrote:

On 9/1/21 6:55 AM, Iain Sandoe wrote:

The user might well wish to inspect some of the state that represents
the implementation of the coroutine machine.
In particular:
   The promise object.
   The function pointers for the resumer and destroyer.
   The current resume index (suspend point).
   The handle that represent this coroutine 'self handle'.
   Whether the coroutine frame is allocated and needs to be freed.
These variables are given names that can be 'well-known' and advertised
in debug documentation - they are placed in the implementation namespace
and all begin with _Coro_.



Signed-off-by: Iain Sandoe 
gcc/cp/ChangeLog:
* coroutines.cc (transform_await_expr): Use debug-friendly
names for coroutine implementation.
(build_actor_fn): Likewise.
(build_destroy_fn): Likewise.
(coro_rewrite_function_body): Likewise.
(morph_fn_to_coro): Likewise.


Hmm, this patch doesn't seem to match the description and ChangeLog entry other 
than in the names of the functions changed.


with 20:20 hindsight I should have squashed the (several) patches related to 
the implementation symbols,

I’ll redo the description - essentially, this is just making use of the 
simplification available because we now have pre-defined values for the field 
names.


I can see how that describes a few lines in this patch, but not for 
instance the change to transform_await_expr, which seems to have nothing 
to do with names?


But yes, moving the changed lines that just use the variables from the 
previous patch into that commit sounds good.  I use rebase -i for that 
sort of thing all the time.



---
  gcc/cp/coroutines.cc | 214 +++
  1 file changed, 94 insertions(+), 120 deletions(-)
diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 3b46aac4dc5..aacf352f1c9 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -1906,7 +1906,6 @@ transform_await_expr (tree await_expr, await_xform_data 
*xform)
/* So, on entry, we have:
   in : CO_AWAIT_EXPR (a, e_proxy, o, awr_call_vector, mode)
  We no longer need a [it had diagnostic value, maybe?]
- We need to replace the promise proxy in all elements
  We need to replace the e_proxy in the awr_call.  */
  tree coro_frame_type = TREE_TYPE (xform->actor_frame);
@@ -1932,16 +1931,6 @@ transform_await_expr (tree await_expr, await_xform_data 
*xform)
TREE_OPERAND (await_expr, 1) = as;
  }
  -  /* Now do the self_handle.  */
-  data.from = xform->self_h_proxy;
-  data.to = xform->real_self_h;
-  cp_walk_tree (_expr, replace_proxy, , NULL);
-
-  /* Now do the promise.  */
-  data.from = xform->promise_proxy;
-  data.to = xform->real_promise;
-  cp_walk_tree (_expr, replace_proxy, , NULL);
-
return await_expr;
  }
  @@ -2128,10 +2117,9 @@ coro_get_frame_dtor (tree coro_fp, tree orig, tree 
frame_size,
static void
  build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody,
-   tree orig, hash_map *param_uses,
-   hash_map *local_var_uses,
-   vec *param_dtor_list, tree resume_fn_field,
-   tree resume_idx_field, unsigned body_count, tree frame_size)
+   tree orig, hash_map *local_var_uses,
+   vec *param_dtor_list,
+   tree resume_idx_var, unsigned body_count, tree frame_size)
  {
verify_stmt_tree (fnbody);
/* Some things we inherit from the original function.  */
@@ -2216,8 +2204,8 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
cp_walk_tree (, transform_local_var_uses, _vars_data, NULL);
  -  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field, 
1, 0,
- tf_warning_or_error);
+  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field,
+ 1, 0, tf_warning_or_error);
tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, actor_frame,
 rat_field, NULL_TREE);
  @@ -2319,14 +2307,8 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
tree r = build_stmt (loc, LABEL_EXPR, actor_begin_label);
add_stmt (r);
  -  /* actor's version of the promise.  */
-  tree ap_m = lookup_member (coro_frame_type, get_identifier 
("_Coro_promise"), 1, 0,
-tf_warning_or_error);
-  tree ap = build_class_member_access_expr (actor_frame, ap_m, NULL_TREE, 
false,
-   tf_warning_or_error);
-
/* actor's coroutine 'self handle'.  */
-  tree ash_m = lookup_member (coro_frame_type, get_identifier 
("_Coro_self_handle"), 1,
+  tree ash_m = lookup_member (coro_frame_type, coro_self_handle_field, 1,
  0, 

Re: [PATCH 7/8] coroutines: Make proxy vars for the function arg copies.

2021-09-03 Thread Jason Merrill via Gcc-patches

On 9/1/21 6:56 AM, Iain Sandoe wrote:


This adds top level proxy variables for the coroutine frame
copies of the original function args.  These are then available
in the debugger to refer to the frame copies.  We rewrite the
function body to use the copies, since the original parms will
no longer be in scope when the coroutine is running.

Signed-off-by: Iain Sandoe 

gcc/cp/ChangeLog:

* coroutines.cc (struct param_info): Add copy_var.
(build_actor_fn): Use simplified param references.
(register_param_uses): Likewise.
(rewrite_param_uses): Likewise.
(analyze_fn_parms): New function.
(coro_rewrite_function_body): Add proxies for the fn
parameters to the outer bind scope of the rewritten code.
(morph_fn_to_coro): Use simplified version of param ref.
---
  gcc/cp/coroutines.cc | 247 ---
  1 file changed, 117 insertions(+), 130 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index aacf352f1c9..395e5c488e5 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -1964,6 +1964,7 @@ transform_await_wrapper (tree *stmt, int *do_subtree, 
void *d)
  struct param_info
  {
tree field_id; /* The name of the copy in the coroutine frame.  */
+  tree copy_var; /* The local var proxy for the frame copy.  */
vec *body_uses; /* Worklist of uses, void if there are none.  */
tree frame_type;   /* The type used to represent this parm in the frame.  */
tree orig_type;/* The original type of the parm (not as passed).  */
@@ -2169,36 +2170,6 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
/* Declare the continuation handle.  */
add_decl_expr (continuation);
  
-  /* Re-write param references in the body, no code should be generated

- here.  */
-  if (DECL_ARGUMENTS (orig))
-{
-  tree arg;
-  for (arg = DECL_ARGUMENTS (orig); arg != NULL; arg = DECL_CHAIN (arg))
-   {
- bool existed;
- param_info  = param_uses->get_or_insert (arg, );
- if (!parm.body_uses)
-   continue; /* Wasn't used in the original function body.  */
-
- tree fld_ref = lookup_member (coro_frame_type, parm.field_id,
-   /*protect=*/1, /*want_type=*/0,
-   tf_warning_or_error);
- tree fld_idx = build3_loc (loc, COMPONENT_REF, parm.frame_type,
-actor_frame, fld_ref, NULL_TREE);
-
- /* We keep these in the frame as a regular pointer, so convert that
-  back to the type expected.  */
- if (parm.pt_ref)
-   fld_idx = build1_loc (loc, CONVERT_EXPR, TREE_TYPE (arg), fld_idx);
-
- int i;
- tree *puse;
- FOR_EACH_VEC_ELT (*parm.body_uses, i, puse)
-   *puse = fld_idx;
-   }
-}
-
/* Re-write local vars, similarly.  */
local_vars_transform xform_vars_data
  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
@@ -3772,11 +3743,11 @@ struct param_frame_data
bool param_seen;
  };
  
-/* A tree-walk callback that records the use of parameters (to allow for

-   optimizations where handling unused parameters may be omitted).  */
+/* A tree walk callback that rewrites each parm use to the local variable
+   that represents its copy in the frame.  */
  
  static tree

-register_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d)
+rewrite_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d)
  {
param_frame_data *data = (param_frame_data *) d;
  
@@ -3784,7 +3755,7 @@ register_param_uses (tree *stmt, int *do_subtree ATTRIBUTE_UNUSED, void *d)

if (TREE_CODE (*stmt) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (*stmt))
  {
tree t = DECL_VALUE_EXPR (*stmt);
-  return cp_walk_tree (, register_param_uses, d, NULL);
+  return cp_walk_tree (, rewrite_param_uses, d, NULL);
  }
  
if (TREE_CODE (*stmt) != PARM_DECL)

@@ -3798,16 +3769,87 @@ register_param_uses (tree *stmt, int *do_subtree 
ATTRIBUTE_UNUSED, void *d)
param_info  = data->param_uses->get_or_insert (*stmt, );
gcc_checking_assert (existed);
  
-  if (!parm.body_uses)

+  *stmt = parm.copy_var;
+  return NULL_TREE;
+}
+
+/* Build up a set of info that determines how each param copy will be
+   handled.  */
+
+static hash_map *analyze_fn_parms (tree orig)


Function name should be on a new line.


+{
+  if (!DECL_ARGUMENTS (orig))
+return NULL;
+
+  hash_map *param_uses = new hash_map;
+
+  /* Build a hash map with an entry for each param.
+ The key is the param tree.
+ Then we have an entry for the frame field name.
+ Then a cache for the field ref when we come to use it.
+ Then a tree list of the uses.
+ The second two entries start out empty - and only get populated
+ when we see uses.  */
+  bool lambda_p = LAMBDA_FUNCTION_P (orig);
+
+  unsigned no_name_parm = 

[PATCH] Abstract PHI and forwarder block checks in jump threader.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
This patch abstracts out a couple common idioms in the forward
threader that I found useful while navigating the code base.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadedge.c (has_phis_p): New.
(forwarder_block_p): New.
(potentially_threadable_block): Call forwarder_block_p.
(jump_threader::thread_around_empty_blocks): Call has_phis_p.
(jump_threader::thread_through_normal_block): Call
forwarder_block_p.
---
 gcc/tree-ssa-threadedge.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index e57f6d3e39c..3db54a199fd 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -95,6 +95,21 @@ jump_threader::thread_through_all_blocks (bool 
may_peel_loop_headers)
   return m_registry->thread_through_all_blocks (may_peel_loop_headers);
 }
 
+static inline bool
+has_phis_p (basic_block bb)
+{
+  return !gsi_end_p (gsi_start_phis (bb));
+}
+
+/* Return TRUE for a forwarder block which is defined as having PHIs
+   but no instructions.  */
+
+static bool
+forwarder_block_p (basic_block bb)
+{
+  return gsi_end_p (gsi_start_nondebug_bb (bb)) && has_phis_p (bb);
+}
+
 /* Return TRUE if we may be able to thread an incoming edge into
BB to an outgoing edge from BB.  Return FALSE otherwise.  */
 
@@ -107,9 +122,8 @@ potentially_threadable_block (basic_block bb)
  not optimized away because they forward from outside a loop
  to the loop header.   We want to thread through them as we can
  sometimes thread to the loop exit, which is obviously profitable.
- the interesting case here is when the block has PHIs.  */
-  if (gsi_end_p (gsi_start_nondebug_bb (bb))
-  && !gsi_end_p (gsi_start_phis (bb)))
+ The interesting case here is when the block has PHIs.  */
+  if (forwarder_block_p (bb))
 return true;
 
   /* If BB has a single successor or a single predecessor, then
@@ -854,7 +868,7 @@ jump_threader::thread_around_empty_blocks 
(vec *path,
   /* The key property of these blocks is that they need not be duplicated
  when threading.  Thus they cannot have visible side effects such
  as PHI nodes.  */
-  if (!gsi_end_p (gsi_start_phis (bb)))
+  if (has_phis_p (bb))
 return false;
 
   /* Skip over DEBUG statements at the start of the block.  */
@@ -994,8 +1008,7 @@ jump_threader::thread_through_normal_block 
(vec *path,
 {
   /* First case.  The statement simply doesn't have any instructions, but
 does have PHIs.  */
-  if (gsi_end_p (gsi_start_nondebug_bb (e->dest))
- && !gsi_end_p (gsi_start_phis (e->dest)))
+  if (forwarder_block_p (e->dest))
return 0;
 
   /* Second case.  */
-- 
2.31.1



[PATCH] Do not assume loop header threading in backward threader.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
The registry's thread_through_all_blocks() has a may_peel_loop_headers
argument.  When refactoring the backward threader code, I removed this
argument for the local passthru method because it was always TRUE.  This
may not necessarily be true in the future, if the backward threader is
called from another context.  This patch removes the default definition,
in favor of an argument that is exactly the same as the identically
named function in tree-ssa-threadupdate.c.  I think this also makes it
less confusing when looking at both methods across the source base.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::thread_through_all_blocks):
(back_threader_registry::thread_through_all_blocks):
(try_thread_blocks):
(pass_early_thread_jumps::execute):
---
 gcc/tree-ssa-threadbackward.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index b9a0d9a60ad..2fa22f8e328 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -53,7 +53,7 @@ class back_threader_registry
 public:
   back_threader_registry (int max_allowable_paths);
   bool register_path (const vec &, edge taken);
-  bool thread_through_all_blocks ();
+  bool thread_through_all_blocks (bool may_peel_loop_headers);
 private:
   jump_thread_path_registry m_lowlevel_registry;
   const int m_max_allowable_paths;
@@ -80,7 +80,7 @@ public:
   back_threader (bool speed_p);
   ~back_threader ();
   void maybe_thread_block (basic_block bb);
-  bool thread_through_all_blocks ();
+  bool thread_through_all_blocks (bool may_peel_loop_headers);
 private:
   void find_paths (basic_block bb, tree name);
   void maybe_register_path (edge taken_edge);
@@ -497,9 +497,9 @@ back_threader::maybe_thread_block (basic_block bb)
 // Perform the actual jump threading for the all queued paths.
 
 bool
-back_threader::thread_through_all_blocks ()
+back_threader::thread_through_all_blocks (bool may_peel_loop_headers)
 {
-  return m_registry.thread_through_all_blocks ();
+  return m_registry.thread_through_all_blocks (may_peel_loop_headers);
 }
 
 // Dump a sequence of BBs through the CFG.
@@ -553,9 +553,9 @@ back_threader_registry::back_threader_registry (int 
max_allowable_paths)
 }
 
 bool
-back_threader_registry::thread_through_all_blocks ()
+back_threader_registry::thread_through_all_blocks (bool may_peel_loop_headers)
 {
-  return m_lowlevel_registry.thread_through_all_blocks (true);
+  return m_lowlevel_registry.thread_through_all_blocks (may_peel_loop_headers);
 }
 
 /* Examine jump threading path PATH and return TRUE if it is profitable to
@@ -947,7 +947,7 @@ try_thread_blocks (function *fun)
   if (EDGE_COUNT (bb->succs) > 1)
threader.maybe_thread_block (bb);
 }
-  return threader.thread_through_all_blocks ();
+  return threader.thread_through_all_blocks (/*peel_loop_headers=*/true);
 }
 
 unsigned int
@@ -1016,7 +1016,7 @@ pass_early_thread_jumps::execute (function *fun)
   if (EDGE_COUNT (bb->succs) > 1)
threader.maybe_thread_block (bb);
 }
-  threader.thread_through_all_blocks ();
+  threader.thread_through_all_blocks (/*peel_loop_headers=*/true);
 
   loop_optimizer_finalize ();
   return 0;
-- 
2.31.1



Re: [PATCH 6/8] coroutines: Convert implementation variables to debug-friendly form.

2021-09-03 Thread Iain Sandoe



> On 3 Sep 2021, at 14:52, Jason Merrill  wrote:
> 
> On 9/1/21 6:55 AM, Iain Sandoe wrote:
>> The user might well wish to inspect some of the state that represents
>> the implementation of the coroutine machine.
>> In particular:
>>   The promise object.
>>   The function pointers for the resumer and destroyer.
>>   The current resume index (suspend point).
>>   The handle that represent this coroutine 'self handle'.
>>   Whether the coroutine frame is allocated and needs to be freed.
>> These variables are given names that can be 'well-known' and advertised
>> in debug documentation - they are placed in the implementation namespace
>> and all begin with _Coro_.
> 
>> Signed-off-by: Iain Sandoe 
>> gcc/cp/ChangeLog:
>>  * coroutines.cc (transform_await_expr): Use debug-friendly
>>  names for coroutine implementation.
>>  (build_actor_fn): Likewise.
>>  (build_destroy_fn): Likewise.
>>  (coro_rewrite_function_body): Likewise.
>>  (morph_fn_to_coro): Likewise.
> 
> Hmm, this patch doesn't seem to match the description and ChangeLog entry 
> other than in the names of the functions changed.

with 20:20 hindsight I should have squashed the (several) patches related to 
the implementation symbols, 

I’ll redo the description - essentially, this is just making use of the 
simplification available because we now have pre-defined values for the field 
names.

>> ---
>>  gcc/cp/coroutines.cc | 214 +++
>>  1 file changed, 94 insertions(+), 120 deletions(-)
>> diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
>> index 3b46aac4dc5..aacf352f1c9 100644
>> --- a/gcc/cp/coroutines.cc
>> +++ b/gcc/cp/coroutines.cc
>> @@ -1906,7 +1906,6 @@ transform_await_expr (tree await_expr, 
>> await_xform_data *xform)
>>/* So, on entry, we have:
>>   in : CO_AWAIT_EXPR (a, e_proxy, o, awr_call_vector, mode)
>>We no longer need a [it had diagnostic value, maybe?]
>> -  We need to replace the promise proxy in all elements
>>We need to replace the e_proxy in the awr_call.  */
>>  tree coro_frame_type = TREE_TYPE (xform->actor_frame);
>> @@ -1932,16 +1931,6 @@ transform_await_expr (tree await_expr, 
>> await_xform_data *xform)
>>TREE_OPERAND (await_expr, 1) = as;
>>  }
>>  -  /* Now do the self_handle.  */
>> -  data.from = xform->self_h_proxy;
>> -  data.to = xform->real_self_h;
>> -  cp_walk_tree (_expr, replace_proxy, , NULL);
>> -
>> -  /* Now do the promise.  */
>> -  data.from = xform->promise_proxy;
>> -  data.to = xform->real_promise;
>> -  cp_walk_tree (_expr, replace_proxy, , NULL);
>> -
>>return await_expr;
>>  }
>>  @@ -2128,10 +2117,9 @@ coro_get_frame_dtor (tree coro_fp, tree orig, tree 
>> frame_size,
>>static void
>>  build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree 
>> fnbody,
>> -tree orig, hash_map *param_uses,
>> -hash_map *local_var_uses,
>> -vec *param_dtor_list, tree resume_fn_field,
>> -tree resume_idx_field, unsigned body_count, tree frame_size)
>> +tree orig, hash_map *local_var_uses,
>> +vec *param_dtor_list,
>> +tree resume_idx_var, unsigned body_count, tree frame_size)
>>  {
>>verify_stmt_tree (fnbody);
>>/* Some things we inherit from the original function.  */
>> @@ -2216,8 +2204,8 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
>> tree actor, tree fnbody,
>>  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
>>cp_walk_tree (, transform_local_var_uses, _vars_data, NULL);
>>  -  tree rat_field = lookup_member (coro_frame_type, 
>> coro_resume_index_field, 1, 0,
>> -  tf_warning_or_error);
>> +  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field,
>> +  1, 0, tf_warning_or_error);
>>tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, actor_frame,
>>   rat_field, NULL_TREE);
>>  @@ -2319,14 +2307,8 @@ build_actor_fn (location_t loc, tree 
>> coro_frame_type, tree actor, tree fnbody,
>>tree r = build_stmt (loc, LABEL_EXPR, actor_begin_label);
>>add_stmt (r);
>>  -  /* actor's version of the promise.  */
>> -  tree ap_m = lookup_member (coro_frame_type, get_identifier 
>> ("_Coro_promise"), 1, 0,
>> - tf_warning_or_error);
>> -  tree ap = build_class_member_access_expr (actor_frame, ap_m, NULL_TREE, 
>> false,
>> -tf_warning_or_error);
>> -
>>/* actor's coroutine 'self handle'.  */
>> -  tree ash_m = lookup_member (coro_frame_type, get_identifier 
>> ("_Coro_self_handle"), 1,
>> +  tree ash_m = lookup_member (coro_frame_type, coro_self_handle_field, 1,
>>0, tf_warning_or_error);
>>tree ash = build_class_member_access_expr (actor_frame, ash_m, NULL_TREE,
>>   false, tf_warning_or_error);
>> @@ 

[PATCH] Improve backwards threader debugging dumps.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
This patch adds debugging helpers to the backwards threader.  I have
also noticed that profitable_path_p() can bail early on paths that
crosses loops and leave the dump of blocks incomplete.  Fixed as
well.

Unfortunately the new methods cannot be marked const, because we call
the solver's dump which is not const.  I believe this was because the
ranger dump calls m_cache.block_range().  This could probably use a
cleanup at a later time.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::dump): New.
(back_threader::debug): New.
(back_threader_profitability::profitable_path_p): Dump blocks
even if we are bailing early.
---
 gcc/tree-ssa-threadbackward.c | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 3aad1493c4d..b9a0d9a60ad 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-range-path.h"
 #include "ssa.h"
 #include "tree-cfgcleanup.h"
+#include "tree-pretty-print.h"
 
 // Path registry for the backwards threader.  After all paths have been
 // registered with register_path(), thread_through_all_blocks() is called
@@ -89,6 +90,8 @@ private:
   edge find_taken_edge (const vec );
   edge find_taken_edge_cond (const vec , gcond *);
   edge find_taken_edge_switch (const vec , gswitch *);
+  virtual void debug ();
+  virtual void dump (FILE *out);
 
   back_threader_registry m_registry;
   back_threader_profitability m_profit;
@@ -519,6 +522,30 @@ debug (const vec  )
   dump_path (stderr, path);
 }
 
+void
+back_threader::dump (FILE *out)
+{
+  m_solver.dump (out);
+  fprintf (out, "\nCandidates for pre-computation:\n");
+  fprintf (out, "===\n");
+
+  bitmap_iterator bi;
+  unsigned i;
+
+  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
+{
+  tree name = ssa_name (i);
+  print_generic_expr (out, name, TDF_NONE);
+  fprintf (out, "\n");
+}
+}
+
+void
+back_threader::debug ()
+{
+  dump (stderr);
+}
+
 back_threader_registry::back_threader_registry (int max_allowable_paths)
   : m_max_allowable_paths (max_allowable_paths)
 {
@@ -607,6 +634,14 @@ back_threader_profitability::profitable_path_p (const 
vec _path,
  if (bb->loop_father != loop)
{
  path_crosses_loops = true;
+
+ // Dump rest of blocks.
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   for (j++; j < m_path.length (); j++)
+ {
+   bb = m_path[j];
+   fprintf (dump_file, " bb:%i", bb->index);
+ }
  break;
}
 
-- 
2.31.1



[PATCH] Dump reason why threads are being cancelled and abstract code.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
We are inconsistent on dumping out reasons why a thread was canceled.
This makes debugging jump threading problems harder because paths can be
canceled with no reason given.  This patch abstracts out the thread
canceling code and adds a reason for every cancellation.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

* tree-ssa-threadupdate.c (cancel_thread): New.
(jump_thread_path_registry::thread_block_1): Use cancel_thread.
(jump_thread_path_registry::mark_threaded_blocks): Same.
(jump_thread_path_registry::register_jump_thread): Same.
---
 gcc/tree-ssa-threadupdate.c | 56 +++--
 1 file changed, 29 insertions(+), 27 deletions(-)

diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index 1d32a0230fb..18f16efbb7a 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -245,6 +245,23 @@ debug (const vec *path)
   debug (*path);
 }
 
+/* Release the memory associated with PATH, and if dumping is enabled,
+   dump out the reason why the thread was canceled.  */
+
+static void
+cancel_thread (vec *path, const char *reason = NULL)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+{
+  if (reason)
+   fprintf (dump_file, "%s:\n", reason);
+
+  dump_jump_thread_path (dump_file, *path, false);
+  fprintf (dump_file, "\n");
+}
+  path->release ();
+}
+
 /* Simple hashing function.  For any given incoming edge E, we're going
to be most concerned with the final destination of its jump thread
path.  So hash on the block index of the final edge in the path.  */
@@ -1449,7 +1466,7 @@ jump_thread_path_registry::thread_block_1 (basic_block bb,
  /* Since this case is not handled by our special code
 to thread through a loop header, we must explicitly
 cancel the threading request here.  */
- path->release ();
+ cancel_thread (path, "Threading through unhandled loop header");
  e->aux = NULL;
  continue;
}
@@ -1488,7 +1505,7 @@ jump_thread_path_registry::thread_block_1 (basic_block bb,
 
  if (i != path->length ())
{
- path->release ();
+ cancel_thread (path, "Threading through loop exit");
  e->aux = NULL;
  continue;
}
@@ -1847,7 +1864,7 @@ fail:
 
   if (path)
{
- path->release ();
+ cancel_thread (path, "Failure in thread_through_loop_header");
  e->aux = NULL;
}
 }
@@ -1975,9 +1992,7 @@ jump_thread_path_registry::mark_threaded_blocks (bitmap 
threaded_blocks)
  else
{
  m_paths.unordered_remove (i);
- if (dump_file && (dump_flags & TDF_DETAILS))
-   dump_jump_thread_path (dump_file, *path, false);
- path->release ();
+ cancel_thread (path);
}
}
   else
@@ -2012,9 +2027,7 @@ jump_thread_path_registry::mark_threaded_blocks (bitmap 
threaded_blocks)
{
  e->aux = NULL;
  m_paths.unordered_remove (i);
- if (dump_file && (dump_flags & TDF_DETAILS))
-   dump_jump_thread_path (dump_file, *path, false);
- path->release ();
+ cancel_thread (path);
}
}
   else
@@ -2060,9 +2073,7 @@ jump_thread_path_registry::mark_threaded_blocks (bitmap 
threaded_blocks)
 
if (j != path->length ())
  {
-   if (dump_file && (dump_flags & TDF_DETAILS))
- dump_jump_thread_path (dump_file, *path, false);
-   path->release ();
+   cancel_thread (path);
e->aux = NULL;
  }
else
@@ -2109,7 +2120,7 @@ jump_thread_path_registry::mark_threaded_blocks (bitmap 
threaded_blocks)
 
  if (e2 && !phi_args_equal_on_edges (e2, final_edge))
{
- path->release ();
+ cancel_thread (path);
  e->aux = NULL;
}
}
@@ -2332,9 +2343,7 @@ jump_thread_path_registry::adjust_paths_after_duplication
  if (j == cand_path->length ())
{
remove_candidate_from_list:
- if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file, "adjusted candidate: [EMPTY]\n");
- cand_path->release ();
+ cancel_thread (cand_path, "Adjusted candidate is EMPTY");
  m_paths.unordered_remove (cand_path_num);
  continue;
}
@@ -2595,7 +2604,7 @@ jump_thread_path_registry::thread_through_all_blocks
 
if (j != path->length ())
  {
-   path->release ();
+   cancel_thread (path, "Thread references removed edge");
m_paths.unordered_remove (i);
continue;
   

Re: [PATCH 6/8] coroutines: Convert implementation variables to debug-friendly form.

2021-09-03 Thread Jason Merrill via Gcc-patches

On 9/1/21 6:55 AM, Iain Sandoe wrote:


The user might well wish to inspect some of the state that represents
the implementation of the coroutine machine.

In particular:
   The promise object.
   The function pointers for the resumer and destroyer.
   The current resume index (suspend point).
   The handle that represent this coroutine 'self handle'.
   Whether the coroutine frame is allocated and needs to be freed.

These variables are given names that can be 'well-known' and advertised
in debug documentation - they are placed in the implementation namespace
and all begin with _Coro_.



Signed-off-by: Iain Sandoe 

gcc/cp/ChangeLog:

* coroutines.cc (transform_await_expr): Use debug-friendly
names for coroutine implementation.
(build_actor_fn): Likewise.
(build_destroy_fn): Likewise.
(coro_rewrite_function_body): Likewise.
(morph_fn_to_coro): Likewise.


Hmm, this patch doesn't seem to match the description and ChangeLog 
entry other than in the names of the functions changed.



---
  gcc/cp/coroutines.cc | 214 +++
  1 file changed, 94 insertions(+), 120 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 3b46aac4dc5..aacf352f1c9 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -1906,7 +1906,6 @@ transform_await_expr (tree await_expr, await_xform_data 
*xform)
/* So, on entry, we have:
   in : CO_AWAIT_EXPR (a, e_proxy, o, awr_call_vector, mode)
  We no longer need a [it had diagnostic value, maybe?]
- We need to replace the promise proxy in all elements
  We need to replace the e_proxy in the awr_call.  */
  
tree coro_frame_type = TREE_TYPE (xform->actor_frame);

@@ -1932,16 +1931,6 @@ transform_await_expr (tree await_expr, await_xform_data 
*xform)
TREE_OPERAND (await_expr, 1) = as;
  }
  
-  /* Now do the self_handle.  */

-  data.from = xform->self_h_proxy;
-  data.to = xform->real_self_h;
-  cp_walk_tree (_expr, replace_proxy, , NULL);
-
-  /* Now do the promise.  */
-  data.from = xform->promise_proxy;
-  data.to = xform->real_promise;
-  cp_walk_tree (_expr, replace_proxy, , NULL);
-
return await_expr;
  }
  
@@ -2128,10 +2117,9 @@ coro_get_frame_dtor (tree coro_fp, tree orig, tree frame_size,
  
  static void

  build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody,
-   tree orig, hash_map *param_uses,
-   hash_map *local_var_uses,
-   vec *param_dtor_list, tree resume_fn_field,
-   tree resume_idx_field, unsigned body_count, tree frame_size)
+   tree orig, hash_map *local_var_uses,
+   vec *param_dtor_list,
+   tree resume_idx_var, unsigned body_count, tree frame_size)
  {
verify_stmt_tree (fnbody);
/* Some things we inherit from the original function.  */
@@ -2216,8 +2204,8 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
cp_walk_tree (, transform_local_var_uses, _vars_data, NULL);
  
-  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field, 1, 0,

- tf_warning_or_error);
+  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field,
+ 1, 0, tf_warning_or_error);
tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, actor_frame,
 rat_field, NULL_TREE);
  
@@ -2319,14 +2307,8 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody,

tree r = build_stmt (loc, LABEL_EXPR, actor_begin_label);
add_stmt (r);
  
-  /* actor's version of the promise.  */

-  tree ap_m = lookup_member (coro_frame_type, get_identifier 
("_Coro_promise"), 1, 0,
-tf_warning_or_error);
-  tree ap = build_class_member_access_expr (actor_frame, ap_m, NULL_TREE, 
false,
-   tf_warning_or_error);
-
/* actor's coroutine 'self handle'.  */
-  tree ash_m = lookup_member (coro_frame_type, get_identifier 
("_Coro_self_handle"), 1,
+  tree ash_m = lookup_member (coro_frame_type, coro_self_handle_field, 1,
  0, tf_warning_or_error);
tree ash = build_class_member_access_expr (actor_frame, ash_m, NULL_TREE,
 false, tf_warning_or_error);
@@ -2347,36 +2329,13 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
   decide where to put things.  */
  
await_xform_data xform

-= {actor, actor_frame, promise_proxy, ap, self_h_proxy, ash};
+= {actor, actor_frame, NULL_TREE, NULL_TREE, self_h_proxy, ash};
  
/* Transform the await expressions in the function body.  Only do each

   await tree once!  */
hash_set pset;
cp_walk_tree (, transform_await_wrapper, , );
  
-  /* Now replace 

[COMMITTED] Avoid using unavailable objects in jt_state.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
[Jeff: I'm going to go out on a limb here and commit this under the
obvious rule.  If I am overstepping my obvious powers, please let me
know.]

The jump threading state is about to get more interesting, and it may
get with a ranger or with the const_copies/etc helpers.  This patch
makes sure we have an object before we attempt to call push_marker or
pop_to_marker.

Tested on x86-64 Linux.

gcc/ChangeLog:

* tree-ssa-threadedge.c (jt_state::push): Only call methods for
which objects are available.
(jt_state::pop): Same.
(jt_state::register_equiv): Same.
(jt_state::register_equivs_on_edge): Same.
---
 gcc/tree-ssa-threadedge.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index 8a485125ff5..e57f6d3e39c 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -1323,8 +1323,10 @@ jt_state::jt_state (const_and_copies *copies,
 void
 jt_state::push (edge)
 {
-  m_copies->push_marker ();
-  m_exprs->push_marker ();
+  if (m_copies)
+m_copies->push_marker ();
+  if (m_exprs)
+m_exprs->push_marker ();
   if (m_evrp)
 m_evrp->push_marker ();
 }
@@ -1334,8 +1336,10 @@ jt_state::push (edge)
 void
 jt_state::pop ()
 {
-  m_copies->pop_to_marker ();
-  m_exprs->pop_to_marker ();
+  if (m_copies)
+m_copies->pop_to_marker ();
+  if (m_exprs)
+m_exprs->pop_to_marker ();
   if (m_evrp)
 m_evrp->pop_to_marker ();
 }
@@ -1346,7 +1350,8 @@ jt_state::pop ()
 void
 jt_state::register_equiv (tree dst, tree src, bool update_range)
 {
-  m_copies->record_const_or_copy (dst, src);
+  if (m_copies)
+m_copies->record_const_or_copy (dst, src);
 
   /* If requested, update the value range associated with DST, using
  the range from SRC.  */
@@ -1396,7 +1401,8 @@ jt_state::record_ranges_from_stmt (gimple *stmt, bool 
temporary)
 void
 jt_state::register_equivs_on_edge (edge e)
 {
-  record_temporary_equivalences (e, m_copies, m_exprs);
+  if (m_copies && m_exprs)
+record_temporary_equivalences (e, m_copies, m_exprs);
 }
 
 jump_threader_simplifier::jump_threader_simplifier (vr_values *v)
-- 
2.31.1



Re: [PATCH 5/8] coroutines: Define and populate accessors for debug state.

2021-09-03 Thread Jason Merrill via Gcc-patches

On 9/3/21 9:42 AM, Iain Sandoe wrote:




On 3 Sep 2021, at 14:39, Jason Merrill  wrote:

On 9/1/21 6:54 AM, Iain Sandoe wrote:

This is an efficiency measure and repeats the pattern used for
other identifiers used in the coroutine implementation.
In support of debugging, the user might well need to look at some
of the variables that the implementation manipulates in lowering
the coroutines.  The defines the identifiers for these and populates
them on demand (avoiding repeated identifier calls).
Contributory to debug support (PR 99215)
Signed-off-by: Iain Sandoe 
gcc/cp/ChangeLog:
* coroutines.cc: Add identifiers for implementation
variables that we want to expose to debug.
(coro_init_identifiers): Initialize implementation names.
(coro_promise_type_found_p): Use pre-built identifiers.
(build_actor_fn): Likewise.
(build_destroy_fn): Likewise.
---
  gcc/cp/coroutines.cc | 32 
  1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 081e1a46c63..3b46aac4dc5 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -215,7 +215,17 @@ static GTY(()) tree coro_await_ready_identifier;
  static GTY(()) tree coro_await_suspend_identifier;
  static GTY(()) tree coro_await_resume_identifier;
  -/* Create the identifiers used by the coroutines library interfaces.  */
+/* Accessors for the coroutine frame state used by the implementation.  */
+
+static GTY(()) tree coro_resume_fn_field;
+static GTY(()) tree coro_destroy_fn_field;
+static GTY(()) tree coro_promise_field;
+static GTY(()) tree coro_frame_needs_free_field;
+static GTY(()) tree coro_resume_index_field;
+static GTY(()) tree coro_self_handle_field;


Since these are identifiers, not FIELD_DECLs, calling them *_field seems 
misleading.


I could append _id or _name .. they were just getting long already.
(they are names of fields, so that would not be misleading)


_id works for me, either with or without the _field.

Jason




+/* Create the identifiers used by the coroutines library interfaces and
+   the implementation frame state.  */
static void
  coro_init_identifiers ()
@@ -241,6 +251,14 @@ coro_init_identifiers ()
coro_await_ready_identifier = get_identifier ("await_ready");
coro_await_suspend_identifier = get_identifier ("await_suspend");
coro_await_resume_identifier = get_identifier ("await_resume");
+
+  /* Coroutine state frame field accessors.  */
+  coro_resume_fn_field = get_identifier ("_Coro_resume_fn");
+  coro_destroy_fn_field = get_identifier ("_Coro_destroy_fn");
+  coro_promise_field = get_identifier ("_Coro_promise");
+  coro_frame_needs_free_field = get_identifier ("_Coro_frame_needs_free");
+  coro_resume_index_field = get_identifier ("_Coro_resume_index");
+  coro_self_handle_field = get_identifier ("_Coro_self_handle");
  }
/* Trees we only need to set up once.  */
@@ -513,12 +531,12 @@ coro_promise_type_found_p (tree fndecl, location_t loc)
/* Build a proxy for a handle to "self" as the param to
 await_suspend() calls.  */
coro_info->self_h_proxy
-   = build_lang_decl (VAR_DECL, get_identifier ("_Coro_self_handle"),
+   = build_lang_decl (VAR_DECL, coro_self_handle_field,
   coro_info->handle_type);
  /* Build a proxy for the promise so that we can perform lookups.  */
coro_info->promise_proxy
-   = build_lang_decl (VAR_DECL, get_identifier ("_Coro_promise"),
+   = build_lang_decl (VAR_DECL, coro_promise_field,
   coro_info->promise_type);
  /* Note where we first saw a coroutine keyword.  */
@@ -2198,8 +2216,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
cp_walk_tree (, transform_local_var_uses, _vars_data, NULL);
  -  tree resume_idx_name = get_identifier ("_Coro_resume_index");
-  tree rat_field = lookup_member (coro_frame_type, resume_idx_name, 1, 0,
+  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field, 1, 
0,
  tf_warning_or_error);
tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, actor_frame,
 rat_field, NULL_TREE);
@@ -2462,7 +2479,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
  /* We will need to know which resume point number should be encoded.  */
tree res_idx_m
-= lookup_member (coro_frame_type, resume_idx_name,
+= lookup_member (coro_frame_type, coro_resume_index_field,
 /*protect=*/1, /*want_type=*/0, tf_warning_or_error);
tree resume_pt_number
  = build_class_member_access_expr (actor_frame, res_idx_m, NULL_TREE, 
false,
@@ -2504,8 +2521,7 @@ build_destroy_fn (location_t loc, tree coro_frame_type, 
tree destroy,
  tree destr_frame = build1 (INDIRECT_REF, 

[COMMITTED] Do not release state location until after path registry.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
We are popping state and then calling the registry code.  This causes
the registry to have incorrect information.  This isn't visible in
current trunk, but will be an issue when I submit further enhancements
to the threading code.  However, it is a cleanup on its own so I am
pushing it now.

Tested on x86-64 Linux.

Committed as obvious.

gcc/ChangeLog:

* tree-ssa-threadedge.c (jump_threader::thread_across_edge):
Move pop until after a thread is registered.
---
 gcc/tree-ssa-threadedge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index 37ee5c11be3..8a485125ff5 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -1155,9 +1155,9 @@ jump_threader::thread_across_edge (edge e)
 {
   propagate_threaded_block_debug_into (path->last ()->e->dest,
   e->dest);
-  m_state->pop ();
   BITMAP_FREE (visited);
   m_registry->register_jump_thread (path);
+  m_state->pop ();
   return;
 }
   else
-- 
2.31.1



Re: [PATCH 5/8] coroutines: Define and populate accessors for debug state.

2021-09-03 Thread Iain Sandoe



> On 3 Sep 2021, at 14:39, Jason Merrill  wrote:
> 
> On 9/1/21 6:54 AM, Iain Sandoe wrote:
>> This is an efficiency measure and repeats the pattern used for
>> other identifiers used in the coroutine implementation.
>> In support of debugging, the user might well need to look at some
>> of the variables that the implementation manipulates in lowering
>> the coroutines.  The defines the identifiers for these and populates
>> them on demand (avoiding repeated identifier calls).
>> Contributory to debug support (PR 99215)
>> Signed-off-by: Iain Sandoe 
>> gcc/cp/ChangeLog:
>>  * coroutines.cc: Add identifiers for implementation
>>  variables that we want to expose to debug.
>>  (coro_init_identifiers): Initialize implementation names.
>>  (coro_promise_type_found_p): Use pre-built identifiers.
>>  (build_actor_fn): Likewise.
>>  (build_destroy_fn): Likewise.
>> ---
>>  gcc/cp/coroutines.cc | 32 
>>  1 file changed, 24 insertions(+), 8 deletions(-)
>> diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
>> index 081e1a46c63..3b46aac4dc5 100644
>> --- a/gcc/cp/coroutines.cc
>> +++ b/gcc/cp/coroutines.cc
>> @@ -215,7 +215,17 @@ static GTY(()) tree coro_await_ready_identifier;
>>  static GTY(()) tree coro_await_suspend_identifier;
>>  static GTY(()) tree coro_await_resume_identifier;
>>  -/* Create the identifiers used by the coroutines library interfaces.  */
>> +/* Accessors for the coroutine frame state used by the implementation.  */
>> +
>> +static GTY(()) tree coro_resume_fn_field;
>> +static GTY(()) tree coro_destroy_fn_field;
>> +static GTY(()) tree coro_promise_field;
>> +static GTY(()) tree coro_frame_needs_free_field;
>> +static GTY(()) tree coro_resume_index_field;
>> +static GTY(()) tree coro_self_handle_field;
> 
> Since these are identifiers, not FIELD_DECLs, calling them *_field seems 
> misleading.

I could append _id or _name .. they were just getting long already.
(they are names of fields, so that would not be misleading)

Iain


> 
>> +/* Create the identifiers used by the coroutines library interfaces and
>> +   the implementation frame state.  */
>>static void
>>  coro_init_identifiers ()
>> @@ -241,6 +251,14 @@ coro_init_identifiers ()
>>coro_await_ready_identifier = get_identifier ("await_ready");
>>coro_await_suspend_identifier = get_identifier ("await_suspend");
>>coro_await_resume_identifier = get_identifier ("await_resume");
>> +
>> +  /* Coroutine state frame field accessors.  */
>> +  coro_resume_fn_field = get_identifier ("_Coro_resume_fn");
>> +  coro_destroy_fn_field = get_identifier ("_Coro_destroy_fn");
>> +  coro_promise_field = get_identifier ("_Coro_promise");
>> +  coro_frame_needs_free_field = get_identifier ("_Coro_frame_needs_free");
>> +  coro_resume_index_field = get_identifier ("_Coro_resume_index");
>> +  coro_self_handle_field = get_identifier ("_Coro_self_handle");
>>  }
>>/* Trees we only need to set up once.  */
>> @@ -513,12 +531,12 @@ coro_promise_type_found_p (tree fndecl, location_t loc)
>>/* Build a proxy for a handle to "self" as the param to
>>   await_suspend() calls.  */
>>coro_info->self_h_proxy
>> -= build_lang_decl (VAR_DECL, get_identifier ("_Coro_self_handle"),
>> += build_lang_decl (VAR_DECL, coro_self_handle_field,
>> coro_info->handle_type);
>>  /* Build a proxy for the promise so that we can perform lookups.  */
>>coro_info->promise_proxy
>> -= build_lang_decl (VAR_DECL, get_identifier ("_Coro_promise"),
>> += build_lang_decl (VAR_DECL, coro_promise_field,
>> coro_info->promise_type);
>>  /* Note where we first saw a coroutine keyword.  */
>> @@ -2198,8 +2216,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
>> tree actor, tree fnbody,
>>  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
>>cp_walk_tree (, transform_local_var_uses, _vars_data, NULL);
>>  -  tree resume_idx_name = get_identifier ("_Coro_resume_index");
>> -  tree rat_field = lookup_member (coro_frame_type, resume_idx_name, 1, 0,
>> +  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field, 
>> 1, 0,
>>tf_warning_or_error);
>>tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, actor_frame,
>>   rat_field, NULL_TREE);
>> @@ -2462,7 +2479,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
>> tree actor, tree fnbody,
>>  /* We will need to know which resume point number should be encoded.  */
>>tree res_idx_m
>> -= lookup_member (coro_frame_type, resume_idx_name,
>> += lookup_member (coro_frame_type, coro_resume_index_field,
>>   /*protect=*/1, /*want_type=*/0, tf_warning_or_error);
>>tree resume_pt_number
>>  = build_class_member_access_expr (actor_frame, res_idx_m, NULL_TREE, 
>> false,
>> @@ -2504,8 +2521,7 @@ build_destroy_fn 

Re: [PATCH 5/8] coroutines: Define and populate accessors for debug state.

2021-09-03 Thread Iain Sandoe



> On 3 Sep 2021, at 14:39, Jason Merrill  wrote:
> 
> On 9/1/21 6:54 AM, Iain Sandoe wrote:
>> This is an efficiency measure and repeats the pattern used for
>> other identifiers used in the coroutine implementation.
>> In support of debugging, the user might well need to look at some
>> of the variables that the implementation manipulates in lowering
>> the coroutines.  The defines the identifiers for these and populates
>> them on demand (avoiding repeated identifier calls).
>> Contributory to debug support (PR 99215)
>> Signed-off-by: Iain Sandoe 
>> gcc/cp/ChangeLog:
>>  * coroutines.cc: Add identifiers for implementation
>>  variables that we want to expose to debug.
>>  (coro_init_identifiers): Initialize implementation names.
>>  (coro_promise_type_found_p): Use pre-built identifiers.
>>  (build_actor_fn): Likewise.
>>  (build_destroy_fn): Likewise.
>> ---
>>  gcc/cp/coroutines.cc | 32 
>>  1 file changed, 24 insertions(+), 8 deletions(-)
>> diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
>> index 081e1a46c63..3b46aac4dc5 100644
>> --- a/gcc/cp/coroutines.cc
>> +++ b/gcc/cp/coroutines.cc
>> @@ -215,7 +215,17 @@ static GTY(()) tree coro_await_ready_identifier;
>>  static GTY(()) tree coro_await_suspend_identifier;
>>  static GTY(()) tree coro_await_resume_identifier;
>>  -/* Create the identifiers used by the coroutines library interfaces.  */
>> +/* Accessors for the coroutine frame state used by the implementation.  */
>> +
>> +static GTY(()) tree coro_resume_fn_field;
>> +static GTY(()) tree coro_destroy_fn_field;
>> +static GTY(()) tree coro_promise_field;
>> +static GTY(()) tree coro_frame_needs_free_field;
>> +static GTY(()) tree coro_resume_index_field;
>> +static GTY(()) tree coro_self_handle_field;
> 
> Since these are identifiers, not FIELD_DECLs, calling them *_field seems 
> misleading.

> 
>> +/* Create the identifiers used by the coroutines library interfaces and
>> +   the implementation frame state.  */
>>static void
>>  coro_init_identifiers ()
>> @@ -241,6 +251,14 @@ coro_init_identifiers ()
>>coro_await_ready_identifier = get_identifier ("await_ready");
>>coro_await_suspend_identifier = get_identifier ("await_suspend");
>>coro_await_resume_identifier = get_identifier ("await_resume");
>> +
>> +  /* Coroutine state frame field accessors.  */
>> +  coro_resume_fn_field = get_identifier ("_Coro_resume_fn");
>> +  coro_destroy_fn_field = get_identifier ("_Coro_destroy_fn");
>> +  coro_promise_field = get_identifier ("_Coro_promise");
>> +  coro_frame_needs_free_field = get_identifier ("_Coro_frame_needs_free");
>> +  coro_resume_index_field = get_identifier ("_Coro_resume_index");
>> +  coro_self_handle_field = get_identifier ("_Coro_self_handle");
>>  }
>>/* Trees we only need to set up once.  */
>> @@ -513,12 +531,12 @@ coro_promise_type_found_p (tree fndecl, location_t loc)
>>/* Build a proxy for a handle to "self" as the param to
>>   await_suspend() calls.  */
>>coro_info->self_h_proxy
>> -= build_lang_decl (VAR_DECL, get_identifier ("_Coro_self_handle"),
>> += build_lang_decl (VAR_DECL, coro_self_handle_field,
>> coro_info->handle_type);
>>  /* Build a proxy for the promise so that we can perform lookups.  */
>>coro_info->promise_proxy
>> -= build_lang_decl (VAR_DECL, get_identifier ("_Coro_promise"),
>> += build_lang_decl (VAR_DECL, coro_promise_field,
>> coro_info->promise_type);
>>  /* Note where we first saw a coroutine keyword.  */
>> @@ -2198,8 +2216,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
>> tree actor, tree fnbody,
>>  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
>>cp_walk_tree (, transform_local_var_uses, _vars_data, NULL);
>>  -  tree resume_idx_name = get_identifier ("_Coro_resume_index");
>> -  tree rat_field = lookup_member (coro_frame_type, resume_idx_name, 1, 0,
>> +  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field, 
>> 1, 0,
>>tf_warning_or_error);
>>tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, actor_frame,
>>   rat_field, NULL_TREE);
>> @@ -2462,7 +2479,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
>> tree actor, tree fnbody,
>>  /* We will need to know which resume point number should be encoded.  */
>>tree res_idx_m
>> -= lookup_member (coro_frame_type, resume_idx_name,
>> += lookup_member (coro_frame_type, coro_resume_index_field,
>>   /*protect=*/1, /*want_type=*/0, tf_warning_or_error);
>>tree resume_pt_number
>>  = build_class_member_access_expr (actor_frame, res_idx_m, NULL_TREE, 
>> false,
>> @@ -2504,8 +2521,7 @@ build_destroy_fn (location_t loc, tree 
>> coro_frame_type, tree destroy,
>>  tree destr_frame = build1 (INDIRECT_REF, coro_frame_type, destr_fp);
>>  -  tree 

Re: [PATCH 5/8] coroutines: Define and populate accessors for debug state.

2021-09-03 Thread Jason Merrill via Gcc-patches

On 9/1/21 6:54 AM, Iain Sandoe wrote:


This is an efficiency measure and repeats the pattern used for
other identifiers used in the coroutine implementation.

In support of debugging, the user might well need to look at some
of the variables that the implementation manipulates in lowering
the coroutines.  The defines the identifiers for these and populates
them on demand (avoiding repeated identifier calls).

Contributory to debug support (PR 99215)

Signed-off-by: Iain Sandoe 

gcc/cp/ChangeLog:

* coroutines.cc: Add identifiers for implementation
variables that we want to expose to debug.
(coro_init_identifiers): Initialize implementation names.
(coro_promise_type_found_p): Use pre-built identifiers.
(build_actor_fn): Likewise.
(build_destroy_fn): Likewise.
---
  gcc/cp/coroutines.cc | 32 
  1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 081e1a46c63..3b46aac4dc5 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -215,7 +215,17 @@ static GTY(()) tree coro_await_ready_identifier;
  static GTY(()) tree coro_await_suspend_identifier;
  static GTY(()) tree coro_await_resume_identifier;
  
-/* Create the identifiers used by the coroutines library interfaces.  */

+/* Accessors for the coroutine frame state used by the implementation.  */
+
+static GTY(()) tree coro_resume_fn_field;
+static GTY(()) tree coro_destroy_fn_field;
+static GTY(()) tree coro_promise_field;
+static GTY(()) tree coro_frame_needs_free_field;
+static GTY(()) tree coro_resume_index_field;
+static GTY(()) tree coro_self_handle_field;


Since these are identifiers, not FIELD_DECLs, calling them *_field seems 
misleading.



+/* Create the identifiers used by the coroutines library interfaces and
+   the implementation frame state.  */
  
  static void

  coro_init_identifiers ()
@@ -241,6 +251,14 @@ coro_init_identifiers ()
coro_await_ready_identifier = get_identifier ("await_ready");
coro_await_suspend_identifier = get_identifier ("await_suspend");
coro_await_resume_identifier = get_identifier ("await_resume");
+
+  /* Coroutine state frame field accessors.  */
+  coro_resume_fn_field = get_identifier ("_Coro_resume_fn");
+  coro_destroy_fn_field = get_identifier ("_Coro_destroy_fn");
+  coro_promise_field = get_identifier ("_Coro_promise");
+  coro_frame_needs_free_field = get_identifier ("_Coro_frame_needs_free");
+  coro_resume_index_field = get_identifier ("_Coro_resume_index");
+  coro_self_handle_field = get_identifier ("_Coro_self_handle");
  }
  
  /* Trees we only need to set up once.  */

@@ -513,12 +531,12 @@ coro_promise_type_found_p (tree fndecl, location_t loc)
/* Build a proxy for a handle to "self" as the param to
 await_suspend() calls.  */
coro_info->self_h_proxy
-   = build_lang_decl (VAR_DECL, get_identifier ("_Coro_self_handle"),
+   = build_lang_decl (VAR_DECL, coro_self_handle_field,
   coro_info->handle_type);
  
/* Build a proxy for the promise so that we can perform lookups.  */

coro_info->promise_proxy
-   = build_lang_decl (VAR_DECL, get_identifier ("_Coro_promise"),
+   = build_lang_decl (VAR_DECL, coro_promise_field,
   coro_info->promise_type);
  
/* Note where we first saw a coroutine keyword.  */

@@ -2198,8 +2216,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
cp_walk_tree (, transform_local_var_uses, _vars_data, NULL);
  
-  tree resume_idx_name = get_identifier ("_Coro_resume_index");

-  tree rat_field = lookup_member (coro_frame_type, resume_idx_name, 1, 0,
+  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field, 1, 
0,
  tf_warning_or_error);
tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, actor_frame,
 rat_field, NULL_TREE);
@@ -2462,7 +2479,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
  
/* We will need to know which resume point number should be encoded.  */

tree res_idx_m
-= lookup_member (coro_frame_type, resume_idx_name,
+= lookup_member (coro_frame_type, coro_resume_index_field,
 /*protect=*/1, /*want_type=*/0, tf_warning_or_error);
tree resume_pt_number
  = build_class_member_access_expr (actor_frame, res_idx_m, NULL_TREE, 
false,
@@ -2504,8 +2521,7 @@ build_destroy_fn (location_t loc, tree coro_frame_type, 
tree destroy,
  
tree destr_frame = build1 (INDIRECT_REF, coro_frame_type, destr_fp);
  
-  tree resume_idx_name = get_identifier ("_Coro_resume_index");

-  tree rat_field = lookup_member (coro_frame_type, resume_idx_name, 1, 0,
+  tree rat_field = lookup_member (coro_frame_type, coro_resume_index_field, 1, 
0,
 

[COMMITTED] Add debug helper for jump thread paths.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
Committed as obvious.

Tested on x86-64 Linux.

gcc/ChangeLog:

* tree-ssa-threadupdate.c (debug): New.
---
 gcc/tree-ssa-threadupdate.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index d08e7d30d8d..1d32a0230fb 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -239,6 +239,12 @@ debug (const vec )
   dump_jump_thread_path (stderr, path, true);
 }
 
+DEBUG_FUNCTION void
+debug (const vec *path)
+{
+  debug (*path);
+}
+
 /* Simple hashing function.  For any given incoming edge E, we're going
to be most concerned with the final destination of its jump thread
path.  So hash on the block index of the final edge in the path.  */
-- 
2.31.1



Re: [PATCH 4/8] coroutines: Make some of the artificial names more debugger-friendly.

2021-09-03 Thread Jason Merrill via Gcc-patches

On 9/1/21 6:54 AM, Iain Sandoe wrote:


Some of the compiler-generated entries are of interest to a
user debugging - keep variables in the implementation namespace
but avoid using periods as separators (which is not compatible
with visible symbols for some assemblers).

Partial improvement to debugging (PR 99215).


OK.


Signed-off-by: Iain Sandoe 

gcc/cp/ChangeLog:

* coroutines.cc (coro_promise_type_found_p): Rename variable
to make it suitable for debugging.
(build_actor_fn): Likewise.
(build_destroy_fn): Likewise.
(register_local_var_uses): Likewise.
(coro_rewrite_function_body): Likewise.
(morph_fn_to_coro): Likewise.
---
  gcc/cp/coroutines.cc | 59 ++--
  1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index a12714ea67e..081e1a46c63 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -513,12 +513,12 @@ coro_promise_type_found_p (tree fndecl, location_t loc)
/* Build a proxy for a handle to "self" as the param to
 await_suspend() calls.  */
coro_info->self_h_proxy
-   = build_lang_decl (VAR_DECL, get_identifier ("self_h.proxy"),
+   = build_lang_decl (VAR_DECL, get_identifier ("_Coro_self_handle"),
   coro_info->handle_type);
  
/* Build a proxy for the promise so that we can perform lookups.  */

coro_info->promise_proxy
-   = build_lang_decl (VAR_DECL, get_identifier ("promise.proxy"),
+   = build_lang_decl (VAR_DECL, get_identifier ("_Coro_promise"),
   coro_info->promise_type);
  
/* Note where we first saw a coroutine keyword.  */

@@ -2198,7 +2198,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
  = {actor, actor_frame, coro_frame_type, loc, local_var_uses};
cp_walk_tree (, transform_local_var_uses, _vars_data, NULL);
  
-  tree resume_idx_name = get_identifier ("__resume_at");

+  tree resume_idx_name = get_identifier ("_Coro_resume_index");
tree rat_field = lookup_member (coro_frame_type, resume_idx_name, 1, 0,
  tf_warning_or_error);
tree rat = build3 (COMPONENT_REF, short_unsigned_type_node, actor_frame,
@@ -2303,13 +2303,13 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
add_stmt (r);
  
/* actor's version of the promise.  */

-  tree ap_m = lookup_member (coro_frame_type, get_identifier ("__p"), 1, 0,
+  tree ap_m = lookup_member (coro_frame_type, get_identifier 
("_Coro_promise"), 1, 0,
 tf_warning_or_error);
tree ap = build_class_member_access_expr (actor_frame, ap_m, NULL_TREE, 
false,
tf_warning_or_error);
  
/* actor's coroutine 'self handle'.  */

-  tree ash_m = lookup_member (coro_frame_type, get_identifier ("__self_h"), 1,
+  tree ash_m = lookup_member (coro_frame_type, get_identifier 
("_Coro_self_handle"), 1,
  0, tf_warning_or_error);
tree ash = build_class_member_access_expr (actor_frame, ash_m, NULL_TREE,
 false, tf_warning_or_error);
@@ -2343,12 +2343,12 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
p_data.to = ap;
cp_walk_tree (, replace_proxy, _data, NULL);
  
-  /* The rewrite of the function adds code to set the __resume field to

+  /* The rewrite of the function adds code to set the resume_fn field to
   nullptr when the coroutine is done and also the index to zero when
   calling an unhandled exception.  These are represented by two proxies
   in the function, so rewrite them to the proper frame access.  */
tree resume_m
-= lookup_member (coro_frame_type, get_identifier ("__resume"),
+= lookup_member (coro_frame_type, get_identifier ("_Coro_resume_fn"),
 /*protect=*/1, /*want_type=*/0, tf_warning_or_error);
tree res_x = build_class_member_access_expr (actor_frame, resume_m, 
NULL_TREE,
   false, tf_warning_or_error);
@@ -2381,7 +2381,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, 
tree actor, tree fnbody,
/* Here deallocate the frame (if we allocated it), which we will have at
   present.  */
tree fnf_m
-= lookup_member (coro_frame_type, get_identifier ("__frame_needs_free"), 1,
+= lookup_member (coro_frame_type, get_identifier 
("_Coro_frame_needs_free"), 1,
 0, tf_warning_or_error);
tree fnf2_x = build_class_member_access_expr (actor_frame, fnf_m, NULL_TREE,
false, tf_warning_or_error);
@@ -2504,7 +2504,7 @@ build_destroy_fn (location_t loc, tree coro_frame_type, 
tree destroy,
  
tree destr_frame = build1 (INDIRECT_REF, coro_frame_type, destr_fp);
  
-  tree 

Re: PATCH 3/8] coroutines: Support for debugging implementation state.

2021-09-03 Thread Jason Merrill via Gcc-patches

On 9/1/21 6:53 AM, Iain Sandoe wrote:


Some of the state that is associated with the implementation
is of interest to a user debugging a coroutine.  In particular
items such as the suspend point, promise object, and current
suspend point.

These variables live in the coroutine frame, but we can inject
proxies for them into the outermost bind expression of the
coroutine.  Such variables are automatically moved into the
coroutine frame (if they need to persist across a suspend
expression).  PLacing the proxies thus allows the user to
inspect them by name in the debugger.

To implement this, we ensure that (at the outermost scope) the
frame entries are not mangled (coroutine frame variables are
usually mangled with scope nesting information so that they do
not clash).  We can safely avoid doing this for the outermost
scope so that we can map frame entries directly to the variables.

This is partial contribution to debug support (PR 99215).


OK.


Signed-off-by: Iain Sandoe 

gcc/cp/ChangeLog:

* coroutines.cc (register_local_var_uses): Do not mangle
frame entries for the outermost scope.  Record the outer
scope as nesting depth 0.
---
  gcc/cp/coroutines.cc | 16 +++-
  1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index b8501032969..a12714ea67e 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -3885,8 +3885,6 @@ register_local_var_uses (tree *stmt, int *do_subtree, 
void *d)
  
if (TREE_CODE (*stmt) == BIND_EXPR)

  {
-  lvd->bind_indx++;
-  lvd->nest_depth++;
tree lvar;
for (lvar = BIND_EXPR_VARS (*stmt); lvar != NULL;
   lvar = DECL_CHAIN (lvar))
@@ -3925,11 +3923,17 @@ register_local_var_uses (tree *stmt, int *do_subtree, 
void *d)
continue;
  
  	  /* Make names depth+index unique, so that we can support nested

-scopes with identically named locals.  */
+scopes with identically named locals and still be able to
+identify them in the coroutine frame.  */
  tree lvname = DECL_NAME (lvar);
  char *buf;
- if (lvname != NULL_TREE)
-   buf = xasprintf ("__%s.%u.%u", IDENTIFIER_POINTER (lvname),
+ /* The outermost bind scope contains the artificial variables that
+we inject to implement the coro state machine.  We want to be able
+to inspect these in debugging.  */
+ if (lvname != NULL_TREE && lvd->nest_depth == 0)
+   buf = xasprintf ("%s", IDENTIFIER_POINTER (lvname));
+ else if (lvname != NULL_TREE)
+   buf = xasprintf ("%s_%u_%u", IDENTIFIER_POINTER (lvname),
 lvd->nest_depth, lvd->bind_indx);
  else
buf = xasprintf ("_D%u.%u.%u", DECL_UID (lvar), lvd->nest_depth,
@@ -3942,6 +3946,8 @@ register_local_var_uses (tree *stmt, int *do_subtree, 
void *d)
  /* We don't walk any of the local var sub-trees, they won't contain
 any bind exprs.  */
}
+  lvd->bind_indx++;
+  lvd->nest_depth++;
cp_walk_tree (_EXPR_BODY (*stmt), register_local_var_uses, d, 
NULL);
*do_subtree = 0; /* We've done this.  */
lvd->nest_depth--;





[COMMITTED] Add function name when dumping ranger contents.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
These are minor cleanups to the dumping code.

Tested on x86-64 Linux.

gcc/ChangeLog:

* gimple-range-trace.cc (debug_seed_ranger): Remove static.
(dump_ranger): Dump function name.
---
 gcc/gimple-range-trace.cc | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-range-trace.cc b/gcc/gimple-range-trace.cc
index 1feb978e928..449ed8373fd 100644
--- a/gcc/gimple-range-trace.cc
+++ b/gcc/gimple-range-trace.cc
@@ -130,7 +130,7 @@ range_tracer::trailer (unsigned counter, const char 
*caller, bool result,
 
 // Query all statements in the IL to precalculate computable ranges in RANGER.
 
-static DEBUG_FUNCTION void
+DEBUG_FUNCTION void
 debug_seed_ranger (gimple_ranger )
 {
   // Recalculate SCEV to make sure the dump lists everything.
@@ -161,6 +161,11 @@ DEBUG_FUNCTION void
 dump_ranger (FILE *out)
 {
   gimple_ranger ranger;
+
+  fprintf (out, ";; Function ");
+  print_generic_expr (out, current_function_decl);
+  fprintf (out, "\n");
+
   debug_seed_ranger (ranger);
   ranger.dump (out);
 }
-- 
2.31.1



[COMMITTED] Use non-null knowledge in path_range_query.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
[Jeff, I'm CCing you to keep you in the loop on changes that will impact
jump threading.  Since we're using one engine, any changes to the either
ranger or the path solver, is likely to improve jump threading.]

This patch improves ranges for pointers we are interested in a path, by
using the non-null class from the ranger.  This allows us to thread more
paths with minimal effort.

Tested on x86-64 Linux.

gcc/ChangeLog:

* gimple-range-path.cc (path_range_query::range_defined_in_block):
Adjust for non-null.
(path_range_query::adjust_for_non_null_uses): New.
(path_range_query::precompute_ranges): Call
adjust_for_non_null_uses.
* gimple-range-path.h: Add m_non_null and
adjust_for_non_null_uses.
---
 gcc/gimple-range-path.cc | 33 +
 gcc/gimple-range-path.h  |  2 ++
 2 files changed, 35 insertions(+)

diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index 6d6e5eb6635..db15eb3ff22 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -221,6 +221,9 @@ path_range_query::range_defined_in_block (irange , tree 
name, basic_block bb)
   else if (!fold_range (r, def_stmt, this))
 r.set_varying (TREE_TYPE (name));
 
+  if (bb)
+m_non_null.adjust_range (r, name, bb);
+
   if (DEBUG_SOLVER && (bb || !r.varying_p ()))
 {
   fprintf (dump_file, "range_defined_in_block (BB%d) for ", bb ? bb->index 
: -1);
@@ -302,6 +305,35 @@ path_range_query::precompute_ranges_in_block (basic_block 
bb)
 }
 }
 
+// Adjust all pointer imports in BB with non-null information.
+
+void
+path_range_query::adjust_for_non_null_uses (basic_block bb)
+{
+  int_range_max r;
+  bitmap_iterator bi;
+  unsigned i;
+
+  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
+{
+  tree name = ssa_name (i);
+
+  if (!POINTER_TYPE_P (TREE_TYPE (name)))
+   continue;
+
+  if (get_cache (r, name))
+   {
+ if (r.nonzero_p ())
+   continue;
+   }
+  else
+   r.set_varying (TREE_TYPE (name));
+
+  if (m_non_null.adjust_range (r, name, bb))
+   set_cache (r, name);
+}
+}
+
 // Precompute the ranges for IMPORTS along PATH.
 //
 // IMPORTS are the set of SSA names, any of which could potentially
@@ -332,6 +364,7 @@ path_range_query::precompute_ranges (const vec 
,
   basic_block bb = curr_bb ();
 
   precompute_ranges_in_block (bb);
+  adjust_for_non_null_uses (bb);
 
   if (at_exit ())
break;
diff --git a/gcc/gimple-range-path.h b/gcc/gimple-range-path.h
index 0d2d2e7f75d..51773131040 100644
--- a/gcc/gimple-range-path.h
+++ b/gcc/gimple-range-path.h
@@ -53,6 +53,7 @@ private:
   // Methods to precompute ranges for the given path.
   bool range_defined_in_block (irange &, tree name, basic_block bb);
   void precompute_ranges_in_block (basic_block bb);
+  void adjust_for_non_null_uses (basic_block bb);
   void ssa_range_in_phi (irange , gphi *phi);
 
   // Path navigation.
@@ -80,6 +81,7 @@ private:
 
   const bitmap_head *m_imports;
   gimple_ranger _ranger;
+  non_null_ref m_non_null;
 };
 
 #endif // GCC_TREE_SSA_THREADSOLVER_H
-- 
2.31.1



[committed] libgomp.*/error-1.{c,f90}: Fix dg-output newline pattern

2021-09-03 Thread Tobias Burnus

Fixes the testcase for \r\n line breaks →
r12-3327-g4ce90454c2c81246be993d997cab12e21bc0be68

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 4ce90454c2c81246be993d997cab12e21bc0be68
Author: Tobias Burnus 
Date:   Fri Sep 3 15:24:41 2021 +0200

libgomp.*/error-1.{c,f90}: Fix dg-output newline pattern

libgomp/ChangeLog:

* testsuite/libgomp.c-c++-common/error-1.c: Use \r\n not \n\r in
dg-output.
* testsuite/libgomp.fortran/error-1.f90: Likewise.

diff --git a/libgomp/testsuite/libgomp.c-c++-common/error-1.c b/libgomp/testsuite/libgomp.c-c++-common/error-1.c
index 04c0519bf63..e7f550ae701 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/error-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/error-1.c
@@ -53,7 +53,7 @@ main ()
   return 0;
 }
 
-/* { dg-output "libgomp: error directive encountered(\n|\r|\n\r)(\n|\r|\n\r)" } */
-/* { dg-output "libgomp: error directive encountered: message(\n|\r|\n\r)(\n|\r|\n\r)" } */
-/* { dg-output "libgomp: error directive encountered: hello from a distance(\n|\r|\n\r)(\n|\r|\n\r)" } */
+/* { dg-output "libgomp: error directive encountered(\n|\r|\r\n)(\n|\r|\r\n)" } */
+/* { dg-output "libgomp: error directive encountered: message(\n|\r|\r\n)(\n|\r|\r\n)" } */
+/* { dg-output "libgomp: error directive encountered: hello from a distance(\n|\r|\r\n)(\n|\r|\r\n)" } */
 /* { dg-output "libgomp: fatal error: error directive encountered: my message" } */
diff --git a/libgomp/testsuite/libgomp.fortran/error-1.f90 b/libgomp/testsuite/libgomp.fortran/error-1.f90
index 7c497fd002e..ee3222d8894 100644
--- a/libgomp/testsuite/libgomp.fortran/error-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/error-1.f90
@@ -73,15 +73,15 @@ contains
end subroutine
 end
 
-! { dg-output "(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered: Polonius(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered: Laertes(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered: Paris(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered: To thine own self be true(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered: message(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered: Farewell(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered: from a distance(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered: Hello World(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: error directive encountered: mes(\n|\r|\n\r)(\n|\r|\n\r)" }
-! { dg-output "libgomp: fatal error: error directive encountered: my message   (\n|\r|\n\r)" }
+! { dg-output "(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered: Polonius(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered: Laertes(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered: Paris(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered: To thine own self be true(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered: message(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered: Farewell(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered: from a distance(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered: Hello World(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: error directive encountered: mes(\n|\r|\r\n)(\n|\r|\r\n)" }
+! { dg-output "libgomp: fatal error: error directive encountered: my message   (\n|\r|\r\n)" }


[COMMITTED] Improve path_range_query dumps.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
Tested on x86-64 Linux.

gcc/ChangeLog:

* gimple-range-path.cc (path_range_query::dump): Dump path
length.
(path_range_query::precompute_ranges): Dump entire path.
---
 gcc/gimple-range-path.cc | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index 77b823e7bd5..6d6e5eb6635 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -92,7 +92,7 @@ path_range_query::dump (FILE *dump_file)
   bitmap_iterator bi;
   extern void dump_ranger (FILE *, const vec &);
 
-  fprintf (dump_file, "Path is:\n");
+  fprintf (dump_file, "Path is (length=%d):\n", m_path->length ());
   dump_ranger (dump_file, *m_path);
 
   fprintf (dump_file, "Imports:\n");
@@ -315,7 +315,17 @@ path_range_query::precompute_ranges (const 
vec ,
   m_imports = imports;
 
   if (DEBUG_SOLVER)
-fprintf (dump_file, "path_range_query: precompute_ranges\n");
+{
+  fprintf (dump_file, "\npath_range_query: precompute_ranges for path: ");
+  for (unsigned i = path.length (); i > 0; --i)
+   {
+ basic_block bb = path[i - 1];
+ fprintf (dump_file, "BB %d", bb->index);
+ if (i > 1)
+   fprintf (dump_file, ", ");
+   }
+  fprintf (dump_file, "\n");
+}
 
   while (1)
 {
-- 
2.31.1



[COMMITTED] Implement relation_oracle::debug.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
Tested on x86-64 Linux.

gcc/ChangeLog:

* value-relation.cc (relation_oracle::debug): New.
* value-relation.h (relation_oracle::debug): New.
---
 gcc/value-relation.cc | 6 ++
 gcc/value-relation.h  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc
index 8edd98b612a..ba01d298521 100644
--- a/gcc/value-relation.cc
+++ b/gcc/value-relation.cc
@@ -1164,3 +1164,9 @@ relation_oracle::dump (FILE *f) const
dump (f, BASIC_BLOCK_FOR_FN (cfun, i));
   }
 }
+
+void
+relation_oracle::debug () const
+{
+  dump (stderr);
+}
diff --git a/gcc/value-relation.h b/gcc/value-relation.h
index e0e2f82c9ae..27158e051bf 100644
--- a/gcc/value-relation.h
+++ b/gcc/value-relation.h
@@ -142,6 +142,7 @@ public:
 
   void dump (FILE *f, basic_block bb) const;
   void dump (FILE *f) const;
+  void debug () const;
 private:
   bitmap m_tmp, m_tmp2;
   bitmap m_relation_set;  // Index by ssa-name. True if a relation exists
-- 
2.31.1



[COMMITTED] Remove unnecessary include from tree-ssa-loop-ch.c

2021-09-03 Thread Aldy Hernandez via Gcc-patches
Tested on x86-64 Linux.

Committed as obvious.

gcc/ChangeLog:

* tree-ssa-loop-ch.c: Remove unnecessary include file.
---
 gcc/tree-ssa-loop-ch.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index b4e09f97b28..ffb0aa85118 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -31,7 +31,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-into-ssa.h"
 #include "cfgloop.h"
 #include "tree-inline.h"
-#include "tree-ssa-scopedtables.h"
 #include "tree-ssa-threadedge.h"
 #include "tree-ssa-sccvn.h"
 #include "tree-phinodes.h"
-- 
2.31.1



[COMMITTED] Skip statements with no BB in ranger.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
The function postfold_gcond_edges() registers relations coming out of a
GIMPLE_COND.  With upcoming changes, we may be called with statements
not in the IL (for example, dummy statements created by the
forward threader).  This patch avoids breakage by exiting if the
statement does not have a defining basic block.  There is a similar
change to the path solver.

Tested on x86-64 Linux.

gcc/ChangeLog:

* gimple-range-fold.cc (fold_using_range::postfold_gcond_edges):
Skip statements with no defining BB.
* gimple-range-path.cc (path_range_query::range_defined_in_block):
Do not get confused by statements with no defining BB.
---
 gcc/gimple-range-fold.cc | 4 
 gcc/gimple-range-path.cc | 9 +++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc
index 8be6d473f82..7cf8830fc5d 100644
--- a/gcc/gimple-range-fold.cc
+++ b/gcc/gimple-range-fold.cc
@@ -1360,6 +1360,10 @@ fold_using_range::postfold_gcond_edges (gcond *s, 
irange& lhs_range,
   range_operator *handler;
   basic_block bb = gimple_bb (s);
 
+  // We may get asked to fold an artificial statement not in the CFG.
+  if (!bb)
+return;
+
   edge e0 = EDGE_SUCC (bb, 0);
   if (!single_pred_p (e0->dest))
 e0 = NULL;
diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index a8226a6810f..77b823e7bd5 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -221,14 +221,19 @@ path_range_query::range_defined_in_block (irange , tree 
name, basic_block bb)
   else if (!fold_range (r, def_stmt, this))
 r.set_varying (TREE_TYPE (name));
 
-  if (DEBUG_SOLVER)
+  if (DEBUG_SOLVER && (bb || !r.varying_p ()))
 {
-  fprintf (dump_file, "range_defined_in_block (BB%d) for ", bb->index);
+  fprintf (dump_file, "range_defined_in_block (BB%d) for ", bb ? bb->index 
: -1);
   print_generic_expr (dump_file, name, TDF_SLIM);
   fprintf (dump_file, " is ");
   r.dump (dump_file);
   fprintf (dump_file, "\n");
 }
+
+  // We may have an artificial statement not in the IL.
+  if (!bb && r.varying_p ())
+return false;
+
   return true;
 }
 
-- 
2.31.1



[COMMITTED] Improve support for IMAGPART_EXPR and REALPART_EXPR in ranger.

2021-09-03 Thread Aldy Hernandez via Gcc-patches
Currently we adjust statements containing an IMAGPART_EXPR if the
defining statement was one of a few built-ins known to return boolean
types.  We can also adjust statements for both IMAGPART_EXPR and
REALPART_EXPR where the defining statement is a constant.

This patch adds such support, and cleans up the code a bit.

Tested on x86-64 Linux.

gcc/ChangeLog:

* gimple-range-fold.cc (adjust_imagpart_expr): Move from
gimple_range_adjustment.  Add support for constants.
(adjust_realpart_expr): New.
(gimple_range_adjustment): Move IMAGPART_EXPR code to
adjust_imagpart_expr.
* range-op.cc (integral_table::integral_table): Add entry for
REALPART_CST.
---
 gcc/gimple-range-fold.cc | 110 +++
 gcc/range-op.cc  |   1 +
 2 files changed, 77 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc
index 94dd042721e..8be6d473f82 100644
--- a/gcc/gimple-range-fold.cc
+++ b/gcc/gimple-range-fold.cc
@@ -369,14 +369,78 @@ adjust_pointer_diff_expr (irange , const gimple 
*diff_stmt)
 }
 }
 
+// Adjust the range for an IMAGPART_EXPR.
+
+static void
+adjust_imagpart_expr (irange , const gimple *stmt)
+{
+  tree name = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
+
+  if (TREE_CODE (name) != SSA_NAME || !SSA_NAME_DEF_STMT (name))
+return;
+
+  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
+  if (is_gimple_call (def_stmt) && gimple_call_internal_p (def_stmt))
+{
+  switch (gimple_call_internal_fn (def_stmt))
+   {
+   case IFN_ADD_OVERFLOW:
+   case IFN_SUB_OVERFLOW:
+   case IFN_MUL_OVERFLOW:
+   case IFN_ATOMIC_COMPARE_EXCHANGE:
+ {
+   int_range<2> r;
+   r.set_varying (boolean_type_node);
+   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
+   range_cast (r, type);
+   res.intersect (r);
+ }
+   default:
+ break;
+   }
+  return;
+}
+  if (is_gimple_assign (def_stmt))
+{
+  tree cst = gimple_assign_rhs1 (def_stmt);
+  if (TREE_CODE (cst) == COMPLEX_CST)
+   {
+ tree imag = TREE_IMAGPART (cst);
+ int_range<2> tmp (imag, imag);
+ res.intersect (tmp);
+   }
+}
+}
+
+// Adjust the range for a REALPART_EXPR.
+
+static void
+adjust_realpart_expr (irange , const gimple *stmt)
+{
+  tree name = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
+
+  if (TREE_CODE (name) != SSA_NAME)
+return;
+
+  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
+  if (!SSA_NAME_DEF_STMT (name))
+return;
+
+  if (is_gimple_assign (def_stmt))
+{
+  tree cst = gimple_assign_rhs1 (def_stmt);
+  if (TREE_CODE (cst) == COMPLEX_CST)
+   {
+ tree imag = TREE_REALPART (cst);
+ int_range<2> tmp (imag, imag);
+ res.intersect (tmp);
+   }
+}
+}
+
 // This function looks for situations when walking the use/def chains
 // may provide additonal contextual range information not exposed on
-// this statement.  Like knowing the IMAGPART return value from a
-// builtin function is a boolean result.
-
-// We should rework how we're called, as we have an op_unknown entry
-// for IMAGPART_EXPR and POINTER_DIFF_EXPR in range-ops just so this
-// function gets called.
+// this statement.
 
 static void
 gimple_range_adjustment (irange , const gimple *stmt)
@@ -388,34 +452,12 @@ gimple_range_adjustment (irange , const gimple *stmt)
   return;
 
 case IMAGPART_EXPR:
-  {
-   tree name = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
-   if (TREE_CODE (name) == SSA_NAME)
- {
-   gimple *def_stmt = SSA_NAME_DEF_STMT (name);
-   if (def_stmt && is_gimple_call (def_stmt)
-   && gimple_call_internal_p (def_stmt))
- {
-   switch (gimple_call_internal_fn (def_stmt))
- {
- case IFN_ADD_OVERFLOW:
- case IFN_SUB_OVERFLOW:
- case IFN_MUL_OVERFLOW:
- case IFN_ATOMIC_COMPARE_EXCHANGE:
-   {
- int_range<2> r;
- r.set_varying (boolean_type_node);
- tree type = TREE_TYPE (gimple_assign_lhs (stmt));
- range_cast (r, type);
- res.intersect (r);
-   }
- default:
-   break;
- }
- }
- }
-   break;
-  }
+  adjust_imagpart_expr (res, stmt);
+  return;
+
+case REALPART_EXPR:
+  adjust_realpart_expr (res, stmt);
+  return;
 
 default:
   break;
diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 56eccf471a2..fee0e834c23 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -4017,6 +4017,7 @@ integral_table::integral_table ()
   set (PAREN_EXPR, op_identity);
   set (OBJ_TYPE_REF, op_identity);
   set (IMAGPART_EXPR, op_unknown);
+  set (REALPART_EXPR, op_unknown);
  

Re: [PATCH 2/6] [i386] Enable _Float16 type for TARGET_SSE2 and above.

2021-09-03 Thread Jakub Jelinek via Gcc-patches
On Mon, Aug 02, 2021 at 02:31:12PM +0800, liuhongt via Gcc-patches wrote:
>   * doc/extend.texi (Half-Precision Floating Point): Documemt
>   _Float16 for x86.

> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -1102,6 +1102,7 @@ typedef _Complex float __attribute__((mode(IC))) 
> _Complex_ibm128;
>  @section Half-Precision Floating Point
>  @cindex half-precision floating point
>  @cindex @code{__fp16} data type
> +@cindex @code{__Float16} data type
>  
>  On ARM and AArch64 targets, GCC supports half-precision (16-bit) floating
>  point via the @code{__fp16} type defined in the ARM C Language Extensions.
> @@ -1150,6 +1151,18 @@ calls.
>  It is recommended that portable code use the @code{_Float16} type defined
>  by ISO/IEC TS 18661-3:2015.  @xref{Floating Types}.
>  
> +On x86 targets with @code{target("sse2")} and above, GCC supports 
> half-precision
> +(16-bit) floating point via the @code{_Float16} type which is defined by
> +18661-3:2015. For C++, x86 provide a builtin type named @code{_Float16}
> +which contains same data format as C.
> +
> +Without @option{-mavx512fp16}, @code{_Float16} type is storage only, all
> +operations will be emulated by software emulation and the @code{float}
> +instructions. The default behavior for @code{FLT_EVAL_METHOD} is to keep
> +the intermediate result of the operation as 32-bit precision. This may lead
> +to inconsistent behavior between software emulation and AVX512-FP16
> +instructions.
> +
>  @node Decimal Float
>  @section Decimal Floating Types
>  @cindex decimal floating types

Shouldn't there be more changes for this in doc/extend.texi?

I'd say that x86 with -msse2 should be mentioned in
The @code{_Float16} type is supported on AArch64
systems by default, and on ARM systems when the IEEE format for 16-bit
floating-point types is selected with @option{-mfp16-format=ieee}.

and in
@node Half-Precision
I'd say that one sentence about the x86 support should go already in the
first paragraph, perhaps after:
On ARM and AArch64 targets, GCC supports half-precision (16-bit) floating
point via the @code{__fp16} type defined in the ARM C Language Extensions.
On ARM systems, you must enable this type explicitly with the
@option{-mfp16-format} command-line option in order to use it.
because users just won't scroll down to immediately find out that in the
10th/11th paragraph it talks about x86.
Just mention there that on all 3 arches it is available using the _Float16 type
in C, on x86 in C++ too and then on ARM/AArch64 using __fp16.

Jakub



Re: [PATCH] Explicitly add -msse2 to compile HF related libgcc source file.

2021-09-03 Thread Iain Sandoe via Gcc-patches



> On 3 Sep 2021, at 13:33, Jakub Jelinek via Gcc-patches 
>  wrote:
> 
> On Fri, Sep 03, 2021 at 01:32:14PM +0100, Iain Sandoe via Gcc-patches wrote:
>> diff --git a/libgcc/libgcc-std.ver.in b/libgcc/libgcc-std.ver.in
>> index cea33267e53..f5c1bc1aa6c 100644
>> --- a/libgcc/libgcc-std.ver.in
>> +++ b/libgcc/libgcc-std.ver.in
>> @@ -1944,3 +1944,21 @@ GCC_7.0.0 {
>>   __PFX__divmoddi4
>>   __PFX__divmodti4
>> }
>> +
>> +%inherit GCC_12.0.0 GCC_7.0.0
>> + GCC_12.0.0 {
>> +  __PFX__eqhf2
>> +  __PFX__extendhfdf2
>> +  __PFX__extendhfsf2
>> +  __PFX__extendhftf2
>> +  __PFX__extendhfxf2
>> +  __PFX__fixhfti
>> +  __PFX__fixunshfti
>> +  __PFX__floattihf
>> +  __PFX__floatuntihf
>> +  __PFX__nehf2
>> +  __PFX__truncdfhf2
>> +  __PFX__truncsfhf2
>> +  __PFX__trunctfhf2
>> +  __PFX__truncxfhf2
>> +}
> 
> No *hc* entrypoints?

oops EWRONGPATCH

diff --git a/libgcc/libgcc-std.ver.in b/libgcc/libgcc-std.ver.in
index cea33267e53..902a9364850 100644
--- a/libgcc/libgcc-std.ver.in
+++ b/libgcc/libgcc-std.ver.in
@@ -1944,3 +1944,23 @@ GCC_7.0.0 {
   __PFX__divmoddi4
   __PFX__divmodti4
 }
+
+%inherit GCC_12.0.0 GCC_7.0.0
+ GCC_12.0.0 {
+  __PFX__divhc3
+  __PFX__mulhc3
+  __PFX__eqhf2
+  __PFX__extendhfdf2
+  __PFX__extendhfsf2
+  __PFX__extendhftf2
+  __PFX__extendhfxf2
+  __PFX__fixhfti
+  __PFX__fixunshfti
+  __PFX__floattihf
+  __PFX__floatuntihf
+  __PFX__nehf2
+  __PFX__truncdfhf2
+  __PFX__truncsfhf2
+  __PFX__trunctfhf2
+  __PFX__truncxfhf2
+}




Re: [PATCH] Explicitly add -msse2 to compile HF related libgcc source file.

2021-09-03 Thread Jakub Jelinek via Gcc-patches
On Fri, Sep 03, 2021 at 01:32:14PM +0100, Iain Sandoe via Gcc-patches wrote:
> diff --git a/libgcc/libgcc-std.ver.in b/libgcc/libgcc-std.ver.in
> index cea33267e53..f5c1bc1aa6c 100644
> --- a/libgcc/libgcc-std.ver.in
> +++ b/libgcc/libgcc-std.ver.in
> @@ -1944,3 +1944,21 @@ GCC_7.0.0 {
>__PFX__divmoddi4
>__PFX__divmodti4
>  }
> +
> +%inherit GCC_12.0.0 GCC_7.0.0
> + GCC_12.0.0 {
> +  __PFX__eqhf2
> +  __PFX__extendhfdf2
> +  __PFX__extendhfsf2
> +  __PFX__extendhftf2
> +  __PFX__extendhfxf2
> +  __PFX__fixhfti
> +  __PFX__fixunshfti
> +  __PFX__floattihf
> +  __PFX__floatuntihf
> +  __PFX__nehf2
> +  __PFX__truncdfhf2
> +  __PFX__truncsfhf2
> +  __PFX__trunctfhf2
> +  __PFX__truncxfhf2
> +}

No *hc* entrypoints?

Jakub



Re: [PATCH] Explicitly add -msse2 to compile HF related libgcc source file.

2021-09-03 Thread Iain Sandoe via Gcc-patches



> On 3 Sep 2021, at 10:00, Jakub Jelinek via Gcc-patches 
>  wrote:
> 
> On Fri, Sep 03, 2021 at 03:41:13PM +0800, liuhongt via Gcc-patches wrote:
>> --- a/libgcc/config/i386/64/t-softfp
>> +++ b/libgcc/config/i386/64/t-softfp
>> @@ -1 +1,6 @@
>> softfp_extras := fixhfti fixunshfti floattihf floatuntihf
>> +
>> +CFLAGS-fixhfti.c += -msse2
>> +CFLAGS-fixunshfti.c += -msse2
>> +CFLAGS-floattihf.c += -msse2
>> +CFLAGS-floatunstihf.c += -msse2
>> \ No newline at end of file
> 
> Please avoid this.
> 
>> --- a/libgcc/config/i386/t-softfp
>> +++ b/libgcc/config/i386/t-softfp
>> @@ -1,6 +1,38 @@
>> LIB2ADD += $(srcdir)/config/i386/sfp-exceptions.c
>> 
>> +# Replace _divhc3 and _mulhc3.
>> +libgcc2-hf-functions = _divhc3 _mulhc3
>> +LIB2FUNCS_EXCLUDE += $(libgcc2-hf-functions)
>> +libgcc2-hf-extras = $(addsuffix .c, $(libgcc2-hf-functions))
>> +LIB2ADD_ST += $(addprefix $(srcdir)/config/i386/, $(libgcc2-hf-extras))
>> +
>> softfp_extensions := hfsf hfdf hftf hfxf sfdf sftf dftf xftf
>> softfp_truncations := tfhf xfhf dfhf sfhf tfsf dfsf tfdf tfxf
>> 
>> -softfp_extras += eqhf2
>> \ No newline at end of file
>> +softfp_extras += eqhf2
>> +
>> +CFLAGS-extendhfsf2.c += -msse2
>> +CFLAGS-extendhfdf2.c += -msse2
>> +CFLAGS-extendhftf2.c += -msse2
>> +CFLAGS-extendhfxf2.c += -msse2
>> +
>> +CFLAGS-extendsfdf2.c += -msse2
>> +CFLAGS-extendsftf2.c += -msse2
>> +
>> +CFLAGS-extenddftf2.c += -msse2
>> +CFLAGS-extendxftf2.c += -msse2
> 
> Aren't the [sdxt]f<->[sdxt]f extend/trunc conversions in libgcc for
> quite a while?  If so, using -msse2 for those seems wrong, it is fine
> if we require -msse2 support for anything that uses HF or HC mode
> types, but if just trying to convert other types it should work even on
> just i386 or i486.
> 
>> +
>> +CFLAGS-truncsfhf2.c += -msse2
>> +CFLAGS-truncdfhf2.c += -msse2
>> +CFLAGS-truncxfhf2.c += -msse2
>> +CFLAGS-trunctfhf2.c += -msse2
>> +
>> +CFLAGS-truncdfsf2.c += -msse2
>> +CFLAGS-trunctfsf2.c += -msse2
>> +
>> +CFLAGS-trunctfdf2.c += -msse2
>> +CFLAGS-trunctfxf2.c += -msse2
>> +
>> +CFLAGS-eqhf2.c += -msse2
>> +CFLAGS-_divhc3.c += -msse2
>> +CFLAGS-_mulhc3.c += -msse2
>> \ No newline at end of file
> 
> See above.
> 
> Also, shouldn't the *hf* and *hc* APIs be exported from libgcc_s.so.1?
> 
>   254: 00011960   968 FUNCLOCAL  DEFAULT   13 __floattihf
>   256: 000116f0   315 FUNCLOCAL  DEFAULT   13 __fixhfti
>   263: f950  1358 FUNCLOCAL  DEFAULT   13 __truncsfhf2
>   264: db10   272 FUNCLOCAL  DEFAULT   13 __extendhfsf2
>   265: df70   399 FUNCLOCAL  DEFAULT   13 __extendhfxf2
>   267: 00011fe0   469 FUNCLOCAL  DEFAULT   13 __eqhf2
>   268: 00011d30   681 FUNCLOCAL  DEFAULT   13 __floatuntihf
>   271: dc20   374 FUNCLOCAL  DEFAULT   13 __extendhfdf2
>   274: 00011830   292 FUNCLOCAL  DEFAULT   13 __fixunshfti
>   281: dda0   460 FUNCLOCAL  DEFAULT   13 __extendhftf2
>   283: e860  1439 FUNCLOCAL  DEFAULT   13 __trunctfhf2
>   285: 00011fe0   469 FUNCLOCAL  DEFAULT   13 __nehf2
>   286: 6290  1627 FUNCLOCAL  DEFAULT   13 __divhc3
>   290: ee00  1499 FUNCLOCAL  DEFAULT   13 __truncxfhf2
>   292: f3e0  1392 FUNCLOCAL  DEFAULT   13 __truncdfhf2
>   296: 5150  1931 FUNCLOCAL  DEFAULT   13 __mulhc3
> 
> So, don't we want GCC_12.0 with those symbols
> in config/i386/libgcc-glibc.ver and perhaps others?

this works for me on Darwin - I didn’t try it on Linux so far though (and didn’t
look at solaris etc. which have thier own sym maps).

---

diff --git a/libgcc/libgcc-std.ver.in b/libgcc/libgcc-std.ver.in
index cea33267e53..f5c1bc1aa6c 100644
--- a/libgcc/libgcc-std.ver.in
+++ b/libgcc/libgcc-std.ver.in
@@ -1944,3 +1944,21 @@ GCC_7.0.0 {
   __PFX__divmoddi4
   __PFX__divmodti4
 }
+
+%inherit GCC_12.0.0 GCC_7.0.0
+ GCC_12.0.0 {
+  __PFX__eqhf2
+  __PFX__extendhfdf2
+  __PFX__extendhfsf2
+  __PFX__extendhftf2
+  __PFX__extendhfxf2
+  __PFX__fixhfti
+  __PFX__fixunshfti
+  __PFX__floattihf
+  __PFX__floatuntihf
+  __PFX__nehf2
+  __PFX__truncdfhf2
+  __PFX__truncsfhf2
+  __PFX__trunctfhf2
+  __PFX__truncxfhf2
+}



Re: Host and offload targets have no common meaning of address spaces (was: [ping] Re-unify 'omp_build_component_ref' and 'oacc_build_component_ref')

2021-09-03 Thread Andrew Stubbs

On 24/08/2021 12:43, Richard Biener via Gcc-patches wrote:

On Tue, Aug 24, 2021 at 12:23 PM Thomas Schwinge
 wrote:


Hi!

On 2021-08-19T22:13:56+0200, I wrote:

On 2021-08-16T10:21:04+0200, Jakub Jelinek  wrote:

On Mon, Aug 16, 2021 at 10:08:42AM +0200, Thomas Schwinge wrote:

|> Concerning the current 'gcc/omp-low.c:omp_build_component_ref', for the
|> current set of offloading testcases, we never see a
|> '!ADDR_SPACE_GENERIC_P' there, so the address space handling doesn't seem
|> to be necessary there (but also won't do any harm: no-op).


Are you sure this can't trigger?
Say
extern int __seg_fs a;

void
foo (void)
{
   #pragma omp parallel private (a)
   a = 2;
}


That test case doesn't run into 'omp_build_component_ref' at all,
but [I've pushed an altered and extended variant that does],
"Add 'libgomp.c/address-space-1.c'".

In this case, 'omp_build_component_ref' called via host compilation
'pass_lower_omp', it's the 'field_type' that has 'address-space-1', not
'obj_type', so indeed Kwok's new code is a no-op:

 (gdb) call debug_tree(field_type)
  


I think keeping the qual addr space here is the wrong thing to do,
it should keep the other quals and clear the address space instead,
the whole struct is going to be in generic addres space, isn't it?


Correct for 'omp_build_component_ref' called via host compilation
'pass_lower_omp'



However, regarding the former comment -- shouldn't we force generic
address space for all 'tree' types read in via LTO streaming for
offloading compilation?  I assume that (in the general case) address
spaces are never compatible between host and offloading compilation?
For [...] "Add 'libgomp.c/address-space-1.c'", propagating the
'__seg_fs' address space across the offloading boundary (assuming I did
interpret the dumps correctly) doesn't seem to cause any problems


As I found later, actually the 'address-space-1' per host '__seg_fs' does
cause the "Intel MIC (emulated) offloading execution failure"
mentioned/XFAILed for 'libgomp.c/address-space-1.c': SIGSEGV, like
(expected) for host execution.  For GCN offloading target, it maps to
GCN 'ADDR_SPACE_FLAT' which apparently doesn't cause any ill effects (for
that simple test case).  The nvptx offloading target doesn't consider
address spaces at all.

Is the attached "Host and offload targets have no common meaning of
address spaces" OK to push?


Then, is that the way to do this, or should we add in
'gcc/tree-streamer-out.c:pack_ts_base_value_fields':

 if (lto_stream_offload_p)
   gcc_assert (ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (expr)));

..., and elsewhere sanitize this for offloading compilation?  Jakub's
suggestion above, regarding 'gcc/omp-low.c:omp_build_component_ref':

| I think keeping the qual addr space here is the wrong thing to do,
| it should keep the other quals and clear the address space instead

But it's not obvious to me that indeed this is the one place where this
would need to be done?  (It ought to work for
'libgomp.c/address-space-1.c', and any other occurrences would run into
the 'assert', so that ought to be "fine", though?)


And, should we have a new hook
'void targetm.addr_space.validate (addr_space_t as)' (better name?),
called via 'gcc/emit-rtl.c:set_mem_attrs' (only? -- assuming this is the
appropriate canonic function where address space use is observed?), to
make sure that the requested 'as' is valid for the target?
'default_addr_space_validate' would refuse everything but
'ADDR_SPACE_GENERIC_P (as)'; this hook would need implementing for all
handful of targets making use of address spaces (supposedly matching the
logic how they call 'c_register_addr_space'?).  (The closest existing
hook seems to be 'targetm.addr_space.diagnose_usage', only defined for
AVR, and called from "the front ends" (C only).)


Are address-spaces to be used in any way for OpenMP offload code?  That is,
does the OpenMP standard talk about them and how to remap things?  I'd
say I agree that any host address-space should go away when the corresponding
data is offloaded and in case OpenMP allows to specify a target address-space
that would need to be instantiated in a way so the LTO streaming knows about
a mapping from the host to the target representation.


The new OpenMP 5 allocator features will permit allocations to different 
memories (we're planning an implementation soon). Whether that means a 
different address space may be target specific, but I would certainly 
expect that it could be. For AMD GCN there is a "flat" address space 
that covers most memories, but if you know what memory an address refers 
to then there's often a more efficient instruction you can use.


Certainly the numeric address space codes for the host system 
architecture have no meaning on the accelerator architecture.


Andrew


RE: [Patch][GCC][middle-end] - Generate FRINTZ for (double)(int) under -ffast-math on aarch64

2021-09-03 Thread Jirui Wu via Gcc-patches
Ping

-Original Message-
From: Jirui Wu 
Sent: Friday, August 20, 2021 4:28 PM
To: Richard Biener 
Cc: Richard Biener ; Andrew Pinski 
; Richard Sandiford ; 
i...@airs.com; gcc-patches@gcc.gnu.org; Joseph S. Myers 

Subject: RE: [Patch][GCC][middle-end] - Generate FRINTZ for (double)(int) under 
-ffast-math on aarch64

> -Original Message-
> From: Richard Biener 
> Sent: Friday, August 20, 2021 8:15 AM
> To: Jirui Wu 
> Cc: Richard Biener ; Andrew Pinski 
> ; Richard Sandiford ; 
> i...@airs.com; gcc-patches@gcc.gnu.org; Joseph S. Myers 
> 
> Subject: RE: [Patch][GCC][middle-end] - Generate FRINTZ for 
> (double)(int) under -ffast-math on aarch64
> 
> On Thu, 19 Aug 2021, Jirui Wu wrote:
> 
> > Hi all,
> >
> > This patch generates FRINTZ instruction to optimize type casts.
> >
> > The changes in this patch covers:
> > * Generate FRINTZ for (double)(int) casts.
> > * Add new test cases.
> >
> > The intermediate type is not checked according to the C99 spec.
> > Overflow of the integral part when casting floats to integers causes
> undefined behavior.
> > As a result, optimization to trunc() is not invalid.
> > I've confirmed that Boolean type does not match the matching condition.
> >
> > Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master? If OK can it be committed for me, I have no commit rights.
> 
> +/* Detected a fix_trunc cast inside a float type cast,
> +   use IFN_TRUNC to optimize.  */
> +#if GIMPLE
> +(simplify
> +  (float (fix_trunc @0))
> +  (if (direct_internal_fn_supported_p (IFN_TRUNC, type,
> +  OPTIMIZE_FOR_BOTH)
> +   && flag_unsafe_math_optimizations
> +   && type == TREE_TYPE (@0))
> 
> types_match (type, TREE_TYPE (@0))
> 
> please.  Please perform cheap tests first (the flag test).
> 
> + (IFN_TRUNC @0)))
> +#endif
> 
> why only for GIMPLE?  I'm not sure flag_unsafe_math_optimizations is a 
> good test here.  If you say we can use undefined behavior of any 
> overflow of the fix_trunc operation what do we guard here?
> If it's Inf/NaN input then flag_finite_math_only would be more 
> appropriate, if it's behavior for -0. (I suppose trunc (-0.0) == -0.0 
> and thus "wrong") then a && !HONOR_SIGNED_ZEROS (type) is missing 
> instead.  If it's setting of FENV state and possibly trapping on 
> overflow (but it's undefined?!) then flag_trapping_math covers the 
> latter but we don't have any flag for eliding FENV state affecting 
> transforms, so there the kitchen-sink flag_unsafe_math_optimizations might 
> apply.
> 
> So - which is it?
> 
This change is only for GIMPLE because we can't test for the optab support 
without being in GIMPLE. direct_internal_fn_supported_p is defined only for 
GIMPLE. 

IFN_TRUNC's documentation mentions nothing for zero, NaNs/inf inputs.
So I think the correct guard is just flag_fp_int_builtin_inexact.
!flag_trapping_math because the operation can only still raise inexacts.

The new pattern is moved next to the place you mentioned.

Ok for master? If OK can it be committed for me, I have no commit rights.

Thanks,
Jirui
> Note there's also the pattern
> 
> /* Handle cases of two conversions in a row.  */ (for ocvt (convert 
> float
> fix_trunc)  (for icvt (convert float)
>   (simplify
>(ocvt (icvt@1 @0))
>(with
> {
> ...
> 
> which is related so please put the new pattern next to that (the set 
> of conversions handled there does not include (float (fix_trunc @0)))
> 
> Thanks,
> Richard.
> 
> > Thanks,
> > Jirui
> >
> > gcc/ChangeLog:
> >
> > * match.pd: Generate IFN_TRUNC.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/aarch64/merge_trunc1.c: New test.
> >
> > > -Original Message-
> > > From: Richard Biener 
> > > Sent: Tuesday, August 17, 2021 9:13 AM
> > > To: Andrew Pinski 
> > > Cc: Jirui Wu ; Richard Sandiford 
> > > ; i...@airs.com; 
> > > gcc-patches@gcc.gnu.org; rguent...@suse.de
> > > Subject: Re: [Patch][GCC][middle-end] - Generate FRINTZ for
> > > (double)(int) under -ffast-math on aarch64
> > >
> > > On Mon, Aug 16, 2021 at 8:48 PM Andrew Pinski via Gcc-patches 
> > >  wrote:
> > > >
> > > > On Mon, Aug 16, 2021 at 9:15 AM Jirui Wu via Gcc-patches 
> > > >  wrote:
> > > > >
> > > > > Hi all,
> > > > >
> > > > > This patch generates FRINTZ instruction to optimize type casts.
> > > > >
> > > > > The changes in this patch covers:
> > > > > * Opimization of a FIX_TRUNC_EXPR cast inside a FLOAT_EXPR 
> > > > > using
> > > IFN_TRUNC.
> > > > > * Change of corresponding test cases.
> > > > >
> > > > > Regtested on aarch64-none-linux-gnu and no issues.
> > > > >
> > > > > Ok for master? If OK can it be committed for me, I have no 
> > > > > commit
> rights.
> > > >
> > > > Is there a reason why you are doing the transformation manually 
> > > > inside forwprop rather than handling it inside match.pd?
> > > > Also can't this only be done for -ffast-math case?
> > >
> > > You definitely have to look at the intermediate type - 

Re: [PATCH 2/5]AArch64 sve: combine nested if predicates

2021-09-03 Thread Richard Sandiford via Gcc-patches
Tamar Christina  writes:
> Hi All,
>
> The following example
>
> void f5(float * restrict z0, float * restrict z1, float *restrict x,
>   float * restrict y, float c, int n)
> {
> for (int i = 0; i < n; i++) {
> float a = x[i];
> float b = y[i];
> if (a > b) {
> z0[i] = a + b;
> if (a > c) {
> z1[i] = a - b;
> }
> }
> }
> }
>
> generates currently:
>
> ptrue   p3.b, all
> ld1wz1.s, p1/z, [x2, x5, lsl 2]
> ld1wz2.s, p1/z, [x3, x5, lsl 2]
> fcmgt   p0.s, p3/z, z1.s, z0.s
> fcmgt   p2.s, p1/z, z1.s, z2.s
> fcmgt   p0.s, p0/z, z1.s, z2.s
>
> The conditions for a > b and a > c become separate comparisons.
>
> After this patch using a 2 -> 2 split we generate:
>
> ld1wz1.s, p0/z, [x2, x5, lsl 2]
> ld1wz2.s, p0/z, [x3, x5, lsl 2]
> fcmgt   p1.s, p0/z, z1.s, z2.s
> fcmgt   p1.s, p1/z, z1.s, z0.s
>
> Where the condition a > b && a > c are folded by using the predicate result of
> the previous compare and thus allows the removal of one of the compares.
>
> Note: This patch series is working incrementally towards generating the most
>   efficient code for this and other loops in small steps.

It looks like this could be done in the vectoriser via an extension
of the scalar_cond_masked_set mechanism.  We have:

  mask__54.13_59 = vect_a_15.9_55 > vect_b_17.12_58;
  vec_mask_and_60 = loop_mask_32 & mask__54.13_59;
  …
  mask__30.17_67 = vect_a_15.9_55 > vect_cst__66;
  mask__29.18_68 = mask__54.13_59 & mask__30.17_67;
  vec_mask_and_69 = loop_mask_32 & mask__29.18_68;

When vectorising mask__29.18_68, we could test whether each side
of the "&" is already in scalar_cond_masked_set and AND in the loop
mask if so, like we do in vectorizable_condition.  We could then
separately record that the & result includes the loop mask.

Thanks,
Richard

> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-sve.md (*mask_cmp_and_combine): New.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/sve/pred-combine-and.c: New test.
>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-sve.md 
> b/gcc/config/aarch64/aarch64-sve.md
> index 
> 2c23c6b12bafb038d82920e7141a418e078a2c65..ee9d32c0a5534209689d9d3abaa560ee5b66347d
>  100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -8162,6 +8162,48 @@ (define_insn_and_split "*mask_inv_combine"
>  }
>  )
>  
> +;; Combine multiple masks where the comparisons operators are the same and
> +;; each comparison has one parameter shared. e.g. combine a > b && a > c
> +(define_insn_and_split "*mask_cmp_and_combine"
> +  [(set (match_operand: 0 "register_operand" "=Upa")
> + (and:
> +   (and:
> + (unspec:
> +   [(match_operand: 1)
> +(const_int SVE_KNOWN_PTRUE)
> +(match_operand:SVE_FULL_F 2 "register_operand" "w")
> +(match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
> +   SVE_COND_FP_CMP_I0)
> + (unspec:
> +   [(match_dup 1)
> +(const_int SVE_KNOWN_PTRUE)
> +(match_dup 2)
> +(match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "wDz")]
> +   SVE_COND_FP_CMP_I0))
> + (match_operand: 5 "register_operand" "Upa")))
> +   (clobber (match_scratch: 6 "="))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 6)
> + (unspec:
> +   [(match_dup 5)
> +(const_int SVE_MAYBE_NOT_PTRUE)
> +(match_dup 2)
> +(match_dup 3)]
> +   SVE_COND_FP_CMP_I0))
> +   (set (match_dup 0)
> + (unspec:
> +   [(match_dup 6)
> +(const_int SVE_MAYBE_NOT_PTRUE)
> +(match_dup 2)
> +(match_dup 4)]
> +   SVE_COND_FP_CMP_I0))]
> +{
> +  operands[6] = gen_reg_rtx (mode);
> +}
> +)
> +
>  ;; -
>  ;;  [FP] Absolute comparisons
>  ;; -
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> new file mode 100644
> index 
> ..d395b7f84bb15b588493611df5a47549726ac24a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> @@ -0,0 +1,18 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-O3 --save-temps" } */
> +
> +void f5(float * restrict z0, float * restrict z1, float *restrict x, float * 
> restrict y, float c, int n)
> +{
> +for (int i = 0; i < n; i++) {
> +float a = x[i];
> +float b = y[i];
> +if (a > b) {
> +z0[i] = a + b;
> +if (a > c) {
> +z1[i] = a - b;
> +   

Re: [ARM] PR66791: Replace builtins for vdup_n and vmov_n intrinsics

2021-09-03 Thread Christophe LYON via Gcc-patches



On 03/09/2021 10:35, Prathamesh Kulkarni via Gcc-patches wrote:

On Thu, 2 Sept 2021 at 14:32, Christophe Lyon
 wrote:



On Tue, Aug 24, 2021 at 10:17 AM Kyrylo Tkachov  wrote:




-Original Message-
From: Prathamesh Kulkarni 
Sent: 24 August 2021 09:01
To: Christophe Lyon 
Cc: Kyrylo Tkachov ; gcc Patches 
Subject: Re: [ARM] PR66791: Replace builtins for vdup_n and vmov_n
intrinsics

On Tue, 17 Aug 2021 at 11:55, Prathamesh Kulkarni
 wrote:

On Thu, 12 Aug 2021 at 19:04, Christophe Lyon
 wrote:



On Thu, Aug 12, 2021 at 1:54 PM Prathamesh Kulkarni

 wrote:

On Wed, 11 Aug 2021 at 22:23, Christophe Lyon
 wrote:



On Thu, Jun 24, 2021 at 6:29 PM Kyrylo Tkachov via Gcc-patches 
patc...@gcc.gnu.org> wrote:




-Original Message-
From: Prathamesh Kulkarni 
Sent: 24 June 2021 12:11
To: gcc Patches ; Kyrylo Tkachov

Subject: [ARM] PR66791: Replace builtins for vdup_n and vmov_n

intrinsics

Hi,
This patch replaces builtins for vdup_n and vmov_n.
The patch results in regression for pr51534.c.
Consider following function:

uint8x8_t f1 (uint8x8_t a) {
   return vcgt_u8(a, vdup_n_u8(0));
}

code-gen before patch:
f1:
 vmov.i32  d16, #0  @ v8qi
 vcgt.u8 d0, d0, d16
 bx lr

code-gen after patch:
f1:
 vceq.i8 d0, d0, #0
 vmvnd0, d0
 bx lr

I am not sure which one is better tho ?

Hi Prathamesh,

This patch introduces a regression on non-hardfp configs (eg arm-

linux-gnueabi or arm-eabi):

FAIL:  gcc:gcc.target/arm/arm.exp=gcc.target/arm/pr51534.c scan-

assembler-times vmov.i32[ \t]+[dD][0-9]+, #0x 3

FAIL:  gcc:gcc.target/arm/arm.exp=gcc.target/arm/pr51534.c scan-

assembler-times vmov.i32[ \t]+[qQ][0-9]+, #4294967295 3

Can you fix this?

The issue is, for following test:

#include 

uint8x8_t f1 (uint8x8_t a) {
   return vcge_u8(a, vdup_n_u8(0));
}

armhf code-gen:
f1:
 vmov.i32  d0, #0x  @ v8qi
 bxlr

arm softfp code-gen:
f1:
 mov r0, #-1
 mov r1, #-1
 bx  lr

The code-gen for both is same upto split2 pass:

(insn 10 6 11 2 (set (reg/i:V8QI 16 s0)
 (const_vector:V8QI [
 (const_int -1 [0x]) repeated x8
 ])) "foo.c":5:1 1052 {*neon_movv8qi}
  (expr_list:REG_EQUAL (const_vector:V8QI [
 (const_int -1 [0x]) repeated x8
 ])
 (nil)))
(insn 11 10 13 2 (use (reg/i:V8QI 16 s0)) "foo.c":5:1 -1
  (nil))

and for softfp target, split2 pass splits the assignment to r0 and r1:

(insn 15 6 16 2 (set (reg:SI 0 r0)
 (const_int -1 [0x])) "foo.c":5:1 740

{*thumb2_movsi_vfp}

  (nil))
(insn 16 15 11 2 (set (reg:SI 1 r1 [+4 ])
 (const_int -1 [0x])) "foo.c":5:1 740

{*thumb2_movsi_vfp}

  (nil))
(insn 11 16 13 2 (use (reg/i:V8QI 0 r0)) "foo.c":5:1 -1
  (nil))

I suppose we could use a dg-scan for r[0-9]+, #-1 for softfp targets ?


Yes, probably, or try with check-function-bodies.

Hi,
Sorry for the late response. Does the attached patch look OK ?

ping https://gcc.gnu.org/pipermail/gcc-patches/2021-August/577532.html

Ok.



Sorry Prathamesh, this does not quite work. See  
https://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/r12-3294-g7a6f40d0452ec76e126c2612dcfa32f3c73e2315/report-build-info.html
(red cells in the gcc column)

Can you have a look?

Ah, for arm-none-eabi target the code-gen for test_vcge_u8_uint8x8_t is:
 mvn r0, #0
 mvn r1, #0
 bx  lr

instead of:
 mov r0, #-1
 mov r1, #-1
 bx  lr

I guess both code-gen sequences are correct, but I am not sure under
which circumstance either sequence gets generated.
To accomodate for both, I tried to check for either pattern with OR:
/* { dg-final { scan-assembler-times "(mov\[\]+r\[0-9\]+,
#-1)|(mvn\[\]+r\[0-9\]+, #0)" 6 { target { arm_softfp_ok } } } }
*/
but that doesn't seem to work.

Do you have any suggestions on how to proceed ?


Sorry I have no magic syntax for this :-)

Maybe it's also worth checking why we have this codegen difference?


Thanks,


Christophe





Thanks,
Prathamesh

Thanks

Christophe


Thanks,
Kyrill


Thanks,
Prathamesh

Thanks,
Prathamesh

  Christophe


Thanks,
Prathamesh

Thanks

Christophe



I think they're equivalent in practice, in any case the patch itself is

good (move away from RTL builtins).

Ok.
Thanks,
Kyrill


Also, this patch regressed bf16_dup.c on arm-linux-gnueabi,
which is due to a missed opt in lowering. I had filed it as
PR98435, and posted a fix for it here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-

June/572648.html

Thanks,
Prathamesh


Re: [PATCH] libstdc++: Fix compare_three_way for constexpr and Clang

2021-09-03 Thread Paul Keir via Gcc-patches
*ping*


From: Paul Keir 
Sent: 20 August 2021 21:17
To: gcc-patches@gcc.gnu.org
Cc: libstd...@gcc.gnu.org
Subject: [PATCH] libstdc++: Fix compare_three_way for constexpr and Clang

Hi,

The current compare_three_way implementation makes provision for constant 
evaluation contexts (avoiding reinterpret_cast etc.), but the approach fails 
with Clang; when it compares two const volatile void pointers: "comparison 
between unequal pointers to void has unspecified result". I include a fix and 
test.

Could someone commit the attached patch for me?

Thanks,
Paul


Please consider the environment and think before you print.

The University of the West of Scotland is a registered Scottish charity. 
Charity number SC002520.

This e-mail and any attachment is for authorised use by the intended 
recipient(s) only. It may contain proprietary material, confidential 
information and/or be subject to legal privilege. It should not be copied, 
disclosed to, retained or used by, any other party. If you are not an intended 
recipient then please promptly delete this e-mail and any attachment and all 
copies and inform the sender.

Please note that any views or opinions presented in this email are solely those 
of the author and do not necessarily represent those of the University of the 
West of Scotland.

As a public body, the University of the West of Scotland may be required to 
make available emails as well as other written forms of information as a result 
of a request made under the Freedom of Information (Scotland) Act 2002.


Re: [PATCH 1/2]middle-end Teach CSE to be able to do vector extracts.

2021-09-03 Thread Richard Sandiford via Gcc-patches
Tamar Christina via Gcc-patches  writes:
> diff --git a/gcc/cse.c b/gcc/cse.c
> index 
> 330c1e90ce05b8f95b58f24576ec93e10ec55d89..d76e01b6478e22e9dd5760b7c78cecb536d7daef
>  100644
> --- a/gcc/cse.c
> +++ b/gcc/cse.c
> @@ -44,6 +44,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "regs.h"
>  #include "function-abi.h"
>  #include "rtlanal.h"
> +#include "expr.h"
>  
>  /* The basic idea of common subexpression elimination is to go
> through the code, keeping a record of expressions that would
> @@ -4274,6 +4275,25 @@ find_sets_in_insn (rtx_insn *insn, struct set **psets)
>someplace else, so it isn't worth cse'ing.  */
>else if (GET_CODE (SET_SRC (x)) == CALL)
>   ;
> +  else if (GET_CODE (SET_SRC (x)) == CONST_VECTOR
> +&& GET_MODE_CLASS (GET_MODE (SET_SRC (x))) != MODE_VECTOR_BOOL)
> + {
> +   /* First register the vector itself.  */
> +   sets[n_sets++].rtl = x;
> +   rtx src = SET_SRC (x);
> +   machine_mode elem_mode = GET_MODE_INNER (GET_MODE (src));
> +   /* Go over the constants of the CONST_VECTOR in forward order, to
> +  put them in the same order in the SETS array.  */
> +   for (unsigned i = 0; i < const_vector_encoded_nelts (src) ; i++)
> + {
> +   /* These are templates and don't actually get emitted but are
> +  used to tell CSE how to get to a particular constant.  */
> +   rtx tmp = gen_rtx_PARALLEL (VOIDmode,
> +   gen_rtvec (1, GEN_INT (i)));
> +   rtx y = gen_rtx_VEC_SELECT (elem_mode, SET_DEST (x), tmp);
> +   sets[n_sets++].rtl = gen_rtx_SET (y, CONST_VECTOR_ELT (src, i));
> + }
> + }

As mentioned in the 2/2 thread, I think we should use subregs for
the case where they're canonical.  It'd probably be worth adding a
simplify-rtx.c helper to extract one element from a vector, e.g.:

  rtx simplify_gen_vec_select (rtx op, unsigned int index);

so that this is easier to do.

Does making the loop above per-element mean that, for 128-bit Advanced
SIMD, the optimisation “only” kicks in for 64-bit element sizes?
Perhaps for other element sizes we could do “top” and “bottom” halves.
(There's obviously no need to do that as part of this work, was just
wondering.)

>else
>   sets[n_sets++].rtl = x;
>  }
> @@ -4513,7 +4533,14 @@ cse_insn (rtx_insn *insn)
>struct set *sets = (struct set *) 0;
>  
>if (GET_CODE (x) == SET)
> -sets = XALLOCA (struct set);
> +{
> +  /* For CONST_VECTOR we wants to be able to CSE the vector itself along 
> with
> +  elements inside the vector if the target says it's cheap.  */
> +  if (GET_CODE (SET_SRC (x)) == CONST_VECTOR)
> + sets = XALLOCAVEC (struct set, const_vector_encoded_nelts (SET_SRC (x)) 
> + 1);
> +  else
> + sets = XALLOCA (struct set);
> +}
>else if (GET_CODE (x) == PARALLEL)
>  sets = XALLOCAVEC (struct set, XVECLEN (x, 0));

I think this would be easier if “sets” was first converted to an
auto_vec, say auto_vec.  We then wouldn't need to
predict in advance how many elements are needed.

> @@ -4997,6 +5024,26 @@ cse_insn (rtx_insn *insn)
> src_related_is_const_anchor = src_related != NULL_RTX;
>   }
>  
> +  /* Try to re-materialize a vec_dup with an existing constant.   */
> +  if (GET_CODE (src) == CONST_VECTOR
> +   && const_vector_encoded_nelts (src) == 1)
> + {
> +rtx const_rtx = CONST_VECTOR_ELT (src, 0);

Would be simpler as:

  rtx src_elt;
  if (const_vec_duplicate_p (src, _elt))

I think we should also check !src_eqv_here, or perhaps:

  (!src_eqv_here || CONSTANT_P (src_eqv_here))

so that we don't override any existing reg notes, which could have more
chance of succeeding.

> +machine_mode const_mode = GET_MODE_INNER (GET_MODE (src));
> +struct table_elt *related_elt
> + = lookup (const_rtx, HASH (const_rtx, const_mode), const_mode);
> +if (related_elt)
> + {
> +   for (related_elt = related_elt->first_same_value;
> +related_elt; related_elt = related_elt->next_same_value)
> + if (REG_P (related_elt->exp))
> +   {
> + src_eqv_here
> + = gen_rtx_VEC_DUPLICATE (GET_MODE (src),
> +  related_elt->exp);
> +   }

Other similar loops seem to break after the first match, instead of
picking the last match.

Thanks,
Richard

> + }
> + }
>  
>if (src == src_folded)
>   src_folded = 0;


Re: [PATCH] Fix some GC issues in the aarch64 back-end.

2021-09-03 Thread Richard Sandiford via Gcc-patches
apinski--- via Gcc-patches  writes:
> From: Andrew Pinski 
>
> I got some ICEs in my latest testsing while running the libstdc++ testsuite.
> I had noticed the problem was connected to types and had just touched the
> builtins code but nothing which could have caused this and I looked for
> some types/variables that were not being marked with GTY.
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

OK.  I'm a bit surprised this makes a difference, since I'd have
expected the builtin function definitions to keep the types live.

I agree the change is clearly correct though.  We shouldn't be relying
on indirect marking.

Thanks,
Richard

>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-builtins.c (struct aarch64_simd_type_info):
>   Mark with GTY.
>   (aarch64_simd_types): Likewise.
>   (aarch64_simd_intOI_type_node): Likewise.
>   (aarch64_simd_intCI_type_node): Likewise.
>   (aarch64_simd_intXI_type_node): Likewise.
>   * config/aarch64/aarch64.h (aarch64_fp16_type_node): Likewise.
>   (aarch64_fp16_ptr_type_node): Likewise.
>   (aarch64_bf16_type_node): Likewise.
>   (aarch64_bf16_ptr_type_node): Likewise.
> ---
>  gcc/config/aarch64/aarch64-builtins.c | 10 +-
>  gcc/config/aarch64/aarch64.h  |  8 
>  2 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.c 
> b/gcc/config/aarch64/aarch64-builtins.c
> index d441437..9f37a71 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -594,7 +594,7 @@ enum aarch64_simd_type
>  };
>  #undef ENTRY
>  
> -struct aarch64_simd_type_info
> +struct GTY(()) aarch64_simd_type_info
>  {
>enum aarch64_simd_type type;
>  
> @@ -626,14 +626,14 @@ struct aarch64_simd_type_info
>  
>  #define ENTRY(E, M, Q, G)  \
>{E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
> -static struct aarch64_simd_type_info aarch64_simd_types [] = {
> +static GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = {
>  #include "aarch64-simd-builtin-types.def"
>  };
>  #undef ENTRY
>  
> -static tree aarch64_simd_intOI_type_node = NULL_TREE;
> -static tree aarch64_simd_intCI_type_node = NULL_TREE;
> -static tree aarch64_simd_intXI_type_node = NULL_TREE;
> +static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
> +static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
> +static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;
>  
>  /* The user-visible __fp16 type, and a pointer to that type.  Used
> across the back-end.  */
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index bfffbcd..a5ba6c2 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -1262,13 +1262,13 @@ extern const char *host_detect_local_cpu (int argc, 
> const char **argv);
>  
>  /* This type is the user-visible __fp16, and a pointer to that type.  We
> need it in many places in the backend.  Defined in aarch64-builtins.c.  */
> -extern tree aarch64_fp16_type_node;
> -extern tree aarch64_fp16_ptr_type_node;
> +extern GTY(()) tree aarch64_fp16_type_node;
> +extern GTY(()) tree aarch64_fp16_ptr_type_node;
>  
>  /* This type is the user-visible __bf16, and a pointer to that type.  Defined
> in aarch64-builtins.c.  */
> -extern tree aarch64_bf16_type_node;
> -extern tree aarch64_bf16_ptr_type_node;
> +extern GTY(()) tree aarch64_bf16_type_node;
> +extern GTY(()) tree aarch64_bf16_ptr_type_node;
>  
>  /* The generic unwind code in libgcc does not initialize the frame pointer.
> So in order to unwind a function using a frame pointer, the very first


Re: [PATCH] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple

2021-09-03 Thread Richard Sandiford via Gcc-patches
apinski--- via Gcc-patches  writes:
> From: Andrew Pinski 
>
> This patch adds simple folding of __builtin_aarch64_im_lane_boundsi where
> we are not going to error out. It fixes the problem by the removal
> of the function from the IR.
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin_lane_check):
>   New function.
>   (aarch64_general_fold_builtin): Handle AARCH64_SIMD_BUILTIN_LANE_CHECK.
>   (aarch64_general_gimple_fold_builtin): Likewise.
> ---
>  gcc/config/aarch64/aarch64-builtins.c | 35 +++
>  1 file changed, 35 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.c 
> b/gcc/config/aarch64/aarch64-builtins.c
> index f6b41d9c200..d4414373aa4 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -29,6 +29,7 @@
>  #include "rtl.h"
>  #include "tree.h"
>  #include "gimple.h"
> +#include "ssa.h"
>  #include "memmodel.h"
>  #include "tm_p.h"
>  #include "expmed.h"
> @@ -2333,6 +2334,27 @@ aarch64_general_builtin_rsqrt (unsigned int fn)
>return NULL_TREE;
>  }
>  
> +/* Return true if the lane check can be removed as there is no
> +   error going to be emitted.  */
> +static bool
> +aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
> +{
> +  if (TREE_CODE (arg0) != INTEGER_CST)
> +return false;
> +  if (TREE_CODE (arg1) != INTEGER_CST)
> +return false;
> +  if (TREE_CODE (arg2) != INTEGER_CST)
> +return false;
> +
> +  auto totalsize = wi::to_widest (arg0);
> +  auto elementsize = wi::to_widest (arg1);
> +  if (totalsize == 0 || elementsize == 0)
> +return false;
> +  auto lane = wi::to_widest (arg2);
> +  auto high = wi::udiv_trunc (totalsize, elementsize);
> +  return wi::ltu_p (lane, high);
> +}
> +
>  #undef VAR1
>  #define VAR1(T, N, MAP, FLAG, A) \
>case AARCH64_SIMD_BUILTIN_##T##_##N##A:
> @@ -2353,6 +2375,11 @@ aarch64_general_fold_builtin (unsigned int fcode, tree 
> type,
>VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
>VAR1 (UNOP, floatv2di, 2, ALL, v2df)
>   return fold_build1 (FLOAT_EXPR, type, args[0]);
> +  case AARCH64_SIMD_BUILTIN_LANE_CHECK:
> + if (n_args == 3

Do we need this check?  If it's safe to rely on frontend testing
for aarch64_general_gimple_fold_builtin then hopefully it should
be here too.  I think an assert would be better.

(FTR, we have extra checking for SVE because the overloaded functions
don't have definite prototypes.)

> + && aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
> +   return fold_convert (void_type_node, integer_zero_node);

Could this just be void_node instead?  VOID_CST is the tree constant
code for void_type_node.

It would be good to add the testcase, as a -fdump-tree-optimized test
that checks for a single instance of { = \*ptr}.

Thanks,
Richard


[c-family] Improve compatibility of -fdump-ada-spec with warnings

2021-09-03 Thread Eric Botcazou
This makes sure that the style and warning settings used in the C/C++ bindings 
generated by -fdump-ada-spec do not leak into the units that use them.

Tested on x86-64/Linux, applied on the mainline.


2021-09-03  Eric Botcazou  

c-family/
* c-ada-spec.c (dump_ads): Generate pragmas to disable style checks
and -gnatwu warning for the package specification.

-- 
Eric Botcazoudiff --git a/gcc/c-family/c-ada-spec.c b/gcc/c-family/c-ada-spec.c
index 827bcc76830..b197d551c43 100644
--- a/gcc/c-family/c-ada-spec.c
+++ b/gcc/c-family/c-ada-spec.c
@@ -3440,11 +3440,12 @@ dump_ads (const char *source_file,
   dump_ada_nodes (, source_file);
 
   /* We require Ada 2012 syntax, so generate corresponding pragma.  */
-  fputs ("pragma Ada_2012;\n", f);
+  fputs ("pragma Ada_2012;\n\n", f);
 
   /* Disable style checks and warnings on unused entities since this file
 	 is auto-generated and always has a with clause for Interfaces.C.  */
-  fputs ("pragma Style_Checks (Off);\npragma Warnings (\"U\");\n\n", f);
+  fputs ("pragma Style_Checks (Off);\n", f);
+  fputs ("pragma Warnings (Off, \"-gnatwu\");\n\n", f);
 
   /* Dump withs.  */
   dump_ada_withs (f);
@@ -3452,7 +3453,10 @@ dump_ads (const char *source_file,
   fprintf (f, "\npackage %s is\n\n", pkg_name);
   pp_write_text_to_stream ();
   /* ??? need to free pp */
-  fprintf (f, "end %s;\n", pkg_name);
+  fprintf (f, "end %s;\n\n", pkg_name);
+
+  fputs ("pragma Style_Checks (On);\n", f);
+  fputs ("pragma Warnings (On, \"-gnatwu\");\n", f);
   fclose (f);
 }
 


Re: [PATCH v3, Fortran] TS 29113 testsuite

2021-09-03 Thread Tobias Burnus

Hi,

On 03.09.21 09:46, Christophe Lyon wrote:

I'm not quite sure I understand the expected status of this patch: are
all the "expected" failures tagged as XFAIL?

I think that's the idea.

If yes, then there's a problem because I see lots of unresolved on
aarch64/arm

XFAIL: gfortran.dg/c-interop/cf-descriptor-5.f90   -O0  pr92482 (test
for bogus messages, line 10)
PASS: gfortran.dg/c-interop/cf-descriptor-5.f90   -O0  (test for
excess errors)
UNRESOLVED: gfortran.dg/c-interop/cf-descriptor-5.f90   -O0
 compilation failed to produce executable


I think that's the usual result for 'dg-do run' tests if the compilation
failed (be it due to errors or due to effective-target-*).

* * *

If I read https://gcc.gnu.org/onlinedocs/gcc/Floating-Types.html
correctly, we should use _Float128 for the following errors

"The _Float128 type is supported on all systems where __float128 is
supported or where long double has the IEEE binary128 format."

Applies to:


For
typecodes-array-float128.f90
FAIL: gfortran.dg/c-interop/typecodes-array-float128.f90   -O0  (test
for excess errors)
Excess errors:
/gcc/testsuite/gfortran.dg/c-interop/typecodes-array-float128-c.c:35:32:
error: '__float128' undeclared (first use in this function); did you
mean '_Float128'?
typecodes-sanity.f90
FAIL: gfortran.dg/c-interop/typecodes-sanity.f90   -O0  (test for
excess errors)
Excess errors:
/gcc/testsuite/gfortran.dg/c-interop/typecodes-sanity-c.c:41:13:
error: '__float128' undeclared here (not in a function); did you mean
'_Float128'?
typecodes-scalar-float128.f90
FAIL: gfortran.dg/c-interop/typecodes-scalar-float128.f90   -O0  (test
for excess errors)
Excess errors:
/gcc/testsuite/gfortran.dg/c-interop/typecodes-scalar-float128-c.c:34:32:
error: '__float128' undeclared (first use in this function); did you
mean '_Float128'?

* * *

PR100914.f90
FAIL: gfortran.dg/PR100914.f90   -O0  (test for excess errors)
Excess errors:
/gcc/testsuite/gfortran.dg/PR100914.c:8:10: fatal error: quadmath.h:
No such file or directory


 * * *

On a normal x86-64 system, I get XPASS for:

gfortran.dg/PR100914.f90

gfortran.dg/c-interop/typecodes-array-float128.f90

The ! { dg-do run { xfail { { x86_64*-*-* i?86*-*-* } && longdouble128 } } }
does not help as it just checks whether 'sizeof(long double)==16',
which seemingly passes also on x86-64 with 80bit long double.

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[i386] Fix PR C++/64697

2021-09-03 Thread Eric Botcazou
Hi,

this old PR is about a link failure on x86-64/Cygwin with thread-local
storage variables:

use.o:use.cxx:(.text$_ZTWN1N3ptdE[_ZTWN1N3ptdE]+0x15): relocation
truncated to fit: R_X86_64_PC32 against undefined symbol `TLS init
function for N::ptd'
collect2: error: ld returned 1 exit status

At the time MinGW was not affected because the base address of executables was
within the lower 4GB.  This was changed in binutils 2.36 and the bug was then
reported for MinGW to binutils's Bugzilla:
  https://sourceware.org/bugzilla/show_bug.cgi?id=26659

The BFD fix eliminates the link failure and working code is generated at -O0,
but _not_ when optimization is enabled because the optimizer changes:

movq.refptr._ZTH1s(%rip), %rax
testq   %rax, %rax
je  .L2
call_ZTH1s

into:

leaq_ZTH1s(%rip), %rax
testq   %rax, %rax
je  .L2
call_ZTH1s

and the leaq now also gets the relocation overflow.  So the fix is to
teach legitimate_pic_address_disp_p to reject the transformation when
the symbol is an external weak function, which yields:

cmpq$0, .refptr._ZTH1s(%rip)
je  .L2
call_ZTH1s

and the cmpq keeps a relocation that does not overflow.

Tested on x86-64/Windows, OK for mainline, 11 and 10 branches?


2021-09-03  Eric Botcazou  

* config/i386/i386.c (legitimate_pic_address_disp_p): For PE-COFF, do
not return true for external weak function symbols in the medium model.

-- 
Eric Botcazoudiff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index bfefbd718bb..2f708a1cf1c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10664,24 +10664,19 @@ legitimate_pic_address_disp_p (rtx disp)
 	  if (is_imported_p (op0))
 		return true;
 
-	  if (SYMBOL_REF_FAR_ADDR_P (op0)
-		  || !SYMBOL_REF_LOCAL_P (op0))
+	  if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
 		break;
 
-	  /* Function-symbols need to be resolved only for
-	 large-model.
-	 For the small-model we don't need to resolve anything
-	 here.  */
+	  /* Non-external-weak function symbols need to be resolved only
+		 for the large model.  Non-external symbols don't need to be
+		 resolved for large and medium models.  For the small model,
+		 we don't need to resolve anything here.  */
 	  if ((ix86_cmodel != CM_LARGE_PIC
-	   && SYMBOL_REF_FUNCTION_P (op0))
+		   && SYMBOL_REF_FUNCTION_P (op0)
+		   && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
+		  || !SYMBOL_REF_EXTERNAL_P (op0)
 		  || ix86_cmodel == CM_SMALL_PIC)
 		return true;
-	  /* Non-external symbols don't need to be resolved for
-	 large, and medium-model.  */
-	  if ((ix86_cmodel == CM_LARGE_PIC
-		   || ix86_cmodel == CM_MEDIUM_PIC)
-		  && !SYMBOL_REF_EXTERNAL_P (op0))
-		return true;
 	}
 	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
 		   && (SYMBOL_REF_LOCAL_P (op0)


Re: [PATCH] Fix target/102173 ICE after error recovery

2021-09-03 Thread Richard Sandiford via Gcc-patches
apinski--- via Gcc-patches  writes:
> From: Andrew Pinski 
>
> After the recent r12-3278-823685221de986a change, the testcase
> gcc.target/aarch64/sve/acle/general-c/type_redef_1.c started
> to ICE as the code was not ready for error_mark_node in the
> type.  This fixes that and the testcase now passes.

OK, thanks.

> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-sve-builtins.cc (register_vector_type):
>   Handle error_mark_node as the type of the type_decl.
> ---
>  gcc/config/aarch64/aarch64-sve-builtins.cc | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index f71b287570e..bc92213665c 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -3416,6 +3416,7 @@ register_vector_type (vector_type_index type)
>   installing an incorrect type.  */
>if (decl
>&& TREE_CODE (decl) == TYPE_DECL
> +  && TREE_TYPE (decl) != error_mark_node
>&& TYPE_MAIN_VARIANT (TREE_TYPE (decl)) == vectype)
>  vectype = TREE_TYPE (decl);
>acle_vector_types[0][type] = vectype;


Re: [PATCH] Explicitly add -msse2 to compile HF related libgcc source file.

2021-09-03 Thread Jakub Jelinek via Gcc-patches
On Fri, Sep 03, 2021 at 03:41:13PM +0800, liuhongt via Gcc-patches wrote:
> --- a/libgcc/config/i386/64/t-softfp
> +++ b/libgcc/config/i386/64/t-softfp
> @@ -1 +1,6 @@
>  softfp_extras := fixhfti fixunshfti floattihf floatuntihf
> +
> +CFLAGS-fixhfti.c += -msse2
> +CFLAGS-fixunshfti.c += -msse2
> +CFLAGS-floattihf.c += -msse2
> +CFLAGS-floatunstihf.c += -msse2
> \ No newline at end of file

Please avoid this.

> --- a/libgcc/config/i386/t-softfp
> +++ b/libgcc/config/i386/t-softfp
> @@ -1,6 +1,38 @@
>  LIB2ADD += $(srcdir)/config/i386/sfp-exceptions.c
>  
> +# Replace _divhc3 and _mulhc3.
> +libgcc2-hf-functions = _divhc3 _mulhc3
> +LIB2FUNCS_EXCLUDE += $(libgcc2-hf-functions)
> +libgcc2-hf-extras = $(addsuffix .c, $(libgcc2-hf-functions))
> +LIB2ADD_ST += $(addprefix $(srcdir)/config/i386/, $(libgcc2-hf-extras))
> +
>  softfp_extensions := hfsf hfdf hftf hfxf sfdf sftf dftf xftf
>  softfp_truncations := tfhf xfhf dfhf sfhf tfsf dfsf tfdf tfxf
>  
> -softfp_extras += eqhf2
> \ No newline at end of file
> +softfp_extras += eqhf2
> +
> +CFLAGS-extendhfsf2.c += -msse2
> +CFLAGS-extendhfdf2.c += -msse2
> +CFLAGS-extendhftf2.c += -msse2
> +CFLAGS-extendhfxf2.c += -msse2
> +
> +CFLAGS-extendsfdf2.c += -msse2
> +CFLAGS-extendsftf2.c += -msse2
> +
> +CFLAGS-extenddftf2.c += -msse2
> +CFLAGS-extendxftf2.c += -msse2

Aren't the [sdxt]f<->[sdxt]f extend/trunc conversions in libgcc for
quite a while?  If so, using -msse2 for those seems wrong, it is fine
if we require -msse2 support for anything that uses HF or HC mode
types, but if just trying to convert other types it should work even on
just i386 or i486.

> +
> +CFLAGS-truncsfhf2.c += -msse2
> +CFLAGS-truncdfhf2.c += -msse2
> +CFLAGS-truncxfhf2.c += -msse2
> +CFLAGS-trunctfhf2.c += -msse2
> +
> +CFLAGS-truncdfsf2.c += -msse2
> +CFLAGS-trunctfsf2.c += -msse2
> +
> +CFLAGS-trunctfdf2.c += -msse2
> +CFLAGS-trunctfxf2.c += -msse2
> +
> +CFLAGS-eqhf2.c += -msse2
> +CFLAGS-_divhc3.c += -msse2
> +CFLAGS-_mulhc3.c += -msse2
> \ No newline at end of file

See above.

Also, shouldn't the *hf* and *hc* APIs be exported from libgcc_s.so.1?

   254: 00011960   968 FUNCLOCAL  DEFAULT   13 __floattihf
   256: 000116f0   315 FUNCLOCAL  DEFAULT   13 __fixhfti
   263: f950  1358 FUNCLOCAL  DEFAULT   13 __truncsfhf2
   264: db10   272 FUNCLOCAL  DEFAULT   13 __extendhfsf2
   265: df70   399 FUNCLOCAL  DEFAULT   13 __extendhfxf2
   267: 00011fe0   469 FUNCLOCAL  DEFAULT   13 __eqhf2
   268: 00011d30   681 FUNCLOCAL  DEFAULT   13 __floatuntihf
   271: dc20   374 FUNCLOCAL  DEFAULT   13 __extendhfdf2
   274: 00011830   292 FUNCLOCAL  DEFAULT   13 __fixunshfti
   281: dda0   460 FUNCLOCAL  DEFAULT   13 __extendhftf2
   283: e860  1439 FUNCLOCAL  DEFAULT   13 __trunctfhf2
   285: 00011fe0   469 FUNCLOCAL  DEFAULT   13 __nehf2
   286: 6290  1627 FUNCLOCAL  DEFAULT   13 __divhc3
   290: ee00  1499 FUNCLOCAL  DEFAULT   13 __truncxfhf2
   292: f3e0  1392 FUNCLOCAL  DEFAULT   13 __truncdfhf2
   296: 5150  1931 FUNCLOCAL  DEFAULT   13 __mulhc3

So, don't we want GCC_12.0 with those symbols
in config/i386/libgcc-glibc.ver and perhaps others?

Jakub



Re: libgo patch committed: Update to Go1.17rc2 release

2021-09-03 Thread Matthias Klose
On 8/31/21 3:24 PM, H.J. Lu via Gcc-patches wrote:
> On Thu, Aug 12, 2021 at 8:24 PM Ian Lance Taylor via Gcc-patches
>  wrote:
>>
>> This patch updates libgo from the Go1.16.5 release to the Go 1.17rc2
>> release.  As usual with these version updates, the patch itself is too
>> large to attach to this e-mail message.  I've attached the changes to
>> files that are specific to gccgo.  Bootstraped and ran Go testsuite on
>> x86_64-pc-linux-gnu.  Committed to mainline.
>>
>> Ian
> 
> This breaks build with x32:

This is PR/102102

Also seen on x86_64-linux-gnu, when configuring with
--with-multilib-list=m32,m64,mx32

Matthias


[patch] Fix debug info for packed array types in Ada

2021-09-03 Thread Eric Botcazou
Hi,

packed array types are sometimes represented with integer types under the hood
in Ada, but we nevertheless need to emit them as array types in the debug info
so we have the types.get_array_descr_info langhook for this purpose; but it is
not invoked from modified_type_die, which is responsible for:

FAIL: gdb.ada/arrayptr.exp: scenario=minimal: print pa_ptr.all
FAIL: gdb.ada/arrayptr.exp: scenario=minimal: print pa_ptr.all(3)

in the GDB testsuite.

Tested on x86-64/Linux, both GCC and GDB, OK for the mainline?


2021-09-03  Eric Botcazou  

* dwarf2out.c (modified_type_die): Deal with all array types earlier.

-- 
Eric Botcazoudiff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 07a479f6382..85fc24cc2b6 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -13542,6 +13542,7 @@ modified_type_die (tree type, int cv_quals, bool reverse,
   tree qualified_type;
   tree name, low, high;
   dw_die_ref mod_scope;
+  struct array_descr_info info;
   /* Only these cv-qualifiers are currently handled.  */
   const int cv_qual_mask = (TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE
 			| TYPE_QUAL_RESTRICT | TYPE_QUAL_ATOMIC | 
@@ -13786,6 +13787,13 @@ modified_type_die (tree type, int cv_quals, bool reverse,
 	}
 	}
 }
+  else if (TREE_CODE (type) == ARRAY_TYPE
+	   || (lang_hooks.types.get_array_descr_info
+	   && lang_hooks.types.get_array_descr_info (type, )))
+{
+  gen_type_die (type, context_die);
+  return lookup_type_die (type);
+}
   else if (code == INTEGER_TYPE
 	   && TREE_TYPE (type) != NULL_TREE
 	   && subrange_type_for_debug_p (type, , ))
@@ -13836,13 +13844,12 @@ modified_type_die (tree type, int cv_quals, bool reverse,
 	  return lookup_type_die (t);
 	  return lookup_type_die (type);
 	}
-  else if (TREE_CODE (type) != VECTOR_TYPE
-	   && TREE_CODE (type) != ARRAY_TYPE)
-	return lookup_type_die (type_main_variant (type));
-  else
-	/* Vectors have the debugging information in the type,
-	   not the main variant.  */
+  /* Vectors have the debugging information in the type,
+	 not the main variant.  */
+  else if (TREE_CODE (type) == VECTOR_TYPE)
 	return lookup_type_die (type);
+  else
+	return lookup_type_die (type_main_variant (type));
 }
 
   /* Builtin types don't have a DECL_ORIGINAL_TYPE.  For those,


Re: [ARM] PR66791: Replace builtins for vdup_n and vmov_n intrinsics

2021-09-03 Thread Prathamesh Kulkarni via Gcc-patches
On Thu, 2 Sept 2021 at 14:32, Christophe Lyon
 wrote:
>
>
>
> On Tue, Aug 24, 2021 at 10:17 AM Kyrylo Tkachov  
> wrote:
>>
>>
>>
>> > -Original Message-
>> > From: Prathamesh Kulkarni 
>> > Sent: 24 August 2021 09:01
>> > To: Christophe Lyon 
>> > Cc: Kyrylo Tkachov ; gcc Patches > > patc...@gcc.gnu.org>
>> > Subject: Re: [ARM] PR66791: Replace builtins for vdup_n and vmov_n
>> > intrinsics
>> >
>> > On Tue, 17 Aug 2021 at 11:55, Prathamesh Kulkarni
>> >  wrote:
>> > >
>> > > On Thu, 12 Aug 2021 at 19:04, Christophe Lyon
>> > >  wrote:
>> > > >
>> > > >
>> > > >
>> > > > On Thu, Aug 12, 2021 at 1:54 PM Prathamesh Kulkarni
>> >  wrote:
>> > > >>
>> > > >> On Wed, 11 Aug 2021 at 22:23, Christophe Lyon
>> > > >>  wrote:
>> > > >> >
>> > > >> >
>> > > >> >
>> > > >> > On Thu, Jun 24, 2021 at 6:29 PM Kyrylo Tkachov via Gcc-patches > > patc...@gcc.gnu.org> wrote:
>> > > >> >>
>> > > >> >>
>> > > >> >>
>> > > >> >> > -Original Message-
>> > > >> >> > From: Prathamesh Kulkarni 
>> > > >> >> > Sent: 24 June 2021 12:11
>> > > >> >> > To: gcc Patches ; Kyrylo Tkachov
>> > > >> >> > 
>> > > >> >> > Subject: [ARM] PR66791: Replace builtins for vdup_n and vmov_n
>> > intrinsics
>> > > >> >> >
>> > > >> >> > Hi,
>> > > >> >> > This patch replaces builtins for vdup_n and vmov_n.
>> > > >> >> > The patch results in regression for pr51534.c.
>> > > >> >> > Consider following function:
>> > > >> >> >
>> > > >> >> > uint8x8_t f1 (uint8x8_t a) {
>> > > >> >> >   return vcgt_u8(a, vdup_n_u8(0));
>> > > >> >> > }
>> > > >> >> >
>> > > >> >> > code-gen before patch:
>> > > >> >> > f1:
>> > > >> >> > vmov.i32  d16, #0  @ v8qi
>> > > >> >> > vcgt.u8 d0, d0, d16
>> > > >> >> > bx lr
>> > > >> >> >
>> > > >> >> > code-gen after patch:
>> > > >> >> > f1:
>> > > >> >> > vceq.i8 d0, d0, #0
>> > > >> >> > vmvnd0, d0
>> > > >> >> > bx lr
>> > > >> >> >
>> > > >> >> > I am not sure which one is better tho ?
>> > > >> >>
>> > > >> >
>> > > >> > Hi Prathamesh,
>> > > >> >
>> > > >> > This patch introduces a regression on non-hardfp configs (eg arm-
>> > linux-gnueabi or arm-eabi):
>> > > >> > FAIL:  gcc:gcc.target/arm/arm.exp=gcc.target/arm/pr51534.c scan-
>> > assembler-times vmov.i32[ \t]+[dD][0-9]+, #0x 3
>> > > >> > FAIL:  gcc:gcc.target/arm/arm.exp=gcc.target/arm/pr51534.c scan-
>> > assembler-times vmov.i32[ \t]+[qQ][0-9]+, #4294967295 3
>> > > >> >
>> > > >> > Can you fix this?
>> > > >> The issue is, for following test:
>> > > >>
>> > > >> #include 
>> > > >>
>> > > >> uint8x8_t f1 (uint8x8_t a) {
>> > > >>   return vcge_u8(a, vdup_n_u8(0));
>> > > >> }
>> > > >>
>> > > >> armhf code-gen:
>> > > >> f1:
>> > > >> vmov.i32  d0, #0x  @ v8qi
>> > > >> bxlr
>> > > >>
>> > > >> arm softfp code-gen:
>> > > >> f1:
>> > > >> mov r0, #-1
>> > > >> mov r1, #-1
>> > > >> bx  lr
>> > > >>
>> > > >> The code-gen for both is same upto split2 pass:
>> > > >>
>> > > >> (insn 10 6 11 2 (set (reg/i:V8QI 16 s0)
>> > > >> (const_vector:V8QI [
>> > > >> (const_int -1 [0x]) repeated x8
>> > > >> ])) "foo.c":5:1 1052 {*neon_movv8qi}
>> > > >>  (expr_list:REG_EQUAL (const_vector:V8QI [
>> > > >> (const_int -1 [0x]) repeated x8
>> > > >> ])
>> > > >> (nil)))
>> > > >> (insn 11 10 13 2 (use (reg/i:V8QI 16 s0)) "foo.c":5:1 -1
>> > > >>  (nil))
>> > > >>
>> > > >> and for softfp target, split2 pass splits the assignment to r0 and r1:
>> > > >>
>> > > >> (insn 15 6 16 2 (set (reg:SI 0 r0)
>> > > >> (const_int -1 [0x])) "foo.c":5:1 740
>> > {*thumb2_movsi_vfp}
>> > > >>  (nil))
>> > > >> (insn 16 15 11 2 (set (reg:SI 1 r1 [+4 ])
>> > > >> (const_int -1 [0x])) "foo.c":5:1 740
>> > {*thumb2_movsi_vfp}
>> > > >>  (nil))
>> > > >> (insn 11 16 13 2 (use (reg/i:V8QI 0 r0)) "foo.c":5:1 -1
>> > > >>  (nil))
>> > > >>
>> > > >> I suppose we could use a dg-scan for r[0-9]+, #-1 for softfp targets ?
>> > > >>
>> > > > Yes, probably, or try with check-function-bodies.
>> > > Hi,
>> > > Sorry for the late response. Does the attached patch look OK ?
>> > ping https://gcc.gnu.org/pipermail/gcc-patches/2021-August/577532.html
>>
>> Ok.
>
>
>
> Sorry Prathamesh, this does not quite work. See  
> https://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/r12-3294-g7a6f40d0452ec76e126c2612dcfa32f3c73e2315/report-build-info.html
> (red cells in the gcc column)
>
> Can you have a look?
Ah, for arm-none-eabi target the code-gen for test_vcge_u8_uint8x8_t is:
mvn r0, #0
mvn r1, #0
bx  lr

instead of:
mov r0, #-1
mov r1, #-1
bx  lr

I guess both code-gen sequences are correct, but I am not sure under
which circumstance either sequence gets generated.
To accomodate 

[committed] openmp: Improve expand_omp_atomic_pipeline

2021-09-03 Thread Jakub Jelinek via Gcc-patches
Hi!

When __atomic_* builtins were introduced, omp-expand.c (omp-low.c
at that point) has been adjusted in several spots so that it uses
the atomic builtins instead of sync builtins, but
expand_omp_atomic_pipeline has not because the __atomic_compare_exchange_*
APIs take address of the argument, so it kept using __sync_val_compare_swap_*.
That means it always uses seq_cst though.
This patch changes it to use the ATOMIC_COMPARE_EXCHANGE ifn which gimple-fold
folds __atomic_compare_exchange_* into - that ifn also passes expected
directly.

Bootstrapped/regtested on x86_64-linux, committed to trunk.

2021-09-03  Jakub Jelinek  

* omp-expand.c (expand_omp_atomic_pipeline): Use
IFN_ATOMIC_COMPARE_EXCHANGE instead of
BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_? so that memory order
can be provided.

--- gcc/omp-expand.c.jj 2021-09-01 11:37:41.966556334 +0200
+++ gcc/omp-expand.c2021-09-02 15:50:58.276341091 +0200
@@ -8807,8 +8807,6 @@ expand_omp_atomic_pipeline (basic_block
   edge e;
   enum built_in_function fncode;
 
-  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
- order to use the RELAXED memory model effectively.  */
   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
+ index + 1);
   cmpxchg = builtin_decl_explicit (fncode);
@@ -8825,6 +8823,15 @@ expand_omp_atomic_pipeline (basic_block
   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
   si = gsi_last_nondebug_bb (load_bb);
   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
+  location_t loc = gimple_location (gsi_stmt (si));
+  enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
+  enum memmodel imo = omp_memory_order_to_memmodel (omo);
+  tree mo = build_int_cst (NULL, imo);
+  if (imo == MEMMODEL_RELEASE)
+imo = MEMMODEL_RELAXED;
+  else if (imo == MEMMODEL_ACQ_REL)
+imo = MEMMODEL_ACQUIRE;
+  tree fmo = build_int_cst (NULL, imo);
 
   /* For floating-point values, we'll need to view-convert them to integers
  so that we can perform the atomic compare and swap.  Simplify the
@@ -8921,7 +8928,15 @@ expand_omp_atomic_pipeline (basic_block
  GSI_SAME_STMT);
 
   /* Build the compare statement.  */
-  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
+  tree ctype = build_complex_type (itype);
+  int flag = int_size_in_bytes (itype);
+  new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
+ ctype, 6, iaddr, loadedi,
+ storedi,
+ build_int_cst (integer_type_node,
+flag),
+ mo, fmo);
+  new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
   new_storedi = force_gimple_operand_gsi (,
  fold_convert (TREE_TYPE (loadedi),
new_storedi),

Jakub



Re: [RFC] ldist: Recognize rawmemchr loop patterns

2021-09-03 Thread Stefan Schulze Frielinghaus via Gcc-patches
On Fri, Aug 20, 2021 at 12:35:58PM +0200, Richard Biener wrote:
[...]
> > >
> > > +  /* Handle strlen like loops.  */
> > > +  if (store_dr == NULL
> > > +  && integer_zerop (pattern)
> > > +  && TREE_CODE (reduction_iv.base) == INTEGER_CST
> > > +  && TREE_CODE (reduction_iv.step) == INTEGER_CST
> > > +  && integer_onep (reduction_iv.step)
> > > +  && (types_compatible_p (TREE_TYPE (reduction_var), size_type_node)
> > > + || TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (reduction_var
> > > +{
> > >
> > > I wonder what goes wrong with a larger or smaller wrapping IV type?
> > > The iteration
> > > only stops when you load a NUL and the increments just wrap along (you're
> > > using the pointer IVs to compute the strlen result).  Can't you simply 
> > > truncate?
> >
> > I think truncation is enough as long as no overflow occurs in strlen or
> > strlen_using_rawmemchr.
> >
> > > For larger than size_type_node (actually larger than ptr_type_node would 
> > > matter
> > > I guess), the argument is that since pointer wrapping would be undefined 
> > > anyway
> > > the IV cannot wrap either.  Now, the correct check here would IMHO be
> > >
> > >   TYPE_PRECISION (TREE_TYPE (reduction_var)) < TYPE_PRECISION
> > > (ptr_type_node)
> > >|| TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (pointer-iv-var))
> > >
> > > ?
> >
> > Regarding the implementation which makes use of rawmemchr:
> >
> > We can count at most PTRDIFF_MAX many bytes without an overflow.  Thus,
> > the maximal length we can determine of a string where each character has
> > size S is PTRDIFF_MAX / S without an overflow.  Since an overflow for
> > ptrdiff type is undefined we have to make sure that if an overflow
> > occurs, then an overflow occurs for reduction variable, too, and that
> > this is undefined, too.  However, I'm not sure anymore whether we want
> > to respect overflows in all cases.  If TYPE_PRECISION (ptr_type_node)
> > equals TYPE_PRECISION (ptrdiff_type_node) and an overflow occurs, then
> > this would mean that a single string consumes more than half of the
> > virtual addressable memory.  At least for architectures where
> > TYPE_PRECISION (ptrdiff_type_node) == 64 holds, I think it is reasonable
> > to neglect the case where computing pointer difference may overflow.
> > Otherwise we are talking about strings with lenghts of multiple
> > pebibytes.  For other architectures we might have to be more precise
> > and make sure that reduction variable overflows first and that this is
> > undefined.
> >
> > Thus a conservative condition would be (I assumed that the size of any
> > integral type is a power of two which I'm not sure if this really holds;
> > IIRC the C standard requires only that the alignment is a power of two
> > but not necessarily the size so I might need to change this):
> >
> > /* Compute precision (reduction_var) < (precision (ptrdiff_type) - 1 - log2 
> > (sizeof (load_type))
> >or in other words return true if reduction variable overflows first
> >and false otherwise.  */
> >
> > static bool
> > reduction_var_overflows_first (tree reduction_var, tree load_type)
> > {
> >   unsigned precision_ptrdiff = TYPE_PRECISION (ptrdiff_type_node);
> >   unsigned precision_reduction_var = TYPE_PRECISION (TREE_TYPE 
> > (reduction_var));
> >   unsigned size_exponent = wi::exact_log2 (wi::to_wide (TYPE_SIZE_UNIT 
> > (load_type)));
> >   return wi::ltu_p (precision_reduction_var, precision_ptrdiff - 1 - 
> > size_exponent);
> > }
> >
> > TYPE_PRECISION (ptrdiff_type_node) == 64
> > || (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (reduction_var))
> > && reduction_var_overflows_first (reduction_var, load_type)
> >
> > Regarding the implementation which makes use of strlen:
> >
> > I'm not sure what it means if strlen is called for a string with a
> > length greater than SIZE_MAX.  Therefore, similar to the implementation
> > using rawmemchr where we neglect the case of an overflow for 64bit
> > architectures, a conservative condition would be:
> >
> > TYPE_PRECISION (size_type_node) == 64
> > || (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (reduction_var))
> > && TYPE_PRECISION (reduction_var) <= TYPE_PRECISION (size_type_node))
> >
> > I still included the overflow undefined check for reduction variable in
> > order to rule out situations where the reduction variable is unsigned
> > and overflows as many times until strlen(,_using_rawmemchr) overflows,
> > too.  Maybe this is all theoretical nonsense but I'm afraid of uncommon
> > architectures.  Anyhow, while writing this down it becomes clear that
> > this deserves a comment which I will add once it becomes clear which way
> > to go.
> 
> I think all the arguments about objects bigger than half of the address-space
> also are valid for 32bit targets and thus 32bit size_type_node (or
> 32bit pointer size).
> I'm not actually sure what's the canonical type to check against, whether
> it's size_type_node (Cs size_t), ptr_type_node (Cs void *) or 

Re: [PATCH V3 0/6] Initial support for AVX512FP16

2021-09-03 Thread Iain Sandoe via Gcc-patches
Hi Joseph,

> On 2 Sep 2021, at 21:03, Joseph Myers  wrote:
> 
> On Thu, 2 Sep 2021, Iain Sandoe via Gcc-patches wrote:
> 
>> diff --git a/libgcc/soft-fp/eqdf2.c b/libgcc/soft-fp/eqdf2.c
>> index 2a44ee377ce..a3bb664f5f1 100644
>> --- a/libgcc/soft-fp/eqdf2.c
>> +++ b/libgcc/soft-fp/eqdf2.c
>> @@ -28,6 +28,7 @@
>>License along with the GNU C Library; if not, see
>>.  */
>> 
>> +#define DarwinMode DF
>> #include "soft-fp.h"
>> #include "double.h"
> 
> All these files are supposed to be taken unmodified from glibc.  They 
> shouldn't contain any OS-specific code, such as a define of DarwinMode.  
> sfp-machine.h, however, is libgcc-local, hence putting the definition of 
> strong_alias there.

OK, that makes sense.
> 
> So you need some other way to extract the argument type of name in order 
> to use it in a declaration of aliasname.  E.g.
> 
> __typeof (_Generic (name,
>CMPtype (*) (HFtype, HFtype): (HFtype) 0,
>CMPtype (*) (SFtype, SFtype): (SFtype) 0,
>CMPtype (*) (DFtype, DFtype): (DFtype) 0,
>CMPtype (*) (TFtype, TFtype): (TFtype) 0))

thanks for the suggestion

> Now in fact I think the include ordering means none of the *type macros 
> are defined here.  But if you do e.g.
> 
> typedef float alias_SFtype __attribute__ ((mode (SF)));
> 
> and similar, you could use alias_SFtype in the above.  And so keep the 
> changes to the Darwin-specific parts of the libgcc-local sfp-machine.h.

this is what I’m testing - OK if it bootstraps on x86_64-darwin, linux?

thanks
Iain


[PATCH] libgcc, soft-float: Fix strong_alias macro use for Darwin.

Darwin does not support strong symbol aliases and a work-
around is provided in sfp-machine.h where a second function
is created that simply calls the original.  However this
needs the arguments to the synthesized function to track
the mode of the original function.

So the fix here is to match known floating point modes from
the incoming function and apply the one found to the new
function args.

The matching is highly specific to the current set of modes
and will need adjusting should more cases be added.

Signed-off-by: Iain Sandoe 

libgcc/ChangeLog:

* config/i386/sfp-machine.h (alias_HFtype, alias_SFtype
alias_DFtype, alias_TFtype): New.
(ALIAS_SELECTOR): New.
(strong_alias): Use __typeof and a _Generic selector to
provide the type to the synthesized function.
---
 libgcc/config/i386/sfp-machine.h | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/libgcc/config/i386/sfp-machine.h b/libgcc/config/i386/sfp-machine.h
index f15d29d3755..172ebc70c8d 100644
--- a/libgcc/config/i386/sfp-machine.h
+++ b/libgcc/config/i386/sfp-machine.h
@@ -75,10 +75,24 @@ void __sfp_handle_exceptions (int);
 
 /* Define ALIASNAME as a strong alias for NAME.  */
 #if defined __MACH__
-/* Mach-O doesn't support aliasing.  If these functions ever return
-   anything but CMPtype we need to revisit this... */
+/* Mach-O doesn't support aliasing, so we build a secondary function for
+   the alias - we need to do a bit of a dance to find out what the type of
+   the arguments is and then apply that to the secondary function.
+   If these functions ever return anything but CMPtype we need to revisit
+   this... */
+typedef float alias_HFtype __attribute__ ((mode (HF)));
+typedef float alias_SFtype __attribute__ ((mode (SF)));
+typedef float alias_DFtype __attribute__ ((mode (DF)));
+typedef float alias_TFtype __attribute__ ((mode (TF)));
+#define ALIAS_SELECTOR \
+  CMPtype (*) (alias_HFtype, alias_HFtype): (alias_HFtype) 0, \
+  CMPtype (*) (alias_SFtype, alias_SFtype): (alias_SFtype) 0, \
+  CMPtype (*) (alias_DFtype, alias_DFtype): (alias_DFtype) 0, \
+  CMPtype (*) (alias_TFtype, alias_TFtype): (alias_TFtype) 0
 #define strong_alias(name, aliasname) \
-  CMPtype aliasname (TFtype a, TFtype b) { return name(a, b); }
+  CMPtype aliasname (__typeof (_Generic (name, ALIAS_SELECTOR)) a, \
+__typeof (_Generic (name, ALIAS_SELECTOR)) b) \
+   { return name (a, b); }
 #else
 # define strong_alias(name, aliasname) _strong_alias(name, aliasname)
 # define _strong_alias(name, aliasname) \
-- 



Re: [PATCH v3, Fortran] TS 29113 testsuite

2021-09-03 Thread Christophe Lyon via Gcc-patches
Hi,

On Thu, Aug 19, 2021 at 7:29 PM Sandra Loosemore 
wrote:

> On 7/27/21 5:07 AM, Tobias Burnus wrote:
> > Hi Sandra, hi Thomas, hi all,
> >
> > @Thomas K: Comments about the following - and of course to the
> > testsuite itself - are highly welcome.
> >
> > In my opinion, the testsuite LGTM and can be committed.
> >
> > @Sandra:
> > - Thoughts on the directory name? (cf. below)
> > - Give others/Thomas a chance to comment on this,
> >before committing it. (And remove the now passing xfails.)
> >Thanks for the testsuite!
> >
> > Regarding:
> >
> > * XFAILS - as discussed before, I think having some XFAILS is
> >not ideal but fine, especially if the XFAIL/PASS ratio is low
> >and there are plans to fix the known fails, some posted patches
> >for those, and open PRs for the issues.
> >
> > (I think there is one patch pending review and two patches pending
> > committal with some modifications by Sandra - plus several patches
> > by José which still need to be reviewed.)
> >
> >
> > * Naming of the directory + .exp file:
> >   ts29113/ts29113.exp
> >is okay. Given that 'select rank' (in F2018 but not in TS29113)
> >is also tested, there was some controversy regarding the name
> >and the coverage; additionally, TS29113 is a name which is not
> >immediately clear. Thus, we could use some other name like:
> >   c-interop/c-interop.exp
> >or  (suggestions?).
> >In any case, I do not feel strong about either name.
> >
> > * I had a closer look at earlier versions of the testsuite, I did
> >browse through the current one + looked at the diff to previous
> >version, but it is big enough and the spec is complex enough that
> >I have likely missed something.
> >Thus: Additional reviews are highly welcome!
>
> Here is the current version of the testsuite.  Changes since the last
> version include:
>
> * Renaming the directory and .exp file from ts29113 -> c-interop per the
> request above.  There have been no additional review comments.
>
> * I also made it explicit that section and constraint numbers mentioned
> in comments in the test cases refer to TS 29113.  I considered using the
> numbering from 2018 standard, but given that the standard already
> renumbered things twice since the time TS 29113 was published I didn't
> really see the point, as long as it is unambiguous what document is
> being cited.
>
> * I flattened the subdirectory structure after realizing that
> dg-additional-sources can't cope with relative pathnames in remote-host
> testing.
>
> * I split up the typecodes tests (for testing that descriptors
> constructed by the front end match ISO_Fortran_binding.h) to allow for
> finer-grained control over xfails and dg-require-effective-target, and
> added a new effective target for Fortran C_FLOAT128 support.  There are
> also some additional things being tested now in this group.
>
> The current xfails in the tests reflect the two patches I posted last
> night that are still waiting for review:
>
> https://gcc.gnu.org/pipermail/fortran/2021-August/056382.html
> https://gcc.gnu.org/pipermail/fortran/2021-August/056383.html
>
> I've been testing on x86 (both 32- and 64-bit) and powerpc64le-linux-gnu.
>
>
I'm not quite sure I understand the expected status of this patch: are all
the "expected" failures tagged as XFAIL?

If yes, then there's a problem because I see lots of unresolved on
aarch64/arm
For instance on aarch64:
 /gcc/testsuite/gfortran.dg/c-interop/cf-descriptor-5.f90:10:19: Error:
Sorry, character dummy argument 'a' at (1) with assumed length is not yet
supported for procedure 'ftest' with BIND(C) attribute
compiler exited with status 1
XFAIL: gfortran.dg/c-interop/cf-descriptor-5.f90   -O0  pr92482 (test for
bogus messages, line 10)
PASS: gfortran.dg/c-interop/cf-descriptor-5.f90   -O0  (test for excess
errors)
UNRESOLVED: gfortran.dg/c-interop/cf-descriptor-5.f90   -O0  compilation
failed to produce executable

/gcc/testsuite/gfortran.dg/c-interop/cf-out-descriptor-5.f90:9:19: Error:
Sorry, character dummy argument 'a' at (1) with assumed length is not yet
supported for procedure 'ftest' with BIND(C) attribute
/gcc/testsuite/gfortran.dg/c-interop/cf-out-descriptor-5.f90:23:23: Error:
Sorry, character dummy argument 'a' at (1) with assumed length is not yet
supported for procedure 'ctest' with BIND(C) attribute
/gcc/testsuite/gfortran.dg/c-interop/cf-out-descriptor-5.f90:29:23: Error:
Sorry, character dummy argument 'a' at (1) with assumed length is not yet
supported for procedure 'ftest' with BIND(C) attribute
compiler exited with status 1
XFAIL: gfortran.dg/c-interop/cf-out-descriptor-5.f90   -O0  pr92482 (test
for bogus messages, line 9)
XFAIL: gfortran.dg/c-interop/cf-out-descriptor-5.f90   -O0  pr92482 (test
for bogus messages, line 23)
XFAIL: gfortran.dg/c-interop/cf-out-descriptor-5.f90   -O0  pr92482 (test
for bogus messages, line 29)
PASS: gfortran.dg/c-interop/cf-out-descriptor-5.f90   -O0  (test for excess

[PATCH] Explicitly add -msse2 to compile HF related libgcc source file.

2021-09-03 Thread liuhongt via Gcc-patches
For 32-bit libgcc configure w/o sse2, there's would be an error since
GCC only support _Float16 under sse2. Explicitly add -msse2 for those
HF related libgcc functions, so users can still link them w/ the
upper configuration.

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
  Ok for trunk?

libgcc/ChangeLog:

* Makefile.in: Adjust to support specific CFLAGS for each
libgcc source file.
* config/i386/64/t-softfp: Explicitly add -msse2 for HF
related libgcc source files.
* config/i386/t-softfp: Ditto.
* config/i386/_divhc3.c: New file.
* config/i386/_mulhc3.c: New file.
---
 libgcc/Makefile.in |  2 +-
 libgcc/config/i386/64/t-softfp |  5 +
 libgcc/config/i386/_divhc3.c   |  4 
 libgcc/config/i386/_mulhc3.c   |  4 
 libgcc/config/i386/t-softfp| 34 +-
 5 files changed, 47 insertions(+), 2 deletions(-)
 create mode 100644 libgcc/config/i386/_divhc3.c
 create mode 100644 libgcc/config/i386/_mulhc3.c

diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index 7ec97584554..32e329f7764 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -314,7 +314,7 @@ MULTIOSSUBDIR := $(shell if test $(MULTIOSDIR) != .; then 
echo /$(MULTIOSDIR); f
 inst_libdir = $(libsubdir)$(MULTISUBDIR)
 inst_slibdir = $(slibdir)$(MULTIOSSUBDIR)
 
-gcc_compile_bare = $(CC) $(INTERNAL_CFLAGS)
+gcc_compile_bare = $(CC) $(INTERNAL_CFLAGS) $(CFLAGS-$(

[PATCH v4] MIPS: add .module arch and ase to all output asm

2021-09-03 Thread YunQiang Su
Currently, the asm output file for MIPS has no rev info.
It can make some trouble, for example:

  assembler is mips1 by default,
  gcc is fpxx by default.

To assemble the output of gcc -S, we have to pass -mips2
to assembler.

The same situation is for some CPU has extension insn.
Octeon is an example.
So we can just add ".set arch=octeon".

If an ASE is enabled, .module ase will also be used.
---
 gcc/config/mips/mips.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 493d3de48..ade5d7041 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -9896,6 +9896,43 @@ mips_file_start (void)
   else
 fputs ("\t.module\tnooddspreg\n", asm_out_file);
 
+  fprintf (asm_out_file, "\t.module\tarch=%s\n", mips_arch_info->name);
+  /* FIXME: DSPR3 is not supported by GCC? gas does support it */
+  if (TARGET_DSPR2)
+fputs ("\t.module\tdspr2\n", asm_out_file);
+  else if (TARGET_DSP)
+fputs ("\t.module\tdsp\n", asm_out_file);
+  if (TARGET_EVA)
+fputs ("\t.module\teva\n", asm_out_file);
+  if (TARGET_MCU)
+fputs ("\t.module\tmcu\n", asm_out_file);
+  if (TARGET_MDMX)
+fputs ("\t.module\tmdmx\n", asm_out_file);
+  if (TARGET_MIPS3D)
+fputs ("\t.module\tmips3d\n", asm_out_file);
+  if (TARGET_MT)
+fputs ("\t.module\tmt\n", asm_out_file);
+  if (TARGET_SMARTMIPS)
+fputs ("\t.module\tsmartmips\n", asm_out_file);
+  if (TARGET_VIRT)
+fputs ("\t.module\tvirt\n", asm_out_file);
+  if (TARGET_MSA)
+fputs ("\t.module\tmsa\n", asm_out_file);
+  if (TARGET_XPA)
+fputs ("\t.module\txpa\n", asm_out_file);
+  /* FIXME: MIPS16E2 is not supported by GCC? gas does support it */
+  if (TARGET_CRC)
+fputs ("\t.module\tcrc\n", asm_out_file);
+  if (TARGET_GINV)
+fputs ("\t.module\tginv\n", asm_out_file);
+  if (TARGET_LOONGSON_MMI)
+fputs ("\t.module\tloongson-mmi\n", asm_out_file);
+  /* FIXME: LOONGSON-CAM is not supported by GCC? gas does support it */
+  if (TARGET_LOONGSON_EXT2)
+fputs ("\t.module\tloongson-ext2\n", asm_out_file);
+  else if (TARGET_LOONGSON_EXT)
+fputs ("\t.module\tloongson-ext\n", asm_out_file);
+
 #else
 #ifdef HAVE_AS_GNU_ATTRIBUTE
   {
-- 
2.30.2