Re: [PATCH] match.pd: Fix x * 0.0 -> 0.0 folding [PR104389]

2022-02-04 Thread Richard Biener via Gcc-patches



> Am 05.02.2022 um 00:08 schrieb Jakub Jelinek via Gcc-patches 
> :
> 
> Hi!
> 
> The recent PR95115 change to punt in const_binop on folding operation
> with non-NaN operands into NaN if flag_trapping_math broke the following
> testcase, because the x * 0.0 simplification punts only if
> x may be a NaN (because NaN * 0.0 is NaN, not 0.0) or if one of the operands
> could be negative zero.  But Inf * 0.0 or -Inf * 0.0 is also NaN, not
> 0.0, so when NaNs are honored we need to punt for possible infinities too.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
> and 11/10 where the PR95115 change has been unfortunately backported to
> as well?

Ok


> 2022-02-04  Jakub Jelinek  
> 
>PR tree-optimization/104389
>(x * 0 -> 0): Punt if x maybe infinite and NaNs are honored.
> 
>* gcc.dg/pr104389.c: New test.
> 
> --- gcc/match.pd.jj2022-02-04 14:36:55.393599880 +0100
> +++ gcc/match.pd2022-02-04 20:30:48.548213594 +0100
> @@ -256,10 +256,12 @@ (define_operator_list SYNC_FETCH_AND_AND
> /* Maybe fold x * 0 to 0.  The expressions aren't the same
>when x is NaN, since x * 0 is also NaN.  Nor are they the
>same in modes with signed zeros, since multiplying a
> -   negative value by 0 gives -0, not +0.  */
> +   negative value by 0 gives -0, not +0.  Nor when x is +-Inf,
> +   since x * 0 is NaN.  */
> (simplify
>  (mult @0 real_zerop@1)
>  (if (!tree_expr_maybe_nan_p (@0)
> +  && (!HONOR_NANS (type) || !tree_expr_maybe_infinite_p (@0))
>   && !tree_expr_maybe_real_minus_zero_p (@0)
>   && !tree_expr_maybe_real_minus_zero_p (@1))
>   @1))
> --- gcc/testsuite/gcc.dg/pr104389.c.jj2022-02-04 20:37:40.579537142 +0100
> +++ gcc/testsuite/gcc.dg/pr104389.c2022-02-04 20:37:20.787809803 +0100
> @@ -0,0 +1,26 @@
> +/* PR tree-optimization/104389 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +/* { dg-add-options ieee } */
> +/* { dg-require-effective-target inf } */
> +
> +__attribute__((noipa)) double
> +foo (void)
> +{
> +  double a = __builtin_huge_val ();
> +  return a * 0.0;
> +}
> +
> +__attribute__((noipa)) long double
> +bar (void)
> +{
> +  return __builtin_huge_vall () * 0.0L;
> +}
> +
> +int
> +main ()
> +{
> +  if (!__builtin_isnan (foo ()) || !__builtin_isnanl (bar ()))
> +__builtin_abort ();
> +  return 0;
> +}
> 
>Jakub
> 


[pushed] c++: assignment, aggregate, array [PR104300]

2022-02-04 Thread Jason Merrill via Gcc-patches
The PR92385 fix meant that we see more VEC_INIT_EXPR outside of INIT_EXPR;
in such cases, we need to wrap them in TARGET_EXPR.  I previously fixed
that in build_array_copy; we also need it in process_init_constructor.
After fixing that, I needed to adjust a few places to recognize the
VEC_INIT_EXPR even inside a TARGET_EXPR.  And prevent cp_fully_fold_init
from lowering VEC_INIT_EXPR too soon.  And handle COMPOUND_EXPR inside
TARGET_EXPR better.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR c++/104300
PR c++/92385

gcc/cp/ChangeLog:

* cp-tree.h (get_vec_init_expr): New.
(target_expr_needs_replace): New.
* cp-gimplify.cc (cp_gimplify_init_expr): Use it.
(struct cp_fold_data): New.
(cp_fold_r): Only genericize inits at end of fn.
(cp_fold_function): Here.
(cp_fully_fold_init): Not here.
* init.cc (build_vec_init): Use get_vec_init_expr.
* tree.cc (build_vec_init_expr): Likewise.
* typeck2.cc (split_nonconstant_init_1): Likewise.
(process_init_constructor): Wrap VEC_INIT_EXPR in
TARGET_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/initlist-array14.C: New test.
---
 gcc/cp/cp-tree.h  | 27 +
 gcc/cp/cp-gimplify.cc | 39 ---
 gcc/cp/init.cc| 12 --
 gcc/cp/tree.cc|  4 +-
 gcc/cp/typeck2.cc |  9 -
 gcc/testsuite/g++.dg/cpp0x/initlist-array14.C | 12 ++
 6 files changed, 82 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/initlist-array14.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index b9eb71fbc3a..d71be0a5bc7 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -4201,6 +4201,18 @@ more_aggr_init_expr_args_p (const 
aggr_init_expr_arg_iterator *iter)
 #define VEC_INIT_EXPR_VALUE_INIT(NODE) \
   TREE_LANG_FLAG_1 (VEC_INIT_EXPR_CHECK (NODE))
 
+/* If T is a VEC_INIT_EXPR, return it, possibly stripping a TARGET_EXPR
+   wrapper.  Otherwise, return null.  */
+inline tree
+get_vec_init_expr (tree t)
+{
+  if (t && TREE_CODE (t) == TARGET_EXPR)
+t = TARGET_EXPR_INITIAL (t);
+  if (t && TREE_CODE (t) == VEC_INIT_EXPR)
+return t;
+  return NULL_TREE;
+}
+
 /* The condition under which this MUST_NOT_THROW_EXPR actually blocks
exceptions.  NULL_TREE means 'true'.  */
 #define MUST_NOT_THROW_COND(NODE) \
@@ -5361,6 +5373,21 @@ more_aggr_init_expr_args_p (const 
aggr_init_expr_arg_iterator *iter)
&& TARGET_EXPR_INITIAL (NODE)   \
&& !VOID_TYPE_P (TREE_TYPE (TARGET_EXPR_INITIAL (NODE
 
+/* True if T is a TARGET_EXPR for which we'll need to replace_decl to use it as
+   an initializer.  */
+inline bool
+target_expr_needs_replace (tree t)
+{
+  if (!t || TREE_CODE (t) != TARGET_EXPR)
+return false;
+  tree init = TARGET_EXPR_INITIAL (t);
+  if (!init || !VOID_TYPE_P (TREE_TYPE (init)))
+return false;
+  while (TREE_CODE (init) == COMPOUND_EXPR)
+init = TREE_OPERAND (init, 1);
+  return TREE_CODE (init) != AGGR_INIT_EXPR;
+}
+
 /* True if EXPR expresses direct-initialization of a TYPE.  */
 #define DIRECT_INIT_EXPR_P(TYPE,EXPR)  \
   (TREE_CODE (EXPR) == TARGET_EXPR && TREE_LANG_FLAG_2 (EXPR)  \
diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
index d1c653c5fda..d7323fb5c09 100644
--- a/gcc/cp/cp-gimplify.cc
+++ b/gcc/cp/cp-gimplify.cc
@@ -249,8 +249,7 @@ cp_gimplify_init_expr (tree *expr_p)
   if (TREE_CODE (from) == TARGET_EXPR)
 if (tree init = TARGET_EXPR_INITIAL (from))
   {
-   if (VOID_TYPE_P (TREE_TYPE (init))
-   && TREE_CODE (init) != AGGR_INIT_EXPR)
+   if (target_expr_needs_replace (from))
  {
/* If this was changed by cp_genericize_target_expr, we need to
   walk into it to replace uses of the slot.  */
@@ -950,14 +949,23 @@ struct cp_genericize_data
 
 /* Perform any pre-gimplification folding of C++ front end trees to
GENERIC.
-   Note:  The folding of none-omp cases is something to move into
+   Note:  The folding of non-omp cases is something to move into
  the middle-end.  As for now we have most foldings only on GENERIC
  in fold-const, we need to perform this before transformation to
  GIMPLE-form.  */
 
-static tree
-cp_fold_r (tree *stmt_p, int *walk_subtrees, void *data)
+struct cp_fold_data
 {
+  hash_set pset;
+  bool genericize; // called from cp_fold_function?
+
+  cp_fold_data (bool g): genericize (g) {}
+};
+
+static tree
+cp_fold_r (tree *stmt_p, int *walk_subtrees, void *data_)
+{
+  cp_fold_data *data = (cp_fold_data*)data_;
   tree stmt = *stmt_p;
   enum tree_code code = TREE_CODE (stmt);
 
@@ -967,7 +975,7 @@ cp_fold_r (tree *stmt_p, int *walk_subtrees, void *data)
   if (TREE_CODE (PTRMEM_CST_MEMBER (stmt)) == FUNCTION_DECL
  && DECL_IMMEDIATE_FUNCTION_P 

Re: [PATCH] testsuite: Robustify aarch64/simd tests against more aggressive DCE

2022-02-04 Thread Andrew Pinski via Gcc-patches
On Fri, Feb 4, 2022 at 3:21 AM Richard Sandiford via Gcc-patches
 wrote:
>
> Sorry, just realised I'd never replied to this.
>
> Marc Poulhies  writes:
> > Eric Botcazou  writes:
> >>> The new variables seem to be unused, so I think slightly stronger
> >>> DCE could remove the calls even after the patch.  Perhaps the containing
> >>> functions should take an int32x4_t *ptr or something, with the calls
> >>> assigning to different ptr[] indices.
> >>
> >> We run a minimal DCE pass at -O0 in our compiler to eliminate all the 
> >> garbage
> >> generated by the gimplifier for variable-sized types (people care about 
> >> code
> >> size at -O0 in specific contexts) but it does not touch anything written by
> >> the user (and debugging is unaffected of course).  Given that the builtins 
> >> are
> >> pure functions and the arguments have no side effects, it eliminates the
> >> calls, but adding a LHS blocks that because this minimal DCE pass preserves
> >> anything user-related, in particular assignments to user variables.
> >>
> >>> I think it would be better to do that using new calls though,
> >>> and xfail the existing ones when they no longer work.  For example:
> >>>
> >>>   /* { dg-error "lane -1 out of range 0 - 7" "" {target *-*-*} 0 } */
> >>>   vqdmlal_high_laneq_s16 (int32x4_a, int16x8_b, int16x8_c, -1);
> >>>   /* { dg-error "lane -1 out of range 0 - 7" "" {target *-*-*} 0 } */
> >>>   ptr[0] = vqdmlal_high_laneq_s16 (int32x4_a, int16x8_b, int16x8_c, -1);
> >>>
> >>> That way we don't lose the existing tests.
> >>
> >> Frankly I'm not quite sure of what we can lose by adding a LHS here, can 
> >> you
> >> elaborate a bit?  We would need a solution that works out of the box with 
> >> our
> >> compiler in the future, i.e. without having to tweak 50 testcases again.
> >
> > Hi Richard,
> >
> > Thanks for your reply!
> >
> > As Éric, I'm also wondering why having an LHS in the existing tests would
> > make us lose them. I guess I'm not familiar enough with this part of
> > the testsuite and I'm missing something.
>
> The problem is that we only enforce lane bounds via calls to
> __builtin_aarch64_im_lane_boundsi.  In previous releases, the check
> only happened at RTL expansion time, so the check would be skipped if
> any gimple pass removed the call.  Now we do the checking during
> folding, but that still misses cases.  E.g., compare the -O0 and -O1
> behaviour for:

Actually I looked into the below testcase and
__builtin_aarch64_im_lane_boundsi is not part of the intrinsic.
Basically some intrinsics have their own bounds checking as part of
the builtin rather than using __builtin_aarch64_im_lane_boundsi.
That is, the problem shows up in GCC 11, where the folding of
__builtin_aarch64_im_lane_boundsi on the gimple level didn't happen.
I will file a bug report on this regression later tonight or tomorrow.

Here are the uses of aarch64_simd_lane_bounds which emit the error
(besides the __builtin_aarch64_im_lane_boundsi builtin itself):

function:
aarch64_expand_fcmla_builtin

builtin_simd_arg args:
SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX
SIMD_ARG_LANE_INDEX
SIMD_ARG_LANE_PAIR_INDEX
SIMD_ARG_LANE_QUADTUP_INDEX

rtl named patterns:
aarch64_ld_lane
aarch64_st_lane

Thanks,
Andrew Pinski

>
> #include 
>
> void f(int32x4_t *p0, int16x8_t *p1) {
> vqdmlal_high_laneq_s16(p0[0], p1[0], p1[1], -1);
> //p0[0] = vqdmlal_high_laneq_s16(p0[0], p1[0], p1[1], -1);
> }
>
> -O0 gives the error but -O1 doesn't [https://godbolt.org/z/1KosTY43T].
> The -O1 behaviour here is wrong: badly-formed calls should be rejected
> with a diagnostic even if the calls are unused.  Clang gets this right
> in both cases [https://godbolt.org/z/EGxs8jq97].
>
> I think keeping the lhs-free calls is important for making sure that
> the -O0 behaviour doesn't regress without the DCE.
>
> Your DCE will regress it, but that's the fault of the arm_neon.h
> implementation rather than the fault of your pass.  Having the
> tests but XFAILing them seems like the best way of dealing with that.
> Hopefully we'll then see some progression if the arm_neon.h implementation
> is improved in future.
>
> Thanks,
> Richard


Re: [PATCH] configure: add --disable-fix-includes

2022-02-04 Thread Allan McRae via Gcc-patches

On 5/2/22 01:22, Martin Liška wrote:

On 2/4/22 14:30, Jakub Jelinek via Gcc-patches wrote:

We don't ship any include-fixed headers in Fedora/RHEL.


Removing include-fixed from an installed folder, I see:

make[2]: Entering directory '/home/marxin/Programming/postgres/src/common'
gcc -Wall -Wmissing-prototypes -Wpointer-arith 
-Wdeclaration-after-statement -Werror=vla -Wendif-labels 
-Wmissing-format-attribute -Wimplicit-fallthrough=3 -Wcast-function-type 
-Wformat-security -fno-strict-aliasing -fwrapv 
-fexcess-precision=standard -Wno-format-truncation 
-Wno-stringop-truncation -O3 -march=native -flto=auto -DFRONTEND -I. 
-I../../src/common -I../../src/include  -D_GNU_SOURCE  
-DVAL_CC="\"gcc\"" -DVAL_CPPFLAGS="\"-D_GNU_SOURCE\"" 
-DVAL_CFLAGS="\"-Wall -Wmissing-prototypes -Wpointer-arith 
-Wdeclaration-after-statement -Werror=vla -Wendif-labels 
-Wmissing-format-attribute -Wimplicit-fallthrough=3 -Wcast-function-type 
-Wformat-security -fno-strict-aliasing -fwrapv 
-fexcess-precision=standard -Wno-format-truncation 
-Wno-stringop-truncation -O3 -march=native -flto=auto\"" 
-DVAL_CFLAGS_SL="\"-fPIC\"" -DVAL_LDFLAGS="\"-O3 -march=native 
-flto=auto -Wl,--as-needed 
-Wl,-rpath,'/usr/local/pgsql/lib64',--enable-new-dtags\"" 
-DVAL_LDFLAGS_EX="\"\"" -DVAL_LDFLAGS_SL="\"\"" -DVAL_LIBS="\"-lpgcommon 
-lpgport -lz -lreadline -lm \""  -c -o pg_lzcompress.o pg_lzcompress.c

In file included from pg_lzcompress.c:186:
/usr/include/limits.h:124:26: error: no include path in which to search 
for limits.h

   124 | # include_next 
   |  ^
pg_lzcompress.c:226:9: error: ‘INT_MAX’ undeclared here (not in a function)
   226 | INT_MAX,    /* No 
upper limit on what we'll try to

   | ^~~
pg_lzcompress.c:189:1: note: ‘INT_MAX’ is defined in header
‘<limits.h>’; did you forget to ‘#include <limits.h>’?

   188 | #include "common/pg_lzcompress.h"
   +++ |+#include 

How do you solve this in Fedora/RHEL?


The Fedora gcc.spec file has this:

mv $FULLPATH/include-fixed/syslimits.h $FULLPATH/include/syslimits.h
mv $FULLPATH/include-fixed/limits.h $FULLPATH/include/limits.h

My understanding is that these are not real fixincludes-processed headers.

Allan


Re: [committed] libstdc++: Fix filesystem::remove_all races [PR104161]

2022-02-04 Thread Jonathan Wakely via Gcc-patches
On Fri, 4 Feb 2022 at 23:55, Jonathan Wakely wrote:
> +// Used to implement filesystem::remove_all.
> +fs::recursive_directory_iterator&
> +fs::recursive_directory_iterator::__erase(error_code* ecptr)
> +{
> +  error_code ec;
> +  if (!_M_dirs)
> +{
> +  ec = std::make_error_code(errc::invalid_argument);
> +  return *this;
> +}
> +
> +  // We never want to skip permission denied when removing files.
> +  const bool skip_permission_denied = false;
> +  // We never want to follow directory symlinks when removing files.
> +  const bool nofollow = true;
> +
> +  // Loop until we find something we can remove.
> +  while (!ec)
> +{
> +  auto& top = _M_dirs->top();
> +
> +  if (top.entry._M_type == file_type::directory)
> +   {
> + _Dir dir = top.open_subdir(skip_permission_denied, nofollow, ec);
> + if (!ec)
> +   {
> + __glibcxx_assert(dir.dirp != nullptr);
> + if (dir.advance(skip_permission_denied, ec))
> +   {
> + // Non-empty directory, recurse into it.
> + _M_dirs->push(std::move(dir));
> + continue;
> +   }
> + if (!ec)
> +   {
> + // Directory is empty so we can remove it.
> + if (top.rmdir(ec))
> +   break; // Success
> +   }
> +   }
> +   }
> +  else if (top.unlink(ec))
> +   break; // Success
> +  else if (top.entry._M_type == file_type::none)
> +   {
> + // We did not have a cached type, so it's possible that top.entry
> + // is actually a directory, and that's why the unlink above failed.
> +#ifdef EPERM
> + // POSIX.1-2017 says unlinking a directory returns EPERM,
> + // but LSB allows EISDIR too. Some targets don't even define EPERM.
> + if (ec.value() == EPERM || ec.value() == EISDIR)
> +#else
> + if (ec.value() == EISDIR)
> +#endif

This doesn't work on Windows because the top.unlink(ec) sets a Windows
error using the system category, so doesn't match the errno values
here.

I have a fix.

>  std::uintmax_t
>  fs::remove_all(const path& p)
>  {
> -  return fs::do_remove_all(p, ErrorReporter{"cannot remove all", p});
> +  uintmax_t count = 0;
> +  auto st = filesystem::status(p);
> +  if (!exists(st))
> +return 0;
> +  if (is_directory(st))

Gah, this remove_all(const path&) overload was supposed to be using
the same logic as the one below with an error_code parameter.

I'll fix it on Monday.



Go patch committed: Add "any" as alias for "interface{}"

2022-02-04 Thread Ian Lance Taylor via Gcc-patches
In the Go 1.18 release the predeclared identifier "any" is a new alias
for the type "interface{}".  This patch adds this alias to the Go
frontend.  This requires updating a test.  Bootstrapped and ran Go
testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
d0607656a50cd571d9ab260d040f1daee89d8eb0
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index a42d88d25c4..f78561c3483 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-7d510bf5fcec9b0ccc0282f4193a80c0a164df63
+61f7cf4b9db0587ff099aa36832a355b90ee1bf9
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/gogo.cc b/gcc/go/gofrontend/gogo.cc
index e2fd509f58a..b1e210ee6ac 100644
--- a/gcc/go/gofrontend/gogo.cc
+++ b/gcc/go/gofrontend/gogo.cc
@@ -141,6 +141,15 @@ Gogo::Gogo(Backend* backend, Linemap* linemap, int, int 
pointer_size)
 this->add_named_type(error_type);
   }
 
+  // "any" is an alias for the empty interface type.
+  {
+Type* empty = Type::make_empty_interface_type(loc);
+Named_object* no = Named_object::make_type("any", NULL, empty, loc);
+Named_type* nt = no->type_value();
+nt->set_is_alias();
+this->add_named_type(nt);
+  }
+
   this->globals_->add_constant(Typed_identifier("true",
Type::make_boolean_type(),
loc),
diff --git a/gcc/testsuite/go.test/test/fixedbugs/issue14652.go 
b/gcc/testsuite/go.test/test/fixedbugs/issue14652.go
index d53b4126683..586663b676f 100644
--- a/gcc/testsuite/go.test/test/fixedbugs/issue14652.go
+++ b/gcc/testsuite/go.test/test/fixedbugs/issue14652.go
@@ -1,4 +1,4 @@
-// errorcheck
+// compile
 
 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
@@ -6,4 +6,5 @@
 
 package p
 
-var x any // ERROR "undefined: any|undefined type .*any.*"
+// any is now permitted instead of interface{}
+var x any


[committed] libstdc++: Fix std::filesystem build failure for Windows

2022-02-04 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux (and built on x86_64-w64-mingw), pushed to trunk.


The std::filesystem code needs to use posix::DIR not ::DIR, as that is
an alias for _WDIR on Windows.

libstdc++-v3/ChangeLog:

* src/filesystem/dir-common.h (_Dir_base::openat): Change return
type to use portable posix::DIR alias.
---
 libstdc++-v3/src/filesystem/dir-common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/src/filesystem/dir-common.h 
b/libstdc++-v3/src/filesystem/dir-common.h
index ee4f33b6bc1..0b7665a3f70 100644
--- a/libstdc++-v3/src/filesystem/dir-common.h
+++ b/libstdc++-v3/src/filesystem/dir-common.h
@@ -173,7 +173,7 @@ struct _Dir_base
 return true;
   }
 
-  static ::DIR*
+  static posix::DIR*
   openat(int fd, const posix::char_type* pathname, bool nofollow)
   {
 #if _GLIBCXX_HAVE_FDOPENDIR && defined O_RDONLY && defined O_DIRECTORY \
-- 
2.34.1



Re: [PATCH] c++, v3: Further address_compare fixes [PR89074]

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Fri, Feb 04, 2022 at 04:42:41PM -0500, Jason Merrill wrote:
> > @@ -20,9 +20,16 @@ along with GCC; see the file COPYING3.
> >   #ifndef GCC_FOLD_CONST_H
> >   #define GCC_FOLD_CONST_H
> > -/* Non-zero if we are folding constants inside an initializer; zero
> > -   otherwise.  */
> > +/* Nonzero if we are folding constants inside an initializer or a C++
> > +   manifestly-constant-evaluated context; zero otherwise.
> > +   Should be used when folding in initializer enables additional
> > +   optimizations.  */
> >   extern int folding_initializer;
> > +/* Nonzer of we are folding C++ manifestly-constant-evaluated context; zero
> 
> Still need to fix this typo.

Sorry, finally now fixed in my copy.

> > +   otherwise.
> > +   Should be used when certain constructs shouldn't be optimized
> > +   during folding in that context.  */
> > +bool folding_cxx_constexpr = false;
> > +
> >   /* The following constants represent a bit based encoding of GCC's
> >  comparison operators.  This encoding simplifies transformations
> >  on relational comparison operators, such as AND and OR.  */
> > @@ -16628,41 +16636,55 @@ address_compare (tree_code code, tree ty
> 
> Incidentally, the function comment needs to document TYPE.

Will do.

> So at this point equal can be 0 or 2, the latter because either the offset
> is out of bounds, or because we're comparing offset 0 to one-past-the-end.

Yes.

> In the out-of-bounds case, we're into undefined behavior, so I'd be inclined
> to return 2 immediately rather than continue, so the code below only needs
> to worry about possibly overlapping/contiguous objects.

You mean for folding_cxx_constexpr ?  The code does that basically, with one
exception, the folding_initializer FUNCTION_DECL cmp FUNCTION_DECL case.
We don't track sizes of functions, so the size of 1 is just a hack to
pretend functions don't have zero size.  Some functions can have zero size
if they contain just __builtin_unreachable, but it is very rare.
But I guess I could move that
  if (folding_initializer
  && TREE_CODE (base0) == FUNCTION_DECL
  && TREE_CODE (base1) == FUNCTION_DECL)
return 0;
above the size checking block and then indeed right after that do
  if (folding_cxx_constexpr && equal)
return equal;
with a comment.

> In the code below, in !constexpr mode we decide to return 0 even though
> equal == 2 in three cases which need more commentary, either together or
> separately:
> 
> 1) One is a string and the other a decl.  Do we know that we can't layout a
> string and a global variable next to each other?  This overlaps a lot
> with...
> 
> 2) We're comparing as pointers (rather than integers), so we return unequal
> even if they could be equal in practice if the objects are contiguous.  The
> comment says this but still needs a rationale; it doesn't seem useful to me
> for the limited cases that could reach here with equal == 2.

For the pointer comparisons we just exploit the undefined behavior and
pretend they can't be adjacent even if they actually sometimes can be,
and we've been doing that intentionally for years.
If one does (uintptr_t) &a == (uintptr_t) &b, we try to be more
conservative.

> 3) We're comparing a local variable and a global, so they really can't be
> equal unless the offset is far out of bounds.  This is currently last; we
> might move it first and treat strings like globals?

For the automatic vs. global or strings, it is very unlikely they'd be
adjacent, with typical memory layouts there couldn't be any heap and stack
would need to grow until it reaches end of data or bss section on a page
boundary.  Strings perhaps could be adjacent in .rodata, but again it is
fairly rare.
And sure, I can try to improve comments.

Jakub



[committed] libstdc++: Allow Clang to use <stdatomic.h> before C++23

2022-02-04 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux (and smoke tested with clang), pushed to trunk.


There is code that only expects to be compiled with clang++ and uses its
<stdatomic.h>, which works because Clang supports the _Atomic specifier
in C++. The addition of <stdatomic.h> to libstdc++ broke this code, as
now it finds the C++ header instead, which is empty for any standard
mode before C++23.

This change allows that code to keep working as before, by forwarding to
clang's <stdatomic.h>.

libstdc++-v3/ChangeLog:

* include/c_compatibility/stdatomic.h [__clang__]: Use
#include_next <stdatomic.h>.
---
 libstdc++-v3/include/c_compatibility/stdatomic.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libstdc++-v3/include/c_compatibility/stdatomic.h 
b/libstdc++-v3/include/c_compatibility/stdatomic.h
index 852574bd87c..95c72615b4e 100644
--- a/libstdc++-v3/include/c_compatibility/stdatomic.h
+++ b/libstdc++-v3/include/c_compatibility/stdatomic.h
@@ -120,5 +120,7 @@ using std::atomic_flag_clear_explicit;
 using std::atomic_thread_fence;
 using std::atomic_signal_fence;
 
+#elif defined __clang__
+# include_next 
 #endif // C++23
 #endif // _GLIBCXX_STDATOMIC_H
-- 
2.34.1



[committed] libstdc++: Remove un-implementable noexcept from Filesystem TS operations

2022-02-04 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


LWG 3014 removed these incorrect noexcept specifications from the C++17
std::filesystem operations. They are also incorrect on the experimental
TS versions and should be removed from them too.

libstdc++-v3/ChangeLog:

* include/experimental/bits/fs_ops.h (fs::copy_file): Remove
noexcept.
(fs::create_directories): Likewise.
(fs::remove_all): Likewise.
* src/filesystem/ops.cc (fs::copy_file): Remove noexcept.
(fs::create_directories): Likewise.
(fs::remove_all): Likewise.
---
 libstdc++-v3/include/experimental/bits/fs_ops.h | 8 
 libstdc++-v3/src/filesystem/ops.cc  | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/fs_ops.h 
b/libstdc++-v3/include/experimental/bits/fs_ops.h
index 6e475306c23..dafd1ec79a0 100644
--- a/libstdc++-v3/include/experimental/bits/fs_ops.h
+++ b/libstdc++-v3/include/experimental/bits/fs_ops.h
@@ -74,19 +74,19 @@ inline namespace v1
   { return copy_file(__from, __to, copy_options::none); }
 
   inline bool
-  copy_file(const path& __from, const path& __to, error_code& __ec) noexcept
+  copy_file(const path& __from, const path& __to, error_code& __ec)
   { return copy_file(__from, __to, copy_options::none, __ec); }
 
   bool copy_file(const path& __from, const path& __to, copy_options __option);
   bool copy_file(const path& __from, const path& __to, copy_options __option,
-error_code& __ec) noexcept;
+error_code& __ec);
 
   void copy_symlink(const path& __existing_symlink, const path& __new_symlink);
   void copy_symlink(const path& __existing_symlink, const path& __new_symlink,
error_code& __ec) noexcept;
 
   bool create_directories(const path& __p);
-  bool create_directories(const path& __p, error_code& __ec) noexcept;
+  bool create_directories(const path& __p, error_code& __ec);
 
   bool create_directory(const path& __p);
   bool create_directory(const path& __p, error_code& __ec) noexcept;
@@ -262,7 +262,7 @@ inline namespace v1
   bool remove(const path& __p, error_code& __ec) noexcept;
 
   uintmax_t remove_all(const path& __p);
-  uintmax_t remove_all(const path& __p, error_code& __ec) noexcept;
+  uintmax_t remove_all(const path& __p, error_code& __ec);
 
   void rename(const path& __from, const path& __to);
   void rename(const path& __from, const path& __to, error_code& __ec) noexcept;
diff --git a/libstdc++-v3/src/filesystem/ops.cc 
b/libstdc++-v3/src/filesystem/ops.cc
index c020f621a88..e2a2cefdf49 100644
--- a/libstdc++-v3/src/filesystem/ops.cc
+++ b/libstdc++-v3/src/filesystem/ops.cc
@@ -372,7 +372,7 @@ fs::copy_file(const path& from, const path& to, 
copy_options option)
 
 bool
 fs::copy_file(const path& from, const path& to, copy_options options,
- error_code& ec) noexcept
+ error_code& ec)
 {
 #ifdef _GLIBCXX_HAVE_SYS_STAT_H
   return do_copy_file(from.c_str(), to.c_str(), copy_file_options(options),
@@ -424,7 +424,7 @@ fs::create_directories(const path& p)
 }
 
 bool
-fs::create_directories(const path& p, error_code& ec) noexcept
+fs::create_directories(const path& p, error_code& ec)
 {
   if (p.empty())
 {
@@ -1098,7 +1098,7 @@ fs::remove_all(const path& p)
 }
 
 std::uintmax_t
-fs::remove_all(const path& p, error_code& ec) noexcept
+fs::remove_all(const path& p, error_code& ec)
 {
   // Use the C++17 implementation.
   return std::filesystem::remove_all(p.native(), ec);
-- 
2.34.1



[PATCH v2] doc: RISC-V: Document the `-misa-spec=' option

2022-02-04 Thread Maciej W. Rozycki
We have recently updated the default for the `-misa-spec=' option, yet 
we still have not documented it nor its `--with-isa-spec=' counterpart 
in the GCC manuals.  Fix that.

gcc/
* doc/install.texi (Configuration): Document `--with-isa-spec=' 
RISC-V option.
* doc/invoke.texi (Option Summary): List `-misa-spec=' RISC-V
option.
(RISC-V Options): Document it.
---
> Thanks.  I have a version of this floating around somewhere.  I probably 
> forgot
> to post it and it's generally inferior to yours, so this LGTM.

 Thank you for your review.

> The only thing I'd point out is that this specifically controls the version of
> the Unprivileged (formerly called user) specification, as there are many 
> RISC-V
> specifications and it can be a bit ambiguous what folks mean when they just 
> say
> "specification".  Not sure exactly what the right wording is there, maybe
> "version of the RISC-V Unprivileged (formerly user-level) ISA specification"?

 Good point.  I have updated the text to your suggested wording, which is 
also what I would use if I were to propose it (modulo capitalisation).  I 
will commit the change as included here shortly then unless I hear an
objection.

  Maciej

Changes from v1:

- Clarify it is the Unprivileged (formerly User-Level) ISA specification 
  the options concerned refer to.

- Fix a typo `-misa-spec' vs `-misa-spec=' in ChangeLog.
---
 gcc/doc/install.texi |   14 ++
 gcc/doc/invoke.texi  |   17 +
 2 files changed, 31 insertions(+)

gcc-riscv-misa-doc.diff
Index: gcc/gcc/doc/install.texi
===
--- gcc.orig/gcc/doc/install.texi
+++ gcc/gcc/doc/install.texi
@@ -1599,6 +1599,20 @@ On certain targets this option sets the
 size as a power of two in bytes.  On AArch64 @var{size} is required to be 
either
 12 (4KB) or 16 (64KB).
 
+@item --with-isa-spec=@var{ISA-spec-string}
+On RISC-V targets specify the default version of the RISC-V Unprivileged
+(formerly User-Level) ISA specification to produce code conforming to.
+The possibilities for @var{ISA-spec-string} are:
+@table @code
+@item 2.2
+Produce code conforming to version 2.2.
+@item 20190608
+Produce code conforming to version 20190608.
+@item 20191213
+Produce code conforming to version 20191213.
+@end table
+In the absence of this configuration option the default version is 20191213.
+
 @item --enable-__cxa_atexit
 Define if you want to use __cxa_atexit, rather than atexit, to
 register C++ destructors for local statics and global objects.
Index: gcc/gcc/doc/invoke.texi
===
--- gcc.orig/gcc/doc/invoke.texi
+++ gcc/gcc/doc/invoke.texi
@@ -1184,6 +1184,7 @@ See RS/6000 and PowerPC Options.
 -mabi=@var{ABI-string} @gol
 -mfdiv  -mno-fdiv @gol
 -mdiv  -mno-div @gol
+-misa-spec=@var{ISA-spec-string} @gol
 -march=@var{ISA-string} @gol
 -mtune=@var{processor-string} @gol
 -mpreferred-stack-boundary=@var{num} @gol
@@ -27632,6 +27633,22 @@ Do or don't use hardware instructions fo
 M extension.  The default is to use them if the specified architecture has
 these instructions.
 
+@item -misa-spec=@var{ISA-spec-string}
+@opindex misa-spec
+Specify the version of the RISC-V Unprivileged (formerly User-Level)
+ISA specification to produce code conforming to.  The possibilities
+for @var{ISA-spec-string} are:
+@table @code
+@item 2.2
+Produce code conforming to version 2.2.
+@item 20190608
+Produce code conforming to version 20190608.
+@item 20191213
+Produce code conforming to version 20191213.
+@end table
+The default is @option{-misa-spec=20191213} unless GCC has been configured
+with @option{--with-isa-spec=} specifying a different default version.
+
 @item -march=@var{ISA-string}
 @opindex march
 Generate code for given RISC-V ISA (e.g.@: @samp{rv64im}).  ISA strings must be


[committed] libstdc++: Fix filesystem::remove_all races [PR104161]

2022-02-04 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux and powerpc-aix, pushed to trunk.


This fixes the remaining filesystem::remove_all race condition by using
POSIX openat to recurse into sub-directories and using POSIX unlinkat to
remove files. This avoids the remaining race where the directory being
removed is replaced with a symlink after the directory has been opened,
so that the filesystem::remove("subdir/file") resolves to "target/file"
instead, because "subdir" has been removed and replaced with a symlink.
The previous patch only fixed the case where the directory was replaced
with a symlink before we tried to open it, but it still used the full
(potentially compromised) path as an argument to filesystem::remove.

The first part of the fix is to use openat when recursing into a
sub-directory with recursive_directory_iterator. This means that opening
"dir/subdir" uses the file descriptor for "dir", and so is sure to open
"dir/subdir" and not "symlink/subdir". (The previous patch to use
O_NOFOLLOW already ensured we won't open "dir/symlink/" here.)

The second part of the fix is to use unlinkat for the remove_all
operation. Previously we used a directory_iterator to get the name of
each file in a directory and then used filesystem::remove(iter->path())
on that name. This meant that any checks (e.g. O_NOFOLLOW) done by the
iterator could be invalidated before the remove operation on that
pathname. The directory iterator contains an open DIR stream, which we
can use to obtain a file descriptor to pass to unlinkat. This ensures
that the file being deleted really is contained within the directory
we're iterating over, rather than using a pathname that could resolve to
some other file.

The filesystem::remove_all function previously used a (non-recursive)
filesystem::directory_iterator for each directory, and called itself
recursively for sub-directories. The new implementation uses a single
filesystem::recursive_directory_iterator object, and calls a new __erase
member function on that iterator. That new __erase member function does
the actual work of removing a file (or a directory after its contents
have been iterated over and removed) using unlinkat. That means we don't
need to expose the DIR stream or its file descriptor to the remove_all
function, it's still encapsulated by the iterator class.

It would be possible to add a __rewind member to directory iterators
too, to call rewinddir after each modification to the directory. That
would make it more likely for filesystem::remove_all to successfully
remove everything even if files are being written to the directory tree
while removing it. It's unclear if that is actually preferable, or if
it's better to fail and report an error at the first opportunity.

The necessary APIs (openat, unlinkat, fdopendir, dirfd) are defined in
POSIX.1-2008, and in Glibc since 2.10. But if the target doesn't provide
them, the original code (with race conditions) is still used.

This also reduces the number of small memory allocations needed for
std::filesystem::remove_all, because we do not store the full path to
every directory entry that is iterated over. The new filename_only
option means we only store the filename in the directory entry, as that
is all we need in order to use openat or unlinkat.

Finally, rather than duplicating everything for the Filesystem TS, the
std::experimental::filesystem::remove_all implementation now just calls
std::filesystem::remove_all to do the work.

libstdc++-v3/ChangeLog:

PR libstdc++/104161
* acinclude.m4 (GLIBCXX_CHECK_FILESYSTEM_DEPS): Check for dirfd
and unlinkat.
* config.h.in: Regenerate.
* configure: Regenerate.
* include/bits/fs_dir.h (recursive_directory_iterator): Declare
remove_all overloads as friends.
(recursive_directory_iterator::__erase): Declare new member
function.
* include/bits/fs_fwd.h (remove, remove_all): Declare.
* src/c++17/fs_dir.cc (_Dir): Add filename_only parameter to
constructor. Pass file descriptor argument to base constructor.
(_Dir::dir_and_pathname, _Dir::open_subdir, _Dir::do_unlink)
(_Dir::unlink, _Dir::rmdir): Define new member functions.
(directory_iterator): Pass filename_only argument to _Dir
constructor.
(recursive_directory_iterator::_Dir_stack): Adjust constructor
parameters to take a _Dir rvalue instead of creating one.
(_Dir_stack::orig): Add data member for storing original path.
(_Dir_stack::report_error): Define new member function.
(__directory_iterator_nofollow): Move here from dir-common.h and
fix value to be a power of two.
(__directory_iterator_filename_only): Define new constant.
(recursive_directory_iterator): Construct _Dir object and move
into _M_dirs stack. Pass skip_permission_denied argument to first
advance call.
(recursive_directory_iterator::increment): Use _Dir::open_subdir.

[PATCH] match.pd: Fix x * 0.0 -> 0.0 folding [PR104389]

2022-02-04 Thread Jakub Jelinek via Gcc-patches
Hi!

The recent PR95115 change to punt in const_binop on folding operation
with non-NaN operands into NaN if flag_trapping_math broke the following
testcase, because the x * 0.0 simplification punts just if
x may be a NaN (because NaN * 0.0 is NaN not 0.0) or if one of the operands
could be negative zero.  But Inf * 0.0 or -Inf * 0.0 is also NaN, not
0.0, so when NaNs are honored we need to punt for possible infinities too.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
and 11/10 where the PR95115 change has been unfortunately backported to
as well?

2022-02-04  Jakub Jelinek  

PR tree-optimization/104389
(x * 0 -> 0): Punt if x maybe infinite and NaNs are honored.

* gcc.dg/pr104389.c: New test.

--- gcc/match.pd.jj 2022-02-04 14:36:55.393599880 +0100
+++ gcc/match.pd2022-02-04 20:30:48.548213594 +0100
@@ -256,10 +256,12 @@ (define_operator_list SYNC_FETCH_AND_AND
 /* Maybe fold x * 0 to 0.  The expressions aren't the same
when x is NaN, since x * 0 is also NaN.  Nor are they the
same in modes with signed zeros, since multiplying a
-   negative value by 0 gives -0, not +0.  */
+   negative value by 0 gives -0, not +0.  Nor when x is +-Inf,
+   since x * 0 is NaN.  */
 (simplify
  (mult @0 real_zerop@1)
  (if (!tree_expr_maybe_nan_p (@0)
+  && (!HONOR_NANS (type) || !tree_expr_maybe_infinite_p (@0))
   && !tree_expr_maybe_real_minus_zero_p (@0)
   && !tree_expr_maybe_real_minus_zero_p (@1))
   @1))
--- gcc/testsuite/gcc.dg/pr104389.c.jj  2022-02-04 20:37:40.579537142 +0100
+++ gcc/testsuite/gcc.dg/pr104389.c 2022-02-04 20:37:20.787809803 +0100
@@ -0,0 +1,26 @@
+/* PR tree-optimization/104389 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-add-options ieee } */
+/* { dg-require-effective-target inf } */
+
+__attribute__((noipa)) double
+foo (void)
+{
+  double a = __builtin_huge_val ();
+  return a * 0.0;
+}
+
+__attribute__((noipa)) long double
+bar (void)
+{
+  return __builtin_huge_vall () * 0.0L;
+}
+
+int
+main ()
+{
+  if (!__builtin_isnan (foo ()) || !__builtin_isnanl (bar ()))
+__builtin_abort ();
+  return 0;
+}

Jakub



Re: [PATCH] c++: Improve diagnostics for template args terminated with >= or >>= [PR104319]

2022-02-04 Thread Jason Merrill via Gcc-patches

On 2/4/22 17:10, Jakub Jelinek wrote:

On Fri, Feb 04, 2022 at 04:54:39PM -0500, Jason Merrill wrote:

Bootstrapped/regtested on powerpc64le-linux, ok for trunk?
Or shall it wait for GCC 13?


Hmm, I lean toward GCC 13; this seems more of a stage 3 change.


Ok.


I see you test valid uses of >= in template arguments; you should also test
valid >>= (with another overloaded operator).


Is >>= valid there though?  The comment says:
Although the standard says "assignment-expression", it forbids
throw-expressions or assignments in the template argument.
Therefore, we use "conditional-expression" instead.  */


Ah, good point.  Incidentally that's out of date; it now says 
"constant-expression", which expands to "conditional-expression".


Jason



Re: [PATCH, V2] Use system default for long double if not specified on PowerPC.

2022-02-04 Thread Segher Boessenkool
On Fri, Feb 04, 2022 at 02:10:03PM -0600, Peter Bergner wrote:
> On 2/4/22 12:03 PM, Segher Boessenkool wrote:
> > On Fri, Feb 04, 2022 at 04:43:53PM +0100, Andreas Schwab wrote:
> >> On Feb 04 2022, Michael Meissner via Gcc-patches wrote:
> >>> If the user did not specify a default long double format when configuring
> >>> GCC, use the long double default from the host compiler.
> >>
> >> That doesn't make any sense.  The host compiler can be any random
> >> compiler completely unrelated to the target.
> > 
> > Yes, see .
> [snip]
> > I already NAKed this patch for weeks, and I do it again now.
> 
> Did you NAK the patch due to its specific implementation or are you
> even against the aim of the patch, namely that gcc configure tries
> to determine the long double default of the underlying system and
> matches that?

As I said before, I didn't even read the patch, just the one line
summary was enough for a NAK.  If the patch in fact does something else,
then it is still incorrect, and needs a very different subject and
summary.

I hope you see how "using the default of the underlying system" is
questionable in itself, but is something completely different from using
the default of the build compiler, which makes even less sense.

You want a configure flag to set the default long double format to be
IEEE QP.  This cannot be enabled by default until a (big) majority of
systems "in the wild" will work with that (only on powerpc64le-linux
or some *big* thing like that is fine, only default it to enabled there
then).  At that point in time, configure should complain, and the user
would have to explicitly *disable* it to build without that support.

Anything else is pretend progress and costs us too much.  Yes, this is
a "flag day", but only for the default.  We will still support the
double-double format for a loong time, if only because there are no
concrete plans for moving most of its users (or no plans at all is more
truthful actually)!


Segher


Re: [PATCH] c++: Improve diagnostics for template args terminated with >= or >>= [PR104319]

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Fri, Feb 04, 2022 at 04:54:39PM -0500, Jason Merrill wrote:
> > Bootstrapped/regtested on powerpc64le-linux, ok for trunk?
> > Or shall it wait for GCC 13?
> 
> Hmm, I lean toward GCC 13; this seems more of a stage 3 change.

Ok.

> I see you test valid uses of >= in template arguments; you should also test
> valid >>= (with another overloaded operator).

Is >>= valid there though?  The comment says:
   Although the standard says "assignment-expression", it forbids
   throw-expressions or assignments in the template argument.
   Therefore, we use "conditional-expression" instead.  */

Jakub



Re: [PATCH] c++: conditional noexcept-spec on defaulted comparison op [PR96242]

2022-02-04 Thread Jason Merrill via Gcc-patches

On 2/4/22 12:04, Patrick Palka wrote:

On Thu, 3 Feb 2022, Jason Merrill wrote:


On 2/3/22 16:06, Patrick Palka wrote:

On Thu, 3 Feb 2022, Jason Merrill wrote:


On 2/3/22 14:58, Patrick Palka wrote:

When synthesizing a defaulted comparison op from
maybe_instantiate_noexcept, we seem to be forgetting to instantiate the
noexcept-spec afterwards.


Hmm, there shouldn't be any need to instantiate the noexcept-spec
afterwards,
it should have been set by ~comp_info.


It appears the comp_info class sets the noexcept-spec only if the
comparison function hasn't been declared with an explicit noexcept-spec.
Otherwise the class doesn't touch the noexcept-spec, and it remains a
DEFERRED_NOEXCEPT with non-NULL DEFERRED_NOEXCEPT_PATTERN.


Ah, I see.  So perhaps we should entirely skip the current DECL_MAYBE_DELETED
handling in maybe_instantiate_noexcept if we have DEFERRED_NOEXCEPT with
non-null DEFERRED_NOEXCEPT_PATTERN (which seems to want another macro)?


Hmm, I tried something to that effect but it looks like mark_used relies
solely on the DECL_MAYBE_DELETED handling in maybe_instantiate_noexcept
to determine deletedness of a defaulted comparison operator (via trying
to synthesize it).  So by sometimes sidestepping this handling, we end
up failing to diagnose the use of the deleted defaulted <=> in e.g.:

   #include <compare>

   struct A { };

   template<bool B>
   struct X {
 auto operator<=>(const X&) const noexcept(B) = default;
 A a;
   };

   X x_t;
   auto c = x_t <=> x_t; // should be error: use of deleted <=> b/c A lacks <=>

In light of this, I suppose mark_used should directly perform
DECL_MAYBE_DELETED synthesization of its own?

And it looks like DECL_MAYBE_DELETED is always false after doing
maybe_synthesize_method, so I think maybe_instantiate_noexcept should
return !DECL_DELETED_FN instead of !DECL_MAYBE_DELETED after synthesization.

How does this look?  Lightly tested so far, bootstrap and regtesting in 
progress.

-- >8 --

PR c++/96242

gcc/cp/ChangeLog:

* decl2.cc (mark_used): Directly synthesize a DECL_MAYBE_DELETED
fn by calling maybe_synthesize_method instead of relying on
maybe_instantiate_noexcept.
* pt.cc (maybe_instantiate_noexcept): Restrict DECL_MAYBE_DELETED
synthesization to only fns with an implicit noexcept-spec, and
return !DECL_DELETED_FN instead of !DECL_MAYBE_DELETED afterward.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/spaceship-synth15.C: New test.
---
  gcc/cp/decl2.cc   | 17 ++
  gcc/cp/pt.cc  | 11 +-
  .../g++.dg/cpp2a/spaceship-synth15.C  | 22 +++
  3 files changed, 41 insertions(+), 9 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/spaceship-synth15.C

diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index a2aa5f1de4e..4d3798d02fe 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -5772,10 +5772,19 @@ mark_used (tree decl, tsubst_flags_t complain)
if (TREE_CODE (decl) == CONST_DECL)
  used_types_insert (DECL_CONTEXT (decl));
  
-  if (TREE_CODE (decl) == FUNCTION_DECL

-  && !DECL_DELETED_FN (decl)
-  && !maybe_instantiate_noexcept (decl, complain))
-return false;
+  if (TREE_CODE (decl) == FUNCTION_DECL)
+{
+  if (DECL_MAYBE_DELETED (decl))
+   {
+ ++function_depth;
+ maybe_synthesize_method (decl);
+ --function_depth;
+   }
+
+  if (!DECL_DELETED_FN (decl)
+ && !maybe_instantiate_noexcept (decl, complain))
+   return false;


At this point we might move the call to maybe_instantiate_noexcept after 
the DECL_DELETED_FN handling just below.  OK either way.



+}
  
if (TREE_CODE (decl) == FUNCTION_DECL

&& DECL_DELETED_FN (decl))
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index d219bba6ac1..584c752529b 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -25982,7 +25982,11 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
&& (!flag_noexcept_type || type_dependent_expression_p (fn)))
  return true;
  
-  if (DECL_MAYBE_DELETED (fn))

+  tree fntype = TREE_TYPE (fn);
+  tree spec = TYPE_RAISES_EXCEPTIONS (fntype);
+
+  if (DECL_MAYBE_DELETED (fn)
+  && (!spec || UNEVALUATED_NOEXCEPT_SPEC_P (spec)))
  {
if (fn == current_function_decl)
/* We're in start_preparsed_function, keep going.  */
@@ -25991,12 +25995,9 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
++function_depth;
maybe_synthesize_method (fn);
--function_depth;
-  return !DECL_MAYBE_DELETED (fn);
+  return !DECL_DELETED_FN (fn);
  }
  
-  tree fntype = TREE_TYPE (fn);

-  tree spec = TYPE_RAISES_EXCEPTIONS (fntype);
-
if (!spec || !TREE_PURPOSE (spec))
  return true;
  
diff --git a/gcc/testsuite/g++.dg/cpp2a/spaceship-synth15.C b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth15.C

new file mode 100644
index 000..00ea6c10474
--- 

Re: [PATCH] c++: Improve diagnostics for template args terminated with >= or >>= [PR104319]

2022-02-04 Thread Jason Merrill via Gcc-patches

On 2/4/22 09:12, Jakub Jelinek wrote:

Hi!

As mentioned in the PR, for C++98 we have diagnostics that expect >>
terminating template arguments to be a mistake for > > (C++11

said it has to be treated that way), while if user trying to spare the
spacebar doesn't separate > from following = or >> from following =,
the diagnostics is confusing, while clang suggests adding space in between.

The following patch does that for >= and >>= too.

For some strange reason the error recovery emits further errors,
not really sure what's going on because I overwrite the token->type
like the code does for the C++11 >> case or for the C++98 >> cases,
but at least the first error is nicer (well, for the C++98 nested
template case and >>= I need to overwrite it to > and so the = is lost,
so perhaps some follow-up errors are needed for that case).

Bootstrapped/regtested on powerpc64le-linux, ok for trunk?
Or shall it wait for GCC 13?


Hmm, I lean toward GCC 13; this seems more of a stage 3 change.

I see you test valid uses of >= in template arguments; you should also 
test valid >>= (with another overloaded operator).



2022-02-04  Jakub Jelinek  

PR c++/104319
* parser.cc (cp_parser_template_argument): Treat >= like C++98 >>
after a type id by setting maybe_type_id and aborting tentative
parse.
(cp_parser_enclosed_template_argument_list): Handle
CPP_GREATER_EQ like misspelled CPP_GREATER CPP_EQ and
CPP_RSHIFT_EQ like misspelled CPP_GREATER CPP_GREATER_EQ
or CPP_RSHIFT CPP_EQ or CPP_GREATER CPP_GREATER CPP_EQ.
(cp_parser_next_token_ends_template_argument_p): Return true
also for CPP_GREATER_EQ and CPP_RSHIFT_EQ.

* g++.dg/parse/template28.C: Adjust expected diagnostics.
* g++.dg/parse/template30.C: New test.

--- gcc/cp/parser.cc.jj 2022-02-04 14:36:54.765608651 +0100
+++ gcc/cp/parser.cc2022-02-04 14:42:14.761139259 +0100
@@ -18820,8 +18820,13 @@ cp_parser_template_argument (cp_parser*
   In C++0x, the '>>' will be considered two separate '>'
   tokens.  */
if (!cp_parser_error_occurred (parser)
-  && cxx_dialect == cxx98
-  && cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT))
+  && ((cxx_dialect == cxx98
+  && cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT))
+ /* Similarly for >= which
+cp_parser_next_token_ends_template_argument_p treats for
+diagnostics purposes as mistyped > =, but can be valid
+after a type-id.  */
+ || cp_lexer_next_token_is (parser->lexer, CPP_GREATER_EQ)))
  {
maybe_type_id = true;
cp_parser_abort_tentative_parse (parser);
@@ -32029,7 +32034,9 @@ cp_parser_enclosed_template_argument_lis
cp_evaluated ev;
/* Parse the template-argument-list itself.  */
if (cp_lexer_next_token_is (parser->lexer, CPP_GREATER)
-  || cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT))
+  || cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT)
+  || cp_lexer_next_token_is (parser->lexer, CPP_GREATER_EQ)
+  || cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT_EQ))
  arguments = NULL_TREE;
else
  arguments = cp_parser_template_argument_list (parser);
@@ -32086,6 +32093,38 @@ cp_parser_enclosed_template_argument_lis
"a template argument list");
}
  }
+  /* Similarly for >>= and >=.  */
+  else if (cp_lexer_next_token_is (parser->lexer, CPP_GREATER_EQ)
+  || cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT_EQ))
+{
+  cp_token *token = cp_lexer_consume_token (parser->lexer);
+  gcc_rich_location richloc (token->location);
+  enum cpp_ttype new_type;
+  const char *replacement;
+  if (token->type == CPP_GREATER_EQ)
+   {
+ replacement = "> =";
+ new_type = CPP_EQ;
+   }
+  else if (!saved_greater_than_is_operator_p)
+   {
+ if (cxx_dialect != cxx98)
+   replacement = ">> =";
+ else
+   replacement = "> > =";
+ new_type = CPP_GREATER;
+   }
+  else
+   {
+ replacement = "> >=";
+ new_type = CPP_GREATER_EQ;
+   }
+  richloc.add_fixit_replace (replacement);
+  error_at (&richloc, "%qs should be %qs to terminate a template "
+   "argument list",
+   cpp_type2name (token->type, token->flags), replacement);
+  token->type = new_type;
+}
else
  cp_parser_require_end_of_template_parameter_list (parser);
/* The `>' token might be a greater-than operator again now.  */
@@ -33163,7 +33202,11 @@ cp_parser_next_token_ends_template_argum
return (token->type == CPP_COMMA
|| token->type == CPP_GREATER
|| token->type == CPP_ELLIPSIS
- || ((cxx_dialect != cxx98) && token->type == CPP_RSHIFT));
+ || ((cxx_dialect != cxx98) && token->type == CPP_RSHIFT)
+ /* For better diagnostics, treat >>= like that too, that
+shouldn't 

Re: [PATCH] c++, v3: Further address_compare fixes [PR89074]

2022-02-04 Thread Jason Merrill via Gcc-patches

On 2/4/22 08:41, Jakub Jelinek wrote:

On Thu, Feb 03, 2022 at 04:34:17PM -0500, Jason Merrill wrote:

On 2/3/22 16:18, Jakub Jelinek wrote:

On Thu, Feb 03, 2022 at 04:04:57PM -0500, Jason Merrill wrote:

I think it would be clearer to leave the !DECL_P case alone and add

/* In C++ it is unspecified, and so non-constant, whether two
  equivalent strings have the same address.  */
else if (folding_cxx_constexpr
&& (TREE_CODE (base0) == STRING_CST
|| TREE_CODE (base1) == STRING_CST)


The point was to let the first if handle for
!folding_cxx_constexpr the cases with STRING_CST
as one or both operands and if that falls through, return 2.


Ah, I see.  And then for folding_cxx_constexpr you have your new code toward
the bottom of the function that can say they're unequal in some cases.  Can
you combine the STRING_CST handling for both values of folding_cxx_constexpr
instead of having them so far apart?


Not easily, because for the folding_cxx_constexpr case it primarily reuses
the code from the last else if - computing sizes of objects and checking
if one is at a start of one and another at the end of the other.


And the !folding_cxx_constexpr case shouldn't also use that code?


One further option would be to compute early flags like
enum { OFF_POS_START, OFF_POS_MIDDLE, OFF_POS_END } pos0, pos1;
and then just use them or ignore them in the decisions later.


If that helps to refactor a bit, sure.


Here it is, hopefully it makes the code more readable and understandable.


Much more readable, thanks!


Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

2022-02-04  Jakub Jelinek  

PR c++/89074
PR c++/104033
* fold-const.h (folding_initializer): Adjust comment.
(folding_cxx_constexpr): Declare.
* fold-const.cc (folding_initializer): Adjust comment.
(folding_cxx_constexpr): New variable.
(address_compare): Restrict the decl vs. STRING_CST
or vice versa or STRING_CST vs. STRING_CST or
is_global_var != is_global_var optimizations to !folding_cxx_constexpr.
Punt for FUNCTION_DECLs with non-zero offsets.  If folding_initializer,
assume non-aliased functions have non-zero size and have different
addresses.  For folding_cxx_constexpr, punt on comparisons of start
of some object and end of another one, regardless whether it is a decl
or string literal.  Also punt for folding_cxx_constexpr on
STRING_CST vs. STRING_CST comparisons if the two literals could be
overlapping.

* constexpr.cc (cxx_eval_binary_expression): Temporarily set
folding_cxx_constexpr.

* g++.dg/cpp1y/constexpr-89074-3.C: New test.

--- gcc/fold-const.h.jj 2022-02-01 20:10:51.235856007 +0100
+++ gcc/fold-const.h2022-02-03 15:02:02.700228631 +0100
@@ -20,9 +20,16 @@ along with GCC; see the file COPYING3.
  #ifndef GCC_FOLD_CONST_H
  #define GCC_FOLD_CONST_H
  
-/* Non-zero if we are folding constants inside an initializer; zero

-   otherwise.  */
+/* Nonzero if we are folding constants inside an initializer or a C++
+   manifestly-constant-evaluated context; zero otherwise.
+   Should be used when folding in initializer enables additional
+   optimizations.  */
  extern int folding_initializer;
+/* Nonzer of we are folding C++ manifestly-constant-evaluated context; zero


Still need to fix this typo.


+   otherwise.
+   Should be used when certain constructs shouldn't be optimized
+   during folding in that context.  */
+extern bool folding_cxx_constexpr;
  
  /* Convert between trees and native memory representation.  */

  extern int native_encode_expr (const_tree, unsigned char *, int, int off = 
-1);
--- gcc/fold-const.cc.jj2022-02-03 14:31:32.243129408 +0100
+++ gcc/fold-const.cc   2022-02-04 10:19:13.812784763 +0100
@@ -86,9 +86,17 @@ along with GCC; see the file COPYING3.
  #include "gimple-range.h"
  
  /* Nonzero if we are folding constants inside an initializer or a C++

-   manifestly-constant-evaluated context; zero otherwise.  */
+   manifestly-constant-evaluated context; zero otherwise.
+   Should be used when folding in initializer enables additional
+   optimizations.  */
  int folding_initializer = 0;
  
+/* Nonzer of we are folding C++ manifestly-constant-evaluated context; zero


And here.


+   otherwise.
+   Should be used when certain constructs shouldn't be optimized
+   during folding in that context.  */
+bool folding_cxx_constexpr = false;
+
  /* The following constants represent a bit based encoding of GCC's
 comparison operators.  This encoding simplifies transformations
 on relational comparison operators, such as AND and OR.  */
@@ -16628,41 +16636,55 @@ address_compare (tree_code code, tree ty


Incidentally, the function comment needs to document TYPE.


HOST_WIDE_INT ioff0 = -1, ioff1 = -1;
off0.is_constant ();
off1.is_constant ();
-  if ((DECL_P (base0) && TREE_CODE (base1) == 

Re: [PATCH, V2] Use system default for long double if not specified on PowerPC.

2022-02-04 Thread Peter Bergner via Gcc-patches
On 2/4/22 12:03 PM, Segher Boessenkool wrote:
> On Fri, Feb 04, 2022 at 04:43:53PM +0100, Andreas Schwab wrote:
>> On Feb 04 2022, Michael Meissner via Gcc-patches wrote:
>>> If the user did not specify a default long double format when configuring
>>> GCC, use the long double default from the host compiler.
>>
>> That doesn't make any sense.  The host compiler can be any random
>> compiler completely unrelated to the target.
> 
> Yes, see .
[snip]
> I already NAKed this patch for weeks, and I do it again now.

Did you NAK the patch due to its specific implementation or are you
even against the aim of the patch, namely that gcc configure tries
to determine the long double default of the underlying system and
matches that?

Peter




[PATCH, committed] rs6000: Clean up ISA 3.1 documentation [PR100808]

2022-02-04 Thread Bill Schmidt via Gcc-patches
Hi!

PR100808 pointed out some trivial formatting issues with Power documentation
for basic ISA 3.1 built-in functions.  This patch cleans those up.

Tested on powerpc64le-linux-gnu, committed as obvious.

Thanks!
Bill


2022-02-04  Bill Schmidt  

gcc/
PR target/100808
* doc/extend.texi (Basic PowerPC Built-in Functions Available on ISA
3.1): Provide consistent type names.  Remove unnecessary semicolons.
Fix bad line breaks.
---
 gcc/doc/extend.texi | 71 +++--
 1 file changed, 43 insertions(+), 28 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index a961fc4e0a2..cb1b2b98ca8 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18276,74 +18276,89 @@ The following built-in functions are available on 
Linux 64-bit systems
 that use a future architecture instruction set (@option{-mcpu=power10}):
 
 @smallexample
-@exdent unsigned long long int
-@exdent __builtin_cfuged (unsigned long long int, unsigned long long int)
+@exdent unsigned long long
+@exdent __builtin_cfuged (unsigned long long, unsigned long long)
 @end smallexample
 Perform a 64-bit centrifuge operation, as if implemented by the
 @code{cfuged} instruction.
 @findex __builtin_cfuged
 
 @smallexample
-@exdent unsigned long long int
-@exdent __builtin_cntlzdm (unsigned long long int, unsigned long long int)
+@exdent unsigned long long
+@exdent __builtin_cntlzdm (unsigned long long, unsigned long long)
 @end smallexample
 Perform a 64-bit count leading zeros operation under mask, as if
 implemented by the @code{cntlzdm} instruction.
 @findex __builtin_cntlzdm
 
 @smallexample
-@exdent unsigned long long int
-@exdent __builtin_cnttzdm (unsigned long long int, unsigned long long int)
+@exdent unsigned long long
+@exdent __builtin_cnttzdm (unsigned long long, unsigned long long)
 @end smallexample
 Perform a 64-bit count trailing zeros operation under mask, as if
 implemented by the @code{cnttzdm} instruction.
 @findex __builtin_cnttzdm
 
 @smallexample
-@exdent unsigned long long int
-@exdent __builtin_pdepd (unsigned long long int, unsigned long long int)
+@exdent unsigned long long
+@exdent __builtin_pdepd (unsigned long long, unsigned long long)
 @end smallexample
 Perform a 64-bit parallel bits deposit operation, as if implemented by the
 @code{pdepd} instruction.
 @findex __builtin_pdepd
 
 @smallexample
-@exdent unsigned long long int
-@exdent __builtin_pextd (unsigned long long int, unsigned long long int)
+@exdent unsigned long long
+@exdent __builtin_pextd (unsigned long long, unsigned long long)
 @end smallexample
 Perform a 64-bit parallel bits extract operation, as if implemented by the
 @code{pextd} instruction.
 @findex __builtin_pextd
 
 @smallexample
-@exdent vector signed __int128 vsx_xl_sext (signed long long, signed char *);
-@exdent vector signed __int128 vsx_xl_sext (signed long long, signed short *);
-@exdent vector signed __int128 vsx_xl_sext (signed long long, signed int *);
-@exdent vector signed __int128 vsx_xl_sext (signed long long, signed long long 
*);
-@exdent vector unsigned __int128 vsx_xl_zext (signed long long, unsigned char 
*);
-@exdent vector unsigned __int128 vsx_xl_zext (signed long long, unsigned short 
*);
-@exdent vector unsigned __int128 vsx_xl_zext (signed long long, unsigned int 
*);
-@exdent vector unsigned __int128 vsx_xl_zext (signed long long, unsigned long 
long *);
+@exdent vector signed __int128 vsx_xl_sext (signed long long, signed char *)
+
+@exdent vector signed __int128 vsx_xl_sext (signed long long, signed short *)
+
+@exdent vector signed __int128 vsx_xl_sext (signed long long, signed int *)
+
+@exdent vector signed __int128 vsx_xl_sext (signed long long, signed long long 
*)
+
+@exdent vector unsigned __int128 vsx_xl_zext (signed long long, unsigned char 
*)
+
+@exdent vector unsigned __int128 vsx_xl_zext (signed long long, unsigned short 
*)
+
+@exdent vector unsigned __int128 vsx_xl_zext (signed long long, unsigned int *)
+
+@exdent vector unsigned __int128 vsx_xl_zext (signed long long, unsigned long 
long *)
 @end smallexample
 
 Load (and sign extend) to an __int128 vector, as if implemented by the ISA 3.1
-@code{lxvrbx} @code{lxvrhx} @code{lxvrwx} @code{lxvrdx} instructions.
+@code{lxvrbx}, @code{lxvrhx}, @code{lxvrwx}, and  @code{lxvrdx} instructions.
 @findex vsx_xl_sext
 @findex vsx_xl_zext
 
 @smallexample
-@exdent void vec_xst_trunc (vector signed __int128, signed long long, signed 
char *);
-@exdent void vec_xst_trunc (vector signed __int128, signed long long, signed 
short *);
-@exdent void vec_xst_trunc (vector signed __int128, signed long long, signed 
int *);
-@exdent void vec_xst_trunc (vector signed __int128, signed long long, signed 
long long *);
-@exdent void vec_xst_trunc (vector unsigned __int128, signed long long, 
unsigned char *);
-@exdent void vec_xst_trunc (vector unsigned __int128, signed long long, 
unsigned short *);
-@exdent void vec_xst_trunc (vector 

Re: [PATCH] doc: RISC-V: Document the `-misa-spec=' option

2022-02-04 Thread Palmer Dabbelt

On Fri, 04 Feb 2022 10:44:04 PST (-0800), ma...@embecosm.com wrote:

We have recently updated the default for the `-misa-spec=' option, yet
we still have not documented it nor its `--with-isa-spec=' counterpart
in the GCC manuals.  Fix that.

gcc/
* doc/install.texi (Configuration): Document `--with-isa-spec='
RISC-V option.
* doc/invoke.texi (Option Summary): List `-misa-spec' RISC-V
option.
(RISC-V Options): Document it.
---
Hi,

 Verified with `make info' and `make pdf'.  OK to apply?

  Maciej
---
 gcc/doc/install.texi |   14 ++
 gcc/doc/invoke.texi  |   16 
 2 files changed, 30 insertions(+)

gcc-riscv-misa-doc.diff
Index: gcc/gcc/doc/install.texi
===
--- gcc.orig/gcc/doc/install.texi
+++ gcc/gcc/doc/install.texi
@@ -1599,6 +1599,20 @@ On certain targets this option sets the
 size as a power of two in bytes.  On AArch64 @var{size} is required to be 
either
 12 (4KB) or 16 (64KB).

+@item --with-isa-spec=@var{ISA-spec-string}
+On RISC-V targets specify the default version of the RISC-V ISA specification
+to produce code conforming to.  The possibilities for @var{ISA-spec-string}
+are:
+@table @code
+@item 2.2
+Produce code conforming to version 2.2.
+@item 20190608
+Produce code conforming to version 20190608.
+@item 20191213
+Produce code conforming to version 20191213.
+@end table
+In the absence of this configuration option the default version is 20191213.


Thanks.  I have a version of this floating around somewhere.  I probably 
forgot to post it and it's generally inferior to yours, so this LGTM.


The only thing I'd point out is that this specifically controls the 
version of the Unprivileged (formerly called user) specification, as 
there are many RISC-V specifications and it can be a bit ambiguous what 
folks mean when they just say "specification".  Not sure exactly what 
the right wording is there, maybe "version of the RISC-V Unprivileged 
(formerly user-level) ISA specification"?


That applies a handful of places.


+
 @item --enable-__cxa_atexit
 Define if you want to use __cxa_atexit, rather than atexit, to
 register C++ destructors for local statics and global objects.
Index: gcc/gcc/doc/invoke.texi
===
--- gcc.orig/gcc/doc/invoke.texi
+++ gcc/gcc/doc/invoke.texi
@@ -1184,6 +1184,7 @@ See RS/6000 and PowerPC Options.
 -mabi=@var{ABI-string} @gol
 -mfdiv  -mno-fdiv @gol
 -mdiv  -mno-div @gol
+-misa-spec=@var{ISA-spec-string} @gol
 -march=@var{ISA-string} @gol
 -mtune=@var{processor-string} @gol
 -mpreferred-stack-boundary=@var{num} @gol
@@ -27632,6 +27633,21 @@ Do or don't use hardware instructions fo
 M extension.  The default is to use them if the specified architecture has
 these instructions.

+@item -misa-spec=@var{ISA-spec-string}
+@opindex misa-spec
+Specify the version of the RISC-V ISA specification to produce code conforming
+to.  The possibilities for @var{ISA-spec-string} are:
+@table @code
+@item 2.2
+Produce code conforming to version 2.2.
+@item 20190608
+Produce code conforming to version 20190608.
+@item 20191213
+Produce code conforming to version 20191213.
+@end table
+The default is @option{-misa-spec=20191213} unless GCC has been configured
+with @option{--with-isa-spec=} specifying a different default version.
+
 @item -march=@var{ISA-string}
 @opindex march
 Generate code for given RISC-V ISA (e.g.@: @samp{rv64im}).  ISA strings must be


[PATCH] doc: RISC-V: Document the `-misa-spec=' option

2022-02-04 Thread Maciej W. Rozycki
We have recently updated the default for the `-misa-spec=' option, yet 
we still have not documented it nor its `--with-isa-spec=' counterpart 
in the GCC manuals.  Fix that.

gcc/
* doc/install.texi (Configuration): Document `--with-isa-spec=' 
RISC-V option.
* doc/invoke.texi (Option Summary): List `-misa-spec' RISC-V
option.
(RISC-V Options): Document it.
---
Hi,

 Verified with `make info' and `make pdf'.  OK to apply?

  Maciej
---
 gcc/doc/install.texi |   14 ++
 gcc/doc/invoke.texi  |   16 
 2 files changed, 30 insertions(+)

gcc-riscv-misa-doc.diff
Index: gcc/gcc/doc/install.texi
===
--- gcc.orig/gcc/doc/install.texi
+++ gcc/gcc/doc/install.texi
@@ -1599,6 +1599,20 @@ On certain targets this option sets the
 size as a power of two in bytes.  On AArch64 @var{size} is required to be 
either
 12 (4KB) or 16 (64KB).
 
+@item --with-isa-spec=@var{ISA-spec-string}
+On RISC-V targets specify the default version of the RISC-V ISA specification
+to produce code conforming to.  The possibilities for @var{ISA-spec-string}
+are:
+@table @code
+@item 2.2
+Produce code conforming to version 2.2.
+@item 20190608
+Produce code conforming to version 20190608.
+@item 20191213
+Produce code conforming to version 20191213.
+@end table
+In the absence of this configuration option the default version is 20191213.
+
 @item --enable-__cxa_atexit
 Define if you want to use __cxa_atexit, rather than atexit, to
 register C++ destructors for local statics and global objects.
Index: gcc/gcc/doc/invoke.texi
===
--- gcc.orig/gcc/doc/invoke.texi
+++ gcc/gcc/doc/invoke.texi
@@ -1184,6 +1184,7 @@ See RS/6000 and PowerPC Options.
 -mabi=@var{ABI-string} @gol
 -mfdiv  -mno-fdiv @gol
 -mdiv  -mno-div @gol
+-misa-spec=@var{ISA-spec-string} @gol
 -march=@var{ISA-string} @gol
 -mtune=@var{processor-string} @gol
 -mpreferred-stack-boundary=@var{num} @gol
@@ -27632,6 +27633,21 @@ Do or don't use hardware instructions fo
 M extension.  The default is to use them if the specified architecture has
 these instructions.
 
+@item -misa-spec=@var{ISA-spec-string}
+@opindex misa-spec
+Specify the version of the RISC-V ISA specification to produce code conforming
+to.  The possibilities for @var{ISA-spec-string} are:
+@table @code
+@item 2.2
+Produce code conforming to version 2.2.
+@item 20190608
+Produce code conforming to version 20190608.
+@item 20191213
+Produce code conforming to version 20191213.
+@end table
+The default is @option{-misa-spec=20191213} unless GCC has been configured
+with @option{--with-isa-spec=} specifying a different default version.
+
 @item -march=@var{ISA-string}
 @opindex march
 Generate code for given RISC-V ISA (e.g.@: @samp{rv64im}).  ISA strings must be


Re: [PATCH, V2] Use system default for long double if not specified on PowerPC.

2022-02-04 Thread Segher Boessenkool
On Fri, Feb 04, 2022 at 04:43:53PM +0100, Andreas Schwab wrote:
> On Feb 04 2022, Michael Meissner via Gcc-patches wrote:
> > If the user did not specify a default long double format when configuring
> > GCC, use the long double default from the host compiler.
> 
> That doesn't make any sense.  The host compiler can be any random
> compiler completely unrelated to the target.

Yes, see .

It also goes against the basic GCC policy that results should be
reproducible.  We *do* have some existing cases where the compiler
changes behaviour based on whether e.g. binutils is too old to have a
certain feature.  Either a) such cases are ancient, everyone has a newer
version in practice, we could just require this; or b) those cases cause
no end of problems, everyone is much better off if we tell the user at
configuration time to get newer stuff, or if it causes a clear error in
the first place we can just let it do that.

Also.  We cannot have confidence that our compiler does anything correct
or good if we cannot test it.  If we let the testing matrix explode
exponentially we cannot test even the reasonable cases.  It will make
the support job a lot harder as well: users will not report what they
used when configuring and building the compiler when they report a
problem, even more so because in all likelihood it was someone else who
did that building!  And our own diagnostic for this (the gcc -v output)
does not say what defaults the build compiler used.

I already NAKed this patch for weeks, and I do it again now.


Segher


Re: [PATCH] c++: conditional noexcept-spec on defaulted comparison op [PR96242]

2022-02-04 Thread Patrick Palka via Gcc-patches
On Thu, 3 Feb 2022, Jason Merrill wrote:

> On 2/3/22 16:06, Patrick Palka wrote:
> > On Thu, 3 Feb 2022, Jason Merrill wrote:
> > 
> > > On 2/3/22 14:58, Patrick Palka wrote:
> > > > When synthesizing a defaulted comparison op from
> > > > maybe_instantiate_noexcept, we seem to be forgetting to instantiate the
> > > > noexcept-spec afterwards.
> > > 
> > > Hmm, there shouldn't be any need to instantiate the noexcept-spec
> > > afterwards,
> > > it should have been set by ~comp_info.
> > 
> > It appears the comp_info class sets the noexcept-spec only if the
> > comparison function hasn't been declared with an explicit noexcept-spec.
> > Otherwise the class doesn't touch the noexcept-spec, and it remains a
> > DEFERRED_NOEXCEPT with non-NULL DEFERRED_NOEXCEPT_PATTERN.
> 
> Ah, I see.  So perhaps we should entirely skip the current DECL_MAYBE_DELETED
> handling in maybe_instantiate_noexcept if we have DEFERRED_NOEXCEPT with
> non-null DEFERRED_NOEXCEPT_PATTERN (which seems to want another macro)?

Hmm, I tried something to that effect but it looks like mark_used relies
solely on the DECL_MAYBE_DELETED handling in maybe_instantiate_noexcept
to determine deletedness of a defaulted comparison operator (via trying
to synthesize it).  So by sometimes sidestepping this handling, we end
up failing to diagnose the use of the deleted defaulted <=> in e.g.:

  #include 

  struct A { };

  template
  struct X {
auto operator<=>(const X&) const noexcept(B) = default;
A a;
  };

  X x_t;
  auto c = x_t <=> x_t; // should be error: use of deleted <=> b/c A lacks <=>

In light of this, I suppose mark_used should directly perform
DECL_MAYBE_DELETED synthesization of its own?

And it looks like DECL_MAYBE_DELETED is always false after doing
maybe_synthesize_method, so I think maybe_instantiate_noexcept should
return !DECL_DELETED_FN instead of !DECL_MAYBE_DELETED after synthesization.

How does this look?  Lightly tested so far, bootstrap and regtesting in 
progress.

-- >8 --

PR c++/96242

gcc/cp/ChangeLog:

* decl2.cc (mark_used): Directly synthesize a DECL_MAYBE_DELETED
fn by calling maybe_synthesize_method instead of relying on
maybe_instantiate_noexcept.
* pt.cc (maybe_instantiate_noexcept): Restrict DECL_MAYBE_DELETED
synthesization to only fns with an implicit noexcept-spec, and
return !DECL_DELETED_FN instead of !DECL_MAYBE_DELETED afterward.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/spaceship-synth15.C: New test.
---
 gcc/cp/decl2.cc   | 17 ++
 gcc/cp/pt.cc  | 11 +-
 .../g++.dg/cpp2a/spaceship-synth15.C  | 22 +++
 3 files changed, 41 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/spaceship-synth15.C

diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index a2aa5f1de4e..4d3798d02fe 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -5772,10 +5772,19 @@ mark_used (tree decl, tsubst_flags_t complain)
   if (TREE_CODE (decl) == CONST_DECL)
 used_types_insert (DECL_CONTEXT (decl));
 
-  if (TREE_CODE (decl) == FUNCTION_DECL
-  && !DECL_DELETED_FN (decl)
-  && !maybe_instantiate_noexcept (decl, complain))
-return false;
+  if (TREE_CODE (decl) == FUNCTION_DECL)
+{
+  if (DECL_MAYBE_DELETED (decl))
+   {
+ ++function_depth;
+ maybe_synthesize_method (decl);
+ --function_depth;
+   }
+
+  if (!DECL_DELETED_FN (decl)
+ && !maybe_instantiate_noexcept (decl, complain))
+   return false;
+}
 
   if (TREE_CODE (decl) == FUNCTION_DECL
   && DECL_DELETED_FN (decl))
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index d219bba6ac1..584c752529b 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -25982,7 +25982,11 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
   && (!flag_noexcept_type || type_dependent_expression_p (fn)))
 return true;
 
-  if (DECL_MAYBE_DELETED (fn))
+  tree fntype = TREE_TYPE (fn);
+  tree spec = TYPE_RAISES_EXCEPTIONS (fntype);
+
+  if (DECL_MAYBE_DELETED (fn)
+  && (!spec || UNEVALUATED_NOEXCEPT_SPEC_P (spec)))
 {
   if (fn == current_function_decl)
/* We're in start_preparsed_function, keep going.  */
@@ -25991,12 +25995,9 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
complain)
   ++function_depth;
   maybe_synthesize_method (fn);
   --function_depth;
-  return !DECL_MAYBE_DELETED (fn);
+  return !DECL_DELETED_FN (fn);
 }
 
-  tree fntype = TREE_TYPE (fn);
-  tree spec = TYPE_RAISES_EXCEPTIONS (fntype);
-
   if (!spec || !TREE_PURPOSE (spec))
 return true;
 
diff --git a/gcc/testsuite/g++.dg/cpp2a/spaceship-synth15.C 
b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth15.C
new file mode 100644
index 000..00ea6c10474
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth15.C
@@ -0,0 +1,22 @@
+// PR c++/96242
+// { dg-do compile { target c++20 } }

Re: [PATCH] rs6000: Fix up -D_FORTIFY_SOURCE* with -mabi=ieeelongdouble [PR104380]

2022-02-04 Thread David Edelsohn via Gcc-patches
On Fri, Feb 4, 2022 at 11:58 AM Jakub Jelinek  wrote:
>
> Hi!
>
> The following testcase FAILs when configured with
> --with-long-double-format=ieee .  Only happens in the -std=c* modes, not the
> GNU modes; while the glibc headers have __asm redirects of
> vsnprintf and __vsnprintf_chk to __vsnprintfieee128 and
> __vsnprintf_chkieee128, the vsnprintf fortification extern inline gnu_inline
> always_inline wrapper calls __builtin_vsnprintf_chk and we actually emit
> a call to __vsnprintf_chk (i.e. with IBM extended long double) instead of
> __vsnprintf_chkieee128.
>
> rs6000_mangle_decl_assembler_name already had cases for *printf and *scanf,
> so this just adds another case for *printf_chk.  *scanf_chk doesn't exist.
> __ prefixing isn't done because *printf_chk already starts with __.
>
> Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

Okay.

Thanks, David

>
> 2022-02-04  Jakub Jelinek  
>
> PR target/104380
> * config/rs6000/rs6000.cc (rs6000_mangle_decl_assembler_name): Also
> adjust mangling of __builtin*printf_chk.
>
> * gcc.dg/pr104380.c: New test.
>
> --- gcc/config/rs6000/rs6000.cc.jj  2022-01-28 10:01:41.224837656 +0100
> +++ gcc/config/rs6000/rs6000.cc 2022-02-04 12:31:27.651715472 +0100
> @@ -28228,6 +28228,7 @@ rs6000_mangle_decl_assembler_name (tree
> {
>   size_t printf_len = strlen ("printf");
>   size_t scanf_len = strlen ("scanf");
> + size_t printf_chk_len = strlen ("printf_chk");
>
>   if (len >= printf_len
>   && strcmp (name + len - printf_len, "printf") == 0)
> @@ -28237,6 +28238,10 @@ rs6000_mangle_decl_assembler_name (tree
>&& strcmp (name + len - scanf_len, "scanf") == 0)
> newname = xasprintf ("__isoc99_%sieee128", name);
>
> + else if (len >= printf_chk_len
> +  && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
> +   newname = xasprintf ("%sieee128", name);
> +
>   else if (name[len - 1] == 'l')
> {
>   bool uses_ieee128_p = false;
> --- gcc/testsuite/gcc.dg/pr104380.c.jj  2022-02-04 12:51:50.152643364 +0100
> +++ gcc/testsuite/gcc.dg/pr104380.c 2022-02-04 12:53:25.092317741 +0100
> @@ -0,0 +1,32 @@
> +/* PR target/104380 */
> +/* This test needs runtime that provides __*_chk functions.  */
> +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */
> +/* { dg-options "-O2 -std=c99" } */
> +
> +#define FORTIFY_SOURCE 2
> +#include 
> +#include 
> +
> +static char buf[4096];
> +static char gfmt[] = "%Lg";
> +
> +static int __attribute__ ((noipa))
> +foo (char *str, const char *fmt, ...)
> +{
> +  int ret;
> +  va_list ap;
> +  va_start (ap, fmt);
> +  ret = vsnprintf (str, 4096, fmt, ap);
> +  va_end (ap);
> +  return ret;
> +}
> +
> +int
> +main ()
> +{
> +  long double dval = 128.0L;
> +  int ret = foo (buf, gfmt, dval);
> +  if (ret != 3 || __builtin_strcmp (buf, "128") != 0)
> +__builtin_abort ();
> +  return 0;
> +}
>
> Jakub
>


[PATCH] rs6000: Fix up -D_FORTIFY_SOURCE* with -mabi=ieeelongdouble [PR104380]

2022-02-04 Thread Jakub Jelinek via Gcc-patches
Hi!

The following testcase FAILs when configured with
--with-long-double-format=ieee .  Only happens in the -std=c* modes, not the
GNU modes; while the glibc headers have __asm redirects of
vsnprintf and __vsnprintf_chk to __vsnprintfieee128 and
__vsnprintf_chkieee128, the vsnprintf fortification extern inline gnu_inline
always_inline wrapper calls __builtin_vsnprintf_chk and we actually emit
a call to __vsnprintf_chk (i.e. with IBM extended long double) instead of
__vsnprintf_chkieee128.

rs6000_mangle_decl_assembler_name already had cases for *printf and *scanf,
so this just adds another case for *printf_chk.  *scanf_chk doesn't exist.
__ prefixing isn't done because *printf_chk already starts with __.

Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

2022-02-04  Jakub Jelinek  

PR target/104380
* config/rs6000/rs6000.cc (rs6000_mangle_decl_assembler_name): Also
adjust mangling of __builtin*printf_chk.

* gcc.dg/pr104380.c: New test.

--- gcc/config/rs6000/rs6000.cc.jj  2022-01-28 10:01:41.224837656 +0100
+++ gcc/config/rs6000/rs6000.cc 2022-02-04 12:31:27.651715472 +0100
@@ -28228,6 +28228,7 @@ rs6000_mangle_decl_assembler_name (tree
{
  size_t printf_len = strlen ("printf");
  size_t scanf_len = strlen ("scanf");
+ size_t printf_chk_len = strlen ("printf_chk");
 
  if (len >= printf_len
  && strcmp (name + len - printf_len, "printf") == 0)
@@ -28237,6 +28238,10 @@ rs6000_mangle_decl_assembler_name (tree
   && strcmp (name + len - scanf_len, "scanf") == 0)
newname = xasprintf ("__isoc99_%sieee128", name);
 
+ else if (len >= printf_chk_len
+  && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
+   newname = xasprintf ("%sieee128", name);
+
  else if (name[len - 1] == 'l')
{
  bool uses_ieee128_p = false;
--- gcc/testsuite/gcc.dg/pr104380.c.jj  2022-02-04 12:51:50.152643364 +0100
+++ gcc/testsuite/gcc.dg/pr104380.c 2022-02-04 12:53:25.092317741 +0100
@@ -0,0 +1,32 @@
+/* PR target/104380 */
+/* This test needs runtime that provides __*_chk functions.  */
+/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */
+/* { dg-options "-O2 -std=c99" } */
+
+#define FORTIFY_SOURCE 2
+#include 
+#include 
+
+static char buf[4096];
+static char gfmt[] = "%Lg";
+
+static int __attribute__ ((noipa))
+foo (char *str, const char *fmt, ...)
+{
+  int ret;
+  va_list ap;
+  va_start (ap, fmt);
+  ret = vsnprintf (str, 4096, fmt, ap);
+  va_end (ap);
+  return ret;
+}
+
+int
+main ()
+{
+  long double dval = 128.0L;
+  int ret = foo (buf, gfmt, dval);
+  if (ret != 3 || __builtin_strcmp (buf, "128") != 0)
+__builtin_abort ();
+  return 0;
+}

Jakub



[PATCH] analyzer: Fix tests for glibc 2.35 [PR101081]

2022-02-04 Thread Joel Teichroeb via Gcc-patches
In recent versions of glibc fopen has __attribute__((malloc)).
Since we cannot detect whether this attribute is present or not,
we avoid including stdio.h and instead forward declare what we
need in each test.

Signed-off-by: Joel Teichroeb 
---
 gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-2a.c | 5 -
 gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-3a.c | 5 -
 gcc/testsuite/gcc.dg/analyzer/edges-1.c   | 5 -
 gcc/testsuite/gcc.dg/analyzer/file-1.c| 7 ++-
 gcc/testsuite/gcc.dg/analyzer/file-2.c| 5 -
 gcc/testsuite/gcc.dg/analyzer/file-paths-1.c  | 9 -
 gcc/testsuite/gcc.dg/analyzer/file-pr58237.c  | 8 +++-
 gcc/testsuite/gcc.dg/analyzer/pr99716-1.c | 9 +++--
 8 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-2a.c 
b/gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-2a.c
index 9faf5da3a4f..cf014b0a3c8 100644
--- a/gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-2a.c
+++ b/gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-2a.c
@@ -1,6 +1,9 @@
 /* { dg-additional-options "-fanalyzer-verbosity=2" } */
 
-#include 
+typedef struct FILE   FILE;
+
+FILE* fopen (const char*, const char*);
+int   fclose (FILE*);
 
 extern int foo ();
 extern void bar ();
diff --git a/gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-3a.c 
b/gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-3a.c
index 1b2b7983624..b0ece203f56 100644
--- a/gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-3a.c
+++ b/gcc/testsuite/gcc.dg/analyzer/analyzer-verbosity-3a.c
@@ -1,6 +1,9 @@
 /* { dg-additional-options "-fanalyzer-verbosity=3" } */
 
-#include 
+typedef struct FILE   FILE;
+
+FILE* fopen (const char*, const char*);
+int   fclose (FILE*);
 
 extern int foo ();
 extern void bar ();
diff --git a/gcc/testsuite/gcc.dg/analyzer/edges-1.c 
b/gcc/testsuite/gcc.dg/analyzer/edges-1.c
index 6b53c05..f08a6143d59 100644
--- a/gcc/testsuite/gcc.dg/analyzer/edges-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/edges-1.c
@@ -1,4 +1,7 @@
-#include 
+typedef struct FILE   FILE;
+
+FILE* fopen (const char*, const char*);
+int   fclose (FILE*);
 
 extern int foo ();
 extern void bar ();
diff --git a/gcc/testsuite/gcc.dg/analyzer/file-1.c 
b/gcc/testsuite/gcc.dg/analyzer/file-1.c
index 0f4bc5aa7af..e8d934331fd 100644
--- a/gcc/testsuite/gcc.dg/analyzer/file-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/file-1.c
@@ -1,4 +1,9 @@
-#include 
+typedef struct FILE   FILE;
+
+FILE* fopen (const char*, const char*);
+int   fclose (FILE*);
+#define SEEK_SET0
+int fseek (FILE *, long int, int);
 
 void
 test_1 (const char *path)
diff --git a/gcc/testsuite/gcc.dg/analyzer/file-2.c 
b/gcc/testsuite/gcc.dg/analyzer/file-2.c
index 8d34c739084..9c58108a531 100644
--- a/gcc/testsuite/gcc.dg/analyzer/file-2.c
+++ b/gcc/testsuite/gcc.dg/analyzer/file-2.c
@@ -1,4 +1,7 @@
-#include 
+typedef struct FILE   FILE;
+
+FILE* fopen (const char*, const char*);
+int   fclose (FILE*);
 
 struct foo
 {
diff --git a/gcc/testsuite/gcc.dg/analyzer/file-paths-1.c 
b/gcc/testsuite/gcc.dg/analyzer/file-paths-1.c
index d346f7a7c9a..f35017835d4 100644
--- a/gcc/testsuite/gcc.dg/analyzer/file-paths-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/file-paths-1.c
@@ -1,6 +1,13 @@
 /* { dg-additional-options "-fanalyzer-verbosity=3" } */
 
-#include 
+typedef struct FILE   FILE;
+
+FILE* fopen (const char*, const char*);
+int   fclose (FILE*);
+char *fgets (char *, int, FILE *);
+
+#define NULL ((void *)0)
+
 
 /* Verify that we correctly emit CFG events in the face of buffers
being clobbered in these leak reports.  */
diff --git a/gcc/testsuite/gcc.dg/analyzer/file-pr58237.c 
b/gcc/testsuite/gcc.dg/analyzer/file-pr58237.c
index 68f49c25607..ecc7144198b 100644
--- a/gcc/testsuite/gcc.dg/analyzer/file-pr58237.c
+++ b/gcc/testsuite/gcc.dg/analyzer/file-pr58237.c
@@ -1,4 +1,10 @@
-#include 
+typedef struct FILE   FILE;
+
+FILE* fopen (const char*, const char*);
+int   fclose (FILE*);
+char *fgets (char *, int, FILE *);
+
+#define NULL ((void *)0)
 
 void f0(const char *str)
 {
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr99716-1.c 
b/gcc/testsuite/gcc.dg/analyzer/pr99716-1.c
index 6720c3c198b..2ccdcc73a5c 100644
--- a/gcc/testsuite/gcc.dg/analyzer/pr99716-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/pr99716-1.c
@@ -1,5 +1,10 @@
-#include 
-#include 
+typedef struct FILE   FILE;
+
+FILE* fopen (const char*, const char*);
+int   fclose (FILE*);
+int fprintf (FILE *, const char *, ...);
+
+#define NULL ((void *)0)
 
 void
 test_1 (void)
-- 
2.35.1



Re: [committed] libgomp.fortran/allocate-1.f90: Minor cleanup (was: Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).)

2022-02-04 Thread Tobias Burnus

On 04.02.22 16:33, Thomas Schwinge wrote:

Maybe removed locally, I can't tell ;-) -- but it's still in the
commit that you pushed. See below.
Also, a commented-out '!$omp barrier'; not sure what that one is about.


I shall not do commits after one week of 6h+/day virtual OpenMP
Face2Face meeting.

Corrected with commit as shown in the attachment.

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit f62156eab7b757d1ee03a11d5c96c72bd3de079c
Author: Tobias Burnus 
Date:   Fri Feb 4 17:31:21 2022 +0100

libgomp.fortran/allocate-1.f90: Fix minor cleanup

libgomp/ChangeLog:
* testsuite/libgomp.fortran/allocate-1.f90: Remove spurious
STOP of previous commit.

diff --git a/libgomp/testsuite/libgomp.fortran/allocate-1.f90 b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
index 062278f9908..0a31d35d5ac 100644
--- a/libgomp/testsuite/libgomp.fortran/allocate-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
@@ -74,8 +74,6 @@ subroutine foo (x, p, q, h, fl)
   if (x /= 42) then
 stop 1
   end if
-
-  !!$omp barrier
   v(1) = 7
   if ( (and(fl, 2) /= 0) .and.  &
((is_64bit_aligned(x) == 0) .or. &
@@ -97,7 +95,6 @@ subroutine foo (x, p, q, h, fl)
 stop 4
   end if
   !$omp end parallel
-stop
   !$omp teams
   !$omp parallel private (y) firstprivate (x, w) allocate (h: x, y, w)
 


Re: [PATCH, V2] Use system default for long double if not specified on PowerPC.

2022-02-04 Thread Andreas Schwab
On Feb 04 2022, Michael Meissner via Gcc-patches wrote:

> If the user did not specify a default long double format when configuring
> GCC, use the long double default from the host compiler.

That doesn't make any sense.  The host compiler can be any random
compiler completely unrelated to the target.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


Re: [committed] libgomp.fortran/allocate-1.f90: Minor cleanup (was: Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).)

2022-02-04 Thread Thomas Schwinge
Hi Tobias!

On 2022-02-04T14:57:07+0100, Tobias Burnus  wrote:
> On 04.02.22 10:37, Thomas Schwinge wrote:
>>> I have attached a patch (not committed), which silences the three kinds of
>>> warnings and fixes the interface issue.
>>> TODO: commit it.
>> Still "TODO: commit it" ;-) -- and while I haven't reviewed the changes
>> in detail, I did spot one item that should be addressed, I suppose:
>
> I had also spotted the 'stop' which was a left over from -fsanitized=...
> checking and had removed it locally.

Maybe removed locally, I can't tell ;-) -- but it's still in the commit
that you pushed.  See below.

Also, a commented-out '!$omp barrier'; not sure what that one is about.

> But good that you also keep
> checking patches :-)

I try!  :-)


Grüße
 Thomas


> In any case, I have now _finally_ committed the patch.
>
> Attached is the simplified (-w) diff, where I did exclude the
> indentation changes to make the diff more readable.
>
> For the full diff, see e.g. https://gcc.gnu.org/r12-7053
>
> Tobias

> commit 6d4981350168f1eb3f72149bd7e05b9ba6bec1fd
> Author: Tobias Burnus 
> Date:   Fri Feb 4 14:51:01 2022 +0100
>
> libgomp.fortran/allocate-1.f90: Minor cleanup
>
> libgomp/ChangeLog:
> * testsuite/libgomp.fortran/allocate-1.c (is_64bit_aligned): 
> Renamed
> from is_64bit_aligned_.
> * testsuite/libgomp.fortran/allocate-1.f90: Fix interface decl
> and use it, more implicit none, remove unused argument.
>
> diff --git a/libgomp/testsuite/libgomp.fortran/allocate-1.c 
> b/libgomp/testsuite/libgomp.fortran/allocate-1.c
> index d33acc6feef..cb6d355afc6 100644
> --- a/libgomp/testsuite/libgomp.fortran/allocate-1.c
> +++ b/libgomp/testsuite/libgomp.fortran/allocate-1.c
> @@ -1,7 +1,7 @@
>  #include 
>
>  int
> -is_64bit_aligned_ (uintptr_t a)
> +is_64bit_aligned (uintptr_t a)
>  {
>return ( (a & 0x3f) == 0);
>  }
> diff --git a/libgomp/testsuite/libgomp.fortran/allocate-1.f90 
> b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
> index 35d1750b878..062278f9908 100644
> --- a/libgomp/testsuite/libgomp.fortran/allocate-1.f90
> +++ b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
> @@ -5,30 +5,30 @@
>  module m
>use omp_lib
>use iso_c_binding
> -  implicit none
> +  implicit none (type, external)
>
>interface
>  integer(c_int) function is_64bit_aligned (a) bind(C)
>import :: c_int
> -  integer  :: a
> +  type(*)  :: a
>  end
>end interface
> -end module m
>
> -subroutine foo (x, p, q, px, h, fl)
> +contains
> +
> +subroutine foo (x, p, q, h, fl)
>use omp_lib
>use iso_c_binding
>integer  :: x
>integer, dimension(4) :: p
>integer, dimension(4) :: q
> -  integer  :: px
>integer (kind=omp_allocator_handle_kind) :: h
>integer  :: fl
>
>integer  :: y
>integer  :: r, i, i1, i2, i3, i4, i5
>integer  :: l, l3, l4, l5, l6
> -  integer  :: n, n1, n2, n3, n4
> +  integer  :: n, n2, n3, n4
>integer  :: j2, j3, j4
>integer, dimension(4) :: l2
>integer, dimension(4) :: r2
> @@ -74,6 +74,8 @@ subroutine foo (x, p, q, px, h, fl)
>if (x /= 42) then
>  stop 1
>end if
> +
> +  !!$omp barrier
>v(1) = 7
>if ( (and(fl, 2) /= 0) .and.  &
> ((is_64bit_aligned(x) == 0) .or. &
> @@ -95,7 +97,7 @@ subroutine foo (x, p, q, px, h, fl)
>  stop 4
>end if
>!$omp end parallel
> -
> +stop
>!$omp teams
>!$omp parallel private (y) firstprivate (x, w) allocate (h: x, y, w)
>
> @@ -305,11 +307,13 @@ subroutine foo (x, p, q, px, h, fl)
>.or. r2(1) /= (5 * p(3)) .or. r2(4) /= (6 * p(3))) then
>  stop 25
>end if
> -
>  end subroutine
> +end module m
>
>  program main
>use omp_lib
> +  use m
> +  implicit none (type, external)
>integer, dimension(4) :: p
>integer, dimension(4) :: q
>
> @@ -323,11 +327,11 @@ program main
>if (a == omp_null_allocator) stop 1
>
>call omp_set_default_allocator (omp_default_mem_alloc);
> -  call foo (42, p, q, 2, a, 0);
> -  call foo (42, p, q, 2, omp_default_mem_alloc, 0);
> -  call foo (42, p, q, 2, a, 1);
> +  call foo (42, p, q, a, 0);
> +  call foo (42, p, q, omp_default_mem_alloc, 0);
> +  call foo (42, p, q, a, 1);
>call omp_set_default_allocator (a);
> -  call foo (42, p, q, 2, omp_null_allocator, 3);
> -  call foo (42, p, q, 2, omp_default_mem_alloc, 2);
> +  call foo (42, p, q, omp_null_allocator, 3);
> +  call foo (42, p, q, omp_default_mem_alloc, 2);
>call omp_destroy_allocator (a);
>  end
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[PATCH, V2] Use system default for long double if not specified on PowerPC.

2022-02-04 Thread Michael Meissner via Gcc-patches
Use system default for long double unless it is overridden.

If the user did not specify a default long double format when configuring
GCC, use the long double default from the host compiler.

I tested this on the following systems.  There were no regressions:

*   Big endian Linux power8 using --with-cpu=power8
*   Little endian Linux power9 using --with-cpu=power9
*   Little endian Linux power10 using --with-cpu=power10
*   Little endian Fedora rawhide power10 using --with-cpu=power10 where the
default was changed to use IEEE 128-bit. I did not specify
--with-long-double-format=ieee and it correctly defaulted to IEEE.
*   I also built a compiler on the above Fedora rawhide system, explicitly
setting the type to IBM, and it did use the IBM format.

Can I check this into the master branch?  I also think this should be back
ported to GCC 11.  Can I do this also?


2022-02-04  Michael Meissner  

gcc/
* config/rs6000/rs6000.cc (TARGET_IEEEQUAD_DEFAULT): If the
host compiler defaults to IEEE 128-bit long double, make that the
default for this build unless it was overridden via the
--with-long-double-format= configuration option.
---
 gcc/config/rs6000/rs6000.cc | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 666dec694a8..0595855568c 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -91,10 +91,17 @@
  explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
  those systems will not pick up this default.  This needs to be after all
  of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
- properly defined.  */
+ properly defined.  In addition, the --with-long-double-format
+ configuration option also sets TARGET_IEEEQUAD_DEFAULT.
+
+ If the host compiler uses IEEE 128-bit long doubles, make the default to
+ also use IEEE 128-bit long doubles unless the --with-long-double-format
+ configuration switch was used.  */
 #ifndef TARGET_IEEEQUAD_DEFAULT
 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
 #define TARGET_IEEEQUAD_DEFAULT 1
+#elif defined (__LONG_DOUBLE_IEEE128__)
+#define TARGET_IEEEQUAD_DEFAULT 1
 #else
 #define TARGET_IEEEQUAD_DEFAULT 0
 #endif
-- 
2.34.1


-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


[committed] doc: Update references to "C++2a" in cpp.texi

2022-02-04 Thread Jonathan Wakely via Gcc-patches
Committed as obvious.


gcc/ChangeLog:

* doc/cpp.texi (Variadic Macros): Replace C++2a with C++20.
---
 gcc/doc/cpp.texi | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index d4290fefb41..90b2767e39a 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -1678,7 +1678,7 @@ This formulation looks more descriptive, but historically 
it was less
 flexible: you had to supply at least one argument after the format
 string.  In standard C, you could not omit the comma separating the
 named argument from the variable arguments.  (Note that this
-restriction has been lifted in C++2a, and never existed in GNU C; see
+restriction has been lifted in C++20, and never existed in GNU C; see
 below.)
 
 Furthermore, if you left the variable argument empty, you would have
@@ -1690,10 +1690,10 @@ eprintf("success!\n", );
  @expansion{} fprintf(stderr, "success!\n", );
 @end smallexample
 
-This has been fixed in C++2a, and GNU CPP also has a pair of
+This has been fixed in C++20, and GNU CPP also has a pair of
 extensions which deal with this problem.
 
-First, in GNU CPP, and in C++ beginning in C++2a, you are allowed to
+First, in GNU CPP, and in C++ beginning in C++20, you are allowed to
 leave the variable argument out entirely:
 
 @smallexample
@@ -1702,7 +1702,7 @@ eprintf ("success!\n")
 @end smallexample
 
 @noindent
-Second, C++2a introduces the @code{@w{__VA_OPT__}} function macro.
+Second, C++20 introduces the @code{@w{__VA_OPT__}} function macro.
 This macro may only appear in the definition of a variadic macro.  If
 the variable argument has any tokens, then a @code{@w{__VA_OPT__}}
 invocation expands to its argument; but if the variable argument does
-- 
2.34.1



[committed] libstdc++: Add suggestion to std::uncaught_exception() warning

2022-02-04 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


We should use the SUGGEST macro for std::uncaught_exception()
deprecation warnings.

libstdc++-v3/ChangeLog:

* include/bits/allocator.h: Qualify std::allocator_traits in
deprecated warnings.
* libsupc++/exception (uncaught_exception): Add suggestion to
deprecated warning.
---
 libstdc++-v3/include/bits/allocator.h | 4 ++--
 libstdc++-v3/libsupc++/exception  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/bits/allocator.h 
b/libstdc++-v3/include/bits/allocator.h
index e0c322b5941..f7770165273 100644
--- a/libstdc++-v3/include/bits/allocator.h
+++ b/libstdc++-v3/include/bits/allocator.h
@@ -92,7 +92,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using propagate_on_container_move_assignment = true_type;
 
   using is_always_equal
-   _GLIBCXX20_DEPRECATED_SUGGEST("allocator_traits::is_always_equal")
+   _GLIBCXX20_DEPRECATED_SUGGEST("std::allocator_traits::is_always_equal")
= true_type;
 
 #if __cplusplus >= 202002L
@@ -146,7 +146,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using propagate_on_container_move_assignment = true_type;
 
   using is_always_equal
-   _GLIBCXX20_DEPRECATED_SUGGEST("allocator_traits::is_always_equal")
+   _GLIBCXX20_DEPRECATED_SUGGEST("std::allocator_traits::is_always_equal")
= true_type;
 #endif
 
diff --git a/libstdc++-v3/libsupc++/exception b/libstdc++-v3/libsupc++/exception
index a7d6b4b93ee..43f1cf71262 100644
--- a/libstdc++-v3/libsupc++/exception
+++ b/libstdc++-v3/libsupc++/exception
@@ -120,7 +120,7 @@ namespace std
*  %exception can result in a call of 1terminate()`
*  (15.5.1).'
*/
-  _GLIBCXX17_DEPRECATED
+  _GLIBCXX17_DEPRECATED_SUGGEST("std::uncaught_exceptions()")
   bool uncaught_exception() _GLIBCXX_USE_NOEXCEPT __attribute__ ((__pure__));
 
 #if __cplusplus >= 201703L || !defined(__STRICT_ANSI__) // c++17 or gnu++98
-- 
2.34.1



Re: [PATCH] configure: add --disable-fix-includes

2022-02-04 Thread Martin Liška

On 2/4/22 14:30, Jakub Jelinek via Gcc-patches wrote:

We don't ship any include-fixed headers in Fedora/RHEL.


Removing include-fixed from an installed folder, I see:

make[2]: Entering directory '/home/marxin/Programming/postgres/src/common'
gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wimplicit-fallthrough=3 -Wcast-function-type -Wformat-security -fno-strict-aliasing -fwrapv 
-fexcess-precision=standard -Wno-format-truncation -Wno-stringop-truncation -O3 -march=native -flto=auto -DFRONTEND -I. -I../../src/common -I../../src/include  -D_GNU_SOURCE  -DVAL_CC="\"gcc\"" 
-DVAL_CPPFLAGS="\"-D_GNU_SOURCE\"" -DVAL_CFLAGS="\"-Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wimplicit-fallthrough=3 
-Wcast-function-type -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-format-truncation -Wno-stringop-truncation -O3 -march=native -flto=auto\"" -DVAL_CFLAGS_SL="\"-fPIC\"" 
-DVAL_LDFLAGS="\"-O3 -march=native -flto=auto -Wl,--as-needed -Wl,-rpath,'/usr/local/pgsql/lib64',--enable-new-dtags\"" -DVAL_LDFLAGS_EX="\"\"" -DVAL_LDFLAGS_SL="\"\"" 
-DVAL_LIBS="\"-lpgcommon -lpgport -lz -lreadline -lm \""  -c -o pg_lzcompress.o pg_lzcompress.c
In file included from pg_lzcompress.c:186:
/usr/include/limits.h:124:26: error: no include path in which to search for 
limits.h
  124 | # include_next <limits.h>
  |  ^
pg_lzcompress.c:226:9: error: ‘INT_MAX’ undeclared here (not in a function)
  226 | INT_MAX,/* No upper 
limit on what we'll try to
  | ^~~
pg_lzcompress.c:189:1: note: ‘INT_MAX’ is defined in header ‘<limits.h>’; did you 
forget to ‘#include <limits.h>’?
  188 | #include "common/pg_lzcompress.h"
  +++ |+#include <limits.h>

How do you solve this in Fedora/RHEL?

Thanks,
Martin


Re: [PATCH] configure: add --disable-fix-includes

2022-02-04 Thread Martin Liška

On 2/4/22 14:07, Rainer Orth wrote:

Hi Martin,


It seems to me that fixincludes is a hardly used feature for present-day header
files, so I'm suggesting a developer option that can skip the fixing.


please remember that there's a world beyond current-day Linux.


Sure! That's why I'm suggesting an option and not removing the machinery.




How is the feature used on other targets?


There are still quite a number of fixes on e.g. Solaris or macOS.  And
people are still building gcc on older OS versions for one reason or
another...


diff --git a/gcc/configure.ac b/gcc/configure.ac
index 1171c946e6e..6015e403aa9 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -842,6 +842,12 @@ gather_stats=`if test $enable_gather_detailed_mem_stats != 
no; then echo 1; else
   AC_DEFINE_UNQUOTED(GATHER_STATISTICS, $gather_stats,
   [Define to enable detailed memory allocation stats gathering.])
   
+AC_ARG_ENABLE(disable-fix-includes,

+[AS_HELP_STRING([--disable-fix-includes],


The beast is called fixincludes, no '-' or '_'.


+   [skip fixing of includes])], [],


Better say 'running fixincludes' for example rather than being
vague/obtuse.


Note using --disable-fixinclude effectively disables building of everything in 
the folder.
That's not what I want, it breaks GCC build.



The new option requires documenting in install.texi.

That said, I'm not sure this is really worth adding yet another option.
And how are developers supposed to know if they can safely use it or
not?


Sure.



Besides, have you actually run a regtest with that option?  I'm asking
because even on Ubuntu 20.04 fixincludes drops in it's own .
You need to check that dropping that is actually safe.


Let me try that.

Martin



Rainer





Re: [PATCH] configure: add --disable-fix-includes

2022-02-04 Thread Martin Liška

On 2/4/22 14:30, Jakub Jelinek via Gcc-patches wrote:

On Fri, Feb 04, 2022 at 02:25:17PM +0100, Richard Biener via Gcc-patches wrote:

Besides, have you actually run a regtest with that option?  I'm asking
because even on Ubuntu 20.04 fixincludes drops in it's own .
You need to check that dropping that is actually safe.


limits.h and syslimits.h are the only include-fixed we "ship" in our
packages.  Not sure if they are really required though.


We don't ship any include-fixed headers in Fedora/RHEL.
If there is an important need for fixing, it should be in Linux
distributions fixed by actually fixing the headers in their corresponding
packages, making modified copies of headers at gcc build time and overriding
through that headers from other packages that can change doesn't really work
properly when those headers ever change.  I know we could install the
fixincludes tool and trigger running it whenever any of such headers change
in other packages, but that is very expensive for very little gain.


I fully agree with the suggested approach! I've also noticed the script makes
modifications in comments (that are unnecessary), plus it effectively breaks
header files for things like:

$ diff /usr/lib64/gcc/aarch64-suse-linux/11/include-fixed/bits/unistd_ext.h 
/usr/aarch64-suse-linux/sys-root/usr/include/bits/unistd_ext.h

46c37
< # if __has_include ("__linux__/close_range.h")
---

# if __has_include ("linux/close_range.h")


I would suggest removing the fixed includes in openSUSE as well?

Martin



On other targets I understand fixincludes is much more important.

Jakub





Re: [PATCH] Speed up fixincludes.

2022-02-04 Thread Marek Polacek via Gcc-patches
On Fri, Feb 04, 2022 at 10:26:07AM +0100, Martin Liška wrote:
> On 2/3/22 22:51, Jakub Jelinek wrote:
> > On Thu, Feb 03, 2022 at 04:29:39PM -0500, Marek Polacek wrote:
> > > On Thu, Feb 03, 2022 at 10:13:36PM +0100, Martin Liška wrote:
> > > > On 2/3/22 19:44, Andreas Schwab wrote:
> > > > > On Feb 03 2022, Martin Liška wrote:
> > > > > 
> > > > > > +cd $LIB
> > > > > > +echo "$all_dirs" | xargs mkdir -p
> > > > > > +cd ..
> > > > > > +
> > > > > 
> > > > > $LIB always contains slashes.
> > > > > 
> > > > 
> > > > And what is the problem? You're too brief..
> > > 
> > > I guess his point is that if you do
> > > cd a/b/c/
> > > then
> > > cd ..
> > > will not get you back to where you started.  Perhaps you could use
> > > pushd/popd instead.
> > 
> > Or a subshell.
> > 
> > Jakub
> > 
> 
> I'm suggesting the following patch.
> 
> Ready to be installed?

LGTM.

> From 77bc388daf42d18334cb874407031fc49dbbaa67 Mon Sep 17 00:00:00 2001
> From: Martin Liska 
> Date: Fri, 4 Feb 2022 10:24:51 +0100
> Subject: [PATCH] fixincludes: Update pwd.
> 
> fixincludes/ChangeLog:
> 
>   * fixinc.in: Use cd OLDDIR instead of cd .. .
> ---
>  fixincludes/fixinc.in | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/fixincludes/fixinc.in b/fixincludes/fixinc.in
> index 0c3066452c6..0bd8027a554 100755
> --- a/fixincludes/fixinc.in
> +++ b/fixincludes/fixinc.in
> @@ -258,9 +258,10 @@ then echo "All directories (including links to 
> directories):"
>   echo $all_dirs
>  fi
>  
> +OLDDIR=`${PWDCMD}`
>  cd $LIB
>  echo "$all_dirs" | xargs mkdir -p
> -cd ..
> +cd ${OLDDIR}
>  
>  mkdir $LIB/root
>  
> -- 
> 2.35.1
> 


Marek



[PATCH] c++: Improve diagnostics for template args terminated with >= or >>= [PR104319]

2022-02-04 Thread Jakub Jelinek via Gcc-patches
Hi!

As mentioned in the PR, for C++98 we have diagnostics that expect
>> terminating template arguments to be a mistake for > > (C++11
said it has to be treated that way).  However, if a user trying to spare
the spacebar doesn't separate > from a following = or >> from a following =,
the diagnostic is confusing, whereas clang suggests adding a space in between.

The following patch does that for >= and >>= too.

For some strange reason the error recovery emits further errors,
not really sure what's going on because I overwrite the token->type
like the code does for the C++11 >> case or for the C++98 >> cases,
but at least the first error is nicer (well, for the C++98 nested
template case and >>= I need to overwrite it to > and so the = is lost,
so perhaps some follow-up errors are needed for that case).

Bootstrapped/regtested on powerpc64le-linux, ok for trunk?
Or shall it wait for GCC 13?

2022-02-04  Jakub Jelinek  

PR c++/104319
* parser.cc (cp_parser_template_argument): Treat >= like C++98 >>
after a type id by setting maybe_type_id and aborting tentative
parse.
(cp_parser_enclosed_template_argument_list): Handle
CPP_GREATER_EQ like misspelled CPP_GREATER CPP_EQ and
CPP_RSHIFT_EQ like misspelled CPP_GREATER CPP_GREATER_EQ
or CPP_RSHIFT CPP_EQ or CPP_GREATER CPP_GREATER CPP_EQ.
(cp_parser_next_token_ends_template_argument_p): Return true
also for CPP_GREATER_EQ and CPP_RSHIFT_EQ.

* g++.dg/parse/template28.C: Adjust expected diagnostics.
* g++.dg/parse/template30.C: New test.

--- gcc/cp/parser.cc.jj 2022-02-04 14:36:54.765608651 +0100
+++ gcc/cp/parser.cc2022-02-04 14:42:14.761139259 +0100
@@ -18820,8 +18820,13 @@ cp_parser_template_argument (cp_parser*
  In C++0x, the '>>' will be considered two separate '>'
  tokens.  */
   if (!cp_parser_error_occurred (parser)
-  && cxx_dialect == cxx98
-  && cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT))
+  && ((cxx_dialect == cxx98
+  && cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT))
+ /* Similarly for >= which
+cp_parser_next_token_ends_template_argument_p treats for
+diagnostics purposes as mistyped > =, but can be valid
+after a type-id.  */
+ || cp_lexer_next_token_is (parser->lexer, CPP_GREATER_EQ)))
 {
   maybe_type_id = true;
   cp_parser_abort_tentative_parse (parser);
@@ -32029,7 +32034,9 @@ cp_parser_enclosed_template_argument_lis
   cp_evaluated ev;
   /* Parse the template-argument-list itself.  */
   if (cp_lexer_next_token_is (parser->lexer, CPP_GREATER)
-  || cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT))
+  || cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT)
+  || cp_lexer_next_token_is (parser->lexer, CPP_GREATER_EQ)
+  || cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT_EQ))
 arguments = NULL_TREE;
   else
 arguments = cp_parser_template_argument_list (parser);
@@ -32086,6 +32093,38 @@ cp_parser_enclosed_template_argument_lis
"a template argument list");
}
 }
+  /* Similarly for >>= and >=.  */
+  else if (cp_lexer_next_token_is (parser->lexer, CPP_GREATER_EQ)
+  || cp_lexer_next_token_is (parser->lexer, CPP_RSHIFT_EQ))
+{
+  cp_token *token = cp_lexer_consume_token (parser->lexer);
+  gcc_rich_location richloc (token->location);
+  enum cpp_ttype new_type;
+  const char *replacement;
+  if (token->type == CPP_GREATER_EQ)
+   {
+ replacement = "> =";
+ new_type = CPP_EQ;
+   }
+  else if (!saved_greater_than_is_operator_p)
+   {
+ if (cxx_dialect != cxx98)
+   replacement = ">> =";
+ else
+   replacement = "> > =";
+ new_type = CPP_GREATER;
+   }
+  else
+   {
+ replacement = "> >=";
+ new_type = CPP_GREATER_EQ;
+   }
+  richloc.add_fixit_replace (replacement);
+  error_at (, "%qs should be %qs to terminate a template "
+   "argument list",
+   cpp_type2name (token->type, token->flags), replacement);
+  token->type = new_type;
+}
   else
 cp_parser_require_end_of_template_parameter_list (parser);
   /* The `>' token might be a greater-than operator again now.  */
@@ -33163,7 +33202,11 @@ cp_parser_next_token_ends_template_argum
   return (token->type == CPP_COMMA
   || token->type == CPP_GREATER
   || token->type == CPP_ELLIPSIS
- || ((cxx_dialect != cxx98) && token->type == CPP_RSHIFT));
+ || ((cxx_dialect != cxx98) && token->type == CPP_RSHIFT)
+ /* For better diagnostics, treat >>= like that too, that
+shouldn't appear non-nested in template arguments.  */
+ || token->type == CPP_GREATER_EQ
+ || token->type == CPP_RSHIFT_EQ);
 }
 
 /* Returns TRUE iff the n-th token is a "<", or the n-th is a "[" and the
--- 

Re: [PATCH 1/4][RFC] middle-end/90348 - add explicit birth

2022-02-04 Thread Richard Biener via Gcc-patches
On Fri, 4 Feb 2022, Jakub Jelinek wrote:

> On Fri, Feb 04, 2022 at 02:49:13PM +0100, Richard Biener wrote:
> > Any comments?  I have mixed feelings with proposing this for GCC 12
> > but like to hear from others as well.  I didn't try to evaluate
> > the quality of stack slot sharing before/after this change besides
> > fixing the testsuite fallout (we have a few testcases checking for
> > specific instances).
> 
> I have mixed feelings too, it is quite risky, on the other side we have
> those numerous otherwise unsolvable PRs.

Yep - it seems it's always stage3/4 when we get to those.  That said,
I'm happy to wait for stage1 and I'm also happy to revert if issues
pop up.

> I wonder if tree-ssa-live.cc (compute_live_vars_1) doesn't need similar
> changes, after all, it is a variant of the cfgexpand algorithm.

Oh, I wasn't aware of that.  It seems it's only used by tail-recursion
and inlining (there for inserting CLOBBERs).  But yes, I think it
might suffer from the same issue.

> And I'll certainly need to incrementally mark most if not all current
> build_clobbers in omp-low.cc as EOLs and emit birth clobbers (stuff that is
> added post gimplification, so too late for gimplification's added birth/eol
> handling there and so it must be done manually).

Adding testcases for intended stack slot sharing in those cases would
be nice.

Richard.


Re: [PATCH 1/4][RFC] middle-end/90348 - add explicit birth

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Fri, Feb 04, 2022 at 02:49:13PM +0100, Richard Biener wrote:
> Any comments?  I have mixed feelings with proposing this for GCC 12
> but like to hear from others as well.  I didn't try to evaluate
> the quality of stack slot sharing before/after this change besides
> fixing the testsuite fallout (we have a few testcases checking for
> specific instances).

I have mixed feelings too, it is quite risky, on the other side we have
those numerous otherwise unsolvable PRs.

I wonder if tree-ssa-live.cc (compute_live_vars_1) doesn't need similar
changes, after all, it is a variant of the cfgexpand algorithm.

And I'll certainly need to incrementally mark most if not all current
build_clobbers in omp-low.cc as EOLs and emit birth clobbers (stuff that is
added post gimplification, so too late for gimplification's added birth/eol
handling there and so it must be done manually).

Jakub



[committed] libgomp.fortran/allocate-1.f90: Minor cleanup (was: Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).)

2022-02-04 Thread Tobias Burnus

Hi Thomas,

On 04.02.22 10:37, Thomas Schwinge wrote:

I have attached a patch (not committed), which silences the three kinds of
warnings and fixes the interface issue.
TODO: commit it.

Still "TODO: commit it" ;-) -- and while I haven't reviewed the changes
in detail, I did spot one item that should be addressed, I suppose:


I had also spotted the 'stop', which was a leftover from -fsanitize=...
checking, and had removed it locally. But good that you also keep
checking patches :-)

In any case, I have now _finally_ committed the patch.

Attached is the simplified (-w) diff, where I did exclude the
indentation changes to make the diff more readable.

For the full diff, see e.g. https://gcc.gnu.org/r12-7053

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 6d4981350168f1eb3f72149bd7e05b9ba6bec1fd
Author: Tobias Burnus 
Date:   Fri Feb 4 14:51:01 2022 +0100

libgomp.fortran/allocate-1.f90: Minor cleanup

libgomp/ChangeLog:
* testsuite/libgomp.fortran/allocate-1.c (is_64bit_aligned): Renamed
from is_64bit_aligned_.
* testsuite/libgomp.fortran/allocate-1.f90: Fix interface decl
and use it, more implicit none, remove unused argument.

diff --git a/libgomp/testsuite/libgomp.fortran/allocate-1.c b/libgomp/testsuite/libgomp.fortran/allocate-1.c
index d33acc6feef..cb6d355afc6 100644
--- a/libgomp/testsuite/libgomp.fortran/allocate-1.c
+++ b/libgomp/testsuite/libgomp.fortran/allocate-1.c
@@ -1,7 +1,7 @@
 #include 
 
 int
-is_64bit_aligned_ (uintptr_t a)
+is_64bit_aligned (uintptr_t a)
 {
   return ( (a & 0x3f) == 0);
 }
diff --git a/libgomp/testsuite/libgomp.fortran/allocate-1.f90 b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
index 35d1750b878..062278f9908 100644
--- a/libgomp/testsuite/libgomp.fortran/allocate-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
@@ -5,30 +5,30 @@
 module m
   use omp_lib
   use iso_c_binding
-  implicit none
+  implicit none (type, external)
 
   interface
 integer(c_int) function is_64bit_aligned (a) bind(C)
   import :: c_int
-  integer  :: a
+  type(*)  :: a
 end
   end interface
-end module m
 
-subroutine foo (x, p, q, px, h, fl)
+contains
+
+subroutine foo (x, p, q, h, fl)
   use omp_lib
   use iso_c_binding
   integer  :: x
   integer, dimension(4) :: p
   integer, dimension(4) :: q
-  integer  :: px
   integer (kind=omp_allocator_handle_kind) :: h
   integer  :: fl
 
   integer  :: y
   integer  :: r, i, i1, i2, i3, i4, i5
   integer  :: l, l3, l4, l5, l6
-  integer  :: n, n1, n2, n3, n4
+  integer  :: n, n2, n3, n4
   integer  :: j2, j3, j4
   integer, dimension(4) :: l2
   integer, dimension(4) :: r2
@@ -74,6 +74,8 @@ subroutine foo (x, p, q, px, h, fl)
   if (x /= 42) then
 stop 1
   end if
+
+  !!$omp barrier
   v(1) = 7
   if ( (and(fl, 2) /= 0) .and.  &
((is_64bit_aligned(x) == 0) .or. &
@@ -95,7 +97,7 @@ subroutine foo (x, p, q, px, h, fl)
 stop 4
   end if
   !$omp end parallel
-
+stop
   !$omp teams
   !$omp parallel private (y) firstprivate (x, w) allocate (h: x, y, w)
 
@@ -305,11 +307,13 @@ subroutine foo (x, p, q, px, h, fl)
   .or. r2(1) /= (5 * p(3)) .or. r2(4) /= (6 * p(3))) then
 stop 25
   end if
-
 end subroutine
+end module m
 
 program main
   use omp_lib
+  use m
+  implicit none (type, external)
   integer, dimension(4) :: p
   integer, dimension(4) :: q
 
@@ -323,11 +327,11 @@ program main
   if (a == omp_null_allocator) stop 1
 
   call omp_set_default_allocator (omp_default_mem_alloc);
-  call foo (42, p, q, 2, a, 0);
-  call foo (42, p, q, 2, omp_default_mem_alloc, 0);
-  call foo (42, p, q, 2, a, 1);
+  call foo (42, p, q, a, 0);
+  call foo (42, p, q, omp_default_mem_alloc, 0);
+  call foo (42, p, q, a, 1);
   call omp_set_default_allocator (a);
-  call foo (42, p, q, 2, omp_null_allocator, 3);
-  call foo (42, p, q, 2, omp_default_mem_alloc, 2);
+  call foo (42, p, q, omp_null_allocator, 3);
+  call foo (42, p, q, omp_default_mem_alloc, 2);
   call omp_destroy_allocator (a);
 end


[PATCH 4/4] Optimization passes adjustments for birth CLOBBERs

2022-02-04 Thread Richard Biener via Gcc-patches
Since stack slot sharing now only works when RTL expansion
sees the point of birth of variables explicitly marked,
we have to insert those markers during optimization.

One case is when CCP simplifies a VLA allocation done
with __builtin_stack_{save,restore} to a decl with constant size.
There we already place the proper end-of-life CLOBBERs and the
patch changes us to also emit birth CLOBBERs.  gcc.dg/pr51491.c
is where the effect is visible.

A similar case happens when inlining produces declarations for
arguments and result variables.  This is visible in
g++.dg/opt/pr81715.C for example.

2022-02-02  Richard Biener  

* tree-inline.cc (expand_call_inline): Also insert birth
CLOBBERs for parameter and return declarations.
* tree-ssa-ccp.cc (insert_clobbers_for_var): Also insert
birth CLOBBERs.

* gcc.dg/pr51491-2.c: Adjust.
---
 gcc/testsuite/gcc.dg/pr51491-2.c |  3 ++-
 gcc/tree-inline.cc   | 27 ++-
 gcc/tree-ssa-ccp.cc  |  5 +
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr51491-2.c b/gcc/testsuite/gcc.dg/pr51491-2.c
index 429ee4e5914..910be3ddeec 100644
--- a/gcc/testsuite/gcc.dg/pr51491-2.c
+++ b/gcc/testsuite/gcc.dg/pr51491-2.c
@@ -31,4 +31,5 @@ f (int n)
   return tt;
 }
 
-/* { dg-final { scan-tree-dump-times "CLOBBER" 2 "ccp1"} } */
+/* There is one redundant birth before the a[4] due to a DECL_EXPR + BIND.  */
+/* { dg-final { scan-tree-dump-times "CLOBBER" 5 "ccp1"} } */
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
index ca66a8266b1..cb825077bde 100644
--- a/gcc/tree-inline.cc
+++ b/gcc/tree-inline.cc
@@ -4767,7 +4767,7 @@ expand_call_inline (basic_block bb, gimple *stmt, 
copy_body_data *id,
   cgraph_inline_failed_t reason;
   basic_block return_block;
   edge e;
-  gimple_stmt_iterator gsi, stmt_gsi;
+  gimple_stmt_iterator gsi, stmt_gsi, birth_gsi;
   bool successfully_inlined = false;
   bool purge_dead_abnormal_edges;
   gcall *call_stmt;
@@ -4924,6 +4924,7 @@ expand_call_inline (basic_block bb, gimple *stmt, 
copy_body_data *id,
   /* Split the block before the GIMPLE_CALL.  */
   stmt_gsi = gsi_for_stmt (stmt);
   gsi_prev (_gsi);
+  birth_gsi = stmt_gsi;
   e = split_block (bb, gsi_end_p (stmt_gsi) ? NULL : gsi_stmt (stmt_gsi));
   bb = e->src;
   return_block = e->dest;
@@ -5143,6 +5144,18 @@ expand_call_inline (basic_block bb, gimple *stmt, 
copy_body_data *id,
  clobber_stmt = gimple_build_assign (*varp, clobber);
  gimple_set_location (clobber_stmt, gimple_location (stmt));
  gsi_insert_before (_gsi, clobber_stmt, GSI_SAME_STMT);
+
+ clobber = build_clobber (TREE_TYPE (*varp), CLOBBER_BIRTH);
+ clobber_stmt = gimple_build_assign (*varp, clobber);
+ gimple_set_location (clobber_stmt, gimple_location (stmt));
+ if (gsi_end_p (birth_gsi))
+   {
+ birth_gsi = gsi_start_bb (gsi_bb (birth_gsi));
+ gsi_insert_before (_gsi, clobber_stmt, GSI_NEW_STMT);
+   }
+ else
+   gsi_insert_after (_gsi, clobber_stmt,
+ GSI_CONTINUE_LINKING);
}
}
 
@@ -5212,6 +5225,18 @@ expand_call_inline (basic_block bb, gimple *stmt, 
copy_body_data *id,
  clobber_stmt = gimple_build_assign (id->retvar, clobber);
  gimple_set_location (clobber_stmt, gimple_location (old_stmt));
  gsi_insert_after (_gsi, clobber_stmt, GSI_SAME_STMT);
+
+ clobber = build_clobber (TREE_TYPE (id->retvar), CLOBBER_BIRTH);
+ clobber_stmt = gimple_build_assign (id->retvar, clobber);
+ gimple_set_location (clobber_stmt, gimple_location (call_stmt));
+ if (gsi_end_p (birth_gsi))
+   {
+ birth_gsi = gsi_start_bb (gsi_bb (birth_gsi));
+ gsi_insert_before (_gsi, clobber_stmt, GSI_NEW_STMT);
+   }
+ else
+   gsi_insert_after (_gsi, clobber_stmt,
+ GSI_CONTINUE_LINKING);
}
 }
   else
diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
index 9164efe3037..77910715755 100644
--- a/gcc/tree-ssa-ccp.cc
+++ b/gcc/tree-ssa-ccp.cc
@@ -2573,6 +2573,11 @@ insert_clobbers_for_var (gimple_stmt_iterator i, tree 
var)
   if (saved_val == NULL_TREE)
continue;
 
+  /* Place a birth after the stack-save.  */
+  tree clobber = build_clobber (TREE_TYPE (var), CLOBBER_BIRTH);
+  gimple *clobber_stmt = gimple_build_assign (var, clobber);
+  gsi_insert_after (, clobber_stmt, GSI_SAME_STMT);
+
   insert_clobber_before_stack_restore (saved_val, var, );
   break;
 }
-- 
2.34.1


[PATCH 3/4] Diagnostic passes adjustments

2022-02-04 Thread Richard Biener via Gcc-patches
This adjusts diagnostic passes for the birth CLOBBERs where
necessary.  In particular the uninit diagnostics rely on particularly
shaped IL to simplify the expression printed (to be cleaned up independently)
in gcc.dg/pr86058.c.

2022-02-02  Richard Biener  

* tree-ssa-uninit.cc (check_defs_data::found_full_clobber):
New member.
(check_defs): Set it.
(maybe_warn_operand): Use it to treat expression simplification
the same way as when the function entry was reached.
---
 gcc/tree-ssa-uninit.cc | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-uninit.cc b/gcc/tree-ssa-uninit.cc
index 02e88d58e1f..f2b15439113 100644
--- a/gcc/tree-ssa-uninit.cc
+++ b/gcc/tree-ssa-uninit.cc
@@ -319,6 +319,8 @@ struct check_defs_data
 {
   /* If we found any may-defs besides must-def clobbers.  */
   bool found_may_defs;
+  /* If we found a GIMPLE clobber that made the whole ref undefined.  */
+  bool found_full_clobber;
 };
 
 /* Return true if STMT is a call to built-in function all of whose
@@ -501,7 +503,10 @@ check_defs (ao_ref *ref, tree vdef, void *data_)
   if (gimple_clobber_p (def_stmt))
 {
   if (stmt_kills_ref_p (def_stmt, ref))
-   return true;
+   {
+ data->found_full_clobber = true;
+ return true;
+   }
   return false;
 }
 
@@ -601,6 +606,7 @@ maybe_warn_operand (ao_ref , gimple *stmt, tree lhs, 
tree rhs,
   check_defs_data data;
   bool fentry_reached = false;
   data.found_may_defs = false;
+  data.found_full_clobber = false;
   tree use = gimple_vuse (stmt);
   if (!use)
 return NULL_TREE;
@@ -666,7 +672,10 @@ maybe_warn_operand (ao_ref , gimple *stmt, tree lhs, 
tree rhs,
  if (tree ba = get_base_address (base))
base = ba;
}
+}
 
+  if (fentry_reached || data.found_full_clobber)
+{
   /* Replace the RHS expression with BASE so that it
 refers to it in the diagnostic (instead of to
 '').  */
-- 
2.34.1



[PATCH 2/4] Testcase adjustments for birth CLOBBERs

2022-02-04 Thread Richard Biener via Gcc-patches
Mostly decl shuffling, using patterns to make the scans less
error-prone and adjustments for now appearing birth CLOBBERs.

2022-02-02  Richard Biener  

* gcc.dg/pr87052.c: Adjust.
* gcc.dg/tm/memopt-3.c: Likewise.
* gcc.dg/torture/pta-ptrarith-1.c: Likewise.
* gcc.dg/torture/pta-ptrarith-2.c: Likewise.
* gcc.dg/tree-ssa/20031015-1.c: Likewise.
* gcc.dg/tree-ssa/alias-19.c: Likewise.
* gcc.dg/tree-ssa/dse-points-to.c: Likewise.
* gcc.dg/tree-ssa/pta-callused.c: Likewise.
---
 gcc/testsuite/gcc.dg/pr87052.c| 3 ++-
 gcc/testsuite/gcc.dg/tm/memopt-3.c| 2 +-
 gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c | 2 +-
 gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c | 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/20031015-1.c| 6 --
 gcc/testsuite/gcc.dg/tree-ssa/alias-19.c  | 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c | 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c  | 2 +-
 8 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr87052.c b/gcc/testsuite/gcc.dg/pr87052.c
index 18e092c4674..c67d9ae02f8 100644
--- a/gcc/testsuite/gcc.dg/pr87052.c
+++ b/gcc/testsuite/gcc.dg/pr87052.c
@@ -37,5 +37,6 @@ void test (void)
{ dg-final { scan-tree-dump-times "b = \"ax00bc\";"  1 "gimple" } }
{ dg-final { scan-tree-dump-times "c = \"\";"  1 "gimple" } }
{ dg-final { scan-tree-dump-times "d = { *};"  1 "gimple" } }
-   { dg-final { scan-tree-dump-times "e = "  1 "gimple" } }
+   { dg-final { scan-tree-dump-times "e = "  3 "gimple" } }
+   { dg-final { scan-tree-dump-times "e = {CLOBBER\\(birth\\)}"  2 "gimple" } }
{ dg-final { scan-tree-dump-times "e = {CLOBBER\\(eol\\)}"  1 "gimple" } }  
*/
diff --git a/gcc/testsuite/gcc.dg/tm/memopt-3.c 
b/gcc/testsuite/gcc.dg/tm/memopt-3.c
index 5316f9cae20..299494a1d2b 100644
--- a/gcc/testsuite/gcc.dg/tm/memopt-3.c
+++ b/gcc/testsuite/gcc.dg/tm/memopt-3.c
@@ -17,4 +17,4 @@ int f()
   return lala.x[0];
 }
 
-/* { dg-final { scan-tree-dump-times "logging: lala.x\\\[i_4\\\]" 1 "tmmark" } 
} */
+/* { dg-final { scan-tree-dump-times "logging: lala.x\\\[i_\[0-9\]+\\\]" 1 
"tmmark" } } */
diff --git a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c 
b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c
index 85b68068b12..4e27b12 100644
--- a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c
+++ b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c
@@ -32,4 +32,4 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "ESCAPED = {\[^\n\}\]* i f \[^\n\}\]*}" "alias" 
} } */
+/* { dg-final { scan-tree-dump "ESCAPED = {\[^\n\}\]* (i f|f i) \[^\n\}\]*}" 
"alias" } } */
diff --git a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c 
b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c
index 4f5556acc93..5083d6367fa 100644
--- a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c
+++ b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c
@@ -32,4 +32,4 @@ int main()
 
 /* In theory = { i } is the correct solution.  But it's not easy to scan
for that reliably, so just use what we create now.  */
-/* { dg-final { scan-tree-dump "= { i j }" "alias" } } */
+/* { dg-final { scan-tree-dump "= { (i j|j i) }" "alias" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20031015-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/20031015-1.c
index faa6853f571..b97b4475132 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/20031015-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20031015-1.c
@@ -13,5 +13,7 @@ main(void)
   return 0;
 }
 
-/* The VDEF comes from the initial assignment, the asm, and the clobber.  */
-/* { dg-final { scan-tree-dump-times "DEF" 3 "alias" } } */
+/* The VDEF comes from the birth clobber, initial assignment, the asm, and
+   the clobber.
+   ???  The birth clobber is duplicate.  */
+/* { dg-final { scan-tree-dump-times "DEF" 5 "alias" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/alias-19.c 
b/gcc/testsuite/gcc.dg/tree-ssa/alias-19.c
index 330ec001705..15f44a1c284 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/alias-19.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-19.c
@@ -25,4 +25,4 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "q_. = { a b }" "alias" } } */
+/* { dg-final { scan-tree-dump "q_. = { (a b|b a) }" "alias" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c 
b/gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c
index 762d6720143..fced53fe50b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c
@@ -11,4 +11,4 @@ f ()
   return a;
 }
 
-/* { dg-final { scan-tree-dump-times "Deleted dead store.*p_1" 1 "dse1"} } */
+/* { dg-final { scan-tree-dump-times "Deleted dead store: \\*p_\[0-9\]+ = 1" 1 
"dse1"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c
index b9a57d8d135..45826415d56 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c
@@ -22,5 +22,5 @@ int bar (int b)
   return *foo 

[PATCH 1/4][RFC] middle-end/90348 - add explicit birth

2022-02-04 Thread Richard Biener via Gcc-patches
This adds explicit variable birth CLOBBERs in an attempt to fix
PR90348 and duplicates.  The birth / death CLOBBER pairs are
used to compute liveness and conflicts for stack variable
coalescing where the lack of an explicit birth but instead
use of first mention causes misrepresentation of variable life
ranges when optimization moves the first mention above the
original birth point at the variable's bind expression start.

Birth CLOBBERs are represented as traditional CLOBBERs with all
the accompanying effects on optimization.  While they do not serve
as a barrier for address mentions they act as barrier for the
actual accesses which is enough for determining conflicts in
the context of stack slot sharing.  The birth CLOBBERs are
distinguished from death CLOBBERs by setting CLOBBER_MARKS_BIRTH
using the private_flag on the CONSTRUCTOR node and amend the
CLOBBER_MARK_EOL marked clobbers introduced earlier.

The patch changes how we handle stack variables that are not marked
with CLOBBERs.  For those, the first mention started the live range, which
then lasted until function exit, which means that effectively every unmarked
variable conflicted with every other variable.  This property is best
represented by an extra flag rather than the full conflict bitmap
which is what the patch introduces with conflicts_represented which
is cleared at start and set to true once we visit the first birth
CLOBBER.  From that on we assume all variable accesses are properly
fenced by birth/death CLOBBERs.

Variables without explicit births will not take part in stack slot
sharing after this change.

Currently birth CLOBBERs are added when gimplification adds
corresponding end-of-life CLOBBERs, during bind and target expression
gimplification.  Generally inserting births at DECL_EXPRs is
more precise so we also do it there (also noting not all such variables
are mentioned in BINDs).  Avoiding redundant births is on the TODO
list and might also remove the need for the warn_switch_unreachable_r
hunk.

This is the meat of the PR90348 fix, the following 3 patches perform
testcase adjustments and followup fixes to avoid regressions.

Bootstrapped on x86_64-unknown-linux-gnu - re-testing in progress.

Any comments?  I have mixed feelings about proposing this for GCC 12
but would like to hear from others as well.  I didn't try to evaluate
the quality of stack slot sharing before/after this change besides
fixing the testsuite fallout (we have a few testcases checking for
specific instances).

Thanks,
Richard.

2022-02-01  Richard Biener  

PR middle-end/90348
PR middle-end/103006
* tree-core.h (clobber_kind): Add CLOBBER_BIRTH.
* gimplify.cc (gimplify_bind_expr): Also add birth CLOBBERs.
(gimplify_target_expr): Likewise.
(gimplify_decl_expr): Likewise.
(warn_switch_unreachable_r): Do not treat birth CLOBBERs as
real stmt - they get added at BIND starts but that's before
the case labels.
* tree-pretty-print.cc (dump_generic_node): Mark birth CLOBBERs.
* cfgexpand.cc (stack_var::conflicts_represented): New.
(add_stack_var): Initialize conflicts_represented.
(add_stack_var_conflict): Assert the conflicts for the
vars are represented.
(stack_var_conflict_p): Honor conflicts_represented flag.
(visit_op): Remove.
(visit_conflict): Likewise.
(add_scope_conflicts_1): Simplify by only considering birth
and death CLOBBERs.
(add_scope_conflicts): Adjust comment.
(add_stack_protection_conflicts): Only add conflicts for
variables that have them represented.

* gcc.dg/torture/pr103006-1.c: New testcase.
* gcc.dg/torture/pr103006-2.c: Likewise.
* gcc.dg/torture/pr90348.c: Likewise.
---
 gcc/cfgexpand.cc  | 157 +-
 gcc/gimplify.cc   |  83 +++-
 gcc/testsuite/gcc.dg/torture/pr103006-1.c |  27 
 gcc/testsuite/gcc.dg/torture/pr103006-2.c |  29 
 gcc/testsuite/gcc.dg/torture/pr90348.c|  40 ++
 gcc/tree-core.h   |   2 +
 gcc/tree-pretty-print.cc  |   4 +-
 7 files changed, 244 insertions(+), 98 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr103006-1.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr103006-2.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr90348.c

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index d51af2e3084..02467874996 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -319,6 +319,10 @@ public:
  size, the alignment for this partition.  */
   unsigned int alignb;
 
+  /* Whether this variable has conflicts represented in the CONFLICTS
+ bitmap.  If not, it conflicts with all other stack variables.  */
+  bool conflicts_represented;
+
   /* The partition representative.  */
   size_t representative;
 
@@ -479,7 +483,8 @@ add_stack_var (tree decl, bool really_expand)
   v->representative = 

[PATCH] c++, v3: Further address_compare fixes [PR89074]

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Thu, Feb 03, 2022 at 04:34:17PM -0500, Jason Merrill wrote:
> On 2/3/22 16:18, Jakub Jelinek wrote:
> > On Thu, Feb 03, 2022 at 04:04:57PM -0500, Jason Merrill wrote:
> > > > > I think it would be clearer to leave the !DECL_P case alone and add
> > > > > 
> > > > > /* In C++ it is unspecified, and so non-constant, whether two
> > > > >  equivalent strings have the same address.  */
> > > > > else if (folding_cxx_constexpr
> > > > >&& (TREE_CODE (base0) == STRING_CST
> > > > >|| TREE_CODE (base1) == STRING_CST)
> > > > 
> > > > The point was to let the first if handle for
> > > > !folding_cxx_constexpr the cases with STRING_CST
> > > > as one or both operands and if that falls through, return 2.
> > > 
> > > Ah, I see.  And then for folding_cxx_constexpr you have your new code 
> > > toward
> > > the bottom of the function that can say they're unequal in some cases.  
> > > Can
> > > you combine the STRING_CST handling for both values of 
> > > folding_cxx_constexpr
> > > instead of having them so far apart?
> > 
> > Not easily, because for the folding_cxx_constexpr case it primarily reuses
> > the code from the last else if - computing sizes of objects and checking
> > if one is at a start of one and another at the end of the other.
> 
> And the !folding_cxx_constexpr case shouldn't also use that code?
> 
> > One further option would be to compute early flags like
> >enum { OFF_POS_START, OFF_POS_MIDDLE, OFF_POS_END } pos0, pos1;
> > and then just use them or ignore them in the decisions later.
> 
> If that helps to refactor a bit, sure.

Here it is, hopefully it makes the code more readable and understandable.

Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

2022-02-04  Jakub Jelinek  

PR c++/89074
PR c++/104033
* fold-const.h (folding_initializer): Adjust comment.
(folding_cxx_constexpr): Declare.
* fold-const.cc (folding_initializer): Adjust comment.
(folding_cxx_constexpr): New variable.
(address_compare): Restrict the decl vs. STRING_CST
or vice versa or STRING_CST vs. STRING_CST or
is_global_var != is_global_var optimizations to !folding_cxx_constexpr.
Punt for FUNCTION_DECLs with non-zero offsets.  If folding_initializer,
assume non-aliased functions have non-zero size and have different
addresses.  For folding_cxx_constexpr, punt on comparisons of start
of some object and end of another one, regardless whether it is a decl
or string literal.  Also punt for folding_cxx_constexpr on
STRING_CST vs. STRING_CST comparisons if the two literals could be
overlapping.

* constexpr.cc (cxx_eval_binary_expression): Temporarily set
folding_cxx_constexpr.

* g++.dg/cpp1y/constexpr-89074-3.C: New test.

--- gcc/fold-const.h.jj 2022-02-01 20:10:51.235856007 +0100
+++ gcc/fold-const.h2022-02-03 15:02:02.700228631 +0100
@@ -20,9 +20,16 @@ along with GCC; see the file COPYING3.
 #ifndef GCC_FOLD_CONST_H
 #define GCC_FOLD_CONST_H
 
-/* Non-zero if we are folding constants inside an initializer; zero
-   otherwise.  */
+/* Nonzero if we are folding constants inside an initializer or a C++
+   manifestly-constant-evaluated context; zero otherwise.
+   Should be used when folding in initializer enables additional
+   optimizations.  */
 extern int folding_initializer;
+/* Nonzero if we are folding in a C++ manifestly-constant-evaluated context;
+   zero otherwise.
+   Should be used when certain constructs shouldn't be optimized
+   during folding in that context.  */
+extern bool folding_cxx_constexpr;
 
 /* Convert between trees and native memory representation.  */
 extern int native_encode_expr (const_tree, unsigned char *, int, int off = -1);
--- gcc/fold-const.cc.jj2022-02-03 14:31:32.243129408 +0100
+++ gcc/fold-const.cc   2022-02-04 10:19:13.812784763 +0100
@@ -86,9 +86,17 @@ along with GCC; see the file COPYING3.
 #include "gimple-range.h"
 
 /* Nonzero if we are folding constants inside an initializer or a C++
-   manifestly-constant-evaluated context; zero otherwise.  */
+   manifestly-constant-evaluated context; zero otherwise.
+   Should be used when folding in initializer enables additional
+   optimizations.  */
 int folding_initializer = 0;
 
+/* Nonzero if we are folding in a C++ manifestly-constant-evaluated context;
+   zero otherwise.
+   Should be used when certain constructs shouldn't be optimized
+   during folding in that context.  */
+bool folding_cxx_constexpr = false;
+
 /* The following constants represent a bit based encoding of GCC's
comparison operators.  This encoding simplifies transformations
on relational comparison operators, such as AND and OR.  */
@@ -16628,41 +16636,55 @@ address_compare (tree_code code, tree ty
   HOST_WIDE_INT ioff0 = -1, ioff1 = -1;
   off0.is_constant ();
   off1.is_constant ();
-  if ((DECL_P (base0) && TREE_CODE (base1) == STRING_CST)
-   || 

Re: Porting the Docs to Sphinx - project status

2022-02-04 Thread Matthias Klose
On 1/31/22 15:06, Martin Liška wrote:
> Hello.
> 
> It's about 5 months since the last project status update:
> https://gcc.gnu.org/pipermail/gcc-patches/2021-August/577108.html
> Now it's pretty clear that it won't be merged before GCC 12.1 gets released.
> 
> So where we are? I contacted documentation maintainers (Gerald, Sandra and
> Joseph) at the
> end of the year in a private email, where I pinged the patches. My take away 
> is
> that both
> Gerald and Joseph are fine with the porting, while Sandra has some concerns.
> Based on her
> feedback, I was able to improve the PDF generated output significantly and I'm
> pleased by the
> provided feedback. That led to the following 2 Sphinx pulls requests that need
> to be merged
> before we can migrate the documentation: [1], [2].
> 
> Since the last time I also made one more round of proofreading and the layout
> was improved
> (mainly for PDF part). Current version of the documentation can be seen here:
> https://splichal.eu/scripts/sphinx/
> 
> I would like to finish the transition once GCC 12.1 gets released in May/June
> this year.
> There are still some minor regressions, but overall the Sphinx-based
> documentation should
> be a significant improvement over what we've got right now.
> 
> Please take this email as urgent call for a feedback!

Please take care about the copyrights.  I only checked the D frontend manual,
and this one suddenly has a copyright with invariant sections, compared to the
current gdc.texi which has a copyright *without* the invariant sections.  Debian
doesn't allow me to ship documentation with invariant sections ...

I didn't look at how much you reorganized the sources, but it would be nice to split
the files into those documenting command line options (used to generate the man
pages) and other documentation.  This is already done for gcc/doc, but not for
other frontends.  It would allow having manual pages with a copyright requiring
front and back cover texts in the manual pages.

It would also be nice to not require the latest sphinx version (and probably some
plugins), so that distros can build the docs with older sphinx versions as well.

Matthias


Re: [PATCH][v2] tree-optimization/100499 - niter analysis and multiple_of_p

2022-02-04 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> niter analysis uses multiple_of_p which currently assumes
> operations like MULT_EXPR do not wrap.  We've got to rely on this
> for optimizing size expressions like those in DECL_SIZE and those
> generally use unsigned arithmetic with no indication that they
> are not expected to wrap.  To preserve that the following adds
> a parameter to multiple_of_p, defaulted to true, indicating that
> the TOP expression is not expected to wrap for outer computations
> in TYPE.  This mostly follows a patch proposed by Bin last year
> with the conversion behavior added.
>
> Applying to all users the new effect is that upon type conversions
> in the TOP expression the behavior will switch to honor
> TYPE_OVERFLOW_UNDEFINED for the converted sub-expressions.
>
> The patch also changes the occurrence in niter analysis that we
> know is problematic and we have testcases for to pass false
> to multiple_of_p.  The patch also contains a change to the
> PR72817 fix from Bin to avoid regressing gcc.dg/tree-ssa/loop-42.c.
>
> The intent for stage1 is to introduce a size_multiple_of_p and
> internalize the added parameter so all multiple_of_p users will
> honor TYPE_OVERFLOW_UNDEFINED and users dealing with size expressions
> need to be switched to size_multiple_of_p.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
>
> Thanks,
> Richard.
>
> 2022-01-26  Richard Biener  
>
>   PR tree-optimization/100499
>   * fold-const.h (multiple_of_p): Add nowrap parameter, defaulted
>   to true.
>   * fold-const.cc (multiple_of_p): Likewise.  Honor it for
>   MULT_EXPR, PLUS_EXPR and MINUS_EXPR and pass it along,
>   switching to false for conversions.
>   * tree-ssa-loop-niter.cc (number_of_iterations_ne): Do not
>   claim the outermost expression does not wrap when calling
>   multiple_of_p.  Refactor the check done to check the
>   original IV, avoiding a bias that might wrap.
>
>   * gcc.dg/torture/pr100499-1.c: New testcase.
>   * gcc.dg/torture/pr100499-2.c: Likewise.
>   * gcc.dg/torture/pr100499-3.c: Likewise.

LGTM FWIW, but…

> Co-authored-by: Bin Cheng  
> ---
>  gcc/fold-const.cc | 80 +++
>  gcc/fold-const.h  |  2 +-
>  gcc/testsuite/gcc.dg/torture/pr100499-1.c | 27 
>  gcc/testsuite/gcc.dg/torture/pr100499-2.c | 16 +
>  gcc/testsuite/gcc.dg/torture/pr100499-3.c | 14 
>  gcc/tree-ssa-loop-niter.cc| 52 ++-
>  6 files changed, 130 insertions(+), 61 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-3.c
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 12732d39c79..2578a86ca1a 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -14073,10 +14073,16 @@ fold_binary_initializer_loc (location_t loc, 
> tree_code code, tree type,
>   SAVE_EXPR (I) * SAVE_EXPR (J)
>  
> (where the same SAVE_EXPR (J) is used in the original and the
> -   transformed version).  */
> +   transformed version).
> +
> +   NOWRAP specifies whether all outer operations in TYPE should
> +   be considered not wrapping.  Any type conversion within TOP acts
> +   as a barrier and we will fall back to NOWRAP being false.
> +   NOWRAP is mostly used to treat expressions in TYPE_SIZE and friends
> +   as not wrapping even though they are generally using unsigned arithmetic. 
>  */
>  
>  int
> -multiple_of_p (tree type, const_tree top, const_tree bottom)
> +multiple_of_p (tree type, const_tree top, const_tree bottom, bool nowrap)
>  {
>gimple *stmt;
>tree op1, op2;
> @@ -14094,10 +14100,17 @@ multiple_of_p (tree type, const_tree top, 
> const_tree bottom)
>a multiple of BOTTOM then TOP is a multiple of BOTTOM.  */
>if (!integer_pow2p (bottom))
>   return 0;
> -  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> -   || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
> +  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom, nowrap)
> +   || multiple_of_p (type, TREE_OPERAND (top, 0), bottom, nowrap));
>  
>  case MULT_EXPR:
> +  /* If the multiplication can wrap we cannot recurse further unless
> +  the bottom is a power of two which is where wrapping does not
> +  matter.  */
> +  if (!nowrap
> +   && !TYPE_OVERFLOW_UNDEFINED (type)
> +   && !integer_pow2p (bottom))
> + return 0;
>if (TREE_CODE (bottom) == INTEGER_CST)
>   {
> op1 = TREE_OPERAND (top, 0);
> @@ -14106,24 +14119,24 @@ multiple_of_p (tree type, const_tree top, 
> const_tree bottom)
>   std::swap (op1, op2);
> if (TREE_CODE (op2) == INTEGER_CST)
>   {
> -   if (multiple_of_p (type, op2, bottom))
> +   if (multiple_of_p (type, op2, bottom, nowrap))
>   

Re: [PATCH] configure: add --disable-fix-includes

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Fri, Feb 04, 2022 at 02:25:17PM +0100, Richard Biener via Gcc-patches wrote:
> > Besides, have you actually run a regtest with that option?  I'm asking
> > because even on Ubuntu 20.04 fixincludes drops in it's own .
> > You need to check that dropping that is actually safe.
> 
> limits.h and syslimits.h are the only include-fixed we "ship" in our
> packages.  Not sure if they are really required though.

We don't ship any include-fixed headers in Fedora/RHEL.
If there is an important need for fixing, in Linux distributions it should
be done by actually fixing the headers in their corresponding packages;
making modified copies of headers at gcc build time and using them to
override headers from other packages that can change doesn't really work
properly when those headers ever change.  I know we could install the
fixincludes tool and trigger running it whenever any of such headers change
in other packages, but that is very expensive for very little gain.

On other targets I understand fixincludes is much more important.

Jakub



Re: [PATCH] configure: add --disable-fix-includes

2022-02-04 Thread Richard Biener via Gcc-patches
On Fri, Feb 4, 2022 at 2:08 PM Rainer Orth  
wrote:
>
> Hi Martin,
>
> > It seems to me that fixincludes is a hardly used feature for present-day header
> > files and so I'm suggesting a developer option that can skip the fixing.
>
> please remember that there's a world beyond current-day Linux.
>
> > How is the feature used on other targets?
>
> There are still quite a number of fixes on e.g. Solaris or macOS.  And
> people are still building gcc on older OS versions for one reason or
> another...
>
> > diff --git a/gcc/configure.ac b/gcc/configure.ac
> > index 1171c946e6e..6015e403aa9 100644
> > --- a/gcc/configure.ac
> > +++ b/gcc/configure.ac
> > @@ -842,6 +842,12 @@ gather_stats=`if test 
> > $enable_gather_detailed_mem_stats != no; then echo 1; else
> >   AC_DEFINE_UNQUOTED(GATHER_STATISTICS, $gather_stats,
> >   [Define to enable detailed memory allocation stats gathering.])
> >
> > +AC_ARG_ENABLE(disable-fix-includes,
> > +[AS_HELP_STRING([--disable-fix-includes],
>
> The beast is called fixincludes, no '-' or '_'.
>
> > + [skip fixing of includes])], [],
>
> Better say 'running fixincludes' for example rather than being
> vague/obtuse.
>
> The new option requires documenting in install.texi.
>
> That said, I'm not sure this is really worth adding yet another option.
> And how are developers supposed to know if they can safely use it or
> not?
>
> Besides, have you actually run a regtest with that option?  I'm asking
> because even on Ubuntu 20.04 fixincludes drops in it's own .
> You need to check that dropping that is actually safe.

limits.h and syslimits.h are the only include-fixed we "ship" in our
packages.  Not sure if they are really required though.

Richard.

>
> Rainer
>
> --
> -
> Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH v2] tree-optimization/94899: Remove "+ 0x80000000" in int comparisons

2022-02-04 Thread Richard Biener via Gcc-patches
On Fri, Feb 4, 2022 at 12:50 PM Jakub Jelinek  wrote:
>
> On Fri, Feb 04, 2022 at 12:14:33PM +0100, Richard Biener via Gcc-patches 
> wrote:
> > +#define MAGIC 0x8000
> >
> > I _think_ writing the constant this way requires
>
> Perhaps use (~(uint32_t)0 / 2 + 1) as MAGIC instead?
> Then it doesn't actually require that uint32_t actually is exactly 32 bits.
> On the other side the regex checks for the exact 32-bit constant.

I think uint32_t always is 32-bit in practice, compared to unsigned
int which is not.

Richard.

> Jakub
>


[PATCH][v2] tree-optimization/100499 - niter analysis and multiple_of_p

2022-02-04 Thread Richard Biener via Gcc-patches
niter analysis uses multiple_of_p which currently assumes
operations like MULT_EXPR do not wrap.  We've got to rely on this
for optimizing size expressions like those in DECL_SIZE and those
generally use unsigned arithmetic with no indication that they
are not expected to wrap.  To preserve that the following adds
a parameter to multiple_of_p, defaulted to true, indicating that
the TOP expression is not expected to wrap for outer computations
in TYPE.  This mostly follows a patch proposed by Bin last year
with the conversion behavior added.

Applying to all users the new effect is that upon type conversions
in the TOP expression the behavior will switch to honor
TYPE_OVERFLOW_UNDEFINED for the converted sub-expressions.

The patch also changes the occurrence in niter analysis that we
know is problematic and we have testcases for to pass false
to multiple_of_p.  The patch also contains a change to the
PR72817 fix from Bin to avoid regressing gcc.dg/tree-ssa/loop-42.c.

The intent for stage1 is to introduce a size_multiple_of_p and
internalize the added parameter so all multiple_of_p users will
honor TYPE_OVERFLOW_UNDEFINED and users dealing with size expressions
need to be switched to size_multiple_of_p.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

2022-01-26  Richard Biener  

PR tree-optimization/100499
* fold-const.h (multiple_of_p): Add nowrap parameter, defaulted
to true.
* fold-const.cc (multiple_of_p): Likewise.  Honor it for
MULT_EXPR, PLUS_EXPR and MINUS_EXPR and pass it along,
switching to false for conversions.
* tree-ssa-loop-niter.cc (number_of_iterations_ne): Do not
claim the outermost expression does not wrap when calling
multiple_of_p.  Refactor the check done to check the
original IV, avoiding a bias that might wrap.

* gcc.dg/torture/pr100499-1.c: New testcase.
* gcc.dg/torture/pr100499-2.c: Likewise.
* gcc.dg/torture/pr100499-3.c: Likewise.

Co-authored-by: Bin Cheng  
---
 gcc/fold-const.cc | 80 +++
 gcc/fold-const.h  |  2 +-
 gcc/testsuite/gcc.dg/torture/pr100499-1.c | 27 
 gcc/testsuite/gcc.dg/torture/pr100499-2.c | 16 +
 gcc/testsuite/gcc.dg/torture/pr100499-3.c | 14 
 gcc/tree-ssa-loop-niter.cc| 52 ++-
 6 files changed, 130 insertions(+), 61 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-1.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-2.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-3.c

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 12732d39c79..2578a86ca1a 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -14073,10 +14073,16 @@ fold_binary_initializer_loc (location_t loc, 
tree_code code, tree type,
  SAVE_EXPR (I) * SAVE_EXPR (J)
 
(where the same SAVE_EXPR (J) is used in the original and the
-   transformed version).  */
+   transformed version).
+
+   NOWRAP specifies whether all outer operations in TYPE should
+   be considered not wrapping.  Any type conversion within TOP acts
+   as a barrier and we will fall back to NOWRAP being false.
+   NOWRAP is mostly used to treat expressions in TYPE_SIZE and friends
+   as not wrapping even though they are generally using unsigned arithmetic.  
*/
 
 int
-multiple_of_p (tree type, const_tree top, const_tree bottom)
+multiple_of_p (tree type, const_tree top, const_tree bottom, bool nowrap)
 {
   gimple *stmt;
   tree op1, op2;
@@ -14094,10 +14100,17 @@ multiple_of_p (tree type, const_tree top, const_tree 
bottom)
 a multiple of BOTTOM then TOP is a multiple of BOTTOM.  */
   if (!integer_pow2p (bottom))
return 0;
-  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
- || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
+  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom, nowrap)
+ || multiple_of_p (type, TREE_OPERAND (top, 0), bottom, nowrap));
 
 case MULT_EXPR:
+  /* If the multiplication can wrap we cannot recurse further unless
+the bottom is a power of two which is where wrapping does not
+matter.  */
+  if (!nowrap
+ && !TYPE_OVERFLOW_UNDEFINED (type)
+ && !integer_pow2p (bottom))
+   return 0;
   if (TREE_CODE (bottom) == INTEGER_CST)
{
  op1 = TREE_OPERAND (top, 0);
@@ -14106,24 +14119,24 @@ multiple_of_p (tree type, const_tree top, const_tree 
bottom)
std::swap (op1, op2);
  if (TREE_CODE (op2) == INTEGER_CST)
{
- if (multiple_of_p (type, op2, bottom))
+ if (multiple_of_p (type, op2, bottom, nowrap))
return 1;
  /* Handle multiple_of_p ((x * 2 + 2) * 4, 8).  */
- if (multiple_of_p (type, bottom, op2))
+ if (multiple_of_p (type, bottom, op2, nowrap))

Re: [PATCH] configure: add --disable-fix-includes

2022-02-04 Thread Rainer Orth
Hi Martin,

> It seems to me that fixincludes is a hardly used feature for present-day header
> files and so I'm suggesting a developer option that can skip the fixing.

please remember that there's a world beyond current-day Linux.

> How is the feature used on other targets?

There are still quite a number of fixes on e.g. Solaris or macOS.  And
people are still building gcc on older OS versions for one reason or
another...

> diff --git a/gcc/configure.ac b/gcc/configure.ac
> index 1171c946e6e..6015e403aa9 100644
> --- a/gcc/configure.ac
> +++ b/gcc/configure.ac
> @@ -842,6 +842,12 @@ gather_stats=`if test $enable_gather_detailed_mem_stats 
> != no; then echo 1; else
>   AC_DEFINE_UNQUOTED(GATHER_STATISTICS, $gather_stats,
>   [Define to enable detailed memory allocation stats gathering.])
>   
> +AC_ARG_ENABLE(disable-fix-includes,
> +[AS_HELP_STRING([--disable-fix-includes],

The beast is called fixincludes, no '-' or '_'.

> + [skip fixing of includes])], [],

Better say 'running fixincludes' for example rather than being
vague/obtuse.

The new option requires documenting in install.texi.

That said, I'm not sure this is really worth adding yet another option.
And how are developers supposed to know if they can safely use it or
not?

Besides, have you actually run a regtest with that option?  I'm asking
because even on Ubuntu 20.04 fixincludes drops in it's own .
You need to check that dropping that is actually safe.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] RISC-V: Always pass -misa-spec to assembler [PR104219]

2022-02-04 Thread Martin Liška

May I please PING this?

Thanks,
Martin

On 1/25/22 14:04, Kito Cheng wrote:

Add -misa-spec to OPTION_DEFAULT_SPECS to make sure -misa-spec is
always passed to the assembler; that prevents GCC and binutils from
interpreting the ISA string in different ways.

gcc/ChangeLog:

PR target/104219
* config.gcc (riscv*-*-*): Normalize the with_isa_spec value.
(all_defaults): Add isa_spec.
* config/riscv/riscv.h (OPTION_DEFAULT_SPECS): Add isa_spec.
---
  gcc/config.gcc   | 4 +++-
  gcc/config/riscv/riscv.h | 2 ++
  2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 90aec3f8f3f..0bb8c63a46e 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4643,12 +4643,14 @@ case "${target}" in
case "${with_isa_spec}" in
""|default|20191213|201912)
tm_defines="${tm_defines} 
TARGET_DEFAULT_ISA_SPEC=ISA_SPEC_CLASS_20191213"
+   with_isa_spec=20191213
;;
2.2)
tm_defines="${tm_defines} 
TARGET_DEFAULT_ISA_SPEC=ISA_SPEC_CLASS_2P2"
;;
20190608 | 201906)
tm_defines="${tm_defines} 
TARGET_DEFAULT_ISA_SPEC=ISA_SPEC_CLASS_20190608"
+   with_isa_spec=20190608
;;
*)
echo "--with-isa-spec only accept 2.2, 20191213, 201912, 20190608 
or 201906" 1>&2
@@ -5430,7 +5432,7 @@ case ${target} in
  esac
  
  t=

-all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 
schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls lxc1-sxc1 
madd4"
+all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 tune_64 
schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt synci tls lxc1-sxc1 
madd4 isa_spec"
  for option in $all_defaults
  do
eval "val=\$with_"`echo $option | sed s/-/_/g`
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 695668424c3..8a4d2cf7f85 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -60,6 +60,7 @@ extern const char *riscv_default_mtune (int argc, const char 
**argv);
 --with-arch is ignored if -march or -mcpu is specified.
 --with-abi is ignored if -mabi is specified.
 --with-tune is ignored if -mtune or -mcpu is specified.
+   --with-isa-spec is ignored if -misa-spec is specified.
  
 But using default -march/-mtune value if -mcpu don't have valid option.  */

  #define OPTION_DEFAULT_SPECS \
@@ -70,6 +71,7 @@ extern const char *riscv_default_mtune (int argc, const char 
**argv);
   "  %{!mcpu=*:-march=%(VALUE)}" \
   "  %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" },\
{"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \
+  {"isa_spec", "%{!misa-spec=*:-misa-spec=%(VALUE)}" }, \
  
  #ifdef IN_LIBGCC2

  #undef TARGET_64BIT




[PATCH] configure: add --disable-fix-includes

2022-02-04 Thread Martin Liška

Hello.

It seems to me that fixincludes is a hardly used feature for today's header
files and so I'm suggesting a developer option that can skip the fixing.

How is the feature used on other targets?

Right now, fixinclude takes about 11 seconds on my machine, where
it reads (and applies regexes) 130MB of header files.

The number of fixed headers is negligible without any significant
change. I'm attaching diff that I can see on my developer machine.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

fixincludes/ChangeLog:

* fixinc.in: Add early exit.

gcc/ChangeLog:

* Makefile.in: Support disable_fix_includes.
* configure.ac: Add --disable-fix-includes.
* configure: Regenerate.
---
 fixincludes/fixinc.in |  6 ++
 gcc/Makefile.in   |  6 --
 gcc/configure | 21 +++--
 gcc/configure.ac  |  6 ++
 4 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/fixincludes/fixinc.in b/fixincludes/fixinc.in
index 0c3066452c6..3ebcd346d41 100755
--- a/fixincludes/fixinc.in
+++ b/fixincludes/fixinc.in
@@ -63,6 +63,12 @@ else
   esac
 fi
 
+if test "x$DISABLE_FIX_INCLUDES" = "xyes"

+then
+  echo "Skipping fixincludes"
+  exit 0
+fi
+
 # Define what target system we're fixing.
 #
 if test -r ./Makefile; then
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 31ff95500c9..c77f1cc644d 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -401,6 +401,8 @@ PLUGINLIBS = @pluginlibs@
 
 enable_plugin = @enable_plugin@
 
+disable_fix_includes = @disable_fix_includes@

+
 # On MinGW plugin installation involves installing import libraries.
 ifeq ($(enable_plugin),yes)
   plugin_implib := $(if $(strip $(filter mingw%,$(host_os))),yes,no)
@@ -3248,8 +3250,8 @@ stmp-fixinc: gsyslimits.h macro_list fixinc_list \
chmod a+rx $${fix_dir} || true; \
(TARGET_MACHINE='$(target)'; srcdir=`cd $(srcdir); ${PWD_COMMAND}`; 
\
  SHELL='$(SHELL)'; MACRO_LIST=`${PWD_COMMAND}`/macro_list ; \
- gcc_dir=`${PWD_COMMAND}` ; \
- export TARGET_MACHINE srcdir SHELL MACRO_LIST && \
+ gcc_dir=`${PWD_COMMAND}` ; 
DISABLE_FIX_INCLUDES=${disable_fix_includes} \
+ export TARGET_MACHINE srcdir SHELL MACRO_LIST DISABLE_FIX_INCLUDES 
&& \
  cd $(build_objdir)/fixincludes && \
  $(SHELL) ./fixinc.sh "$${gcc_dir}/$${fix_dir}" \
$(BUILD_SYSTEM_HEADER_DIR) $(OTHER_FIXINCLUDES_DIRS) ); \
diff --git a/gcc/configure b/gcc/configure
index bd4d4721868..843ab02bfa3 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -839,6 +839,7 @@ with_float
 with_cpu
 enable_multiarch
 enable_multilib
+disable_fix_includes
 coverage_flags
 valgrind_command
 valgrind_path_defines
@@ -958,6 +959,7 @@ enable_werror_always
 enable_checking
 enable_coverage
 enable_gather_detailed_mem_stats
+enable_disable_fix_includes
 enable_valgrind_annotations
 enable_multilib
 enable_multiarch
@@ -1688,6 +1690,7 @@ Optional Features:
   Values are opt, noopt, default is noopt
   --enable-gather-detailed-mem-stats
   enable detailed memory allocation stats gathering
+  --disable-fix-includes  skip fixing of includes
   --enable-valgrind-annotations
   enable valgrind runtime interaction
   --enable-multilib   enable library support for multiple ABIs
@@ -7780,6 +7783,20 @@ cat >>confdefs.h <<_ACEOF
 _ACEOF
 
 
+# Check whether --enable-disable-fix-includes was given.

+if test "${enable_disable_fix_includes+set}" = set; then :
+  enableval=$enable_disable_fix_includes;
+else
+  disable_fix_includes=yes
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define FIX_INCLUDES $disable_fix_includes
+_ACEOF
+
+
+
 # Check whether --enable-valgrind-annotations was given.
 if test "${enable_valgrind_annotations+set}" = set; then :
   enableval=$enable_valgrind_annotations;
@@ -19659,7 +19676,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19662 "configure"
+#line 19679 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H

@@ -19765,7 +19782,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19768 "configure"
+#line 19785 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H

diff --git a/gcc/configure.ac b/gcc/configure.ac
index 1171c946e6e..6015e403aa9 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -842,6 +842,12 @@ gather_stats=`if test $enable_gather_detailed_mem_stats != 
no; then echo 1; else
 AC_DEFINE_UNQUOTED(GATHER_STATISTICS, $gather_stats,
 [Define to enable detailed memory allocation stats gathering.])
 
+AC_ARG_ENABLE(disable-fix-includes,

+[AS_HELP_STRING([--disable-fix-includes],
+   [skip fixing of includes])], [],
+[disable_fix_includes=yes])
+AC_SUBST(disable_fix_includes)
+
 

Re: [PATCH v2] tree-optimization/94899: Remove "+ 0x80000000" in int comparisons

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Fri, Feb 04, 2022 at 12:14:33PM +0100, Richard Biener via Gcc-patches wrote:
> +#define MAGIC 0x80000000
> 
> I _think_ writing the constant this way requires

Perhaps use (~(uint32_t)0 / 2 + 1) as MAGIC instead?
Then it doesn't actually require that uint32_t actually is exactly 32 bits.
On the other side the regex checks for the exact 32-bit constant.

Jakub



Re: [Patch][wwwdocs + gcc] nvptx – update for -mptx change – gcc-12/changes.html + gcc/docs/invoke.texi

2022-02-04 Thread Tom de Vries via Gcc-patches

On 2/2/22 09:30, Tobias Burnus wrote:

This patch updates the documentation for Tom's change of the default
-mptx= version - mentioning also -mptx=7.0.

I forgot whether ptx = 7.0 was working fine or whether there was
a reason not to mention it.


A ptx version is experimental if all sm versions it enables are 
experimental.


7.0 enables sm_80, and that one is experimental.

sm_80 is considered experimental, because the last time I tried to do a 
gcc build with the default set to sm_80, it failed (ICEs).  Likewise 
sm_75, sm_53.


Following that reasoning ptx isa 6.3 would also be experimental, but it 
fixes something that is very fragile with older ptx version (warp 
convergence) so it has been promoted to supported.


Anyway, I'm preparing a patch that picks default -mptx based on -misa, 
with a default minimum of 6.0 instead of 6.3, to enable testing with a 
390.x driver.



At some point, we also have to update -misa=... Currently, only
sm_30 and sm_35 are documented but sm_53, sm_75 and sm_80 are supported.
Can they now be documented or are there still issues?


AFAIK there are still issues.


OK to commit the wwwdocs + gcc invoke.texi patches?


Sorry, it looks like this is still in flux.

Thanks,
- Tom


[Patch] Fortran/OpenMP: Avoid ICE for invalid char array in omp atomic [PR104329]

2022-02-04 Thread Tobias Burnus

Already during parsing, the allocatable character array assignment
   x = (x)

is converted to two gfc_codes with EXEC_ASSIGN, namely:

  ASSIGN z1:_F.DA0(FULL) (parens z1:x(FULL))
  ASSIGN z1:x(FULL) z1:_F.DA0(FULL)

But the current code expects only one gfc_code - as parse.c does some
checks, that's unexpected for resolution and currently is checked with
a gcc_assert.

Solution: I now defer the gcc_assert until after diagnosing the arguments.

OK for mainline (only affected version)?

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
Fortran/OpenMP: Avoid ICE for invalid char array in omp atomic [PR104329]

	PR fortran/104329
gcc/fortran/ChangeLog:

	* openmp.cc (resolve_omp_atomic): Defer extra-code assert after
	other diagnostics.

gcc/testsuite/ChangeLog:

	* gfortran.dg/gomp/atomic-28.f90: New test.

 gcc/fortran/openmp.cc| 11 ---
 gcc/testsuite/gfortran.dg/gomp/atomic-28.f90 | 28 
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 38c67e1f640..b1c065d9e8b 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -7687,7 +7687,7 @@ resolve_omp_atomic (gfc_code *code)
   gfc_omp_atomic_op aop
 = (gfc_omp_atomic_op) (atomic_code->ext.omp_clauses->atomic_op
 			   & GFC_OMP_ATOMIC_MASK);
-  gfc_code *stmt = NULL, *capture_stmt = NULL;
+  gfc_code *stmt = NULL, *capture_stmt = NULL, *tailing_stmt = NULL;
   gfc_expr *comp_cond = NULL;
   locus *loc = NULL;
 
@@ -7825,7 +7825,8 @@ resolve_omp_atomic (gfc_code *code)
 	  stmt = code;
 	  capture_stmt = code->next;
 	}
-  gcc_assert (!code->next->next);
+  /* Shall be NULL but can happen for invalid code. */
+  tailing_stmt = code->next->next;
 }
   else
 {
@@ -7833,7 +7834,8 @@ resolve_omp_atomic (gfc_code *code)
   stmt = code;
   if (!atomic_code->ext.omp_clauses->compare && stmt->op != EXEC_ASSIGN)
 	goto unexpected;
-  gcc_assert (!code->next);
+  /* Shall be NULL but can happen for invalid code. */
+  tailing_stmt = code->next;
 }
 
   if (comp_cond)
@@ -7886,6 +7888,9 @@ resolve_omp_atomic (gfc_code *code)
   return;
 }
 
+  /* Should be diagnosed above already. */
+  gcc_assert (tailing_stmt == NULL);
+
   var = stmt->expr1->symtree->n.sym;
   stmt_expr2 = is_conversion (stmt->expr2, true, true);
   if (stmt_expr2 == NULL)
diff --git a/gcc/testsuite/gfortran.dg/gomp/atomic-28.f90 b/gcc/testsuite/gfortran.dg/gomp/atomic-28.f90
new file mode 100644
index 000..91e29c96d45
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/atomic-28.f90
@@ -0,0 +1,28 @@
+! { dg-do compile }
+!
+! PR fortran/104329
+!
+! Contributed by G. Steinmetz
+!
+subroutine z1
+   character(:), allocatable :: x(:)
+   x = ['123']
+   !$omp atomic update
+   x = (x)  ! { dg-error "OMP ATOMIC statement must set a scalar variable of intrinsic type" }
+end
+
+subroutine z2
+   character(:), allocatable :: x(:)
+   x = ['123']
+   !$omp atomic update
+   x = 'a' // x // 'e'  ! { dg-error "OMP ATOMIC statement must set a scalar variable of intrinsic type" }
+end
+
+
+subroutine z3
+   character(:), allocatable :: x(:)
+   x = ['123']
+   !$omp atomic capture
+   x = 'a' // x // 'e'  ! { dg-error "OMP ATOMIC statement must set a scalar variable of intrinsic type" }
+   x = x
+end


Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-02-04 Thread Hafiz Abid Qadeer
On 04/02/2022 09:46, Thomas Schwinge wrote:

> 
> Abid, are you going to address these?  I think it does make sense if the
> C/C++ and Fortran test cases match as much as feasible.
> 
Sure. I will do that.

> However: really (a) remove 'omp_alloctrait (omp_atk_pool_size, 8192)'
> altogether, or instead: (b) increase its size (if that can be computed)
> -- and/or (c) limit the number of OpenMP threads executing in parallel?
> Due to unfamiliarity with all that, I don't know what's best here.
> 
C testcase also does not have the pool_size trait. So it makes sense to me to 
not have it in fortran
testcase too. It also seems cleaner than putting some limits on number of 
threads or increasing
the size which will be a bit fragile.

Thanks,
-- 
Hafiz Abid Qadeer



Re: [PATCH] testsuite: Robustify aarch64/simd tests against more aggressive DCE

2022-02-04 Thread Richard Sandiford via Gcc-patches
Sorry, just realised I'd never replied to this.

Marc Poulhies  writes:
> Eric Botcazou  writes:
>>> The new variables seem to be unused, so I think slightly stronger
>>> DCE could remove the calls even after the patch.  Perhaps the containing
>>> functions should take an int32x4_t *ptr or something, with the calls
>>> assigning to different ptr[] indices.
>>
>> We run a minimal DCE pass at -O0 in our compiler to eliminate all the 
>> garbage 
>> generated by the gimplifier for variable-sized types (people care about code 
>> size at -O0 in specific contexts) but it does not touch anything written by 
>> the user (and debugging is unaffected of course).  Given that the builtins 
>> are 
>> pure functions and the arguments have no side effects, it eliminates the 
>> calls, but adding a LHS blocks that because this minimal DCE pass preserves 
>> anything user-related, in particular assignments to user variables.
>>
>>> I think it would be better to do that using new calls though,
>>> and xfail the existing ones when they no longer work.  For example:
>>> 
>>>   /* { dg-error "lane -1 out of range 0 - 7" "" {target *-*-*} 0 } */
>>>   vqdmlal_high_laneq_s16 (int32x4_a, int16x8_b, int16x8_c, -1);
>>>   /* { dg-error "lane -1 out of range 0 - 7" "" {target *-*-*} 0 } */
>>>   ptr[0] = vqdmlal_high_laneq_s16 (int32x4_a, int16x8_b, int16x8_c, -1);
>>> 
>>> That way we don't lose the existing tests.
>>
>> Frankly I'm not quite sure of what we can lose by adding a LHS here, can you 
>> elaborate a bit?  We would need a solution that works out of the box with 
>> our 
>> compiler in the future, i.e. without having to tweak 50 testcases again.
>
> Hi Richard,
>
> Thank for your reply !
>
> As Éric, I'm also wondering why having LHS in the existing tests would
> make us loose them. I guess I'm not familiar enough with this part of
> the testsuite and I'm missing something.

The problem is that we only enforce lane bounds via calls to
__builtin_aarch64_im_lane_boundsi.  In previous releases, the check
only happened at RTL expansion time, so the check would be skipped if
any gimple pass removed the call.  Now we do the checking during
folding, but that still misses cases.  E.g., compare the -O0 and -O1
behaviour for:

#include <arm_neon.h>

void f(int32x4_t *p0, int16x8_t *p1) {
vqdmlal_high_laneq_s16(p0[0], p1[0], p1[1], -1);
//p0[0] = vqdmlal_high_laneq_s16(p0[0], p1[0], p1[1], -1);
}

-O0 gives the error but -O1 doesn't [https://godbolt.org/z/1KosTY43T].
The -O1 behaviour here is wrong: badly-formed calls should be rejected
with a diagnostic even if the calls are unused.  Clang gets this right
in both cases [https://godbolt.org/z/EGxs8jq97].

I think keeping the lhs-free calls is important for making sure that
the -O0 behaviour doesn't regress without the DCE.

Your DCE will regress it, but that's the fault of the arm_neon.h
implementation rather than the fault of your pass.  Having the
tests but XFAILing them seems like the best way of dealing with that.
Hopefully we'll then see some progression if the arm_neon.h implementation
is improved in future.

Thanks,
Richard


Re: [PATCH v2] tree-optimization/94899: Remove "+ 0x80000000" in int comparisons

2022-02-04 Thread Richard Biener via Gcc-patches
On Thu, Feb 3, 2022 at 4:50 AM Arjun Shankar  wrote:
>
> Expressions of the form "X + CST < Y + CST" where:
>
> * CST is an unsigned integer constant with only the MSB set, and
> * X and Y's types have integer conversion ranks <= CST's
>
> can be simplified to "(signed) X < (signed) Y".
>
> This is because, assuming 32-bit signed numbers,
> (unsigned) INT_MIN + 0x80000000 is 0, and
> (unsigned) INT_MAX + 0x80000000 is UINT_MAX.
>
> i.e. the result increases monotonically with signed input.
>
> This means:
> ((signed) X < (signed) Y) iff (X + 0x80000000 < Y + 0x80000000)

+  (op (plus:c INTEGER_CST@0 @1) (plus:c INTEGER_CST@0 @2))

INTEGER_CST are put last by canonicalization so you should write

  (op (plus @1 INTEGER_CST@0) (plus @2 INTEGER_CST@0))

and thus omit the :c.

+#define MAGIC 0x80000000

I _think_ writing the constant this way requires

/* { dg-require-effective-target int32plus } */

as on a target where int is 16 bits it possibly yields a diagnostic?
I've been using explicit int32_t types w/o such effective target
in the past but I'm not sure how to write an explicit precision
constant to avoid diagnostics.

Note I didn't actually check we get a diagnostic ...

Slapping on the dg-require-effective-target should be safe though.

Otherwise looks good now - note this has to wait for stage1 of GCC 13,
so make sure to ping then or commit then in case you have git access.

Thanks,
Richard.

> gcc/
> * match.pd (X + C < Y + C -> (signed) X < (signed) Y, if C is
> 0x80000000): New simplification.
> gcc/testsuite/
> * gcc.dg/pr94899.c: New test.
> ---
>  gcc/match.pd   | 13 +
>  gcc/testsuite/gcc.dg/pr94899.c | 48 ++
>  2 files changed, 61 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr94899.c
> ---
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-February/589557.html
>
> Notes on v2, based on Richard's review comments:
>
> 1. I removed matching on "convert", and therefore also replaced the
> removal of convert upon simplification with an explicit cast to
> signed. I originally thought this simplification only applies to
> signed operands that have been cast to unsigned, but thinking about
> it, it became clear that they do not necessarily have to be signed
> originally. The simplification is now a bit more general.
>
> 2. Removed checks for operands' types as it seems to be unnecessary. I
> hope this is correct.
>
> 3. Added unsigned types and mismatched sizes of operands to the test.
> These are now simplified.


Re: [PATCH] tree-optimization/100499 - niter analysis and multiple_of_p

2022-02-04 Thread Richard Biener via Gcc-patches
On Fri, 4 Feb 2022, Richard Sandiford wrote:

> Richard Biener  writes:
> > On Fri, 4 Feb 2022, Richard Sandiford wrote:
> >> Richard Biener via Gcc-patches  writes:
> >> > niter analysis uses multiple_of_p which currently assumes
> >> > operations like MULT_EXPR do not wrap.  We've got to rely on this
> >> > for optimizing size expressions like those in DECL_SIZE and those
> >> > generally use unsigned arithmetic with no indication that they
> >> > are not expected to wrap.  To preserve that the following adds
> >> > a parameter to multiple_of_p, defaulted to true, indicating that
> >> > the TOP expression is not expected to wrap for outer computations
> >> > in TYPE.  This mostly follows a patch proposed by Bin last year
> >> > with the conversion behavior added.
> >> >
> >> > Applying to all users the new effect is that upon type conversions
> >> > in the TOP expression the behavior will switch to honor
> >> > TYPE_OVERFLOW_UNDEFINED for the converted sub-expressions.
> >> >
> >> > The patch also changes the occurance in niter analysis that we
> >> > know is problematic and we have testcases for to pass false
> >> > to multiple_of_p.  The patch also contains a change to the
> >> > PR72817 fix from Bin to avoid regressing gcc.dg/tree-ssa/loop-42.c.
> >> >
> >> > The intent for stage1 is to introduce a size_multiple_of_p and
> >> > internalize the added parameter so all multiple_of_p users will
> >> > honor TYPE_OVERFLOW_UNDEFINED and users dealing with size expressions
> >> > need to be switched to size_multiple_of_p.
> >> >
> >> > Bootstrapped and tested on x86_64-unknown-linux-gnu with all languages
> >> > and {,-m32} testing.
> >> >
> >> > The patch applies ontop of the three earlier posted ones that touch
> >> > multiple_of_p but have not yet been reviewed/pushed.
> >> >
> >> > OK?
> >> >
> >> > Thanks,
> >> > Richard.
> >> >
> >> > 2022-01-26  Richard Biener  
> >> >
> >> >  PR tree-optimization/100499
> >> >  * fold-const.h (multiple_of_p): Add nowrap parameter, defaulted
> >> >  to true.
> >> >  * fold-const.cc (multiple_of_p): Likewise.  Honor it for
> >> >  MULT_EXPR, PLUS_EXPR and MINUS_EXPR and pass it along,
> >> >  switching to false for conversions.
> >> >  * tree-ssa-loop-niter.cc (number_of_iterations_ne): Do not
> >> >  claim the outermost expression does not wrap when calling
> >> >  multiple_of_p.  Refactor the check done to check the
> >> >  original IV, avoiding a bias that might wrap.
> >> >
> >> >  * gcc.dg/torture/pr100499-1.c: New testcase.
> >> >  * gcc.dg/torture/pr100499-2.c: Likewise.
> >> >  * gcc.dg/torture/pr100499-3.c: Likewise.
> >> >
> >> > Co-authored-by: Bin Cheng  
> >> > ---
> >> >  gcc/fold-const.cc | 81 +++
> >> >  gcc/fold-const.h  |  2 +-
> >> >  gcc/testsuite/gcc.dg/torture/pr100499-1.c | 27 
> >> >  gcc/testsuite/gcc.dg/torture/pr100499-2.c | 16 +
> >> >  gcc/testsuite/gcc.dg/torture/pr100499-3.c | 14 
> >> >  gcc/tree-ssa-loop-niter.cc| 52 ++-
> >> >  6 files changed, 131 insertions(+), 61 deletions(-)
> >> >  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-1.c
> >> >  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-2.c
> >> >  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-3.c
> >> >
> >> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> >> > index a0a4913c45e..7c204fb6265 100644
> >> > --- a/gcc/fold-const.cc
> >> > +++ b/gcc/fold-const.cc
> >> > @@ -14062,10 +14062,16 @@ fold_binary_initializer_loc (location_t loc, 
> >> > tree_code code, tree type,
> >> >   SAVE_EXPR (I) * SAVE_EXPR (J)
> >> >  
> >> > (where the same SAVE_EXPR (J) is used in the original and the
> >> > -   transformed version).  */
> >> > +   transformed version).
> >> > +
> >> > +   NOWRAP specifies whether all outer operations in TYPE should
> >> > +   be considered not wrapping.  Any type conversion within TOP acts
> >> > +   as a barrier and we will fall back to NOWRAP being false.
> >> > +   NOWRAP is mostly used to treat expressions in TYPE_SIZE and friends
> >> > +   as not wrapping even though they are generally using unsigned 
> >> > arithmetic.  */
> >> >  
> >> >  int
> >> > -multiple_of_p (tree type, const_tree top, const_tree bottom)
> >> > +multiple_of_p (tree type, const_tree top, const_tree bottom, bool 
> >> > nowrap)
> >> >  {
> >> >gimple *stmt;
> >> >tree op1, op2;
> >> > @@ -14083,10 +14089,17 @@ multiple_of_p (tree type, const_tree top, 
> >> > const_tree bottom)
> >> >   a multiple of BOTTOM then TOP is a multiple of BOTTOM.  */
> >> >if (!integer_pow2p (bottom))
> >> >  return 0;
> >> > -  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> >> > -  || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
> >> > +  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom, 
> >> > nowrap)
> >> > +  || multiple_of_p (type, TREE_OPERAND (top, 

Re: [PATCH] tree-optimization/100499 - niter analysis and multiple_of_p

2022-02-04 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> On Fri, 4 Feb 2022, Richard Sandiford wrote:
>> Richard Biener via Gcc-patches  writes:
>> > niter analysis uses multiple_of_p which currently assumes
>> > operations like MULT_EXPR do not wrap.  We've got to rely on this
>> > for optimizing size expressions like those in DECL_SIZE and those
>> > generally use unsigned arithmetic with no indication that they
>> > are not expected to wrap.  To preserve that the following adds
>> > a parameter to multiple_of_p, defaulted to true, indicating that
>> > the TOP expression is not expected to wrap for outer computations
>> > in TYPE.  This mostly follows a patch proposed by Bin last year
>> > with the conversion behavior added.
>> >
>> > Applying to all users the new effect is that upon type conversions
>> > in the TOP expression the behavior will switch to honor
>> > TYPE_OVERFLOW_UNDEFINED for the converted sub-expressions.
>> >
>> > The patch also changes the occurance in niter analysis that we
>> > know is problematic and we have testcases for to pass false
>> > to multiple_of_p.  The patch also contains a change to the
>> > PR72817 fix from Bin to avoid regressing gcc.dg/tree-ssa/loop-42.c.
>> >
>> > The intent for stage1 is to introduce a size_multiple_of_p and
>> > internalize the added parameter so all multiple_of_p users will
>> > honor TYPE_OVERFLOW_UNDEFINED and users dealing with size expressions
>> > need to be switched to size_multiple_of_p.
>> >
>> > Bootstrapped and tested on x86_64-unknown-linux-gnu with all languages
>> > and {,-m32} testing.
>> >
>> > The patch applies ontop of the three earlier posted ones that touch
>> > multiple_of_p but have not yet been reviewed/pushed.
>> >
>> > OK?
>> >
>> > Thanks,
>> > Richard.
>> >
>> > 2022-01-26  Richard Biener  
>> >
>> >PR tree-optimization/100499
>> >* fold-const.h (multiple_of_p): Add nowrap parameter, defaulted
>> >to true.
>> >* fold-const.cc (multiple_of_p): Likewise.  Honor it for
>> >MULT_EXPR, PLUS_EXPR and MINUS_EXPR and pass it along,
>> >switching to false for conversions.
>> >* tree-ssa-loop-niter.cc (number_of_iterations_ne): Do not
>> >claim the outermost expression does not wrap when calling
>> >multiple_of_p.  Refactor the check done to check the
>> >original IV, avoiding a bias that might wrap.
>> >
>> >* gcc.dg/torture/pr100499-1.c: New testcase.
>> >* gcc.dg/torture/pr100499-2.c: Likewise.
>> >* gcc.dg/torture/pr100499-3.c: Likewise.
>> >
>> > Co-authored-by: Bin Cheng  
>> > ---
>> >  gcc/fold-const.cc | 81 +++
>> >  gcc/fold-const.h  |  2 +-
>> >  gcc/testsuite/gcc.dg/torture/pr100499-1.c | 27 
>> >  gcc/testsuite/gcc.dg/torture/pr100499-2.c | 16 +
>> >  gcc/testsuite/gcc.dg/torture/pr100499-3.c | 14 
>> >  gcc/tree-ssa-loop-niter.cc| 52 ++-
>> >  6 files changed, 131 insertions(+), 61 deletions(-)
>> >  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-1.c
>> >  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-2.c
>> >  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-3.c
>> >
>> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
>> > index a0a4913c45e..7c204fb6265 100644
>> > --- a/gcc/fold-const.cc
>> > +++ b/gcc/fold-const.cc
>> > @@ -14062,10 +14062,16 @@ fold_binary_initializer_loc (location_t loc, 
>> > tree_code code, tree type,
>> >   SAVE_EXPR (I) * SAVE_EXPR (J)
>> >  
>> > (where the same SAVE_EXPR (J) is used in the original and the
>> > -   transformed version).  */
>> > +   transformed version).
>> > +
>> > +   NOWRAP specifies whether all outer operations in TYPE should
>> > +   be considered not wrapping.  Any type conversion within TOP acts
>> > +   as a barrier and we will fall back to NOWRAP being false.
>> > +   NOWRAP is mostly used to treat expressions in TYPE_SIZE and friends
>> > +   as not wrapping even though they are generally using unsigned 
>> > arithmetic.  */
>> >  
>> >  int
>> > -multiple_of_p (tree type, const_tree top, const_tree bottom)
>> > +multiple_of_p (tree type, const_tree top, const_tree bottom, bool nowrap)
>> >  {
>> >gimple *stmt;
>> >tree op1, op2;
>> > @@ -14083,10 +14089,17 @@ multiple_of_p (tree type, const_tree top, 
>> > const_tree bottom)
>> > a multiple of BOTTOM then TOP is a multiple of BOTTOM.  */
>> >if (!integer_pow2p (bottom))
>> >return 0;
>> > -  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
>> > -|| multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
>> > +  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom, nowrap)
>> > +|| multiple_of_p (type, TREE_OPERAND (top, 0), bottom, nowrap));
>> >  
>> >  case MULT_EXPR:
>> > +  /* If the multiplication can wrap we cannot recurse further unless
>> > +   the second operand is a power of two which is where wrapping
>> > +   does not matter.  */
>> > +  if (!nowrap
>> > 

Re: [patch] Fix PR debug/104366

2022-02-04 Thread Richard Biener via Gcc-patches
On Fri, Feb 4, 2022 at 11:41 AM Eric Botcazou via Gcc-patches
 wrote:
>
> Hi,
>
> this completes my fix for PR debug/101947 by emptying the base_types vector
> before (re)populating it.
>
> Tested on x86_64-suse-linux, OK for the mainline?

OK.

>
> 2022-02-04  Eric Botcazou  
>
> PR debug/104366
> * dwarf2out.cc (dwarf2out_finish): Empty base_types.
> (dwarf2out_early_finish): Likewise.
>
> --
> Eric Botcazou


RE: [PATCH] PR target/104345: Use nvptx "set" instruction for cond ? -1 : 0.

2022-02-04 Thread Roger Sayle


Hi Thomas,

Very many thanks for your help investigating this problem.

> > This patch addresses the "increased register pressure" regression
> > on nvptx-none caused by my change to transition the backend to
> > a STORE_FLAG_VALUE = 1 target.
> 
> Yes, "addresses", but unfortunately doesn't "resolve".  ;-|

Doh!

> I'm confirming the improved code generation (less registers used, less
> instructions emitted) in cases where it triggers -- but unfortunately it
> doesn't in the PR104345 'libgomp.oacc-c-c++-common/reduction-cplx-dbl.c'
> scenario.

Looking over the nvptx code currently generated for reduction-cplx-dbl.c,
it appears nearly optimal and it's difficult to see what could have regressed.
[It makes almost no uses of Boolean types, so is relatively unaffected
by a STORE_FLAG_VALUE change].  One remaining possibility is that
the "register usage" regression is not in reduction-cplx-dbl.c itself but
in __muldc3 in libgcc.a.  [I believe kernel resource usage is computed
including all called functions].

Might this be easy to test on your configuration, moving libgcc.a from
one build to another?

If it is __muldc3 regressing, then the other nvptx patches mentioned
previously, and perhaps even improvements to isnan and isinf, may help.

Again apologies that the "using nvptx set.?32 for "cond ? -1: 0" patch, that
catches many of the issues observed in your initial PR analysis, isn't actually
the root cause of this particular case.

Thanks again,
Roger
--




Re: [PATCH] PR/101135 - Load of null pointer when passing absent assumed-shape array argument for an optional dummy argument

2022-02-04 Thread Mikael Morin

Hello,

Le 29/01/2022 à 22:41, Harald Anlauf via Fortran a écrit :

Dear Fortranners,

compiling with -fsanitize=undefined shows that we did mishandle the
case where a missing optional argument is passed to another procedure.

Besides the example given in the PR, the existing testcase
fortran.dg/missing_optional_dummy_6a.f90 fails with:

gcc/testsuite/gfortran.dg/missing_optional_dummy_6a.f90:21:29: runtime error: 
load of null pointer of type 'integer(kind=4)'
gcc/testsuite/gfortran.dg/missing_optional_dummy_6a.f90:22:30: runtime error: 
load of null pointer of type 'integer(kind=4)'
gcc/testsuite/gfortran.dg/missing_optional_dummy_6a.f90:27:29: runtime error: 
load of null pointer of type 'integer(kind=4)'

The least invasive change - already pointed out by the reporter - is
to check the presence of the argument before dereferencing the data
pointer after the offset calculation.  This requires adjusting the
checking pattern for gfortran.dg/missing_optional_dummy_6a.f90.

Regtesting reminded me that procedures with bind(c) attribute are doing
their own stuff, which is why they need to be excluded here, otherwise
testcase bind-c-contiguous-4.f90 would regress on the expected output.

I've created a testcase that uses this PR's input as well as the lesson
learned from studying the bind(c) testcase and placed this in the asan
subdirectory.

There is a potential alternative solution which I did not pursue, as I
think it is more invasive, but also that I didn't succeed to implement:
A non-present dummy array argument should not need to get its descriptor
set up.  Pursuing this is probably not the right thing to do during the
current stage of development and could be implemented later.  If somebody
believes this is important, feel free to open a PR for this.

I have another (equally unimportant) concern that it may create an 
unnecessary conditional when passing a subobject of an optional 
argument.  In that case we can assume that the optional is present.

It’s not a correctness issue, so let’s not bother at this stage.


Regtested on x86_64-pc-linux-gnu.  OK for mainline?


OK.

Thanks.


Re: [PATCH] tree-optimization/100499 - niter analysis and multiple_of_p

2022-02-04 Thread Richard Biener via Gcc-patches
On Fri, 4 Feb 2022, Richard Sandiford wrote:

> Richard Biener via Gcc-patches  writes:
> > niter analysis uses multiple_of_p which currently assumes
> > operations like MULT_EXPR do not wrap.  We've got to rely on this
> > for optimizing size expressions like those in DECL_SIZE and those
> > generally use unsigned arithmetic with no indication that they
> > are not expected to wrap.  To preserve that the following adds
> > a parameter to multiple_of_p, defaulted to true, indicating that
> > the TOP expression is not expected to wrap for outer computations
> > in TYPE.  This mostly follows a patch proposed by Bin last year
> > with the conversion behavior added.
> >
> > Applying to all users the new effect is that upon type conversions
> > in the TOP expression the behavior will switch to honor
> > TYPE_OVERFLOW_UNDEFINED for the converted sub-expressions.
> >
> > The patch also changes the occurrence in niter analysis that we
> > know is problematic and we have testcases for to pass false
> > to multiple_of_p.  The patch also contains a change to the
> > PR72817 fix from Bin to avoid regressing gcc.dg/tree-ssa/loop-42.c.
> >
> > The intent for stage1 is to introduce a size_multiple_of_p and
> > internalize the added parameter so all multiple_of_p users will
> > honor TYPE_OVERFLOW_UNDEFINED and users dealing with size expressions
> > need to be switched to size_multiple_of_p.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu with all languages
> > and {,-m32} testing.
> >
> > The patch applies on top of the three earlier posted ones that touch
> > multiple_of_p but have not yet been reviewed/pushed.
> >
> > OK?
> >
> > Thanks,
> > Richard.
> >
> > 2022-01-26  Richard Biener  
> >
> > PR tree-optimization/100499
> > * fold-const.h (multiple_of_p): Add nowrap parameter, defaulted
> > to true.
> > * fold-const.cc (multiple_of_p): Likewise.  Honor it for
> > MULT_EXPR, PLUS_EXPR and MINUS_EXPR and pass it along,
> > switching to false for conversions.
> > * tree-ssa-loop-niter.cc (number_of_iterations_ne): Do not
> > claim the outermost expression does not wrap when calling
> > multiple_of_p.  Refactor the check done to check the
> > original IV, avoiding a bias that might wrap.
> >
> > * gcc.dg/torture/pr100499-1.c: New testcase.
> > * gcc.dg/torture/pr100499-2.c: Likewise.
> > * gcc.dg/torture/pr100499-3.c: Likewise.
> >
> > Co-authored-by: Bin Cheng  
> > ---
> >  gcc/fold-const.cc | 81 +++
> >  gcc/fold-const.h  |  2 +-
> >  gcc/testsuite/gcc.dg/torture/pr100499-1.c | 27 
> >  gcc/testsuite/gcc.dg/torture/pr100499-2.c | 16 +
> >  gcc/testsuite/gcc.dg/torture/pr100499-3.c | 14 
> >  gcc/tree-ssa-loop-niter.cc| 52 ++-
> >  6 files changed, 131 insertions(+), 61 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-1.c
> >  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-2.c
> >  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-3.c
> >
> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> > index a0a4913c45e..7c204fb6265 100644
> > --- a/gcc/fold-const.cc
> > +++ b/gcc/fold-const.cc
> > @@ -14062,10 +14062,16 @@ fold_binary_initializer_loc (location_t loc, 
> > tree_code code, tree type,
> >   SAVE_EXPR (I) * SAVE_EXPR (J)
> >  
> > (where the same SAVE_EXPR (J) is used in the original and the
> > -   transformed version).  */
> > +   transformed version).
> > +
> > +   NOWRAP specifies whether all outer operations in TYPE should
> > +   be considered not wrapping.  Any type conversion within TOP acts
> > +   as a barrier and we will fall back to NOWRAP being false.
> > +   NOWRAP is mostly used to treat expressions in TYPE_SIZE and friends
> > +   as not wrapping even though they are generally using unsigned 
> > arithmetic.  */
> >  
> >  int
> > -multiple_of_p (tree type, const_tree top, const_tree bottom)
> > +multiple_of_p (tree type, const_tree top, const_tree bottom, bool nowrap)
> >  {
> >gimple *stmt;
> >tree op1, op2;
> > @@ -14083,10 +14089,17 @@ multiple_of_p (tree type, const_tree top, 
> > const_tree bottom)
> >  a multiple of BOTTOM then TOP is a multiple of BOTTOM.  */
> >if (!integer_pow2p (bottom))
> > return 0;
> > -  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> > - || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
> > +  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom, nowrap)
> > + || multiple_of_p (type, TREE_OPERAND (top, 0), bottom, nowrap));
> >  
> >  case MULT_EXPR:
> > +  /* If the multiplication can wrap we cannot recurse further unless
> > +the second operand is a power of two which is where wrapping
> > +does not matter.  */
> > +  if (!nowrap
> > + && !TYPE_OVERFLOW_UNDEFINED (type)
> > + && !integer_pow2p (TREE_OPERAND (top, 1)))
> > +   return 0;
> 

[patch] Fix PR debug/104366

2022-02-04 Thread Eric Botcazou via Gcc-patches
Hi,

this completes my fix for PR debug/101947 by emptying the base_types vector 
before (re)populating it.

Tested on x86_64-suse-linux, OK for the mainline?


2022-02-04  Eric Botcazou  

PR debug/104366
* dwarf2out.cc (dwarf2out_finish): Empty base_types.
(dwarf2out_early_finish): Likewise.

-- 
Eric Botcazou
diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index e60575b1398..d1e8654e4d7 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -32155,6 +32155,7 @@ dwarf2out_finish (const char *filename)
 FOR_EACH_CHILD (die, c, gcc_assert (! c->die_mark));
   }
 #endif
+  base_types.truncate (0);
   for (ctnode = comdat_type_list; ctnode != NULL; ctnode = ctnode->next)
 resolve_addr (ctnode->root_die);
   resolve_addr (comp_unit_die ());
@@ -32999,6 +33000,7 @@ dwarf2out_early_finish (const char *filename)
  location related output removed and some LTO specific changes.
  Some refactoring might make both smaller and easier to match up.  */
 
+  base_types.truncate (0);
   for (ctnode = comdat_type_list; ctnode != NULL; ctnode = ctnode->next)
 mark_base_types (ctnode->root_die);
   mark_base_types (comp_unit_die ());


Re: [PATCH] tree-optimization/100499 - niter analysis and multiple_of_p

2022-02-04 Thread Richard Sandiford via Gcc-patches
Richard Biener via Gcc-patches  writes:
> niter analysis uses multiple_of_p which currently assumes
> operations like MULT_EXPR do not wrap.  We've got to rely on this
> for optimizing size expressions like those in DECL_SIZE and those
> generally use unsigned arithmetic with no indication that they
> are not expected to wrap.  To preserve that the following adds
> a parameter to multiple_of_p, defaulted to true, indicating that
> the TOP expression is not expected to wrap for outer computations
> in TYPE.  This mostly follows a patch proposed by Bin last year
> with the conversion behavior added.
>
> Applying to all users the new effect is that upon type conversions
> in the TOP expression the behavior will switch to honor
> TYPE_OVERFLOW_UNDEFINED for the converted sub-expressions.
>
> The patch also changes the occurrence in niter analysis that we
> know is problematic and we have testcases for to pass false
> to multiple_of_p.  The patch also contains a change to the
> PR72817 fix from Bin to avoid regressing gcc.dg/tree-ssa/loop-42.c.
>
> The intent for stage1 is to introduce a size_multiple_of_p and
> internalize the added parameter so all multiple_of_p users will
> honor TYPE_OVERFLOW_UNDEFINED and users dealing with size expressions
> need to be switched to size_multiple_of_p.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu with all languages
> and {,-m32} testing.
>
> The patch applies on top of the three earlier posted ones that touch
> multiple_of_p but have not yet been reviewed/pushed.
>
> OK?
>
> Thanks,
> Richard.
>
> 2022-01-26  Richard Biener  
>
>   PR tree-optimization/100499
>   * fold-const.h (multiple_of_p): Add nowrap parameter, defaulted
>   to true.
>   * fold-const.cc (multiple_of_p): Likewise.  Honor it for
>   MULT_EXPR, PLUS_EXPR and MINUS_EXPR and pass it along,
>   switching to false for conversions.
>   * tree-ssa-loop-niter.cc (number_of_iterations_ne): Do not
>   claim the outermost expression does not wrap when calling
>   multiple_of_p.  Refactor the check done to check the
>   original IV, avoiding a bias that might wrap.
>
>   * gcc.dg/torture/pr100499-1.c: New testcase.
>   * gcc.dg/torture/pr100499-2.c: Likewise.
>   * gcc.dg/torture/pr100499-3.c: Likewise.
>
> Co-authored-by: Bin Cheng  
> ---
>  gcc/fold-const.cc | 81 +++
>  gcc/fold-const.h  |  2 +-
>  gcc/testsuite/gcc.dg/torture/pr100499-1.c | 27 
>  gcc/testsuite/gcc.dg/torture/pr100499-2.c | 16 +
>  gcc/testsuite/gcc.dg/torture/pr100499-3.c | 14 
>  gcc/tree-ssa-loop-niter.cc| 52 ++-
>  6 files changed, 131 insertions(+), 61 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-3.c
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index a0a4913c45e..7c204fb6265 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -14062,10 +14062,16 @@ fold_binary_initializer_loc (location_t loc, 
> tree_code code, tree type,
>   SAVE_EXPR (I) * SAVE_EXPR (J)
>  
> (where the same SAVE_EXPR (J) is used in the original and the
> -   transformed version).  */
> +   transformed version).
> +
> +   NOWRAP specifies whether all outer operations in TYPE should
> +   be considered not wrapping.  Any type conversion within TOP acts
> +   as a barrier and we will fall back to NOWRAP being false.
> +   NOWRAP is mostly used to treat expressions in TYPE_SIZE and friends
> +   as not wrapping even though they are generally using unsigned arithmetic. 
>  */
>  
>  int
> -multiple_of_p (tree type, const_tree top, const_tree bottom)
> +multiple_of_p (tree type, const_tree top, const_tree bottom, bool nowrap)
>  {
>gimple *stmt;
>tree op1, op2;
> @@ -14083,10 +14089,17 @@ multiple_of_p (tree type, const_tree top, 
> const_tree bottom)
>a multiple of BOTTOM then TOP is a multiple of BOTTOM.  */
>if (!integer_pow2p (bottom))
>   return 0;
> -  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> -   || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
> +  return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom, nowrap)
> +   || multiple_of_p (type, TREE_OPERAND (top, 0), bottom, nowrap));
>  
>  case MULT_EXPR:
> +  /* If the multiplication can wrap we cannot recurse further unless
> +  the second operand is a power of two which is where wrapping
> +  does not matter.  */
> +  if (!nowrap
> +   && !TYPE_OVERFLOW_UNDEFINED (type)
> +   && !integer_pow2p (TREE_OPERAND (top, 1)))
> + return 0;

I think I'm missing something, but isn't the key thing whether bottom
is a power of 2?  E.g. as it stands it looks like we'd still say that a
wrapping x * 2 is a multiple of 3 based purely on x being a multiple of 3,

Re: [PATCH] match.pd: Fix up 1 / X for unsigned X optimization [PR104280]

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Fri, Feb 04, 2022 at 11:26:30AM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Fri, Feb 04, 2022 at 11:14:05AM +0100, Eric Botcazou wrote:
> > > > --- a/gcc/match.pd
> > > > +++ b/gcc/match.pd
> > > > @@ -401,27 +401,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > > 
> > > >   /* X / bool_range_Y is X.  */
> > > >   (simplify
> > > >   
> > > >(div @0 SSA_NAME@1)
> > > > 
> > > > -  (if (INTEGRAL_TYPE_P (type) && ssa_name_has_boolean_range (@1))
> > > > +  (if (INTEGRAL_TYPE_P (type)
> > > > +   && ssa_name_has_boolean_range (@1)
> > > > +   && !flag_non_call_exceptions)
> > > 
> > > ssa_name_has_boolean_range call is certainly more expensive than
> > > !flag_non_call_exceptions check, can you swap those two?
> > 
> > But !flag_non_call_exceptions is (almost) always true for the C family of 
> > languages, so you're going to penalize them by doing this.
> 
> True, but much less so than the other order penalizing Ada/Go.
> 
> > > And similarly, TYPE_PRECISION (type) > 1 check is very cheap, can
> > > it be done before the && !integer_zerop (@1) line?
> > 
> > Yes, it clearly belongs there.

Anyway, not a big deal.

Jakub



Re: [PATCH] match.pd: Fix up 1 / X for unsigned X optimization [PR104280]

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Fri, Feb 04, 2022 at 11:14:05AM +0100, Eric Botcazou wrote:
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -401,27 +401,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > 
> > >   /* X / bool_range_Y is X.  */
> > >   (simplify
> > >   
> > >(div @0 SSA_NAME@1)
> > > 
> > > -  (if (INTEGRAL_TYPE_P (type) && ssa_name_has_boolean_range (@1))
> > > +  (if (INTEGRAL_TYPE_P (type)
> > > +   && ssa_name_has_boolean_range (@1)
> > > +   && !flag_non_call_exceptions)
> > 
> > ssa_name_has_boolean_range call is certainly more expensive than
> > !flag_non_call_exceptions check, can you swap those two?
> 
> But !flag_non_call_exceptions is (almost) always true for the C family of 
> languages, so you're going to penalize them by doing this.

True, but much less so than the other order penalizing Ada/Go.

> > And similarly, TYPE_PRECISION (type) > 1 check is very cheap, can
> > it be done before the && !integer_zerop (@1) line?
> 
> Yes, it clearly belongs there.

Jakub



Re: [PATCH] Adjust LSHIFT_EXPR handling of multiple_of_p

2022-02-04 Thread Richard Biener via Gcc-patches
On Fri, 4 Feb 2022, Richard Sandiford wrote:

> Richard Biener  writes:
> > This removes the odd check of size_type_node when handling left-shifts
> > as multiplications of 1 << N and instead uses the type as specified.
> > It also moves left-shift handling next to multiplications where it
> > semantically belongs.
> >
> > Bootstrap and regtest pending on x86_64-unknown-linux-gnu.
> >
> > OK?  (I failed to short-cut the wide_int_to_tree for a
> > poly_int_cst_p bottom)
> >
> > Thanks,
> > Richard.
> >
> > 2022-01-24  Richard Biener  
> >
> > * fold-const.cc (multiple_of_p): Re-write and move LSHIFT_EXPR
> > handling.
> > ---
> >  gcc/fold-const.cc | 33 -
> >  1 file changed, 16 insertions(+), 17 deletions(-)
> >
> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> > index b155611578d..a0a4913c45e 100644
> > --- a/gcc/fold-const.cc
> > +++ b/gcc/fold-const.cc
> > @@ -14068,7 +14068,7 @@ int
> >  multiple_of_p (tree type, const_tree top, const_tree bottom)
> >  {
> >gimple *stmt;
> > -  tree t1, op1, op2;
> > +  tree op1, op2;
> >  
> >if (operand_equal_p (top, bottom, 0))
> >  return 1;
> > @@ -14114,6 +14114,21 @@ multiple_of_p (tree type, const_tree top, 
> > const_tree bottom)
> >return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> >   || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
> >  
> > +case LSHIFT_EXPR:
> > +  /* Handle X << CST as X * (1 << CST) and only process the constant.  
> > */
> > +  if (TREE_CODE (TREE_OPERAND (top, 1)) == INTEGER_CST)
> > +   {
> > + op1 = TREE_OPERAND (top, 1);
> > + if (wi::gtu_p (TYPE_PRECISION (type), wi::to_wide (op1)))
> > +   {
> > + wide_int mul_op
> > +   = wi::one (TYPE_PRECISION (type)) << wi::to_wide (op1);
> > + return multiple_of_p (type,
> > +   wide_int_to_tree (type, mul_op), bottom);
> > +   }
> > +   }
> > +  return 0;
> > +
> 
> LGTM.  Sorry for the slow response.
> 
> I guess the condition could be written:
> 
> if (wi::to_widest (op1) < TYPE_PRECISION (type))
> 
> which might be more readable, and also avoids accidentally reinterpreting
> a sign.

Good idea - I'll adjust, re-test and push.

Thanks,
Richard.


Re: [PATCH] match.pd: Fix up 1 / X for unsigned X optimization [PR104280]

2022-02-04 Thread Eric Botcazou via Gcc-patches
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -401,27 +401,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > 
> >   /* X / bool_range_Y is X.  */
> >   (simplify
> >   
> >(div @0 SSA_NAME@1)
> > 
> > -  (if (INTEGRAL_TYPE_P (type) && ssa_name_has_boolean_range (@1))
> > +  (if (INTEGRAL_TYPE_P (type)
> > +   && ssa_name_has_boolean_range (@1)
> > +   && !flag_non_call_exceptions)
> 
> ssa_name_has_boolean_range call is certainly more expensive than
> !flag_non_call_exceptions check, can you swap those two?

But !flag_non_call_exceptions is (almost) always true for the C family of 
languages, so you're going to penalize them by doing this.

> And similarly, TYPE_PRECISION (type) > 1 check is very cheap, can
> it be done before the && !integer_zerop (@1) line?

Yes, it clearly belongs there.

-- 
Eric Botcazou





Re: [PATCH] Adjust LSHIFT_EXPR handling of multiple_of_p

2022-02-04 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> This removes the odd check of size_type_node when handling left-shifts
> as multiplications of 1 << N and instead uses the type as specified.
> It also moves left-shift handling next to multiplications where it
> semantically belongs.
>
> Bootstrap and regtest pending on x86_64-unknown-linux-gnu.
>
> OK?  (I failed to short-cut the wide_int_to_tree for a
> poly_int_cst_p bottom)
>
> Thanks,
> Richard.
>
> 2022-01-24  Richard Biener  
>
>   * fold-const.cc (multiple_of_p): Re-write and move LSHIFT_EXPR
>   handling.
> ---
>  gcc/fold-const.cc | 33 -
>  1 file changed, 16 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index b155611578d..a0a4913c45e 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -14068,7 +14068,7 @@ int
>  multiple_of_p (tree type, const_tree top, const_tree bottom)
>  {
>gimple *stmt;
> -  tree t1, op1, op2;
> +  tree op1, op2;
>  
>if (operand_equal_p (top, bottom, 0))
>  return 1;
> @@ -14114,6 +14114,21 @@ multiple_of_p (tree type, const_tree top, const_tree 
> bottom)
>return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
>  
> +case LSHIFT_EXPR:
> +  /* Handle X << CST as X * (1 << CST) and only process the constant.  */
> +  if (TREE_CODE (TREE_OPERAND (top, 1)) == INTEGER_CST)
> + {
> +   op1 = TREE_OPERAND (top, 1);
> +   if (wi::gtu_p (TYPE_PRECISION (type), wi::to_wide (op1)))
> + {
> +   wide_int mul_op
> + = wi::one (TYPE_PRECISION (type)) << wi::to_wide (op1);
> +   return multiple_of_p (type,
> + wide_int_to_tree (type, mul_op), bottom);
> + }
> + }
> +  return 0;
> +

LGTM.  Sorry for the slow response.

I guess the condition could be written:

if (wi::to_widest (op1) < TYPE_PRECISION (type))

which might be more readable, and also avoids accidentally reinterpreting
a sign.

Thanks,
Richard

>  case MINUS_EXPR:
>/* It is impossible to prove if op0 - op1 is multiple of bottom
>precisely, so be conservative here checking if both op0 and op1
> @@ -14133,22 +14148,6 @@ multiple_of_p (tree type, const_tree top, const_tree 
> bottom)
>return (multiple_of_p (type, op1, bottom)
> && multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
>  
> -case LSHIFT_EXPR:
> -  if (TREE_CODE (TREE_OPERAND (top, 1)) == INTEGER_CST)
> - {
> -   op1 = TREE_OPERAND (top, 1);
> -   /* const_binop may not detect overflow correctly,
> -  so check for it explicitly here.  */
> -   if (wi::gtu_p (TYPE_PRECISION (TREE_TYPE (size_one_node)),
> -  wi::to_wide (op1))
> -   && (t1 = fold_convert (type,
> -  const_binop (LSHIFT_EXPR, size_one_node,
> -   op1))) != 0
> -   && !TREE_OVERFLOW (t1))
> - return multiple_of_p (type, t1, bottom);
> - }
> -  return 0;
> -
>  CASE_CONVERT:
>/* Can't handle conversions from non-integral or wider integral type.  
> */
>if ((TREE_CODE (TREE_TYPE (TREE_OPERAND (top, 0))) != INTEGER_TYPE)


Re: [PATCH] match.pd: Fix up 1 / X for unsigned X optimization [PR104280]

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Fri, Feb 04, 2022 at 10:53:22AM +0100, Eric Botcazou wrote:
> > Well, yes, we have to fix it.
> 
> Here's the fix we agreed upon in the audit trail, OK for the mainline?
> 
>   PR tree-optimization/104356
>   * match.pd (X / bool_range_Y is X): Add guard.
>   (X / X is one): Likewise.
>   (X / abs (X) is X < 0 ? -1 : 1): Likewise.
>   (X / -X is -1): Likewise.
>   (1 / X -> X == 1): Likewise.

Looks mostly good, just a few nits.

> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -401,27 +401,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   /* X / bool_range_Y is X.  */ 
>   (simplify
>(div @0 SSA_NAME@1)
> -  (if (INTEGRAL_TYPE_P (type) && ssa_name_has_boolean_range (@1))
> +  (if (INTEGRAL_TYPE_P (type)
> +   && ssa_name_has_boolean_range (@1)
> +   && !flag_non_call_exceptions)

ssa_name_has_boolean_range call is certainly more expensive than
!flag_non_call_exceptions check, can you swap those two?

> @@ -444,6 +452,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (trunc_div integer_onep@0 @1)
>   (if (INTEGRAL_TYPE_P (type)
>&& !integer_zerop (@1)
> +  && (!flag_non_call_exceptions || tree_expr_nonzero_p (@1))
>&& TYPE_PRECISION (type) > 1)

And similarly, TYPE_PRECISION (type) > 1 check is very cheap, can
it be done before the && !integer_zerop (@1) line?
I admit this one is already preexisting, but tree_expr_nonzero_p
can be quite expensive.

Otherwise LGTM.

Jakub



Re: [PATCH v3 07/15] arm: Implement MVE predicates as vectors of booleans

2022-02-04 Thread Richard Sandiford via Gcc-patches
Richard Sandiford via Gcc-patches  writes:
> The main thing that makes truth vector types special is that those
> types are the only ones that allow multiple elements in the same byte.
> A “normal” 16-byte vector created by build_vector_type(_for_mode)
> cannot be smaller than 16 bytes.

Er, of course I meant “16-element vector created by...”.  16-byte
vectors that are smaller than 16 bytes would indeed be a problem.

Thanks,
Richard


Re: [PATCH] match.pd: Fix up 1 / X for unsigned X optimization [PR104280]

2022-02-04 Thread Eric Botcazou via Gcc-patches
> Well, yes, we have to fix it.

Here's the fix we agreed upon in the audit trail, OK for the mainline?

PR tree-optimization/104356
* match.pd (X / bool_range_Y is X): Add guard.
(X / X is one): Likewise.
(X / abs (X) is X < 0 ? -1 : 1): Likewise.
(X / -X is -1): Likewise.
(1 / X -> X == 1): Likewise.

-- 
Eric Botcazou
diff --git a/gcc/match.pd b/gcc/match.pd
index b942cb2930a..4b695db7a25 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -401,27 +401,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  /* X / bool_range_Y is X.  */ 
  (simplify
   (div @0 SSA_NAME@1)
-  (if (INTEGRAL_TYPE_P (type) && ssa_name_has_boolean_range (@1))
+  (if (INTEGRAL_TYPE_P (type)
+   && ssa_name_has_boolean_range (@1)
+   && !flag_non_call_exceptions)
@0))
  /* X / X is one.  */
  (simplify
   (div @0 @0)
   /* But not for 0 / 0 so that we can get the proper warnings and errors.
  And not for _Fract types where we can't build 1.  */
-  (if (!integer_zerop (@0) && !ALL_FRACT_MODE_P (TYPE_MODE (type)))
+  (if (!integer_zerop (@0)
+   && (!flag_non_call_exceptions || tree_expr_nonzero_p (@0))
+   && !ALL_FRACT_MODE_P (TYPE_MODE (type)))
{ build_one_cst (type); }))
  /* X / abs (X) is X < 0 ? -1 : 1.  */
  (simplify
(div:C @0 (abs @0))
(if (INTEGRAL_TYPE_P (type)
-	&& TYPE_OVERFLOW_UNDEFINED (type))
+	&& TYPE_OVERFLOW_UNDEFINED (type)
+	&& !integer_zerop (@0)
+	&& (!flag_non_call_exceptions || tree_expr_nonzero_p (@0)))
 (cond (lt @0 { build_zero_cst (type); })
   { build_minus_one_cst (type); } { build_one_cst (type); })))
  /* X / -X is -1.  */
  (simplify
(div:C @0 (negate @0))
(if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
-	&& TYPE_OVERFLOW_UNDEFINED (type))
+	&& TYPE_OVERFLOW_UNDEFINED (type)
+	&& !integer_zerop (@0)
+	&& (!flag_non_call_exceptions || tree_expr_nonzero_p (@0)))
 { build_minus_one_cst (type); })))
 
 /* For unsigned integral types, FLOOR_DIV_EXPR is the same as
@@ -444,6 +452,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (trunc_div integer_onep@0 @1)
  (if (INTEGRAL_TYPE_P (type)
   && !integer_zerop (@1)
+  && (!flag_non_call_exceptions || tree_expr_nonzero_p (@1))
   && TYPE_PRECISION (type) > 1)
   (if (TYPE_UNSIGNED (type))
(convert (eq:boolean_type_node @1 { build_one_cst (type); }))


Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-02-04 Thread Thomas Schwinge
Hi!

On 2022-01-31T19:13:09+, Hafiz Abid Qadeer  wrote:
> On 25/01/2022 10:32, Tobias Burnus wrote:
>> On 25.01.22 10:19, Thomas Schwinge wrote:
 I am trying to figure out if the problem you observed
 is a general one or just specific to fortran testcase.
>>> So, unless the '-fsanitize=thread' issues are bogus -- unlikely ;-) -- it
>>> seems a latent issue generally, now fatal with
>>> 'libgomp.fortran/allocate-1.f90'.
>>
>> There is one known issue with libgomp and TSAN (-fsanitize=thread)
>> that I tend to forget about :-(
>>
>> That's according to Jakub, who wrote a while ago:
>>
>> "TSAN doesn't understand what libgomp is doing, unless built with 
>> --disable-linux-futex"

Uh.  Anything that can reasonably be done to address this?  At least, to
make this obvious to the user of '-fsanitize=thread'?

>> However, I now tried to disable futex and still get the following.
>> (First result for libgomp.c-c++-common/allocate-1.c).
>>
>> On the other hand, I have the feeling that the configure option is
>> a no op for libgomp. This can also be seen in the configure.ac script,
>> which only for libstdc++ uses the result and the others have a no-op
>> call to 'true' (alias ':'):
>>
>> libgomp/configure.ac:GCC_LINUX_FUTEX(:)
>> libitm/configure.ac:GCC_LINUX_FUTEX(:)
>> libstdc++-v3/configure.ac:GCC_LINUX_FUTEX([AC_DEFINE(HAVE_LINUX_FUTEX, 1, 
>> [Define if futex syscall
>> is available.])])
>>
>> (The check is not completely pointless as some checks are still done;
>> e.g. 'SYS_gettid and SYS_futex required'.)

Uh.  That (make '--disable-linux-futex' work) should be fixed, I suppose?

>> (TSAN did find issues in libgomp in the past, however. But those
>> have been fixed.)
>>
>>
>> Thus, there might or might not be an issue when TSAN reports one.
>>
>>  * * *
>>
>> Glancing at the Fortran testcase, I noted the following,
>> which probably does not cause the problems. But still,
>> I want to mention it:
>>
>>   !$omp parallel private (y, v) firstprivate (x) allocate (x, y, v)
>>   if (x /= 42) then
>> stop 1
>>   end if
>>
>>   v(1) = 7
>>   if ( (and(fl, 2) /= 0) .and.  &
>>((is_64bit_aligned(x) == 0) .or. &
>> (is_64bit_aligned(y) == 0) .or. &
>> (is_64bit_aligned(v(1)) == 0))) then
>>   stop 2
>>   end if
>>
>> If one compares this with the C/C++ testcase, I note that there
>> is a barrier before the alignment check in C/C++ but not in
>> Fortran. Additionally, 'v(1) = 7' is set twice and the
>> alignment check happens earlier than in C/C++. Not that that
>> should really matter, but I just saw it.
>>
>>
>> In C/C++:
>>   int v[x], w[x];
>> ...
>> v[0] = 7;
>> v[41] = 8;
>>
>> In Fortran:
>>   integer, dimension(x) :: v
>> ...
>>   v(1) = 7
>>   v(41) = 8
>>
>> where 'x == 42'. The Fortran version is not really wrong, but I think
>> the idea is to set the first and last array element - and that's here
>> v(42) and not v(41).
>>
>> BTW: Fortran permits to specify a different lower bound. When converting
>> C/C++ testcases, it can be useful to use the same lower bound also in
>> Fortran:   integer :: v(0:x-1)  (or: 'integer, dimension(0:x-1) :: v')
>> uses then 0 ... 41 for the indices instead of 1 ... 42.
>>
>> But one has to be careful as Fortran uses the upper bound and C uses the
>> number of elements. (Same with OpenMP array sections in Fortran vs. C.)

Abid, are you going to address these?  I think it does make sense if the
C/C++ and Fortran test cases match as much as feasible.

>> PS: The promised data-race warning:
>> ==
>> WARNING: ThreadSanitizer: data race (pid=4135381)
>>   Read of size 8 at 0x7ffc0888bdc0 by thread T10:
>> #0 foo._omp_fn.2 libgomp.c-c++-common/allocate-1.c:47 (a.out+0x402c05)
>> #1 gomp_thread_start ../../../repos/gcc/libgomp/team.c:129 
>> (libgomp.so.1+0x1e5ed)
>>
>>   Previous write of size 8 at 0x7ffc0888bdc0 by main thread:
>> #0 foo._omp_fn.1 libgomp.c-c++-common/allocate-1.c:47 (a.out+0x402aee)
>> #1 GOMP_teams_reg ../../../repos/gcc/libgomp/teams.c:51 
>> (libgomp.so.1+0x3638c)
>> #2 main libgomp.c-c++-common/allocate-1.c:366 (a.out+0x40273e)
>>
>>   Location is stack of main thread.
>>
>>   Location is global '' at 0x ([stack]+0x1ddc0)
>>
>>   Thread T10 (tid=4135398, running) created by main thread at:
>> #0 pthread_create 
>> ../../../../repos/gcc/libsanitizer/tsan/tsan_interceptors_posix.cpp:1001
>> (libtsan.so.2+0x62c76)
>> #1 gomp_team_start ../../../repos/gcc/libgomp/team.c:858 
>> (libgomp.so.1+0x1ec18)
>> #2 main libgomp.c-c++-common/allocate-1.c:366 (a.out+0x40273e)
>>
>> SUMMARY: ThreadSanitizer: data race libgomp.c-c++-common/allocate-1.c:47 in 
>> foo._omp_fn.2
>> ==
>>
>
> Problem was with the pool_size trait. It has limited size which this testcase 
> exceeded. I have
> removed it now which seems to fix the problem. Ok to commit the attached 
> patch?

First, I do confirm that this (testing together with Tobias' patch "which

Re: [PATCH v3 07/15] arm: Implement MVE predicates as vectors of booleans

2022-02-04 Thread Richard Sandiford via Gcc-patches
Christophe Lyon  writes:
> On Tue, Feb 1, 2022 at 4:42 AM Richard Sandiford 
> wrote:
>
>> Christophe Lyon via Gcc-patches  writes:
>> > On Mon, Jan 31, 2022 at 7:01 PM Richard Sandiford via Gcc-patches <
>> > gcc-patches@gcc.gnu.org> wrote:
>> >
>> >> Sorry for the slow response, was out last week.
>> >>
>> >> Christophe Lyon via Gcc-patches  writes:
>> >> > diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
>> >> > index f16d320..5f559f8fd93 100644
>> >> > --- a/gcc/emit-rtl.c
>> >> > +++ b/gcc/emit-rtl.c
>> >> > @@ -6239,9 +6239,14 @@ init_emit_once (void)
>> >> >
>> >> >/* For BImode, 1 and -1 are unsigned and signed interpretations
>> >> >   of the same value.  */
>> >> > -  const_tiny_rtx[0][(int) BImode] = const0_rtx;
>> >> > -  const_tiny_rtx[1][(int) BImode] = const_true_rtx;
>> >> > -  const_tiny_rtx[3][(int) BImode] = const_true_rtx;
>> >> > +  for (mode = MIN_MODE_BOOL;
>> >> > +   mode <= MAX_MODE_BOOL;
>> >> > +   mode = (machine_mode)((int)(mode) + 1))
>> >> > +{
>> >> > +  const_tiny_rtx[0][(int) mode] = const0_rtx;
>> >> > +  const_tiny_rtx[1][(int) mode] = const_true_rtx;
>> >> > +  const_tiny_rtx[3][(int) mode] = const_true_rtx;
>> >> > +}
>> >> >
>> >> >for (mode = MIN_MODE_PARTIAL_INT;
>> >> > mode <= MAX_MODE_PARTIAL_INT;
>> >>
>> >> Does this do the right thing for:
>> >>
>> >>   gen_int_mode (-1, B2Imode)
>> >>
>> >> (which is used e.g. in native_decode_vector_rtx)?  It looks like it
>> >> would give 0b01 rather than 0b11.
>> >>
>> >> Maybe for non-BImode we should use const1_rtx and constm1_rtx, like with
>> >> MODE_INT.
>> >>
>> >
>> > debug_rtx (gen_int_mode (-1, B2Imode)) says:
>> > (const_int -1 [0x])
>> > so that looks right?
>>
>> Ah, right, I forgot that the mode is unused for the small constant lookup.
>> But it looks like CONSTM1_RTX (B2Imode) would be (const_int 1) instead,
>> even though the two should be equal.
>>
>
> Indeed!
>
> So I changed the above loop into:
>/* For BImode, 1 and -1 are unsigned and signed interpretations
>  of the same value.  */
>   for (mode = MIN_MODE_BOOL;
>mode <= MAX_MODE_BOOL;
>mode = (machine_mode)((int)(mode) + 1))
> {
>   const_tiny_rtx[0][(int) mode] = const0_rtx;
>   const_tiny_rtx[1][(int) mode] = const_true_rtx;
> -  const_tiny_rtx[3][(int) mode] = const_true_rtx;
> +  const_tiny_rtx[3][(int) mode] = constm1_rtx;
> }
> which works, both constants are now equal and the validation still passes.

I think we need to keep const_true_rtx for both [BImode][1] and [BImode][3].
BImode is an awkward special case in that the (only) nonzero value must be
exactly STORE_FLAG_VALUE, even if that leads to an otherwise non-canonical
const_int representation.

For the multi-bit booleans, [1] needs to be const1_rtx rather than
const_true_rtx in case STORE_FLAG_VALUE != 1.

>> >> > @@ -1679,15 +1708,25 @@ emit_class_narrowest_mode (void)
>> >> >print_decl ("unsigned char", "class_narrowest_mode",
>> >> "MAX_MODE_CLASS");
>> >> >
>> >> >for (c = 0; c < MAX_MODE_CLASS; c++)
>> >> > -/* Bleah, all this to get the comment right for MIN_MODE_INT.  */
>> >> > -tagged_printf ("MIN_%s", mode_class_names[c],
>> >> > -modes[c]
>> >> > -? ((c != MODE_INT || modes[c]->precision != 1)
>> >> > -   ? modes[c]->name
>> >> > -   : (modes[c]->next
>> >> > -  ? modes[c]->next->name
>> >> > -  : void_mode->name))
>> >> > -: void_mode->name);
>> >> > +{
>> >> > +  /* Bleah, all this to get the comment right for MIN_MODE_INT.
>> */
>> >> > +  const char *comment_name = void_mode->name;
>> >> > +
>> >> > +  if (modes[c])
>> >> > + if (c != MODE_INT || !modes[c]->boolean)
>> >> > +   comment_name = modes[c]->name;
>> >> > + else
>> >> > +   {
>> >> > + struct mode_data *m = modes[c];
>> >> > + while (m->boolean)
>> >> > +   m = m->next;
>> >> > + if (m)
>> >> > +   comment_name = m->name;
>> >> > + else
>> >> > +   comment_name = void_mode->name;
>> >> > +   }
>> >>
>> >> Have you tried bootstrapping the patch on a host of your choice?
>> >> I would expect a warning/Werror about an ambiguous else here.
>> >>
>> > No I hadn't and indeed the build fails
>> >
>> >>
>> >> I guess this reduces to:
>> >>
>> >> struct mode_data *m = modes[c];
>> >> while (m && m->boolean)
>> >>   m = m->next;
>> >> const char *comment_name = (m ? m : void_mode)->name;
>> >>
>> >> but I don't know if that's more readable.
>> >>
>> > but to my understanding the problem is that the ambiguous else
>> > is the first one, and the code should read:
>> >  if (modes[c])
>> > +  {
>> > if (c != MODE_INT || !modes[c]->boolean)
>> >   comment_name = modes[c]->name;
>> > else
>> >   {
>> > struct mode_data *m = 

[PATCH][RFC] tree-optimization/104373 - early diagnostic on unreachable code

2022-02-04 Thread Richard Biener via Gcc-patches
The following improves early uninit diagnostics by computing edge
reachability using VN and ignoring unreachable blocks when looking
for uninitialized uses.  To not ICE with -fdump-tree-all the
early uninit pass needs a dumpfile since VN tries to dump statistics.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

In the PR I note that the early warn_printf and warn_nonnull_compare
and warn_access might also benefit from reachability analysis
which would probably mean a separate "reachability analysis" pass
computing EDGE_EXECUTABLE conditionally on any of the consuming
diagnostics.  I've gone with this simpler proof-of-concept to gather
feedback on the idea though.

As for costs, the patch cuts the most important part of VN, the
alias VUSE->VDEF walking away, and relies on the non-iterative
VN mode being O (n * something like log n) by design (but with
quite a high constant factor).  For the case of uninit I avoid
the extra VN walk when optimizing since we only warn for
always executed code there (but those cases could be expanded
with VN and an adjusted CFG walk).  The other early diagnostics
likely warn everywhere even when optimizing so when expanding
the idea we'd do the extra VN run even when optimizing and then
the question arises, if we do it as a separate phase, whether
we want to enable IL transforms (not sure how much the early
code relies on seeing none of those).

Thus - feedback is welcome.  The PR itself is a 12 regression
so technically P1 but this is more a general improvement.  In
the audit log I also mention that for the specific testcase
there's a cheaper way by doing a simple-minded const/copy
lattice and RPO walk in the uninit pass itself.

Thanks,
Richard.

2022-02-04  Richard Biener  

PR tree-optimization/104373
* tree-ssa-sccvn.h (do_rpo_vn): New export exposing the
walk kind.
* tree-ssa-sccvn.cc (do_rpo_vn): Export, get the default
walk kind as argument.
(run_rpo_vn): Adjust.
(pass_fre::execute): Likewise.
* tree-ssa-uninit.cc (warn_uninitialized_vars): Skip
blocks not reachable.
(execute_late_warn_uninitialized): Mark all edges as
executable.
(execute_early_warn_uninitialized): Use VN to compute
executable edges.
(pass_data_early_warn_uninitialized): Enable a dump file.
---
 gcc/testsuite/g++.dg/warn/Wuninitialized-32.C | 14 
 gcc/tree-ssa-sccvn.cc | 18 +-
 gcc/tree-ssa-sccvn.h  |  1 +
 gcc/tree-ssa-uninit.cc| 36 ---
 4 files changed, 54 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/warn/Wuninitialized-32.C

diff --git a/gcc/testsuite/g++.dg/warn/Wuninitialized-32.C 
b/gcc/testsuite/g++.dg/warn/Wuninitialized-32.C
new file mode 100644
index 000..8b02b5c6adb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wuninitialized-32.C
@@ -0,0 +1,14 @@
+// { dg-do compile }
+// { dg-additional-options "-Wall" }
+
+void* operator new[](unsigned long, void* __p);
+
+struct allocator
+{
+  ~allocator();
+};
+
+void *foo (void *p)
+{
+  return p ? new(p) allocator[1] : new allocator[1]; // { dg-bogus 
"uninitialized" }
+}
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index a03f0aae924..eb17549c185 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -7034,15 +7034,14 @@ eliminate_with_rpo_vn (bitmap inserted_exprs)
   return walker.eliminate_cleanup ();
 }
 
-static unsigned
+unsigned
 do_rpo_vn (function *fn, edge entry, bitmap exit_bbs,
-  bool iterate, bool eliminate);
+  bool iterate, bool eliminate, vn_lookup_kind kind);
 
 void
 run_rpo_vn (vn_lookup_kind kind)
 {
-  default_vn_walk_kind = kind;
-  do_rpo_vn (cfun, NULL, NULL, true, false);
+  do_rpo_vn (cfun, NULL, NULL, true, false, kind);
 
   /* ???  Prune requirement of these.  */
   constant_to_value_id = new hash_table (23);
@@ -7740,11 +7739,12 @@ do_unwind (unwind_state *to, rpo_elim )
executed and iterate.  If ELIMINATE is true then perform
elimination, otherwise leave that to the caller.  */
 
-static unsigned
+unsigned
 do_rpo_vn (function *fn, edge entry, bitmap exit_bbs,
-  bool iterate, bool eliminate)
+  bool iterate, bool eliminate, vn_lookup_kind kind)
 {
   unsigned todo = 0;
+  default_vn_walk_kind = kind;
 
   /* We currently do not support region-based iteration when
  elimination is requested.  */
@@ -8164,8 +8164,7 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs,
 unsigned
 do_rpo_vn (function *fn, edge entry, bitmap exit_bbs)
 {
-  default_vn_walk_kind = VN_WALKREWRITE;
-  unsigned todo = do_rpo_vn (fn, entry, exit_bbs, false, true);
+  unsigned todo = do_rpo_vn (fn, entry, exit_bbs, false, true, VN_WALKREWRITE);
   free_rpo_vn ();
   return todo;
 }
@@ -8221,8 +8220,7 @@ pass_fre::execute (function *fun)
   if (iterate_p)
 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
 
-  

Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-02-04 Thread Thomas Schwinge
Hi Tobias!

On 2022-01-24T09:45:48+0100, Tobias Burnus  wrote:
> On 21.01.22 18:43, Tobias Burnus wrote:
>> On 21.01.22 18:15, Thomas Schwinge wrote:
>>> 11 | integer(c_int) function is_64bit_aligned (a) bind(C)
>>>  Warning: Variable ‘a’ at (1) is a dummy argument of the BIND(C)
>>> procedure ‘is_64bit_aligned’ but may not be C interoperable
>>> [-Wc-binding-type]
>>>
>>> Is that something to worry about?
> I have attached a patch (not committed), which silences the three kinds of
> warnings and fixes the interface issue.
> TODO: commit it.

Still "TODO: commit it" ;-) -- and while I haven't reviewed the changes
in detail, I did spot one item that should be addressed, I suppose:

> --- a/libgomp/testsuite/libgomp.fortran/allocate-1.c
> +++ b/libgomp/testsuite/libgomp.fortran/allocate-1.c
> @@ -1,7 +1,7 @@
>  #include 
>
>  int
> -is_64bit_aligned_ (uintptr_t a)
> +is_64bit_aligned (uintptr_t a)
>  {
>return ( (a & 0x3f) == 0);
>  }

> --- a/libgomp/testsuite/libgomp.fortran/allocate-1.f90
> +++ b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
> @@ -5,30 +5,30 @@
>  module m
>use omp_lib
>use iso_c_binding
> -  implicit none
> +  implicit none (type, external)
>
>interface
>  integer(c_int) function is_64bit_aligned (a) bind(C)
>import :: c_int
> -  integer  :: a
> +  type(*)  :: a
>  end
>end interface
> -end module m
>
> -subroutine foo (x, p, q, px, h, fl)
> +contains
> +
> +subroutine foo (x, p, q, h, fl)
>use omp_lib
>use iso_c_binding
>integer  :: x
>integer, dimension(4) :: p
>integer, dimension(4) :: q
> -  integer  :: px
>integer (kind=omp_allocator_handle_kind) :: h
>integer  :: fl
>
>integer  :: y
>integer  :: r, i, i1, i2, i3, i4, i5
>integer  :: l, l3, l4, l5, l6
> -  integer  :: n, n1, n2, n3, n4
> +  integer  :: n, n2, n3, n4
>integer  :: j2, j3, j4
>integer, dimension(4) :: l2
>integer, dimension(4) :: r2
> @@ -118,6 +118,7 @@ subroutine foo (x, p, q, px, h, fl)
>end if
>!$omp end parallel
>!$omp end teams
> +stop
>
>!$omp parallel do private (y) firstprivate (x)  reduction(+: r) allocate 
> (h: x, y, r, l, n) lastprivate (l)  linear (n: 16)
>do i = 0, 63

That early 'stop' should probably be backed out?  ;-)


Grüße
 Thomas


> @@ -153,77 +154,77 @@ subroutine foo (x, p, q, px, h, fl)
> ((is_64bit_aligned(l2(1)) == 0) .or. &
>  (is_64bit_aligned(l3) == 0) .or. &
>  (is_64bit_aligned(i1) == 0))) then
> - stop 10
> +stop 10
>end if
>  end do
>
>  !$omp do collapse(2) lastprivate(l4, i2, j2) linear (n2:17) allocate (h: 
> n2, l4, i2, j2)
>  do i2 = 3, 4
>do j2 = 17, 22, 2
> - n2 = n2 + 17
> - l4 = i2 * 31 + j2
> - if ( (and(fl, 1) /= 0) .and.  &
> -   ((is_64bit_aligned(l4) == 0) .or. &
> -   (is_64bit_aligned(n2) == 0) .or. &
> -   (is_64bit_aligned(i2) == 0) .or. &
> -   (is_64bit_aligned(j2) == 0))) then
> -   stop 11
> - end if
> +n2 = n2 + 17
> +l4 = i2 * 31 + j2
> +if ( (and(fl, 1) /= 0) .and.  &
> + ((is_64bit_aligned(l4) == 0) .or. &
> +  (is_64bit_aligned(n2) == 0) .or. &
> +  (is_64bit_aligned(i2) == 0) .or. &
> +  (is_64bit_aligned(j2) == 0))) then
> +  stop 11
> +end if
>end do
>  end do
>
>  !$omp do collapse(2) lastprivate(l5, i3, j3) linear (n3:17) schedule 
> (static, 3) allocate (n3, l5, i3, j3)
>  do i3 = 3, 4
>do j3 = 17, 22, 2
> -   n3 = n3 + 17
> -   l5 = i3 * 31 + j3
> -   if ( (and(fl, 2) /= 0) .and.  &
> -   ((is_64bit_aligned(l5) == 0) .or. &
> -   (is_64bit_aligned(n3) == 0) .or. &
> -   (is_64bit_aligned(i3) == 0) .or. &
> -   (is_64bit_aligned(j3) == 0))) then
> -   stop 12
> - end if
> +  n3 = n3 + 17
> +  l5 = i3 * 31 + j3
> +  if ( (and(fl, 2) /= 0) .and.  &
> + ((is_64bit_aligned(l5) == 0) .or. &
> +  (is_64bit_aligned(n3) == 0) .or. &
> +  (is_64bit_aligned(i3) == 0) .or. &
> +  (is_64bit_aligned(j3) == 0))) then
> +  stop 12
> +end if
>end do
>  end do
>
>  !$omp do collapse(2) lastprivate(l6, i4, j4) linear (n4:17) schedule 
> (dynamic) allocate (h: n4, l6, i4, j4)
>  do i4 = 3, 4
>do j4 = 17, 22,2
> -   n4 = n4 + 17;
> -   l6 = i4 * 31 + j4;
> - if ( (and(fl, 1) /= 0) .and.  &
> -   ((is_64bit_aligned(l6) == 0) .or. &
> -   (is_64bit_aligned(n4) == 0) .or. &
> -   (is_64bit_aligned(i4) == 0) .or. &
> -   (is_64bit_aligned(j4) == 0))) then
> -   stop 13
> - end if
> +  n4 = n4 + 17;
> +  l6 = i4 * 31 + j4;
> +if ( (and(fl, 1) /= 0) .and.  &
> +((is_64bit_aligned(l6) == 0) .or. &
> + (is_64bit_aligned(n4) == 0) .or. &
> + 

Re: [PATCH] tree-optimization/103641 - improve vect_synth_mult_by_constant

2022-02-04 Thread Richard Biener via Gcc-patches
On Fri, 4 Feb 2022, Richard Sandiford wrote:

> Richard Biener  writes:
> > The following happens to improve compile-time of the PR103641
> > testcase on aarch64 significantly.  I did not investigate the
> > effect on the generated code but at least in theory
> > choose_mult_variant should do a better job when we tell it
> > the actual mode we are going to use for the operations it
> > synthesizes.
> 
> Yeah, agreed.  (Following up from a comment in the PR: I don't think
> we can rely on unsupported operations having a high cost, but then we
> should already be checking that the operations are actually supported.)
> 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> > OK for trunk?
> >
> > Thanks,
> > Richard.
> >
> > 2022-02-04  Richard Biener  
> >
> > PR tree-optimization/103641
> > * tree-vect-patterns.cc (vect_synth_mult_by_constant):
> > Pass the vector mode to choose_mult_variant.
> > ---
> >  gcc/tree-vect-patterns.cc | 8 
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> > index bea04992160..686a10caec1 100644
> > --- a/gcc/tree-vect-patterns.cc
> > +++ b/gcc/tree-vect-patterns.cc
> > @@ -3046,17 +3046,17 @@ vect_synth_mult_by_constant (vec_info *vinfo, tree 
> > op, tree val,
> >   can synthesize shifts that way.  */
> >bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, 
> > multtype);
> >  
> > +  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
> >HOST_WIDE_INT hwval = tree_to_shwi (val);
> >/* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
> >   The vectorizer's benefit analysis will decide whether it's beneficial
> >   to do this.  */
> > -  bool possible = choose_mult_variant (mode, hwval, ,
> > -   , MAX_COST);
> > +  bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
> > +  ? TYPE_MODE (vectype) : mode,
> > +  hwval, , , MAX_COST);
> >if (!possible)
> >  return NULL;
> >  
> > -  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
> > -
> >if (!vectype
> >|| !target_supports_mult_synth_alg (, variant,
> >vectype, synth_shift_p))
> 
> The !vectype early out needs to move with the assignment.
> LGTM otherwise.

Whoops yes - missed that.  Will push after that fixed.

Richard.


Re: [PATCH] Speed up fixincludes.

2022-02-04 Thread Martin Liška

On 2/3/22 22:51, Jakub Jelinek wrote:

On Thu, Feb 03, 2022 at 04:29:39PM -0500, Marek Polacek wrote:

On Thu, Feb 03, 2022 at 10:13:36PM +0100, Martin Liška wrote:

On 2/3/22 19:44, Andreas Schwab wrote:

On Feb 03 2022, Martin Liška wrote:


+cd $LIB
+echo "$all_dirs" | xargs mkdir -p
+cd ..
+


$LIB always contains slashes.



And what is the problem? You're too brief..


I guess his point is that if you do
cd a/b/c/
then
cd ..
will not get you back to where you started.  Perhaps you could use
pushd/popd instead.


Or a subshell.

Jakub



I'm suggesting the following patch.

Ready to be installed?
Thanks,
Martin

From 77bc388daf42d18334cb874407031fc49dbbaa67 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Fri, 4 Feb 2022 10:24:51 +0100
Subject: [PATCH] fixincludes: Update pwd.

fixincludes/ChangeLog:

	* fixinc.in: Use cd OLDDIR instead of cd .. .
---
 fixincludes/fixinc.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fixincludes/fixinc.in b/fixincludes/fixinc.in
index 0c3066452c6..0bd8027a554 100755
--- a/fixincludes/fixinc.in
+++ b/fixincludes/fixinc.in
@@ -258,9 +258,10 @@ then echo "All directories (including links to directories):"
  echo $all_dirs
 fi
 
+OLDDIR=`${PWDCMD}`
 cd $LIB
 echo "$all_dirs" | xargs mkdir -p
-cd ..
+cd ${OLDDIR}
 
 mkdir $LIB/root
 
-- 
2.35.1



Re: [PATCH] tree-optimization/103641 - improve vect_synth_mult_by_constant

2022-02-04 Thread Jakub Jelinek via Gcc-patches
On Fri, Feb 04, 2022 at 09:31:20AM +0100, Richard Biener wrote:
> The following happens to improve compile-time of the PR103641
> testcase on aarch64 significantly.  I did not investigate the
> effect on the generated code but at least in theory
> choose_mult_variant should do a better job when we tell it
> the actual mode we are going to use for the operations it
> synthesizes.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> OK for trunk?
> 
> Thanks,
> Richard.
> 
> 2022-02-04  Richard Biener  
> 
>   PR tree-optimization/103641
>   * tree-vect-patterns.cc (vect_synth_mult_by_constant):
>   Pass the vector mode to choose_mult_variant.

LGTM.

Jakub



PTX code generation (was: [PATCH] PR target/104345: Use nvptx "set" instruction for cond ? -1 : 0)

2022-02-04 Thread Thomas Schwinge
Hi!

As an aside:

On 2022-02-03T21:00:50+, "Roger Sayle"  wrote:
> the exact register usage of a nvptx kernel depends upon the version of
> the Cuda drivers being used (and the hardware)

Yeah, that's a "problem" -- or: "challenge"?  ;-)

The GCC/nvptx back end is generating some rather high-level IR (PTX)
targeting a "black hole": not knowing what exactly the Nvidia/CUDA
Driver, PTX -> SASS compiler are going to do with it.  (Well, similar
problem also exists for more traditional ISAs if CPU microcode etc. is
involved, but it's certainly more severe here.)

Five years ago, I asked our then Nvidia PTX contact person about ideas,
"How to generate PTX code to the PTX -> SASS compiler's liking":

| We're currently looking into options for improving the PTX code generated
| by GCC's nvptx back end, and it came up the question about how to
| generate PTX code to the PTX -> SASS compiler's liking?  Is there any
| documentation available regarding this?  (I say "PTX -> SASS compiler" as
| I don't think I know the proper name of it.  For avoidance of doubt, I
| mean the "component" that sits between the PTX code we feed into
| cuLinkAddData, and what actually gets executed on the GPU as SASS code.
| Presumably the same "component" that is part of the "ptxas" tool?)
|
| As always, there are often many different variants for expressing the
| same thing.  A few examples.
|
| [...]
|
| ;-) And so on, and so forth.  Are there any generic recommendations,
| "best practice"?

The answer was:

| I don't know of any official documentation; in general we have tuned the
| backend to the PTX that we generate, so following that lead will give you the
| best results.

So, yeah.  :-\ Understandable and not unexpected, though.


Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH] tree-optimization/103641 - improve vect_synth_mult_by_constant

2022-02-04 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> The following happens to improve compile-time of the PR103641
> testcase on aarch64 significantly.  I did not investigate the
> effect on the generated code but at least in theory
> choose_mult_variant should do a better job when we tell it
> the actual mode we are going to use for the operations it
> synthesizes.

Yeah, agreed.  (Following up from a comment in the PR: I don't think
we can rely on unsupported operations having a high cost, but then we
should already be checking that the operations are actually supported.)

> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK for trunk?
>
> Thanks,
> Richard.
>
> 2022-02-04  Richard Biener  
>
>   PR tree-optimization/103641
>   * tree-vect-patterns.cc (vect_synth_mult_by_constant):
>   Pass the vector mode to choose_mult_variant.
> ---
>  gcc/tree-vect-patterns.cc | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index bea04992160..686a10caec1 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -3046,17 +3046,17 @@ vect_synth_mult_by_constant (vec_info *vinfo, tree 
> op, tree val,
>   can synthesize shifts that way.  */
>bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, 
> multtype);
>  
> +  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
>HOST_WIDE_INT hwval = tree_to_shwi (val);
>/* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
>   The vectorizer's benefit analysis will decide whether it's beneficial
>   to do this.  */
> -  bool possible = choose_mult_variant (mode, hwval, ,
> - , MAX_COST);
> +  bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
> +? TYPE_MODE (vectype) : mode,
> +hwval, , , MAX_COST);
>if (!possible)
>  return NULL;
>  
> -  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
> -
>if (!vectype
>|| !target_supports_mult_synth_alg (, variant,
>  vectype, synth_shift_p))

The !vectype early out needs to move with the assignment.
LGTM otherwise.

Thanks,
Richard


[PATCH] tree-optimization/103641 - improve vect_synth_mult_by_constant

2022-02-04 Thread Richard Biener via Gcc-patches
The following happens to improve compile-time of the PR103641
testcase on aarch64 significantly.  I did not investigate the
effect on the generated code but at least in theory
choose_mult_variant should do a better job when we tell it
the actual mode we are going to use for the operations it
synthesizes.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK for trunk?

Thanks,
Richard.

2022-02-04  Richard Biener  

PR tree-optimization/103641
* tree-vect-patterns.cc (vect_synth_mult_by_constant):
Pass the vector mode to choose_mult_variant.
---
 gcc/tree-vect-patterns.cc | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index bea04992160..686a10caec1 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -3046,17 +3046,17 @@ vect_synth_mult_by_constant (vec_info *vinfo, tree op, 
tree val,
  can synthesize shifts that way.  */
   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
 
+  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
   HOST_WIDE_INT hwval = tree_to_shwi (val);
   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
  The vectorizer's benefit analysis will decide whether it's beneficial
  to do this.  */
-  bool possible = choose_mult_variant (mode, hwval, ,
-   , MAX_COST);
+  bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
+  ? TYPE_MODE (vectype) : mode,
+  hwval, , , MAX_COST);
   if (!possible)
 return NULL;
 
-  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
-
   if (!vectype
   || !target_supports_mult_synth_alg (, variant,
   vectype, synth_shift_p))
-- 
2.34.1


Re: [PATCH] PR target/104345: Use nvptx "set" instruction for cond ? -1 : 0.

2022-02-04 Thread Thomas Schwinge
Hi Roger!

On 2022-02-03T21:00:50+, "Roger Sayle"  wrote:
> This patch

Thanks!

> addresses the "increased register pressure" regression on
> nvptx-none caused by my change to transition the backend to a
> STORE_FLAG_VALUE = 1 target.

Yes, "addresses", but unfortunately doesn't "resolve".  ;-|

> This improved code generation for the
> more common case of producing 0/1 Boolean values, but unfortunately
> made things marginally worse when a 0/-1 mask value is desired.
> Unfortunately, nvptx kernels are extremely sensitive to changes in
> register usage, which was observable in the reported PR.
>
> This patch provides optimizations for -(cond ? 1 : 0), effectively
> simplifying this into cond ? -1 : 0, where these ternary operators are
> provided by nvptx's selp instruction, and for the specific case of
> SImode, using (restoring) nvptx's "set" instruction (which avoids
> the need for a predicate register).

I'm confirming the improved code generation (less registers used, less
instructions emitted) in cases where it triggers -- but unfortunately it
doesn't in the PR104345 'libgomp.oacc-c-c++-common/reduction-cplx-dbl.c'
scenario.

> This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu
> with a "make" and "make -k check" with no new failures.  Unfortunately,
> the exact register usage of a nvptx kernel depends upon the version of
> the Cuda drivers being used (and the hardware), but I believe this
> change should resolve the PR (for Thomas) by improving code generation
> for the cases that regressed.  Ok for mainline?

So, testing your patch in isolation, it does *not* resolve PR104345,
unfortunately.  I'll next test in combination with your other pending
patches:

  - "nvptx: Expand QI mode operations using SI mode instructions".
  - "nvptx: Fix and use BI mode logic instructions (e.g. and.pred)"


Grüße
 Thomas


> gcc/ChangeLog
>   PR target/104345
>   * config/nvptx/nvptx.md (sel_true): Fix indentation.
>   (sel_false): Likewise.
>   (define_code_iterator eqne): New code iterator for EQ and NE.
>   (*selp_neg_): New define_insn_and_split to optimize
>   the negation of a selp instruction.
>   (*selp_not_): New define_insn_and_split to optimize
>   the bitwise not of a selp instruction.
>   (*setcc_int): Use set instruction for neg:SI of a selp.
>
> gcc/testsuite/ChangeLog
>   PR target/104345
>   * gcc.target/nvptx/neg-selp.c: New test case.
>
>
> Thanks in advance,
> Roger
> --
>
> diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
> index 92768dd..651ba20 100644
> --- a/gcc/config/nvptx/nvptx.md
> +++ b/gcc/config/nvptx/nvptx.md
> @@ -892,7 +892,7 @@
>
>  (define_insn "sel_true"
>[(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
> -(if_then_else:HSDIM
> + (if_then_else:HSDIM
> (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
> (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
> (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
> @@ -901,7 +901,7 @@
>
>  (define_insn "sel_true"
>[(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
> -(if_then_else:SDFM
> + (if_then_else:SDFM
> (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
> (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
> (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
> @@ -910,7 +910,7 @@
>
>  (define_insn "sel_false"
>[(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
> -(if_then_else:HSDIM
> + (if_then_else:HSDIM
> (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
> (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
> (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
> @@ -919,13 +919,63 @@
>
>  (define_insn "sel_false"
>[(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
> -(if_then_else:SDFM
> + (if_then_else:SDFM
> (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
> (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
> (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
>""
>"%.\\tselp%t0\\t%0, %3, %2, %1;")
>
> +(define_code_iterator eqne [eq ne])
> +
> +;; Split negation of a predicate into a conditional move.
> +(define_insn_and_split "*selp_neg_"
> +  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
> + (neg:HSDIM (eqne:HSDIM
> +  (match_operand:BI 1 "nvptx_register_operand" "R")
> +  (const_int 0]
> +  ""
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> + (if_then_else:HSDIM
> +   (eqne (match_dup 1) (const_int 0))
> +   (const_int -1)
> +   (const_int 0)))])
> +
> +;; Split bitwise not of a predicate into a conditional move.
> +(define_insn_and_split "*selp_not_"
> +  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
> + (not:HSDIM (eqne:HSDIM
> + 

Re: [PATCH] waccess: Look at calls when tracking clobbers [PR104092]

2022-02-04 Thread Richard Sandiford via Gcc-patches
Richard Sandiford  writes:
> In this PR the waccess pass was fed:
>
>   D.10779 ={v} {CLOBBER};
>   VIEW_CONVERT_EXPR(D.10779) = .MASK_LOAD_LANES (addr_5(D), 
> 64B, _2);
>   _7 = D.10779.__val[0];
>
> However, the tracking of m_clobbers only looked at gassigns,
> so it missed that the clobber on the first line was overwritten
> by the call on the second line.
>
> This patch splits the updating of m_clobbers out into its own
> function, called after the check_*() routines, and extends it
> to handle both gassigns and gcalls.  I think that makes sense
> as an instance of the "read, operate, write" model, with the
> new function being part of "write".
>
> Previously only the gimple_clobber_p handling was conditional
> on m_check_dangling_p, but I think the whole of the new function
> can be.  We only enter stmts into m_clobbers if m_check_dangling_p,
> so we only need to remove them under the same condition.
>
> Tested on aarch64-linux-gnu.  OK to install?
>
> Richard
>
>
> gcc/
>   PR middle-end/104092
>   * gimple-ssa-warn-access.cc (pass_waccess::update_clobbers_from_lhs):
>   New function, split out from...
>   (pass_waccess::check_stmt): ...here and generalized to calls.
>   (pass_waccess::check_block): Call it.
>
> gcc/testsuite/
>   * gcc.target/aarch64/sve/acle/general/pr104092.c: New test.

I've pushed the test to trunk after Richard's EOL fix (thanks).

Richard

> ---
>  gcc/gimple-ssa-warn-access.cc | 68 +++
>  .../aarch64/sve/acle/general/pr104092.c   |  7 ++
>  2 files changed, 48 insertions(+), 27 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr104092.c
>
> diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
> index f639807a78a..25066fa6b89 100644
> --- a/gcc/gimple-ssa-warn-access.cc
> +++ b/gcc/gimple-ssa-warn-access.cc
> @@ -2094,6 +2094,9 @@ private:
>/* Check a non-call statement.  */
>void check_stmt (gimple *);
>  
> +  /* Update the clobber map based on the lhs of a statement.  */
> +  void update_clobbers_from_lhs (gimple *);
> +
>/* Check statements in a basic block.  */
>void check_block (basic_block);
>  
> @@ -4270,33 +4273,6 @@ is_auto_decl (tree x)
>  void
>  pass_waccess::check_stmt (gimple *stmt)
>  {
> -  if (m_check_dangling_p && gimple_clobber_p (stmt))
> -{
> -  /* Ignore clobber statemts in blocks with exceptional edges.  */
> -  basic_block bb = gimple_bb (stmt);
> -  edge e = EDGE_PRED (bb, 0);
> -  if (e->flags & EDGE_EH)
> - return;
> -
> -  tree var = gimple_assign_lhs (stmt);
> -  m_clobbers.put (var, stmt);
> -  return;
> -}
> -
> -  if (is_gimple_assign (stmt))
> -{
> -  /* Clobbered unnamed temporaries such as compound literals can be
> -  revived.  Check for an assignment to one and remove it from
> -  M_CLOBBERS.  */
> -  tree lhs = gimple_assign_lhs (stmt);
> -  while (handled_component_p (lhs))
> - lhs = TREE_OPERAND (lhs, 0);
> -
> -  if (is_auto_decl (lhs))
> - m_clobbers.remove (lhs);
> -  return;
> -}
> -
>if (greturn *ret = dyn_cast  (stmt))
>  {
>if (optimize && flag_isolate_erroneous_paths_dereference)
> @@ -4326,6 +4302,42 @@ pass_waccess::check_stmt (gimple *stmt)
>  }
>  }
>  
> +/* Update the clobber map based on the lhs of STMT.  */
> +
> +void
> +pass_waccess::update_clobbers_from_lhs (gimple *stmt)
> +{
> +  if (gimple_clobber_p (stmt))
> +{
> +  /* Ignore clobber statements in blocks with exceptional edges.  */
> +  basic_block bb = gimple_bb (stmt);
> +  edge e = EDGE_PRED (bb, 0);
> +  if (e->flags & EDGE_EH)
> + return;
> +
> +  tree var = gimple_assign_lhs (stmt);
> +  m_clobbers.put (var, stmt);
> +  return;
> +}
> +
> +  if (is_gimple_assign (stmt) || is_gimple_call (stmt))
> +{
> +  /* Clobbered unnamed temporaries such as compound literals can be
> +  revived.  Check for an assignment to one and remove it from
> +  M_CLOBBERS.  */
> +  tree lhs = gimple_get_lhs (stmt);
> +  if (!lhs)
> + return;
> +
> +  while (handled_component_p (lhs))
> + lhs = TREE_OPERAND (lhs, 0);
> +
> +  if (is_auto_decl (lhs))
> + m_clobbers.remove (lhs);
> +  return;
> +}
> +}
> +
>  /* Check basic block BB for invalid accesses.  */
>  
>  void
> @@ -4340,6 +4352,8 @@ pass_waccess::check_block (basic_block bb)
>   check_call (call);
>else
>   check_stmt (stmt);
> +  if (m_check_dangling_p)
> + update_clobbers_from_lhs (stmt);
>  }
>  }
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr104092.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr104092.c
> new file mode 100644
> index 000..c17ece7d82f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr104092.c
> @@ -0,0 +1,7 @@
> +/* { dg-options "-O2 -Wall" } */
> +
> +#include 
> +
>