Re: [PATCH] Fix ICE during gimple resimplification (PR tree-optimization/92401)

2019-11-08 Thread Richard Biener
On November 9, 2019 1:07:18 AM GMT+01:00, Jakub Jelinek  
wrote:
>Hi!
>
>On the following testcase we ICE, because gimple_resimplify3 is called
>on a CONSTRUCTOR with 3 elements, which is fine, but it calls
>fold_ternary
>which works only on expression codes with TREE_CODE_LENGTH of 3.
>
>Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok
>for
>trunk?

Ok. 

Thanks, 
Richard. 


>2019-11-09  Jakub Jelinek  
>
>   PR tree-optimization/92401
>   * gimple-match-head.c (gimple_resimplify1): Call const_unop only
>   if res_op->code is an expression with code length 1.
>   * gimple-match-head.c (gimple_resimplify2): Call const_binop only
>   if res_op->code is an expression with code length 2.
>   * gimple-match-head.c (gimple_resimplify3): Call fold_ternary only
>   if res_op->code is an expression with code length 3.
>
>   * g++.dg/opt/pr92401.C: New test.
>
>--- gcc/gimple-match-head.c.jj 2019-11-07 17:56:21.983858406 +0100
>+++ gcc/gimple-match-head.c	2019-11-08 09:18:26.650193537 +0100
>@@ -191,7 +191,12 @@ gimple_resimplify1 (gimple_seq *seq, gim
> {
>   tree tem = NULL_TREE;
>   if (res_op->code.is_tree_code ())
>-  tem = const_unop (res_op->code, res_op->type, res_op->ops[0]);
>+  {
>+tree_code code = res_op->code;
>+if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))
>+&& TREE_CODE_LENGTH (code) == 1)
>+  tem = const_unop (res_op->code, res_op->type, res_op->ops[0]);
>+  }
>   else
>   tem = fold_const_call (combined_fn (res_op->code), res_op->type,
>  res_op->ops[0]);
>@@ -252,8 +257,13 @@ gimple_resimplify2 (gimple_seq *seq, gim
> {
>   tree tem = NULL_TREE;
>   if (res_op->code.is_tree_code ())
>-  tem = const_binop (res_op->code, res_op->type,
>- res_op->ops[0], res_op->ops[1]);
>+  {
>+tree_code code = res_op->code;
>+if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))
>+&& TREE_CODE_LENGTH (code) == 2)
>+  tem = const_binop (res_op->code, res_op->type,
>+ res_op->ops[0], res_op->ops[1]);
>+  }
>   else
>   tem = fold_const_call (combined_fn (res_op->code), res_op->type,
>  res_op->ops[0], res_op->ops[1]);
>@@ -325,9 +335,14 @@ gimple_resimplify3 (gimple_seq *seq, gim
> {
>   tree tem = NULL_TREE;
>   if (res_op->code.is_tree_code ())
>-  tem = fold_ternary/*_to_constant*/ (res_op->code, res_op->type,
>-  res_op->ops[0], res_op->ops[1],
>-  res_op->ops[2]);
>+  {
>+tree_code code = res_op->code;
>+if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))
>+&& TREE_CODE_LENGTH (code) == 3)
>+  tem = fold_ternary/*_to_constant*/ (res_op->code, res_op->type,
>+  res_op->ops[0], res_op->ops[1],
>+  res_op->ops[2]);
>+  }
>   else
>   tem = fold_const_call (combined_fn (res_op->code), res_op->type,
>  res_op->ops[0], res_op->ops[1], res_op->ops[2]);
>--- gcc/testsuite/g++.dg/opt/pr92401.C.jj  2019-11-08 09:07:48.465767281
>+0100
>+++ gcc/testsuite/g++.dg/opt/pr92401.C 2019-11-08 09:26:02.778350689
>+0100
>@@ -0,0 +1,15 @@
>+// PR tree-optimization/92401
>+// { dg-do compile { target c++11 } }
>+// { dg-options "-O2" }
>+
>+typedef float V __attribute__ ((__vector_size__ (4 * sizeof (float))));
>+
>+V v;
>+
>+void
>+foo ()
>+{
>+  int i;
>+  for (i = 0; i < 11; ++i)
>+v = V { 0.0f, 0.0f, (float) i, 0.0f };
>+}
>
>   Jakub



Implement the <tuple> part of C++20 p1032 Misc constexpr bits.

2019-11-08 Thread Smith-Rowland, Edward M
Here is the <tuple> part of C++20 p1032 Misc constexpr bits.

Tested on x86_64-linux. OK?

Ed
2019-11-09  Edward Smith-Rowland  <3dw...@verizon.net>

	Implement the <tuple> part of C++20 p1032 Misc constexpr bits.
	* include/std/tuple (_Head_base, _Tuple_impl(allocator_arg_t,...),
	_M_assign, tuple(allocator_arg_t,...), _Inherited, operator=, _M_swap,
	swap, pair(piecewise_construct_t,): Constexpr.
	* (__uses_alloc0::_Sink::operator=, __uses_alloc_t): Constexpr.
	* testsuite/20_util/tuple/cons/constexpr_allocator_arg_t.cc: New test.
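
A rough sketch of the sort of code this enables (illustration only, not the
new constexpr_allocator_arg_t.cc test verbatim; assumes -std=gnu++2a):

  #include <memory>
  #include <tuple>

  constexpr bool
  test_alloc_tuple()
  {
    // Allocator-extended construction, now usable in constant expressions.
    std::allocator<int> a;
    std::tuple<int, double> t(std::allocator_arg, a, 1, 2.5);
    return std::get<0>(t) == 1 && std::get<1>(t) == 2.5;
  }

  static_assert(test_alloc_tuple());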

Index: include/std/tuple
===
--- include/std/tuple	(revision 277944)
+++ include/std/tuple	(working copy)
@@ -132,6 +132,7 @@
 constexpr _Head_base(_UHead&& __h)
 	: _M_head_impl(std::forward<_UHead>(__h)) { }
 
+  _GLIBCXX20_CONSTEXPR
   _Head_base(allocator_arg_t, __uses_alloc0)
   : _M_head_impl() { }
 
@@ -144,6 +145,7 @@
 	: _M_head_impl(*__a._M_a) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Head_base(__uses_alloc0, _UHead&& __uhead)
 	: _M_head_impl(std::forward<_UHead>(__uhead)) { }
 
@@ -243,6 +245,7 @@
 		(_Tuple_impl<_Idx, _UHead, _UTails...>::_M_head(__in))) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a)
 	: _Inherited(__tag, __a),
   _Base(__tag, __use_alloc<_Head>(__a)) { }
@@ -256,6 +259,7 @@
   template::type>
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	_UHead&& __head, _UTail&&... __tail)
 	: _Inherited(__tag, __a, std::forward<_UTail>(__tail)...),
@@ -263,6 +267,7 @@
 	std::forward<_UHead>(__head)) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	const _Tuple_impl& __in)
 	: _Inherited(__tag, __a, _M_tail(__in)),
@@ -269,6 +274,7 @@
   _Base(__use_alloc<_Head, _Alloc, _Head>(__a), _M_head(__in)) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	_Tuple_impl&& __in)
 	: _Inherited(__tag, __a, std::move(_M_tail(__in))),
@@ -276,6 +282,7 @@
 	std::forward<_Head>(_M_head(__in))) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	const _Tuple_impl<_Idx, _UElements...>& __in)
 	: _Inherited(__tag, __a,
@@ -284,6 +291,7 @@
 		_Tuple_impl<_Idx, _UElements...>::_M_head(__in)) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	_Tuple_impl<_Idx, _UHead, _UTails...>&& __in)
 	: _Inherited(__tag, __a, std::move
@@ -293,6 +301,7 @@
 		(_Tuple_impl<_Idx, _UHead, _UTails...>::_M_head(__in))) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 void
 _M_assign(const _Tuple_impl<_Idx, _UElements...>& __in)
 {
@@ -302,6 +311,7 @@
 	}
 
   template
+	_GLIBCXX20_CONSTEXPR
 void
 _M_assign(_Tuple_impl<_Idx, _UHead, _UTails...>&& __in)
 {
@@ -312,6 +322,7 @@
 	}
 
 protected:
+  _GLIBCXX20_CONSTEXPR
   void
   _M_swap(_Tuple_impl& __in)
   {
@@ -369,6 +380,7 @@
 	{ }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a)
 	: _Base(__tag, __use_alloc<_Head>(__a)) { }
 
@@ -378,6 +390,7 @@
 	: _Base(__use_alloc<_Head, _Alloc, _Head>(__a), __head) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	_UHead&& __head)
 	: _Base(__use_alloc<_Head, _Alloc, _UHead>(__a),
@@ -384,11 +397,13 @@
 	std::forward<_UHead>(__head)) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 _Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	const _Tuple_impl& __in)
 	: _Base(__use_alloc<_Head, _Alloc, _Head>(__a), _M_head(__in)) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	_Tuple_impl&& __in)
 	: _Base(__use_alloc<_Head, _Alloc, _Head>(__a),
@@ -395,6 +410,7 @@
 	std::forward<_Head>(_M_head(__in))) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	const _Tuple_impl<_Idx, _UHead>& __in)
 	: _Base(__use_alloc<_Head, _Alloc, _Head>(__a),
@@ -401,6 +417,7 @@
 		_Tuple_impl<_Idx, _UHead>::_M_head(__in)) { }
 
   template
+	_GLIBCXX20_CONSTEXPR
 	_Tuple_impl(allocator_arg_t __tag, const _Alloc& __a,
 	_Tuple_impl<_Idx, _UHead>&& __in)
 	: _Base(__use_alloc<_Head, _Alloc, _UHead>(__a),
@@ -408,6 +425,7 @@
 	{ }
 
   template
+	_GLIBCXX20_CONSTEXPR
 void
 _M_assign(const _Tuple_impl<_Idx, _UHead>& __in)
 {
@@ -415,6 +433,7 @@
 	}
 
   template
+	_GLIBCXX20_CONSTEXPR
 void
 _M_assign(_Tuple_impl<_Idx, _UHead>&& __in)
 {
@@ -423,6 +442,7 @@
 	}
 
 protected:
+  _GLIBCXX20_CONSTEXPR
   void
   _M_swap(_Tuple_impl& __in)
   {

Re: [PATCH] OpenACC reference count overhaul

2019-11-08 Thread Julian Brown
On Thu, 31 Oct 2019 19:11:57 +0100
Thomas Schwinge  wrote:

> Hi Julian!
> 
> On 2019-10-29T12:15:01+, Julian Brown 
> wrote:
> > This is a new version of the patch which hopefully addresses all
> > review comments. Further commentary below.  
> 
> Thanks, great, looking into that one -- I see you're removing more and
> more special-case, strange code, replacing it with generic and/or
> well-explained code.
> 
> 
> Question, for my understanding:
> 
> > On Mon, 21 Oct 2019 16:14:11 +0200
> > Thomas Schwinge  wrote:  
> >> On 2019-10-03T09:35:04-0700, Julian Brown 
> >> wrote:  
> 
> >> > @@ -577,17 +551,14 @@ present_create_copy (unsigned f, void *h,
> >> > size_t s, int async)
> >>   
> >> > -  d = tgt->to_free;
> >>   
> >> > +  n = lookup_host (acc_dev, h, s);
> >> > +  assert (n != NULL);
> >> > +  d = (void *) (n->tgt->tgt_start + n->tgt_offset +
> >> > (uintptr_t) h
> >> > +- n->host_start);
> >> 
> >> |   return d;
> >> 
> >> Again, it's not obvious to me how that is semantically equivalent
> >> to what we've returned before?  
> >
> > This is a bug fix (it's mentioned in the ChangeLog).  
> 
> Eh, well hidden.  Indeed that mentions:
> 
>   (present_create_copy): [...] Fix target pointer
>   return value.
> 
> So that's not related to reference counting, needs to be discussed
> separately.
> 
> ..., and while I do agree that the current code is a bit "strange"
> (returning 'tgt->to_free'), I couldn't quickly find or come up with a
> test cases where this would actually do the wrong thing.  After all,
> this is the code path taken for "not present", and 'tgt' is built
> anew for one single mapping, with no alignment set (which would cause
> 'to_free' to differ from 'tgt_start'); 'tgt_offset' should always be
> zero, and 'h' always the same as 'host_start'.  What am I missing?
> That is, given the current set of libgomp test cases, the attached
> never triggers.

The code can't stay exactly as it is with this patch, because the tgt
return value from gomp_map_vars_async with
GOMP_MAP_VARS_OPENACC_ENTER_DATA is a null pointer.

So, the device pointer calculation needed to be re-done -- although it's
not quite a bug fix, as you point out, and some of the offsets will
always be zero or cancel out in practice.

*However*, it looks like the device pointer calculation for the
"present" case is wrong in the preceding code. I've addressed that in
the patch posted here:

https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00661.html

The patch attached here applies on top of that one, and attempts to
keep the device pointer calculation "the same" for the non-present
case, modulo an extra lookup_host -- and also adds some assertions to
make sure the assumptions about zero/cancelled-out offsets stay true.
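
To make the intent concrete, the arithmetic being preserved amounts to the
following (a standalone sketch, not libgomp code; the field names only
mirror splay_tree_key):

  #include <cassert>
  #include <cstdint>

  struct mapping
  {
    uintptr_t host_start;  // host address the mapping begins at
    uintptr_t tgt_start;   // start of the device allocation
    uintptr_t tgt_offset;  // offset of this key within that allocation
  };

  static void *
  device_ptr_for (const mapping &n, uintptr_t h)
  {
    return (void *) (n.tgt_start + n.tgt_offset + (h - n.host_start));
  }

  int main ()
  {
    // For a mapping freshly created for a single object, tgt_offset is zero
    // and h equals host_start, so this degenerates to tgt_start -- exactly
    // the assumption the new assertions pin down.
    mapping n = { 0x1000, 0x9000, 0 };
    assert (device_ptr_for (n, 0x1000) == (void *) 0x9000);
    return 0;
  }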

OK for trunk? Re-tested with offloading to nvptx.

Thanks,

Julian
commit f486944db79d4c9b8f3d96453d72f34c4a9f0aab
Author: Julian Brown 
Date:   Mon Nov 5 15:51:46 2018 -0800

OpenACC reference count overhaul

2019-10-28  Julian Brown  
Thomas Schwinge  

libgomp/
* libgomp.h (struct splay_tree_key_s): Substitute dynamic_refcount
field for virtual_refcount.
(struct acc_dispatch_t): Remove data_environ field.
(struct gomp_device_descr): Update comment on openacc field.
(enum gomp_map_vars_kind): Add GOMP_MAP_VARS_OPENACC_ENTER_DATA.
(gomp_acc_insert_pointer, gomp_acc_remove_pointer, gomp_free_memmap):
Remove prototypes.
(gomp_remove_var_async): Add prototype.
* oacc-host.c (host_dispatch): Don't initialise removed data_environ
field.
* oacc-init.c (acc_shutdown_1): Iteratively call gomp_remove_var
instead of calling gomp_free_memmap.
* oacc-mem.c (lookup_dev_1): New function.
(lookup_dev): Reimplement using above.
(acc_free, acc_hostptr): Update calls to lookup_dev.
(acc_map_data): Likewise.  Don't add to data_environ list.
(acc_unmap_data): Remove call to gomp_unmap_vars.  Fix semantics to
remove mapping, but not mapped data.
(present_create_copy): Use virtual_refcount instead of
dynamic_refcount.  Don't manipulate data_environ.  Fix target pointer
return value.
(delete_copyout): Update for virtual_refcount semantics.  Use
goacc_remove_var_async for asynchronous delete/copyouts.
(gomp_acc_insert_pointer, gomp_acc_remove_pointer): Remove functions.
* oacc-parallel.c (find_pointer): Remove function.
(find_group_last, goacc_enter_data_internal,
goacc_exit_data_internal): New functions.
(GOACC_enter_exit_data): Use goacc_enter_data_internal and
goacc_exit_data_internal helper functions.
* target.c (gomp_map_vars_internal): Handle

Implement the <array> part of C++20 p1032 Misc constexpr bits.

2019-11-08 Thread Smith-Rowland, Edward M
I'm going to implement p1032 in pieces.  It *is* miscellaneous after all ;-).

Tested on x86_64-linux. OK?
2019-11-09  Edward Smith-Rowland  <3dw...@verizon.net>

	Implement the <array> part of C++20 p1032 Misc constexpr bits.
	* include/std/array (fill, swap): Make constexpr.
	* testsuite/23_containers/array/requirements/constexpr_fill.cc: New.
	* testsuite/23_containers/array/requirements/constexpr_swap.cc: New.
Index: include/std/array
===
--- include/std/array	(revision 277944)
+++ include/std/array	(working copy)
@@ -112,11 +112,11 @@
   // No explicit construct/copy/destroy for aggregate type.
 
   // DR 776.
-  void
+  _GLIBCXX20_CONSTEXPR void
   fill(const value_type& __u)
   { std::fill_n(begin(), size(), __u); }
 
-  void
+  _GLIBCXX20_CONSTEXPR void
   swap(array& __other)
   noexcept(_AT_Type::_Is_nothrow_swappable::value)
   { std::swap_ranges(begin(), end(), __other.begin()); }
@@ -288,6 +288,7 @@
 
   // Specialized algorithms.
   template
+_GLIBCXX20_CONSTEXPR
 inline
 #if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
 // Constrained free swap overload, see p0185r1
@@ -295,7 +296,6 @@
   _GLIBCXX_STD_C::__array_traits<_Tp, _Nm>::_Is_swappable::value
 >::type
 #else
-_GLIBCXX20_CONSTEXPR
 void
 #endif
 swap(array<_Tp, _Nm>& __one, array<_Tp, _Nm>& __two)
Index: testsuite/23_containers/array/requirements/constexpr_fill.cc
===
--- testsuite/23_containers/array/requirements/constexpr_fill.cc	(nonexistent)
+++ testsuite/23_containers/array/requirements/constexpr_fill.cc	(working copy)
@@ -0,0 +1,36 @@
+// { dg-options "-std=gnu++2a" }
+// { dg-do compile { target c++2a } }
+//
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+#include 
+
+constexpr bool
+test_array()
+{
+  auto ok = true;
+
+  std::array fa{};
+  fa.fill(3.333f);
+
+  ok = ok && (fa[0] == fa[2]);
+
+  return ok;
+}
+
+static_assert(test_array());
Index: testsuite/23_containers/array/requirements/constexpr_swap.cc
===
--- testsuite/23_containers/array/requirements/constexpr_swap.cc	(nonexistent)
+++ testsuite/23_containers/array/requirements/constexpr_swap.cc	(working copy)
@@ -0,0 +1,43 @@
+// { dg-options "-std=gnu++2a" }
+// { dg-do compile { target c++2a } }
+//
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+#include 
+
+constexpr bool
+test_array()
+{
+  auto ok = true;
+
+  std::array fa{{1.1f, 2.2f, 3.3f}};
+
+  std::array fb{{4.4f, 5.5f, 6.6f}};
+
+  fb.swap(fa);
+
+  ok = ok && (fa[0] == 4.4f);
+
+  std::swap(fa, fb);
+
+  ok = ok && (fa[0] == 1.1f);
+
+  return ok;
+}
+
+static_assert(test_array());


[PATCH] libgomp/test: Add flags to find libatomic in build-tree testing

2019-11-08 Thread Maciej W. Rozycki
Add flags to find libatomic in build-tree testing, fixing a catastrophic 
libgomp testsuite failure with targets such as `riscv-linux-gnu' that 
imply `-latomic' with the `-pthread' GCC option, implied in turn by the 
offload options, removing failures like:

.../bin/riscv64-linux-gnu-ld: cannot find -latomic
collect2: error: ld returned 1 exit status
compiler exited with status 1
FAIL: libgomp.c/../libgomp.c-c++-common/atomic-18.c (test for excess errors)
Excess errors:
.../bin/riscv64-linux-gnu-ld: cannot find -latomic

UNRESOLVED: libgomp.c/../libgomp.c-c++-common/atomic-18.c compilation failed to 
produce executable

and bringing overall test results for the said target (here with the 
`x86_64-linux-gnu' host and RISC-V QEMU in the Linux user emulation mode 
as the target board) from:

=== libgomp Summary ===

# of expected passes            90
# of unexpected failures        3267
# of expected failures          2
# of unresolved testcases       3247
# of unsupported tests          548

to:

=== libgomp Summary ===

# of expected passes            6834
# of unexpected failures        4
# of expected failures          4
# of unsupported tests          518

libgomp/
* testsuite/lib/libgomp.exp (libgomp_init): Add flags to find 
libatomic in build-tree testing.
---
 libgomp/testsuite/lib/libgomp.exp |   10 ++
 1 file changed, 10 insertions(+)

gcc-test-libgomp-atomic-lib-path.diff
Index: gcc/libgomp/testsuite/lib/libgomp.exp
===
--- gcc.orig/libgomp/testsuite/lib/libgomp.exp
+++ gcc/libgomp/testsuite/lib/libgomp.exp
@@ -174,6 +174,16 @@ proc libgomp_init { args } {
 # For build-tree testing, also consider the library paths used for builing.
 # For installed testing, we assume all that to be provided in the sysroot.
 if { $blddir != "" } {
+   # Offload options imply `-pthread', and that implies `-latomic'
+   # on some targets, so wire in libatomic build directories.
+   set shlib_ext [get_shlib_extension]
+   set atomic_library_path "${blddir}/../libatomic/.libs"
+   if { [file exists "${atomic_library_path}/libatomic.a"]
+|| [file exists \
+"${atomic_library_path}/libatomic.${shlib_ext}"] } {
+   lappend ALWAYS_CFLAGS "additional_flags=-L${atomic_library_path}"
+   append always_ld_library_path ":${atomic_library_path}"
+   }
global cuda_driver_include
global cuda_driver_lib
if { $cuda_driver_include != "" } {


[PATCH] OpenACC "present" subarrays: runtime API return value and unmapping fixes

2019-11-08 Thread Julian Brown
Hi,

This patch fixes an issue I noticed when investigating an answer
for Thomas's question about device pointer return values in:

https://gcc.gnu.org/ml/gcc-patches/2019-10/msg02260.html

It looks to me like the return value for the present case is wrong in
the existing code: in case of a acc_pcopyin or similar call that refers
to a subarray of a larger block already mapped on the target, the
device pointer return value will be the start of the larger block, not
of the subarray being copied.

The attached patch corrects this issue, and also relaxes a restriction
on acc_delete, acc_copyout (etc.) to allow them to unmap/copyout
subarrays of a larger block already present on the target. There's no
particular reason to disallow that, as far as I can tell. This is
necessary to allow the new tests included with this patch to pass, and
a couple of existing "shouldfail" tests no longer fail, and have been
adjusted accordingly. It's still an error to try to copy data beyond
the bounds of a mapped block, and other existing tests cover those
cases.
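
A minimal sketch of the scenario in question (assumes an offloading-enabled
libgomp; with the host fallback device both pointers are just the host
addresses, so the check passes trivially):

  #include <cassert>
  #include <openacc.h>

  int main ()
  {
    static char block[1024];

    // Map the whole block, then ask for a subarray that is already present.
    char *d_block = (char *) acc_copyin (block, sizeof block);
    char *d_sub = (char *) acc_pcopyin (block + 256, 128);

    // Previously the "present" path returned the start of the enclosing
    // block rather than the device address of the subarray itself.
    assert (d_sub == d_block + 256);

    // With the relaxed check, copying out an enclosed subarray is accepted.
    acc_copyout (block + 256, 128);
    acc_copyout (block, sizeof block);
    return 0;
  }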

The calculation for the return value for the non-present case of
present_create_copy has also been adjusted in anticipation of a new
version of the above-linked patch.

Tested with offloading to nvptx. OK for trunk?

Julian

ChangeLog

libgomp/
* oacc-mem.c (present_create_copy): Fix device pointer return value in
case of "present" subarray.  Use tgt->tgt_start instead of tgt->to_free
in non-present/create case.
(delete_copyout): Change error condition to detect only copies outside
of mapped block.  Adjust error message accordingly.
* testsuite/libgomp.oacc-c-c++-common/copyin-devptr-1.c: New test.
* testsuite/libgomp.oacc-c-c++-common/copyin-devptr-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/lib-20.c: Adjust expected error
message.
* testsuite/libgomp.oacc-c-c++-common/lib-23.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-22.c: Allow test to pass now.
* testsuite/libgomp.oacc-c-c++-common/lib-30.c: Likewise.
commit 00607b06c8e506b0f0744a230856e1e8776633c3
Author: Julian Brown 
Date:   Thu Nov 7 14:24:49 2019 -0800

OpenACC "present" subarrays: runtime API return value and unmapping fixes

libgomp/
* oacc-mem.c (present_create_copy): Fix device pointer return value in
case of "present" subarray.  Use tgt->tgt_start instead of tgt->to_free
in non-present/create case.
(delete_copyout): Change error condition to fail only on copies outside
of mapped block.  Adjust error message accordingly.
* testsuite/libgomp.oacc-c-c++-common/copyin-devptr-1.c: New test.
* testsuite/libgomp.oacc-c-c++-common/copyin-devptr-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/lib-20.c: Adjust expected error
message.
* testsuite/libgomp.oacc-c-c++-common/lib-23.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-22.c: Allow test to pass now.
* testsuite/libgomp.oacc-c-c++-common/lib-30.c: Likewise.

diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 2f271009fb8..0a41f11210c 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -535,7 +535,7 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
   if (n)
 {
   /* Present. */
-  d = (void *) (n->tgt->tgt_start + n->tgt_offset);
+  d = (void *) (n->tgt->tgt_start + n->tgt_offset + h - n->host_start);
 
   if (!(f & FLAG_PRESENT))
 {
@@ -584,7 +584,7 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
 
   gomp_mutex_lock (&acc_dev->lock);
 
-  d = tgt->to_free;
+  d = (void *) tgt->tgt_start;
   tgt->prev = acc_dev->openacc.data_environ;
   acc_dev->openacc.data_environ = tgt;
 
@@ -669,7 +669,6 @@ acc_pcopyin (void *h, size_t s)
 static void
 delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
 {
-  size_t host_size;
   splay_tree_key n;
   void *d;
   struct goacc_thread *thr = goacc_thread ();
@@ -703,13 +702,12 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
   d = (void *) (n->tgt->tgt_start + n->tgt_offset
 		+ (uintptr_t) h - n->host_start);
 
-  host_size = n->host_end - n->host_start;
-
-  if (n->host_start != (uintptr_t) h || host_size != s)
+  if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
 {
+  size_t host_size = n->host_end - n->host_start;
   gomp_mutex_unlock (&acc_dev->lock);
-  gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
-		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
+  gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
+		  (void *) h, (int) s, (void *) n->host_start, (int) host_size);
 }
 
   if (n->refcount == REFCOUNT_INFINITY)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/copyin-devptr-1.c 

Re: [PATCH 1/5] Libsanitizer: merge from trunk with merge.sh.

2019-11-08 Thread Eric Gallager
On 11/5/19, Jakub Jelinek  wrote:
> On Mon, Nov 04, 2019 at 04:10:27PM +0100, Martin Liska wrote:
>>
>> libsanitizer/ChangeLog:
>>
>> 2019-11-05  Martin Liska  
>>
>>  * all source files: Merge from upstream r375507.
>> ---
>>  libsanitizer/BlocksRuntime/Block.h|   59 +
>>  libsanitizer/BlocksRuntime/Block_private.h|  179 ++
>
> Do we really need this?

So, maybe we don't need this for the sanitizer itself, but if the
sanitizers now come with their own copy of the Blocks Runtime...
couldn't that be the solution as to where to take our blocks support
library for GCC proper from? So many previous discussions about adding
blocks support to GCC got derailed by the issue of licensing of the
Blocks Runtime, but if it's okay to include it as part of
libsanitizer, I'd say that should apply to the rest of GCC too...

>
>> --- a/libsanitizer/tsan/tsan_libdispatch.cpp
>> +++ b/libsanitizer/tsan/tsan_interceptors_libdispatch.cpp
>> @@ -1,4 +1,4 @@
>> -//===-- tsan_libdispatch.cpp
>> --===//
>> +//===-- tsan_interceptors_libdispatch.cpp
>> -===//
>>  //
>>  // Part of the LLVM Project, under the Apache License v2.0 with LLVM
>> Exceptions.
>>  // See https://llvm.org/LICENSE.txt for license information.
>> @@ -16,6 +16,7 @@
>>  #include "tsan_interceptors.h"
>>  #include "tsan_rtl.h"
>>
>> +#include "BlocksRuntime/Block.h"
>>  #include "tsan_dispatch_defs.h"
>>
>>  namespace __tsan {
>
> I mean, couldn't we wrap this Block.h include with #ifdef __BLOCKS__ or so
> as a local patch (at least for now)?

This is bug 78352 btw: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78352

>
> Otherwise the patch series LGTM.
>
>   Jakub
>
>


Add C2x *_NORM_MAX constants to <float.h>

2019-11-08 Thread Joseph Myers
C2x adds <float.h> constants FLT_NORM_MAX, DBL_NORM_MAX and
LDBL_NORM_MAX.  These are for the maximum "normalized" finite
floating-point number, where the given definition of normalized is
that all possible values with MANT_DIG significand digits (leading one
not zero) can be represented with that exponent.  The effect of that
definition is that these macros are the same as the corresponding MAX
macros for all formats except IBM long double, where the NORM_MAX
value has exponent 1 smaller than the MAX one so that all 106 digits
can be 1.

This patch adds those macros to GCC.  They are only defined for float,
double and long double; C2x does not include such macros for DFP
types, and while the integration of TS 18661-3 into C2x has not yet
occurred, the draft proposed text does not add them for the _FloatN /
_FloatNx types (where they would always be the same as the MAX
macros).
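
For illustration, a small program poking at the new predefined macros (the
__*_NORM_MAX__ spellings come from the c-family change below; <float.h>
itself only exposes FLT_NORM_MAX etc. in C2x mode):

  #include <cstdio>

  int main ()
  {
  #ifdef __LDBL_NORM_MAX__
    // Equal for IEEE formats; for IBM long double NORM_MAX has an exponent
    // one smaller than MAX so that all 106 significand digits can be 1.
    std::printf ("LDBL_MAX == LDBL_NORM_MAX: %d\n",
                 __LDBL_MAX__ == __LDBL_NORM_MAX__);
  #endif
    return 0;
  }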

Bootstrapped with no regressions on x86_64-pc-linux-gnu.  Also tested
compilation of the new test for powerpc-linux-gnu to confirm the check
of LDBL_NORM_MAX in the IBM long double case does get properly
optimized out.  OK to commit?

gcc:
2019-11-09  Joseph Myers  

* ginclude/float.h [__STDC_VERSION__ > 201710L] (FLT_NORM_MAX,
DBL_NORM_MAX, LDBL_NORM_MAX): Define.
* real.c (get_max_float): Add norm_max argument.
* real.h (get_max_float): Update prototype.
* builtins.c (fold_builtin_interclass_mathfn): Update calls to
get_max_float.

gcc/c-family:
2019-11-09  Joseph Myers  

* c-cppbuiltin.c (builtin_define_float_constants): Also define
NORM_MAX constants.  Update call to get_max_float.
(LAZY_HEX_FP_VALUES_CNT): Update value to include NORM_MAX
constants.

gcc/d:
2019-11-09  Joseph Myers  

* d-target.cc (define_float_constants): Update call to
get_max_float.

gcc/testsuite:
2019-11-09  Joseph Myers  

* gcc.dg/c11-float-3.c, gcc.dg/c2x-float-1.c: New tests.

Index: gcc/builtins.c
===
--- gcc/builtins.c  (revision 277989)
+++ gcc/builtins.c  (working copy)
@@ -9039,7 +9039,7 @@ fold_builtin_interclass_mathfn (location_t loc, tr
mode = DFmode;
arg = fold_build1_loc (loc, NOP_EXPR, type, arg);
  }
-   get_max_float (REAL_MODE_FORMAT (mode), buf, sizeof (buf));
+   get_max_float (REAL_MODE_FORMAT (mode), buf, sizeof (buf), false);
real_from_string (, buf);
result = build_call_expr (isgr_fn, 2,
  fold_build1_loc (loc, ABS_EXPR, type, arg),
@@ -9063,7 +9063,7 @@ fold_builtin_interclass_mathfn (location_t loc, tr
mode = DFmode;
arg = fold_build1_loc (loc, NOP_EXPR, type, arg);
  }
-   get_max_float (REAL_MODE_FORMAT (mode), buf, sizeof (buf));
+   get_max_float (REAL_MODE_FORMAT (mode), buf, sizeof (buf), false);
real_from_string (, buf);
result = build_call_expr (isle_fn, 2,
  fold_build1_loc (loc, ABS_EXPR, type, arg),
@@ -9102,7 +9102,7 @@ fold_builtin_interclass_mathfn (location_t loc, tr
  }
arg = fold_build1_loc (loc, ABS_EXPR, type, arg);
 
-   get_max_float (REAL_MODE_FORMAT (mode), buf, sizeof (buf));
+   get_max_float (REAL_MODE_FORMAT (mode), buf, sizeof (buf), false);
real_from_string (, buf);
sprintf (buf, "0x1p%d", REAL_MODE_FORMAT (orig_mode)->emin - 1);
real_from_string (, buf);
Index: gcc/c-family/c-cppbuiltin.c
===
--- gcc/c-family/c-cppbuiltin.c (revision 277989)
+++ gcc/c-family/c-cppbuiltin.c (working copy)
@@ -259,11 +259,16 @@ builtin_define_float_constants (const char *name_p
   /* Since, for the supported formats, B is always a power of 2, we
  construct the following numbers directly as a hexadecimal
  constants.  */
-  get_max_float (fmt, buf, sizeof (buf));
+  get_max_float (fmt, buf, sizeof (buf), false);
 
   sprintf (name, "__%s_MAX__", name_prefix);
   builtin_define_with_hex_fp_value (name, type, decimal_dig, buf, fp_suffix, 
fp_cast);
 
+  get_max_float (fmt, buf, sizeof (buf), true);
+
+  sprintf (name, "__%s_NORM_MAX__", name_prefix);
+  builtin_define_with_hex_fp_value (name, type, decimal_dig, buf, fp_suffix, 
fp_cast);
+
   /* The minimum normalized positive floating-point number,
  b**(emin-1).  */
   sprintf (name, "__%s_MIN__", name_prefix);
@@ -1607,10 +1612,10 @@ struct GTY(()) lazy_hex_fp_value_struct
 };
 /* Number of the expensive to compute macros we should evaluate lazily.
Each builtin_define_float_constants invocation calls
-   builtin_define_with_hex_fp_value 4 times and builtin_define_float_constants
+   builtin_define_with_hex_fp_value 5 times and builtin_define_float_constants
is called for FLT, DBL, LDBL and up to NUM_FLOATN_NX_TYPES times for
FLTNN*.  */ 
-#define LAZY_HEX_FP_VALUES_CNT (4 * (3 + 

[PATCH] Fix ICE during gimple resimplification (PR tree-optimization/92401)

2019-11-08 Thread Jakub Jelinek
Hi!

On the following testcase we ICE, because gimple_resimplify3 is called
on a CONSTRUCTOR with 3 elements, which is fine, but it calls fold_ternary
which works only on expression codes with TREE_CODE_LENGTH of 3.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2019-11-09  Jakub Jelinek  

PR tree-optimization/92401
* gimple-match-head.c (gimple_resimplify1): Call const_unop only
if res_op->code is an expression with code length 1.
* gimple-match-head.c (gimple_resimplify2): Call const_binop only
if res_op->code is an expression with code length 2.
* gimple-match-head.c (gimple_resimplify3): Call fold_ternary only
if res_op->code is an expression with code length 3.

* g++.dg/opt/pr92401.C: New test.

--- gcc/gimple-match-head.c.jj  2019-11-07 17:56:21.983858406 +0100
+++ gcc/gimple-match-head.c 2019-11-08 09:18:26.650193537 +0100
@@ -191,7 +191,12 @@ gimple_resimplify1 (gimple_seq *seq, gim
 {
   tree tem = NULL_TREE;
   if (res_op->code.is_tree_code ())
-   tem = const_unop (res_op->code, res_op->type, res_op->ops[0]);
+   {
+ tree_code code = res_op->code;
+ if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))
+ && TREE_CODE_LENGTH (code) == 1)
+   tem = const_unop (res_op->code, res_op->type, res_op->ops[0]);
+   }
   else
tem = fold_const_call (combined_fn (res_op->code), res_op->type,
   res_op->ops[0]);
@@ -252,8 +257,13 @@ gimple_resimplify2 (gimple_seq *seq, gim
 {
   tree tem = NULL_TREE;
   if (res_op->code.is_tree_code ())
-   tem = const_binop (res_op->code, res_op->type,
-  res_op->ops[0], res_op->ops[1]);
+   {
+ tree_code code = res_op->code;
+ if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))
+ && TREE_CODE_LENGTH (code) == 2)
+   tem = const_binop (res_op->code, res_op->type,
+  res_op->ops[0], res_op->ops[1]);
+   }
   else
tem = fold_const_call (combined_fn (res_op->code), res_op->type,
   res_op->ops[0], res_op->ops[1]);
@@ -325,9 +335,14 @@ gimple_resimplify3 (gimple_seq *seq, gim
 {
   tree tem = NULL_TREE;
   if (res_op->code.is_tree_code ())
-   tem = fold_ternary/*_to_constant*/ (res_op->code, res_op->type,
-   res_op->ops[0], res_op->ops[1],
-   res_op->ops[2]);
+   {
+ tree_code code = res_op->code;
+ if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))
+ && TREE_CODE_LENGTH (code) == 3)
+   tem = fold_ternary/*_to_constant*/ (res_op->code, res_op->type,
+   res_op->ops[0], res_op->ops[1],
+   res_op->ops[2]);
+   }
   else
tem = fold_const_call (combined_fn (res_op->code), res_op->type,
   res_op->ops[0], res_op->ops[1], res_op->ops[2]);
--- gcc/testsuite/g++.dg/opt/pr92401.C.jj   2019-11-08 09:07:48.465767281 
+0100
+++ gcc/testsuite/g++.dg/opt/pr92401.C  2019-11-08 09:26:02.778350689 +0100
@@ -0,0 +1,15 @@
+// PR tree-optimization/92401
+// { dg-do compile { target c++11 } }
+// { dg-options "-O2" }
+
+typedef float V __attribute__ ((__vector_size__ (4 * sizeof (float))));
+
+V v;
+
+void
+foo ()
+{
+  int i;
+  for (i = 0; i < 11; ++i)
+v = V { 0.0f, 0.0f, (float) i, 0.0f };
+}

Jakub



[committed] Fix various spelling errors in cgraph, tree-ssa-strlen and ipa-sra

2019-11-08 Thread Jakub Jelinek
Hi!

While trying to read cgraph.h to find out how to enhance it to deal with
declare variant, I've noticed a couple of spelling errors, enough that I ran
aspell -c on a couple of sources (obviously, it flags a lot of issues which
are non-issues, so most of the time I have to press I).
This only fixes spelling errors, e.g. the very first hunk contains some
weird unless at the end of the sentence, which I haven't changed.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk as
obvious.

2019-11-09  Jakub Jelinek  

* symtab.c: Fix comment typos.
* cgraphunit.c: Likewise.
* cgraph.h: Likewise.
* cgraphclones.c: Likewise.
* cgraph.c: Likewise.
* varpool.c: Likewise.
* tree-ssa-strlen.c: Likewise.
* ipa-sra.c: Likewise.
(scan_expr_access, check_all_callers_for_issues): Fix typo
in a dump message.

--- gcc/symtab.c.jj 2019-10-30 10:49:37.193013424 +0100
+++ gcc/symtab.c	2019-11-08 14:10:32.587346432 +0100
@@ -253,7 +253,7 @@ symbol_table::symtab_prevail_in_asm_name
   insert_to_assembler_name_hash (node, false);
 }
 
-/* Initalize asm name hash unless.  */
+/* Initialize asm name hash unless.  */
 
 void
 symbol_table::symtab_initialize_asm_name_hash (void)
@@ -551,7 +551,7 @@ symtab_node::dump_asm_name () const
 }
 
 /* Return ipa reference from this symtab_node to
-   REFERED_NODE or REFERED_VARPOOL_NODE. USE_TYPE specify type
+   REFERRED_NODE or REFERRED_VARPOOL_NODE. USE_TYPE specify type
of the use.  */
 
 ipa_ref *
@@ -563,7 +563,7 @@ symtab_node::create_reference (symtab_no
 
 
 /* Return ipa reference from this symtab_node to
-   REFERED_NODE or REFERED_VARPOOL_NODE. USE_TYPE specify type
+   REFERRED_NODE or REFERRED_VARPOOL_NODE. USE_TYPE specify type
of the use and STMT the statement (if it exists).  */
 
 ipa_ref *
@@ -732,7 +732,7 @@ symtab_node::remove_stmt_references (gim
 }
 
 /* Remove all stmt references in non-speculative references.
-   Those are not maintained during inlining & clonning.
+   Those are not maintained during inlining & cloning.
The exception are speculative references that are updated along
with callgraph edges associated with them.  */
 
@@ -1453,7 +1453,7 @@ symtab_node::ultimate_alias_target_1 (en
  availability prevails the availability of its target (i.e. static alias of
  weak definition is available.
 
- Transaparent alias is just alternative anme of a given symbol used within
+ Transparent alias is just alternative name of a given symbol used within
  one compilation unit and is translated prior hitting the object file.  It
  inherits the visibility of its target.
  Weakref is a different animal (and noweak definition is weak).
@@ -1670,7 +1670,7 @@ symtab_node::set_init_priority (priority
   h->init = priority;
 }
 
-/* Set fialization priority to PRIORITY.  */
+/* Set finalization priority to PRIORITY.  */
 
 void
 cgraph_node::set_fini_priority (priority_type priority)
@@ -1915,7 +1915,7 @@ enum symbol_partitioning_class
 symtab_node::get_partitioning_class (void)
 {
   /* Inline clones are always duplicated.
- This include external delcarations.   */
+ This include external declarations.   */
   cgraph_node *cnode = dyn_cast  (this);
 
   if (DECL_ABSTRACT_P (decl))
@@ -2040,7 +2040,7 @@ symtab_node::nonzero_address ()
 
If MEMORY_ACCESSED is true, assume that both memory pointer to THIS
and S2 is going to be accessed.  This eliminates the situations when
-   either THIS or S2 is NULL and is seful for comparing bases when deciding
+   either THIS or S2 is NULL and is useful for comparing bases when deciding
about memory aliasing.  */
 int
 symtab_node::equal_address_to (symtab_node *s2, bool memory_accessed)
@@ -2074,7 +2074,7 @@ symtab_node::equal_address_to (symtab_no
  code and are used only within speculation.  In this case we may make
  symbol equivalent to its alias even if interposition may break this
  rule.  Doing so will allow us to turn speculative inlining into
- non-speculative more agressively.  */
+ non-speculative more aggressively.  */
   if (DECL_VIRTUAL_P (this->decl) && avail1 >= AVAIL_AVAILABLE)
 binds_local1 = true;
   if (DECL_VIRTUAL_P (s2->decl) && avail2 >= AVAIL_AVAILABLE)
@@ -2130,7 +2130,7 @@ symtab_node::equal_address_to (symtab_no
 
   /* TODO: Alias oracle basically assume that addresses of global variables
  are different unless they are declared as alias of one to another while
- the code folding comparsions doesn't.
+ the code folding comparisons doesn't.
  We probably should be consistent and use this fact here, too, but for
  the moment return false only when we are called from the alias oracle.  */
 
@@ -2389,8 +2389,8 @@ symtab_node::output_to_lto_symbol_table_
 }
 
   /* We have real symbol that should be in symbol table.  However try to trim
- down the refernces to libraries bit more because linker 

Re: [PATCH, Fortran] Allow CHARACTER literals in assignments and DATA statements

2019-11-08 Thread Jakub Jelinek
On Fri, Nov 08, 2019 at 11:17:21AM +0100, Tobias Burnus wrote:
> >     Jim MacArthur 
> >     Mark Eggleston 
> > 
> >     PR fortran/89103
> >     * gfortran.dg/dec_char_conversion_in_assignment_1.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_assignment_2.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_assignment_3.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_assignment_4.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_assignment_5.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_assignment_6.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_assignment_7.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_assignment_8.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_data_1.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_data_2.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_data_3.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_data_4.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_data_5.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_data_6.f90: New test.
> >     * gfortran.dg/dec_char_conversion_in_data_7.f90: New test.
> >     * gfortran.dg/hollerith5.f90: Add -Wsurprising to options.
> >     * gfortran.dg/hollerith_legacy.f90: Add -Wsurprising to options.
> >     * gfortran.dg/no_char_to_numeric_assign.f90: New test.

After full bootstrap/regtest, I've also noticed:
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_assignment_4.f90   -O0  
compilation failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_assignment_4.f90   -O1  
compilation failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_assignment_4.f90   -O2  
compilation failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_assignment_4.f90   -O3 
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  
compilation failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_assignment_4.f90   -O3 -g  
compilation failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_assignment_4.f90   -Os  
compilation failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_data_3.f90   -O0  compilation 
failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_data_3.f90   -O1  compilation 
failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_data_3.f90   -O2  compilation 
failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_data_3.f90   -O3 
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  
compilation failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_data_3.f90   -O3 -g  
compilation failed to produce executable
+UNRESOLVED: gfortran.dg/dec_char_conversion_in_data_3.f90   -Os  compilation 
failed to produce executable

Tests with dg-error are expected to fail compilation, so won't produce
executable and thus can't be run.

Fixed thusly, tested on x86_64-linux, committed to trunk as obvious.

2019-11-09  Jakub Jelinek  

* gfortran.dg/dec_char_conversion_in_assignment_4.f90: Use
dg-do compile instead of dg-do run.
* gfortran.dg/dec_char_conversion_in_data_3.f90: Likewise.

--- testsuite/gfortran.dg/dec_char_conversion_in_assignment_4.f90   
(revision 277993)
+++ testsuite/gfortran.dg/dec_char_conversion_in_assignment_4.f90   
(working copy)
@@ -1,4 +1,4 @@
-! { dg-do run }
+! { dg-do compile }
 ! { dg-options "-fdec -fno-dec-char-conversions" }
 !
 ! Modified by Mark Eggleston 
@@ -17,4 +17,3 @@ include "dec_char_conversion_in_assignme
 ! { dg-error "Cannot convert" " " { target *-*-* } 47 }
 ! { dg-error "Cannot convert" " " { target *-*-* } 48 }
 ! { dg-error "Cannot convert" " " { target *-*-* } 49 }
-
--- testsuite/gfortran.dg/dec_char_conversion_in_data_3.f90 (revision 
277993)
+++ testsuite/gfortran.dg/dec_char_conversion_in_data_3.f90 (working copy)
@@ -1,4 +1,4 @@
-! { dg-do run }
+! { dg-do compile }
 ! { dg-options "-fdec -fno-dec-char-conversions" }
 !
 ! Modified by Mark Eggleston 
@@ -17,4 +17,3 @@ include "dec_char_conversion_in_data_1.f
 ! { dg-error "Incompatible types" " " { target *-*-* } 68 }
 ! { dg-error "Incompatible types" " " { target *-*-* } 69 }
 ! { dg-error "Incompatible types" " " { target *-*-* } 70 }
-


Jakub



[RFC, libstdc++] Implement C++20 P1208R6 - source_location.

2019-11-08 Thread Ed Smith-Rowland via gcc-patches
As an experiment, I took a shot at implementing source_location for
C++20.  This was mostly done in experimental but I wanted to try adding
column information.  (The experimental version just returned 0.)  I
added __builtin_COLUMN in analogy to __builtin_LINE.  The std version is
also consteval so you get different results in some cases wrt
experimental. You can diff the two 1.cc test cases in libstdc++ to see
for yourself.


As Jonathan mentioned on IRC, we probably want a single builtin and we
want to coordinate the name with clang (__builtin_source_location?).
But this "works" and it might make useful fodder for the next round.
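
For reference, a small usage sketch of the builtin as implemented here (the
final builtin name is still up in the air, as noted):

  #include <cstdio>

  void
  log_here (int line = __builtin_LINE (), int col = __builtin_COLUMN ())
  {
    // As with __builtin_LINE, a default argument reflects the call site,
    // not this declaration.
    std::printf ("called from line %d, column %d\n", line, col);
  }

  int
  main ()
  {
    log_here ();
    return 0;
  }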


Ed



gcc/ChangeLog

2019-11-08  Ed Smith-Rowland  <3dw...@verizon.net>

Implement C++20 P1208R6 - source_location.  Implement column with a
__builtin_COLUMN for both std and experimental.  The std current()
is consteval.
* builtins.c (fold_builtin_COLUMN): New function.
(fold_builtin_0): Use it.
* builtins.def: Add __builtin_COLUMN.
* doc/extend.texi: Doc __builtin_COLUMN.
* testsuite/c-c++-common/builtin_location.c: __builtin_COLUMN() tests.
* testsuite/c-c++-common/cpp/has-builtin-2.c: __builtin_COLUMN test.


libstdc++-v3/ChangeLog

2019-11-08  Ed Smith-Rowland  <3dw...@verizon.net>

Implement C++20 P1208R6 - source_location.  Implement column with a
__builtin_COLUMN for both std and experimental.  The std current()
is consteval.
* include/experimental/source_location: Call __builtin_COLUMN
* include/std/source_location: New header.
* include/std/version: Add 
* testsuite/20_util/source_location/1.cc: New test.
* libstdc++-v3/testsuite/experimental/source_location/1.cc: Test column.

Index: gcc/builtins.c
===
--- gcc/builtins.c	(revision 277745)
+++ gcc/builtins.c	(working copy)
@@ -9500,6 +9500,14 @@
   return build_int_cst (type, LOCATION_LINE (loc));
 }
 
+/* Fold a call to __builtin_COLUMN to an integer constant.  */
+
+static inline tree
+fold_builtin_COLUMN (location_t loc, tree type)
+{
+  return build_int_cst (type, LOCATION_COLUMN (loc));
+}
+
 /* Fold a call to built-in function FNDECL with 0 arguments.
This function returns NULL_TREE if no simplification was possible.  */
 
@@ -9519,6 +9527,9 @@
 case BUILT_IN_LINE:
   return fold_builtin_LINE (loc, type);
 
+case BUILT_IN_COLUMN:
+  return fold_builtin_COLUMN (loc, type);
+
 CASE_FLT_FN (BUILT_IN_INF):
 CASE_FLT_FN_FLOATN_NX (BUILT_IN_INF):
 case BUILT_IN_INFD32:
Index: gcc/builtins.def
===
--- gcc/builtins.def	(revision 277745)
+++ gcc/builtins.def	(working copy)
@@ -1048,6 +1048,7 @@
 DEF_GCC_BUILTIN (BUILT_IN_FILE, "FILE", BT_FN_CONST_STRING, ATTR_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN (BUILT_IN_FUNCTION, "FUNCTION", BT_FN_CONST_STRING, ATTR_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN (BUILT_IN_LINE, "LINE", BT_FN_INT, ATTR_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_COLUMN, "COLUMN", BT_FN_INT, ATTR_NOTHROW_LEAF_LIST)
 
 /* Synchronization Primitives.  */
 #include "sync-builtins.def"
Index: gcc/doc/extend.texi
===
--- gcc/doc/extend.texi	(revision 277745)
+++ gcc/doc/extend.texi	(working copy)
@@ -13154,6 +13154,13 @@
 of the call to @var{F}.
 @end deftypefn
 
+@deftypefn {Built-in Function} int __builtin_COLUMN ()
+This function returns a constant integer expression that evaluates to
+the column number of the invocation of the built-in.  When used as a C++
+default argument for a function @var{F}, it returns the column number
+of the call to @var{F}.
+@end deftypefn
+
 @deftypefn {Built-in Function} {const char *} __builtin_FUNCTION ()
 This function is the equivalent of the @code{__FUNCTION__} symbol
 and returns an address constant pointing to the name of the function
Index: libstdc++-v3/include/experimental/source_location
===
--- libstdc++-v3/include/experimental/source_location	(revision 277745)
+++ libstdc++-v3/include/experimental/source_location	(working copy)
@@ -52,7 +52,7 @@
 current(const char* __file = __builtin_FILE(),
 	const char* __func = __builtin_FUNCTION(),
 	int __line = __builtin_LINE(),
-	int __col = 0) noexcept
+	int __col = __builtin_COLUMN()) noexcept
 {
   source_location __loc;
   __loc._M_file = __file;
Index: libstdc++-v3/include/std/source_location
===
--- libstdc++-v3/include/std/source_location	(nonexistent)
+++ libstdc++-v3/include/std/source_location	(working copy)
@@ -0,0 +1,95 @@
+//  -*- C++ -*-
+
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can 

Re: introduce -fcallgraph-info option

2019-11-08 Thread Eric Gallager
On 11/8/19, Richard Biener  wrote:
> On Thu, 7 Nov 2019, Alexandre Oliva wrote:
>
>> On Nov  7, 2019, Richard Biener  wrote:
>>
>> > (also raises the question why we have both -dumpbase and -auxbase ...)
>>
>> https://gcc.gnu.org/ml/gcc-patches/2002-08/msg00294.html
>>
>> This was before -dumpdir, however.
>>
>> Here's the current logic for aux_base_name:
>>
>> -c or -S with -o [odir/]obase.oext: [odir/]obase
>> otherwise, given input [idir/]ibase.iext: ibase
>>
>> Whereas the current logic for dump_base_name, once aux_base_name has
>> been determined as [auxdir/]auxbase, is:
>>
>> given -dumpbase ddir/dbase: ddir/dbase
>> otherwise, given -dumpdir ddir and -dumpbase dbase: ddir/dbase
>> otherwise, given -dumpbase dbase: [auxdir/]dbase
>> otherwise, given -dumpdir ddir: ddir/ibase.iext
>> otherwise: [auxdir/]ibase.iext
>>
>> Relevant cases to consider: (aux, dump) for each compilation with
>> CC='gcc -fstack-usage -fdump-tree-original'
>>
>> compiling without -o: (ibase, ibase.iext)
>> ex $CC -c srcdir/foo.c srcdir/x/bar.c
>> -> foo.o foo.su foo.c.#t.original
>>  + bar.o bar.su bar.c.#t.original
>>
>> compiling with -o: ([odir/]obase, [odir/]ibase.iext)
>> ex $CC -c srcdir/foo.c -o objdir/foobaz.o -Dfoobaz
>> -> objdir/foobaz.o objdir/foobaz.su objdir/foo.c.#t.original
>>
>> compiling multiple sources with -dumpbase: (ibase, [ddir/]dbase)
>> ex $CC -dumpbase outdir/runme.dump -c srcdir/foo.c srcdir/x/bar.c
>> -> foo.o foo.su outdir/runme.dump.#t.original
>>  + bar.o bar.su outdir/runme.dump.#t.original (dupe)
>>
>> compiling and linking with -o: (ibase, ibase.iext)
>> ex $CC -o outdir/runme srcdir/foo.c srcdir/x/bar.c
>> -> /tmp/temp().o foo.su foo.c.#t.original
>>  + /tmp/temp().o bar.su bar.c.#t.original
>>  + outdir/runme
>>
>> lto-recompiling and linking with -o: (/tmp/obase.temp().ltrans#.ltrans,
>> odir/obase.ltrans#)
>> ex $CC -o outdir/runme ltobjdir/foo.o ltobjdir/bar.o -fdump-rtl-expand
>> -> /tmp/runme.temp().ltrans0.ltrans.o /tmp/runme.temp().ltrans0.ltrans.su
>>  + outdir/runme.ltrans0.#r.expand
>>  + outdir/runme
>>
>> lto-recompiling and linking without -o: (/tmp/temp().ltrans#.ltrans,
>> /tmp/temp().ltrans#.o)
>> ex $CC ltobjdir/foo.o ltobjdir/bar.o -fdump-rtl-expand
>> -> /tmp/temp().ltrans0.ltrans.o /tmp/temp().ltrans0.ltrans.su
>>  + /tmp/temp().ltrans0.#r.expand
>>  + a.out
>>
>>
>> If we were to unify auxbase and dumpbase, I'd take the opportunity to
>> fix the -o objdir/foobaz.o compilation to output dumps named after
>> objdir/foobaz or objdir/foobaz-foo.c rather than ./foo.c; for
>> outdir/runme.dump to be used as a prefix for aux and dump names, so that
>> we wouldn't create and then overwrite outdir/runme.dump, and so that
>> other compilations of foo.c and bar.c wouldn't overwrite the .su files,
>> but rather create outdir/runme.dump-{foo,bar}.* dumps and aux files; and
>> likewise use outdir/runme.ltrans0 or a.out.ltrans0 for the .su and
>> .expand files.
>>
>>
>> The logic I suggest involves combining some of the -auxbase and some
>> of the -dumpbase logic, namely:
>>
>> In the driver:
>>
>> compiling a single source idir/ibase.iext:
>>
>>   -o odir/obase.oext specified: default -dumpdir odir -dumpbase
>> obase.iext
>>   -o obase.oext specified: default -dumpbase obase.iext
>>   -o ibase.oext implied: default -dumpbase ibase.iext
>>
>> compiling multiple sources named as ibase.iext for linking:
>>
>>   -dumpbase [ddir/]dbase specified: make it -dumpbase
>> [ddir/]dbase-ibase.iext
>>   -o odir/output specified: default -dumpdir odir -dumpbase
>> output-ibase.iext
>>   -o output specified: default -dumpbase output-ibase.iext
>>   -o a.out implied: default -dumpbase a.out-ibase.iext
>>
>> LTO recompiling:
>>
>>   same as above, with each ibase.iext set to ltrans#
>>
>>
>> In the compiler, set dump_base_name to:
>>
>> Given -dumpbase ddir/dbase: ddir/dbase
>> otherwise, given -dumpdir ddir and -dumpbase dbase: ddir/dbase
>> otherwise, given -dumpbase dbase: dbase
>>
>> and copy aux_base_name from dump_base_name, but if it ends in .iext,
>> drop the extension.
>>
>> The resulting behavior (aux_base_name, dump_base_name)
>>
>> compiling without -o: (ibase, ibase.iext)  unchanged
>> ex $CC -c srcdir/foo.c srcdir/x/bar.c
>> -> foo.o foo.su foo.c.#t.original
>>  + bar.o bar.su bar.c.#t.original
>>
>> compiling with -o: ([odir/]obase, [odir/]obase.iext)
>> ex $CC -c srcdir/foo.c -o objdir/foobaz.o -Dfoobaz
>> -> objdir/foobaz.o objdir/foobaz.su objdir/foobaz.c.#t.original
>>
>> compiling multiple sources with -dumpbase: ([ddir]/dbase, [ddir/]dbase)
>> ex $CC -dumpbase outdir/runme.dump -c srcdir/foo.c srcdir/x/bar.c
>> -> foo.o outdir/runme.dump-foo.su outdir/runme.dump-foo.c.#t.original
>>  + bar.o outdir/runme.dump-bar.su outdir/runme.dump-bar.c.#t.original
>>
>> compiling and linking with -o: (outdir/runme-ibase,
>> outdir/runme-ibase.iext)
>> ex $CC -o outdir/runme srcdir/foo.c srcdir/x/bar.c
>> -> /tmp/temp().o outdir/runme-foo.su outdir/runme-foo.c.#t.original
>>  + 

[Darwin, committed] Add include guard to darwin-protos.h

2019-11-08 Thread Iain Sandoe
The Darwin protos header is missing an include guard; this adds one.

tested on x86_64-darwin16
applied to mainline
thanks
Iain

gcc/ChangeLog:

2019-11-08  Iain Sandoe  

* config/darwin-protos.h: Add include guard.

diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h
index afeca81..e6721c7 100644
--- a/gcc/config/darwin-protos.h
+++ b/gcc/config/darwin-protos.h
@@ -17,6 +17,9 @@ You should have received a copy of the GNU General Public 
License
 along with GCC; see the file COPYING3.  If not see
 .  */
 
+#ifndef CONFIG_DARWIN_PROTOS_H
+#define CONFIG_DARWIN_PROTOS_H
+
 extern void darwin_init_sections (void);
 extern int name_needs_quotes (const char *);
 
@@ -123,3 +126,5 @@ extern void darwin_override_options (void);
 extern void darwin_patch_builtins (void);
 extern void darwin_rename_builtins (void);
 extern bool darwin_libc_has_function (enum function_class fn_class);
+
+#endif /* CONFIG_DARWIN_PROTOS_H */



[PATCH] extend -Wstringop-overflow to allocated objects (PR 91582)

2019-11-08 Thread Martin Sebor

Unless it's used with _FORTIFY_SOURCE, -Wstringop-overflow
doesn't consider out-of-bounds accesses to objects allocated
by alloca, malloc, other functions declared with attribute
alloc_size, or even VLAs with variable bounds.  This was
a known limitation of the checks (done just before expansion)
relying on the object size pass when they were introduced
in GCC 7.

But since its introduction in GCC 7, the warning has evolved
beyond some of the limitations of the object size pass.  Unlike
it, the warning considers non-constant offsets and stores with
non-constant sizes.  Attached is a simple enhancement that
(finally) adds the ability to also detect overflow in allocated
objects to the warning.

With the patch GCC detects the overflow in code like this:

  char* f (void)
  {
char s[] = "12345";
char *p = malloc (strlen (s));
strcpy (p, s);   // warning here
return p;
  }

but not (yet) in something like this:

  char* g (const char *s)
  {
char *p = malloc (strlen (s));
strcpy (p, s);   // no warning (yet)
return p;
  }

and quite a few other examples.  Doing better requires extending
the strlen pass.  I'm working on this extension and expect to
submit a patch before stage 1 ends.

Martin

PS I was originally planning to do all the allocation checking
in the strlen pass but it occurred to me that by also enhancing
the compute_objsize function, all warnings that use it will
benefit.  Besides -Wstringop-overflow this includes a subset
of -Warray-bounds, -Wformat-overflow, and -Wrestrict.  It's
nice when a small enhancement has such a broad positive effect.
PR middle-end/91582 - missing heap overflow detection for strcpy

gcc/ChangeLog:

	* builtins.c (gimple_call_alloc_size): New function.
	(compute_objsize): Add argument.  Call gimple_call_alloc_size.
	Handle variable offsets and indices.
	* builtins.h (gimple_call_alloc_size): Declare.
	(compute_objsize): Add argument.
	* tree-ssa-strlen.c (handle_store): Handle calls to allocated objects.

gcc/testsuite/ChangeLog:

	* c-c++-common/Wstringop-truncation.c: Remove xfails.
	* gcc/testsuite/g++.dg/ext/attr-alloc_size.C: Suppress -Warray-bounds.
	* gcc.dg/Wstringop-overflow-22.c: New test.
	* gcc/testsuite/gcc.dg/attr-alloc_size.c: Suppress -Warray-bounds.
	* gcc/testsuite/gcc.dg/attr-copy-2.c: Same.
	* gcc.dg/builtin-stringop-chk-5.c: Remove xfails.
	* gcc.dg/builtin-stringop-chk-8.c: Same.  Correct the text of expected
	warnings.
	* gcc.target/i386/pr82002-2a.c: Prune expected warning.
	* gcc.target/i386/pr82002-2b.c: Same.

Index: gcc/builtins.c
===
--- gcc/builtins.c	(revision 277978)
+++ gcc/builtins.c	(working copy)
@@ -3563,6 +3563,80 @@ check_access (tree exp, tree, tree, tree dstwrite,
   return true;
 }
 
+/* If STMT is a call to an allocation function, returns the size
+   of the object allocated by the call.  */
+
+tree
+gimple_call_alloc_size (gimple *stmt)
+{
+  if (!stmt)
+return NULL_TREE;
+
+  tree allocfntype;
+  if (tree fndecl = gimple_call_fndecl (stmt))
+allocfntype = TREE_TYPE (fndecl);
+  else
+allocfntype = gimple_call_fntype (stmt);
+
+  if (!allocfntype)
+return NULL_TREE;
+
+  unsigned argidx1 = UINT_MAX, argidx2 = UINT_MAX;
+  tree at = lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (allocfntype));
+  if (!at)
+{
+  if (!gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN))
+	return NULL_TREE;
+
+  argidx1 = 0;
+}
+
+  unsigned nargs = gimple_call_num_args (stmt);
+
+  if (argidx1 == UINT_MAX)
+{
+  tree atval = TREE_VALUE (at);
+  if (!atval)
+	return NULL_TREE;
+
+  argidx1 = TREE_INT_CST_LOW (TREE_VALUE (atval)) - 1;
+  if (nargs <= argidx1)
+	return NULL_TREE;
+
+  atval = TREE_CHAIN (atval);
+  if (atval)
+	{
+	  argidx2 = TREE_INT_CST_LOW (TREE_VALUE (atval)) - 1;
+	  if  (nargs <= argidx2)
+	return NULL_TREE;
+	}
+}
+
+  tree size = gimple_call_arg (stmt, argidx1);
+  tree n = argidx2 < nargs ? gimple_call_arg (stmt, argidx2) : integer_one_node;
+
+  /* To handle ranges do the math in wide_int and return the product
+ of the upper bounds as a constant.  Ignore anti-ranges.  */
+  wide_int rng1[2];
+  if (TREE_CODE (size) == INTEGER_CST)
+rng1[0] = rng1[1] = wi::to_wide (size);
+  else if (TREE_CODE (size) != SSA_NAME
+	   || get_range_info (size, rng1, rng1 + 1) != VR_RANGE)
+return NULL_TREE;
+
+  wide_int rng2[2];
+  if (TREE_CODE (n) == INTEGER_CST)
+rng2[0] = rng2[1] = wi::to_wide (n);
+  else if (TREE_CODE (n) != SSA_NAME
+	   || get_range_info (n, rng2 + 1, rng2 + 1) != VR_RANGE)
+return NULL_TREE;
+
+  const int prec = TYPE_PRECISION (sizetype);
+  rng1[1] = wide_int::from (rng1[1], prec, UNSIGNED);
+  rng2[1] = wide_int::from (rng2[1], prec, UNSIGNED);
+  return wide_int_to_tree (sizetype, rng1[1] * rng2[1]);
+}
+
 /* Helper to compute the size of the object referenced by the DEST
expression which must have pointer type, using 

[C++ COMMITTED] Add test for c++/92058

2019-11-08 Thread Marek Polacek
Another test for the constinit issue I fixed recently (c++/92134).

Tested on x86_64-linux, applying to trunk.

2019-11-08  Marek Polacek  

PR c++/92058 - constinit malfunction in static data member.
* g++.dg/cpp2a/constinit15.C: New test.

diff --git gcc/testsuite/g++.dg/cpp2a/constinit15.C 
gcc/testsuite/g++.dg/cpp2a/constinit15.C
new file mode 100644
index 000..32594334330
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp2a/constinit15.C
@@ -0,0 +1,14 @@
+// PR c++/92058 - constinit malfunction in static data member.
+// { dg-do compile { target c++2a } }
+
+struct B {
+B() {}
+};
+
+struct A {
+constinit static inline B b1{}; // { dg-error "does not have a constant initializer|call to non-.constexpr. function" }
+};
+
+int main() {
+A a;
+}



C++ PATCH for c++/88337 - Implement P1327R1: Allow dynamic_cast in constexpr

2019-11-08 Thread Marek Polacek
After much weeping and gnashing of teeth, here's a patch to handle dynamic_cast
in constexpr evaluation.  While the change in the standard is trivial (see
), the
change in the compiler is less so.

When build_dynamic_cast realizes that a dynamic_cast needs a run-time check, it
generates a call to __dynamic_cast -- see dyncast.cc in libsupc++ for its
definition.  The gist of my approach is to evaluate such a call at compile time.

This should be easy in theory: let the constexpr machinery find out the dynamic
type and then handle a sidecast and upcast.  That's ultimately what the patch
is trying to do, but there were a number of hindrances.

1) We can't use __dynamic_cast's type_info parameters; that type is not a
literal class.  But that means we have no idea what we're converting to!
I noticed that build_dynamic_cast_1 will create a cast via cp_convert
to the target type for both pointer/reference dynamic_cast.  So we can save
this type to the constexpr values hash map under a magic key; I abused
dynamic_cast_node for this...

2) [class.cdtor] says that when a dynamic_cast is used in a constructor or
destructor and the operand of the dynamic_cast refers to the object under
construction or destruction, this object is considered to be a most derived
object.  This was tricky, and the only thing that seemed to work was to add
a new member to constexpr_global_ctx.  I was happy to find out that I could
use new_obj I'd added recently.  Note that destruction is *not* handled at
all and in fact I couldn't even construct a testcase where that would make
a difference.

3) We can't rely on the hint __dynamic_cast gave us; the comment in
cxx_eval_dynamic_cast_fn explains why the accessible_base_p checks were
necessary.

There are many different scenarios regarding inheritance, so special care was
devoted to testing as much as possible, but the "dynamic_cast in
a constructor" testing could still be expanded.

This patch doesn't handle polymorphic typeid yet.  I think it will be easier
to review if these two are separated.  Hopefully the typeid part will be much
easier.
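
For reference, a minimal sketch in the style of the new constexpr-dynamic*.C
tests (but not one of them verbatim) of what the patch is meant to accept
under -std=c++2a:

  // { dg-do compile { target c++2a } }

  struct B { virtual void g () { } };
  struct D : B { };

  constexpr bool f ()
  {
    D d;
    B *bp = &d;
    // Before P1327R1 this downcast required a run-time __dynamic_cast call;
    // with the patch the constexpr machinery evaluates it.
    return dynamic_cast<D *> (bp) == &d;
  }

  static_assert (f (), "");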

Bootstrapped/regtested on x86_64-linux.

2019-11-08  Marek Polacek  

PR c++/88337 - Implement P1327R1: Allow dynamic_cast in constexpr.
* call.c (is_base_field_ref): No longer static.
* constexpr.c (struct constexpr_global_ctx): Add ctor_object member
and initialize it.
(cxx_dynamic_cast_fn_p): New function.
(cxx_eval_dynamic_cast_fn): Likewise.
(cxx_eval_call_expression): Call cxx_eval_dynamic_cast_fn for a call
to __dynamic_cast.  Save the object a constexpr constructor is
constructing.
(cxx_eval_constant_expression) : Save the target
type of a call to __dynamic_cast.
(potential_constant_expression_1): Don't give up on
cxx_dynamic_cast_fn_p.
* cp-tree.h (is_base_field_ref): Declare.
* parser.c (cp_parser_postfix_expression): Set location of expression.
* rtti.c (build_dynamic_cast_1): When creating a call to
__dynamic_cast, use the location of the original expression.

* g++.dg/cpp2a/constexpr-dynamic1.C: New test.
* g++.dg/cpp2a/constexpr-dynamic10.C: New test.
* g++.dg/cpp2a/constexpr-dynamic11.C: New test.
* g++.dg/cpp2a/constexpr-dynamic12.C: New test.
* g++.dg/cpp2a/constexpr-dynamic13.C: New test.
* g++.dg/cpp2a/constexpr-dynamic14.C: New test.
* g++.dg/cpp2a/constexpr-dynamic2.C: New test.
* g++.dg/cpp2a/constexpr-dynamic3.C: New test.
* g++.dg/cpp2a/constexpr-dynamic4.C: New test.
* g++.dg/cpp2a/constexpr-dynamic5.C: New test.
* g++.dg/cpp2a/constexpr-dynamic6.C: New test.
* g++.dg/cpp2a/constexpr-dynamic7.C: New test.
* g++.dg/cpp2a/constexpr-dynamic8.C: New test.
* g++.dg/cpp2a/constexpr-dynamic9.C: New test.

diff --git gcc/cp/call.c gcc/cp/call.c
index 0034c1cee0d..5de2aca1358 100644
--- gcc/cp/call.c
+++ gcc/cp/call.c
@@ -8193,7 +8193,7 @@ call_copy_ctor (tree a, tsubst_flags_t complain)
 
 /* Return true iff T refers to a base field.  */
 
-static bool
+bool
 is_base_field_ref (tree t)
 {
   STRIP_NOPS (t);
diff --git gcc/cp/constexpr.c gcc/cp/constexpr.c
index 20fddc57825..ef7706347bc 100644
--- gcc/cp/constexpr.c
+++ gcc/cp/constexpr.c
@@ -1025,8 +1025,11 @@ struct constexpr_global_ctx {
   /* Heap VAR_DECLs created during the evaluation of the outermost constant
  expression.  */
   auto_vec heap_vars;
+  /* For a constructor, this is the object we're constructing.  */
+  tree ctor_object;
   /* Constructor.  */
-  constexpr_global_ctx () : constexpr_ops_count (0) {}
+  constexpr_global_ctx () : constexpr_ops_count (0), ctor_object (NULL_TREE)
+{}
 };
 
 /* The constexpr expansion context.  CALL is the current function
@@ -1663,6 +1666,244 @@ is_std_allocator_allocate (tree fndecl)
   return decl_in_std_namespace_p (decl);
 }
 

Backports to gcc-9-branch

2019-11-08 Thread Jakub Jelinek
Hi!

I've backported the following 10 patches from trunk to the 9 branch,
bootstrapped/regtested them on x86_64-linux and i686-linux and committed.

Jakub
2019-11-08  Jakub Jelinek  

Backported from mainline
2019-10-21  Jakub Jelinek  

PR c++/92015
* constexpr.c (cxx_eval_component_reference, cxx_eval_bit_field_ref):
Use STRIP_ANY_LOCATION_WRAPPER on CONSTRUCTOR elts.

* g++.dg/cpp0x/constexpr-92015.C: New test.

--- gcc/cp/constexpr.c  (revision 277266)
+++ gcc/cp/constexpr.c  (revision 277267)
@@ -2887,7 +2887,10 @@ cxx_eval_component_reference (const cons
  : field == part)
{
  if (value)
-   return value;
+   {
+ STRIP_ANY_LOCATION_WRAPPER (value);
+ return value;
+   }
  else
/* We're in the middle of initializing it.  */
break;
@@ -2977,6 +2980,7 @@ cxx_eval_bit_field_ref (const constexpr_
   FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (whole), i, field, value)
 {
   tree bitpos = bit_position (field);
+  STRIP_ANY_LOCATION_WRAPPER (value);
   if (bitpos == start && DECL_SIZE (field) == TREE_OPERAND (t, 1))
return value;
   if (TREE_CODE (TREE_TYPE (field)) == INTEGER_TYPE
--- gcc/testsuite/g++.dg/cpp0x/constexpr-92015.C(nonexistent)
+++ gcc/testsuite/g++.dg/cpp0x/constexpr-92015.C(revision 277267)
@@ -0,0 +1,7 @@
+// PR c++/92015
+// { dg-do compile { target c++11 } }
+
+struct S1 { char c[6] {'h', 'e', 'l', 'l', 'o', 0}; };
+struct S2 { char c[6] = "hello"; };
+static_assert (S1{}.c[0] == 'h', "");
+static_assert (S2{}.c[0] == 'h', "");
2019-11-08  Jakub Jelinek  

Backported from mainline
2019-10-22  Tamar Christina  

PR sanitizer/92154
* sanitizer_common/sanitizer_platform_limits_posix.cc:
Cherry-pick compiler-rt revision r375220.

--- libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
(revision 277290)
+++ libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
(revision 277291)
@@ -1126,8 +1126,12 @@ CHECK_SIZE_AND_OFFSET(ipc_perm, uid);
 CHECK_SIZE_AND_OFFSET(ipc_perm, gid);
 CHECK_SIZE_AND_OFFSET(ipc_perm, cuid);
 CHECK_SIZE_AND_OFFSET(ipc_perm, cgid);
-#if !defined(__aarch64__) || !SANITIZER_LINUX || __GLIBC_PREREQ (2, 21)
+#if (!defined(__aarch64__) || !SANITIZER_LINUX || __GLIBC_PREREQ (2, 21)) && \
+!defined(__arm__)
 /* On aarch64 glibc 2.20 and earlier provided incorrect mode field.  */
+/* On Arm glibc 2.31 and later provide a different mode field, this field is
+   never used by libsanitizer so we can simply ignore this assert for all glibc
+   versions.  */
 CHECK_SIZE_AND_OFFSET(ipc_perm, mode);
 #endif
 
2019-11-08  Jakub Jelinek  

Backported from mainline
2019-10-22  Jakub Jelinek  

PR tree-optimization/85887
* decl.c (expand_static_init): Drop ECF_LEAF from __cxa_guard_acquire
and __cxa_guard_release.

--- gcc/cp/decl.c   (revision 277292)
+++ gcc/cp/decl.c   (revision 277293)
@@ -8589,14 +8589,14 @@ expand_static_init (tree decl, tree init
  (acquire_name, build_function_type_list (integer_type_node,
   TREE_TYPE (guard_addr),
   NULL_TREE),
-  NULL_TREE, ECF_NOTHROW | ECF_LEAF);
+  NULL_TREE, ECF_NOTHROW);
  if (!release_fn || !abort_fn)
vfntype = build_function_type_list (void_type_node,
TREE_TYPE (guard_addr),
NULL_TREE);
  if (!release_fn)
release_fn = push_library_fn (release_name, vfntype, NULL_TREE,
-  ECF_NOTHROW | ECF_LEAF);
+ ECF_NOTHROW);
  if (!abort_fn)
abort_fn = push_library_fn (abort_name, vfntype, NULL_TREE,
ECF_NOTHROW | ECF_LEAF);
2019-11-08  Jakub Jelinek  

Backported from mainline
2019-10-27  Jakub Jelinek  

* locales.c (iso_3166): Add missing comma after "United-States".

--- gcc/ada/locales.c   (revision 277491)
+++ gcc/ada/locales.c   (revision 277492)
@@ -529,7 +529,7 @@ static char* iso_3166[] =
   "UM", "United States Minor Outlying Islands",
   "US", "United States",
   "US", "United States of America",
-  "US", "United-States"
+  "US", "United-States",
   "UY", "Uruguay",
   "UZ", "Uzbekistan",
 
2019-11-08  Jakub Jelinek  

Backported from mainline
2019-10-29  Jakub Jelinek  

* doc/install.texi (--enable-offload-targets): Fix up a typo in the
example, use actual names of supported offload targets.

--- gcc/doc/install.texi(revision 277551)
+++ gcc/doc/install.texi(revision 277552)
@@ -2158,7 +2158,7 @@ specifying paths @var{path1}, @dots{}, @
 
 

Re: [PATCH] Refactor rust-demangle to be independent of C++ demangling.

2019-11-08 Thread Ian Lance Taylor via gcc-patches
On Fri, Nov 8, 2019 at 11:17 AM Eduard-Mihai Burtescu  wrote:
>
> On Fri, Nov 8, 2019, at 7:43 PM, Ian Lance Taylor wrote:
> > On Fri, Nov 8, 2019 at 9:02 AM Eduard-Mihai Burtescu  wrote:
> > >
> > > Ping #2 for https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01830.html
> > > Original patch (without the early exit optimization): 
> > > https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01591.html
> >
> > Sorry for letting this slide.
> >
> > Do we need the CHECK_OR and ERROR_AND macros?  Is there anything like
> > those elsewhere in the libiberty or GCC sources?  I would rather
> > have ordinary code than obscure macros.
>
> Good point, I was wondering about the macros but forgot to ask explicitly, 
> they're the least usual (for C) part of this code, they arose from porting 
> the demangler for the new format (most of which isn't even in the current 
> patch) from Rust, where we have facilities for conveniently propagating 
> errors, and I wanted to avoid using goto too much for this purpose.
>
> Looking at 
> https://gist.github.com/eddyb/c41a69378750a433767cf53fe2316768#file-rust-demangle-c
>  I can see:
> * 5 uses of ERROR_AND
> * 20 uses of CHECK_OR
>   * 7 of those are CHECK_OR (!rdm->errored, return ); which can just be if 
> (rdm->errored) return;
>
> So in the final code there'd be ~18 places that would need to set 
> rdm->errored = 1; (and then return or goto cleanup).
> That's not that bad, I guess, and I'd welcome any suggestions for how to 
> clean up that.
>
> For this current patch, however, there's only 3 uses total, so the macros are 
> definitely overkill.
> Assuming you'd want them removed, I took the liberty of doing that and here's 
> the fixed patch:
>
> 2019-10-22  Eduard-Mihai Burtescu  
> include/ChangeLog:
> * demangle.h (rust_demangle_callback): Add.
> libiberty/ChangeLog:
> * cplus-dem.c (cplus_demangle): Use rust_demangle directly.
> (rust_demangle): Remove.
> * rust-demangle.c (is_prefixed_hash): Rename to 
> is_legacy_prefixed_hash.
> (parse_lower_hex_nibble): Rename to decode_lower_hex_nibble.
> (parse_legacy_escape): Rename to decode_legacy_escape.
> (rust_is_mangled): Remove.
> (struct rust_demangler): Add.
> (peek): Add.
> (next): Add.
> (struct rust_mangled_ident): Add.
> (parse_ident): Add.
> (rust_demangle_sym): Remove.
> (print_str): Add.
> (PRINT): Add.
> (print_ident): Add.
> (rust_demangle_callback): Add.
> (struct str_buf): Add.
> (str_buf_reserve): Add.
> (str_buf_append): Add.
> (str_buf_demangle_callback): Add.
> (rust_demangle): Add.
> * rust-demangle.h: Remove.

This is OK.

Thanks.

Ian


Re: [PATCH] Refactor rust-demangle to be independent of C++ demangling.

2019-11-08 Thread Eduard-Mihai Burtescu
On Fri, Nov 8, 2019, at 7:43 PM, Ian Lance Taylor wrote:
> On Fri, Nov 8, 2019 at 9:02 AM Eduard-Mihai Burtescu  wrote:
> >
> > Ping #2 for https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01830.html
> > Original patch (without the early exit optimization): 
> > https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01591.html
> 
> Sorry for letting this slide.
> 
> Do we need the CHECK_OR and ERROR_AND macros?  Is there anything like
> those elsewhere in the libiberty or GCC sources?  I would rather
> have ordinary code than obscure macros.

Good point, I was wondering about the macros but forgot to ask explicitly, 
they're the least usual (for C) part of this code, they arose from porting the 
demangler for the new format (most of which isn't even in the current patch) 
from Rust, where we have facilities for conveniently propagating errors, and I 
wanted to avoid using goto too much for this purpose.

Looking at 
https://gist.github.com/eddyb/c41a69378750a433767cf53fe2316768#file-rust-demangle-c
 I can see:
* 5 uses of ERROR_AND
* 20 uses of CHECK_OR
  * 7 of those are CHECK_OR (!rdm->errored, return ); which can just be if 
(rdm->errored) return;

So in the final code there'd be ~18 places that would need to set rdm->errored 
= 1; (and then return or goto cleanup).
That's not that bad, I guess, and I'd welcome any suggestions for how to clean 
up that.
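
To make that concrete, here is a self-contained sketch of the plain-code
pattern; the struct and helpers below are illustrative stand-ins, not the
patch's actual definitions:

  struct demangler_state { const char *sym; int errored; };

  static int
  eat (struct demangler_state *dm, char want)
  {
    if (dm->errored || *dm->sym != want)
      {
        dm->errored = 1;   /* was: ERROR_AND (return 0) */
        return 0;
      }
    dm->sym++;
    return 1;
  }

  static void
  parse_path (struct demangler_state *dm)
  {
    if (dm->errored)       /* was: CHECK_OR (!dm->errored, return) */
      return;
    if (!eat (dm, 'N'))
      return;
    /* ... */
  }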

For this current patch, however, there's only 3 uses total, so the macros are 
definitely overkill.
Assuming you'd want them removed, I took the liberty of doing that and here's 
the fixed patch:

2019-10-22  Eduard-Mihai Burtescu  
include/ChangeLog:
* demangle.h (rust_demangle_callback): Add.
libiberty/ChangeLog:
* cplus-dem.c (cplus_demangle): Use rust_demangle directly.
(rust_demangle): Remove.
* rust-demangle.c (is_prefixed_hash): Rename to is_legacy_prefixed_hash.
(parse_lower_hex_nibble): Rename to decode_lower_hex_nibble.
(parse_legacy_escape): Rename to decode_legacy_escape.
(rust_is_mangled): Remove.
(struct rust_demangler): Add.
(peek): Add.
(next): Add.
(struct rust_mangled_ident): Add.
(parse_ident): Add.
(rust_demangle_sym): Remove.
(print_str): Add.
(PRINT): Add.
(print_ident): Add.
(rust_demangle_callback): Add.
(struct str_buf): Add.
(str_buf_reserve): Add.
(str_buf_append): Add.
(str_buf_demangle_callback): Add.
(rust_demangle): Add.
* rust-demangle.h: Remove.

diff --git a/include/demangle.h b/include/demangle.h
index 06c32571d5c..ce7235d13f3 100644
--- a/include/demangle.h
+++ b/include/demangle.h
@@ -159,6 +159,11 @@ ada_demangle (const char *mangled, int options);
 extern char *
 dlang_demangle (const char *mangled, int options);
 
+extern int
+rust_demangle_callback (const char *mangled, int options,
+demangle_callbackref callback, void *opaque);
+
+
 extern char *
 rust_demangle (const char *mangled, int options);
 
diff --git a/libiberty/cplus-dem.c b/libiberty/cplus-dem.c
index a39e2bf2ed4..735a61d7a82 100644
--- a/libiberty/cplus-dem.c
+++ b/libiberty/cplus-dem.c
@@ -52,7 +52,6 @@ void * realloc ();
 #define CURRENT_DEMANGLING_STYLE options
 
 #include "libiberty.h"
-#include "rust-demangle.h"
 
 enum demangling_styles current_demangling_style = auto_demangling;
 
@@ -160,27 +159,20 @@ cplus_demangle (const char *mangled, int options)
   if ((options & DMGL_STYLE_MASK) == 0)
 options |= (int) current_demangling_style & DMGL_STYLE_MASK;
 
+  /* The Rust demangling is implemented elsewhere.
+ Legacy Rust symbols overlap with GNU_V3, so try Rust first.  */
+  if (RUST_DEMANGLING || AUTO_DEMANGLING)
+{
+  ret = rust_demangle (mangled, options);
+  if (ret || RUST_DEMANGLING)
+return ret;
+}
+
   /* The V3 ABI demangling is implemented elsewhere.  */
-  if (GNU_V3_DEMANGLING || RUST_DEMANGLING || AUTO_DEMANGLING)
+  if (GNU_V3_DEMANGLING || AUTO_DEMANGLING)
 {
   ret = cplus_demangle_v3 (mangled, options);
-  if (GNU_V3_DEMANGLING)
-   return ret;
-
-  if (ret)
-   {
- /* Rust symbols are GNU_V3 mangled plus some extra subtitutions.
-The subtitutions are always smaller, so do in place changes.  */
- if (rust_is_mangled (ret))
-   rust_demangle_sym (ret);
- else if (RUST_DEMANGLING)
-   {
- free (ret);
- ret = NULL;
-   }
-   }
-
-  if (ret || RUST_DEMANGLING)
+  if (ret || GNU_V3_DEMANGLING)
return ret;
 }
 
@@ -204,27 +196,6 @@ cplus_demangle (const char *mangled, int options)
   return (ret);
 }
 
-char *
-rust_demangle (const char *mangled, int options)
-{
-  /* Rust symbols are GNU_V3 mangled plus some extra subtitutions.  */
-  char *ret = cplus_demangle_v3 (mangled, options);
-
-  /* The Rust subtitutions are always smaller, so do in place changes.  */
-  if (ret != 

Re: [PATCH, GCC] Fix unrolling check.

2019-11-08 Thread Eric Botcazou
> I was fiddling around with the loop unrolling pass and noticed a check 
> in decide_unroll_* functions (in the patch). The comment on top of this 
> check says
> "/* If we were not asked to unroll this loop, just return back silently. 
>   */"
> However the check returns when loop->unroll == 0 rather than 1.
> 
> The check was added in r255106 where the ChangeLog suggests that the 
> actual intention was probably to check the value 1 and not 0.

No, this is intended, 0 is the default value of the field, not 1.  And note 
that decide_unroll_constant_iterations, decide_unroll_runtime_iterations and 
decide_unroll_stupid *cannot* be called with loop->unroll == 1 because of this 
check in decide_unrolling:

  if (loop->unroll == 1)
{
  if (dump_file)
fprintf (dump_file,
 ";; Not unrolling loop, user didn't want it unrolled\n");
  continue;
}

> Tested on aarch64-none-elf with one new regression:
> FAIL: gcc.dg/pr40209.c (test for excess errors)
> This fails because the changes cause the loop to unroll 3 times using 
> unroll_stupid and that shows up as excess error due -fopt-info. This 
> option was added in r202077 but I am not sure why this particular test 
> was chosen for it.

That's a regression, there should be no unrolling.

-- 
Eric Botcazou


[COMMITTED] Change fold_range() and wi_fold() to return the result via reference parameter.

2019-11-08 Thread Andrew MacLeod
range_operator::fold_range() and wi_fold() originally returned the
resulting range in a reference parameter.  When prepping for trunk, we
got excited about something else and I changed them to return the result
by value.


As we move towards multiple sub-ranges in value_range, I recalled that the
rationale for the reference parameter was to allow us to better support a
variable number of sub-ranges.  range-ops will work with as many sub-ranges
as are available, and this allows the caller to provide a range object with
the desired number.  It also makes fold_range a bit more consistent with
the way op1_range and op2_range work.


The fundamental change is moving from:

    virtual value_range fold_range (tree type, const value_range &lh,
                                    const value_range &rh) const;

to

    virtual void fold_range (value_range &r, tree type,
                             const value_range &lh,
                             const value_range &rh) const;

and likewise for wi_fold.
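
As a toy, self-contained analogue (not GCC code) of why the reference
parameter helps: the caller owns the result object, so the caller decides
how many sub-ranges the result may carry.

  #include <utility>
  #include <vector>

  struct toy_range
  {
    // Each pair is one sub-range [lo, hi]; how many the object can hold
    // is entirely up to whoever constructed it.
    std::vector<std::pair<long, long> > subranges;
  };

  static void
  toy_fold_plus (toy_range &r, const toy_range &lh, const toy_range &rh)
  {
    r.subranges.clear ();
    for (const auto &a : lh.subranges)
      for (const auto &b : rh.subranges)
        r.subranges.push_back ({a.first + b.first, a.second + b.second});
  }

  int
  main ()
  {
    toy_range a { { { 0, 1 }, { 10, 11 } } };
    toy_range b { { { 100, 100 } } };
    toy_range r;
    toy_fold_plus (r, a, b);   // the result comes back via the reference
    return r.subranges.size () == 2 ? 0 : 1;
  }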

The change is quite mechanical.  Bootstraps all languages and causes no 
regressions.


Checked in as SVN revision 277979

Andrew




Change range_operator methods 'fold_range' and 'wi_fold' to return the result
range by a reference parameter instead of by value.

2019-11-08  Andrew MacLeod 

	* range-op.h (range_operator::fold_range): Return result in a
	reference parameter instead of by value.
	(range_operator::wi_fold): Same.
	* range-op.cc (range_operator::wi_fold): Return result in a reference
	parameter instead of by value.
	(range_operator::fold_range): Same.
	(value_range_from_overflowed_bounds): Same.
	(value_range_with_overflow): Same
	(create_possibly_reversed_range): Same.
	(operator_equal::fold_range): Same.
	(operator_not_equal::fold_range): Same.
	(operator_lt::fold_range): Same.
	(operator_le::fold_range): Same.
	(operator_gt::fold_range): Same.
	(operator_ge::fold_range): Same.
	(operator_plus::wi_fold): Same.
	(operator_plus::op1_range): Change call to fold_range.
	(operator_plus::op2_range): Change call to fold_range.
	(operator_minus::wi_fold): Return result via reference parameter.
	(operator_minus::op1_range): Change call to fold_range.
	(operator_minus::op2_range): Change call to fold_range.
	(operator_min::wi_fold): Return result via reference parameter.
	(operator_max::wi_fold): Same.
	(cross_product_operator::wi_cross_product): Same.
	(operator_mult::wi_fold): Same.
	(operator_div::wi_fold): Same.
	(operator_div op_floor_div): Fix whitespace.
	(operator_exact_divide::op1_range): Change call to fold_range.
	(operator_lshift::fold_range): Return result via reference parameter.
	(operator_lshift::wi_fold): Same.
	(operator_rshift::fold_range): Same.
	(operator_rshift::wi_fold): Same.
	(operator_cast::fold_range): Same.
	(operator_cast::op1_range): Change calls to fold_range.
	(operator_logical_and::fold_range): Return result via reference.
	(wi_optimize_and_or): Adjust call to value_range_with_overflow.
	(operator_bitwise_and::wi_fold): Return result via reference.
	(operator_logical_or::fold_range): Same.
	(operator_bitwise_or::wi_fold): Same.
	(operator_bitwise_xor::wi_fold): Same.
	(operator_trunc_mod::wi_fold): Same.
	(operator_logical_not::fold_range): Same.
	(operator_bitwise_not::fold_range): Same.
	(operator_bitwise_not::op1_range): Change call to fold_range.
	(operator_cst::fold_range): Return result via reference.
	(operator_identity::fold_range): Same.
	(operator_abs::wi_fold): Same.
	(operator_absu::wi_fold): Same.
	(operator_negate::fold_range): Same.
	(operator_negate::op1_range): Change call to fold_range.
	(operator_addr_expr::fold_range): Return result via reference.
	(operator_addr_expr::op1_range): Change call to fold_range.
	(operator_pointer_plus::wi_fold): Return result via reference.
	(operator_pointer_min_max::wi_fold): Same.
	(operator_pointer_and::wi_fold): Same.
	(operator_pointer_or::wi_fold): Same.
	(range_op_handler): Change call to fold_range.
	(range_cast): Same.
	* tree-vrp.c (range_fold_binary_symbolics_p): Change call to
	fold_range.
	(range_fold_unary_symbolics_p): Same.
	(range_fold_binary_expr): Same.
	(range_fold_unary_expr): Same.


Index: range-op.h
===
*** range-op.h	(revision 277853)
--- range-op.h	(working copy)
*** class range_operator
*** 50,58 
  {
  public:
// Perform an operation between 2 ranges and return it.
!   virtual value_range fold_range (tree type,
!   const value_range &lh,
!   const value_range &rh) const;
  
// Return the range for op[12] in the general case.  LHS is the range for
// the LHS of the expression, OP[12]is the range for the other
--- 50,58 
  {
  public:
// Perform an operation between 2 ranges and return it.
!   virtual void fold_range (value_range &r, tree type,
! 			   const value_range &lh,
! 			   const value_range &rh) const;
  
// Return the range for op[12] in the general case.  LHS is the range for
// the LHS of the expression, OP[12]is the range for the other
*** public:
*** 74,84 

[C++ PATCH] Opt out of GNU vector extensions for built-in SVE types

2019-11-08 Thread Richard Sandiford
This is the C++ equivalent of r277950, which prevented the
use of the GNU vector extensions with SVE vector types for C.
[https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=277950].
I've copied the rationale below for reference.

The changes here are very similar to the C ones.  Perhaps the only
noteworthy thing (that I know of) is that the patch continues to treat
!gnu_vector_type_p vector types as literal types/potential constexprs.
Disabling the GNU vector extensions shouldn't in itself stop the types
from being literal types, since whatever the target provides instead
might be constexpr material.
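
As a hedged illustration, in the spirit of the new gnu_vectors_*.C tests
(not one of them verbatim), this is the kind of thing that is now rejected
for C++ as well:

  #include <arm_sve.h>

  svint8_t add (svint8_t x, svint8_t y)
  {
    // GNU vector operators no longer apply to the built-in SVE types;
    // the ACLE intrinsic, e.g. svadd_s8_x (svptrue_b8 (), x, y), is the
    // supported spelling.
    return x + y;   // error with the patch
  }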

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard

-
The AArch64 port defines built-in SVE types at start-up under names
like __SVInt8_t.  These types are represented in the front end and
gimple as normal VECTOR_TYPEs and are code-generated as normal vectors.
However, we'd like to stop the frontends from treating them in the
same way as GNU-style ("vector_size") vectors, for several reasons:

(1) We allowed the GNU vector extensions to be mixed with Advanced SIMD
vector types and it ended up causing a lot of confusion on big-endian
targets.  Although SVE handles big-endian vectors differently from
Advanced SIMD, there are still potential surprises; see the block
comment near the head of aarch64-sve.md for details.

(2) One of the SVE vectors is a packed one-bit-per-element boolean vector.
That isn't a combination the GNU vector extensions have supported
before.  E.g. it means that vectors can no longer decompose to
arrays for indexing, and that not all elements are individually
addressable.  It also makes it less clear which order the initialiser
should be in (lsb first, or bitfield ordering?).  We could define
all that of course, but it seems a bit weird to go to the effort
for this case when, given all the other reasons, we don't want the
extensions anyway.

(3) The GNU vector extensions only provide full-vector operations,
which is a very artificial limitation on a predicated architecture
like SVE.

(4) The set of operations provided by the GNU vector extensions is
relatively small, whereas the SVE intrinsics provide many more.

(5) It makes it easier to ensure that (with default options) code is
portable between compilers without the GNU vector extensions having
to become an official part of the SVE intrinsics spec.

(6) The length of the SVE types is usually not fixed at compile time,
whereas the GNU vector extension is geared around fixed-length
vectors.

It's possible to specify the length of an SVE vector using the
command-line option -msve-vector-bits=N, but in principle it should
be possible to have functions compiled for different N in the same
translation unit.  This isn't supported yet but would be very useful
for implementing ifuncs.  Once mixing lengths in a translation unit
is supported, the SVE types should represent the same type throughout
the translation unit, just as GNU vector types do.

However, when -msve-vector-bits=N is in effect, we do allow conversions
between explicit GNU vector types of N bits and the corresponding SVE
types.  This doesn't undermine the intent of (5) because in this case
the use of GNU vector types is explicit and intentional.  It also doesn't
undermine the intent of (6) because converting between the types is just
a conditionally-supported operation.  In other words, the types still
represent the same types throughout the translation unit, it's just that
conversions between them are valid in cases where a certain precondition
is known to hold.  It's similar to the way that the SVE vector types are
defined throughout the translation unit but can only be used in functions
for which SVE is enabled.
-


2019-11-08  Richard Sandiford  

gcc/cp/
* cp-tree.h (CP_AGGREGATE_TYPE_P): Check for gnu_vector_type_p
instead of VECTOR_TYPE.
* call.c (build_conditional_expr_1): Restrict vector handling
to vectors that satisfy gnu_vector_type_p.  Don't treat the
"then" and "else" types as equivalent if they have the same
vector shape but differ in whether they're GNU vectors.
* cvt.c (ocp_convert): Only allow vectors to be converted
to bool if they satisfy gnu_vector_type_p.
(build_expr_type_conversion): Only allow conversions from
vectors if they satisfy gnu_vector_type_p.
* typeck.c (cp_build_binary_op): Only allow binary operators to be
applied to vectors if they satisfy gnu_vector_type_p.
(cp_build_unary_op): Likewise unary operators.
(build_reinterpret_cast_1):

gcc/testsuite/
* g++.target/aarch64/sve/acle/general-c++/gnu_vectors_1.C: New test.
* 

Re: [PATCH] Refactor rust-demangle to be independent of C++ demangling.

2019-11-08 Thread Ian Lance Taylor via gcc-patches
On Fri, Nov 8, 2019 at 9:02 AM Eduard-Mihai Burtescu  wrote:
>
> Ping #2 for https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01830.html
> Original patch (without the early exit optimization): 
> https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01591.html

Sorry for letting this slide.

Do we need the CHECK_OR and ERROR_AND macros?  Is there anything like
those elsewhere in the libiberty or GCC sources?  I would rather
have ordinary code than obscure macros.

Ian


> On Wed, Oct 30, 2019, at 6:46 PM, Eduard-Mihai Burtescu wrote:
> > Ping: https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01830.html
> > Original patch (without the early exit optimization):
> > https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01591.html
> >
> > Thanks,
> > - Eddy B.
> >
> > On Fri, Oct 25, 2019, at 3:44 PM, Eduard-Mihai Burtescu wrote:
> > > > This can be further optimized by using memcmp in place of strncmp, 
> > > > since from
> > > > the length check you know that you won't see the null terminator among 
> > > > the three
> > > > chars you're checking.
> > >
> > > Fair enough, here's the combined changelog/diff, with memcmp:


Re: [PATCH rs6000]Fix PR92132

2019-11-08 Thread Segher Boessenkool
Hi!

On Fri, Nov 08, 2019 at 10:38:13AM +0800, Kewen.Lin wrote:
> >> +  [(set (match_operand: 0 "vint_operand")
> >> +   (match_operator 1 "comparison_operator"
> > 
> > If you make an iterator for this instead, it is simpler code (you can then
> > use  to do all these cases in one statement).
> 
> If my understanding is correct and based on some tries before, I think we
> have to leave these **CASEs** there (at least at the 1st level define_expand
> for vec_cmp*), since vec_cmp* doesn't have  field in the pattern name.
> The code can be only extracted from operator 1.  I tried to add one dummy
> operand to hold  but it's impractical.
> 
> Sorry, I may miss something here, I'm happy to make a subsequent patch to
> uniform these cases if there is a good way to run a code iterator on them.

Instead of

  [(set (match_operand:VEC_I 0 "vint_operand")
(match_operator 1 "signed_or_equality_comparison_operator"
  [(match_operand:VEC_I 2 "vint_operand")
   (match_operand:VEC_I 3 "vint_operand")]))]

you can do

  [(set (match_operand:VEC_I 0 "vint_operand")
(some_iter:VEC_I (match_operand:VEC_I 1 "vint_operand")
 (match_operand:VEC_I 2 "vint_operand")))]

with some_iter some code_iterator, (note you need to renumber), and in the
body you can then just use  (or , or some other code_attribute).

code_iterator is more flexible than match_operator, in most ways.


Segher


[PATCH][GCC][arm] Add CLI and multilib support for Armv8.1-M Mainline MVE extensions

2019-11-08 Thread Mihail Ionescu
Hi,

This patch adds CLI and multilib support for Armv8.1-M MVE to the Arm backend.
Two new options are added for v8.1-m.main: "+mve" for integer MVE instructions only
and "+mve.fp" for both integer and single-precision/half-precision
floating-point MVE.
The patch also maps the Armv8.1-M multilib variants to the corresponding v8-M 
ones.



gcc/ChangeLog:

2019-11-08  Mihail Ionescu  
2019-11-08  Andre Vieira  

* config/arm/arm-cpus.in (mve, mve_float): New features.
(dsp, mve, mve.fp): New options.
* config/arm/arm.h (TARGET_HAVE_MVE, TARGET_HAVE_MVE_FLOAT): Define.
* config/arm/t-rmprofile: Map v8.1-M multilibs to v8-M.


gcc/testsuite/ChangeLog:

2019-11-08  Mihail Ionescu  
2019-11-08  Andre Vieira  

* gcc.target/arm/multilib.exp: Add v8.1-M entries.


Is this ok for trunk?

Best regards,

Mihail


### Attachment also inlined for ease of reply###


diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 
59aad8f62ee5186cc87d3cefaf40ba2ce049012d..c2f016c75e2d8dd06890295321232bef61cbd234
 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -194,6 +194,10 @@ define feature sb
 # v8-A architectures, added by default from v8.5-A
 define feature predres
 
+# M-profile Vector Extension feature bits
+define feature mve
+define feature mve_float
+
 # Feature groups.  Conventionally all (or mostly) upper case.
 # ALL_FPU lists all the feature bits associated with the floating-point
 # unit; these will all be removed if the floating-point unit is disabled
@@ -654,9 +658,12 @@ begin arch armv8.1-m.main
  base 8M_MAIN
  isa ARMv8_1m_main
 # fp => FPv5-sp-d16; fp.dp => FPv5-d16
+ option dsp add armv7em
  option fp add FPv5 fp16
  option fp.dp add FPv5 FP_DBL fp16
  option nofp remove ALL_FP
+ option mve add mve armv7em
+ option mve.fp add mve FPv5 fp16 mve_float armv7em
 end arch armv8.1-m.main
 
 begin arch iwmmxt
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 
64c292f2862514fb600a4faeaddfeacb2b69180b..9ec38c6af1b84fc92e20e30e8f07ce5360a966c1
 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -310,6 +310,12 @@ emission of floating point pcs attributes.  */
instructions (most are floating-point related).  */
 #define TARGET_HAVE_FPCXT_CMSE (arm_arch8_1m_main)
 
+#define TARGET_HAVE_MVE (bitmap_bit_p (arm_active_target.isa, \
+  isa_bit_mve))
+
+#define TARGET_HAVE_MVE_FLOAT (bitmap_bit_p (arm_active_target.isa, \
+isa_bit_mve_float))
+
 /* Nonzero if integer division instructions supported.  */
 #define TARGET_IDIV((TARGET_ARM && arm_arch_arm_hwdiv) \
 || (TARGET_THUMB && arm_arch_thumb_hwdiv))
diff --git a/gcc/config/arm/t-rmprofile b/gcc/config/arm/t-rmprofile
index 
807e69eaf78625f422e2d7ef5936c5c80c5b9073..62e27fd284b21524896430176d64ff5b08c6e0ef
 100644
--- a/gcc/config/arm/t-rmprofile
+++ b/gcc/config/arm/t-rmprofile
@@ -54,7 +54,7 @@ MULTILIB_REQUIRED += 
mthumb/march=armv8-m.main+fp.dp/mfloat-abi=softfp
 # Arch Matches
 MULTILIB_MATCHES   += march?armv6s-m=march?armv6-m
 
-# Map all v8-m.main+dsp FP variants down the the variant without DSP.
+# Map all v8-m.main+dsp FP variants down to the variant without DSP.
 MULTILIB_MATCHES   += march?armv8-m.main=march?armv8-m.main+dsp \
   $(foreach FP, +fp +fp.dp, \
 
march?armv8-m.main$(FP)=march?armv8-m.main+dsp$(FP))
@@ -66,3 +66,18 @@ MULTILIB_MATCHES += march?armv7e-m+fp=march?armv7e-m+fpv5
 MULTILIB_REUSE += $(foreach ARCH, armv6s-m armv7-m armv7e-m 
armv8-m\.base armv8-m\.main, \
 
mthumb/march.$(ARCH)/mfloat-abi.soft=mthumb/march.$(ARCH)/mfloat-abi.softfp)
 
+# Map v8.1-M to v8-M.
+MULTILIB_MATCHES   += march?armv8-m.main=march?armv8.1-m.main
+MULTILIB_MATCHES   += march?armv8-m.main=march?armv8.1-m.main+dsp
+MULTILIB_MATCHES   += march?armv8-m.main=march?armv8.1-m.main+mve
+
+v8_1m_sp_variants = +fp +dsp+fp +mve.fp
+v8_1m_dp_variants = +fp.dp +dsp+fp.dp +fp.dp+mve +fp.dp+mve.fp
+
+# Map all v8.1-m.main FP sp variants down to v8-m.
+MULTILIB_MATCHES += $(foreach FP, $(v8_1m_sp_variants), \
+march?armv8-m.main+fp=march?armv8.1-m.main$(FP))
+
+# Map all v8.1-m.main FP dp variants down to v8-m.
+MULTILIB_MATCHES += $(foreach FP, $(v8_1m_dp_variants), \
+march?armv8-m.main+fp.dp=march?armv8.1-m.main$(FP))
diff --git a/gcc/testsuite/gcc.target/arm/multilib.exp 
b/gcc/testsuite/gcc.target/arm/multilib.exp
index 
dcea829965eb15e372401e6389df5a1403393ecb..63cca118da2578253740fcd95421eae9ddf219bd
 100644
--- a/gcc/testsuite/gcc.target/arm/multilib.exp
+++ b/gcc/testsuite/gcc.target/arm/multilib.exp
@@ -775,6 +775,27 @@ if {[multilib_config "rmprofile"] } {
{-march=armv8-r+fp.sp -mfpu=auto -mfloat-abi=hard} 

Re: [PATCH] Refactor rust-demangle to be independent of C++ demangling.

2019-11-08 Thread Eduard-Mihai Burtescu
Ping #2 for https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01830.html
Original patch (without the early exit optimization): 
https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01591.html

Thanks,
- Eddy B.

On Wed, Oct 30, 2019, at 6:46 PM, Eduard-Mihai Burtescu wrote:
> Ping: https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01830.html
> Original patch (without the early exit optimization): 
> https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01591.html
> 
> Thanks,
> - Eddy B.
> 
> On Fri, Oct 25, 2019, at 3:44 PM, Eduard-Mihai Burtescu wrote:
> > > This can be further optimized by using memcmp in place of strncmp, since 
> > > from
> > > the length check you know that you won't see the null terminator among 
> > > the three
> > > chars you're checking.
> > 
> > Fair enough, here's the combined changelog/diff, with memcmp:


[PATCH] Fix PR c++/92365

2019-11-08 Thread Bernd Edlinger
Hi,

this fixes an unexpected fallout from my previous patch for
-Wshadow=compatible-local.

Using can_convert_arg here avoids the issue that can_convert tries to
cast int() to char*, which is a possible NULL-pointer value in C++98
(but not in C++11).  As pointed out in the PR, there are still more
issues with can_convert, but I would like to fix the regression here
without digging any deeper into the mud, at least for now.


Boot-strapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk?


Thanks
Bernd.
2019-11-08  Bernd Edlinger  

	PR c++/92365
	* name-lookup.c (check_local_shadow): Use can_convert_arg
	instead of can_convert.

testsuite:
2019-11-08  Bernd Edlinger  

	PR c++/92365
	* g++.dg/pr92365.C: New test.

Index: gcc/cp/name-lookup.c
===
--- gcc/cp/name-lookup.c	(revision 277860)
+++ gcc/cp/name-lookup.c	(working copy)
@@ -2770,8 +2770,8 @@ check_local_shadow (tree decl)
 		  (now) doing the shadow checking too
 		  early.  */
 		   && !type_uses_auto (TREE_TYPE (decl))
-		   && can_convert (TREE_TYPE (old), TREE_TYPE (decl),
-   tf_none)))
+		   && can_convert_arg (TREE_TYPE (old), TREE_TYPE (decl),
+   decl, LOOKUP_IMPLICIT, tf_none)))
 	warning_code = OPT_Wshadow_compatible_local;
   else
 	warning_code = OPT_Wshadow_local;
Index: gcc/testsuite/g++.dg/pr92365.C
===
--- gcc/testsuite/g++.dg/pr92365.C	(revision 0)
+++ gcc/testsuite/g++.dg/pr92365.C	(working copy)
@@ -0,0 +1,12 @@
+/* PR c++/92365  */
+/* { dg-options "-std=c++98 -Wshadow=compatible-local" } */
+
+class a {
+public:
+  a(char *);
+};
+void b() {
+  a c(0);
+  if (0)
+int c;
+}


Re: [PATCH] include size and offset in -Wstringop-overflow

2019-11-08 Thread Martin Sebor

On 11/6/19 2:06 PM, Martin Sebor wrote:

On 11/6/19 1:39 PM, Jeff Law wrote:

On 11/6/19 1:27 PM, Martin Sebor wrote:

On 11/6/19 11:55 AM, Jeff Law wrote:

On 11/6/19 11:00 AM, Martin Sebor wrote:

The -Wstringop-overflow warnings for single-byte and multi-byte
stores mention the amount of data being stored and the amount of
space remaining in the destination, such as:

warning: writing 4 bytes into a region of size 0 [-Wstringop-overflow=] 


123 |   *p = 0;
|   ~~~^~~
note: destination object declared here
 45 |   char b[N];
|^

A warning like this can take some time to analyze.  First, the size
of the destination isn't mentioned and may not be easy to tell from
the sources.  In the note above, when N's value is the result of
some non-trivial computation, chasing it down may be a small project
in and of itself.  Second, it's also not clear why the region size
is zero.  It could be because the offset is exactly N, or because
it's negative, or because it's in some range greater than N.

Mentioning both the size of the destination object and the offset
makes the existing messages clearer, and will become essential when
GCC starts diagnosing overflow into allocated buffers (as my
follow-on patch does).

The attached patch enhances -Wstringop-overflow to do this by
letting compute_objsize return the offset to its caller, doing
something similar in get_stridx, and adding a new function to
the strlen pass to issue this enhanced warning (eventually, I'd
like the function to replace the -Wstringop-overflow handler in
builtins.c).  With the change, the note above might read something
like:

note: at offset 11 to object ‘b’ with size 8 declared here
 45 |   char b[N];
|^

Tested on x86_64-linux.

Martin

gcc-store-offset.diff

gcc/ChangeLog:

 * builtins.c (compute_objsize): Add an argument and set it to offset
 into destination.
 * builtins.h (compute_objsize): Add an argument.
 * tree-object-size.c (addr_object_size): Add an argument and set it
 to offset into destination.
 (compute_builtin_object_size): Same.
 * tree-object-size.h (compute_builtin_object_size): Add an argument.

 * tree-ssa-strlen.c (get_addr_stridx): Add an argument and set it
 to offset into destination.
 (maybe_warn_overflow): New function.
 (handle_store): Call maybe_warn_overflow to issue warnings.

gcc/testsuite/ChangeLog:

 * c-c++-common/Wstringop-overflow-2.c: Adjust text of expected
messages.
 * g++.dg/warn/Wstringop-overflow-3.C: Same.
 * gcc.dg/Wstringop-overflow-17.c: Same.




Index: gcc/tree-ssa-strlen.c
===
--- gcc/tree-ssa-strlen.c    (revision 277886)
+++ gcc/tree-ssa-strlen.c    (working copy)
@@ -189,6 +189,52 @@ struct laststmt_struct
   static int get_stridx_plus_constant (strinfo *, unsigned
HOST_WIDE_INT, tree);
   static void handle_builtin_stxncpy (built_in_function,
gimple_stmt_iterator *);
   +/* Sets MINMAX to either the constant value or the range VAL is in
+   and returns true on success.  */
+
+static bool
+get_range (tree val, wide_int minmax[2], const vr_values *rvals = NULL)
+{
+  if (tree_fits_uhwi_p (val))
+    {
+  minmax[0] = minmax[1] = wi::to_wide (val);
+  return true;
+    }
+
+  if (TREE_CODE (val) != SSA_NAME)
+    return false;
+
+  if (rvals)
+    {
+  gimple *def = SSA_NAME_DEF_STMT (val);
+  if (gimple_assign_single_p (def)
+  && gimple_assign_rhs_code (def) == INTEGER_CST)
+    {
+  /* get_value_range returns [0, N] for constant assignments.  */
+  val = gimple_assign_rhs1 (def);
+  minmax[0] = minmax[1] = wi::to_wide (val);
+  return true;
+    }

Umm, something seems really off with this hunk.  If the SSA_NAME is set
via a simple constant assignment, then the range ought to be a singleton,
ie [CONST,CONST].  Is there a particular test where this is not true?

The only way offhand I could see this happening is if originally the RHS
wasn't a constant, but due to optimizations it either simplified into a
constant or a constant was propagated into an SSA_NAME appearing on the
RHS.  This would have to happen between the last range analysis and the
point where you're making this query.


Yes, I think that's right.  Here's an example where it happens:

   void f (void)
   {
     char s[] = "1234";
     unsigned n = strlen (s);
     char vla[n];   // or malloc (n)
     vla[n] = 0;    // n = [4, 4]
     ...
   }

The strlen call is folded to 4 but that's not propagated to
the access until sometime after the strlen pass is done.

Hmm.  Are we calling set_range_info in that case?  That goes behind the
back of pass instance of vr_values.  If so, that might argue we want to
be setting it in vr_values too.


No, set_range_info is only called for ranges.  In this case,
handle_builtin_strlen replaces the strlen() call with 4:

   s = "1234";
   _1 = __builtin_strlen ();
   n_2 = (unsigned 

[PATCH, GCC] Fix unrolling check.

2019-11-08 Thread Sudakshina Das
Hi

I was fiddling around with the loop unrolling pass and noticed a check 
in decide_unroll_* functions (in the patch). The comment on top of this 
check says
"/* If we were not asked to unroll this loop, just return back silently. 
  */"
However the check returns when loop->unroll == 0 rather than 1.

The check was added in r255106 where the ChangeLog suggests that the 
actual intention was probably to check the value 1 and not 0.

Tested on aarch64-none-elf with one new regression:
FAIL: gcc.dg/pr40209.c (test for excess errors)
This fails because the changes cause the loop to unroll 3 times using
unroll_stupid, and that shows up as an excess error due to -fopt-info.  This
option was added in r202077 but I am not sure why this particular test
was chosen for it.

Does this change look ok? Can I just remove the -fopt-info from the test,
or is unrolling the loop in the test not desirable?

Thanks
Sudi

gcc/ChangeLog:

2019-11-07  Sudakshina Das  

* loop-unroll.c (decide_unroll_constant_iterations): Update condition
to check loop->unroll.
(decide_unroll_runtime_iterations): Likewise.
(decide_unroll_stupid): Likewise.
diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
index 63fccd23fae38f8918a7d94411aaa43c72830dd3..9f7ab4b5c1c9b2333148e452b84afbf040707456 100644
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@@ -354,7 +354,7 @@ decide_unroll_constant_iterations (class loop *loop, int flags)
   widest_int iterations;
 
   /* If we were not asked to unroll this loop, just return back silently.  */
-  if (!(flags & UAP_UNROLL) && !loop->unroll)
+  if (!(flags & UAP_UNROLL) && loop->unroll == 1)
 return;
 
   if (dump_enabled_p ())
@@ -674,7 +674,7 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
   widest_int iterations;
 
   /* If we were not asked to unroll this loop, just return back silently.  */
-  if (!(flags & UAP_UNROLL) && !loop->unroll)
+  if (!(flags & UAP_UNROLL) && loop->unroll == 1)
 return;
 
   if (dump_enabled_p ())
@@ -1159,7 +1159,7 @@ decide_unroll_stupid (class loop *loop, int flags)
   widest_int iterations;
 
   /* If we were not asked to unroll this loop, just return back silently.  */
-  if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
+  if (!(flags & UAP_UNROLL_ALL) && loop->unroll == 1)
 return;
 
   if (dump_enabled_p ())


Re: [PATCH, Fortran] Allow CHARACTER literals in assignments and DATA statements

2019-11-08 Thread Jakub Jelinek
On Fri, Nov 08, 2019 at 11:17:21AM +0100, Tobias Burnus wrote:
> >     Jim MacArthur  
> >     Mark Eggleston  
> > 
> >     * arith.c (hollerith2representation): Use OPT_Wcharacter_truncation
> > in
> >     call to gfc_warning.  Add character2representation,
> > gfc_character2int,
> >     gfc_character2real, gfc_character2complex and gfc_character2logical.

This broke bootstrap:
../../gcc/fortran/arith.c: In function ‘void 
character2representation(gfc_expr*, gfc_expr*)’:
../../gcc/fortran/arith.c:2549:17: error: comparison of integer expressions of 
different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} 
[-Werror=sign-compare]
   for (i = 0; i < MIN (result_len, src_len); i++)

I have committed the following patch to unbreak it as obvious.
Also, the ChangeLog entry has various issues.  When adding new functions
or prototypes etc., it should be
(character2representation, gfc_character2int, gfc_character2real,
gfc_character2complex, gfc_character2logical): New.
(or New functions., New declarations., Declare. etc.), not Add ...
Always state what has changed and what the change was.

> >     * invoke.texi: Add option to list of options.
> >     * invoke.texi: Add Character conversion subsection to Extensions
> >     section.

The same file shouldn't be listed multiple times in the same ChangeLog
entry, multiple changes should be merged together, plus it would be helpful
to say what option has been added, etc.

> >     * lang.opt: Add new option.

Here it should be even * lang.opt (fdec-char-conversions): New option.
or so.

Jakub



Re: [PATCH 2/2] Introduce the gcc option --record-gcc-command-line

2019-11-08 Thread Segher Boessenkool
On Fri, Nov 08, 2019 at 12:05:19AM +0100, Egeyar Bagcioglu wrote:
> On 11/7/19 7:57 PM, Segher Boessenkool wrote:
> >>>Opening a file as "r" but then
> >>>accessing it with "fread" is peculiar, too.
> >>I am not sure what you mean here. Is it that you prefer "wb" and "rb"
> >>instead of "w" and "r"? I thought it was enough to use a consistent pair.
> >I'd use fgets or similar, not fread.
> 
> Two things made me prefer fread over fgets here:
> 1) Although I am reading a string, I do not need each read character to 
> be checked against newline. I just need to read till end-of-file.
> 2) fread returns the number of elements read which I later use. If I 
> used fgets, I'd need to call strlen or so afterwards to get the string size.
> 
> Let me know please if you disagree or if there are advantages / 
> disadvantages that I omit.

Somehow I thought fread works differently with text streams than it does.
Maybe because you don't often see fread on text streams :-)

Sorry for the noise,


Segher


[patch,avr] Add suport for devices from the 0-series.

2019-11-08 Thread Georg-Johann Lay

Hi,

this patch adds support for a few more AVR devices.  Because the offset 
where flash is seen in RAM deviates from the settings for the family 
(and hence also from the linker script defaults), a new field in 
avr_mcu_t is needed to express this so that specs can be generated 
appropriately.


The AVR_MCU lines in avr-mcus.def are longer than 80 chars because it's 
easier to maintain 1 device = 1 line entries.  And it's easier to scan 
them with the awk scripts.


Ok for trunk?

Johann

Add support for AVR devices from the 0-series.

* config/avr/avr-arch.h (avr_mcu_t) : New field.
* config/avr/avr-devices.c (avr_mcu_types): Adjust initializers.
* config/avr/avr-mcus.def (AVR_MCU): Add respective field.
* config/avr/specs.h (LINK_SPEC) <%(link_pm_base_address)>: Add.
* config/avr/gen-avr-mmcu-specs.c (print_mcu)
<*cpp, *cpp_mcu, *cpp_avrlibc, *link_pm_base_address>: Emit code
for spec definitions.
* doc/avr-mmcu.texi: Regenerate.
Index: config/avr/avr-arch.h
===
--- config/avr/avr-arch.h	(revision 277953)
+++ config/avr/avr-arch.h	(working copy)
@@ -126,6 +126,9 @@ const char *const macro;
 
   /* Flash size in bytes.  */
   int flash_size;
+
+  /* Offset where flash is seen in the RAM address space.  */
+  int flash_pm_offset;
 } avr_mcu_t;
 
 /* AVR device specific features.
Index: config/avr/avr-devices.c
===
--- config/avr/avr-devices.c	(revision 277953)
+++ config/avr/avr-devices.c	(working copy)
@@ -117,12 +117,12 @@ avr_texinfo[] =
 const avr_mcu_t
 avr_mcu_types[] =
 {
-#define AVR_MCU(NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE)\
-  { NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE },
+#define AVR_MCU(NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE, PMOFF) \
+  { NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE, PMOFF },
 #include "avr-mcus.def"
 #undef AVR_MCU
 /* End of list.  */
-  { NULL, ARCH_UNKNOWN, AVR_ISA_NONE, NULL, 0, 0, 0 }
+  { NULL, ARCH_UNKNOWN, AVR_ISA_NONE, NULL, 0, 0, 0, 0 }
 };
 
 
Index: config/avr/avr-mcus.def
===
--- config/avr/avr-mcus.def	(revision 277953)
+++ config/avr/avr-mcus.def	(working copy)
@@ -61,313 +61,327 @@ supply respective built-in macro.
 
FLASH_SIZEFlash size in bytes.
 
+   RODATA_PM_OFFSET
+		 Either 0x0 or the offset where flash memory is mirrored
+		 into the RAM address space accessible by LD and LDS.
+		 This is only needed if that value deviates from the
+		 value for the respective family.
+
"avr2" must be first for the "0" default to work as intended.  */
 
 /* Classic, <= 8K.  */
-AVR_MCU ("avr2", ARCH_AVR2, AVR_ERRATA_SKIP, NULL, 0x0060, 0x0, 0x6)
+AVR_MCU ("avr2", ARCH_AVR2, AVR_ERRATA_SKIP, NULL, 0x0060, 0x0, 0x6, 0)
 
-AVR_MCU ("at90s2313",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2313__", 0x0060, 0x0, 0x800)
-AVR_MCU ("at90s2323",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2323__", 0x0060, 0x0, 0x800)
-AVR_MCU ("at90s2333",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2333__", 0x0060, 0x0, 0x800)
-AVR_MCU ("at90s2343",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2343__", 0x0060, 0x0, 0x800)
-AVR_MCU ("attiny22", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny22__",  0x0060, 0x0, 0x800)
-AVR_MCU ("attiny26", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny26__",  0x0060, 0x0, 0x800)
-AVR_MCU ("at90s4414",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4414__", 0x0060, 0x0, 0x1000)
-AVR_MCU ("at90s4433",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S4433__", 0x0060, 0x0, 0x1000)
-AVR_MCU ("at90s4434",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4434__", 0x0060, 0x0, 0x1000)
-AVR_MCU ("at90s8515",ARCH_AVR2, AVR_ERRATA_SKIP, "__AVR_AT90S8515__",  0x0060, 0x0, 0x2000)
-AVR_MCU ("at90c8534",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90C8534__", 0x0060, 0x0, 0x2000)
-AVR_MCU ("at90s8535",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S8535__", 0x0060, 0x0, 0x2000)
+AVR_MCU ("at90s2313",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2313__", 0x0060, 0x0, 0x800, 0)
+AVR_MCU ("at90s2323",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2323__", 0x0060, 0x0, 0x800, 0)
+AVR_MCU ("at90s2333",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2333__", 0x0060, 0x0, 0x800, 0)
+AVR_MCU ("at90s2343",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2343__", 0x0060, 0x0, 0x800, 0)
+AVR_MCU ("attiny22", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny22__",  0x0060, 0x0, 0x800, 0)
+AVR_MCU ("attiny26", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny26__",  0x0060, 0x0, 0x800, 0)
+AVR_MCU ("at90s4414", 

Fix SLP downward group access classification (PR92420)

2019-11-08 Thread Richard Sandiford
This PR was caused by the SLP handling in get_group_load_store_type
returning VMAT_CONTIGUOUS rather than VMAT_CONTIGUOUS_REVERSE for
downward groups.
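
A hedged sketch (not the actual pr92420.c testcase) of the shape involved:
a two-element SLP group accessed with a negative step, which should now go
through get_negative_load_store_type instead of being treated as plain
VMAT_CONTIGUOUS:

  void
  f (int *__restrict out, const int *__restrict in, int n)
  {
    for (int i = 0; i < n; ++i)
      {
        out[2 * i]     = in[-2 * i];        /* downward group of two loads */
        out[2 * i + 1] = in[-2 * i + 1];
      }
  }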

A more elaborate fix would be to try to combine the reverse permutation
into SLP_TREE_LOAD_PERMUTATION for loads, but that's really a follow-on
optimisation and not backport material.  It might also not necessarily
be a win, if the target supports (say) reversing and odd/even swaps
as independent permutes but doesn't recognise the combined form.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK for trunk?
OK for branches without the assert, after a grace period?

Thanks,
Richard


2019-11-08  Richard Sandiford  

gcc/
PR tree-optimization/92420
* tree-vect-stmts.c (get_negative_load_store_type): Move further
up file.
(get_group_load_store_type): Use it for reversed SLP accesses.

gcc/testsuite/
* gcc.dg/vect/pr92420.c: New test.

Index: gcc/tree-vect-stmts.c
===
--- gcc/tree-vect-stmts.c   2019-11-08 09:06:29.482897293 +
+++ gcc/tree-vect-stmts.c   2019-11-08 16:18:10.864080575 +
@@ -2147,6 +2147,56 @@ perm_mask_for_reverse (tree vectype)
   return vect_gen_perm_mask_checked (vectype, indices);
 }
 
+/* A subroutine of get_load_store_type, with a subset of the same
+   arguments.  Handle the case where STMT_INFO is a load or store that
+   accesses consecutive elements with a negative step.  */
+
+static vect_memory_access_type
+get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
+ vec_load_store_type vls_type,
+ unsigned int ncopies)
+{
+  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
+  dr_alignment_support alignment_support_scheme;
+
+  if (ncopies > 1)
+{
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"multiple types with negative step.\n");
+  return VMAT_ELEMENTWISE;
+}
+
+  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
+  if (alignment_support_scheme != dr_aligned
+  && alignment_support_scheme != dr_unaligned_supported)
+{
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"negative step but alignment required.\n");
+  return VMAT_ELEMENTWISE;
+}
+
+  if (vls_type == VLS_STORE_INVARIANT)
+{
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"negative step with invariant source;"
+" no permute needed.\n");
+  return VMAT_CONTIGUOUS_DOWN;
+}
+
+  if (!perm_mask_for_reverse (vectype))
+{
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"negative step and reversing not supported.\n");
+  return VMAT_ELEMENTWISE;
+}
+
+  return VMAT_CONTIGUOUS_REVERSE;
+}
+
 /* STMT_INFO is either a masked or unconditional store.  Return the value
being stored.  */
 
@@ -2273,7 +2323,15 @@ get_group_load_store_type (stmt_vec_info
 "Peeling for outer loop is not supported\n");
  return false;
}
- *memory_access_type = VMAT_CONTIGUOUS;
+ int cmp = compare_step_with_zero (stmt_info);
+ if (cmp < 0)
+   *memory_access_type = get_negative_load_store_type
+ (stmt_info, vectype, vls_type, 1);
+ else
+   {
+ gcc_assert (!loop_vinfo || cmp > 0);
+ *memory_access_type = VMAT_CONTIGUOUS;
+   }
}
 }
   else
@@ -2375,56 +2433,6 @@ get_group_load_store_type (stmt_vec_info
   return true;
 }
 
-/* A subroutine of get_load_store_type, with a subset of the same
-   arguments.  Handle the case where STMT_INFO is a load or store that
-   accesses consecutive elements with a negative step.  */
-
-static vect_memory_access_type
-get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
- vec_load_store_type vls_type,
- unsigned int ncopies)
-{
-  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
-  dr_alignment_support alignment_support_scheme;
-
-  if (ncopies > 1)
-{
-  if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"multiple types with negative step.\n");
-  return VMAT_ELEMENTWISE;
-}
-
-  alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
-  if (alignment_support_scheme != dr_aligned
-  && alignment_support_scheme != dr_unaligned_supported)
-{
-  if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"negative step but alignment required.\n");
-  return VMAT_ELEMENTWISE;
-}
-
-  if (vls_type == 

Re: [PATCH] [LRA] Do not use eliminable registers for spilling

2019-11-08 Thread Vladimir Makarov



On 2019-11-07 12:28 p.m., Kwok Cheung Yeung wrote:

Hello

On AMD GCN, I encountered the following situation in the following 
testcases using the compilation flags '-O2 -ftracer -fsplit-paths':


libgomp.oacc-fortran/reduction-1.f90
libgomp.oacc-fortran/reduction-2.f90
libgomp.oacc-fortran/reduction-3.f90
gcc.c-torture/execute/ieee/pr50310.c

- LRA decides to spill a register to s14 (which is used for the hard 
frame pointer, but is not in use due to the -fomit-frame-pointer 
implied by -O2). The reload dump has:


  Spill r612 into hr14
...
(insn 597 711 712 2 (set (reg:BI 129 scc [612])
    (ne:BI (reg:SI 2 s2 [684])
    (const_int 0 [0]))) "reduction-1.f90":22:0 23 {cstoresi4}
 (nil))
...
(insn 710 713 598 2 (set (reg:BI 14 s14)
    (reg:BI 160 v0 [685])) "reduction-1.f90":22:0 3 {*movbi}
 (nil))

- Later on, LRA decides to allocate s14 to a pseudo:

 Assigning to 758 (cl=ALL_REGS, orig=758, freq=388, tfirst=758, 
tfreq=388)...

   Assign 14 to subreg reload r758 (freq=388)
...
(insn 801 786 787 34 (set (reg:BI 14 s14 [758])
    (reg:BI 163 v3 [758])) 3 {*movbi}
 (nil))

- But then the next BB reloads the value previously spilled into s14, 
which has been clobbered by the previous instruction:


(insn 733 144 732 9 (set (reg:BI 163 v3 [706])
    (reg:BI 14 s14)) 3 {*movbi}
 (nil))

A similar issue has been dealt with in the past in PR83327, which was 
fixed in r258093. However, it does not work here - s14 is not marked 
as conflicting with pseudo 758.


This is because s14 is in eliminable_regset - if 
HARD_FRAME_POINTER_IS_FRAME_POINTER is false, 
ira_setup_eliminable_regset puts HARD_FRAME_POINTER_REGNUM into 
eliminable_regset even if the frame pointer is not needed (Why is 
this? It seems to have been that way since IRA was introduced).


  I don't remember exactly why.  It was a long time ago (12 years).  I 
suspect it was related to the fact that IRA worked with reload first and 
LRA was added much later and reload communicated differently to IRA than 
to the old global RA.  I guess we could try to change it and see what 
happens.



At the beginning of process_bb_lives (in lra-lives.c), 
eliminable_regset is ~ANDed out of hard_regs_live, so even if s14 is 
in the live-outs of the BB, it will be removed from consideration when 
registering conflicts with pseudos in the BB.


(As an aside, the liveness of eliminable spill registers would 
previously have been updated by make_hard_regno_live and 
make_hard_regno_dead, but as of r276440 '[LRA] Don't make eliminable 
registers live (PR91957)' this is no longer the case.)


Given that conflicts with registers in eliminable_regset are not 
tracked, I believe the easiest fix is simply to prevent eliminable 
registers from being used as spill registers.


Built and tested on AMD GCN with no regressions.

I've bootstrapped it on x86_64, but there is no point testing on it 
ATM as TARGET_SPILL_CLASS was disabled in r237792.


Okay for trunk?



Yes, the patch is ok.

Thank you for the patch and good explanation of the problem.




    [LRA] Do not use eliminable registers for spilling

    2019-11-07  Kwok Cheung Yeung  

    gcc/
    * lra-spills.c (assign_spill_hard_regs): Do not spill into
    registers in eliminable_regset.

diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c
index 0068e52..54f76cc 100644
--- a/gcc/lra-spills.c
+++ b/gcc/lra-spills.c
@@ -283,6 +283,8 @@ assign_spill_hard_regs (int *pseudo_regnos, int n)
   for (k = 0; k < spill_class_size; k++)
 {
   hard_regno = ira_class_hard_regs[spill_class][k];
+  if (TEST_HARD_REG_BIT (eliminable_regset, hard_regno))
+    continue;
+  if (! overlaps_hard_reg_set_p (conflict_hard_regs, mode, hard_regno))
 break;
 }




Re: [PATCH] Fix PR92324

2019-11-08 Thread Richard Sandiford
Richard Biener  writes:
> On Fri, 8 Nov 2019, Richard Sandiford wrote:
>
>> Richard Biener  writes:
>> > I've been sitting on this for a few days since I'm not 100% happy
>> > with how the code looks like.  There's possibly still holes in it
>> > (chains with mixed signed/unsigned adds for example might pick
>> > up signed adds in the epilogue), but the wrong-code cases should
>> > work fine now.  I'm probably going to followup with some
>> > mass renaming of variable/parameter names to make it more clear
>> > which stmt / type we are actually looking at ...
>> >
>> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
>> 
>> Does this look like the right way of updating neutral_op_for_slp_reduction?
>> It now needs to know whether the caller is using STMT_VINFO_REDUC_VECTYPE
>> (for an epilogue value) or STMT_VINFO_REDUC_VECTYPE_IN (for a PHI argument).
>> 
>> Fixes various gcc.target/aarch64/sve/slp_* tests, will give it a
>> full test on aarch64-linux-gnu.
>
> Yeah, it looks sensible.  In vect_create_epilog_for_reduction
> please move the call down to the only use in the
>
>   else if (direct_slp_reduc)
> {
>
> block.

Thanks, here's what I installed after testing on aarch64-linux-gnu
and x86_64-linux-gnu.

Richard

2019-11-08  Richard Sandiford  

gcc/
* tree-vect-loop.c (neutral_op_for_slp_reduction): Take the
vector type as an argument rather than reading it from the
stmt_vec_info.
(vect_create_epilog_for_reduction): Update accordingly.
(vectorizable_reduction): Likewise.
(vect_transform_cycle_phi): Likewise.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2019-11-08 11:58:22.331095690 +
+++ gcc/tree-vect-loop.c2019-11-08 16:06:11.389032120 +
@@ -2590,17 +2590,17 @@ reduction_fn_for_scalar_code (enum tree_
 
 /* If there is a neutral value X such that SLP reduction NODE would not
be affected by the introduction of additional X elements, return that X,
-   otherwise return null.  CODE is the code of the reduction.  REDUC_CHAIN
-   is true if the SLP statements perform a single reduction, false if each
-   statement performs an independent reduction.  */
+   otherwise return null.  CODE is the code of the reduction and VECTOR_TYPE
+   is the vector type that would hold element X.  REDUC_CHAIN is true if
+   the SLP statements perform a single reduction, false if each statement
+   performs an independent reduction.  */
 
 static tree
-neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
- bool reduc_chain)
+neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type,
+ tree_code code, bool reduc_chain)
 {
   vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
   stmt_vec_info stmt_vinfo = stmts[0];
-  tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
   tree scalar_type = TREE_TYPE (vector_type);
   class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
   gcc_assert (loop);
@@ -4220,11 +4220,6 @@ vect_create_epilog_for_reduction (stmt_v
 = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
   enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
   internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
-  tree neutral_op = NULL_TREE;
-  if (slp_node)
-neutral_op
-  = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code,
- REDUC_GROUP_FIRST_ELEMENT (stmt_info));
   stmt_vec_info prev_phi_info;
   tree vectype;
   machine_mode mode;
@@ -4822,6 +4817,14 @@ vect_create_epilog_for_reduction (stmt_v
 scalar value if we have one, otherwise the initial scalar value
 is itself a neutral value.  */
   tree vector_identity = NULL_TREE;
+  tree neutral_op = NULL_TREE;
+  if (slp_node)
+   {
+ stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
+ neutral_op
+   = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis,
+   vectype, code, first != NULL);
+   }
   if (neutral_op)
	vector_identity = gimple_build_vector_from_val (&seq, vectype,
neutral_op);
@@ -6214,7 +6217,7 @@ vectorizable_reduction (stmt_vec_info st
   tree neutral_op = NULL_TREE;
   if (slp_node)
 neutral_op = neutral_op_for_slp_reduction
-  (slp_node_instance->reduc_phis, orig_code,
+  (slp_node_instance->reduc_phis, vectype_out, orig_code,
REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL);
 
   if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
@@ -6797,7 +6800,7 @@ vect_transform_cycle_phi (stmt_vec_info
   gcc_assert (slp_node == slp_node_instance->reduc_phis);
   stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
   tree neutral_op
-   = neutral_op_for_slp_reduction (slp_node,
+   = 

Re: [Patch][OpenMP][Fortran] Support absent optional args with use_device_{ptr,addr} (+ OpenACC's use_device clause)

2019-11-08 Thread Jakub Jelinek
On Fri, Nov 08, 2019 at 04:41:23PM +0100, Tobias Burnus wrote:
> With DECL_ARTIFICIAL added and also_value replaced:
> Build on x86-64-gnu-linux. OK once regtested?

Almost.

> - gimplify_assign (x, var, );
> + if (do_optional_check && omp_check_optional_argument (ovar, true))
 
Do you need true here when just testing for non-NULL?
If yes, it would be better to call it just once, so that e.g. the
DECL_ARGUMENTS list is not walked twice.  So perhaps:

tree present;
present = (do_optional_check
   ? omp_check_optional_argument (ovar, true) : NULL_TREE);
if (present)
  {

> +   {
> + tree null_label = create_artificial_label (UNKNOWN_LOCATION);
> + tree notnull_label = create_artificial_label (UNKNOWN_LOCATION);
> + tree opt_arg_label = create_artificial_label (UNKNOWN_LOCATION);

I think this is already too long, so needs line wrapping before =.

> + tree new_x = unshare_expr (x);
> + tree present = omp_check_optional_argument (ovar, true);

And not call it here again.

> + gimplify_expr (, , NULL, is_gimple_val,
> +fb_rvalue);
> + gcond *cond = gimple_build_cond_from_tree (present,
> +notnull_label,
> +null_label);
> + gimple_seq_add_stmt (, cond);
> + gimple_seq_add_stmt (, gimple_build_label (null_label));
> + gimplify_assign (new_x, null_pointer_node, );
> + gimple_seq_add_stmt (, gimple_build_goto (opt_arg_label));

And here similarly.

> + if (do_optional_check
> + && omp_check_optional_argument (OMP_CLAUSE_DECL (c), true))
> +   {
> + tree null_label = create_artificial_label (UNKNOWN_LOCATION);
> + tree notnull_label = create_artificial_label (UNKNOWN_LOCATION);
> + tree opt_arg_label = create_artificial_label (UNKNOWN_LOCATION);
> + glabel *null_glabel = gimple_build_label (null_label);
> + glabel *notnull_glabel = gimple_build_label (notnull_label);
> + ggoto *opt_arg_ggoto = gimple_build_goto (opt_arg_label);
> + gimplify_expr (, _body, NULL, is_gimple_val,
> +fb_rvalue);
> + tree present = omp_check_optional_argument (OMP_CLAUSE_DECL (c),
> + true);

Similarly to the above.

Otherwise LGTM.

Jakub



Re: [PATCH V3] rs6000: Refine small loop unroll in loop_unroll_adjust hook

2019-11-08 Thread Segher Boessenkool
Hi Jiu Fu,

On Thu, Nov 07, 2019 at 10:40:41PM +0800, Jiufu Guo wrote:
> gcc/
> 2019-11-07  Jiufu Guo  
> 
>   PR tree-optimization/88760
>   * gcc/config/rs6000/rs6000.opt (-munroll-only-small-loops): New option.
>   * gcc/common/config/rs6000/rs6000-common.c
>   (rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]:
>   Turn on -funroll-loops and -munroll-only-small-loops.
>   [OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers.
>   * config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
>   set of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
>   Turn off -munroll-only-small-loops for explicit -funroll-loops.
>   (TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
>   (rs6000_loop_unroll_adjust): Define it.  Use -munroll-only-small-loops.
> 
> gcc.testsuite/
> 2019-11-07  Jiufu Guo  
> 
>   PR tree-optimization/88760
>   * gcc.dg/pr59643.c: Update back to r277550.

Okay for trunk.  Thanks!  Just some formatting stuff:

> +/* Enable -munroll-only-small-loops with -funroll-loops to unroll small
> +loops at -O2 and above by default.   */

The "l" of "loops" should align with the "E" of "Enable", and only two
spaces after a dot:
/* Enable -munroll-only-small-loops with -funroll-loops to unroll small
   loops at -O2 and above by default.  */

> +/*  Implement targetm.loop_unroll_adjust.  */

Only one space at the start of the comment.

> +static unsigned
> +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)

struct loop *loop

> +  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
> +  example we may want to unroll very small loops more times (4 perhaps).
> +  We also should use a PARAM for this.  */

There will be target-specific params soon, if I understood correctly :-)

Cheers,


Segher


Re: [Patch][OpenMP][Fortran] Support absent optional args with use_device_{ptr,addr} (+ OpenACC's use_device clause)

2019-11-08 Thread Tobias Burnus

Hi Jakub,

thanks for the review.

On 11/8/19 3:39 PM, Jakub Jelinek wrote:

+  /* Walk function argument list to find the hidden arg.  */
+  decl = DECL_ARGUMENTS (DECL_CONTEXT (decl));
+  for ( ; decl != NULL_TREE; decl = TREE_CHAIN (decl))
+   if (DECL_NAME (decl) == tree_name)
+ break;

Is this reliable?  I mean, consider -fallow-leading-underscore with:
subroutine foo (a, _a)


I also assume that this will break; unlikely in real-world code but still.


Not really sure if additional DECL_ARTIFICIAL (decl) test would be enough.


At least, I cannot quickly come up with a case where it will break. – I 
have now added it; also to the existing trans-expr.c function, which 
uses the same algorithm.
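
For reference, a minimal sketch of what the adjusted hidden-argument lookup
presumably looks like with the DECL_ARTIFICIAL test added (my reading, not
the exact committed hunk; the unmodified loop is quoted elsewhere in this
thread):

  decl = DECL_ARGUMENTS (DECL_CONTEXT (decl));
  for ( ; decl != NULL_TREE; decl = TREE_CHAIN (decl))
    if (DECL_NAME (decl) == tree_name && DECL_ARTIFICIAL (decl))
      break;

so a user-declared parameter that merely happens to be spelled "_a" under
-fallow-leading-underscore is skipped.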



+omp_check_optional_argument (tree decl, bool also_value)

Why is the argument called for_present_check in the langhook and
also_value here?  Looks inconsistent.


Because I initially was thinking only of the VALUE attribute until I 
realized that assumed-shape arrays have the same issue; they use a local 
variable for the data – and make the actual array descriptor available 
via a lang-specific field. – As the use needs either an extra deref or a 
check whether the variable is present, I changed the meaning – 
seemingly, three places survived with the old name.


With DECL_ARTIFICIAL added and also_value replaced:
Build on x86-64-gnu-linux. OK once regtested?

Tobias

2019-11-08  Tobias Burnus  
	Kwok Cheung Yeung  

	gcc/
	* langhooks-def.h (LANG_HOOKS_OMP_CHECK_OPTIONAL_ARGUMENT):
	Renamed from LANG_HOOKS_OMP_IS_OPTIONAL_ARGUMENT; update define.
	(LANG_HOOKS_DECLS): Rename also here.
	* langhooks.h (lang_hooks_for_decls): Rename
	omp_is_optional_argument to omp_check_optional_argument; take
	additional bool argument.
	* omp-general.h (omp_check_optional_argument): Likewise.
	* omp-general.c (omp_check_optional_argument): Likewise.
	* omp-low.c (lower_omp_target): Update calls; handle absent
	Fortran optional arguments with USE_DEVICE_ADDR/USE_DEVICE_PTR.

	gcc/fortran/
	* trans-expr.c (gfc_conv_expr_present): Check for DECL_ARTIFICIAL
	for the VALUE hidden argument avoiding -fallow-underscore issues.
	* trans-decl.c (create_function_arglist): Also set
	GFC_DECL_OPTIONAL_ARGUMENT for per-value arguments.
	* f95-lang.c (LANG_HOOKS_OMP_CHECK_OPTIONAL_ARGUMENT):
	Renamed from LANG_HOOKS_OMP_IS_OPTIONAL_ARGUMENT; point
	to gfc_omp_check_optional_argument.
	* trans.h (gfc_omp_check_optional_argument): Substitutes
	gfc_omp_is_optional_argument declaration.
	* trans-openmp.c (gfc_omp_is_optional_argument): Make static.
	(gfc_omp_check_optional_argument): New function.

	libgomp/
	* testsuite/libgomp.fortran/use_device_ptr-optional-1.f90: Extend.
	* testsuite/libgomp.fortran/use_device_ptr-optional-2.f90: New.

 gcc/fortran/f95-lang.c |   4 +-
 gcc/fortran/trans-decl.c   |   3 +-
 gcc/fortran/trans-expr.c   |   3 +-
 gcc/fortran/trans-openmp.c |  63 ++-
 gcc/fortran/trans.h|   2 +-
 gcc/langhooks-def.h|   4 +-
 gcc/langhooks.h|  13 ++-
 gcc/omp-general.c  |  14 ++-
 gcc/omp-general.h  |   2 +-
 gcc/omp-low.c  | 117 -
 .../libgomp.fortran/use_device_ptr-optional-1.f90  |  22 
 .../libgomp.fortran/use_device_ptr-optional-2.f90  |  33 ++
 12 files changed, 232 insertions(+), 48 deletions(-)

diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c
index 0684c3b99cf..c7b592dbfe2 100644
--- a/gcc/fortran/f95-lang.c
+++ b/gcc/fortran/f95-lang.c
@@ -115,7 +115,7 @@ static const struct attribute_spec gfc_attribute_table[] =
 #undef LANG_HOOKS_INIT_TS
 #undef LANG_HOOKS_OMP_ARRAY_DATA
 #undef LANG_HOOKS_OMP_IS_ALLOCATABLE_OR_PTR
-#undef LANG_HOOKS_OMP_IS_OPTIONAL_ARGUMENT
+#undef LANG_HOOKS_OMP_CHECK_OPTIONAL_ARGUMENT
 #undef LANG_HOOKS_OMP_PRIVATIZE_BY_REFERENCE
 #undef LANG_HOOKS_OMP_PREDETERMINED_SHARING
 #undef LANG_HOOKS_OMP_REPORT_DECL
@@ -150,7 +150,7 @@ static const struct attribute_spec gfc_attribute_table[] =
 #define LANG_HOOKS_INIT_TS		gfc_init_ts
 #define LANG_HOOKS_OMP_ARRAY_DATA		gfc_omp_array_data
 #define LANG_HOOKS_OMP_IS_ALLOCATABLE_OR_PTR	gfc_omp_is_allocatable_or_ptr
-#define LANG_HOOKS_OMP_IS_OPTIONAL_ARGUMENT	gfc_omp_is_optional_argument
+#define LANG_HOOKS_OMP_CHECK_OPTIONAL_ARGUMENT	gfc_omp_check_optional_argument
 #define LANG_HOOKS_OMP_PRIVATIZE_BY_REFERENCE	gfc_omp_privatize_by_reference
 #define LANG_HOOKS_OMP_PREDETERMINED_SHARING	gfc_omp_predetermined_sharing
 #define LANG_HOOKS_OMP_REPORT_DECL		gfc_omp_report_decl
diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index ffa6316..80ef45d892e 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -2691,9 +2691,8 @@ 

[PATCH] rs6000: Fix branch_comparison_operator

2019-11-08 Thread Segher Boessenkool
Another part I forgot yesterday.  With this, everything is back to
normal.

Committing.


Segher


2019-11-08  Segher Boessenkool  

* config/rs6000/predicates.md (branch_comparison_operator): Allow only
the comparison codes that make sense for the mode used, and only the
codes that can be done with a single branch instruction.

---
 gcc/config/rs6000/predicates.md | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 5ef505b..5c5aad9 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1135,10 +1135,14 @@ (define_special_predicate "equality_operator"
 ;; validate_condition_mode is an assertion.
 (define_predicate "branch_comparison_operator"
(and (match_operand 0 "comparison_operator")
-   (and (match_test "GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC")
-(match_test "validate_condition_mode (GET_CODE (op),
-  GET_MODE (XEXP (op, 0))),
- 1"
+   (match_test "GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC")
+   (if_then_else (match_test "GET_MODE (XEXP (op, 0)) == CCFPmode
+  && !flag_finite_math_only")
+ (match_code "lt,gt,eq,unordered,unge,unle,ne,ordered")
+ (match_code "lt,ltu,le,leu,gt,gtu,ge,geu,eq,ne"))
+   (match_test "validate_condition_mode (GET_CODE (op),
+ GET_MODE (XEXP (op, 0))),
+1")))
 
 ;; Return 1 if OP is an unsigned comparison operator.
 (define_predicate "unsigned_comparison_operator"
-- 
1.8.3.1



[PATCH][vect]Account for epilogue's peeling for gaps when checking if we have enough niters for epilogue

2019-11-08 Thread Andre Vieira (lists)

Hi,

As I mentioned in the patch to disable epilogue vectorization for loops 
with SIMDUID set, there were still some aarch64 libgomp failures. This 
patch fixes those.


The problem was that we were vectorizing a reduction that was only using 
one of the parts from a complex number, creating data accesses with 
gaps. For this we set PEELING_FOR_GAPS which forces us to peel an extra 
scalar iteration.


What was happening in the testcase I looked at was that we had a known 
niters of 10. The first VF was 4, leaving 10 % 4 = 2 scalar iterations. 
The epilogue had VF 2, which meant the current code thought we could do 
it. However, given the PEELING_FOR_GAPS it would create a scalar 
epilogue and we would end up doing too many iterations, surprisingly 12 
as I think the code assumed we hadn't created said epilogue.
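
Spelling out the arithmetic as I read it (same numbers as above, not copied
from the dumps): niters = 10 and a main VF of 4 leave eiters = 2; the old
check "eiters >= lowest_vf / ratio" is 2 >= 4 / 2 and passes, so the VF-2
epilogue was accepted.  But PEELING_FOR_GAPS on that epilogue peels one more
scalar iteration, so only one iteration of real budget is left; with the
patch the check becomes 2 >= 4 / 2 + 1, which fails, and the VF-2 epilogue is
correctly rejected.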


I ran a local check where I upped the iterations of the fortran test to 
11 and I see GCC vectorizing the epilogue with VF = 2 and a scalar 
epilogue for one iteration, so that looks good too. I have transformed 
it into a test that would reproduce the issue in C and without openacc 
so I can run it in gcc's normal testsuite more easily.


Bootstrap on aarch64 and x86_64.

Is this OK for trunk?

Cheers,
Andre

gcc/ChangeLog:
2019-11-08  Andre Vieira  

* tree-vect-loop-manip.c (vect_do_peeling): Take epilogue gaps
into account when checking if there are enough iterations to
vectorize epilogue.

gcc/testsuite/ChangeLog:
2019-11-08  Andre Vieira  

* gcc.dg/vect/vect-reduc-epilogue-gaps.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c
new file mode 100644
index ..dc5704f5607fc94fd02036e5db4a9bd37ce5169b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c
@@ -0,0 +1,45 @@
+/* { dg-options "-O3 -fno-vect-cost-model" } */
+struct {
+float real;
+float img;
+} g[11];
+
+float __attribute__ ((noclone))
+foo_11 (void)
+{
+  float sum = 0.0;
+  for (int i = 0; i < 11; ++i)
+sum += g[i].real;
+  return sum;
+}
+
+float __attribute__ ((noclone))
+foo_10 (void)
+{
+  float sum = 0.0;
+  for (int i = 0; i < 10; ++i)
+sum += g[i].real;
+  return sum;
+}
+
+int main (void)
+{
+  float check_10 = 0.0;
+  float check_11 = 0.0;
+  for (int i = 0; i < 11; ++i)
+{
+  asm volatile ("" : : : "memory");
+  g[i].real = (float) i;
+  g[i].img = (float) -i;
+  if (i < 10)
+	check_10 += (float) i;
+  check_11 += (float) i;
+}
+
+  if (foo_10 () != check_10)
+__builtin_abort ();
+  if (foo_11 () != check_11)
+__builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 54f3ccf3ec373b5621e7778e6e80bab853a57687..559d59bbe78738e53e7e6c1d64e7f87eed255d76 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -2530,9 +2530,11 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 	= eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
 
   unsigned int ratio;
+  unsigned int epilogue_gaps
+	= LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
   while (!(constant_multiple_p (loop_vinfo->vector_size,
 epilogue_vinfo->vector_size, &ratio)
-	   && eiters >= lowest_vf / ratio))
+	   && eiters >= lowest_vf / ratio + epilogue_gaps))
 	{
 	  delete epilogue_vinfo;
 	  epilogue_vinfo = NULL;
@@ -2543,6 +2545,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 	}
 	  epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
 	  loop_vinfo->epilogue_vinfos.ordered_remove (0);
+	  epilogue_gaps = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
 	}
 }
   /* Prolog loop may be skipped.  */


Re: [Patch][OpenMP][Fortran] Support absent optional args with use_device_{ptr,addr} (+ OpenACC's use_device clause)

2019-11-08 Thread Jakub Jelinek
On Thu, Nov 07, 2019 at 11:42:22AM +0100, Tobias Burnus wrote:
> +  /* For VALUE, the scalar variable is passed as is but a hidden argument
> + denotes the value.  Cf. trans-expr.c.  */
> +  if (TREE_CODE (TREE_TYPE (decl)) != POINTER_TYPE)
> +{
> +  char name[GFC_MAX_SYMBOL_LEN + 2];
> +  tree tree_name;
> +
> +  name[0] = '_';
> +  strcpy (&name[1], IDENTIFIER_POINTER (DECL_NAME (decl)));
> +  tree_name = get_identifier (name);
> +
> +  /* Walk function argument list to find the hidden arg.  */
> +  decl = DECL_ARGUMENTS (DECL_CONTEXT (decl));
> +  for ( ; decl != NULL_TREE; decl = TREE_CHAIN (decl))
> + if (DECL_NAME (decl) == tree_name)
> +   break;
> +
> +  gcc_assert (decl);
> +  return decl;
> +}

Is this reliable?  I mean, consider -fallow-leading-underscore with:
subroutine foo (a, _a)
  integer, optional, value :: a
  logical(kind=1), value :: _a
...
end subroutine foo
and whatever OpenMP clause is affected in ...
In GIMPLE dump I certainly see:
foo (integer(kind=4) a, logical(kind=1) _a, logical(kind=1) _a)
and I bet the above would pick the wrong one.

Not really sure if additional DECL_ARTIFICIAL (decl) test would be enough.

> --- a/gcc/omp-general.c
> +++ b/gcc/omp-general.c
> @@ -63,12 +63,18 @@ omp_is_allocatable_or_ptr (tree decl)
>return lang_hooks.decls.omp_is_allocatable_or_ptr (decl);
>  }
>  
> -/* Return true if DECL is a Fortran optional argument.  */
> +/* Check whether this DECL belongs to a Fortran optional argument.
> +   With 'for_present_check' set to false, decls which are optional parameters
> +   themselves are returned as tree - or a NULL_TREE otherwise. Those decls are
> +   always pointers.  With 'for_present_check' set to true, the decl for checking
> +   whether an argument is present is returned; for arguments with value
> +   attribute this is the hidden argument and of BOOLEAN_TYPE.  If the decl is
> +   unrelated to optional arguments, NULL_TREE is returned.  */
>  
> -bool
> -omp_is_optional_argument (tree decl)
> +tree
> +omp_check_optional_argument (tree decl, bool also_value)

Why is the argument called for_present_check in the langhook and
also_value here?  Looks inconsistent.

> --- a/gcc/omp-general.h
> +++ b/gcc/omp-general.h
> @@ -74,7 +74,7 @@ struct omp_for_data
>  
>  extern tree omp_find_clause (tree clauses, enum omp_clause_code kind);
>  extern bool omp_is_allocatable_or_ptr (tree decl);
> -extern bool omp_is_optional_argument (tree decl);
> +extern tree omp_check_optional_argument (tree decl, bool also_value);
>  extern bool omp_is_reference (tree decl);
>  extern void omp_adjust_for_condition (location_t loc, enum tree_code 
> *cond_code,
> tree *n2, tree v, tree step);

Here too.

Jakub



Re: [PATCH, Fortran] Allow CHARACTER literals in assignments and DATA statements

2019-11-08 Thread Steve Kargl
On Fri, Nov 08, 2019 at 11:17:21AM +0100, Tobias Burnus wrote:
> Additionally, there are several recent reports on segfaults and 
> regressions, looking Bugzilla (component = fortran, sort by change data).
> 

Easiest way to find open gfortran bugs is through the gfortran wiki.
https://gcc.gnu.org/wiki/GFortran
Go down to the "For gfortran developers" section.

-- 
steve


Re: [PATCH 0/4][MSP430] Tweaks to default configuration to reduce code size

2019-11-08 Thread Jozef Lawrynowicz
On Fri, 08 Nov 2019 22:59:18 +0900
Oleg Endo  wrote:

> On Fri, 2019-11-08 at 13:27 +, Jozef Lawrynowicz wrote:
> > 
> > Yes, I should have used -flto in my examples. But it doesn't help remove 
> > these
> > CRT library functions which are normally either directly added to the
> > list of functions to run before main (via .init, .ctors or .init_array) or 
> > used
> > in functions which are themselves added to this list.
> > 
> > The unnecessary functions we want to remove are:
> >   deregister_tm_clones
> >   register_tm_clones
> >   __do_global_dtors_aux
> >   frame_dummy
> > LTO can't remove any of them.
> >   
> 
> Ah, right, good point.  That's not MSP430 specific actually.  For those
> things I usually have custom init code, which also does other things
> occasionally.  Stripping off global dtors is then an option in the
> build system which takes care of it (in my case, I do it by modifying
> the generated linker script).
> 
> But again, as with the exceptions, it might be better to implement
> these kind of things outside of the compiler, e.g. by building the app
> with -nostartfiles -nodefaultlibs and providing your own substitutes.

I just don't think we need to be putting up this high barrier to entry for users
who want reduced code size but are building GCC from source.

With these changes users are getting a highly size-optimized runtime library
(14 bytes for a program that gets you to main() is always nice to see) out of
the box, by simply removing features that do not make sense on the target, and
they don't have to faff with any extra options.

The size of the CRT code has been a long standing complaint and is some part of
the reason a large chunk of the MSP430 user base still uses "mspgcc" which is
the old downstream GCC port of the target, which hasn't had any development
since 2012.

> 
> Another option is to patch those things in using the OS part of the
> target triplet.

Interesting idea. Something like msp430-unknown-min(imum)? The thing is even
with these changes the target is still ELF compliant.

Although I guess supplying a configuration which disables exceptions is not
very ELF-y.

Thanks,
Jozef
> 
> Cheers,
> Oleg
> 



Re: [PATCH 0/4][MSP430] Tweaks to default configuration to reduce code size

2019-11-08 Thread Oleg Endo
On Fri, 2019-11-08 at 13:27 +, Jozef Lawrynowicz wrote:
> 
> Yes, I should have used -flto in my examples. But it doesn't help remove these
> CRT library functions which are normally either directly added to the
> list of functions to run before main (via .init, .ctors or .init_array) or 
> used
> in functions which are themselves added to this list.
> 
> The unnecessary functions we want to remove are:
>   deregister_tm_clones
>   register_tm_clones
>   __do_global_dtors_aux
>   frame_dummy
> LTO can't remove any of them.
> 

Ah, right, good point.  That's not MSP430 specific actually.  For those
things I usually have custom init code, which also does other things
occasionally.  Stripping off global dtors is then an option in the
build system which takes care of it (in my case, I do it by modifying
the generated linker script).

But again, as with the exceptions, it might be better to implement
these kind of things outside of the compiler, e.g. by building the app
with -nostartfiles -nodefaultlibs and providing your own substitutes.

Another option is to patch those things in using the OS part of the
target triplet.

Cheers,
Oleg



Re: [PATCH 13/X] [libsanitizer][options] Add hwasan flags and argument parsing

2019-11-08 Thread Andrey Konovalov via gcc-patches
OK, let's keep the macros distinct then. In the kernel it doesn't give
you a lot, since you actually know which ASAN you're using based on
the kernel CONFIG_ values, but looks like it's important for
userspace. Thanks!
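
(For userspace code the split could look something like this sketch, purely
illustrative: __SANITIZE_HWADDRESS__ is the macro proposed in this series,
and the helper macro name is made up.

  #if defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_HWADDRESS__)
  #  define UNDER_SOME_ADDRESS_SANITIZER 1
  #endif

  #if defined(__SANITIZE_ADDRESS__) && !defined(__SANITIZE_HWADDRESS__)
  /* classic ASan only: large stack redzones, manual poisoning macros.  */
  #endif

Projects that do not care can collapse the two, and the ones from Matthew's
survey that do care can still keep them apart.)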

On Thu, Nov 7, 2019 at 7:01 PM Evgenii Stepanov  wrote:
>
> Clang has a function level attribute,
>   __attribute__((no_sanitize("hwaddress")))
> a feature macro
>   #if __has_feature(hwaddress_sanitizer)
> and a blacklist section
>   [hwaddress]
>   https://clang.llvm.org/docs/SanitizerSpecialCaseList.html
>
> I think it makes sense for the compiler to err on the side of not losing 
> information and provide distinct macros for these two sanitizers. If the 
> kernel does not care about the difference, they can add a simple #ifdef. They 
> would need to, anyway, because gcc does not have feature macros and clang 
> does not define __SANITIZE_ADDRESS__.
>
>
> On Thu, Nov 7, 2019 at 7:51 AM Andrey Konovalov  wrote:
>>
>> On Thu, Nov 7, 2019 at 1:48 PM Matthew Malcomson
>>  wrote:
>> >
>> > On 05/11/2019 13:11, Andrey Konovalov wrote:
>> > > On Tue, Nov 5, 2019 at 12:34 PM Matthew Malcomson
>> > >  wrote:
>> > >>
>> > >> NOTE:
>> > >> --
>> > >> I have defined a new macro of __SANITIZE_HWADDRESS__ that gets
>> > >> automatically defined when compiling with hwasan.  This is analogous to
>> > >> __SANITIZE_ADDRESS__ which is defined when compiling with asan.
>> > >>
>> > >> Users in the kernel have expressed an interest in using
>> > >> __SANITIZE_ADDRESS__ for both
>> > >> (https://lists.infradead.org/pipermail/linux-arm-kernel/2019-October/690703.html).
>> > >>
>> > >> One approach to do this could be to define __SANITIZE_ADDRESS__ with
>> > >> different values depending on whether we are compiling with hwasan or
>> > >> asan.
>> > >>
>> > >> Using __SANITIZE_ADDRESS__ for both means that code like the kernel
>> > >> which wants to treat the two sanitizers as alternate implementations of
>> > >> the same thing gets that automatically.
>> > >>
>> > >> My preference is to use __SANITIZE_HWADDRESS__ since that means any
>> > >> existing code will not be predicated on this (and hence I guess less
>> > >> surprises), but would appreciate feedback on this given the point above.
>> > >
>> > > +Evgenii Stepanov
>> > >
>> > > (A repost from my answer from the mentioned thread):
>> > >
>> > >> Similarly, I'm thinking I'll add no_sanitize_hwaddress as the hwasan
>> > >> equivalent of no_sanitize_address, which will require an update in the
>> > >> kernel given it seems you want KASAN to be used the same whether using
>> > >> tags or not.
>> > >
>> > > We have intentionally reused the same macros to simplify things. Is
>> > > there any reason to use separate macros for GCC? Are there places
>> > > where we need to use specifically no_sanitize_hwaddress and
>> > > __SANITIZE_HWADDRESS__, but not no_sanitize_address and
>> > > __SANITIZE_ADDRESS__?
>> > >
>> > >
>> >
>> > I've just looked through some open source repositories (via github
>> > search) that used the existing __SANITIZE_ADDRESS__ macro.
>> >
>> > There are a few repos that would want to use a feature macro for hwasan
>> > or asan in the exact same way as each other, but of the 31 truly
>> > different uses I found, 11 look like they would need to distinguish
>> > between hwasan and asan (where 4 uses I found I couldn't easily tell)
>> >
>> > NOTE
>> > - This is a count of unique uses, ignoring those repos which use a file
>> > from another repo.
>> > - I'm just giving links to the first of the relevant kind that I found,
>> > not putting effort into finding the "canonical" source of each repository.
>> >
>> >
>> > Places that need distinction (and their reasons):
>> >
>> > There are quite a few that use the ASAN_POISON_MEMORY_REGION and
>> > ASAN_UNPOISON_MEMORY_REGION macros to poison/unpoison memory themselves.
>> >   This abstraction doesn't quite make sense in a hwasan environment, as
>> > there is not really a "poisoned/unpoisoned" concept.
>> >
>> > https://github.com/laurynas-biveinis/unodb
>> > https://github.com/darktable-org/rawspeed
>> > https://github.com/MariaDB/server
>> > https://github.com/ralfbrown/framepac-ng
>> > https://github.com/peters/aom
>> > https://github.com/pspacek/knot-resolver-docker-fix
>> > https://github.com/harikrishnan94/sheap
>> >
>> >
>> > Some use it to record their compilation "type" as `-fsanitize=address`
>> > https://github.com/wallix/redemption
>> >
>> > Or to decide to set the environment variable ASAN_OPTIONS
>> > https://github.com/dephonatine/VBox5.2.18
>> >
>> > Others worry about stack space due to asan's redzones (hwasan has a much
>> > smaller stack memory overhead).
>> > https://github.com/fastbuild/fastbuild
>> > https://github.com/scylladb/seastar
>> > (n.b. seastar has a lot more conditioned code that would be the same
>> > between asan and hwasan).
>> >
>> >
>> > Each of these needs to know the difference between compiling with asan
>> > and hwasan, so I'm confident that having some 

Re: [PATCH 0/4][MSP430] Tweaks to default configuration to reduce code size

2019-11-08 Thread Jozef Lawrynowicz
On Fri, 08 Nov 2019 21:14:19 +0900
Oleg Endo  wrote:

> On Thu, 2019-11-07 at 21:31 +, Jozef Lawrynowicz wrote:
> > When building small programs for MSP430, the impact of the unused
> > functions pulled in from the CRT libraries is quite noticeable. Most of 
> > these
> > relates to feature that will never be used for MSP430 (Transactional memory,
> > supporting shared objects and dynamic linking), or rarely used (exception
> > handling).  
> 
> There's a magic switch, which does the business, at least for me, most
> of the time:
> 
>-flto
> 
> If you're trying to bring down the executable size as much as possible,
> but don't use -flto, I think something is wrong.
> 
> Cheers,
> Oleg
> 

Yes, I should have used -flto in my examples. But it doesn't help remove these
CRT library functions which are normally either directly added to the
list of functions to run before main (via .init, .ctors or .init_array) or used
in functions which are themselves added to this list.

The unnecessary functions we want to remove are:
  deregister_tm_clones
  register_tm_clones
  __do_global_dtors_aux
  frame_dummy
LTO can't remove any of them.

Thanks,
Jozef


Re: [PATCH 2/4] MSP430: Disable exception handling by default for C++

2019-11-08 Thread Jozef Lawrynowicz
On Fri, 08 Nov 2019 09:07:39 +0900
Oleg Endo  wrote:

> On Thu, 2019-11-07 at 21:37 +, Jozef Lawrynowicz wrote:
> > The code size bloat added by building C++ programs using libraries 
> > containing
> > support for exceptions is significant. When using simple constructs such as
> > static variables, sometimes many kB from the libraries are unnecessarily
> > pulled in.
> > 
> > So this patch disable exceptions by default for MSP430 when compiling for 
> > C++,
> > by implicitly passing -fno-exceptions unless -fexceptions is passed.  
> 
> It is extremely annoying when GCC's default standard behavior differs
> across different targets.  And as a consequence, you have to add a load
> of workarounds and disable other things, like fiddling with the
> testsuite.  It's the same thing as setting "double = float" to get more
> "speed" by default.
> 
> I would strongly advice against making such non-standard behaviors the
> default in the vanilla compiler.  C++ normally has exceptions enabled. 
> If a user doesn't want them and is willing to deal with it all the
> consequences, then we already have a mechanism to do that:
>  --fno-exceptions
> 
> Perhaps it's generally more useful to add a global configure option for
> GCC to disable exception handling by default.  Then you can provide a
> turn-key toolchain to your customers as well -- just add an option to
> the configure line.
> 
> Cheers,
> Oleg
> 

Fair point, I probably should have realised whilst implementing all the
testsuite workarounds that this wasn't the best choice for upstream GCC and
integrating nicely with the testsuite.

So I've regtested and attached a revised patch to instead build -fno-exceptions
multilibs, so the reduced code size can still be achieved by compiling with
-fno-exceptions.

And the --disable-no-exceptions multilib option is added to reduce build time
for developers.
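
(For concreteness, and assuming the multilib mapping in the patch below, a
user would then get the smaller startup files with something like

  msp430-elf-gcc -mmcu=msp430g2553 -Os -fno-exceptions main.c

where the device name is only an example, while developers who want a faster
toolchain build can configure with --disable-no-exceptions to skip building
those multilib variants.)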

Thanks for providing your input,
Jozef
>From fe67a5ff71bc48af05b086b2d495fbf77e1a070d Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz 
Date: Fri, 8 Nov 2019 10:47:26 +
Subject: [PATCH 2/4] MSP430: Add -fno-exceptions multilib

ChangeLog:

2019-11-08  Jozef Lawrynowicz  

	* config-ml.in: Support --disable-no-exceptions configure flag.

gcc/ChangeLog:

2019-11-08  Jozef Lawrynowicz  

	* config/msp430/msp430.h (STARTFILE_SPEC) [fno-exceptions]: Use
	crtbegin_no_eh.o.
	(ENDFILE_SPEC) [fno-exceptions]: Use crtend_no_eh.o.
	* config/msp430/t-msp430: Add -fno-exceptions multilib.
	* doc/install.texi: Document --disable-no-exceptions multilib configure
	option.

gcc/testsuite/ChangeLog:

2019-11-08  Jozef Lawrynowicz  

	* lib/gcc-dg.exp: Add dg-prune messages for when exception handling is
	disabled.

libgcc/ChangeLog:

2019-11-08  Jozef Lawrynowicz  

	* config.host: Add crt{begin,end}_no_eh.o to "extra_parts".
	* config/msp430/t-msp430: Add rules to build crt{begin,end}_no_eh.o.

---
 config-ml.in  | 13 +
 gcc/config/msp430/msp430.h|  6 --
 gcc/config/msp430/t-msp430|  9 +
 gcc/doc/install.texi  |  3 +++
 gcc/testsuite/lib/gcc-dg.exp  | 10 ++
 libgcc/config.host|  3 ++-
 libgcc/config/msp430/t-msp430 |  6 ++
 7 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/config-ml.in b/config-ml.in
index 3e37f875c88..5720d38d23f 100644
--- a/config-ml.in
+++ b/config-ml.in
@@ -383,6 +383,19 @@ mips*-*-*)
 	  done
 	fi
 	;;
+msp430-*-*)
+	if [ x$enable_no_exceptions = xno ]
+	then
+	  old_multidirs="${multidirs}"
+	  multidirs=""
+	  for x in ${old_multidirs}; do
+	case "$x" in
+	  *no-exceptions* ) : ;;
+	  *) multidirs="${multidirs} ${x}" ;;
+	esac
+	  done
+	fi
+	;;
 powerpc*-*-* | rs6000*-*-*)
 	if [ x$enable_aix64 = xno ]
 	then
diff --git a/gcc/config/msp430/msp430.h b/gcc/config/msp430/msp430.h
index 73afe2e2d16..4d796f67d1b 100644
--- a/gcc/config/msp430/msp430.h
+++ b/gcc/config/msp430/msp430.h
@@ -46,11 +46,13 @@ extern bool msp430x;
 
 #undef  STARTFILE_SPEC
 #define STARTFILE_SPEC "%{pg:gcrt0.o%s}" \
-  "%{!pg:%{minrt:crt0-minrt.o%s}%{!minrt:crt0.o%s}} %{!minrt:crtbegin.o%s}"
+  "%{!pg:%{minrt:crt0-minrt.o%s}%{!minrt:crt0.o%s}} " \
+  "%{!minrt:%{fno-exceptions:crtbegin_no_eh.o%s; :crtbegin.o%s}}"
 
 /* -lgcc is included because crtend.o needs __mspabi_func_epilog_1.  */
 #undef  ENDFILE_SPEC
-#define ENDFILE_SPEC "%{!minrt:crtend.o%s} " \
+#define ENDFILE_SPEC \
+  "%{!minrt:%{fno-exceptions:crtend_no_eh.o%s; :crtend.o%s}} "  \
   "%{minrt:%:if-exists(crtn-minrt.o%s)}%{!minrt:%:if-exists(crtn.o%s)} -lgcc"
 
 #define ASM_SPEC "-mP " /* Enable polymorphic instructions.  */ \
diff --git a/gcc/config/msp430/t-msp430 b/gcc/config/msp430/t-msp430
index f8ba7751123..e180ce3efdb 100644
--- a/gcc/config/msp430/t-msp430
+++ b/gcc/config/msp430/t-msp430
@@ -28,8 +28,8 @@ msp430-devices.o: $(srcdir)/config/msp430/msp430-devices.c \
 
 # Enable multilibs:
 
-MULTILIB_OPTIONS= mcpu=msp430 mlarge  mdata-region=none
-MULTILIB_DIRNAMES   = 430	   large  

Re: [PATCH][vect] PR 92351: When peeling for alignment make alignment of epilogues unknown

2019-11-08 Thread Richard Biener
On Fri, 8 Nov 2019, Richard Sandiford wrote:

> Richard Biener  writes:
> > On Thu, 7 Nov 2019, Andre Vieira (lists) wrote:
> >> On 07/11/2019 14:00, Richard Biener wrote:
> >> > On Thu, 7 Nov 2019, Andre Vieira (lists) wrote:
> >> > 
> >> >> Hi,
> >> >>
> >> >> PR92351 reports a bug in which a wrongly aligned load is generated for 
> >> >> an
> >> >> epilogue of a main loop for which we peeled for alignment.  There is no 
> >> >> way
> >> >> to
> >> >> guarantee that epilogue data accesses are aligned when the main loop is
> >> >> peeling for alignment.
> >> >>
> >> >> I also had to split vect-peel-2.c as there were scans there for the 
> >> >> number
> >> >> of
> >> >> unaligned accesses that were vectorized, thanks to this change that now
> >> >> depends on whether we are vectorizing the epilogue, which will also 
> >> >> contain
> >> >> unaligned accesses.  Since not all targets need to be able to vectorize 
> >> >> the
> >> >> epilogue I decided to disable epilogue vectorization for the version in
> >> >> which
> >> >> we scan the dumps and add a version that attempts epilogue vectorization
> >> >> but
> >> >> does not scan the dumps.
> >> >>
> >> >> Bootstrapped and regression tested on x86_64 and aarch64.
> >> >>
> >> >> Is this OK for trunk?
> >> > 
> >> > @@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info
> >> > *dr_info)
> >> >   = exact_div (vect_calculate_target_alignment (dr_info),
> >> > BITS_PER_UNIT);
> >> > DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
> >> >   +  /* If the main loop has peeled for alignment we have no way of 
> >> > knowing
> >> > + whether the data accesses in the epilogues are aligned.  We can't 
> >> > at
> >> > + compile time answer the question whether we have entered the main
> >> > loop
> >> > or
> >> > + not.  Fixes PR 92351.  */
> >> > +  if (loop_vinfo)
> >> > +{
> >> > +  loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO
> >> > (loop_vinfo);
> >> > +  if (orig_loop_vinfo
> >> > + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
> >> > +   return;
> >> > +}
> >> > 
> >> > so I'm not sure this is the correct place to do the fixup.  Isn't the
> >> > above done when analyzing the loops with different vector size/mode?
> >> > So we don't yet know whether we analyze the loop as epilogue or
> >> > not epilogue?  Looks like we at the moment always choose the
> >> > very first loop we analyze successfully as "main" loop?
> >> > 
> >> > So, can we do this instead in update_epilogue_loop_vinfo?  There
> >> > we should also know whether we created the jump-around the
> >> > main vect loop.
> >> > 
> >> 
> >> So we do know we are analyzing it as an epilogue, that is the only case
> >> orig_loop_vinfo is set.
> >> 
> >> The reason why we shouldn't do it in update_epilogue_loop_vinfo is that the
> >> target might not know how to vectorize memory accesses for unaligned memory
> >> for the given VF. Or maybe it does but is too expensive don't know if we
> >> currently check that though. I do not have an example but this is why I
> >> believe it would be better to do it during analysis. I thought it had been 
> >> you
> >> who alerted me to this, but maybe it was Sandiford, or maybe I dreamt it 
> >> up ;)
> >
> > It was probably me, yes.  But don't we have a catch-22 now?  If we
> > have multiple vector sizes and as Richard, want to first compute
> > the "cheapest" to use as the main vectorized body don't we then have
> > to re-analyze the smaller vector sizes for epilogue use?
> 
> It was a nice hack that we could vectorise as an epilogue even when
> choosing main loops, and optionally "promote" them later, but it's
> probably going to have to yield at some point anyway.  E.g. from what
> Andre said on IRC yesterday, he might have to take peeling for gaps
> into account too.
> 
> > So how do we handle this situation at the moment?
> >
> > I think during alignment peeling analysis we look whether a DR
> > absolutely needs to be aligned, that is, we use
> > vect_supportable_dr_alignment (*, true).  If that returns
> > dr_unaligned_unsupported we should probably simply disable
> > epilogue vectorization if we didn't version for alignment
> > (or we know the vectorized loop was entered).
> 
> I guess doing this based on the main loop would hard-code an assumption
> that the shorter vectors have the same sensitivity to alignment as
> longer vectors.  Which is probably fine in practice, but it would
> be good to avoid if possible.
> 
> > So, during analysis reject epilogues that have DRs with
> > dr_unaligned_unsupported but allow them as "main" loops still
> > (so disable epilogue vectorization for a main loop with such DRs).
> >
> > Then at update_epilogue_loop_vinfo time simply make alignment
> > unknown.
> >
> > Would that work?
> 
> Agree it sounds like it would work.  But at the moment we don't yet have
> a dr_unaligned_unsupported target that wants the "best loop" behaviour.
> Given that we 

[PATCH] Debug counter for IVOPTs

2019-11-08 Thread Richard Biener


Bootstrapped / tested on x86_64-unknwon-linux-gnu, applied.

Richard.

2019-11-08  Richard Biener  

* dbgcnt.def (ivopts_loop): Add.
* tree-ssa-loop-ivopts.c (tree_ssa_iv_optimize): Check
ivopts_loop before optimizing a loop.
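
(Typical use, for bisecting which loop's IVOPTs decisions matter; the exact
-fdbg-cnt syntax depends on the tree, older ones take a single upper bound
instead of a range:

  gcc -O2 -fdbg-cnt=ivopts_loop:1-4 -fdump-tree-ivopts t.c

lets only the first four loops visited by FOR_EACH_LOOP through the counter.)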

Index: gcc/dbgcnt.def
===
--- gcc/dbgcnt.def  (revision 277965)
+++ gcc/dbgcnt.def  (working copy)
@@ -199,3 +199,4 @@ DEBUG_COUNTER (dom_unreachable_edges)
 DEBUG_COUNTER (match)
 DEBUG_COUNTER (store_merging)
 DEBUG_COUNTER (gimple_unroll)
+DEBUG_COUNTER (ivopts_loop)
Index: gcc/tree-ssa-loop-ivopts.c
===
--- gcc/tree-ssa-loop-ivopts.c  (revision 277965)
+++ gcc/tree-ssa-loop-ivopts.c  (working copy)
@@ -131,6 +131,7 @@ along with GCC; see the file COPYING3.
 #include "tree-ssa-address.h"
 #include "builtins.h"
 #include "tree-vectorizer.h"
+#include "dbgcnt.h"
 
 /* FIXME: Expressions are expanded to RTL in this pass to determine the
cost of different addressing modes.  This should be moved to a TBD
@@ -8043,6 +8044,9 @@ tree_ssa_iv_optimize (void)
   /* Optimize the loops starting with the innermost ones.  */
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 {
+  if (!dbg_cnt (ivopts_loop))
+   continue;
+
   if (dump_file && (dump_flags & TDF_DETAILS))
flow_loop_dump (loop, dump_file, NULL, 1);
 


[PATCH] Fix PR92409

2019-11-08 Thread Richard Biener


The following fixes an ICE with the recent relaxing of type
constraints for inlining.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-11-08  Richard Biener  

PR ipa/92409
* tree-inline.c (declare_return_variable): Properly handle
type mismatches for the return slot.

Index: gcc/tree-inline.c
===
--- gcc/tree-inline.c   (revision 277955)
+++ gcc/tree-inline.c   (working copy)
@@ -3593,7 +3593,9 @@ declare_return_variable (copy_body_data
  vs. the call expression.  */
   if (modify_dest)
 caller_type = TREE_TYPE (modify_dest);
-  else
+  else if (return_slot)
+caller_type = TREE_TYPE (return_slot);
+  else /* No LHS on the call.  */
 caller_type = TREE_TYPE (TREE_TYPE (callee));
 
   /* We don't need to do anything for functions that don't return anything.  */
@@ -3634,6 +3636,10 @@ declare_return_variable (copy_body_data
  && !DECL_GIMPLE_REG_P (result)
  && DECL_P (var))
DECL_GIMPLE_REG_P (var) = 0;
+
+  if (!useless_type_conversion_p (callee_type, caller_type))
+   var = build1 (VIEW_CONVERT_EXPR, callee_type, var);
+
   use = NULL;
   goto done;
 }


Re: [PATCH, nvptx] Expand OpenACC child function arguments to use CUDA params space

2019-11-08 Thread Chung-Lin Tang

On 2019/10/8 10:05 PM, Thomas Schwinge wrote:

Hi Chung-Lin!

While we're all waiting for Tom to comment on this;-)  -- here's another
item I realized:

On 2019-09-10T19:41:59+0800, Chung-Lin Tang  wrote:

The libgomp nvptx plugin changes are also quite contained, with lots of
now unneeded [...] code deleted (since we no longer first cuAlloc a
buffer for the argument record before cuLaunchKernel)

It would be nice;-)  -- but unless I'm confused, it's not that simple: we
either have to reject (force host-fallback execution) or keep supporting
"old-style" nvptx offloading code: new-libgomp has to continue to work
with nvptx offloading code once generated by old-GCC.  Possibly even a
mixture of old and new nvptx offloading code, if libraries are involved,
huh!

I have not completely thought that through, but I suppose this could be
addressed by adding a flag to the 'struct nvptx_fn' (or similar) that's
synthesized by nvptx 'mkoffload'?


Hi Thomas, Tom,
I've looked at the problem, it is unfortunate that we overlooked the
need for versioning of NVPTX images, and did not reserve something in
'struct nvptx_tdata' for something like this.

But how about something like:

typedef struct nvptx_tdata
{
  const struct targ_ptx_obj *ptx_objs;
  unsigned ptx_num;

  unsigned ptx_version; /* < Add version field here.  */

  const char *const *var_names;
  unsigned var_num;

  const struct targ_fn_launch *fn_descs;
  unsigned fn_num;
} nvptx_tdata_t;

We currently only support x86_64 and powerpc64le hosts, which are both LP64 
targets.

Assuming that, the position above where I put the new 'ptx_version' field is
already a 32-bit alignment hole, so adding it doesn't change the layout of the
other fields, and in the static 'target_data' variable generated by mkoffload
it should be zeroed in currently circulating binaries (unless binutils is not
doing the intuitive thing...)

If these assumptions are safe, then existing images can be treated as having
ptx_version == 0 right now, and from now on we bump it to 1 for these new
nvptx convention changes.
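
A rough LP64 layout sketch of the idea (illustrative only, offsets not
verified against actual mkoffload output):

   0  ptx_objs     pointer   8 bytes
   8  ptx_num      unsigned  4 bytes
  12  ptx_version  unsigned  4 bytes   <- previously padding, so it reads
                                          back as 0 from old images
  16  var_names    pointer   8 bytes
  24  var_num      unsigned  4 bytes (+ 4 bytes padding)
  32  fn_descs     pointer   8 bytes
  40  fn_num       unsigned  4 bytes (+ tail padding)

i.e. the struct size and the offsets of all existing fields stay the same.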

(We can do a similar thing in 'struct targ_fn_launch' if we want to 
differentiate
at a per-function level.)

Any considerations?

Thanks,
Chung-Lin


[RFC] Only warn for maybe-uninitialized SRAed bits in -Wextra (PR 80635)

2019-11-08 Thread Martin Jambor
Hi,

this patch is an attempt to implement my idea from a previous thread
about moving -Wmaybe-uninitialized to -Wextra:

https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00220.html

Specifically, it attempts to split -Wmaybe-uninitialized into those that
are about SRA DECLs and those which are not, and move to -Wextra only
the former ones.  The main idea is that false -Wmaybe-uninitialized
warnings about values that are scalar in the user's code are easy to silence
by initializing them to zero or something, as opposed to bits of
aggregates such as a value in std::optional which are not.  Therefore,
the warnings about user-scalars can remain in -Wall but warnings about
SRAed pieces should be moved to -Wextra.
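
To illustrate the kind of case I mean (in the spirit of PR 80635, not its
exact testcase):

  #include <optional>

  int f (bool p)
  {
    std::optional<int> o;
    if (p)
      o = 42;
    return o ? *o : 0;
  }

After SRA the payload of 'o' becomes a separate scalar that is only written
on the engaged path, and that is what the warning may fire on with -O2, even
though the value is never read while uninitialized.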

The patch is a bit bigger because of documentation (which I'll be happy
to improve based on your suggestions) and testsuite churn, but the main
bit is the following added test in warn_uninit function:

  if (wc == OPT_Wmaybe_uninitialized
  && SSA_NAME_VAR (t)
  && DECL_ARTIFICIAL (SSA_NAME_VAR (t))
  && DECL_HAS_DEBUG_EXPR_P (SSA_NAME_VAR (t)))
{
  if (warn_maybe_uninitialized_aggregates)
wc = OPT_Wmaybe_uninitialized_aggregates;
  else
return;
}

The reason why I also test DECL_HAS_DEBUG_EXPR_P is
gfortran.dg/pr39666-2.f90 - without it the test silences a warning about
a decl representing the return value which is an artificial scalar
variable (probably all the way from the front-end).  We can of course
not care about not warning for it but then I don't know how to call and
document the new option :-)  Generally, if someone can think of a better
test to identify SRA DECLs, I'll be happy to change that.  We might put
a bit to identify SRA decls in the decl tree, but I tend to think that
is not a good use of the few remaining bits there.

What do you think, is something along these lines a good idea?

Thanks,

Martin



2019-11-08  Martin Jambor  

* common.opt (Wmaybe-uninitialized-aggregates): New.
* tree-ssa-uninit.c (gate_warn_uninitialized): Also run if
warn_maybe_uninitialized_aggregates is set.
(warn_uninit): Warn for artificial DECLs only if
warn_maybe_uninitialized_aggregates is set.
* doc/invoke.texi (Warning Options): Add
-Wmaybe-uninitialized-aggregates to the list.
(-Wextra): Likewise.
(-Wmaybe-uninitialized): Document that it only works on scalars.
(-Wmaybe-uninitialized-aggregates): Document.

testsuite/
* gcc.dg/pr45083.c: Add Wmaybe-uninitialized-aggregates to options.
* gcc.dg/ubsan/pr81981.c: Likewise.
* gfortran.dg/pr25923.f90: Likewise.
* g++.dg/warn/pr80635.C: New.
---
 gcc/common.opt|  4 +++
 gcc/doc/invoke.texi   | 18 +--
 gcc/testsuite/g++.dg/warn/pr80635.C   | 45 +++
 gcc/testsuite/gcc.dg/pr45083.c|  2 +-
 gcc/testsuite/gcc.dg/ubsan/pr81981.c  |  2 +-
 gcc/testsuite/gfortran.dg/pr25923.f90 |  2 +-
 gcc/tree-ssa-uninit.c | 14 -
 7 files changed, 81 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/warn/pr80635.C

diff --git a/gcc/common.opt b/gcc/common.opt
index cc279f411d7..03769299df8 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -783,6 +783,10 @@ Wmaybe-uninitialized
 Common Var(warn_maybe_uninitialized) Warning EnabledBy(Wuninitialized)
 Warn about maybe uninitialized automatic variables.
 
+Wmaybe-uninitialized-aggregates
+Common Var(warn_maybe_uninitialized_aggregates) Warning EnabledBy(Wextra)
+Warn about maybe uninitialized automatic parts of aggregates.
+
 Wunreachable-code
 Common Ignore Warning
 Does nothing. Preserved for backward compatibility.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index faa7fa95a0e..dbc3219b770 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -328,7 +328,8 @@ Objective-C and Objective-C++ Dialects}.
 -Wzero-length-bounds @gol
 -Winvalid-pch  -Wlarger-than=@var{byte-size} @gol
 -Wlogical-op  -Wlogical-not-parentheses  -Wlong-long @gol
--Wmain  -Wmaybe-uninitialized  -Wmemset-elt-size  -Wmemset-transposed-args @gol
+-Wmain  -Wmaybe-uninitialized -Wmaybe-uninitialized-aggregates @gol
+-Wmemset-elt-size  -Wmemset-transposed-args @gol
 -Wmisleading-indentation  -Wmissing-attributes  -Wmissing-braces @gol
 -Wmissing-field-initializers  -Wmissing-format-attribute @gol
 -Wmissing-include-dirs  -Wmissing-noreturn  -Wmissing-profile @gol
@@ -4498,6 +4499,7 @@ name is still supported, but the newer name is more descriptive.)
 -Wempty-body  @gol
 -Wignored-qualifiers @gol
 -Wimplicit-fallthrough=3 @gol
+-Wmaybe-uninitialized-aggregates @gol
 -Wmissing-field-initializers  @gol
 -Wmissing-parameter-type @r{(C only)}  @gol
 -Wold-style-declaration @r{(C only)}  @gol
@@ -5690,10 +5692,22 @@ in fact be called at the place that would cause a problem.
 
 Some spurious warnings can be avoided if you declare all the functions
 you use that never 

Re: [PATCH][vect] PR 92351: When peeling for alignment make alignment of epilogues unknown

2019-11-08 Thread Richard Sandiford
Richard Biener  writes:
> On Thu, 7 Nov 2019, Andre Vieira (lists) wrote:
>> On 07/11/2019 14:00, Richard Biener wrote:
>> > On Thu, 7 Nov 2019, Andre Vieira (lists) wrote:
>> > 
>> >> Hi,
>> >>
>> >> PR92351 reports a bug in which a wrongly aligned load is generated for an
>> >> epilogue of a main loop for which we peeled for alignment.  There is no 
>> >> way
>> >> to
>> >> guarantee that epilogue data accesses are aligned when the main loop is
>> >> peeling for alignment.
>> >>
>> >> I also had to split vect-peel-2.c as there were scans there for the number
>> >> of
>> >> unaligned accesses that were vectorized, thanks to this change that now
>> >> depends on whether we are vectorizing the epilogue, which will also 
>> >> contain
>> >> unaligned accesses.  Since not all targets need to be able to vectorize 
>> >> the
>> >> epilogue I decided to disable epilogue vectorization for the version in
>> >> which
>> >> we scan the dumps and add a version that attempts epilogue vectorization
>> >> but
>> >> does not scan the dumps.
>> >>
>> >> Bootstrapped and regression tested on x86_64 and aarch64.
>> >>
>> >> Is this OK for trunk?
>> > 
>> > @@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info
>> > *dr_info)
>> >   = exact_div (vect_calculate_target_alignment (dr_info),
>> > BITS_PER_UNIT);
>> > DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
>> >   +  /* If the main loop has peeled for alignment we have no way of knowing
>> > + whether the data accesses in the epilogues are aligned.  We can't at
>> > + compile time answer the question whether we have entered the main
>> > loop
>> > or
>> > + not.  Fixes PR 92351.  */
>> > +  if (loop_vinfo)
>> > +{
>> > +  loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO
>> > (loop_vinfo);
>> > +  if (orig_loop_vinfo
>> > + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
>> > +   return;
>> > +}
>> > 
>> > so I'm not sure this is the correct place to do the fixup.  Isn't the
>> > above done when analyzing the loops with different vector size/mode?
>> > So we don't yet know whether we analyze the loop as epilogue or
>> > not epilogue?  Looks like we at the moment always choose the
>> > very first loop we analyze successfully as "main" loop?
>> > 
>> > So, can we do this instead in update_epilogue_loop_vinfo?  There
>> > we should also know whether we created the jump-around the
>> > main vect loop.
>> > 
>> 
>> So we do know we are analyzing it as an epilogue, that is the only case
>> orig_loop_vinfo is set.
>> 
>> The reason why we shouldn't do it in update_epilogue_loop_vinfo is that the
>> target might not know how to vectorize memory accesses for unaligned memory
>> for the given VF. Or maybe it does but is too expensive don't know if we
>> currently check that though. I do not have an example but this is why I
>> believe it would be better to do it during analysis. I thought it had been 
>> you
>> who alerted me to this, but maybe it was Sandiford, or maybe I dreamt it up 
>> ;)
>
> It was probably me, yes.  But don't we have a catch-22 now?  If we
> have multiple vector sizes and as Richard, want to first compute
> the "cheapest" to use as the main vectorized body don't we then have
> to re-analyze the smaller vector sizes for epilogue use?

It was a nice hack that we could vectorise as an epilogue even when
choosing main loops, and optionally "promote" them later, but it's
probably going to have to yield at some point anyway.  E.g. from what
Andre said on IRC yesterday, he might have to take peeling for gaps
into account too.

> So how do we handle this situation at the moment?
>
> I think during alignment peeling analysis we look whether a DR
> absolutely needs to be aligned, that is, we use
> vect_supportable_dr_alignment (*, true).  If that returns
> dr_unaligned_unsupported we should probably simply disable
> epilogue vectorization if we didn't version for alignment
> (or we know the vectorized loop was entered).

I guess doing this based on the main loop would hard-code an assumption
that the shorter vectors have the same sensitivity to alignment as
longer vectors.  Which is probably fine in practice, but it would
be good to avoid if possible.

> So, during analysis reject epilogues that have DRs with
> dr_unaligned_unsupported but allow them as "main" loops still
> (so disable epilogue vectorization for a main loop with such DRs).
>
> Then at update_epilogue_loop_vinfo time simply make alignment
> unknown.
>
> Would that work?

Agree it sounds like it would work.  But at the moment we don't yet have
a dr_unaligned_unsupported target that wants the "best loop" behaviour.
Given that we might have to do what the vect_analyze_loop comment in

  https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00296.html

explains away anyway, it might not be worth the effort to support
that case.

Thanks,
Richard


[SPARC] Fix PR target/92095

2019-11-08 Thread Eric Botcazou
This is a regression present on mainline and 9/8 branches which was introduced 
by my fix for PR target/91472, another regression present on the branches:
  https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00012.html
They are ultimately all fallout of the transition to a pseudo PIC register for 
the SPARC port, which turns out to be more disruptive than initially thought.

Bootstrapped/regtested on SPARC64/Linux and SPARC/Solaris, applied on mainline 
and 9/8 branches.


2019-11-08  Eric Botcazou  

PR target/92095
* config/sparc/sparc-protos.h (output_load_pcrel_sym): Declare.
* config/sparc/sparc.c (sparc_cannot_force_const_mem): Revert latest
change.
(got_helper_needed): New static variable.
(output_load_pcrel_sym): New function.
(get_pc_thunk_name): Remove after inlining...
(load_got_register): ...here.  Rework the initialization of the GOT
register and of the GOT helper.
(save_local_or_in_reg_p): Test the REGNO of the GOT register.
(sparc_file_end): Test got_helper_needed to decide whether the GOT
helper must be emitted.  Use output_asm_insn instead of fprintf.
(sparc_init_pic_reg): In PIC mode, always initialize the PIC register
if optimization is enabled.
* config/sparc/sparc.md (load_pcrel_sym): Emit the assembly
by calling output_load_pcrel_sym.


2019-11-08  Eric Botcazou  

* gcc.c-torture/compile/20191108-1.c: New test.
* gcc.target/sparc/overflow-3.c: Add -fno-pie to the options.
* gcc.target/sparc/overflow-4.c: Likewise.
* gcc.target/sparc/overflow-5.c: Likewise.

-- 
Eric Botcazou
/* PR target/92095 */
/* Testcase by Sergei Trofimovich  */

typedef union {
  double a;
  int b[2];
} c;

double d(int e)
{
  c f;
  (&f)->b[0] = 15728640;
  return e ? -(&f)->a : (&f)->a;
}
Index: config/sparc/sparc-protos.h
===
--- config/sparc/sparc-protos.h	(revision 277906)
+++ config/sparc/sparc-protos.h	(working copy)
@@ -69,6 +69,7 @@ extern void sparc_split_reg_mem (rtx, rt
 extern void sparc_split_mem_reg (rtx, rtx, machine_mode);
 extern int sparc_split_reg_reg_legitimate (rtx, rtx);
 extern void sparc_split_reg_reg (rtx, rtx, machine_mode);
+extern const char *output_load_pcrel_sym (rtx *);
 extern const char *output_ubranch (rtx, rtx_insn *);
 extern const char *output_cbranch (rtx, rtx, int, int, int, rtx_insn *);
 extern const char *output_return (rtx_insn *);
Index: config/sparc/sparc.c
===
--- config/sparc/sparc.c	(revision 277906)
+++ config/sparc/sparc.c	(working copy)
@@ -4201,13 +4201,6 @@ eligible_for_sibcall_delay (rtx_insn *tr
 static bool
 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
 {
-  /* After IRA has run in PIC mode, it is too late to put anything into the
- constant pool if the PIC register hasn't already been initialized.  */
-  if ((lra_in_progress || reload_in_progress)
-  && flag_pic
-  && !crtl->uses_pic_offset_table)
-return true;
-
   switch (GET_CODE (x))
 {
 case CONST_INT:
@@ -4243,9 +4236,11 @@ sparc_cannot_force_const_mem (machine_mo
 }
 
 /* Global Offset Table support.  */
-static GTY(()) rtx got_helper_rtx = NULL_RTX;
-static GTY(()) rtx got_register_rtx = NULL_RTX;
 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
+static GTY(()) rtx got_register_rtx = NULL_RTX;
+static GTY(()) rtx got_helper_rtx = NULL_RTX;
+
+static GTY(()) bool got_helper_needed = false;
 
 /* Return the SYMBOL_REF for the Global Offset Table.  */
 
@@ -4258,27 +4253,6 @@ sparc_got (void)
   return got_symbol_rtx;
 }
 
-#ifdef HAVE_GAS_HIDDEN
-# define USE_HIDDEN_LINKONCE 1
-#else
-# define USE_HIDDEN_LINKONCE 0
-#endif
-
-static void
-get_pc_thunk_name (char name[32], unsigned int regno)
-{
-  const char *reg_name = reg_names[regno];
-
-  /* Skip the leading '%' as that cannot be used in a
- symbol name.  */
-  reg_name += 1;
-
-  if (USE_HIDDEN_LINKONCE)
-sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
-  else
-ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
-}
-
 /* Wrapper around the load_pcrel_sym{si,di} patterns.  */
 
 static rtx
@@ -4298,30 +4272,78 @@ gen_load_pcrel_sym (rtx op0, rtx op1, rt
   return insn;
 }
 
+/* Output the load_pcrel_sym{si,di} patterns.  */
+
+const char *
+output_load_pcrel_sym (rtx *operands)
+{
+  if (flag_delayed_branch)
+{
+  output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
+  output_asm_insn ("call\t%a2", operands);
+  output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
+}
+  else
+{
+  output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
+  output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
+  output_asm_insn ("call\t%a2", operands);
+  output_asm_insn (" nop", operands);

[PATCH] Fix PR92088

2019-11-08 Thread Richard Biener


The following works around a middle-end limitation: inlining cannot deal
transparently with VLA arguments passed by value (only DECL_BY_REFERENCE
is handled).  The workaround is in the C frontend, which marks such
nested functions as not inlinable.  This avoids ICEs later.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK for trunk?

Thanks,
Richard.

2019-11-09  Richard Biener  

PR c/92088
c/
* c-decl.c (grokdeclarator): Prevent inlining of nested
function with VLA arguments.

* builtins.c (compute_objsize): Deal with VLAs.

* gcc.dg/torture/pr92088-1.c: New testcase.
* gcc.dg/torture/pr92088-2.c: Likewise.

Index: gcc/builtins.c
===
--- gcc/builtins.c  (revision 277906)
+++ gcc/builtins.c  (working copy)
@@ -3708,7 +3708,8 @@ compute_objsize (tree dest, int ostype,
   if (DECL_P (ref))
 {
   *pdecl = ref;
-  return DECL_SIZE_UNIT (ref);
+  if (tree size = DECL_SIZE_UNIT (ref))
+   return TREE_CODE (size) == INTEGER_CST ? size : NULL_TREE;
 }
 
   tree type = TREE_TYPE (dest);
Index: gcc/c/c-decl.c
===
--- gcc/c/c-decl.c  (revision 277906)
+++ gcc/c/c-decl.c  (working copy)
@@ -7304,6 +7304,23 @@ grokdeclarator (const struct c_declarato
"no linkage");
   }
 
+/* For nested functions disqualify ones taking VLAs by value
+   from inlining since the middle-end cannot deal with this.
+   ???  We should arrange for those to be passed by reference
+   with emitting the copy on the caller side in the frontend.  */
+if (storage_class == csc_none
+   && TREE_CODE (type) == FUNCTION_TYPE)
+  for (tree al = TYPE_ARG_TYPES (type); al; al = TREE_CHAIN (al))
+   {
+ tree arg = TREE_VALUE (al);
+ if (arg != error_mark_node
+ && C_TYPE_VARIABLE_SIZE (TREE_VALUE (al)))
+   {
+ DECL_UNINLINABLE (decl) = 1;
+ break;
+   }
+   }
+
 /* Record `register' declaration for warnings on &
and in case doing stupid register allocation.  */
 
Index: gcc/testsuite/gcc.dg/torture/pr92088-1.c
===
--- gcc/testsuite/gcc.dg/torture/pr92088-1.c(revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr92088-1.c(working copy)
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+
+int __attribute__((noipa))
+g (char *p)
+{
+  return p[9];
+}
+int main (int argc, char **argv)
+{
+  struct S {
+char toto[argc + 16];
+  };
+  int f (struct S arg) {
+  __builtin_strcpy(arg.toto, "helloworld");
+  return g (arg.toto);
+  }
+  struct S bob;
+  __builtin_strcpy(bob.toto, "coucoucoucou");
+  if (f(bob) != 'd' || __builtin_strcmp (bob.toto, "coucoucoucou"))
+__builtin_abort ();
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/torture/pr92088-2.c
===
--- gcc/testsuite/gcc.dg/torture/pr92088-2.c(revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr92088-2.c(working copy)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+
+void foo(int n)
+{
+  struct X { int a[n]; } y;
+
+  struct X baz (struct X x)
+{
+  x.a[0] = 1;
+  return x;
+}
+
+  y.a[0] = 0;
+  y = baz(y);
+  if (y.a[0] != 1)
+__builtin_abort ();
+}


[PATCH] gdbinit.in: call a function with "call", not "set"

2019-11-08 Thread Konstantin Kharlamov
The last time a command that calls a debuggee function with "set" was
added was in 2013. Apparently something has changed since then, because
doing "set foo()" in gdb to call foo() now results in an error.
Regardless, it looks wrong to call a function with "set". Let's use
"call" instead.

(debug_rtx,debug_rtx_list,debug_tree,debug_c_tree,debug_gimple_stmt,
debug_gimple_seq,mpz_out_str,debug_dwarf_die,print_binding_stack,
bitmap_print): Replace "set" with "call"
---
 gcc/gdbinit.in | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/gcc/gdbinit.in b/gcc/gdbinit.in
index 42302aecfe3..a933ddc6141 100644
--- a/gcc/gdbinit.in
+++ b/gcc/gdbinit.in
@@ -26,7 +26,7 @@ Works only when an inferior is executing.
 end
 
 define pr
-set debug_rtx ($)
+call debug_rtx ($)
 end
 
 document pr
@@ -35,7 +35,7 @@ Works only when an inferior is executing.
 end
 
 define prl
-set debug_rtx_list ($, debug_rtx_count)
+call debug_rtx_list ($, debug_rtx_count)
 end
 
 document prl
@@ -50,7 +50,7 @@ it using debug_rtx_list. Usage example: set 
$foo=debug_rtx_find(first, 42)
 end
 
 define pt
-set debug_tree ($)
+call debug_tree ($)
 end
 
 document pt
@@ -59,7 +59,7 @@ Works only when an inferior is executing.
 end
 
 define pct
-set debug_c_tree ($)
+call debug_c_tree ($)
 end
 
 document pct
@@ -68,7 +68,7 @@ Works only when an inferior is executing.
 end
 
 define pgg
-set debug_gimple_stmt ($)
+call debug_gimple_stmt ($)
 end
 
 document pgg
@@ -77,7 +77,7 @@ Works only when an inferior is executing.
 end
 
 define pgq
-set debug_gimple_seq ($)
+call debug_gimple_seq ($)
 end
 
 document pgq
@@ -86,7 +86,7 @@ Works only when an inferior is executing.
 end
 
 define pgs
-set debug_generic_stmt ($)
+call debug_generic_stmt ($)
 end
 
 document pgs
@@ -95,7 +95,7 @@ Works only when an inferior is executing.
 end
 
 define pge
-set debug_generic_expr ($)
+call debug_generic_expr ($)
 end
 
 document pge
@@ -104,7 +104,7 @@ Works only when an inferior is executing.
 end
 
 define pmz
-set mpz_out_str(stderr, 10, $)
+call mpz_out_str(stderr, 10, $)
 end
 
 document pmz
@@ -140,7 +140,7 @@ Print the name of the type-node that is $.
 end
 
 define pdd
-set debug_dwarf_die ($)
+call debug_dwarf_die ($)
 end
 
 document pdd
@@ -167,7 +167,7 @@ Print the fields of an instruction that is $.
 end
 
 define pbs
-set print_binding_stack ()
+call print_binding_stack ()
 end
 
 document pbs
@@ -176,7 +176,7 @@ including the global binding level.
 end
 
 define pbm
-set bitmap_print (stderr, $, "", "\n")
+call bitmap_print (stderr, $, "", "\n")
 end
 
 document pbm
-- 
2.24.0



Re: [PATCH 0/4][MSP430] Tweaks to default configuration to reduce code size

2019-11-08 Thread Oleg Endo
On Thu, 2019-11-07 at 21:31 +, Jozef Lawrynowicz wrote:
> When building small programs for MSP430, the impact of the unused
> functions pulled in from the CRT libraries is quite noticeable. Most of these
> relate to features that will never be used for MSP430 (transactional memory,
> support for shared objects and dynamic linking), or are rarely used (exception
> handling).

There's a magic switch, which does the business, at least for me, most
of the time:

   -flto

If you're trying to bring down the executable size as much as possible,
but don't use -flto, I think something is wrong.
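
An illustrative command line (the extra flags are just what is commonly
combined with -flto for size, not something from the original report):

  msp430-elf-gcc -Os -flto -ffunction-sections -fdata-sections \
      -Wl,--gc-sections main.c -o main.elf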

Cheers,
Oleg



Re: [4/6] Optionally pick the cheapest loop_vec_info

2019-11-08 Thread Richard Sandiford
Richard Biener  writes:
> On Thu, Nov 7, 2019 at 6:15 PM Richard Sandiford
>  wrote:
>>
>> Richard Biener  writes:
>> > On Wed, Nov 6, 2019 at 3:01 PM Richard Sandiford
>> >  wrote:
>> >>
>> >> Richard Biener  writes:
>> >> > On Tue, Nov 5, 2019 at 3:29 PM Richard Sandiford
>> >> >  wrote:
>> >> >>
>> >> >> This patch adds a mode in which the vectoriser tries each available
>> >> >> base vector mode and picks the one with the lowest cost.  For now
>> >> >> the behaviour is behind a default-off --param, but a later patch
>> >> >> enables it by default for SVE.
>> >> >>
>> >> >> The patch keeps the current behaviour of preferring a VF of
>> >> >> loop->simdlen over any larger or smaller VF, regardless of costs
>> >> >> or target preferences.
>> >> >
>> >> > Can you avoid using a --param for this?  Instead I'd suggest to
>> >> > amend the vectorize_modes target hook to return some
>> >> > flags like VECT_FIRST_MODE_WINS.  We'd eventually want
>> >> > to make the target able to say do-not-vectorize-epilogues-of-MODE
>> >> > (I think we may not want to vectorize SSE vectorized loop
>> >> > epilogues with MMX-with-SSE or GPRs for example).  I guess
>> >> > for the latter we'd use a new target hook.
>> >>
>> >> The reason for using a --param was that I wanted a way of turning
>> >> this on and off on the command line, so that users can experiment
>> >> with it if necessary.  E.g. enabling the --param could be a viable
>> >> alternative to -mprefix-* in some cases.  Disabling it would be
>> >> a way of working around a bad cost model decision without going
>> >> all the way to -fno-vect-cost-model.
>> >>
>> >> These kinds of --params can become useful workarounds until an
>> >> optimisation bug is fixed.
>> >
>> > I'm arguing that the default depends on the actual ISAs so there isn't
>> > a one-fits all and given we have OMP SIMD and target cloning for
>> > multiple ISAs this looks like a wrong approach.  For sure the
>> > target can use its own switches to override defaults here, or alternatively
>> > we might want to have a #pragma GCC simdlen mimicking OMP behavior
>> > here.
>>
>> I agree there's no one-size-fits-all choice here, but that's true for
>> other --params too.  The problem with using target switches is that we
>> have to explain them and to keep accepting them "forever" (or at least
>> with a long deprecation period).
>
> Fortunately next week you'll be able to add target specific --params
> to your targets .opt file ;)

Nice!  That definitely sounds like a good compromise. :-)  I'll hold off
on 6/6 until Martin's patches have gone in.  There are a couple of other
SVE things that would benefit from that too.

Thanks,
Richard


Re: Fix code order in tree-sra.c:create_access

2019-11-08 Thread Richard Biener
On Fri, Nov 8, 2019 at 12:04 PM Martin Jambor  wrote:
>
> Hi,
>
> On Fri, Nov 08 2019, Richard Sandiford wrote:
> > If get_ref_base_and_extent returns poly_int offsets or sizes,
> > tree-sra.c:create_access prevents SRA from being applied to the base.
> > However, we haven't verified by that point that we have a valid base
> > to disqualify.
> >
> > This originally led to an ICE on the attached testcase, but it
> > no longer triggers there after the introduction of IPA SRA.
> >
> > Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
> >
> > Richard
> >
> >
> > 2019-11-08  Richard Sandiford  
> >
> > gcc/
> >   * tree-sra.c (create_access): Delay disqualifying the base
> >   for poly_int values until we know we have a base.
> >
>
> I can't approve the patch but it looks fine.

OK.

Richard.

> Thanks,
>
> Martin


Re: Mark constant-sized objects as addressable if they have poly-int accesses

2019-11-08 Thread Richard Biener
On Fri, Nov 8, 2019 at 10:40 AM Richard Sandiford
 wrote:
>
> If SVE code is written for a specific vector length, it might load from
> or store to fixed-sized objects.  This needs to work even without
> -msve-vector-bits=N (which should never be needed for correctness).
>
> There's no way of handling a direct poly-int sized reference to a
> fixed-size register; it would have to go via memory.  And in that
> case it's more efficient to mark the fixed-size object as
> addressable from the outset, like we do for array references
> with non-constant indices.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Hmm, shouldn't you somehow avoid walking subtrees again like
the array-ref cases?  Do "intermediate" types really matter here?
Thus if you have a BIT_FIELD_REF <...> that selects the first element,
do you really need it addressable?

It seems to me you want to check it only on the actual reference type.

Richard.

> Richard
>
>
> 2019-11-08  Richard Sandiford  
>
> gcc/
> * cfgexpand.c (discover_nonconstant_array_refs_r): If an access
> with POLY_INT_CST size is made to a fixed-size object, force the
> object to live in memory.
>
> gcc/testsuite/
> * gcc.target/aarch64/sve/acle/general/deref_1.c: New test.
>
> Index: gcc/cfgexpand.c
> ===
> --- gcc/cfgexpand.c 2019-10-01 09:55:35.062089236 +0100
> +++ gcc/cfgexpand.c 2019-11-08 09:39:13.105130902 +
> @@ -6106,6 +6106,20 @@ discover_nonconstant_array_refs_r (tree
>  {
>tree t = *tp;
>
> +  /* References of size POLY_INT_CST to a fixed-size object must go
> + through memory.  It's more efficient to force that here than
> + to create temporary slots on the fly.  */
> +  if (TYPE_SIZE (TREE_TYPE (t))
> +  && POLY_INT_CST_P (TYPE_SIZE (TREE_TYPE (t))))
> +{
> +  t = get_base_address (t);
> +  if (t
> + && DECL_P (t)
> + && DECL_MODE (t) != BLKmode
> + && GET_MODE_BITSIZE (DECL_MODE (t)).is_constant ())
> +   TREE_ADDRESSABLE (t) = 1;
> +}
> +
>if (IS_TYPE_OR_DECL_P (t))
>  *walk_subtrees = 0;
>else if (TREE_CODE (t) == ARRAY_REF || TREE_CODE (t) == ARRAY_RANGE_REF)
> Index: gcc/testsuite/gcc.target/aarch64/sve/acle/general/deref_1.c
> ===
> --- /dev/null   2019-09-17 11:41:18.176664108 +0100
> +++ gcc/testsuite/gcc.target/aarch64/sve/acle/general/deref_1.c 2019-11-08 
> 09:39:13.105130902 +
> @@ -0,0 +1,13 @@
> +/* { dg-options "-O2" } */
> +
> +#include <arm_sve.h>
> +
> +uint64_t
> +f (int32_t *x, int32_t *y)
> +{
> +  union { uint64_t x; char c[8]; } u;
> +  svbool_t pg = svptrue_b32 ();
> +  *(svbool_t *)&u.c[0] = svcmpeq (pg, svld1 (pg, x), 0);
> +  *(svbool_t *)&u.c[4] = svcmpeq (pg, svld1 (pg, y), 1);
> +  return u.x;
> +}


Re: [PATCH] Fix PR92324

2019-11-08 Thread Richard Biener
On Fri, 8 Nov 2019, Richard Sandiford wrote:

> Richard Biener  writes:
> > I've been sitting on this for a few days since I'm not 100% happy
> > with how the code looks like.  There's possibly still holes in it
> > (chains with mixed signed/unsigned adds for example might pick
> > up signed adds in the epilogue), but the wrong-code cases should
> > work fine now.  I'm probably going to followup with some
> > mass renaming of variable/parameter names to make it more clear
> > which stmt / type we are actually looking at ...
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> 
> Does this look like the right way of updating neutral_op_for_slp_reduction?
> It now needs to know whether the caller is using STMT_VINFO_REDUC_VECTYPE
> (for an epilogue value) or STMT_VINFO_REDUC_VECTYPE_IN (for a PHI argument).
> 
> Fixes various gcc.target/aarch64/sve/slp_* tests, will give it a
> full test on aarch64-linux-gnu.

Yeah, it looks sensible.  In vect_create_epilog_for_reduction
please move the call down to the only use in the

  else if (direct_slp_reduc)
{

block.

Thanks,
Richard.

> Thanks,
> Richard
> 
> 
> 2019-11-08  Richard Sandiford  
> 
> gcc/
>   * tree-vect-loop.c (neutral_op_for_slp_reduction): Take the
>   vector type as an argument rather than reading it from the
>   stmt_vec_info.
>   (vect_create_epilog_for_reduction): Update accordingly,
>   passing the STMT_VINFO_REDUC_VECTYPE.
>   (vectorizable_reduction): Likewise.
>   (vect_transform_cycle_phi): Likewise, but passing the
>   STMT_VINFO_REDUC_VECTYPE_IN.
> 
> Index: gcc/tree-vect-loop.c
> ===
> --- gcc/tree-vect-loop.c  2019-11-08 09:06:29.654896085 +
> +++ gcc/tree-vect-loop.c  2019-11-08 10:41:54.498861004 +
> @@ -2586,17 +2586,17 @@ reduction_fn_for_scalar_code (enum tree_
>  
>  /* If there is a neutral value X such that SLP reduction NODE would not
> be affected by the introduction of additional X elements, return that X,
> -   otherwise return null.  CODE is the code of the reduction.  REDUC_CHAIN
> -   is true if the SLP statements perform a single reduction, false if each
> -   statement performs an independent reduction.  */
> +   otherwise return null.  CODE is the code of the reduction and VECTOR_TYPE
> +   is the vector type that would hold element X.  REDUC_CHAIN is true if
> +   the SLP statements perform a single reduction, false if each statement
> +   performs an independent reduction.  */
>  
>  static tree
> -neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
> -   bool reduc_chain)
> +neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type,
> +   tree_code code, bool reduc_chain)
>  {
>vec stmts = SLP_TREE_SCALAR_STMTS (slp_node);
>stmt_vec_info stmt_vinfo = stmts[0];
> -  tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
>tree scalar_type = TREE_TYPE (vector_type);
>class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
>gcc_assert (loop);
> @@ -4216,11 +4216,6 @@ vect_create_epilog_for_reduction (stmt_v
>  = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt 
> (stmt_info))->stmt);
>enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
>internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
> -  tree neutral_op = NULL_TREE;
> -  if (slp_node)
> -neutral_op
> -  = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code,
> -   REDUC_GROUP_FIRST_ELEMENT (stmt_info));
>stmt_vec_info prev_phi_info;
>tree vectype;
>machine_mode mode;
> @@ -4267,11 +4262,15 @@ vect_create_epilog_for_reduction (stmt_v
>gcc_assert (vectype);
>mode = TYPE_MODE (vectype);
>  
> +  tree neutral_op = NULL_TREE;
>tree initial_def = NULL;
>tree induc_val = NULL_TREE;
>tree adjustment_def = NULL;
>if (slp_node)
> -;
> +neutral_op
> +  = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis,
> +   vectype, code,
> +   REDUC_GROUP_FIRST_ELEMENT (stmt_info));
>else
>  {
>/* Get at the scalar def before the loop, that defines the initial 
> value
> @@ -6210,7 +6209,7 @@ vectorizable_reduction (stmt_vec_info st
>tree neutral_op = NULL_TREE;
>if (slp_node)
>  neutral_op = neutral_op_for_slp_reduction
> -  (slp_node_instance->reduc_phis, orig_code,
> +  (slp_node_instance->reduc_phis, vectype_out, orig_code,
> REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL);
>  
>if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
> @@ -6793,7 +6792,7 @@ vect_transform_cycle_phi (stmt_vec_info
>gcc_assert (slp_node == slp_node_instance->reduc_phis);
>stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
>tree neutral_op
> - = neutral_op_for_slp_reduction (slp_node,

Re: [4/6] Optionally pick the cheapest loop_vec_info

2019-11-08 Thread Richard Biener
On Thu, Nov 7, 2019 at 6:15 PM Richard Sandiford
 wrote:
>
> Richard Biener  writes:
> > On Wed, Nov 6, 2019 at 3:01 PM Richard Sandiford
> >  wrote:
> >>
> >> Richard Biener  writes:
> >> > On Tue, Nov 5, 2019 at 3:29 PM Richard Sandiford
> >> >  wrote:
> >> >>
> >> >> This patch adds a mode in which the vectoriser tries each available
> >> >> base vector mode and picks the one with the lowest cost.  For now
> >> >> the behaviour is behind a default-off --param, but a later patch
> >> >> enables it by default for SVE.
> >> >>
> >> >> The patch keeps the current behaviour of preferring a VF of
> >> >> loop->simdlen over any larger or smaller VF, regardless of costs
> >> >> or target preferences.
> >> >
> >> > Can you avoid using a --param for this?  Instead I'd suggest to
> >> > amend the vectorize_modes target hook to return some
> >> > flags like VECT_FIRST_MODE_WINS.  We'd eventually want
> >> > to make the target able to say do-not-vectorize-epilogues-of-MODE
> >> > (I think we may not want to vectorize SSE vectorized loop
> >> > epilogues with MMX-with-SSE or GPRs for example).  I guess
> >> > for the latter we'd use a new target hook.
> >>
> >> The reason for using a --param was that I wanted a way of turning
> >> this on and off on the command line, so that users can experiment
> >> with it if necessary.  E.g. enabling the --param could be a viable
> >> alternative to -mprefix-* in some cases.  Disabling it would be
> >> a way of working around a bad cost model decision without going
> >> all the way to -fno-vect-cost-model.
> >>
> >> These kinds of --params can become useful workarounds until an
> >> optimisation bug is fixed.
> >
> > I'm arguing that the default depends on the actual ISAs so there isn't
> > a one-fits all and given we have OMP SIMD and target cloning for
> > multiple ISAs this looks like a wrong approach.  For sure the
> > target can use its own switches to override defaults here, or alternatively
> > we might want to have a #pragma GCC simdlen mimicking OMP behavior
> > here.
>
> I agree there's no one-size-fits-all choice here, but that's true for
> other --params too.  The problem with using target switches is that we
> have to explain them and to keep accepting them "forever" (or at least
> with a long deprecation period).

Fortunately next week you'll be able to add target specific --params
to your targets .opt file ;)

>  Whereas the --param was just something
> that people could play with or perhaps use to work around problems
> temporarily.  It would come with no guarantees attached.  And what the
> --param did applied to any targets that support multiple modes,
> regardless of what the targets do by default.
>
> All that said, here's a version that returns the bitmask you suggested.
> I ended up making the flag select the new behaviour and 0 select the
> current behaviour, rather than have a flag for "first mode wins".
> Tested as before.

OK.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> 2019-11-07  Richard Sandiford  
>
> gcc/
> * target.h (VECT_COMPARE_COSTS): New constant.
> * target.def (autovectorize_vector_modes): Return a bitmask of flags.
> * doc/tm.texi: Regenerate.
> * targhooks.h (default_autovectorize_vector_modes): Update 
> accordingly.
> * targhooks.c (default_autovectorize_vector_modes): Likewise.
> * config/aarch64/aarch64.c (aarch64_autovectorize_vector_modes):
> Likewise.
> * config/arc/arc.c (arc_autovectorize_vector_modes): Likewise.
> * config/arm/arm.c (arm_autovectorize_vector_modes): Likewise.
> * config/i386/i386.c (ix86_autovectorize_vector_modes): Likewise.
> * config/mips/mips.c (mips_autovectorize_vector_modes): Likewise.
> * tree-vectorizer.h (_loop_vec_info::vec_outside_cost)
> (_loop_vec_info::vec_inside_cost): New member variables.
> * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize them.
> (vect_better_loop_vinfo_p, vect_joust_loop_vinfos): New functions.
> (vect_analyze_loop): When autovectorize_vector_modes returns
> VECT_COMPARE_COSTS, try vectorizing the loop with each available
> vector mode and picking the one with the lowest cost.
> (vect_estimate_min_profitable_iters): Record the computed costs
> in the loop_vec_info.
>
> Index: gcc/target.h
> ===
> --- gcc/target.h2019-11-07 15:11:15.831017985 +
> +++ gcc/target.h2019-11-07 16:52:30.037198353 +
> @@ -218,6 +218,14 @@ enum omp_device_kind_arch_isa {
>omp_device_isa
>  };
>
> +/* Flags returned by TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES:
> +
> +   VECT_COMPARE_COSTS
> +   Tells the loop vectorizer to try all the provided modes and
> +   pick the one with the lowest cost.  By default the vectorizer
> +   will choose the first mode that works.  */
> +const unsigned int VECT_COMPARE_COSTS = 1U << 0;
> +
>  

Re: [2/6] Don't assign a cost to vectorizable_assignment

2019-11-08 Thread Richard Biener
On Thu, Nov 7, 2019 at 5:40 PM Richard Sandiford
 wrote:
>
> Richard Biener  writes:
> > On Wed, Nov 6, 2019 at 4:58 PM Richard Sandiford
> >  wrote:
> >>
> >> Richard Biener  writes:
> >> > On Tue, Nov 5, 2019 at 3:27 PM Richard Sandiford
> >> >  wrote:
> >> >>
> >> >> vectorizable_assignment handles true SSA-to-SSA copies (which hopefully
> >> >> we don't see in practice) and no-op conversions that are required
> >> >> to maintain correct gimple, such as changes between signed and
> >> >> unsigned types.  These cases shouldn't generate any code and so
> >> >> shouldn't count against either the scalar or vector costs.
> >> >>
> >> >> Later patches test this, but it seemed worth splitting out.
> >> >
> >> > Hmm, but you have to adjust vect_compute_single_scalar_iteration_cost and
> >> > possibly the SLP cost walk as well, otherwise we're artificially making
> >> > those copies cheaper when vectorized.
> >>
> >> Ah, yeah.  It looks complicated to reproduce the conditions exactly
> >> there, so how about just costing 1 copy in vectorizable_assignment
> >> to counteract it, and ignore ncopies?
> >
> > I guess costing a single scalar_stmt ought to make it exactly offset
> > the scalar cost?
>
> To summarise what we said on IRC: the problem with that is that we
> need to count VF scalar stmts, where VF might be a runtime value.
> The follow-on loop costing code copes with variable VF without
> relying on vect_vf_for_cost.
>
> Calling vectorizable_assignment from the scalar costing code
> seemed like too much of a hack.  And it turns out that we can't
> delay the scalar costing until after vect_analyze_stmts because
> vect_enhance_data_refs_alignment needs it before then.  Reworking
> this whole thing is too much work for GCC 10 at this stage.
>
> So this patch goes with your suggestion of using a test based on
> tree_nop_conversion.  To make sure that the scalar and vector costs
> stay somewhat consistent, vectorizable_assignment continues to cost
> stmts for which the new predicate is false.
>
> Tested as before.

OK.

thanks,
Richard.

> Thanks,
> Richard
>
>
> 2019-11-07  Richard Sandiford  
>
> gcc/
> * tree-vectorizer.h (vect_nop_conversion_p): Declare.
> * tree-vect-stmts.c (vect_nop_conversion_p): New function.
> (vectorizable_assignment): Don't add a cost for nop conversions.
> * tree-vect-loop.c (vect_compute_single_scalar_iteration_cost):
> Likewise.
> * tree-vect-slp.c (vect_bb_slp_scalar_cost): Likewise.
>
> Index: gcc/tree-vectorizer.h
> ===
> --- gcc/tree-vectorizer.h   2019-11-07 15:11:22.290972236 +
> +++ gcc/tree-vectorizer.h   2019-11-07 16:32:14.817523866 +
> @@ -1654,6 +1654,7 @@ extern tree vect_get_vec_def_for_stmt_co
>  extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *,
>  slp_tree, slp_instance);
>  extern void vect_remove_stores (stmt_vec_info);
> +extern bool vect_nop_conversion_p (stmt_vec_info);
>  extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
>  slp_instance, stmt_vector_for_cost *);
>  extern void vect_get_load_cost (stmt_vec_info, int, bool,
> Index: gcc/tree-vect-stmts.c
> ===
> --- gcc/tree-vect-stmts.c   2019-11-07 15:11:50.134775028 +
> +++ gcc/tree-vect-stmts.c   2019-11-07 16:32:14.817523866 +
> @@ -5284,6 +5284,29 @@ vectorizable_conversion (stmt_vec_info s
>return true;
>  }
>
> +/* Return true if we can assume from the scalar form of STMT_INFO that
> +   neither the scalar nor the vector forms will generate code.  STMT_INFO
> +   is known not to involve a data reference.  */
> +
> +bool
> +vect_nop_conversion_p (stmt_vec_info stmt_info)
> +{
> +  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
> +  if (!stmt)
> +return false;
> +
> +  tree lhs = gimple_assign_lhs (stmt);
> +  tree_code code = gimple_assign_rhs_code (stmt);
> +  tree rhs = gimple_assign_rhs1 (stmt);
> +
> +  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
> +return true;
> +
> +  if (CONVERT_EXPR_CODE_P (code))
> +return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
> +
> +  return false;
> +}
>
>  /* Function vectorizable_assignment.
>
> @@ -5399,7 +5422,9 @@ vectorizable_assignment (stmt_vec_info s
>  {
>STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
>DUMP_VECT_SCOPE ("vectorizable_assignment");
> -  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, 
> cost_vec);
> +  if (!vect_nop_conversion_p (stmt_info))
> +   vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
> +   cost_vec);
>return true;
>  }
>
> Index: gcc/tree-vect-loop.c
> ===
> --- gcc/tree-vect-loop.c2019-11-07 

Re: Fix code order in tree-sra.c:create_access

2019-11-08 Thread Martin Jambor
Hi,

On Fri, Nov 08 2019, Richard Sandiford wrote:
> If get_ref_base_and_extent returns poly_int offsets or sizes,
> tree-sra.c:create_access prevents SRA from being applied to the base.
> However, we haven't verified by that point that we have a valid base
> to disqualify.
>
> This originally led to an ICE on the attached testcase, but it
> no longer triggers there after the introduction of IPA SRA.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Richard
>
>
> 2019-11-08  Richard Sandiford  
>
> gcc/
>   * tree-sra.c (create_access): Delay disqualifying the base
>   for poly_int values until we know we have a base.
>

I can't approve the patch but it looks fine.

Thanks,

Martin


Re: [PATCH 4/6] arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__

2019-11-08 Thread Richard Henderson
On 11/8/19 11:54 AM, Richard Henderson wrote:
> +@table @code
> +@item eq
> +``equal'' or Z flag set
> +@item ne
> +``not equal'' or Z flag clear
> +@item cs
> +``carry'' or C flag set
> +@item cc
> +C flag clear
> +@item mi
> +``minus'' or N flag set
> +@item pl
> +``plus'' or N flag clear
> +@item hi
> +unsigned greater than

Dang, skipped right over vc/vs here.  Will fix.


r~


[PATCH 5/6] arm: Add testsuite checks for asm-flag

2019-11-08 Thread Richard Henderson
Inspired by the tests in gcc.target/i386.  Testing code generation,
diagnostics, and execution.

* gcc.target/arm/asm-flag-1.c: New test.
* gcc.target/arm/asm-flag-3.c: New test.
* gcc.target/arm/asm-flag-5.c: New test.
* gcc.target/arm/asm-flag-6.c: New test.
---
 gcc/testsuite/gcc.target/arm/asm-flag-1.c | 35 ++
 gcc/testsuite/gcc.target/arm/asm-flag-3.c | 36 +++
 gcc/testsuite/gcc.target/arm/asm-flag-5.c | 30 
 gcc/testsuite/gcc.target/arm/asm-flag-6.c | 43 +++
 4 files changed, 144 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-6.c

diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-1.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-1.c
new file mode 100644
index 000..e1ce4120d98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-1.c
@@ -0,0 +1,35 @@
+/* Test the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#ifndef __GCC_ASM_FLAG_OUTPUTS__
+#error "missing preprocessor define"
+#endif
+
+void f(char *out)
+{
+  asm(""
+  : "=@ccne"(out[0]), "=@cceq"(out[1]),
+   "=@cccs"(out[2]), "=@"(out[3]),
+   "=@ccmi"(out[4]), "=@ccpl"(out[5]),
+   "=@ccvs"(out[6]), "=@ccvc"(out[7]),
+   "=@cchi"(out[8]), "=@ccls"(out[9]),
+   "=@ccge"(out[10]), "=@cclt"(out[11]),
+   "=@ccgt"(out[12]), "=@ccle"(out[13]));
+}
+
+/* There will be at least one of each, probably two.  */
+/* { dg-final { scan-assembler "movne" } } */
+/* { dg-final { scan-assembler "moveq" } } */
+/* { dg-final { scan-assembler "movcs" } } */
+/* { dg-final { scan-assembler "movcc" } } */
+/* { dg-final { scan-assembler "movmi" } } */
+/* { dg-final { scan-assembler "movpl" } } */
+/* { dg-final { scan-assembler "movvs" } } */
+/* { dg-final { scan-assembler "movvc" } } */
+/* { dg-final { scan-assembler "movhi" } } */
+/* { dg-final { scan-assembler "movls" } } */
+/* { dg-final { scan-assembler "movge" } } */
+/* { dg-final { scan-assembler "movls" } } */
+/* { dg-final { scan-assembler "movgt" } } */
+/* { dg-final { scan-assembler "movle" } } */
diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-3.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-3.c
new file mode 100644
index 000..8b0bd8a00f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-3.c
@@ -0,0 +1,36 @@
+/* Test some of the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#define DO(C) \
+void f##C(void) { char x; asm("" : "=@cc"#C(x)); if (!x) asm(""); asm(""); }
+
+DO(ne)
+DO(eq)
+DO(cs)
+DO(cc)
+DO(mi)
+DO(pl)
+DO(vs)
+DO(vc)
+DO(hi)
+DO(ls)
+DO(ge)
+DO(lt)
+DO(gt)
+DO(le)
+
+/* { dg-final { scan-assembler "bne" } } */
+/* { dg-final { scan-assembler "beq" } } */
+/* { dg-final { scan-assembler "bcs" } } */
+/* { dg-final { scan-assembler "bcc" } } */
+/* { dg-final { scan-assembler "bmi" } } */
+/* { dg-final { scan-assembler "bpl" } } */
+/* { dg-final { scan-assembler "bvs" } } */
+/* { dg-final { scan-assembler "bvc" } } */
+/* { dg-final { scan-assembler "bhi" } } */
+/* { dg-final { scan-assembler "bls" } } */
+/* { dg-final { scan-assembler "bge" } } */
+/* { dg-final { scan-assembler "blt" } } */
+/* { dg-final { scan-assembler "bgt" } } */
+/* { dg-final { scan-assembler "ble" } } */
diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-5.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-5.c
new file mode 100644
index 000..4d4394e1478
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-5.c
@@ -0,0 +1,30 @@
+/* Test error conditions of asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void f_B(void) { _Bool x; asm("" : "=@cccc"(x)); }
+void f_c(void) { char x; asm("" : "=@cccc"(x)); }
+void f_s(void) { short x; asm("" : "=@cccc"(x)); }
+void f_i(void) { int x; asm("" : "=@cccc"(x)); }
+void f_l(void) { long x; asm("" : "=@cccc"(x)); }
+void f_ll(void) { long long x; asm("" : "=@cccc"(x)); }
+
+void f_f(void)
+{
+  float x;
+  asm("" : "=@"(x)); /* { dg-error invalid type } */
+}
+
+void f_d(void)
+{
+  double x;
+  asm("" : "=@"(x)); /* { dg-error invalid type } */
+}
+
+struct S { int x[3]; };
+
+void f_S(void)
+{
+  struct S x;
+  asm("" : "=@"(x)); /* { dg-error invalid type } */
+}
diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-6.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-6.c
new file mode 100644
index 000..ef2e06afc37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-6.c
@@ -0,0 +1,43 @@
+/* Executable testcase for 'output flags.'  */
+/* { dg-do run } */
+
+int test_bits (long nzcv)
+{
+  long n, z, c, v;
+
+  __asm__ ("msr APSR_nzcvq, %[in]"
+  : "=@ccmi"(n), "=@cceq"(z), "=@cccs"(c), "=@ccvs"(v)
+  : [in] "r"(nzcv << 28));
+
+  return n * 8 + z * 4 + c * 2 + 

[PATCH 6/6] aarch64: Add testsuite checks for asm-flag

2019-11-08 Thread Richard Henderson
Inspired by the tests in gcc.target/i386.  Testing code generation,
diagnostics, and execution.

* gcc.target/aarch64/asm-flag-1.c: New test.
* gcc.target/aarch64/asm-flag-3.c: New test.
* gcc.target/aarch64/asm-flag-5.c: New test.
* gcc.target/aarch64/asm-flag-6.c: New test.
---
 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c | 34 +++
 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c | 36 
 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c | 30 +
 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c | 43 +++
 4 files changed, 143 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c

diff --git a/gcc/testsuite/gcc.target/aarch64/asm-flag-1.c 
b/gcc/testsuite/gcc.target/aarch64/asm-flag-1.c
new file mode 100644
index 000..e3e79c29b8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/asm-flag-1.c
@@ -0,0 +1,34 @@
+/* Test the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#ifndef __GCC_ASM_FLAG_OUTPUTS__
+#error "missing preprocessor define"
+#endif
+
+void f(char *out)
+{
+  asm(""
+  : "=@ccne"(out[0]), "=@cceq"(out[1]),
+   "=@cccs"(out[2]), "=@"(out[3]),
+   "=@ccmi"(out[4]), "=@ccpl"(out[5]),
+   "=@ccvs"(out[6]), "=@ccvc"(out[7]),
+   "=@cchi"(out[8]), "=@ccls"(out[9]),
+   "=@ccge"(out[10]), "=@cclt"(out[11]),
+   "=@ccgt"(out[12]), "=@ccle"(out[13]));
+}
+
+/* { dg-final { scan-assembler "cset.*, ne" } } */
+/* { dg-final { scan-assembler "cset.*, eq" } } */
+/* { dg-final { scan-assembler "cset.*, cs" } } */
+/* { dg-final { scan-assembler "cset.*, cc" } } */
+/* { dg-final { scan-assembler "cset.*, mi" } } */
+/* { dg-final { scan-assembler "cset.*, pl" } } */
+/* { dg-final { scan-assembler "cset.*, vs" } } */
+/* { dg-final { scan-assembler "cset.*, vc" } } */
+/* { dg-final { scan-assembler "cset.*, hi" } } */
+/* { dg-final { scan-assembler "cset.*, ls" } } */
+/* { dg-final { scan-assembler "cset.*, ge" } } */
+/* { dg-final { scan-assembler "cset.*, ls" } } */
+/* { dg-final { scan-assembler "cset.*, gt" } } */
+/* { dg-final { scan-assembler "cset.*, le" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/asm-flag-3.c 
b/gcc/testsuite/gcc.target/aarch64/asm-flag-3.c
new file mode 100644
index 000..8b0bd8a00f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/asm-flag-3.c
@@ -0,0 +1,36 @@
+/* Test some of the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#define DO(C) \
+void f##C(void) { char x; asm("" : "=@cc"#C(x)); if (!x) asm(""); asm(""); }
+
+DO(ne)
+DO(eq)
+DO(cs)
+DO(cc)
+DO(mi)
+DO(pl)
+DO(vs)
+DO(vc)
+DO(hi)
+DO(ls)
+DO(ge)
+DO(lt)
+DO(gt)
+DO(le)
+
+/* { dg-final { scan-assembler "bne" } } */
+/* { dg-final { scan-assembler "beq" } } */
+/* { dg-final { scan-assembler "bcs" } } */
+/* { dg-final { scan-assembler "bcc" } } */
+/* { dg-final { scan-assembler "bmi" } } */
+/* { dg-final { scan-assembler "bpl" } } */
+/* { dg-final { scan-assembler "bvs" } } */
+/* { dg-final { scan-assembler "bvc" } } */
+/* { dg-final { scan-assembler "bhi" } } */
+/* { dg-final { scan-assembler "bls" } } */
+/* { dg-final { scan-assembler "bge" } } */
+/* { dg-final { scan-assembler "blt" } } */
+/* { dg-final { scan-assembler "bgt" } } */
+/* { dg-final { scan-assembler "ble" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/asm-flag-5.c 
b/gcc/testsuite/gcc.target/aarch64/asm-flag-5.c
new file mode 100644
index 000..4d4394e1478
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/asm-flag-5.c
@@ -0,0 +1,30 @@
+/* Test error conditions of asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void f_B(void) { _Bool x; asm("" : "=@cccc"(x)); }
+void f_c(void) { char x; asm("" : "=@cccc"(x)); }
+void f_s(void) { short x; asm("" : "=@cccc"(x)); }
+void f_i(void) { int x; asm("" : "=@cccc"(x)); }
+void f_l(void) { long x; asm("" : "=@cccc"(x)); }
+void f_ll(void) { long long x; asm("" : "=@cccc"(x)); }
+
+void f_f(void)
+{
+  float x;
+  asm("" : "=@"(x)); /* { dg-error invalid type } */
+}
+
+void f_d(void)
+{
+  double x;
+  asm("" : "=@"(x)); /* { dg-error invalid type } */
+}
+
+struct S { int x[3]; };
+
+void f_S(void)
+{
+  struct S x;
+  asm("" : "=@"(x)); /* { dg-error invalid type } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/asm-flag-6.c 
b/gcc/testsuite/gcc.target/aarch64/asm-flag-6.c
new file mode 100644
index 000..d9b90b8e517
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/asm-flag-6.c
@@ -0,0 +1,43 @@
+/* Executable testcase for 'output flags.'  */
+/* { dg-do run } */
+
+int test_bits (long nzcv)
+{
+  long n, z, c, v;
+
+  __asm__ ("msr nzcv, %[in]"
+  : "=@ccmi"(n), "=@cceq"(z), 

[PATCH 4/6] arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__

2019-11-08 Thread Richard Henderson
Since all but a couple of lines is shared between the two targets,
enable them both at once.
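
For readers unfamiliar with the feature, a minimal sketch of what it
enables on these targets (AArch64-flavoured assembly, purely
illustrative; see the doc/extend.texi hunk for the documented syntax):

  int
  is_zero (long x)
  {
    int r;
    asm ("cmp %x1, #0" : "=@cceq" (r) : "r" (x));
    return r;   /* 1 if the comparison set the Z flag, 0 otherwise.  */
  }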

* config/arm/aarch-common-protos.h (arm_md_asm_adjust): Declare.
* config/arm/aarch-common.c (arm_md_asm_adjust): New.
* config/arm/arm-c.c (arm_cpu_builtins): Define
__GCC_ASM_FLAG_OUTPUTS__.
* config/arm/arm.c (TARGET_MD_ASM_ADJUST): New.
* config/aarch64/aarch64-c.c (aarch64_define_unconditional_macros):
Define __GCC_ASM_FLAG_OUTPUTS__.
* config/aarch64/aarch64.c (TARGET_MD_ASM_ADJUST): New.
* doc/extend.texi (FlagOutputOperands): Add documentation
for ARM and AArch64.
---
 gcc/config/arm/aarch-common-protos.h |   6 ++
 gcc/config/aarch64/aarch64-c.c   |   2 +
 gcc/config/aarch64/aarch64.c |   3 +
 gcc/config/arm/aarch-common.c| 131 +++
 gcc/config/arm/arm-c.c   |   1 +
 gcc/config/arm/arm.c |   3 +
 gcc/doc/extend.texi  |  33 +++
 7 files changed, 179 insertions(+)

diff --git a/gcc/config/arm/aarch-common-protos.h 
b/gcc/config/arm/aarch-common-protos.h
index 3bf38a104f6..f15cf336e9d 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -23,6 +23,8 @@
 #ifndef GCC_AARCH_COMMON_PROTOS_H
 #define GCC_AARCH_COMMON_PROTOS_H
 
+#include "hard-reg-set.h"
+
 extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
 extern bool aarch_rev16_p (rtx);
 extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
@@ -141,5 +143,9 @@ struct cpu_cost_table
   const struct vector_cost_table vect;
 };
 
+rtx_insn *
+arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+   vec<const char *> &constraints,
+   vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs);
 
 #endif /* GCC_AARCH_COMMON_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 7c322ca0813..0af859f1c14 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -69,6 +69,8 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
   builtin_define ("__ARM_FEATURE_UNALIGNED");
   builtin_define ("__ARM_PCS_AAPCS64");
   builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8);
+
+  builtin_define ("__GCC_ASM_FLAG_OUTPUTS__");
 }
 
 /* Undefine/redefine macros that depend on the current backend state and may
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1dfff331a5a..26de9879bc7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21947,6 +21947,9 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_STRICT_ARGUMENT_NAMING
 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
 
+#undef TARGET_MD_ASM_ADJUST
+#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index 965a07a43e3..8b98c8d3802 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -26,10 +26,16 @@
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "insn-modes.h"
 #include "tm.h"
 #include "rtl.h"
 #include "rtl-iter.h"
 #include "memmodel.h"
+#include "errors.h"
+#include "tree.h"
+#include "expr.h"
+#include "function.h"
+#include "emit-rtl.h"
 
 /* Return TRUE if X is either an arithmetic shift left, or
is a multiplication by a power of two.  */
@@ -520,3 +526,128 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx 
consumer)
   && !reg_overlap_mentioned_p (mul_result, mac_op0)
   && !reg_overlap_mentioned_p (mul_result, mac_op1));
 }
+
+/* Worker function for TARGET_MD_ASM_ADJUST.
+   We implement asm flag outputs.  */
+
+rtx_insn *
+arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+   vec<const char *> &constraints,
+   vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
+{
+  bool saw_asm_flag = false;
+
+  start_sequence ();
+  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
+{
+  const char *con = constraints[i];
+  if (strncmp (con, "=@cc", 4) != 0)
+   continue;
+  con += 4;
+  if (strchr (con, ',') != NULL)
+   {
+ error ("alternatives not allowed in % flag output");
+ continue;
+   }
+
+  machine_mode mode = CCmode;
+  rtx_code code = UNKNOWN;
+
+  switch (con[0])
+   {
+   case 'c':
+ if (con[1] == 'c' && con[2] == 0)
+   mode = CC_Cmode, code = GEU;
+ else if (con[1] == 's' && con[2] == 0)
+   mode = CC_Cmode, code = LTU;
+ break;
+   case 'e':
+ if (con[1] == 'q' && con[2] == 0)
+   mode = CC_NZmode, code = EQ;
+ break;
+   case 'g':
+ if (con[1] == 'e' && con[2] == 0)
+   mode = CCmode, code = GE;
+ else if (con[1] == 't' && con[2] == 0)
+   mode = CCmode, code = GT;
+ break;
+   case 'h':
+ if (con[1] == 'i' && 

[PATCH 2/6] arm: Fix the "c" constraint

2019-11-08 Thread Richard Henderson
The existing definition using register class CC_REG does not
work because CC_REGNUM does not support normal modes, and so
fails to match register_operand.  Use a non-register constraint
and the cc_register predicate instead.

* config/arm/constraints.md (c): Use cc_register predicate.
---
 gcc/config/arm/constraints.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index b76de81b85c..e02b678d26d 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -94,8 +94,9 @@
  "@internal
   Thumb only.  The union of the low registers and the stack register.")
 
-(define_register_constraint "c" "CC_REG"
- "@internal The condition code register.")
+(define_constraint "c"
+ "@internal The condition code register."
+ (match_operand 0 "cc_register"))
 
 (define_register_constraint "Cs" "CALLER_SAVE_REGS"
  "@internal The caller save registers.  Useful for sibcalls.")
-- 
2.17.1



[PATCH 3/6] arm: Rename CC_NOOVmode to CC_NZmode

2019-11-08 Thread Richard Henderson
CC_NZmode is a more accurate description of what we require
from the mode, and matches up with the definition in aarch64.

Rename noov_comparison_operator to nz_comparison_operator
in order to match.

* config/arm/arm-modes.def (CC_NZ): Rename from CC_NOOV.
* config/arm/predicates.md (nz_comparison_operator): Rename
from noov_comparison_operator.
* config/arm/arm.c (arm_select_cc_mode): Use CC_NZmode name.
(arm_gen_dicompare_reg): Likewise.
(maybe_get_arm_condition_code): Likewise.
(thumb1_final_prescan_insn): Likewise.
(arm_emit_coreregs_64bit_shift): Likewise.
* config/arm/arm.md (addsi3_compare0): Likewise.
(*addsi3_compare0_scratch, subsi3_compare0): Likewise.
(*mulsi3_compare0, *mulsi3_compare0_v6): Likewise.
(*mulsi3_compare0_scratch, *mulsi3_compare0_scratch_v6): Likewise.
(*mulsi3addsi_compare0, *mulsi3addsi_compare0_v6): Likewise.
(*mulsi3addsi_compare0_scratch): Likewise.
(*mulsi3addsi_compare0_scratch_v6): Likewise.
(*andsi3_compare0, *andsi3_compare0_scratch): Likewise.
(*zeroextractsi_compare0_scratch): Likewise.
(*ne_zeroextractsi, *ne_zeroextractsi_shifted): Likewise.
(*ite_ne_zeroextractsi, *ite_ne_zeroextractsi_shifted): Likewise.
(andsi_not_shiftsi_si_scc_no_reuse): Likewise.
(andsi_not_shiftsi_si_scc): Likewise.
(*andsi_notsi_si_compare0, *andsi_notsi_si_compare0_scratch): Likewise.
(*iorsi3_compare0, *iorsi3_compare0_scratch): Likewise.
(*xorsi3_compare0, *xorsi3_compare0_scratch): Likewise.
(*shiftsi3_compare0, *shiftsi3_compare0_scratch): Likewise.
(*not_shiftsi_compare0, *not_shiftsi_compare0_scratch): Likewise.
(*notsi_compare0, *notsi_compare0_scratch): Likewise.
(return_addr_mask, *check_arch2): Likewise.
(*arith_shiftsi_compare0, *arith_shiftsi_compare0_scratch): Likewise.
(*sub_shiftsi_compare0, *sub_shiftsi_compare0_scratch): Likewise.
(compare_scc splitters): Likewise.
(movcond_addsi): Likewise.
* config/arm/thumb2.md (thumb2_addsi3_compare0): Likewise.
(*thumb2_addsi3_compare0_scratch): Likewise.
(*thumb2_mulsi_short_compare0): Likewise.
(*thumb2_mulsi_short_compare0_scratch): Likewise.
(compare peephole2s): Likewise.
* config/arm/thumb1.md (thumb1_cbz): Use CC_NZmode and
nz_comparison_operator names.
(cbranchsi4_insn): Likewise.
---
 gcc/config/arm/arm.c |  12 +--
 gcc/config/arm/arm-modes.def |   4 +-
 gcc/config/arm/arm.md| 186 +--
 gcc/config/arm/predicates.md |   2 +-
 gcc/config/arm/thumb1.md |   8 +-
 gcc/config/arm/thumb2.md |  34 +++
 6 files changed, 123 insertions(+), 123 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index eddd3ca93ed..b620322318b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15379,7 +15379,7 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
  || GET_CODE (x) == ROTATERT
  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
-return CC_NOOVmode;
+return CC_NZmode;
 
   /* A comparison of ~reg with a const is really a special
  canoncialization of compare (~const, reg), which is a reverse
@@ -15495,11 +15495,11 @@ arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, 
rtx scratch)
  }
 
rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
-   cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
+   cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
 
rtx set
  = gen_rtx_SET (cc_reg,
-gen_rtx_COMPARE (CC_NOOVmode,
+gen_rtx_COMPARE (CC_NZmode,
  gen_rtx_IOR (SImode, x_lo, x_hi),
  const0_rtx));
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
@@ -23884,7 +23884,7 @@ maybe_get_arm_condition_code (rtx comparison)
return code;
   return ARM_NV;
 
-case E_CC_NOOVmode:
+case E_CC_NZmode:
   switch (comp_code)
{
case NE: return ARM_NE;
@@ -25307,7 +25307,7 @@ thumb1_final_prescan_insn (rtx_insn *insn)
  cfun->machine->thumb1_cc_insn = insn;
  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
  cfun->machine->thumb1_cc_op1 = const0_rtx;
- cfun->machine->thumb1_cc_mode = CC_NOOVmode;
+ cfun->machine->thumb1_cc_mode = CC_NZmode;
  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
{
  rtx src1 = XEXP (SET_SRC (set), 1);
@@ -30484,7 +30484,7 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx 
out, rtx in,
   else
 {
   /* We have a shift-by-register.  */
-  rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
+  rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);

[PATCH 0/6] Implement asm flag outputs for arm + aarch64

2019-11-08 Thread Richard Henderson
I've put the implementation into config/arm/aarch-common.c, so
that it can be shared between the two targets.  This required
a little bit of cleanup to the CC modes and constraints to get
the two targets to match up.

I really should have done more than just x86 years ago, so that
it would be done now and I could just use it in the kernel...  ;-)
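
As a concrete illustration of what the series enables (a sketch, not taken
from the patches; the function name lt_p and the exact asm text are
assumptions), a flag-output asm on AArch64 looks roughly like this, with the
"=@cc<cond>" output constraint handing a condition flag straight back to the
compiler instead of materialising it with a cset:

int
lt_p (int x, int y)
{
  int lt;
#ifdef __GCC_ASM_FLAG_OUTPUTS__
  /* AArch64 spelling; on 32-bit arm the template would be "cmp %1, %2".
     No "cc" clobber is given because the flags are described as an output.  */
  __asm__ ("cmp %w1, %w2"
           : "=@cclt" (lt)
           : "r" (x), "r" (y));
#else
  lt = x < y;   /* fallback when the compiler lacks flag outputs */
#endif
  return lt;
}

Guarding on __GCC_ASM_FLAG_OUTPUTS__, as above, is also the intended way for
user code to detect the feature at compile time.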


r~


Richard Henderson (6):
  aarch64: Add "c" constraint
  arm: Fix the "c" constraint
  arm: Rename CC_NOOVmode to CC_NZmode
  arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__
  arm: Add testsuite checks for asm-flag
  aarch64: Add testsuite checks for asm-flag

 gcc/config/arm/aarch-common-protos.h  |   6 +
 gcc/config/aarch64/aarch64-c.c|   2 +
 gcc/config/aarch64/aarch64.c  |   3 +
 gcc/config/arm/aarch-common.c | 131 
 gcc/config/arm/arm-c.c|   1 +
 gcc/config/arm/arm.c  |  15 +-
 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c |  34 
 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c |  36 
 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c |  30 +++
 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c |  43 
 gcc/testsuite/gcc.target/arm/asm-flag-1.c |  35 
 gcc/testsuite/gcc.target/arm/asm-flag-3.c |  36 
 gcc/testsuite/gcc.target/arm/asm-flag-5.c |  30 +++
 gcc/testsuite/gcc.target/arm/asm-flag-6.c |  43 
 gcc/config/aarch64/constraints.md |   4 +
 gcc/config/arm/arm-modes.def  |   4 +-
 gcc/config/arm/arm.md | 186 +-
 gcc/config/arm/constraints.md |   5 +-
 gcc/config/arm/predicates.md  |   2 +-
 gcc/config/arm/thumb1.md  |   8 +-
 gcc/config/arm/thumb2.md  |  34 ++--
 gcc/doc/extend.texi   |  33 
 22 files changed, 596 insertions(+), 125 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-6.c

-- 
2.17.1



[PATCH 1/6] aarch64: Add "c" constraint

2019-11-08 Thread Richard Henderson
Mirror arm in letting "c" match the condition code register.

* config/aarch64/constraints.md (c): New constraint.
---
 gcc/config/aarch64/constraints.md | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/aarch64/constraints.md 
b/gcc/config/aarch64/constraints.md
index d0c3dd5bc1f..b9e5d13e851 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -39,6 +39,10 @@
 (define_register_constraint "y" "FP_LO8_REGS"
   "Floating point and SIMD vector registers V0 - V7.")
 
+(define_constraint "c"
+ "@internal The condition code register."
+  (match_operand 0 "cc_register"))
+
 (define_constraint "I"
  "A constant that can be used with an ADD operation."
  (and (match_code "const_int")
-- 
2.17.1



Re: [PATCH] Fix PR92324

2019-11-08 Thread Richard Sandiford
Richard Biener  writes:
> I've been sitting on this for a few days since I'm not 100% happy
> with how the code looks.  There are possibly still holes in it
> (chains with mixed signed/unsigned adds for example might pick
> up signed adds in the epilogue), but the wrong-code cases should
> work fine now.  I'm probably going to follow up with some
> mass renaming of variable/parameter names to make it more clear
> which stmt / type we are actually looking at ...
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Does this look like the right way of updating neutral_op_for_slp_reduction?
It now needs to know whether the caller is using STMT_VINFO_REDUC_VECTYPE
(for an epilogue value) or STMT_VINFO_REDUC_VECTYPE_IN (for a PHI argument).

Fixes various gcc.target/aarch64/sve/slp_* tests, will give it a
full test on aarch64-linux-gnu.

Thanks,
Richard


2019-11-08  Richard Sandiford  

gcc/
* tree-vect-loop.c (neutral_op_for_slp_reduction): Take the
vector type as an argument rather than reading it from the
stmt_vec_info.
(vect_create_epilog_for_reduction): Update accordingly,
passing the STMT_VINFO_REDUC_VECTYPE.
(vectorizable_reduction): Likewise.
(vect_transform_cycle_phi): Likewise, but passing the
STMT_VINFO_REDUC_VECTYPE_IN.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2019-11-08 09:06:29.654896085 +
+++ gcc/tree-vect-loop.c2019-11-08 10:41:54.498861004 +
@@ -2586,17 +2586,17 @@ reduction_fn_for_scalar_code (enum tree_
 
 /* If there is a neutral value X such that SLP reduction NODE would not
be affected by the introduction of additional X elements, return that X,
-   otherwise return null.  CODE is the code of the reduction.  REDUC_CHAIN
-   is true if the SLP statements perform a single reduction, false if each
-   statement performs an independent reduction.  */
+   otherwise return null.  CODE is the code of the reduction and VECTOR_TYPE
+   is the vector type that would hold element X.  REDUC_CHAIN is true if
+   the SLP statements perform a single reduction, false if each statement
+   performs an independent reduction.  */
 
 static tree
-neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
- bool reduc_chain)
+neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type,
+ tree_code code, bool reduc_chain)
 {
   vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
   stmt_vec_info stmt_vinfo = stmts[0];
-  tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
   tree scalar_type = TREE_TYPE (vector_type);
   class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
   gcc_assert (loop);
@@ -4216,11 +4216,6 @@ vect_create_epilog_for_reduction (stmt_v
 = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
   enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
   internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
-  tree neutral_op = NULL_TREE;
-  if (slp_node)
-neutral_op
-  = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code,
- REDUC_GROUP_FIRST_ELEMENT (stmt_info));
   stmt_vec_info prev_phi_info;
   tree vectype;
   machine_mode mode;
@@ -4267,11 +4262,15 @@ vect_create_epilog_for_reduction (stmt_v
   gcc_assert (vectype);
   mode = TYPE_MODE (vectype);
 
+  tree neutral_op = NULL_TREE;
   tree initial_def = NULL;
   tree induc_val = NULL_TREE;
   tree adjustment_def = NULL;
   if (slp_node)
-;
+neutral_op
+  = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis,
+ vectype, code,
+ REDUC_GROUP_FIRST_ELEMENT (stmt_info));
   else
 {
   /* Get at the scalar def before the loop, that defines the initial value
@@ -6210,7 +6209,7 @@ vectorizable_reduction (stmt_vec_info st
   tree neutral_op = NULL_TREE;
   if (slp_node)
 neutral_op = neutral_op_for_slp_reduction
-  (slp_node_instance->reduc_phis, orig_code,
+  (slp_node_instance->reduc_phis, vectype_out, orig_code,
REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL);
 
   if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
@@ -6793,7 +6792,7 @@ vect_transform_cycle_phi (stmt_vec_info
   gcc_assert (slp_node == slp_node_instance->reduc_phis);
   stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
   tree neutral_op
-   = neutral_op_for_slp_reduction (slp_node,
+   = neutral_op_for_slp_reduction (slp_node, vectype_in,
STMT_VINFO_REDUC_CODE (reduc_info),
first != NULL);
   get_initial_defs_for_reduction (slp_node_instance->reduc_phis,


Re: [PATCH][vect] PR 92351: When peeling for alignment make alignment of epilogues unknown

2019-11-08 Thread Richard Biener
On Thu, 7 Nov 2019, Andre Vieira (lists) wrote:

> 
> 
> On 07/11/2019 14:00, Richard Biener wrote:
> > On Thu, 7 Nov 2019, Andre Vieira (lists) wrote:
> > 
> >> Hi,
> >>
> >> PR92351 reports a bug in which a wrongly aligned load is generated for an
> >> epilogue of a main loop for which we peeled for alignment.  There is no way
> >> to
> >> guarantee that epilogue data accesses are aligned when the main loop is
> >> peeling for alignment.
> >>
> >> I also had to split vect-peel-2.c as there were scans there for the number
> >> of
> >> unaligned accesses that were vectorized, thanks to this change that now
> >> depends on whether we are vectorizing the epilogue, which will also contain
> >> unaligned accesses.  Since not all targets need to be able to vectorize the
> >> epilogue I decided to disable epilogue vectorization for the version in
> >> which
> >> we scan the dumps and add a version that attempts epilogue vectorization
> >> but
> >> does not scan the dumps.
> >>
> >> Bootstrapped and regression tested on x86_64 and aarch64.
> >>
> >> Is this OK for trunk?
> > 
> > @@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info
> > *dr_info)
> >   = exact_div (vect_calculate_target_alignment (dr_info),
> > BITS_PER_UNIT);
> > DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
> >   +  /* If the main loop has peeled for alignment we have no way of knowing
> > + whether the data accesses in the epilogues are aligned.  We can't at
> > + compile time answer the question whether we have entered the main
> > loop
> > or
> > + not.  Fixes PR 92351.  */
> > +  if (loop_vinfo)
> > +{
> > +  loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO
> > (loop_vinfo);
> > +  if (orig_loop_vinfo
> > + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
> > +   return;
> > +}
> > 
> > so I'm not sure this is the correct place to do the fixup.  Isn't the
> > above done when analyzing the loops with different vector size/mode?
> > So we don't yet know whether we analyze the loop as epilogue or
> > not epilogue?  Looks like we at the moment always choose the
> > very first loop we analyze successfully as "main" loop?
> > 
> > So, can we do this instead in update_epilogue_loop_vinfo?  There
> > we should also know whether we created the jump-around the
> > main vect loop.
> > 
> 
> So we do know we are analyzing it as an epilogue; that is the only case in
> which orig_loop_vinfo is set.
> 
> The reason why we shouldn't do it in update_epilogue_loop_vinfo is that the
> target might not know how to vectorize unaligned memory accesses for the
> given VF. Or maybe it does but it is too expensive; I don't know if we
> currently check that, though. I do not have an example, but this is why I
> believe it would be better to do it during analysis. I thought it had been you
> who alerted me to this, but maybe it was Sandiford, or maybe I dreamt it up ;)

It was probably me, yes.  But don't we have a catch-22 now?  If we
have multiple vector sizes and, as Richard wants, first compute
the "cheapest" one to use as the main vectorized body, don't we then have
to re-analyze the smaller vector sizes for epilogue use?

So how do we handle this situation at the moment?

I think during alignment peeling analysis we check whether a DR
absolutely needs to be aligned, that is, we use
vect_supportable_dr_alignment (*, true).  If that returns
dr_unaligned_unsupported we should probably simply disable
epilogue vectorization if we didn't version for alignment
(or we know the vectorized loop was entered).

So, during analysis reject epilogues that have DRs with
dr_unaligned_unsupported but allow them as "main" loops still
(so disable epilogue vectorization for a main loop with such DRs).

Then at update_epilogue_loop_vinfo time simply make alignment
unknown.

Would that work?

Thanks,
Richard.


Re: [PATCH] Ensure x86_64 TYPE_EMPTY_P args have distinct addresses (PR c++/92384)

2019-11-08 Thread Richard Biener
On Thu, 7 Nov 2019, Jakub Jelinek wrote:

> Hi!
> 
> TYPE_EMPTY_P arguments (which right now only x86_64 uses) have
> data->entry_parm == data->stack_parm being a stack slot with zero size in
> the stack parameter passing area.
> The problem with that is that in C++ they should have distinct addresses
> from other objects, which is not the case right now, as their address is
> equal to that of whatever argument comes after them on the stack.
> 
> The following patch, in the third hunk, forces stack_parm to be NULL if the
> TYPE_EMPTY_P argument has its address taken, so that a new slot will be created for
> it, and the other two hunks just make sure we don't actually copy anything,
> as the TYPE_EMPTY_P types contain solely padding and so it is ok if they
> are just allocated on the stack, but are not copied.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2019-11-07  Jakub Jelinek  
> 
>   PR c++/92384
>   * function.c (assign_parm_setup_block, assign_parm_setup_stack): Don't
>   copy TYPE_EMPTY_P arguments from data->entry_parm to data->stack_parm
>   slot.
>   (assign_parms): For TREE_ADDRESSABLE parms with TYPE_EMPTY_P type
>   force creation of a unique data.stack_parm slot.
> 
>   * g++.dg/torture/pr92384.C: New test.
> 
> --- gcc/function.c.jj 2019-10-01 18:16:13.205128183 +0200
> +++ gcc/function.c2019-11-06 09:30:47.097353832 +0100
> @@ -3087,7 +3087,7 @@ assign_parm_setup_block (struct assign_p
>   move_block_from_reg (REGNO (entry_parm), mem,
>size_stored / UNITS_PER_WORD);
>  }
> -  else if (data->stack_parm == 0)
> +  else if (data->stack_parm == 0 && !TYPE_EMPTY_P (data->arg.type))
>  {
>push_to_sequence2 (all->first_conversion_insn, 
> all->last_conversion_insn);
>emit_block_move (stack_parm, data->entry_parm, GEN_INT (size),
> @@ -3488,7 +3488,9 @@ assign_parm_setup_stack (struct assign_p
>dest = validize_mem (copy_rtx (data->stack_parm));
>src = validize_mem (copy_rtx (data->entry_parm));
>  
> -  if (MEM_P (src))
> +  if (TYPE_EMPTY_P (data->arg.type))
> + /* Empty types don't really need to be copied.  */;
> +  else if (MEM_P (src))
>   {
> /* Use a block move to handle potentially misaligned entry_parm.  */
> if (!to_conversion)
> @@ -3643,6 +3645,16 @@ assign_parms (tree fndecl)
>   {
> assign_parm_find_stack_rtl (parm, &data);
> assign_parm_adjust_entry_rtl (&data);
> +   /* For arguments that occupy no space in the parameter
> +  passing area, have non-zero size and have address taken,
> +  force creation of a stack slot so that they have distinct
> +  address from other parameters.  */
> +   if (TYPE_EMPTY_P (data.arg.type)
> +   && TREE_ADDRESSABLE (parm)
> +   && data.entry_parm == data.stack_parm
> +   && MEM_P (data.entry_parm)
> +   && int_size_in_bytes (data.arg.type))
> + data.stack_parm = NULL_RTX;
>   }
>/* Record permanently how this parm was passed.  */
>if (data.arg.pass_by_reference)
> --- gcc/testsuite/g++.dg/torture/pr92384.C.jj 2019-11-06 10:01:39.794413342 
> +0100
> +++ gcc/testsuite/g++.dg/torture/pr92384.C2019-11-06 10:02:44.281441632 
> +0100
> @@ -0,0 +1,38 @@
> +// PR c++/92384
> +// { dg-do run }
> +
> +struct S {};
> +struct T : public S { S a, b, c, d, e, f, g, h, i, j, k, l, m; };
> +struct U { long long a, b, c; };
> +
> +U
> +foo (S, S, S, T, T, T, U g)
> +{
> +  return g;
> +}
> +
> +__attribute__((noipa)) bool
> +bar (S a, S b, S c, T d, T e, T f, U g, void **h)
> +{
> +  h[0] = (void *) &a;
> +  h[1] = (void *) &b;
> +  h[2] = (void *) &c;
> +  h[3] = (void *) &d;
> +  h[4] = (void *) &e;
> +  h[5] = (void *) &f;
> +  h[6] = (void *) &g;
> +  asm volatile ("" : : "r" (h) : "memory");
> +  return (h[0] != h[1] && h[1] != h[2] && h[2] != h[3]
> +   && h[3] != h[4] && h[4] != h[5] && h[5] != h[6]);
> +}
> +
> +int
> +main ()
> +{
> +  S a;
> +  T b;
> +  U c = { 1, 2, 3 };
> +  void *d[7];
> +  if (!bar (a, a, a, b, b, b, c, d))
> +__builtin_abort ();
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH] Handle gimple_clobber_p stmts in store-merging (PR target/92038)

2019-11-08 Thread Richard Biener
On Thu, 7 Nov 2019, Jakub Jelinek wrote:

> Hi!
> 
> The following patch adds handling of clobbers in store-merging.  The intent
> is that if we have a clobber followed by some stores into the clobbered area,
> even if we don't store all the bytes in the area, we can avoid masking, because
> the non-stored bytes are undefined and in some cases we can even overwrite
> the whole area with the same or smaller number of stores compared to the
> original IL.
> Clobbers aren't removed from the IL, even if the following stores completely
> cover the whole area, as clobbers carry important additional information
> that the old value is gone, e.g. for tail call discovery: if the address is taken
> before the clobber but not after it, removing the clobbers would disable
> tail call optimization.
> The patch right now treats the clobbered non-stored bytes as non-masked zero
> stores, except that we don't add stores to whole words etc. if there are no
> other overlapping stores; I have a separate patch that also computed
> defined_mask which contained whether some bytes are just undefined and we
> could in theory try different bit patterns in those bytes, but in the end
> decided it is too complicated and if needed, could be done as a follow-up.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2019-11-07  Jakub Jelinek  
> 
>   PR target/92038
>   * gimple-ssa-store-merging.c (find_constituent_stores): For return
>   value only, return non-NULL if there is a single non-clobber
>   constituent store even if there are constituent clobbers and return
>   one of clobber constituent stores if all constituent stores are
>   clobbers.
>   (split_group): Handle clobbers.
>   (imm_store_chain_info::output_merged_store): When computing
>   bzero_first, look after all clobbers at the start.  Don't count
>   clobber stmts in orig_num_stmts, except if the first orig store is
>   a clobber covering the whole area and split_stores cover the whole
>   area, consider equal number of stmts ok.  Punt if split_stores
>   contains only ->orig stores and their number plus number of original
>   clobbers is equal to original number of stmts.  For ->orig, look past
>   clobbers in the constituent stores.
>   (imm_store_chain_info::output_merged_stores): Don't remove clobber
>   stmts.
>   (rhs_valid_for_store_merging_p): Don't return false for clobber stmt
>   rhs.
>   (store_valid_for_store_merging_p): Allow clobber stmts.
>   (verify_clear_bit_region_be): Fix up a thinko in function comment.
> 
>   * g++.dg/opt/store-merging-1.C: New test.
>   * g++.dg/opt/store-merging-2.C: New test.
>   * g++.dg/opt/store-merging-3.C: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj 2019-11-07 09:50:38.029447052 +0100
> +++ gcc/gimple-ssa-store-merging.c2019-11-07 12:13:15.048531180 +0100
> @@ -3110,7 +3110,8 @@ split_store::split_store (unsigned HOST_
>  /* Record all stores in GROUP that write to the region starting at BITPOS and
> is of size BITSIZE.  Record infos for such statements in STORES if
> non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
> -   if there is exactly one original store in the range.  */
> +   if there is exactly one original store in the range (in that case ignore
> +   clobber stmts, unless there are only clobber stmts).  */
>  
>  static store_immediate_info *
>  find_constituent_stores (class merged_store_group *group,
> @@ -3146,16 +3147,24 @@ find_constituent_stores (class merged_st
>if (stmt_start >= end)
>   return ret;
>  
> +  if (gimple_clobber_p (info->stmt))
> + {
> +   if (stores)
> + stores->safe_push (info);
> +   if (ret == NULL)
> + ret = info;
> +   continue;
> + }
>if (stores)
>   {
> stores->safe_push (info);
> -   if (ret)
> +   if (ret && !gimple_clobber_p (ret->stmt))
>   {
> ret = NULL;
> second = true;
>   }
>   }
> -  else if (ret)
> +  else if (ret && !gimple_clobber_p (ret->stmt))
>   return NULL;
>if (!second)
>   ret = info;
> @@ -3347,13 +3356,17 @@ split_group (merged_store_group *group,
>  
>if (bzero_first)
>  {
> -  first = 1;
> +  store_immediate_info *gstore;
> +  FOR_EACH_VEC_ELT (group->stores, first, gstore)
> + if (!gimple_clobber_p (gstore->stmt))
> +   break;
> +  ++first;
>ret = 1;
>if (split_stores)
>   {
> split_store *store
> - = new split_store (bytepos, group->stores[0]->bitsize, align_base);
> -   store->orig_stores.safe_push (group->stores[0]);
> + = new split_store (bytepos, gstore->bitsize, align_base);
> +   store->orig_stores.safe_push (gstore);
> store->orig = true;
> any_orig = true;
> split_stores->safe_push (store);
> @@ -3377,6 +3390,7 @@ 

Re: [PATCH, Fortran] Allow CHARACTER literals in assignments and DATA statements

2019-11-08 Thread Tobias Burnus

On 11/8/19 10:45 AM, Mark Eggleston wrote:
PING - OK to commit? I have a pending patch that needs this in place. 
Thanks for the ping. — Any chance that you also work on some of the 
general issues once in a while (cf. Bugzilla remark below); e.g. one bug 
per week or fortnight? (It can be a simple one, just so the number 
of bugs does not keep growing.)


@Anyone – side question: Are there other pending patches? Ignoring one 
OpenMP and several OpenACC patches, I am currently aware of:
* [Needs review] José's PR92142 - CFI_setpointer corrupts descriptor 
patch [FSF copyright assignment exists]
* [Nearly ready, but needs fixes/follow-up patch] Steve's PR91178 for 
code like 'call foo(a, (a))'
* [Approved but not committed] Paul's PR92123 patch for bind(C) + alloc 
scalars.


Side remark: On the bind(C) side, I think PR 92189 should be fixed. 
Additionally, there are several recent reports of segfaults and 
regressions when looking at Bugzilla (component = fortran, sort by change date).



Back to this pending patch:

On 11/5/19 10:55 AM, Mark Eggleston wrote:
I noticed that warnings were not produced for conversion to logicals; 
re-ordering of an if..else if sequence fixes that problem. Additional 
test cases have been added.

Looks as if reviewing and revisiting the patch was worthwhile :-) Thanks!

Please find attached the updated patch, the change logs follow. OK to 
commit?


LGTM. Thanks for patch.

Cheers,

Tobias

PS: I still find it interesting which Fortran code gets used; from a 
supercomputing centre, I heard that everything from 
never-touched-since FORTRAN IV code to code using the latest 
Fortran features is in use. (But what I find most amazing is code that gets 
newly written in a FORTRAN 66 style with some sparse F20xx 
features in between – like the recent "publicx" example [instead of 
'public x' or 'public::x'; spaces aren't that expensive anymore].)




gcc/fortran/ChangeLog

    Jim MacArthur  
    Mark Eggleston  

    * arith.c (hollerith2representation): Use 
OPT_Wcharacter_truncation in
    call to gfc_warning.  Add character2representation, 
gfc_character2int,

    gfc_character2real, gfc_character2complex and gfc_character2logical.
    * arith.h: Add prototypes for gfc_character2int, gfc_character2real,
    gfc_character2complex and gfc_character2logical.
    * expr.c (gfc_check_assign): Return true if left hand side is numeric
    or logical and the right hand side is character and of kind=1.
    * gfortran.texi: Add -fdec-char-conversions.
    * intrinsic.c (add_conversions): Add conversions from character to
    integer, real, complex and logical types for their supported kinds.
    (gfc_convert_type_warn): Reorder if..else if.. sequence so that 
warnings

    are produced for conversion to logical.
    * invoke.texi: Add option to list of options.
    * invoke.texi: Add Character conversion subsection to Extensions
    section.
    * lang.opt: Add new option.
    * options.c (set_dec_flags): Add SET_BITFLAG for
    flag_dec_char_conversions.
    * resolve.c (resolve_ordindary_assign): Issue error if the left hand
    side is numeric or logical and the right hand side is a character
    variable.
    * simplify.c (gfc_convert_constant): Assign the conversion function
    depending on destination type.
    * trans-const.c (gfc_constant_to_tree): Use OPT_Wsurprising in
    gfc_warning allowing the warning to be switched off only if
    flag_dec_char_conversions is enabled.

gcc/testsuite/gfortran.dg

    Jim MacArthur 
    Mark Eggleston 

    PR fortran/89103
    * gfortran.dg/dec_char_conversion_in_assignment_1.f90: New test.
    * gfortran.dg/dec_char_conversion_in_assignment_2.f90: New test.
    * gfortran.dg/dec_char_conversion_in_assignment_3.f90: New test.
    * gfortran.dg/dec_char_conversion_in_assignment_4.f90: New test.
    * gfortran.dg/dec_char_conversion_in_assignment_5.f90: New test.
    * gfortran.dg/dec_char_conversion_in_assignment_6.f90: New test.
    * gfortran.dg/dec_char_conversion_in_assignment_7.f90: New test.
    * gfortran.dg/dec_char_conversion_in_assignment_8.f90: New test.
    * gfortran.dg/dec_char_conversion_in_data_1.f90: New test.
    * gfortran.dg/dec_char_conversion_in_data_2.f90: New test.
    * gfortran.dg/dec_char_conversion_in_data_3.f90: New test.
    * gfortran.dg/dec_char_conversion_in_data_4.f90: New test.
    * gfortran.dg/dec_char_conversion_in_data_5.f90: New test.
    * gfortran.dg/dec_char_conversion_in_data_6.f90: New test.
    * gfortran.dg/dec_char_conversion_in_data_7.f90: New test.
    * gfortran.dg/hollerith5.f90: Add -Wsurprising to options.
    * gfortran.dg/hollerith_legacy.f90: Add -Wsurprising to options.
    * gfortran.dg/no_char_to_numeric_assign.f90: New test.


[PATCH] Use op->resimplify from genmatch

2019-11-08 Thread Richard Biener


Allows more refactoring here.

Bootstrapped/tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-11-08  Richard Biener  

* genmatch.c (expr::gen_transform): Use the resimplify
member function instead of hard-coding the gimple_resimplifyN variant.
(dt_simplify::gen_1): Likewise.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 277954)
+++ gcc/genmatch.c  (working copy)
@@ -2534,12 +2534,9 @@ expr::gen_transform (FILE *f, int indent
   for (unsigned i = 0; i < ops.length (); ++i)
fprintf (f, ", _o%d[%u]", depth, i);
   fprintf (f, ");\n");
+  fprintf_indent (f, indent, "tem_op.resimplify (lseq, valueize);\n");
   fprintf_indent (f, indent,
- "gimple_resimplify%d (lseq, _op, valueize);\n",
- ops.length ());
-  fprintf_indent (f, indent,
- "_r%d = maybe_push_res_to_seq (_op, lseq);\n",
- depth);
+ "_r%d = maybe_push_res_to_seq (_op, lseq);\n", depth);
   fprintf_indent (f, indent,
  "if (!_r%d) return false;\n",
  depth);
@@ -3413,8 +3410,7 @@ dt_simplify::gen_1 (FILE *f, int indent,
 gimple_build w/o actually building the stmt.  */
  if (!is_predicate)
fprintf_indent (f, indent,
-   "gimple_resimplify%d (lseq, res_op,"
-   " valueize);\n", e->ops.length ());
+   "res_op->resimplify (lseq, valueize);\n");
}
   else if (result->type == operand::OP_CAPTURE
   || result->type == operand::OP_C_EXPR)


Re: Free some more stuff in free_lang_data

2019-11-08 Thread Richard Biener
On Thu, 7 Nov 2019, Jan Hubicka wrote:

> Hi,
> as every year, I went through the reasons why types with the same ODR name are not
> merged in Firefox streaming.  Here are a few problems I caught.  The remaining
> issues I understand are
>  1) odr violations (which is OK of course)
>  2) keyed vtables: sometimes the vtable decl is weak and sometimes it is
> external (I have a WIP patch for ipa-devirt to stream BINFO_VTABLE off
> the main stream, but at least for Firefox it does not have any
> dramatic effect on the size of the stream)
>  3) differences in the attribute list (some attributes, like aligned, do
> not make sense on incomplete types, but our FEs let us add them
> there, so I think there is no canonical incomplete variant here)
>  4) TYPELESS storage differences
>  5) if a type is not merged, all types referring to it via
> TYPE_CONTEXT are not merged either.  This still causes propagation from one
> type to another
> 
> There are still some cases which I did not track down, but we have only a
> couple hundred unmerged types, so the situation seems to be mostly under
> control.
> 
> There is about 700MB of trees in the global stream for Firefox plus 200MB of
> in_decl_state vectors, and I am not sure if there are easy ways to cut it
> down.
> 
> Kind            Nodes   Bytes
> 
> constructors      12k    308k
> vecs              15k   1593k
> refs              55k   2702k
> binfos            63k   6670k
> constants        253k   9760k
> random kinds     654k     25M
> exprs            744k     23M
> identifiers     1217k     47M
> decls           1583k    293M
> types           1822k    298M
> 
> Total           6423k    709M
> 
> 
> union_type            4335
> real_cst              7244
> mem_ref               8429
> array_type             10k
> enumeral_type          10k
> constructor            12k
> tree_vec               15k
> array_ref              15k
> nop_expr               19k
> component_ref          30k
> pointer_plus_expr      41k
> tree_binfo             63k
> var_decl               78k
> function_type         106k
> integer_cst           110k
> reference_type        125k
> string_cst            132k
> type_decl             203k
> record_type           335k
> field_decl            355k
> method_type           583k
> pointer_type          642k
> tree_list             654k
> addr_expr             683k
> function_decl         941k
> identifier_node      1217k
> 
> 
> 
> 
> Bootstrapped/regtested x86_64-linux, OK?

OK.

Thanks,
Richard.

> Honza
> 
>   * tree.c (fld_incomplete_type_of): Clear TYPE_FINAL_P, TYPE_EMPTY_P,
>   ENUM_IS_OPAQUE and ENUM_IS_SCOPED.
>   (free_lang_data_in_binfo): Clear TREE_PUBLIC in BINFO
>   (free_lang_data_in_type): Clear ENUM_IS_OPAQUE and ENUM_IS_SCOPED.
> Index: tree.c
> ===
> --- tree.c(revision 277796)
> +++ tree.c(working copy)
> @@ -5383,9 +5387,15 @@ fld_incomplete_type_of (tree t, class fr
> TYPE_TYPELESS_STORAGE (copy) = 0;
> TYPE_FIELDS (copy) = NULL;
> TYPE_BINFO (copy) = NULL;
> +   TYPE_FINAL_P (copy) = 0;
> +   TYPE_EMPTY_P (copy) = 0;
>   }
> else
> - TYPE_VALUES (copy) = NULL;
> + {
> +   TYPE_VALUES (copy) = NULL;
> +   ENUM_IS_OPAQUE (copy) = 0;
> +   ENUM_IS_SCOPED (copy) = 0;
> + }
>  
> /* Build copy of TYPE_DECL in TYPE_NAME if necessary.
>This is needed for ODR violation warnings to come out right (we
> @@ -5468,6 +5478,7 @@ free_lang_data_in_binfo (tree binfo)
>BINFO_INHERITANCE_CHAIN (binfo) = NULL_TREE;
>BINFO_SUBVTT_INDEX (binfo) = NULL_TREE;
>BINFO_VPTR_FIELD (binfo) = NULL_TREE;
> +  TREE_PUBLIC (binfo) = 0;
>  
>FOR_EACH_VEC_ELT (*BINFO_BASE_BINFOS (binfo), i, t)
>  free_lang_data_in_binfo (t);
> @@ -5569,6 +5580,8 @@ free_lang_data_in_type (tree type, class
>  {
>if (TREE_CODE (type) == ENUMERAL_TYPE)
>   {
> +   ENUM_IS_OPAQUE (type) = 0;
> +   ENUM_IS_SCOPED (type) = 0;
> /* Type values are used only for C++ ODR checking.  Drop them
>for all type variants and non-ODR types.
>For ODR types the data is freed in free_odr_warning_data.  */
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 

[PATCH] Rename identifiers in a test-case.

2019-11-08 Thread Martin Liška

Hi.

I renamed identifiers and reformatted the test-case.
I'm going to install the patch.

Martin

gcc/testsuite/ChangeLog:

2019-11-08  Martin Liska  

* g++.dg/pr92339.C: Rename identifiers to something
more readable.
---
 gcc/testsuite/g++.dg/pr92339.C | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)


diff --git a/gcc/testsuite/g++.dg/pr92339.C b/gcc/testsuite/g++.dg/pr92339.C
index 5bf15b08b17..c94b1d9bc71 100644
--- a/gcc/testsuite/g++.dg/pr92339.C
+++ b/gcc/testsuite/g++.dg/pr92339.C
@@ -1,10 +1,19 @@
 /* PR c++/92339  */
 /* { dg-options "-std=c++11" } */
 
-class a {
-  template  struct c { c(a *); };
+class classA
+{
+  template  struct typeC
+  {
+typeC (classA *);
+  };
   int m_fn1();
-  unsigned long d;
-  using e = c;
+  unsigned long fieldD;
+  using typeE = typeC;
 };
-int a::m_fn1() { e(this); return 0; }
+int
+classA::m_fn1 ()
+{
+  typeE (this);
+  return 0;
+}



Re: [PATCH, Fortran] Allow CHARACTER literals in assignments and DATA statements

2019-11-08 Thread Mark Eggleston

PING - OK to commit? I have a pending patch that needs this in place.

On 05/11/2019 09:55, Mark Eggleston wrote:


On 25/10/2019 09:03, Tobias Burnus wrote:

Hello Mark, hi all,

On 10/21/19 4:40 PM, Mark Eggleston wrote:
This is an extension to support a legacy feature supported by other 
compilers such as flang and the sun compiler.  As I understand it 
this feature is associated with DEC so it enabled using 
-fdec-char-conversions and by -fdec.


It allows character literals to be assigned to numeric (INTEGER, 
REAL, COMPLEX) and LOGICAL variables by direct assignment or in DATA 
statements.


    * arith.c (hollerith2representation): Use 
OPT_Wcharacter_truncation in

    call to gfc_warning.


This has two effects: First, it permits toggling the warning on and 
off; secondly, it disables the warning by default. It is enabled by 
-Wall, however. – I think that's acceptable: while Holleriths are 
less transparent than normal strings, for normal strings the result is 
identical.




+ result->representation.string[result_len] = '\0'; /* For debugger  */


Tiny nit: full stop after 'debugger'.

Done.



+/* Convert character to integer. The constant will be padded or 
truncated. */


And here an extra space before '*/'.

Done.



+Allowing character literals to be used in a similar way to 
Hollerith constants

+is a non-standard extension.
+
+Character literals can be used in @code{DATA} statements and 
assignments with


I wonder whether one should mention here explicitly that only 
default-kind (i.e. kind=1) character strings are permitted. 
Additionally, I wonder whether -fdec-char-conversion should be 
mentioned here – without it, the feature is not supported and the error message 
doesn't point to this option.



Now mentions -fdec-char-conversion and kind=1.



+
+  /* Flang allows character conversions similar to Hollerith 
conversions

+ - the first characters will be turned into ascii values. */


Is this Flang or DEC or …? I thought we were talking about legacy support, and 
Flang is not really legacy.




Re-worded.

--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
 +  if ((gfc_numeric_ts (&lhs->ts) || lhs->ts.type == BT_LOGICAL)
+  && rhs->ts.type == BT_CHARACTER
+  && rhs->expr_type != EXPR_CONSTANT)
+    {
+  gfc_error ("Cannot convert %s to %s at %L", gfc_typename (rhs),
+ gfc_typename (lhs), &rhs->where);
+  return false;
+    }


Maybe add a comment like:
/* Happens with flag_dec_char_conversions for nonconstant strings.  */
might help casual readers to understand where this if comes from.


Done.



@@ -331,8 +332,9 @@ gfc_conv_constant_to_tree (gfc_expr * expr)
  gfc_build_string_const (expr->representation.length,
  expr->representation.string));
    if (!integer_zerop (tmp) && !integer_onep (tmp))
-    gfc_warning (0, "Assigning value other than 0 or 1 to LOGICAL"
- " has undefined result at %L", >where);
+    gfc_warning (OPT_Wsurprising, "Assigning value other than 0 
or 1 "

+ "to LOGICAL has undefined result at %L",
+ >where);


I am not happy with this. We had odd issues with combining code 
generated by gfortran and ifort and Boolean types ("logical"). 
Namely, gfortran uses 0 and 1 – while ifort uses -1 and 0. When using 
".not. var", it is sufficient to flip a single bit – either the first 
or the last bit – and it is sufficient to look at only a single bit.


Hence, one can get ".not. var .eqv. var".

One can get the same result when assigning "-1" to a logical. Hence, a 
default warning makes more sense than -Wsurprising. At least, 
-Wsurprising is enabled by default.


Hence, I wonder whether your 'OPT_Wsurprising' or 
'flag_dec_char_conversions ? OPT_Wsurprising : 0' makes more sense.



The latter.


Actually, I don't quickly see whether   4_'string'  (i.e. kind=4) 
strings are rejected or not. The gfc_character2* functions all assume 
kind=1 characters – while code like gfc_convert_constant or the 
resolve.c code only looks at BT_CHARACTER.
On the other hand, the add_conv calls in intrinsic.c's 
add_conversions are only added for the default-character kind.


In any case, can you add a test which checks that – even with 
-fdec-char-conversion – assigning a 2_'string' or 4_'string' to an 
integer/real/complex/logical will be rejected at compile time?


Did not add 2_'string' tests as 2 is not accepted as a valid kind for 
characters. The addition of 4_'string' in a data statement resulted in 
an ICE which has been fixed by only accepting characters of kind=1.

Otherwise, it looks okay to me.

Tobias


I noticed that warnings were not produced for conversion to logicals; 
re-ordering of an if..else if sequence fixes that problem. Additional 
test cases have been added.


Steve Kargl suggested a revision to the conversion warning adding 
"Nonstandard" to the text this has also been done.


Tested on x86_64 using make -j 8 check-fortran.

Please find attached the updated patch, the 

[committed] Handle POLY_INT_CST in copy_reference_ops_from_ref

2019-11-08 Thread Richard Sandiford
Tested on aarch64-linux-gnu and x86_64-linux-gnu.  Applied as obvious.

Richard


2019-11-08  Richard Sandiford  

gcc/
* tree-ssa-sccvn.c (copy_reference_ops_from_ref): Handle
POLY_INT_CST.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/general/deref_2.c: New test.
* gcc.target/aarch64/sve/acle/general/whilele_8.c: Likewise.
* gcc.target/aarch64/sve/acle/general/whilelt_4.c: Likewise.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c2019-10-31 17:15:21.594544316 +
+++ gcc/tree-ssa-sccvn.c2019-11-08 09:43:07.927488162 +
@@ -928,6 +928,7 @@ copy_reference_ops_from_ref (tree ref, v
  break;
case STRING_CST:
case INTEGER_CST:
+   case POLY_INT_CST:
case COMPLEX_CST:
case VECTOR_CST:
case REAL_CST:
Index: gcc/testsuite/gcc.target/aarch64/sve/acle/general/deref_2.c
===
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/acle/general/deref_2.c 2019-11-08 
09:43:07.927488162 +
@@ -0,0 +1,20 @@
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+#include <string.h>
+
+inline void
+copy (void *dst, svbool_t src)
+{
+  memcpy (dst, &src, svcntd ());
+}
+
+uint64_t
+f (int32_t *x, int32_t *y)
+{
+  union { uint64_t x; char c[8]; } u;
+  svbool_t pg = svptrue_b32 ();
+  copy (u.c, svcmpeq (pg, svld1 (pg, x), 0));
+  copy (u.c + 4, svcmpeq (pg, svld1 (pg, y), 1));
+  return u.x;
+}
Index: gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_8.c
===
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_8.c   
2019-11-08 09:43:07.927488162 +
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+/* { dg-final { scan-assembler-not {\tptrue\t} } } */
+/* { dg-final { scan-assembler-not {\tpfalse\t} } } */
+
+void
+test1 (svbool_t *ptr)
+{
+  *ptr = svwhilele_b32_s32 (-4, 0);
+}
+
+void
+test2 (svbool_t *ptr)
+{
+  *ptr = svwhilele_b16_s64 (svcntb (), svcntb () + 8);
+}
+
+void
+test3 (svbool_t *ptr)
+{
+  *ptr = svwhilele_b64_s32 (0, 2);
+}
+
+void
+test4 (svbool_t *ptr)
+{
+  *ptr = svwhilele_b8_s64 (16, svcntb ());
+}
+
+/* { dg-final { scan-assembler-times {\twhilel[et]\t} 4 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_4.c
===
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_4.c   
2019-11-08 09:43:07.927488162 +
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+/* { dg-final { scan-assembler-not {\tptrue\t} } } */
+/* { dg-final { scan-assembler-not {\tpfalse\t} } } */
+
+void
+test1 (svbool_t *ptr)
+{
+  *ptr = svwhilelt_b32_s32 (-4, 1);
+}
+
+void
+test2 (svbool_t *ptr)
+{
+  *ptr = svwhilelt_b16_s64 (svcntb (), svcntb () + 9);
+}
+
+void
+test3 (svbool_t *ptr)
+{
+  *ptr = svwhilelt_b64_s32 (0, 3);
+}
+
+void
+test4 (svbool_t *ptr)
+{
+  *ptr = svwhilelt_b8_s64 (16, svcntb ());
+}
+
+/* { dg-final { scan-assembler-times {\twhilel[et]\t} 4 } } */


Mark constant-sized objects as addressable if they have poly-int accesses

2019-11-08 Thread Richard Sandiford
If SVE code is written for a specific vector length, it might load from
or store to fixed-sized objects.  This needs to work even without
-msve-vector-bits=N (which should never be needed for correctness).

There's no way of handling a direct poly-int sized reference to a
fixed-size register; it would have to go via memory.  And in that
case it's more efficient to mark the fixed-size object as
addressable from the outset, like we do for array references
with non-constant indices.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2019-11-08  Richard Sandiford  

gcc/
* cfgexpand.c (discover_nonconstant_array_refs_r): If an access
with POLY_INT_CST size is made to a fixed-size object, force the
object to live in memory.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/general/deref_1.c: New test.

Index: gcc/cfgexpand.c
===
--- gcc/cfgexpand.c 2019-10-01 09:55:35.062089236 +0100
+++ gcc/cfgexpand.c 2019-11-08 09:39:13.105130902 +
@@ -6106,6 +6106,20 @@ discover_nonconstant_array_refs_r (tree
 {
   tree t = *tp;
 
+  /* References of size POLY_INT_CST to a fixed-size object must go
+ through memory.  It's more efficient to force that here than
+ to create temporary slots on the fly.  */
+  if (TYPE_SIZE (TREE_TYPE (t))
+  && POLY_INT_CST_P (TYPE_SIZE (TREE_TYPE (t))))
+{
+  t = get_base_address (t);
+  if (t
+ && DECL_P (t)
+ && DECL_MODE (t) != BLKmode
+ && GET_MODE_BITSIZE (DECL_MODE (t)).is_constant ())
+   TREE_ADDRESSABLE (t) = 1;
+}
+
   if (IS_TYPE_OR_DECL_P (t))
 *walk_subtrees = 0;
   else if (TREE_CODE (t) == ARRAY_REF || TREE_CODE (t) == ARRAY_RANGE_REF)
Index: gcc/testsuite/gcc.target/aarch64/sve/acle/general/deref_1.c
===
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/acle/general/deref_1.c 2019-11-08 
09:39:13.105130902 +
@@ -0,0 +1,13 @@
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+uint64_t
+f (int32_t *x, int32_t *y)
+{
+  union { uint64_t x; char c[8]; } u;
+  svbool_t pg = svptrue_b32 ();
+  *(svbool_t *)&u.c[0] = svcmpeq (pg, svld1 (pg, x), 0);
+  *(svbool_t *)&u.c[4] = svcmpeq (pg, svld1 (pg, y), 1);
+  return u.x;
+}


Fix code order in tree-sra.c:create_access

2019-11-08 Thread Richard Sandiford
If get_ref_base_and_extent returns poly_int offsets or sizes,
tree-sra.c:create_access prevents SRA from being applied to the base.
However, we haven't verified by that point that we have a valid base
to disqualify.

This originally led to an ICE on the attached testcase, but it
no longer triggers there after the introduction of IPA SRA.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2019-11-08  Richard Sandiford  

gcc/
* tree-sra.c (create_access): Delay disqualifying the base
for poly_int values until we know we have a base.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/general/inline_2.c: New test.

Index: gcc/tree-sra.c
===
--- gcc/tree-sra.c  2019-11-06 12:29:17.870674022 +
+++ gcc/tree-sra.c  2019-11-08 09:20:22.853050602 +
@@ -789,19 +789,11 @@ create_access (tree expr, gimple *stmt,
 {
   struct access *access;
   poly_int64 poffset, psize, pmax_size;
-  HOST_WIDE_INT offset, size, max_size;
   tree base = expr;
   bool reverse, unscalarizable_region = false;
 
  base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size,
				  &reverse);
-  if (!poffset.is_constant ()
-  || !psize.is_constant ()
-  || !pmax_size.is_constant (&max_size))
-{
-  disqualify_candidate (base, "Encountered a polynomial-sized access.");
-  return NULL;
-}
 
   /* For constant-pool entries, check we can substitute the constant value.  */
   if (constant_decl_p (base))
@@ -824,6 +816,15 @@ create_access (tree expr, gimple *stmt,
   if (!DECL_P (base) || !bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
 return NULL;
 
+  HOST_WIDE_INT offset, size, max_size;
+  if (!poffset.is_constant ()
+  || !psize.is_constant ()
+  || !pmax_size.is_constant (&max_size))
+{
+  disqualify_candidate (base, "Encountered a polynomial-sized access.");
+  return NULL;
+}
+
   if (size != max_size)
 {
   size = max_size;
Index: gcc/testsuite/gcc.target/aarch64/sve/acle/general/inline_2.c
===
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/acle/general/inline_2.c
2019-11-08 09:20:22.849050630 +
@@ -0,0 +1,16 @@
+/* { dg-options "-O2" } */
+
+typedef struct s { double d[4]; } TYPE;
+
+static inline void
+copy (TYPE *dst, TYPE *src)
+{
+  __SVFloat64_t tmp = *(__SVFloat64_t *) src;
+  *dst = *(TYPE *) &tmp;
+}
+
+void
+foo (TYPE *a)
+{
+  copy (a, a + 1);
+}


[C++ PATCH] Fix error-recovery with constexpr dtor (PR c++/92414)

2019-11-08 Thread Jakub Jelinek
Hi!

We ICE on the following testcase, because DECL_INITIAL (decl) is
error_mark_node due to a previously reported error, and
cxx_eval_outermost_constant_expr is unhappy if ctx.ctor is not
a CONSTRUCTOR, but error_mark_node.

If the initializer is invalid, it should have been diagnosed already and
there is no need to try to evaluate constexpr dtor on it.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-11-08  Jakub Jelinek  

PR c++/92414
* constexpr.c (cxx_constant_dtor): Don't call
cxx_eval_outermost_constant_expr if DECL_INITIAL is erroneous.

* g++.dg/cpp2a/constexpr-dtor4.C: New test.

--- gcc/cp/constexpr.c.jj   2019-11-06 08:58:37.0 +0100
+++ gcc/cp/constexpr.c  2019-11-07 22:13:58.395840756 +0100
@@ -6024,7 +6024,8 @@ cxx_constant_value (tree t, tree decl)
 void
 cxx_constant_dtor (tree t, tree decl)
 {
-  cxx_eval_outermost_constant_expr (t, false, true, true, true, decl);
+  if (!error_operand_p (DECL_INITIAL (decl)))
+cxx_eval_outermost_constant_expr (t, false, true, true, true, decl);
 }
 
 /* Helper routine for fold_simple function.  Either return simplified
--- gcc/testsuite/g++.dg/cpp2a/constexpr-dtor4.C.jj 2019-11-07 
22:16:56.943181785 +0100
+++ gcc/testsuite/g++.dg/cpp2a/constexpr-dtor4.C2019-11-07 
22:18:16.752993443 +0100
@@ -0,0 +1,15 @@
+// PR c++/92414
+// { dg-do compile { target c++2a } }
+
+struct A { virtual void foo (); };
+
+struct B : A {
+  constexpr B (int);   // { dg-warning "used but never defined" }
+  constexpr ~B () { }
+};
+
+struct D : B {
+  constexpr D () : B (42) { }  // { dg-error "used before its definition" }
+};
+
+constexpr D d; // { dg-message "in 'constexpr' expansion of" }

Jakub



[committed] Handle POLY_INT_CSTs in declare_return_variable

2019-11-08 Thread Richard Sandiford
SVE allows variable-length vectors to be returned by value,
which tripped the assert in declare_return_variable.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  Applied as obvious/
preapproved by Jeff some time ago for this kind of change.

Richard


2019-11-08  Richard Sandiford  

gcc/
* tree-inline.c (declare_return_variable): Check for poly_int_tree_p
instead of INTEGER_CST.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/general/inline_1.c: New test.

Index: gcc/tree-inline.c
===
--- gcc/tree-inline.c   2019-10-31 17:15:25.110519870 +
+++ gcc/tree-inline.c   2019-11-08 09:06:22.762944419 +
@@ -3654,7 +3654,7 @@ declare_return_variable (copy_body_data
   /* ??? If we're assigning to a variable sized type, then we must
 reuse the destination variable, because we've no good way to
 create variable sized temporaries at this point.  */
-  else if (TREE_CODE (TYPE_SIZE_UNIT (caller_type)) != INTEGER_CST)
+  else if (!poly_int_tree_p (TYPE_SIZE_UNIT (caller_type)))
use_it = true;
 
   /* If the callee cannot possibly modify MODIFY_DEST, then we can
@@ -3689,7 +3689,7 @@ declare_return_variable (copy_body_data
}
 }
 
-  gcc_assert (TREE_CODE (TYPE_SIZE_UNIT (callee_type)) == INTEGER_CST);
+  gcc_assert (poly_int_tree_p (TYPE_SIZE_UNIT (callee_type)));
 
   var = copy_result_decl_to_var (result, id);
   DECL_SEEN_IN_BIND_EXPR_P (var) = 1;
Index: gcc/testsuite/gcc.target/aarch64/sve/acle/general/inline_1.c
===
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/acle/general/inline_1.c
2019-11-08 09:06:22.75897 +
@@ -0,0 +1,6 @@
+/* { dg-options "-O2" } */
+
+#pragma GCC aarch64 "arm_sve.h"
+
+static inline svint32_t foo () { return svdup_s32 (32); }
+svint32_t bar () { return svadd_x (svptrue_b8 (), foo (), 1); }


LRA: handle memory constraints that accept more than "m"

2019-11-08 Thread Richard Sandiford
LRA allows address constraints that are more relaxed than "p":

  /* Target hooks sometimes don't treat extra-constraint addresses as
 legitimate address_operands, so handle them specially.  */
  if (insn_extra_address_constraint (cn)
   && satisfies_address_constraint_p (&ad, cn))
return change_p;

For SVE it's useful to allow the same thing for memory constraints.
The particular use case is LD1RQ, which is an SVE instruction that
addresses Advanced SIMD vector modes and that accepts some addresses
that normal Advanced SIMD moves don't.

Normally we require every memory to satisfy at least "m", which is
defined to be a memory "with any kind of address that the machine
supports in general".  However, LD1RQ is very much special-purpose:
it doesn't really have any relation to normal operations on these
modes.  Adding its addressing modes to "m" would lead to bad Advanced
SIMD optimisation decisions in passes like ivopts.  LD1RQ therefore
has a memory constraint that accepts things "m" doesn't.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2019-11-08  Richard Sandiford  

gcc/
* lra-constraints.c (valid_address_p): Take the operand and a
constraint as argument.  If the operand is a MEM and the constraint
is a memory constraint, check whether the eliminated form of the
MEM already satisfies the constraint.
(process_address_1): Update calls accordingly.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c: Remove XFAIL.
* gcc.target/aarch64/sve/acle/asm/ld1rq_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/ld1rq_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/ld1rq_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/ld1rq_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/ld1rq_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/ld1rq_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/ld1rq_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/ld1rq_u64.c: Likewise.

Index: gcc/lra-constraints.c
===
--- gcc/lra-constraints.c   2019-09-30 17:20:57.366608014 +0100
+++ gcc/lra-constraints.c   2019-11-08 09:00:58.517228517 +
@@ -389,11 +389,24 @@ address_eliminator::~address_eliminator
 *m_index_loc = m_index_reg;
 }
 
-/* Return true if the eliminated form of AD is a legitimate target address.  */
+/* Return true if the eliminated form of AD is a legitimate target address.
+   If OP is a MEM, AD is the address within OP, otherwise OP should be
+   ignored.  CONSTRAINT is one constraint that the operand may need
+   to meet.  */
 static bool
-valid_address_p (struct address_info *ad)
+valid_address_p (rtx op, struct address_info *ad,
+enum constraint_num constraint)
 {
   address_eliminator eliminator (ad);
+
+  /* Allow a memory OP if it matches CONSTRAINT, even if CONSTRAINT is more
+ forgiving than "m".  */
+  if (MEM_P (op)
+  && (insn_extra_memory_constraint (constraint)
+ || insn_extra_special_memory_constraint (constraint))
+  && constraint_satisfied_p (op, constraint))
+return true;
+
   return valid_address_p (ad->mode, *ad->outer, ad->as);
 }
 
@@ -3398,7 +3411,7 @@ process_address_1 (int nop, bool check_o
 
  All these cases involve a non-autoinc address, so there is no
  point revalidating other types.  */
-  if (ad.autoinc_p || valid_address_p (&ad))
+  if (ad.autoinc_p || valid_address_p (op, &ad, cn))
 return change_p;
 
   /* Any index existed before LRA started, so we can assume that the
@@ -3427,7 +3440,7 @@ process_address_1 (int nop, bool check_o
  if (code >= 0)
{
  *ad.inner = gen_rtx_LO_SUM (Pmode, new_reg, addr);
- if (! valid_address_p (ad.mode, *ad.outer, ad.as))
+ if (!valid_address_p (op, &ad, cn))
{
  /* Try to put lo_sum into register.  */
  insn = emit_insn (gen_rtx_SET
@@ -3437,7 +3450,7 @@ process_address_1 (int nop, bool check_o
  if (code >= 0)
{
  *ad.inner = new_reg;
- if (! valid_address_p (ad.mode, *ad.outer, ad.as))
+ if (!valid_address_p (op, &ad, cn))
{
  *ad.inner = addr;
  code = -1;
@@ -3532,7 +3545,7 @@ process_address_1 (int nop, bool check_o
  && CONSTANT_P (XEXP (SET_SRC (set), 1)))
{
  *ad.inner = SET_SRC (set);
- if (valid_address_p (ad.mode, *ad.outer, ad.as))
+ if (valid_address_p (op, &ad, cn))
{
  *ad.base_term = XEXP (SET_SRC (set), 0);
  *ad.disp_term = XEXP (SET_SRC (set), 1);
Index: gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c

[PATCH] Fix PR92324

2019-11-08 Thread Richard Biener


I've been sitting on this for a few days since I'm not 100% happy
with how the code looks.  There are possibly still holes in it
(chains with mixed signed/unsigned adds for example might pick
up signed adds in the epilogue), but the wrong-code cases should
work fine now.  I'm probably going to follow up with some
mass renaming of variable/parameter names to make it more clear
which stmt / type we are actually looking at ...

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2019-11-08  Richard Biener  

PR tree-optimization/92324
* tree-vect-loop.c (vect_create_epilog_for_reduction): Use
STMT_VINFO_REDUC_VECTYPE for all computations, inserting
sign-conversions as necessary.
(vectorizable_reduction): Reject conversions in the chain
that are not sign-conversions, base analysis on a non-converting
stmt and its operation sign.  Set STMT_VINFO_REDUC_VECTYPE.
* tree-vect-stmts.c (vect_stmt_relevant_p): Don't dump anything
for debug stmts.
* tree-vectorizer.h (_stmt_vec_info::reduc_vectype): New.
(STMT_VINFO_REDUC_VECTYPE): Likewise.

* gcc.dg/vect/pr92205.c: XFAIL.
* gcc.dg/vect/pr92324-1.c: New testcase.
* gcc.dg/vect/pr92324-2.c: Likewise.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 277922)
+++ gcc/tree-vect-loop.c(working copy)
@@ -4231,7 +4231,6 @@ vect_create_epilog_for_reduction (stmt_v
   gimple *new_phi = NULL, *phi;
   stmt_vec_info phi_info;
   gimple_stmt_iterator exit_gsi;
-  tree vec_dest;
   tree new_temp = NULL_TREE, new_name, new_scalar_dest;
   gimple *epilog_stmt = NULL;
   gimple *exit_phi;
@@ -4264,7 +4263,7 @@ vect_create_epilog_for_reduction (stmt_v
 }
   gcc_assert (!nested_in_vect_loop || double_reduc);
 
-  vectype = STMT_VINFO_VECTYPE (stmt_info);
+  vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
   gcc_assert (vectype);
   mode = TYPE_MODE (vectype);
 
@@ -4505,48 +4504,43 @@ vect_create_epilog_for_reduction (stmt_v
  one vector.  */
   if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
 {
+  gimple_seq stmts = NULL;
   tree first_vect = PHI_RESULT (new_phis[0]);
-  gassign *new_vec_stmt = NULL;
-  vec_dest = vect_create_destination_var (scalar_dest, vectype);
+  first_vect = gimple_convert (&stmts, vectype, first_vect);
   for (k = 1; k < new_phis.length (); k++)
 {
  gimple *next_phi = new_phis[k];
   tree second_vect = PHI_RESULT (next_phi);
-  tree tem = make_ssa_name (vec_dest, new_vec_stmt);
-  new_vec_stmt = gimple_build_assign (tem, code,
- first_vect, second_vect);
-  gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
- first_vect = tem;
+ second_vect = gimple_convert (&stmts, vectype, second_vect);
+  first_vect = gimple_build (&stmts, code, vectype,
+first_vect, second_vect);
 }
+  gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
 
   new_phi_result = first_vect;
-  if (new_vec_stmt)
-{
-  new_phis.truncate (0);
-  new_phis.safe_push (new_vec_stmt);
-}
+  new_phis.truncate (0);
+  new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
 }
   /* Likewise if we couldn't use a single defuse cycle.  */
   else if (ncopies > 1)
 {
   gcc_assert (new_phis.length () == 1);
+  gimple_seq stmts = NULL;
   tree first_vect = PHI_RESULT (new_phis[0]);
-  gassign *new_vec_stmt = NULL;
-  vec_dest = vect_create_destination_var (scalar_dest, vectype);
+  first_vect = gimple_convert (&stmts, vectype, first_vect);
   stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
   for (int k = 1; k < ncopies; ++k)
{
  next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
  tree second_vect = PHI_RESULT (next_phi_info->stmt);
-  tree tem = make_ssa_name (vec_dest, new_vec_stmt);
-  new_vec_stmt = gimple_build_assign (tem, code,
- first_vect, second_vect);
-  gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
- first_vect = tem;
+ second_vect = gimple_convert (&stmts, vectype, second_vect);
+ first_vect = gimple_build (&stmts, code, vectype,
+first_vect, second_vect);
}
+  gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
   new_phi_result = first_vect;
   new_phis.truncate (0);
-  new_phis.safe_push (new_vec_stmt);
+  new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
 }
   else
 new_phi_result = PHI_RESULT (new_phis[0]);
@@ -4877,13 +4871,14 @@ vect_create_epilog_for_reduction (stmt_v
 in a vector mode of smaller size and first reduce upper/lower
 halves against each 

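For readers less familiar with the match-and-simplify helpers used above:
the hunks replace explicit gimple_build_assign/gsi_insert_before calls with
the gimple_seq builders.  A simplified sketch of that pattern (not code
lifted verbatim from the patch) is:

  /* Accumulate the epilogue statements in a sequence; gimple_convert and
     gimple_build fold operands on the fly and only emit statements when
     needed, and the whole sequence is inserted once before the exit GSI.  */
  gimple_seq stmts = NULL;
  tree val = gimple_convert (&stmts, vectype, PHI_RESULT (new_phis[0]));
  for (unsigned k = 1; k < new_phis.length (); k++)
    {
      tree other = gimple_convert (&stmts, vectype, PHI_RESULT (new_phis[k]));
      val = gimple_build (&stmts, code, vectype, val, other);
    }
  gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
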
[committed][AArch64] Remove unused mode iterators

2019-11-08 Thread Richard Sandiford
Tested on aarch64-linux-gnu, applied as r277953.

Richard


2019-11-08  Richard Sandiford  

gcc/
* config/aarch64/iterators.md (SVE_BH, SVE_BHS): Delete.

Index: gcc/config/aarch64/iterators.md
===
--- gcc/config/aarch64/iterators.md 2019-11-08 08:31:15.0 +
+++ gcc/config/aarch64/iterators.md 2019-11-08 08:40:10.662024839 +
@@ -302,12 +302,6 @@ (define_mode_iterator SVE_STRUCT [VNx32Q
  VNx64QI VNx32HI VNx16SI VNx8DI
  VNx32HF VNx16SF VNx8DF])
 
-;; All SVE vector modes that have 8-bit or 16-bit elements.
-(define_mode_iterator SVE_BH [VNx16QI VNx8HI VNx8HF])
-
-;; All SVE vector modes that have 8-bit, 16-bit or 32-bit elements.
-(define_mode_iterator SVE_BHS [VNx16QI VNx8HI VNx4SI VNx8HF VNx4SF])
-
 ;; SVE integer vector modes that have 8-bit, 16-bit or 32-bit elements.
 (define_mode_iterator SVE_BHSI [VNx16QI VNx8HI VNx4SI])
 


[committed][AArch64] Don't handle bswap in aarch64_builtin_vectorized_function

2019-11-08 Thread Richard Sandiford
aarch64_builtin_vectorized_function no longer needs to handle bswap*
since we have internal functions and optabs for all supported cases.
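
As a hypothetical illustration (not a testcase from this patch), a loop such
as the following is now vectorized via the bswap internal function and the
corresponding optab, so the hook no longer needs a bswap case:

/* Byte-swap each element of a buffer; the vectorizer now maps
   __builtin_bswap32 to the bswap internal function / optab directly.  */
void
bswap_buf (unsigned int *x, int n)
{
  for (int i = 0; i < n; ++i)
    x[i] = __builtin_bswap32 (x[i]);
}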

Tested on aarch64-linux-gnu and applied as r277951.

Richard


2019-11-08  Richard Sandiford  

gcc/
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): Remove bswap handling.

Index: gcc/config/aarch64/aarch64-builtins.c
===
--- gcc/config/aarch64/aarch64-builtins.c	2019-10-22 08:46:57.359355939 +0100
+++ gcc/config/aarch64/aarch64-builtins.c	2019-11-08 08:36:49.199444569 +
@@ -1918,29 +1918,6 @@ #define AARCH64_CHECK_BUILTIN_MODE(C, N)
 
return aarch64_builtin_decls[builtin];
   }
-case CFN_BUILT_IN_BSWAP16:
-#undef AARCH64_CHECK_BUILTIN_MODE
-#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
-  (out_mode == N##Imode && out_n == C \
-   && in_mode == N##Imode && in_n == C)
-  if (AARCH64_CHECK_BUILTIN_MODE (4, H))
-   return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
-  else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
-   return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
-  else
-   return NULL_TREE;
-case CFN_BUILT_IN_BSWAP32:
-  if (AARCH64_CHECK_BUILTIN_MODE (2, S))
-   return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
-  else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
-   return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
-  else
-   return NULL_TREE;
-case CFN_BUILT_IN_BSWAP64:
-  if (AARCH64_CHECK_BUILTIN_MODE (2, D))
-   return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
-  else
-   return NULL_TREE;
 default:
   return NULL_TREE;
 }


Re: introduce -fcallgraph-info option

2019-11-08 Thread Richard Biener
On Thu, 7 Nov 2019, Alexandre Oliva wrote:

> On Nov  7, 2019, Richard Biener  wrote:
> 
> > (also raises the question why we have both -dumpbase and -auxbase ...)
> 
> https://gcc.gnu.org/ml/gcc-patches/2002-08/msg00294.html
> 
> This was before -dumpdir, however.
> 
> Here's the current logic for aux_base_name:
> 
> -c or -S with -o [odir/]obase.oext: [odir/]obase
> otherwise, given input [idir/]ibase.iext: ibase
> 
> Whereas the current logic for dump_base_name, once aux_base_name has
> been determined as [auxdir/]auxbase, is:
> 
> given -dumpbase ddir/dbase: ddir/dbase
> otherwise, given -dumpdir ddir and -dumpbase dbase: ddir/dbase
> otherwise, given -dumpbase dbase: [auxdir/]dbase
> otherwise, given -dumpdir ddir: ddir/ibase.iext
> otherwise: [auxdir/]ibase.iext
> 
> Relevant cases to consider: (aux, dump) for each compilation with
> CC='gcc -fstack-usage -fdump-tree-original'
> 
> compiling without -o: (ibase, ibase.iext)
> ex $CC -c srcdir/foo.c srcdir/x/bar.c
> -> foo.o foo.su foo.c.#t.original
>  + bar.o bar.su bar.c.#t.original
> 
> compiling with -o: ([odir/]obase, [odir/]ibase.iext)
> ex $CC -c srcdir/foo.c -o objdir/foobaz.o -Dfoobaz
> -> objdir/foobaz.o objdir/foobaz.su objdir/foo.c.#t.original
> 
> compiling multiple sources with -dumpbase: (ibase, [ddir/]dbase)
> ex $CC -dumpbase outdir/runme.dump -c srcdir/foo.c srcdir/x/bar.c
> -> foo.o foo.su outdir/runme.dump.#t.original
>  + bar.o bar.su outdir/runme.dump.#t.original (dupe)
> 
> compiling and linking with -o: (ibase, ibase.iext)
> ex $CC -o outdir/runme srcdir/foo.c srcdir/x/bar.c
> -> /tmp/temp().o foo.su foo.c.#t.original
>  + /tmp/temp().o bar.su bar.c.#t.original
>  + outdir/runme
> 
> lto-recompiling and linking with -o: (/tmp/obase.temp().ltrans#.ltrans, 
> odir/obase.ltrans#)
> ex $CC -o outdir/runme ltobjdir/foo.o ltobjdir/bar.o -fdump-rtl-expand
> -> /tmp/runme.temp().ltrans0.ltrans.o /tmp/runme.temp().ltrans0.ltrans.su
>  + outdir/runme.ltrans0.#r.expand
>  + outdir/runme
> 
> lto-recompiling and linking without -o: (/tmp/temp().ltrans#.ltrans, 
> /tmp/temp().ltrans#.o)
> ex $CC ltobjdir/foo.o ltobjdir/bar.o -fdump-rtl-expand
> -> /tmp/temp().ltrans0.ltrans.o /tmp/temp().ltrans0.ltrans.su
>  + /tmp/temp().ltrans0.#r.expand
>  + a.out
> 
> 
> If we were to unify auxbase and dumpbase, I'd take the opportunity to
> fix the -o objdir/foobaz.o compilation to output dumps named after
> objdir/foobaz or objdir/foobaz-foo.c rather than ./foo.c; for
> outdir/runme.dump to be used as a prefix for aux and dump names, so that
> we wouldn't create and then overwrite outdir/runme.dump, and so that
> other compilations of foo.c and bar.c wouldn't overwrite the .su files,
> but rather create outdir/runme.dump-{foo,bar}.* dumps and aux files; and
> likewise use outdir/runme.ltrans0 or a.out.ltrans0 for the .su and
> .expand files.
> 
> 
> The logic I suggest involves combining some of the -auxbase and some
> of the -dumpbase logic, namely:
> 
> In the driver:
> 
> compiling a single source idir/ibase.iext:
> 
>   -o odir/obase.oext specified: default -dumpdir odir -dumpbase obase.iext
>   -o obase.oext specified: default -dumpbase obase.iext
>   -o ibase.oext implied: default -dumpbase ibase.iext
> 
> compiling multiple sources named as ibase.iext for linking:
> 
>   -dumpbase [ddir/]dbase specified: make it -dumpbase [ddir/]dbase-ibase.iext
>   -o odir/output specified: default -dumpdir odir -dumpbase output-ibase.iext
>   -o output specified: default -dumpbase output-ibase.iext
>   -o a.out implied: default -dumpbase a.out-ibase.iext
> 
> LTO recompiling:
> 
>   same as above, with each ibase.iext set to ltrans#
> 
> 
> In the compiler, set dump_base_name to:
> 
> Given -dumpbase ddir/dbase: ddir/dbase
> otherwise, given -dumpdir ddir and -dumpbase dbase: ddir/dbase
> otherwise, given -dumpbase dbase: dbase
> 
> and copy aux_base_name from dump_base_name, but if it ends in .iext,
> drop the extension.
> 
> The resulting behavior (aux_base_name, dump_base_name)
> 
> compiling without -o: (ibase, ibase.iext)  unchanged
> ex $CC -c srcdir/foo.c srcdir/x/bar.c
> -> foo.o foo.su foo.c.#t.original
>  + bar.o bar.su bar.c.#t.original
> 
> compiling with -o: ([odir/]obase, [odir/]obase.iext)
> ex $CC -c srcdir/foo.c -o objdir/foobaz.o -Dfoobaz
> -> objdir/foobaz.o objdir/foobaz.su objdir/foobaz.c.#t.original
> 
> compiling multiple sources with -dumpbase: ([ddir]/dbase, [ddir/]dbase)
> ex $CC -dumpbase outdir/runme.dump -c srcdir/foo.c srcdir/x/bar.c
> -> foo.o outdir/runme.dump-foo.su outdir/runme.dump-foo.c.#t.original
>  + bar.o outdir/runme.dump-bar.su outdir/runme.dump-bar.c.#t.original
> 
> compiling and linking with -o: (outdir/runme-ibase, outdir/runme-ibase.iext)
> ex $CC -o outdir/runme srcdir/foo.c srcdir/x/bar.c
> -> /tmp/temp().o outdir/runme-foo.su outdir/runme-foo.c.#t.original
>  + /tmp/temp().o outdir/runme-bar.su outdir/runme-bar.c.#t.original
>  + outdir/runme
> 
> lto-recompiling and linking with -o: (outdir/runme.ltrans#,