Re: [PATCH] re PR tree-optimization/90883 (Generated code is worse if returned struct is unnamed)

2020-03-05 Thread Kito Cheng
Committed, thanks :)

On Thu, Mar 5, 2020 at 11:42 PM Jeff Law  wrote:

> On Tue, 2020-03-03 at 15:34 +0800, Kito Cheng wrote:
> > After adding --param max-inline-insns-size=1, all targets will remove the
> > redundant store at dse1, except some targets like AArch64 and MIPS, which
> > expand the struct initialization into a loop due to CLEAR_RATIO.
> >
> > Tested on cross compiler of riscv32, riscv64, x86, x86_64, mips, mips64,
> > aarch64, nds32 and arm.
> >
> > gcc/testsuite/ChangeLog
> >
> >   PR tree-optimization/90883
> >   * g++.dg/tree-ssa/pr90883.c: Add --param max-inline-insns-size=1.
> >   Add aarch64-*-* mips*-*-* to XFAIL.
> OK from me as well.
> jeff
> >
>
>


Bind to std::equal plumbing in ranges::equal

2020-03-05 Thread François Dumont
I started to work on ranges::equal to find out if what I am trying to do 
is totally silly.


With this patch ranges::equal is on par with the std::equal specializations, 
that is to say, it correctly deals with Debug mode and std::deque 
iterators.


Once below patch is in:

https://gcc.gnu.org/ml/libstdc++/2019-12/msg00032.html

We will even be able to call std::__equal_aux1 directly using 
__niter_base to get rid of the Debug safe iterator layer. And even in 
this case get rid of the branch __use_memcmp and leave it to __equal_aux1.


I mainly fear the usage of std::iterator_traits in __equal_aux1 to be a 
problem. Is it in this context of sized_sentinel ?


In addition to testsuite I checked running gdb that it does the right thing.

Ok to commit ?

    libstdc++ Leverage on std::equal plumbing in ranges::equal.

    Benefit from the std::equal plumbing to correctly deal with
    _GLIBCXX_DEBUG mode and std::deque iterators.

    * include/bits/ranges_algobase.h (__equal_fn::operator()):
    Review conditions to call std::__equal_aux.
    * testsuite/25_algorithms/equal/constrained.cc (test04): New.

François


diff --git a/libstdc++-v3/include/bits/ranges_algobase.h b/libstdc++-v3/include/bits/ranges_algobase.h
index 80c9a774301..d4f89cb9fb2 100644
--- a/libstdc++-v3/include/bits/ranges_algobase.h
+++ b/libstdc++-v3/include/bits/ranges_algobase.h
@@ -82,8 +82,6 @@ namespace ranges
 		 _Iter2 __first2, _Sent2 __last2, _Pred __pred = {},
 		 _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const
   {
-	// TODO: implement more specializations to at least have parity with
-	// std::equal.
 	if constexpr (__detail::__is_normal_iterator<_Iter1>
 		  || __detail::__is_normal_iterator<_Iter2>)
 	  return (*this)(std::__niter_base(std::move(__first1)),
@@ -100,19 +98,24 @@ namespace ranges
 	if (__d1 != __d2)
 	  return false;
 
+	if (!__d1)
+	  return true;
+
+	constexpr bool __is_simple_equal
+	  = (is_same_v<_Pred, ranges::equal_to>
+		 && is_same_v<_Proj1, identity>
+		 && is_same_v<_Proj2, identity>);
+	if constexpr (__is_simple_equal)
+	  {
 		using _ValueType1 = iter_value_t<_Iter1>;
 		using _ValueType2 = iter_value_t<_Iter2>;
 		constexpr bool __use_memcmp
 		  = ((is_integral_v<_ValueType1> || is_pointer_v<_ValueType1>)
-		 && __memcmpable<_Iter1, _Iter2>::__value
-		 && is_same_v<_Pred, ranges::equal_to>
-		 && is_same_v<_Proj1, identity>
-		 && is_same_v<_Proj2, identity>);
+		 && __memcmpable<_Iter1, _Iter2>::__value);
 		if constexpr (__use_memcmp)
-	  {
-		if (const size_t __len = (__last1 - __first1))
-		  return !std::__memcmp(__first1, __first2, __len);
-		return true;
+		  return !std::__memcmp(__first1, __first2, __d1);
+		else
+		  return std::__equal_aux(__first1, __first1 + __d1, __first2);
 	  }
 	else
 	  {
diff --git a/libstdc++-v3/testsuite/25_algorithms/equal/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/equal/constrained.cc
index 231bd8cfeaa..aa27fd195a2 100644
--- a/libstdc++-v3/testsuite/25_algorithms/equal/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/equal/constrained.cc
@@ -19,6 +19,9 @@
 // { dg-do run { target c++2a } }
 
 #include 
+#include 
+#include 
+
 #include 
 #include 
 
@@ -87,10 +90,21 @@ test03()
   VERIFY( !ranges::equal(x, z) );
 }
 
+void
+test04()
+{
+  std::deque x = { {2}, {2}, {6}, {8}, {10}, {11} };
+  std::deque y = { {2}, {2}, {6}, {8}, {10}, {11} };
+  std::deque z = { {2}, {2}, {6}, {8}, {10}, {12} };
+  VERIFY( ranges::equal(x, y) );
+  VERIFY( !ranges::equal(x, z) );
+}
+
 int
 main()
 {
   test01();
   test02();
   test03();
+  test04();
 }


Bind to std::equal plumbing in ranges::equal

2020-03-05 Thread François Dumont
I started to work on ranges::equal to find out if what I am trying to do 
is totally silly.


With this patch ranges::equal is on par with the std::equal specializations, 
that is to say, it correctly deals with Debug mode and std::deque 
iterators.


Once below patch is in:

https://gcc.gnu.org/ml/libstdc++/2019-12/msg00032.html

We will even be able to call std::__equal_aux1 directly using 
__niter_base to get rid of the Debug safe iterator layer. And even in 
this case get rid of the branch __use_memcmp and leave it to __equal_aux1.


In addition to testsuite I checked running gdb that it does the right thing.

Ok to commit ?

    libstdc++ Leverage on std::equal plumbing in ranges::equal.

    Benefit from the std::equal plumbing to correctly deal with
    _GLIBCXX_DEBUG mode and std::deque iterators.

    * include/bits/ranges_algobase.h (__equal_fn::operator()):
    Review conditions to call std::__equal_aux.
    * testsuite/25_algorithms/equal/constrained.cc (test04): New.

François


diff --git a/libstdc++-v3/include/bits/ranges_algobase.h b/libstdc++-v3/include/bits/ranges_algobase.h
index 80c9a774301..d4f89cb9fb2 100644
--- a/libstdc++-v3/include/bits/ranges_algobase.h
+++ b/libstdc++-v3/include/bits/ranges_algobase.h
@@ -82,8 +82,6 @@ namespace ranges
 		 _Iter2 __first2, _Sent2 __last2, _Pred __pred = {},
 		 _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const
   {
-	// TODO: implement more specializations to at least have parity with
-	// std::equal.
 	if constexpr (__detail::__is_normal_iterator<_Iter1>
 		  || __detail::__is_normal_iterator<_Iter2>)
 	  return (*this)(std::__niter_base(std::move(__first1)),
@@ -100,19 +98,24 @@ namespace ranges
 	if (__d1 != __d2)
 	  return false;
 
+	if (!__d1)
+	  return true;
+
+	constexpr bool __is_simple_equal
+	  = (is_same_v<_Pred, ranges::equal_to>
+		 && is_same_v<_Proj1, identity>
+		 && is_same_v<_Proj2, identity>);
+	if constexpr (__is_simple_equal)
+	  {
 		using _ValueType1 = iter_value_t<_Iter1>;
 		using _ValueType2 = iter_value_t<_Iter2>;
 		constexpr bool __use_memcmp
 		  = ((is_integral_v<_ValueType1> || is_pointer_v<_ValueType1>)
-		 && __memcmpable<_Iter1, _Iter2>::__value
-		 && is_same_v<_Pred, ranges::equal_to>
-		 && is_same_v<_Proj1, identity>
-		 && is_same_v<_Proj2, identity>);
+		 && __memcmpable<_Iter1, _Iter2>::__value);
 		if constexpr (__use_memcmp)
-	  {
-		if (const size_t __len = (__last1 - __first1))
-		  return !std::__memcmp(__first1, __first2, __len);
-		return true;
+		  return !std::__memcmp(__first1, __first2, __d1);
+		else
+		  return std::__equal_aux(__first1, __first1 + __d1, __first2);
 	  }
 	else
 	  {
diff --git a/libstdc++-v3/testsuite/25_algorithms/equal/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/equal/constrained.cc
index 231bd8cfeaa..aa27fd195a2 100644
--- a/libstdc++-v3/testsuite/25_algorithms/equal/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/equal/constrained.cc
@@ -19,6 +19,9 @@
 // { dg-do run { target c++2a } }
 
 #include 
+#include 
+#include 
+
 #include 
 #include 
 
@@ -87,10 +90,21 @@ test03()
   VERIFY( !ranges::equal(x, z) );
 }
 
+void
+test04()
+{
+  std::deque x = { {2}, {2}, {6}, {8}, {10}, {11} };
+  std::deque y = { {2}, {2}, {6}, {8}, {10}, {11} };
+  std::deque z = { {2}, {2}, {6}, {8}, {10}, {12} };
+  VERIFY( ranges::equal(x, y) );
+  VERIFY( !ranges::equal(x, z) );
+}
+
 int
 main()
 {
   test01();
   test02();
   test03();
+  test04();
 }


Re: [PING PATCH coroutines] Do not strip cleanup_point when promote temporaries out of current stmt

2020-03-05 Thread JunMa

在 2020/3/5 下午10:18, Iain Sandoe 写道:

Hello JunMa,

JunMa  wrote:


Ping


Once again, sorry for taking time to review this.


在 2020/2/27 上午10:18, JunMa 写道:

在 2020/2/11 上午10:14, JunMa 写道:
Kindly ping

Regards
JunMa

Hi
In maybe_promote_captured_temps, the cleanup_point_stmt is
stripped when handling temporaries captured by reference. However, there
may be non-reference temporaries in the current stmt, which cause an ICE in
the gimplify pass.

This patch fix this. The testcase comes from cppcoro and is reduced by
creduce.


With current trunk + Bin’s two approved patches.

I see no change in the testcase (lambda-09-capture-object.C) before / 
after the patch

 (it fails for me at -O0 only - in both cases).

please could you check?

As I said in my previous mail, this patch fixes the ICE in the gimplify pass.

I test with current trunk + Bin's two patches, the testcase passes with 
the patch and fails
without the patch. It also fixes co-await-syntax-11.C, which is caused by the same 
ICE.


could you do double check?

Regards
JunMa

thanks
Iain





Re: [PING PATCH coroutines] Set side effects flag for BIND_EXPR which build in maybe_promote_captured_temps

2020-03-05 Thread JunMa

在 2020/3/5 下午9:51, Iain Sandoe 写道:

Hello JunMa,

JunMa  wrote:


Ping


Thanks for your patch(es) and I am sorry this has taken some time to 
review.


(right now, we’re trying to ensure that we have the latest standard 
represented in

 GCC10, so updating to n4849).


在 2020/2/27 上午10:17, JunMa 写道:

在 2020/2/11 上午10:50, JunMa 写道:
Hi
kindly ping~

Regards
JunMa

Hi
As title. in maybe_promote_captured_temps, we promote captured 
temporaries

and co_await_expr into a new BIND_EXPR. As the BIND_EXPR contains
co_await_expr and maybe other function calls, the side effects flag 
should

be set.

This patch fix one mismatch in cppcoro, the testcase comes from 
cppcoro

and is reduced by creduce.


With the following test conditions;

r10-7040-ga2ec7c4aafbcd517
 + the two approved patches by Bin Cheng applied.

 1/ the test case in this patch (lambda-10-co-await-lambda.C) fails 
both with and without the patch.

 2/ the patch regresses one of my local testcases.


The test case fails because of ICE which is fixed by the
[PING PATCH coroutines] Do not strip cleanup_point when promote 
temporaries out of current stmt

This patch fix the runtime mismatch.

The extra regression is co-await-syntax-11.C which comes from Bin's 
patch and also is fixed by that

patch.

Regards
JunMa
So, it appears that the testcase might show a bug - but the fix is not 
the right one for current trunk?


Please could you re-check ?

thanks
Iain





Re: [PATCH][gcc] libgccjit: handle long literals in playback::context::new_string_literal

2020-03-05 Thread David Malcolm
On Mon, 2019-09-02 at 09:16 +, Andrea Corallo wrote:
> Hi all,
> yesterday I've found an interesting bug in libgccjit.
> It seems we have a hard limit of 200 characters for literal
> strings.
> Attempting to create longer strings lead to ICE during pass_expand
> while performing a sanity check in get_constant_size.
> 
> Tracking down the issue, it seems the code we have was inspired by
> c-family/c-common.c:c_common_nodes_and_builtins, where
> array_domain_type
> is actually defined with a size of 200.
> The comment that follows that point sounded premonitory :) :)
> 
> /* Make a type for arrays of characters.
>With luck nothing will ever really depend on the length of this
>array type.  */
> 
> At least in the current implementation the type is set by
> fix_string_type, where the actual string length is taken into account.
> 
> I attach a patch updating the logic accordingly and a new testcase
>  for that.
> 
> make check-jit is passing clean.
> 
> Best Regards
>   Andrea

Sorry about the long delay in reviewing this patch.

> gcc/jit/ChangeLog
> 2019-??-??  Andrea Corallo  
> 
> * jit-playback.h
> (gcc::jit::recording::context m_recording_ctxt): Remove
 ^
 "playback" here
 
> m_char_array_type_node field.

[...]

> @@ -670,9 +669,12 @@ playback::rvalue *
>  playback::context::
>  new_string_literal (const char *value)
>  {
> -  tree t_str = build_string (strlen (value), value);
> -  gcc_assert (m_char_array_type_node);
> -  TREE_TYPE (t_str) = m_char_array_type_node;
> +  /* Compare with c-family/c-common.c: fix_string_type.  */
> +  size_t len = strlen (value);
> +  tree i_type = build_index_type (size_int (len));
> +  tree a_type = build_array_type (char_type_node, i_type);
> +  tree t_str = build_string (len, value);
> +  TREE_TYPE (t_str) = a_type;

This code works with string lengths and string sizes which always
requires a little extra care.  I'd like to see at least a comment
discussing this, as it's not immediately clear to me that we're
correctly handling the NUL terminator here.

Consider the string "foo".
This has strlen == 3, and its size is 4 chars.

build_string's comment says "Note that for a C string literal, LEN
should include the trailing NUL."

However, build_string appears to allocate one byte more than LEN, and
write a NUL in that final position.

fix_string_type has:

  int length = TREE_STRING_LENGTH (value);

where tree.h has:

  /* In C terms, this is sizeof, not strlen.  */
  #define TREE_STRING_LENGTH(NODE) (STRING_CST_CHECK (NODE)->string.length)

and:
  nchars = length / charsz;

where for our purposes charsz == 1 and so nchars == length.

fix_string_type has:

  i_type = build_index_type (size_int (nchars - 1));

So I *think* the patch is correct, but there ought to be a comment at
least.

Or maybe use TREE_STRING_LENGTH (t_str) to more closely follow
fix_string_type?

[...]

Also, please add an assertion and comment to the testcase to assert
that the very long string is indeed longer than the previous limit of
200.

Thanks
Dave



Re: [PATCH][gcc] libgccjit: introduce version entry points

2020-03-05 Thread David Malcolm
On Thu, 2020-01-16 at 11:11 +, Andrea Corallo wrote:
> Hi, second version of the patch here cleaning up an unnecessary
> change.
> 
> Does not introduce regressions with make check-jit.
> 
> Andrea
> 
> gcc/jit/ChangeLog
> 2020-??-??  Andrea Corallo  
> 
>   * docs/topics/compatibility.rst (LIBGCCJIT_ABI_13): New ABI tag
>   plus add version paragraph.
>   * libgccjit++.h (namespace gccjit::version): Add new namespace.
>   * libgccjit.c (gcc_jit_version_major, gcc_jit_version_minor)
>   (gcc_jit_version_patchlevel): New functions.
>   * libgccjit.h (LIBGCCJIT_HAVE_gcc_jit_version): New macro.
>   (gcc_jit_version_major, gcc_jit_version_minor)
>   (gcc_jit_version_patchlevel): New functions.
>   * libgccjit.map (LIBGCCJIT_ABI_13) New ABI tag.
> 
> gcc/testsuite/ChangeLog
> 2020-??-??  Andrea Corallo  
> 
>   * jit.dg/test-version.c: New testcase.

[...]

Thanks for the patch; sorry for the delay in reviewing this.

Out of interest, do you have a specific use for this, or is it more
speculative?

> diff --git a/gcc/jit/libgccjit.c b/gcc/jit/libgccjit.c
> index 83055fc297b..572c82f053c 100644
> --- a/gcc/jit/libgccjit.c
> +++ b/gcc/jit/libgccjit.c
> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "coretypes.h"
>  #include "timevar.h"
>  #include "typed-splay-tree.h"
> +#include "cppbuiltin.h"
>  
>  #include "libgccjit.h"
>  #include "jit-recording.h"
> @@ -3175,3 +3176,27 @@ gcc_jit_context_new_rvalue_from_vector 
> (gcc_jit_context *ctxt,
>   as_vec_type,
>   (gcc::jit::recording::rvalue **)elements);
>  }
> +
> +extern int
> +gcc_jit_version_major (void)
> +{
> +  int major, minor, patchlevel;
> +  parse_basever (&major, &minor, &patchlevel);
> +  return major;
> +}
> +
> +extern int
> +gcc_jit_version_minor (void)
> +{
> +  int major, minor, patchlevel;
> +  parse_basever (&major, &minor, &patchlevel);
> +  return minor;
> +}
> +
> +extern int
> +gcc_jit_version_patchlevel (void)
> +{
> +  int major, minor, patchlevel;
> +  parse_basever (&major, &minor, &patchlevel);
> +  return patchlevel;
> +}

My first thought here was that we should have a way to get all three at
once, but it turns out that parse_basever does its own caching
internally.

I don't think the current implementation is thread-safe; parse_basever
has:

  static int s_major = -1, s_minor, s_patchlevel;

  if (s_major == -1)
    if (sscanf (BASEVER, "%d.%d.%d", &s_major, &s_minor, &s_patchlevel) != 3)
      {
        sscanf (BASEVER, "%d.%d", &s_major, &s_minor);
        s_patchlevel = 0;
      }

I think there's a race here: if two threads call parse_basever at the
same time, it looks like:
 (1) thread A could set s_major
 (2) thread B could read s_major, find it's set
 (3) thread B could read the uninitialized s_minor
 (4) thread A sets s_minor
and various similar issues.

One fix might be to add a version mutex to libgccjit.c; maybe something
like the following (caveat: I haven't tried compiling this):

/* A mutex around the cached state in parse_basever.
   Ideally this would be within parse_basever, but the mutex is only needed
   by libgccjit.  */

static pthread_mutex_t version_mutex = PTHREAD_MUTEX_INITIALIZER;

struct version_info
{
  /* Default constructor.  Populate via parse_basever,
 guarded by version_mutex.  */
  version_info ()
  {
    pthread_mutex_lock (&version_mutex);
    parse_basever (&major, &minor, &patchlevel);
    pthread_mutex_unlock (&version_mutex);
  }
  
  int major;
  int minor;
  int patchlevel;
};

int
gcc_jit_version_major (void)
{
  version_info vi;
  return vi.major;
}

int
gcc_jit_version_minor (void)
{
  version_info vi;
  return vi.minor;
}

int
gcc_jit_version_patchlevel (void)
{
  version_info vi;
  return vi.patchlevel;
}

Is adding a mutex a performance issue?  How frequently are these going
to be called?  

Alternatively, maybe make these functions take a gcc_jit_context and
cache the version information within the context? (since the API
requires multithreaded programs to use their own locking if threads
share a context)

Or some kind of caching in libgccjit.c?  (perhaps simply by making the
version_info instances above static?  my memory of C++ function-static
init rules and what we can rely on on our minimal compiler is a little
hazy)

> diff --git a/gcc/testsuite/jit.dg/test-version.c 
> b/gcc/testsuite/jit.dg/test-version.c
> new file mode 100644
> index 000..4338a00018b
> --- /dev/null
> +++ b/gcc/testsuite/jit.dg/test-version.c
> @@ -0,0 +1,26 @@
> +#include 
> +#include 
> +
> +#include "libgccjit.h"
> +
> +#include "harness.h"
> +
> +#ifndef LIBGCCJIT_HAVE_gcc_jit_version
> +#error LIBGCCJIT_HAVE_gcc_jit_version was not defined
> +#endif
> +
> +void
> +create_code (gcc_jit_context *ctxt, void *user_data)
> +{
> +  /* Do nothing.  */
> +}
> +
> +void
> +verify_code (gcc_jit_context *ctxt, gcc_jit_result *result)
> +{
> +  if (!gcc_jit_version_major ())
> +fail ("Major version is zero");
> +  /* Minor and patchlevel can be zero.  */
> +  gcc_jit_version_minor ();
> +  gcc_jit_version_patchlevel ();
> +}


Re: [PATCH] [rs6000] Rewrite the declaration of a variable

2020-03-05 Thread binbin

Hi Kewen and Segher,

On 2020/3/6 上午10:02, binbin wrote:



On 2020/3/4 下午6:35, Kewen.Lin wrote:

on 2020/3/4 下午3:24, binbin wrote:

Hi

On 2020/3/4 上午8:33, Segher Boessenkool wrote:

Hi!

On Tue, Mar 03, 2020 at 10:13:56AM -0600, Bin Bin Lv wrote:
Rewrite the declaration of toc_section from the source file 
rs6000.c to its

header file for standardizing the code.



diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0faf44b..c0a6e86 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -181,7 +181,6 @@ static GTY(()) section *tls_private_data_section;
   static GTY(()) section *read_only_private_data_section;
   static GTY(()) section *sdata2_section;
   -extern GTY(()) section *toc_section;
   section *toc_section = 0;
     /* Describe the vector unit used for modes.  */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3844bec..e77a84a 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2494,6 +2494,7 @@ extern GTY(()) tree 
rs6000_builtin_types[RS6000_BTI_MAX];

   extern GTY(()) tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
   extern GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
   extern GTY(()) tree altivec_builtin_mask_for_load;
+extern union GTY(()) section *toc_section;


Why does this add "union"?


Segher



If "union" is not added, it reports error showing unknown type name 
‘section’
in file included from 
../../host-powerpc64le-unknown-linux-gnu/gcc/tm.h:25,

from ../.././libgcc/generic-morestack-thread.c:29:
extern GTY(()) section *toc_section.  Then add "union" to solve this. 
Thanks.




Hi Binbin,

Another option is to move it into the #ifndef USED_FOR_TARGET hunk,
since "typedef union section section" is guarded by #ifndef USED_FOR_TARGET
in coretypes.h.  That makes them consistent.

BR,
Kewen



OK, changed the code.  Bootstrap and regression tests were done on
powerpc64le-linux-gnu (LE) with no regressions.  Thanks for your suggestion.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0faf44b..c0a6e86 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -181,7 +181,6 @@ static GTY(()) section *tls_private_data_section;
 static GTY(()) section *read_only_private_data_section;
 static GTY(()) section *sdata2_section;
 
-extern GTY(()) section *toc_section;
 section *toc_section = 0;
 
 /* Describe the vector unit used for modes.  */
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index cd3d054..06c214e 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2494,6 +2494,9 @@ extern GTY(()) tree 
builtin_mode_to_type[MAX_MACHINE_MODE][2];
 extern GTY(()) tree altivec_builtin_mask_for_load;
 
 #ifndef USED_FOR_TARGET
+
+extern GTY(()) section *toc_section;
+
 /* A C structure for machine-specific, per-function data.
This is added to the cfun structure.  */
 typedef struct GTY(()) machine_function


[PATCH] avoid treating more incompatible redeclarations as built-ins [PR94040]

2020-03-05 Thread Martin Sebor

Treating incompatible redeclarations of built-in functions as built-ins
is a problem not just for the middle-end but even for the C front-end
itself, when different parts of it make  different assumptions about
what is and isn't valid.  The test case that is the subject of this
bug report (a GCC 9 and 10 regression) is one such example: it shows
that the attribute format validation assumes the function declaration
the attribute applies to has passed the prerequisite validation.  But
that's not the case when the function is an incompatibly redeclared
built-in where a format attribute's positional argument refers to
parameter of an invalid/nonsensical type.

The attached patch further adjusts the front-end to avoid considering even
more incompatible redeclarations as built-ins: in particular, redeclarations
whose pointer arguments point to incompatible variants of unqualified
types (e.g., char* vs int*, though not char* vs const char*).

Besides avoiding the front-end and some middle-end ICEs, the effect
of the patch is also to diagnose more incompatible redeclarations
of built-ins than before, but fewer invalid calls to such functions
(since they're no longer considered built-ins).  That seems like
an unavoidable trade-off.

Tested on x86_64-linux.  Is this acceptable for GCC 10?  How about 9?

Martin
PR c/94040 - ICE on a call to an invalid redeclaration of strftime

gcc/c/ChangeLog:

	PR c/94040
	* c-decl.c (builtin_structptr_type_count): New constant.
	(match_builtin_function_types): Reject decls that are incompatible
	in types pointed to by pointers.
	(diagnose_mismatched_decls): Adjust comments.

gcc/testsuite/ChangeLog:

	PR c/94040
	* gcc.dg/Wbuiltin-declaration-mismatch-12.c: Relax test to look
	for warning name rather than the exact text.
	* gcc.dg/Wbuiltin-declaration-mismatch-14.c: New test.
	* gcc.dg/Wbuiltin-declaration-mismatch-15.c: New test.
	* gcc.dg/pr62090.c: Prune expected warning.
	* gcc.dg/pr89314.c: Look for warning name rather than text.

diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index c819fd0d0d5..87a0734b715 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -1641,13 +1641,17 @@ c_bind (location_t loc, tree decl, bool is_global)
 }
 
 
-/* Stores the first FILE*, const struct tm* etc. argument type (whatever it
-   is) seen in a declaration of a file I/O etc. built-in.  Subsequent
-   declarations of such built-ins are expected to refer to it rather than to
-   fileptr_type_node etc. which is just void* (or to any other type).
+/* Stores the first FILE*, const struct tm* etc. argument type (whatever
+   it is) seen in a declaration of a file I/O etc. built-in, corresponding
+   to the builtin_structptr_types array.  Subsequent declarations of such
+   built-ins are expected to refer to it rather than to fileptr_type_node,
+   etc. which is just void* (or to any other type).
Used only by match_builtin_function_types.  */
 
-static GTY(()) tree last_structptr_types[6];
+static const unsigned builtin_structptr_type_count
+  = sizeof builtin_structptr_types / sizeof builtin_structptr_types[0];
+
+static GTY(()) tree last_structptr_types[builtin_structptr_type_count];
 
 /* Returns true if types T1 and T2 representing return types or types
of function arguments are close enough to be considered interchangeable
@@ -1692,10 +1696,13 @@ match_builtin_function_types (tree newtype, tree oldtype,
   tree newargs = TYPE_ARG_TYPES (newtype);
   tree tryargs = newargs;
 
-  gcc_checking_assert ((sizeof (last_structptr_types)
-			/ sizeof (last_structptr_types[0]))
-		   == (sizeof (builtin_structptr_types)
-			   / sizeof (builtin_structptr_types[0])));
+  const unsigned nlst
+= sizeof last_structptr_types / sizeof last_structptr_types[0];
+  const unsigned nbst
+= sizeof builtin_structptr_types / sizeof builtin_structptr_types[0];
+
+  gcc_checking_assert (nlst == nbst);
+
   for (unsigned i = 1; oldargs || newargs; ++i)
 {
   if (!oldargs
@@ -1710,11 +1717,12 @@ match_builtin_function_types (tree newtype, tree oldtype,
   if (!types_close_enough_to_match (oldtype, newtype))
 	return NULL_TREE;
 
-  unsigned j = (sizeof (builtin_structptr_types)
-		/ sizeof (builtin_structptr_types[0]));
+  unsigned j = nbst;
   if (POINTER_TYPE_P (oldtype))
-	for (j = 0; j < (sizeof (builtin_structptr_types)
-			 / sizeof (builtin_structptr_types[0])); ++j)
+	/* Iterate over well-known struct types like FILE (whose types
+	   aren't known to us) and compare the pointer to each to
+	   the pointer argument.  */
+	for (j = 0; j < nbst; ++j)
 	  {
 	if (TREE_VALUE (oldargs) != builtin_structptr_types[j].node)
 	  continue;
@@ -1734,13 +1742,26 @@ match_builtin_function_types (tree newtype, tree oldtype,
 	  last_structptr_types[j] = newtype;
 	break;
 	  }
-  if (j == (sizeof (builtin_structptr_types)
-		/ sizeof (builtin_structptr_types[0]))
-	  && !*strict
-	  && !comptypes (oldtype, newtype))
+
+  if (j == nbst && !comptypes (oldtype, 

RE: [PATCH PR94026] combine missed opportunity to simplify comparisons with zero

2020-03-05 Thread Yangfei (Felix)
> -Original Message-
> From: Jeff Law [mailto:l...@redhat.com]
> Sent: Thursday, March 5, 2020 11:37 PM
> To: Yangfei (Felix) ; gcc-patches@gcc.gnu.org
> Cc: Zhanghaijian (A) 
> Subject: Re: [PATCH PR94026] combine missed opportunity to simplify
> comparisons with zero
> 
> On Wed, 2020-03-04 at 08:39 +, Yangfei (Felix) wrote:
> > Hi,
> >
> >   This is a simple fix for PR94026.
> >   With this fix, combine will try make an extraction if we are in a
> > equality comparison and this is an AND
> >   with a constant which is power of two minus one.  Shift here should
> > be an constant.  For example, combine
> >   will transform (compare (and (lshiftrt x 8) 6) 0) to (compare
> > (zero_extract (x 2 9)) 0).
> >
> >   Added one test case for this.  Bootstrap and tested on both x86_64
> > and
> > aarch64 Linux platform.
> >   Any suggestion?
> >
> > Thanks,
> > Felix
> >
> > gcc:
> > +2020-03-04  Felix Yang  
> > +
> > +   PR rtl-optimization/94026
> > +   * combine.c (make_compound_operation_int): Make an extraction
> > + if we are in a equality comparison and this is an AND with a
> > + constant which is power of two minus one.
> > +
> >
> > gcc/testsuite:
> > +2020-03-04  Felix Yang  
> > +
> > +   PR rtl-optimization/94026
> > +   * gcc.dg/pr94026.c: New test.
> Just a note.  We're in stage4 of our development cycle, meaning we focus on
> regression bugfixes.  I've queued this for evaluation in gcc-11.
> jeff

Sure, this is intended for 11.  Thanks for doing that : - ) 

Best regards,
Felix


Re: [PATCH] drop weakref attribute on function definitions (PR 92799)

2020-03-05 Thread Jeff Law
On Fri, 2020-02-14 at 15:41 -0700, Martin Sebor wrote:
> Because attribute weakref introduces a kind of a definition, it can
> only be applied to declarations of symbols that are not defined.  GCC
> normally issues a warning when the attribute is applied to a defined
> symbol, but PR 92799 shows that it misses some cases on which it then
> leads to an ICE.
> 
> The ICE was introduced in GCC 4.5.  Prior to then, GCC accepted such
> invalid definitions and silently dropped the weakref attribute.
> 
> The attached patch avoids the ICE while again dropping the invalid
> attribute from the definition, except with the (now) usual warning.
> 
> Tested on x86_64-linux.
> 
> I also looked for code bases that make use of attribute weakref to
> rebuild them as another test but couldn't find any.  (There are
> a couple of instances in the Linux kernel but they look #ifdef'd
> out).  Does anyone know of any that do use it that I could try to
> build on Linux?
So you added this check

... || DECL_INITIAL (decl) != error_mark_node

Do you need to check that DECL_INITIAL is not NULL?  IIUC DECL_INITIAL in this
context is a tri-state.

NULL -- DECL is not a function definition
error_mark_node -- it was a function definition, but the body was free'd
everything else -- the function definition

Jeff
> 



Re: [PATCH][testuite] Fix pr80481.C after epilogue vectorization

2020-03-05 Thread Jeff Law
On Thu, 2019-10-31 at 13:55 +, Andre Vieira (lists) wrote:
> Hi,
> 
> I used to have this testcase in my patch when testing but forgot to 
> include it in the patch I sent upstream.  This testcase checks that a 
> vmovaps isn't generated when vectorizing the loop.  When I turn on epilogue 
> vectorization it seems to come back.
> 
> @Jakub: This test has -fopenmp but I debugged through the testcase and 
> loop->simdlen always seems to be 0 for any loop we analyze, so I don't 
> think this is a conflict between my epilogue vectorization and openmp 
> code paths in vect_analyze_loop.
> 
> I suspect the vmovaps is introduced because of the epilogue and it is 
> just a testism, but would like a second opinion.
> 
> Cheers,
> Andre
> 
> gcc/testsuite/ChangeLog:
> 2019-10-31  Andre Vieira  
> 
>  * g++.dg/pr80481.C: Disable epilogue vectorization.
Thanks.  I've reviewed the discussion between Jakub and yourself and think this
is fine for the trunk, even at this late stage as it's only a testsuite change. 
I'll push it momentarily.

Jeff

ps.  In case anyone is wondering, I already had this in my tester and I'm trying
to flush out stuff.



c: ignore initializers for elements of variable-size types [PR93577]

2020-03-05 Thread Joseph Myers
Bug 93577, apparently a regression (although it isn't very clear to me
exactly when it was introduced; tests I made with various past
compilers produced inconclusive results, including e.g. ICEs appearing
with 64-bit-host compilers for some versions but not 32-bit-host
compilers for the same versions) is a C front-end tree-checking ICE
processing initializers for structs using the VLA-in-struct extension.
There is an error for such initializers, but other processing that
still takes place for them results in the ICE.

This patch ensures that processing of initializers for variable-size
types stops earlier to avoid the code that results in the ICE (and
ensures it stops earlier for error_mark_node to avoid ICEs in the
check for variable-size types), adjusts the conditions for the "empty
scalar initializer" diagnostic to avoid consequent excess errors in
the case of a bad type name, and adds tests for a few variations on
what such initializers might look like, as well as tests for cases
identified from ICEs seen with an earlier version of this patch.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.  Applied to 
mainline.

gcc/c:
2020-03-05  Joseph Myers  

PR c/93577
* c-typeck.c (pop_init_level): Do not diagnose initializers as
empty when initialized type is error_mark_node.
(set_designator, process_init_element): Ignore initializers for
elements of a variable-size type or of error_mark_node.

gcc/testsuite:
2020-03-05  Joseph Myers  

PR c/93577
* gcc.dg/pr93577-1.c, gcc.dg/pr93577-2.c, gcc.dg/pr93577-3.c,
gcc.dg/pr93577-4.c, gcc.dg/pr93577-5.c, gcc.dg/pr93577-6.c: New
tests.
* gcc.dg/vla-init-1.c: Expect fewer errors about VLA initializer.

diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 308fcffcfb0..d8025de1996 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -8759,7 +8759,7 @@ pop_init_level (location_t loc, int implicit,
 the element, after verifying there is just one.  */
   if (vec_safe_is_empty (constructor_elements))
{
- if (!constructor_erroneous)
+ if (!constructor_erroneous && constructor_type != error_mark_node)
error_init (loc, "empty scalar initializer");
  ret.value = error_mark_node;
}
@@ -8836,8 +8836,8 @@ set_designator (location_t loc, bool array,
   enum tree_code subcode;
 
   /* Don't die if an entire brace-pair level is superfluous
- in the containing level.  */
-  if (constructor_type == NULL_TREE)
+ in the containing level, or for an erroneous type.  */
+  if (constructor_type == NULL_TREE || constructor_type == error_mark_node)
 return true;
 
   /* If there were errors in this designator list already, bail out
@@ -8845,6 +8845,12 @@ set_designator (location_t loc, bool array,
   if (designator_erroneous)
 return true;
 
+  /* Likewise for an initializer for a variable-size type.  Those are
+ diagnosed in digest_init.  */
+  if (COMPLETE_TYPE_P (constructor_type)
+  && TREE_CODE (TYPE_SIZE (constructor_type)) != INTEGER_CST)
+return true;
+
   if (!designator_depth)
 {
   gcc_assert (!constructor_range_stack);
@@ -9955,8 +9961,14 @@ process_init_element (location_t loc, struct c_expr 
value, bool implicit,
 }
 
   /* Ignore elements of a brace group if it is entirely superfluous
- and has already been diagnosed.  */
-  if (constructor_type == NULL_TREE)
+ and has already been diagnosed, or if the type is erroneous.  */
+  if (constructor_type == NULL_TREE || constructor_type == error_mark_node)
+return;
+
+  /* Ignore elements of an initializer for a variable-size type.
+ Those are diagnosed in digest_init.  */
+  if (COMPLETE_TYPE_P (constructor_type)
+  && TREE_CODE (TYPE_SIZE (constructor_type)) != INTEGER_CST)
 return;
 
   if (!implicit && warn_designated_init && !was_designated
diff --git a/gcc/testsuite/gcc.dg/pr93577-1.c b/gcc/testsuite/gcc.dg/pr93577-1.c
new file mode 100644
index 000..31023d79d99
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr93577-1.c
@@ -0,0 +1,16 @@
+/* Test ICE with variable-size struct initializer: bug 93577.  */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void
+f (int c)
+{
+  struct s
+  {
+int x[c];
+struct
+{
+  int z;
+} nest;
+  } v = { 1, 2 }; /* { dg-error "variable-sized object may not be initialized" 
} */
+}
diff --git a/gcc/testsuite/gcc.dg/pr93577-2.c b/gcc/testsuite/gcc.dg/pr93577-2.c
new file mode 100644
index 000..c61589ea670
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr93577-2.c
@@ -0,0 +1,16 @@
+/* Test ICE with variable-size struct initializer: bug 93577.  */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void
+f (int c)
+{
+  struct s
+  {
+int x[c];
+struct
+{
+  int a, b;
+} nest;
+  } v = { .nest.b = 1, .nest.a = 2 }; /* { dg-error "variable-sized object may 
not be initialized" } */
+}
diff --git a/gcc/testsuite/gcc.dg/pr93577-3.c 

Re: [PATCH 1/6] i386: Properly encode vector registers in vector move

2020-03-05 Thread Jeff Law
On Sat, 2020-02-29 at 06:16 -0800, H.J. Lu wrote:
> On x86, when AVX and AVX512 are enabled, vector move instructions can
> be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):
> 
>0: c5 f9 6f d1 vmovdqa %xmm1,%xmm2
>4: 62 f1 fd 08 6f d1   vmovdqa64 %xmm1,%xmm2
> 
> We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
> only supports 512-bit vector moves.  AVX512F + AVX512VL supports 128-bit
> and 256-bit vector moves.  xmm16-xmm31 and ymm16-ymm31 are disallowed in
> 128-bit and 256-bit modes when AVX512VL is disabled.  Mode attributes on
> x86 vector move patterns indicate target preferences of vector move
> encoding.  For scalar register to register move, we can use 512-bit
> vector move instructions to move 32-bit/64-bit scalar if AVX512VL isn't
> available.  With AVX512F and AVX512VL, we should use VEX encoding for
> 128-bit/256-bit vector moves if upper 16 vector registers aren't used.
> This patch adds a function, ix86_output_ssemov, to generate vector moves:
> 
> 1. If zmm registers are used, use EVEX encoding.
> 2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
> will be generated.
> 3. If xmm16-xmm31/ymm16-ymm31 registers are used:
>a. With AVX512VL, AVX512VL vector moves will be generated.
>b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register
>   move will be done with zmm register move.
> 
> There is no need to set mode attribute to XImode explicitly since
> ix86_output_ssemov can properly encode xmm16-xmm31/ymm16-ymm31 registers
> with and without AVX512VL.
> 
> Tested on AVX2 and AVX512 with and without --with-arch=native.
> 
> gcc/
> 
>   PR target/89229
>   PR target/89346
>   * config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
>   * config/i386/i386.c (ix86_get_ssemov): New function.
>   (ix86_output_ssemov): Likewise.
>   * config/i386/sse.md (VMOVE:mov_internal): Call
>   ix86_output_ssemov for TYPE_SSEMOV.  Remove TARGET_AVX512VL
>   check.
>   (*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV.
>   (*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV.
>   Remove ext_sse_reg_operand and TARGET_AVX512VL check.
>   (*movti_internal): Likewise.
>   (*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV.
> 
> gcc/testsuite/
> 
>   PR target/89229
>   PR target/89346
>   * gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated.
>   * gcc.target/i386/pr89346.c: New test.
> 
> gcc/testsuite/
> 
>   PR target/89229
>   * gcc.target/i386/pr89229-2a.c: New test.
>   * gcc.target/i386/pr89229-2b.c: Likewise.
>   * gcc.target/i386/pr89229-2c.c: Likewise.
>   * gcc.target/i386/pr89229-3a.c: Likewise.
>   * gcc.target/i386/pr89229-3b.c: Likewise.
>   * gcc.target/i386/pr89229-3c.c: Likewise.
OK.  Let's get this one installed, let the various testers out there chew on it
for a day, then we'll iterate through the rest.

Thanks again for your patience.

jeff
> 



Re: [PATCH 01/10] i386: Properly encode vector registers in vector move

2020-03-05 Thread Jeff Law
On Fri, 2020-02-28 at 18:15 -0800, H.J. Lu wrote:
> On Fri, Feb 28, 2020 at 4:16 PM Jeff Law  wrote:
> > On Thu, 2020-02-27 at 06:50 -0800, H.J. Lu wrote:
> > > How about this?  If it looks OK, I will post the whole patch set.
> > It's better.  I'm guessing the two cases that were previously handled with
> > vextract/vbroadcast aren't supposed to happen?  They're caught here IIUC:
> > 
> > > +  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
> > > + we can only use zmm register move without memory operand.  */
> > > +   if (evex_reg_p
> > > +   && !TARGET_AVX512VL
> > > +   && GET_MODE_SIZE (mode) < 64)
> > > + {
> > > +   if (memory_operand (operands[0], mode)
> > > +|| memory_operand (operands[1], mode))
> > > + gcc_unreachable ();
> > > 
> > 
> > If they truly can't happen, that's fine.  My worry is I don't see changes to
> > the operand predicates or constraints which would avoid this case.   Is it
> > prevented by the mode iterator on the operands?  Again, just want to make
> > sure
> > I understand why the vextract/vbroadcast stuff isn't in the new code.
> 
> There are no GCC testcases to show that they are actually ever used.   That is
> why I removed them and added gcc_unreachable ().
Understood.   

> 
> 4 patches don't require changes in ix86_output_ssemov/ix86_get_ssemov:
> 
> https://gitlab.com/x86-gcc/gcc/-/commit/426f2464abb80b97b8533f9efa15bbe72e6aa888
> https://gitlab.com/x86-gcc/gcc/-/commit/ec5b40d77f7a4424935275f1a7ccedbce83b6f54
> https://gitlab.com/x86-gcc/gcc/-/commit/92fdd98234984f86b66fb5403dd828661cd7999f
> https://gitlab.com/x86-gcc/gcc/-/commit/f8fa5e571caf6740b36d042d631b4ace11683cd7
> 
> I can combine them into a single patch.
That sounds reasonable -- it should be trivial to review.  Then we can work
through the patches that require changes to ix86_output_ssemov.

Thanks for your patience.  I'm juggling a fair amount of stuff right now.

jeff




GCC 9.3 Status Report (2020-03-05)

2020-03-05 Thread Jakub Jelinek
Status
======

The GCC 9 branch is now frozen for blocking regressions and documentation
fixes only, all changes to the branch require a RM approval now.


Quality Data
============

Priority          #   Change from last report
--------        ---   -----------------------
P1                0
P2              226   -  17
P3               37   +   2
P4              168   -   1
P5               23
--------        ---   -----------------------
Total P1-P3     263   -  15
Total           454   -  16


Previous Report
===============

https://gcc.gnu.org/ml/gcc/2020-02/msg00238.html



[PATCH] c++: Fix ABI issue with alignas on armv7hl [PR94050]

2020-03-05 Thread Marek Polacek
The static_assert in the following test was failing on armv7hl because
we were disregarding the alignas specifier on Cell.  BaseShape's data
takes up 20B on 32-bit architectures, but we failed to round up its
TYPE_SIZE.  This happens since the

patch: here, in layout_class_type for TenuredCell, we see that the size
of TenuredCell and its CLASSTYPE_AS_BASE match, so we set

  CLASSTYPE_AS_BASE (t) = t;

But while TYPE_USER_ALIGN of TenuredCell was 0, TYPE_USER_ALIGN of its
CLASSTYPE_AS_BASE was 1.  After we replace it, it's no longer 1.  Then
we perform layout_empty_base_or_field for TenuredCell and since
TYPE_USER_ALIGN of its CLASSTYPE_AS_BASE is now 0, we don't do this
adjustment:

  if (CLASSTYPE_USER_ALIGN (type))
{
  rli->record_align = MAX (rli->record_align, CLASSTYPE_ALIGN (type));
  if (warn_packed)
rli->unpacked_align = MAX (rli->unpacked_align, CLASSTYPE_ALIGN (type));
  TYPE_USER_ALIGN (rli->t) = 1;
}

where rli->t is BaseShape.  Then finalize_record_size won't use the
correct rli->record_align and therefore
  /* Round the size up to be a multiple of the required alignment.  */
  TYPE_SIZE (rli->t) = round_up (unpadded_size, TYPE_ALIGN (rli->t));
after this we end up with the wrong size.

Since the original fix was to avoid creating extra copies for LTO
purposes, I think the following fix should be acceptable.

Bootstrapped/regtested on x86_64-linux, ok for trunk?  I verified the
fix on the attached testcase using a --target=armv7hl-linux-gnueabi cross,
but haven't actually run the testsuite.

PR c++/94050 - ABI issue with alignas on armv7hl.
* class.c (layout_class_type): Don't replace a class's
CLASSTYPE_AS_BASE if it is CLASSTYPE_USER_ALIGN.

* g++.dg/abi/align3.C: New test.
---
 gcc/cp/class.c|  4 
 gcc/testsuite/g++.dg/abi/align3.C | 12 
 2 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/abi/align3.C

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index b3787f75d7b..4a17751d70b 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -6705,6 +6705,10 @@ layout_class_type (tree t, tree *virtuals_p)
 
   /* If we didn't end up needing an as-base type, don't use it.  */
   if (CLASSTYPE_AS_BASE (t) != t
+  /* If T's CLASSTYPE_AS_BASE is TYPE_USER_ALIGN, but T is not,
+replacing the as-base type would change CLASSTYPE_USER_ALIGN,
+causing us to lose the user-specified alignment as in PR94050.  */
+  && !CLASSTYPE_USER_ALIGN (t)
   && tree_int_cst_equal (TYPE_SIZE (t),
 TYPE_SIZE (CLASSTYPE_AS_BASE (t
 CLASSTYPE_AS_BASE (t) = t;
diff --git a/gcc/testsuite/g++.dg/abi/align3.C 
b/gcc/testsuite/g++.dg/abi/align3.C
new file mode 100644
index 000..a56693a34b8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/abi/align3.C
@@ -0,0 +1,12 @@
+// PR c++/94050 - ABI issue with alignas on armv7hl.
+// { dg-do compile { target c++11 } }
+
+struct alignas(8) Cell {};
+struct TenuredCell : public Cell {};
+struct BaseShape : public TenuredCell {
+  void *p;
+  unsigned q, r;
+  void *s;
+  __UINTPTR_TYPE__ t;
+};
+static_assert (sizeof (BaseShape) % 8 == 0, "");

base-commit: 2d22ab64c4774d7d30c7e014652b28a13d744aec
-- 
Marek Polacek • Red Hat, Inc. • 300 A St, Boston, MA



Re: [RFA/RFC] [tree-optimization/91890] [P1 Regression] Avoid clobbering useful location in Wrestrict code

2020-03-05 Thread Jeff Law
On Thu, 2020-03-05 at 19:51 +0100, Richard Biener wrote:
> On March 5, 2020 3:55:57 PM GMT+01:00, Jeff Law  wrote:
> > On Thu, 2020-03-05 at 08:51 +0100, Richard Biener wrote:
> > > On Thu, Mar 5, 2020 at 12:49 AM Jeff Law  wrote:
> > > > On Wed, 2020-03-04 at 09:22 -0700, Martin Sebor wrote:
> > > > > I don't remember why the code in -Wrestrict unconditionally
> > overwrites
> > > > > the statement location rather than only when it's not available,
> > but
> > > > > I do remember adding conditional code like in your patch in
> > r277076
> > > > > to deal with missing location on the statement.  So either your
> > fix
> > > > > or something like the hunk below might be the right solution (if
> > we
> > > > > go with the code below, abstracting it into a utility function
> > might
> > > > > be nice).
> > > > So there's several chunks that are fairly similar to what you
> > referenced in
> > > > maybe_warn_pointless_strcmp.  Factoring all of them into a single
> > location is
> > > > pretty easy.
> > > > 
> > > > That also gives us a nice place where we can experiment with "does
> > extraction
> > > > of
> > > > location information from the expression ever help".  The answer
> > is, it
> > > > doesn't,
> > > > at least not within our testsuite when run on x86_64.
> > > > 
> > > > I'm hesitant to remove the code that extracts the location out of
> > the
> > > > expression,
> > > > but could be convinced to do so.
> > > > 
> > > > Thoughts?
> > > 
> > > Using anything but the actual stmt location is prone to end up at
> > random places
> > > due to tree sharing issues, CSE and copy propagation.  Simply
> > consider
> > I'd tend to agree.  My conservatism is due to being in stage4 and not
> > knowing
> > precisely why we have code to extract the location from the operand to
> > begin
> > with.
> > 
> > 
> > > where we happily forward p = &one[0] to both uses injecting
> > > a "faulty" location.  Well, it's actually the correct location
> > > computing &one[0] but irrelevant for the actual call.
> > Exactly.
> > 
> > > So the question is why we end up with UNKNOWN_LOCATION
> > > for such call and if why we need to bother emit a diagnostic
> > > at all (and why emitting it for another possibly random location is a
> > good idea
> > > instead of maybe simply emitting it without location).
> > One might argue that scenario should be a gcc_unreachable rather than
> > extracting
> > a likely bogus location.  I'm even more hesitant to do that for gcc-10,
> > but it
> > might make sense for gcc-11.
> > 
> > My first inclination would be to do the refactor, but leave in the code
> > that
> > extracts a location from the expression.  We'd close out the regression
> > BZ and
> > open a new one to remove the expression handling bits for gcc-11 (or
> > turn them
> > into a gcc_unreachable)
> > 
> > Does that work for  you Richi?
> 
> We should avoid regressing in other ways of course. Given Martin's followup 
> I'm
> not sure what to do but eventually stop using %G and remove the odd expression
> location code? 
Yea, I think that's the conclusion Martin and I came to today as well.  Do the
refactoring now, but leaving in the expression location bits, then remove the
expression location bits in gcc-11 after further testing.

jeff



[pushed 9] c++: Avoid ICE on infinite recursion with concepts.

2020-03-05 Thread Jason Merrill
This was simple enough to backport even though it's concepts.

gcc/cp/ChangeLog
2020-03-05  Jason Merrill  

PR c++/88395
PR c++/93551
* constraint.cc (constraints_satisfied_p): Use push_tinst_level.
---
 gcc/cp/constraint.cc  |  4 
 .../g++.dg/concepts/concepts-pr88395.C| 23 +++
 2 files changed, 27 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/concepts/concepts-pr88395.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 9884eb0db50..cbd9c141809 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -2390,7 +2390,11 @@ constraints_satisfied_p (tree decl)
   ci = get_constraints (decl);
 }
 
+  if (!push_tinst_level (decl))
+return true;
   tree eval = satisfy_associated_constraints (ci, args);
+  pop_tinst_level ();
+
   return eval == boolean_true_node;
 }
 
diff --git a/gcc/testsuite/g++.dg/concepts/concepts-pr88395.C 
b/gcc/testsuite/g++.dg/concepts/concepts-pr88395.C
new file mode 100644
index 000..da5834bcc07
--- /dev/null
+++ b/gcc/testsuite/g++.dg/concepts/concepts-pr88395.C
@@ -0,0 +1,23 @@
+// { dg-options "-std=c++17 -fconcepts" }
+
+template 
+concept Concept2 = requires (T t, U u)
+{
+t += u; // { dg-error "template instantiation depth" }
+};
+
+template 
+concept Concept = Concept2 ;
+
+struct S
+{
+template 
+constexpr S& operator += (T o);
+};
+
+constexpr S operator * (S a, S b)
+{
+return a += b;
+}
+
+// { dg-prune-output "compilation terminated" }

base-commit: 128418afd7ee1bad9185eb88a6e224c2faf90792
-- 
2.18.1



[committed] i386: Fix some -O0 avx2intrin.h and xopintrin.h intrinsic macros [PR94046]

2020-03-05 Thread Jakub Jelinek
Hi!

As the testcases show, the macros we have for -O0 for intrinsics that require
constant argument(s) should first cast the argument to the type the -O1+
inline uses and afterwards to whatever type e.g. a builtin needs.
The PR reported one which violated this, and I've grepped for all double-casts
and grepped out from that meaningful casts where the __m{128,256,512}{,d,i}
first cast is cast to same sized __v* type and has the same kind of element
type (float, double, integral).  These 7 macros were using different casts,
and I've double checked them against the inline function types.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk
and 9.3 as obvious, queued for backporting to 8.5.

2020-03-05  Jakub Jelinek  

PR target/94046
* config/i386/avx2intrin.h (_mm_mask_i32gather_ps): Fix first cast of
SRC and MASK arguments to __m128 from __m128d.
(_mm256_mask_i32gather_ps): Fix first cast of MASK argument to __m256
from __m256d.
(_mm_mask_i64gather_ps): Fix first cast of MASK argument to __m128
from __m128d.
* config/i386/xopintrin.h (_mm_permute2_pd): Fix first cast of C
argument to __m128i from __m128d.
(_mm256_permute2_pd): Fix first cast of C argument to __m256i from
__m256d.
(_mm_permute2_ps): Fix first cast of C argument to __m128i from __m128.
(_mm256_permute2_ps): Fix first cast of C argument to __m256i from
__m256.

* g++.target/i386/pr94046-1.C: New test.
* g++.target/i386/pr94046-2.C: New test.

--- gcc/config/i386/avx2intrin.h.jj 2020-01-12 11:54:36.313414917 +0100
+++ gcc/config/i386/avx2intrin.h2020-03-05 15:22:12.684531786 +0100
@@ -1736,10 +1736,10 @@ _mm256_mask_i64gather_epi32 (__m128i __s
(int)SCALE)
 
 #define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)\
-  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128d)SRC,   \
+  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)SRC,\
(float const *)BASE, \
(__v4si)(__m128i)INDEX,  \
-   (__v4sf)(__m128d)MASK,   \
+   (__v4sf)(__m128)MASK,\
(int)SCALE)
 
 #define _mm256_i32gather_ps(BASE, INDEX, SCALE)   \
@@ -1754,7 +1754,7 @@ _mm256_mask_i64gather_epi32 (__m128i __s
   (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC,   \
(float const *)BASE,\
(__v8si)(__m256i)INDEX, \
-   (__v8sf)(__m256d)MASK,  \
+   (__v8sf)(__m256)MASK,   \
(int)SCALE)
 
 #define _mm_i64gather_ps(BASE, INDEX, SCALE)   \
@@ -1769,7 +1769,7 @@ _mm256_mask_i64gather_epi32 (__m128i __s
   (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC,\
(float const *)BASE, \
(__v2di)(__m128i)INDEX,  \
-   (__v4sf)(__m128d)MASK,   \
+   (__v4sf)(__m128)MASK,\
(int)SCALE)
 
 #define _mm256_i64gather_ps(BASE, INDEX, SCALE)
\
--- gcc/config/i386/xopintrin.h.jj  2020-01-12 11:54:36.336414570 +0100
+++ gcc/config/i386/xopintrin.h 2020-03-05 15:40:31.663241763 +0100
@@ -814,25 +814,25 @@ _mm256_permute2_ps (__m256 __X, __m256 _
 #define _mm_permute2_pd(X, Y, C, I)\
   ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X),  \
(__v2df)(__m128d)(Y),   \
-   (__v2di)(__m128d)(C),   \
+   (__v2di)(__m128i)(C),   \
(int)(I)))
 
 #define _mm256_permute2_pd(X, Y, C, I) \
   ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X),   \
   (__v4df)(__m256d)(Y),\
-  (__v4di)(__m256d)(C),\
+  (__v4di)(__m256i)(C),\
   (int)(I)))
 
 #define _mm_permute2_ps(X, Y, C, I)\
   ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X),\
   (__v4sf)(__m128)(Y), \
-  (__v4si)(__m128)(C), \
+  (__v4si)(__m128i)(C),\
   (int)(I)))
 
 #define 

Re: [RFA/RFC] [tree-optimization/91890] [P1 Regression] Avoid clobbering useful location in Wrestrict code

2020-03-05 Thread Richard Biener
On March 5, 2020 3:55:57 PM GMT+01:00, Jeff Law  wrote:
>On Thu, 2020-03-05 at 08:51 +0100, Richard Biener wrote:
>> On Thu, Mar 5, 2020 at 12:49 AM Jeff Law  wrote:
>> > On Wed, 2020-03-04 at 09:22 -0700, Martin Sebor wrote:
>> > > I don't remember why the code in -Wrestrict unconditionally
>overwrites
>> > > the statement location rather than only when it's not available,
>but
>> > > I do remember adding conditional code like in your patch in
>r277076
>> > > to deal with missing location on the statement.  So either your
>fix
>> > > or something like the hunk below might be the right solution (if
>we
>> > > go with the code below, abstracting it into a utility function
>might
>> > > be nice).
>> > So there's several chunks that are fairly similar to what you
>referenced in
>> > maybe_warn_pointless_strcmp.  Factoring all of them into a single
>location is
>> > pretty easy.
>> > 
>> > That also gives us a nice place where we can experiment with "does
>extraction
>> > of
>> > location information from the expression ever help".  The answer
>is, it
>> > doesn't,
>> > at least not within our testsuite when run on x86_64.
>> > 
>> > I'm hesitant to remove the code that extracts the location out of
>the
>> > expression,
>> > but could be convinced to do so.
>> > 
>> > Thoughts?
>> 
>> Using anything but the actual stmt location is prone to end up at
>random places
>> due to tree sharing issues, CSE and copy propagation.  Simply
>consider
>I'd tend to agree.  My conservatism is due to being in stage4 and not
>knowing
>precisely why we have code to extract the location from the operand to
>begin
>with.
>
>
>> where we happily forward p = &one[0] to both uses injecting
>> a "faulty" location.  Well, it's actually the correct location
>> computing &one[0] but irrelevant for the actual call.
>Exactly.
>
>> 
>> So the question is why we end up with UNKNOWN_LOCATION
>> for such call and if why we need to bother emit a diagnostic
>> at all (and why emitting it for another possibly random location is a
>good idea
>> instead of maybe simply emitting it without location).
>One might argue that scenario should be a gcc_unreachable rather than
>extracting
>a likely bogus location.  I'm even more hesitant to do that for gcc-10,
>but it
>might make sense for gcc-11.
>
>My first inclination would be to do the refactor, but leave in the code
>that
>extracts a location from the expression.  We'd close out the regression
>BZ and
>open a new one to remove the expression handling bits for gcc-11 (or
>turn them
>into a gcc_unreachable)
>
>Does that work for  you Richi?

We should avoid regressing in other ways of course. Given Martin's followup I'm 
not sure what to do but eventually stop using %G and remove the odd expression 
location code? 

Richard. 

>jeff



Re: [RFA/RFC] [tree-optimization/91890] [P1 Regression] Avoid clobbering useful location in Wrestrict code

2020-03-05 Thread Richard Biener
On March 5, 2020 4:53:43 PM GMT+01:00, Martin Sebor  wrote:
>On 3/5/20 12:51 AM, Richard Biener wrote:
>> On Thu, Mar 5, 2020 at 12:49 AM Jeff Law  wrote:
>>>
>>> On Wed, 2020-03-04 at 09:22 -0700, Martin Sebor wrote:

 I don't remember why the code in -Wrestrict unconditionally
>overwrites
 the statement location rather than only when it's not available,
>but
 I do remember adding conditional code like in your patch in r277076
 to deal with missing location on the statement.  So either your fix
 or something like the hunk below might be the right solution (if we
 go with the code below, abstracting it into a utility function
>might
 be nice).
>>> So there's several chunks that are fairly similar to what you
>referenced in
>>> maybe_warn_pointless_strcmp.  Factoring all of them into a single
>location is
>>> pretty easy.
>>>
>>> That also gives us a nice place where we can experiment with "does
>extraction of
>>> location information from the expression ever help".  The answer is,
>it doesn't,
>>> at least not within our testsuite when run on x86_64.
>>>
>>> I'm hesitant to remove the code that extracts the location out of
>the expression,
>>> but could be convinced to do so.
>>>
>>> Thoughts?
>> 
>> Using anything but the actual stmt location is prone to end up at
>random places
>> due to tree sharing issues, CSE and copy propagation.  Simply
>consider
>> 
>> char one[50];
>> char two[50];
>> 
>> void
>> test_strncat (void)
>> {
>>char *p = one;
>>(void) __builtin_strcpy (p, "gh");
>>(void) __builtin_strcpy (two, "ef");
>> 
>> #pragma GCC diagnostic push
>> #pragma GCC diagnostic ignored "-Wstringop-overflow="
>> #pragma GCC diagnostic ignored "-Warray-bounds"
>>(void) __builtin_strncat (p, two, 99);
>
>Interestingly, while the expression location points to p, the warning
>points to the statement:
>
>warning: ‘__builtin_strncat’ forming offset [50, 98] is out of the 
>bounds [0, 50] of object ‘one’ with type ‘char[50]’ [-Warray-bounds]
>14 |   (void) __builtin_strncat (p, two, 99);
>   |  ^~
>
>As it happens, the %G directive in the warning_at() call replaces
>the location passed to it with that of the Gimple call argument to
>the %G directive.  Removing the %G directive turns the warning into:
>
>warning: ‘__builtin_strncat’ forming offset [50, 98] is out of the 
>bounds [0, 50] of object ‘one’ with type ‘char[50]’ [-Warray-bounds]
> 7 |   char *p = one;
>   | ^~~
>
>But the code that checks the scope of #pragma GCC diagnostic uses
>the original location passed to warning_at, not the location set
>subsequently by the %G directive, and so the two are out of synch.

Ah, I see. 

>We've discussed removing the %G/%K directives before and having
>the diagnostic machinery always print the inlining context instead.
>Let me look into it for GCC 11.
>
>Martin
>
>> #pragma GCC diagnostic pop
>> }
>> 
>> where we happily forward p = &one[0] to both uses injecting
>> a "faulty" location.  Well, it's actually the correct location
>> computing &one[0] but irrelevant for the actual call.
>> 
>> So the question is why we end up with UNKNOWN_LOCATION
>> for such call and if why we need to bother emit a diagnostic
>> at all (and why emitting it for another possibly random location is a
>good idea
>> instead of maybe simply emitting it without location).
>> 
>> Richard.
>> 
>>> Jeff



Re: [PATCH 2/1] libstdc++: Add missing friend declaration to join_view::_Sentinel

2020-03-05 Thread Patrick Palka
On Thu, 5 Mar 2020, Jonathan Wakely wrote:

> On 05/03/20 11:24 -0500, Patrick Palka wrote:
> > The converting constructor of join_view::_Sentinel needs to be able to
> > access the private members of join_view::_Sentinel.
> > 
> > libstdc++-v3/ChangeLog:
> > 
> > * include/std/ranges (join_view::_Sentinel<_Const>): Befriend
> > join_view::_Sentinel.
> > * testsuite/std/ranges/adaptors/join.cc: Augment test.
> 
> OK.

Thanks for the review.  Unfortunately the added test fails to compile
without first a fix for PR 93978 (or alternatively, we can add -O0 to
dg-options in join.cc).



Re: [GCC][PATCH][AArch32] ACLE intrinsics bfloat16 vmmla and vfma for AArch32 AdvSIMD

2020-03-05 Thread Kyrill Tkachov



On 3/5/20 11:22 AM, Kyrill Tkachov wrote:

Hi Delia,

On 3/4/20 5:20 PM, Delia Burduv wrote:

Hi,

This is the latest version of the patch.

Thanks,
Delia

On 2/21/20 11:41 AM, Kyrill Tkachov wrote:

Hi Delia,

On 2/19/20 5:23 PM, Delia Burduv wrote:

Hi,

Here is the latest version of the patch. It just has some minor 
formatting changes that were brought up by Richard Sandiford in the 
AArch64 patches


Thanks,
Delia

On 1/31/20 3:23 PM, Delia Burduv wrote:
Here is the updated patch. The changes are minor, so let me know 
if there is anything else to fix or if it can be committed.


Thank you,
Delia

On 1/30/20 2:55 PM, Kyrill Tkachov wrote:

Hi Delia,


On 1/28/20 4:44 PM, Delia Burduv wrote:

Ping.
 


*From:* Delia Burduv 
*Sent:* 22 January 2020 17:26
*To:* gcc-patches@gcc.gnu.org 
*Cc:* ni...@redhat.com ; Richard Earnshaw 
; Ramana Radhakrishnan 
; Kyrylo Tkachov 

*Subject:* Re: [GCC][PATCH][AArch32] ACLE intrinsics bfloat16 
vmmla and vfma for AArch32 AdvSIMD

Ping.

I have read Richard Sandiford's comments on the AArch64 patches 
and I
will apply what is relevant to this patch as well. Particularly, 
I will
change the tests to use the exact input and output registers and 
I will

change the types of the rtl patterns.



Please send the updated patches so that someone can commit them 
for you once they're reviewed.


Thanks,

Kyrill




On 12/20/19 6:44 PM, Delia Burduv wrote:
> This patch adds the ARMv8.6 ACLE intrinsics for vmmla, vfmab 
and vfmat

> as part of the BFloat16 extension.
> (https://developer.arm.com/docs/101028/latest.)
> The intrinsics are declared in arm_neon.h and the RTL patterns 
are

> defined in neon.md.
> Two new tests are added to check assembler output and lane 
indices.

>
> This patch depends on the Arm back-end patch.
> (https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01448.html)
>
> Tested for regression on arm-none-eabi and armeb-none-eabi. I 
don't have
> commit rights, so if this is ok can someone please commit it 
for me?

>
> gcc/ChangeLog:
>
> 2019-11-12  Delia Burduv
>
>        * config/arm/arm_neon.h (vbfmmlaq_f32): New.
>        (vbfmlalbq_f32): New.
>        (vbfmlaltq_f32): New.
>        (vbfmlalbq_lane_f32): New.
>        (vbfmlaltq_lane_f32): New.
>        (vbfmlalbq_laneq_f32): New.
>        (vbfmlaltq_laneq_f32): New.
>        * config/arm/arm_neon_builtins.def (vbfmmla): New.
>        (vbfmab): New.
>        (vbfmat): New.
>        (vbfmab_lane): New.
>        (vbfmat_lane): New.
>        (vbfmab_laneq): New.
>        (vbfmat_laneq): New.
>        * config/arm/iterators.md (BF_MA): New int
iterator.

>        (bt): New int attribute.
>        (VQXBF): Copy of VQX with V8BF.
>        (V_HALF): Added V8BF.
>        * config/arm/neon.md (neon_vbfmmlav8hi):
New insn.

>        (neon_vbfmav8hi): New insn.
>        (neon_vbfma_lanev8hi): New
insn.
>        (neon_vbfma_laneqv8hi): New
expand.
>        (neon_vget_high): Changed
iterator to VQXBF.
>        * config/arm/unspecs.md (UNSPEC_BFMMLA): New
UNSPEC.

>        (UNSPEC_BFMAB): New UNSPEC.
>        (UNSPEC_BFMAT): New UNSPEC.
>
> 2019-11-12  Delia Burduv
>
>        * gcc.target/arm/simd/bf16_ma_1.c:
New test.
>        * gcc.target/arm/simd/bf16_ma_2.c:
New test.
>        * gcc.target/arm/simd/bf16_mmla_1.c:
New test.


This looks good, a few minor things though...


diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 
3c78f435009ab027f92693d00ab5b40960d5419d..81f8008ea6a5fb11eb09f6685ba24bb0c54fb248 
100644

--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -18742,6 +18742,64 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, 
float32x4_t __a, float32x4_t __b,
 ï¿½ï¿½ return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, 
__index);

 ï¿½}

+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+bf16")
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmmlaq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
+{
+� return __builtin_neon_vbfmmlav8bf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmlalbq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
+{
+� return __builtin_neon_vbfmabv8bf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ 

Re: [GCC][PATCH][AArch64] ACLE intrinsics for BFCVTN, BFCVTN2 (AArch64 AdvSIMD) and BFCVT (AArch64 FP)

2020-03-05 Thread Delia Burduv

Hi,

Here is the latest version of the patch. That test should now work.

Thanks,
Delia

On 3/5/20 11:06 AM, Richard Sandiford wrote:

Hi,

Thanks for the update and sorry for the slow reply.

When I try the patch locally I get:

FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O0  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O1  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2 -flto 
-fno-use-linker-plugin -flto-partition=none  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2 -flto 
-fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O3 -g  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -Og -g  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -Os  (test for 
excess errors)

I think that's because:

Delia Burduv  writes:

diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index 
3759c0d1cb449a7f0125cc2a1433127564d66622..fa7080c2953bc3254f01d842a8afef917d469080
 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -27,6 +27,19 @@
  #ifndef _AARCH64_BF16_H_
  #define _AARCH64_BF16_H_
  
+#pragma GCC push_options

+#pragma GCC target ("+nothing+bf16")
+
  typedef __bf16 bfloat16_t;
+typedef float float32_t;
+
+__extension__ extern __inline bfloat16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvth_bf16_f32 (float32_t __a)
+{
+  return __builtin_aarch64_bfcvtbf (__a);
+}
+
+#pragma GCC pop_options


"+bf16" implicitly enables "+simd", so functions guarded with
"+nothing+bf16" are only available when "+simd" is available.
I think we want "+nothing+bf16+nosimd" instead.  (Haven't tested
that though.)

Very minor, but: it might be clearer to leave the typedefs outside
of the #pragma block.  It doesn't make any difference to the behaviour,
but it emphasises that the typedefs really are available unconditionally.

Looks ready to go otherwise.

Thanks,
Richard

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d8bb96f8ed60648477f952ea6b88eae67cc9c921..cc0bd0e6b592528e4b1559e9a3f5b0153511dffd 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -714,3 +714,9 @@
   VAR1 (TERNOP, simd_smmla, 0, v16qi)
   VAR1 (TERNOPU, simd_ummla, 0, v16qi)
   VAR1 (TERNOP_SSUS, simd_usmmla, 0, v16qi)
+
+  /* Implemented by aarch64_bfcvtn{q}{2}  */
+  VAR1 (UNOP, bfcvtn, 0, v4bf)
+  VAR1 (UNOP, bfcvtn_q, 0, v8bf)
+  VAR1 (BINOP, bfcvtn2, 0, v8bf)
+  VAR1 (UNOP, bfcvt, 0, bf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 89aaf8c018e3340dd2d53fc2a6538d3d1220b103..035f3163223d0b618fa28beb007f2f70c7d6c060 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7207,3 +7207,32 @@
   "mmla\\t%0.4s, %2.16b, %3.16b"
   [(set_attr "type" "neon_mla_s_q")]
 )
+
+;; bfcvtn
+(define_insn "aarch64_bfcvtn"
+  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
+(unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
+UNSPEC_BFCVTN))]
+  "TARGET_BF16_SIMD"
+  "bfcvtn\\t%0.4h, %1.4s"
+  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
+)
+
+(define_insn "aarch64_bfcvtn2v8bf"
+  [(set (match_operand:V8BF 0 "register_operand" "=w")
+(unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
+  (match_operand:V4SF 2 "register_operand" "w")]
+  UNSPEC_BFCVTN2))]
+  "TARGET_BF16_SIMD"
+  "bfcvtn2\\t%0.8h, %2.4s"
+  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
+)
+
+(define_insn "aarch64_bfcvtbf"
+  [(set (match_operand:BF 0 "register_operand" "=w")
+(unspec:BF [(match_operand:SF 1 "register_operand" "w")]
+UNSPEC_BFCVT))]
+  "TARGET_BF16_FP"
+  "bfcvt\\t%h0, %s1"
+  [(set_attr "type" "f_cvt")]
+)
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index 3759c0d1cb449a7f0125cc2a1433127564d66622..984875dcc014300c489209c11abf41b1c47b7fbe 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -28,5 +28,18 @@
 #define _AARCH64_BF16_H_
 
 typedef __bf16 bfloat16_t;
+typedef float float32_t;
+
+#pragma GCC push_options
+#pragma GCC target ("+nothing+bf16+nosimd")
+
+__extension__ extern __inline bfloat16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvth_bf16_f32 (float32_t __a)
+{
+  return __builtin_aarch64_bfcvtbf (__a);
+}
+
+#pragma GCC pop_options
 
 #endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 

[PATCH] arm: Fix incorrect modes with 'borrow' operations [PR90311]

2020-03-05 Thread Richard Earnshaw

Looking through the arm backend I noticed that the modes used to pass
comparison types into subtract-with-carry operations were being
incorrectly set.  The result is that the compiler is not truly
self-consistent.  To clean this up I've introduced a new predicate,
arm_borrow_operation (borrowed from the AArch64 backend) which can
match the comparison type with the required mode and then fixed all
the patterns to use this.  The split patterns that were generating
incorrect modes have all obviously been fixed as well.

The basic rule for the use of a borrow is:
- if the condition code was set by a 'subtract-like' operation (subs, cmp),
  then use CCmode and LTU.
- if the condition code was set by unsigned overflow of addition (adds), then
  use CC_Cmode and GEU.

gcc:
PR target/90311
Backport from master
* config/arm/predicates.md (arm_borrow_operation): New predicate.
* config/arm/arm.c (subdi3_compare1): Use CCmode for the split.
(arm_subdi3, subdi_di_zesidi, subdi_di_sesidi): Likewise.
(subdi_zesidi_zesidi): Likewise.
(negdi2_compare, negdi2_insn): Likewise.
(negdi_extensidi): Likewise.
(negdi_zero_extendsidi): Likewise.
(arm_cmpdi_insn): Likewise.
(subsi3_carryin): Use arm_borrow_operation.
(subsi3_carryin_const): Likewise.
(subsi3_carryin_const0): Likewise.
(subsi3_carryin_compare): Likewise.
(subsi3_carryin_compare_const): Likewise.
(subsi3_carryin_compare_const0): Likewise.
(subsi3_carryin_shift): Likewise.
(rsbsi3_carryin_shift): Likewise.
(negsi2_carryin_compare): Likewise.

gcc/testsuite:
2020-03-05  Jakub Jelinek  

Backport from master
PR target/90311
* gcc.c-torture/execute/pr90311.c: New test.
---
 gcc/config/arm/arm.md | 99 ++-
 gcc/config/arm/predicates.md  | 21 
 gcc/testsuite/gcc.c-torture/execute/pr90311.c | 16 +++
 3 files changed, 87 insertions(+), 49 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr90311.c

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index efa2d31317b..53e54874c12 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1119,7 +1119,7 @@ (define_insn_and_split "subdi3_compare1"
(parallel [(set (reg:CC CC_REGNUM)
 		   (compare:CC (match_dup 4) (match_dup 5)))
 	 (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5))
-			   (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0])]
+			   (ltu:SI (reg:CC CC_REGNUM) (const_int 0])]
   {
 operands[3] = gen_highpart (SImode, operands[0]);
 operands[0] = gen_lowpart (SImode, operands[0]);
@@ -1150,7 +1150,7 @@ (define_insn "*subsi3_carryin"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
 	(minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I,Pz")
 			(match_operand:SI 2 "s_register_operand" "r,r,r"))
-		  (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0]
+		  (match_operand:SI 3 "arm_borrow_operation" "")))]
   "TARGET_32BIT"
   "@
sbc%?\\t%0, %1, %2
@@ -1164,9 +1164,10 @@ (define_insn "*subsi3_carryin"
 
 (define_insn "*subsi3_carryin_const"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
-(minus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "r")
-   (match_operand:SI 2 "arm_neg_immediate_operand" "L"))
-  (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0]
+	(minus:SI (plus:SI
+		   (match_operand:SI 1 "s_register_operand" "r")
+		   (match_operand:SI 2 "arm_neg_immediate_operand" "L"))
+		  (match_operand:SI 3 "arm_borrow_operation" "")))]
   "TARGET_32BIT"
   "sbc\\t%0, %1, #%n2"
   [(set_attr "conds" "use")
@@ -1175,8 +1176,8 @@ (define_insn "*subsi3_carryin_const"
 
 (define_insn "*subsi3_carryin_const0"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
-(minus:SI (match_operand:SI 1 "s_register_operand" "r")
-  (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0]
+	(minus:SI (match_operand:SI 1 "s_register_operand" "r")
+		  (match_operand:SI 2 "arm_borrow_operation" "")))]
   "TARGET_32BIT"
   "sbc\\t%0, %1, #0"
   [(set_attr "conds" "use")
@@ -1185,12 +1186,11 @@ (define_insn "*subsi3_carryin_const0"
 
 (define_insn "*subsi3_carryin_compare"
   [(set (reg:CC CC_REGNUM)
-(compare:CC (match_operand:SI 1 "s_register_operand" "r")
-(match_operand:SI 2 "s_register_operand" "r")))
+	(compare:CC (match_operand:SI 1 "s_register_operand" "r")
+		(match_operand:SI 2 "s_register_operand" "r")))
(set (match_operand:SI 0 "s_register_operand" "=r")
-(minus:SI (minus:SI (match_dup 1)
-(match_dup 2))
-  (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0]
+	(minus:SI (minus:SI (match_dup 1) (match_dup 2))
+		  (match_operand:SI 3 "arm_borrow_operation" "")))]
   "TARGET_32BIT"
   "sbcs\\t%0, %1, %2"
   [(set_attr "conds" "set")
@@ 

[committed] libstdc++: Fix some warnings in filesystem tests

2020-03-05 Thread Jonathan Wakely
There's a -Wunused-but-set-variable warning in operations/all.cc which
can be fixed with [[maybe_unused]].

The statements in operations/copy.cc give -Wunused-value warnings. I
think I meant to use |= rather than !=.

And operations/file_size.cc gets -Wsign-compare warnings.

* testsuite/27_io/filesystem/operations/all.cc: Mark unused variable.
* testsuite/27_io/filesystem/operations/copy.cc: Fix typo.
* testsuite/experimental/filesystem/operations/copy.cc: Likewise.
* testsuite/27_io/filesystem/operations/file_size.cc: Use correct type
for return value, and in comparison.
* testsuite/experimental/filesystem/operations/file_size.cc: Likewise.

Tested powerpc64le-linux, committed to master.


commit 9412b35affa46a18f7e657fb449b449ac9a4a599
Author: Jonathan Wakely 
Date:   Thu Mar 5 17:21:24 2020 +

libstdc++: Fix some warnings in filesystem tests

There's a -Wunused-but-set-variable warning in operations/all.cc which
can be fixed with [[maybe_unused]].

The statements in operations/copy.cc give -Wunused-value warnings. I
think I meant to use |= rather than !=.

And operations/file_size.cc gets -Wsign-compare warnings.

* testsuite/27_io/filesystem/operations/all.cc: Mark unused 
variable.
* testsuite/27_io/filesystem/operations/copy.cc: Fix typo.
* testsuite/experimental/filesystem/operations/copy.cc: Likewise.
* testsuite/27_io/filesystem/operations/file_size.cc: Use correct 
type
for return value, and in comparison.
* testsuite/experimental/filesystem/operations/file_size.cc: 
Likewise.

diff --git a/libstdc++-v3/testsuite/27_io/filesystem/operations/all.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/operations/all.cc
index a53c777e522..982f8626cad 100644
--- a/libstdc++-v3/testsuite/27_io/filesystem/operations/all.cc
+++ b/libstdc++-v3/testsuite/27_io/filesystem/operations/all.cc
@@ -39,7 +39,7 @@ main()
   const std::filesystem::perm_options permopts{};
   std::filesystem::space_info sp;
   std::error_code ec;
-  bool b;
+  bool b [[maybe_unused]];
   std::uintmax_t size;
 
   std::filesystem::absolute(p);
diff --git a/libstdc++-v3/testsuite/27_io/filesystem/operations/copy.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/operations/copy.cc
index 69fa693fcb3..f7aac8a302f 100644
--- a/libstdc++-v3/testsuite/27_io/filesystem/operations/copy.cc
+++ b/libstdc++-v3/testsuite/27_io/filesystem/operations/copy.cc
@@ -57,7 +57,7 @@ test01()
   VERIFY( !exists(to) );
 
   ec.clear();
-  opts != fs::copy_options::recursive;
+  opts |= fs::copy_options::recursive;
   fs::copy("/", to, opts, ec);
   VERIFY( ec == std::make_error_code(std::errc::is_a_directory) );
   VERIFY( !exists(to) );
diff --git a/libstdc++-v3/testsuite/27_io/filesystem/operations/file_size.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/operations/file_size.cc
index 4aa0ba1ec43..fecd4fb2696 100644
--- a/libstdc++-v3/testsuite/27_io/filesystem/operations/file_size.cc
+++ b/libstdc++-v3/testsuite/27_io/filesystem/operations/file_size.cc
@@ -29,9 +29,9 @@ void
 test01()
 {
   std::error_code ec;
-  size_t size = fs::file_size(".", ec);
+  auto size = fs::file_size(".", ec);
   VERIFY( ec == std::errc::is_a_directory );
-  VERIFY( size == -1 );
+  VERIFY( size == (std::uintmax_t)-1 );
 
   try {
 size = fs::file_size(".");
@@ -40,7 +40,7 @@ test01()
 ec = e.code();
   }
   VERIFY( ec == std::errc::is_a_directory );
-  VERIFY( size == -1 );
+  VERIFY( size == (std::uintmax_t)-1 );
 }
 
 void
@@ -49,9 +49,9 @@ test02()
   fs::path p = __gnu_test::nonexistent_path();
 
   std::error_code ec;
-  size_t size = fs::file_size(p, ec);
+  auto size = fs::file_size(p, ec);
   VERIFY( ec );
-  VERIFY( size == -1 );
+  VERIFY( size == (std::uintmax_t)-1 );
 
   try {
 size = fs::file_size(p);
@@ -60,7 +60,7 @@ test02()
 ec = e.code();
   }
   VERIFY( ec );
-  VERIFY( size == -1 );
+  VERIFY( size == (std::uintmax_t)-1 );
 }
 
 int
diff --git a/libstdc++-v3/testsuite/experimental/filesystem/operations/copy.cc 
b/libstdc++-v3/testsuite/experimental/filesystem/operations/copy.cc
index e1bda0ffb6f..e1e6d1dcc15 100644
--- a/libstdc++-v3/testsuite/experimental/filesystem/operations/copy.cc
+++ b/libstdc++-v3/testsuite/experimental/filesystem/operations/copy.cc
@@ -57,7 +57,7 @@ test01()
   VERIFY( !exists(to) );
 
   ec.clear();
-  opts != fs::copy_options::recursive;
+  opts |= fs::copy_options::recursive;
   fs::copy("/", to, opts, ec);
   VERIFY( ec == std::make_error_code(std::errc::is_a_directory) );
   VERIFY( !exists(to) );
diff --git 
a/libstdc++-v3/testsuite/experimental/filesystem/operations/file_size.cc 
b/libstdc++-v3/testsuite/experimental/filesystem/operations/file_size.cc
index f609da7f395..ea4df63f55b 100644
--- a/libstdc++-v3/testsuite/experimental/filesystem/operations/file_size.cc
+++ b/libstdc++-v3/testsuite/experimental/filesystem/operations/file_size.cc
@@ -29,9 +29,9 @@ 

Re: [PATCH 2/1] libstdc++: Add missing friend declaration to join_view::_Sentinel

2020-03-05 Thread Jonathan Wakely

On 05/03/20 11:24 -0500, Patrick Palka wrote:

The converting constructor of join_view::_Sentinel<true> needs to be able to
access the private members of join_view::_Sentinel<false>.

libstdc++-v3/ChangeLog:

* include/std/ranges (join_view::_Sentinel<_Const>): Befriend
join_view::_Sentinel.
* testsuite/std/ranges/adaptors/join.cc: Augment test.


OK.



[PATCH] generate EH info for volatile asm statements (PR inline-asm/93981)

2020-03-05 Thread J.W. Jagersma
The following patch extends the generation of exception handling
information to cover volatile asms too.  This was already mostly
implemented, and only very minor changes are required in order to make
it work.

The change in rewrite_stmt is necessary because it inserts debug
statements after the asm, and this causes the gimple verification pass
to fail.  This code is copied from maybe_register_def which does the
same thing.  Alternatively the verification routines could be made to
ignore debug statements at the end of a block.

gcc/
2020-03-05  Jan W. Jagersma  

PR inline-asm/93981
* tree-cfg.c (make_edges_bb): Call make_eh_edges for case
GIMPLE_ASM.
* tree-eh.c (lower_eh_constructs_2): Add case for GIMPLE_ASM.
* tree-into-ssa.c (rewrite_stmt): For bb-ending stmts, insert
debug notes on the fallthrough edge.  Code and comments copied
verbatim from maybe_register_def.
---
 gcc/tree-cfg.c  |  2 ++
 gcc/tree-eh.c   |  3 +++
 gcc/tree-into-ssa.c | 38 +-
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index f7b817d94e6..c21a7978493 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -913,6 +913,8 @@ make_edges_bb (basic_block bb, struct omp_region 
**pcur_region, int *pomp_index)
   break;
 
 case GIMPLE_ASM:
+  if (stmt_can_throw_internal (cfun, last))
+   make_eh_edges (last);
   make_gimple_asm_edges (bb);
   fallthru = true;
   break;
diff --git a/gcc/tree-eh.c b/gcc/tree-eh.c
index 2a409dcaffe..8314db00922 100644
--- a/gcc/tree-eh.c
+++ b/gcc/tree-eh.c
@@ -2077,6 +2077,9 @@ lower_eh_constructs_2 (struct leh_state *state, 
gimple_stmt_iterator *gsi)
DECL_GIMPLE_REG_P (tmp) = 1;
  gsi_insert_after (gsi, s, GSI_SAME_STMT);
}
+  /* FALLTHRU */
+
+case GIMPLE_ASM:
   /* Look for things that can throw exceptions, and record them.  */
   if (state->cur_region && stmt_could_throw_p (cfun, stmt))
{
diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c
index 6528acac31a..03bc1d52cfa 100644
--- a/gcc/tree-into-ssa.c
+++ b/gcc/tree-into-ssa.c
@@ -1415,7 +1415,43 @@ rewrite_stmt (gimple_stmt_iterator *si)
if (tracked_var)
  {
gimple *note = gimple_build_debug_bind (tracked_var, name, stmt);
-   gsi_insert_after (si, note, GSI_SAME_STMT);
+   /* If stmt ends the bb, insert the debug stmt on the single
+  non-EH edge from the stmt.  */
+   if (gsi_one_before_end_p (*si) && stmt_ends_bb_p (stmt))
+ {
+   basic_block bb = gsi_bb (*si);
+   edge_iterator ei;
+   edge e, ef = NULL;
+   FOR_EACH_EDGE (e, ei, bb->succs)
+ if (!(e->flags & EDGE_EH))
+   {
+ gcc_checking_assert (!ef);
+ ef = e;
+   }
+   /* If there are other predecessors to ef->dest, then
+  there must be PHI nodes for the modified
+  variable, and therefore there will be debug bind
+  stmts after the PHI nodes.  The debug bind notes
+  we'd insert would force the creation of a new
+  block (diverging codegen) and be redundant with
+  the post-PHI bind stmts, so don't add them.
+
+  As for the exit edge, there wouldn't be redundant
+  bind stmts, but there wouldn't be a PC to bind
+  them to either, so avoid diverging the CFG.  */
+   if (ef && single_pred_p (ef->dest)
+   && ef->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ {
+   /* If there were PHI nodes in the node, we'd
+  have to make sure the value we're binding
+  doesn't need rewriting.  But there shouldn't
+  be PHI nodes in a single-predecessor block,
+  so we just add the note.  */
+   gsi_insert_on_edge_immediate (ef, note);
+ }
+ }
+   else
+ gsi_insert_after (si, note, GSI_SAME_STMT);
  }
   }
 }
-- 
2.25.1



[committed] libstdc++: allow string_view insertion to work with (PR 94051)

2020-03-05 Thread Jonathan Wakely
I don't think this is actually required to compile, because using
operator<< without a definition of the ostream doesn't seem valid to me.
But it's easy to make it work.

PR libstdc++/94051
* include/std/string_view: Include .
* testsuite/21_strings/basic_string_view/inserters/94051.cc: New test.

Tested powerpc64le-linux, committed to master.


commit 6a7052832a4791047b0ff4f18dcbb55c93a7336c
Author: Jonathan Wakely 
Date:   Thu Mar 5 16:46:28 2020 +

libstdc++: allow string_view insertion to work with  (PR 94051)

I don't think this is actually required to compile, because using
operator<< without a definition of the ostream doesn't seem valid to me.
But it's easy to make it work.

PR libstdc++/94051
* include/std/string_view: Include .
* testsuite/21_strings/basic_string_view/inserters/94051.cc: New 
test.

diff --git a/libstdc++-v3/include/std/string_view 
b/libstdc++-v3/include/std/string_view
index ea5d547e006..99a81bb04fa 100644
--- a/libstdc++-v3/include/std/string_view
+++ b/libstdc++-v3/include/std/string_view
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
diff --git 
a/libstdc++-v3/testsuite/21_strings/basic_string_view/inserters/94051.cc 
b/libstdc++-v3/testsuite/21_strings/basic_string_view/inserters/94051.cc
new file mode 100644
index 000..326669133d7
--- /dev/null
+++ b/libstdc++-v3/testsuite/21_strings/basic_string_view/inserters/94051.cc
@@ -0,0 +1,28 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++17" }
+// { dg-do compile { target c++17 } }
+
+#include 
+#include 
+
+void
+test01(std::ostream& os, std::string_view sv)
+{
+  os << sv;
+}


Re: ACLE intrinsics: BFloat16 load intrinsics for AArch32

2020-03-05 Thread Delia Burduv

Hi,

This is the latest version of the patch. I am forcing -mfloat-abi=hard 
because the code generated is slightly different depending on the 
float-abi used.


Thanks,
Delia

On 3/4/20 5:20 PM, Kyrill Tkachov wrote:

Hi Delia,

On 3/4/20 2:05 PM, Delia Burduv wrote:

Hi,

The previous version of this patch shared part of its code with the
store intrinsics patch
(https://gcc.gnu.org/ml/gcc-patches/2020-03/msg00145.html) so I removed
any duplicated code. This patch now depends on the previously mentioned
store intrinsics patch.

Here is the latest version and the updated ChangeLog.

gcc/ChangeLog:

2019-03-04  Delia Burduv  

    * config/arm/arm_neon.h (bfloat16_t): New typedef.
 (vld2_bf16): New.
    (vld2q_bf16): New.
    (vld3_bf16): New.
    (vld3q_bf16): New.
    (vld4_bf16): New.
    (vld4q_bf16): New.
    (vld2_dup_bf16): New.
    (vld2q_dup_bf16): New.
 (vld3_dup_bf16): New.
    (vld3q_dup_bf16): New.
    (vld4_dup_bf16): New.
    (vld4q_dup_bf16): New.
 * config/arm/arm_neon_builtins.def
 (vld2): Changed to VAR13 and added v4bf, v8bf
 (vld2_dup): Changed to VAR8 and added v4bf, v8bf
 (vld3): Changed to VAR13 and added v4bf, v8bf
 (vld3_dup): Changed to VAR8 and added v4bf, v8bf
 (vld4): Changed to VAR13 and added v4bf, v8bf
 (vld4_dup): Changed to VAR8 and added v4bf, v8bf
 * config/arm/iterators.md (VDXBF): New iterator.
 (VQ2BF): New iterator.
 *config/arm/neon.md (vld2): Used new iterators.
 (vld2_dup): Used new iterators.
 (vld2_dupv8bf): New.
 (vst3): Used new iterators.
 (vst3qa): Used new iterators.
 (vst3qb): Used new iterators.
 (vld3_dup): Used new iterators.
 (vld3_dupv8bf): New.
 (vst4): Used new iterators.
 (vst4qa): Used new iterators.
 (vst4qb): Used new iterators.
 (vld4_dup): Used new iterators.
 (vld4_dupv8bf): New.


gcc/testsuite/ChangeLog:

2019-03-04  Delia Burduv  

    * gcc.target/arm/simd/bf16_vldn_1.c: New test.

Thanks,
Delia

On 2/19/20 5:25 PM, Delia Burduv wrote:
>
> Hi,
>
> Here is the latest version of the patch. It just has some minor
> formatting changes that were brought up by Richard Sandiford in the
> AArch64 patches
>
> Thanks,
> Delia
>
> On 1/22/20 5:31 PM, Delia Burduv wrote:
>> Ping.
>>
>> I will change the tests to use the exact input and output registers as
>> Richard Sandiford suggested for the AArch64 patches.
>>
>> On 12/20/19 6:48 PM, Delia Burduv wrote:
>>> This patch adds the ARMv8.6 ACLE BFloat16 load intrinsics
>>> vld{q}_bf16 as part of the BFloat16 extension.
>>> 
(https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics) 


>>>
>>> The intrinsics are declared in arm_neon.h .
>>> A new test is added to check assembler output.
>>>
>>> This patch depends on the Arm back-end patch.
>>> (https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01448.html)
>>>
>>> Tested for regression on arm-none-eabi and armeb-none-eabi. I don't
>>> have commit rights, so if this is ok can someone please commit it for
>>> me?
>>>
>>> gcc/ChangeLog:
>>>
>>> 2019-11-14  Delia Burduv 
>>>
>>>  * config/arm/arm_neon.h (bfloat16_t): New typedef.
>>>  (bfloat16x4x2_t): New typedef.
>>>  (bfloat16x8x2_t): New typedef.
>>>  (bfloat16x4x3_t): New typedef.
>>>  (bfloat16x8x3_t): New typedef.
>>>  (bfloat16x4x4_t): New typedef.
>>>  (bfloat16x8x4_t): New typedef.
>>>  (vld2_bf16): New.
>>>  (vld2q_bf16): New.
>>>  (vld3_bf16): New.
>>>  (vld3q_bf16): New.
>>>  (vld4_bf16): New.
>>>  (vld4q_bf16): New.
>>>  (vld2_dup_bf16): New.
>>>  (vld2q_dup_bf16): New.
>>>   (vld3_dup_bf16): New.
>>>  (vld3q_dup_bf16): New.
>>>  (vld4_dup_bf16): New.
>>>  (vld4q_dup_bf16): New.
>>>  * config/arm/arm-builtins.c (E_V2BFmode): New mode.
>>>  (VAR13): New.
>>>  (arm_simd_types[Bfloat16x2_t]):New type.
>>>  * config/arm/arm-modes.def (V2BF): New mode.
>>>  * config/arm/arm-simd-builtin-types.def
>>>  (Bfloat16x2_t): New entry.
>>>  * config/arm/arm_neon_builtins.def
>>>  (vld2): Changed to VAR13 and added v4bf, v8bf
>>>  (vld2_dup): Changed to VAR8 and added v4bf, v8bf
>>>  (vld3): Changed to VAR13 and added v4bf, v8bf
>>>  (vld3_dup): Changed to VAR8 and added v4bf, v8bf
>>>  (vld4): Changed to VAR13 and added v4bf, v8bf
>>>  (vld4_dup): Changed to VAR8 and added v4bf, v8bf
>>>  * config/arm/iterators.md (VDXBF): New iterator.
>>>  (VQ2BF): New iterator.
>>>  (V_elem): Added V4BF, V8BF.
>>>  (V_sz_elem): Added V4BF, V8BF.
>>>  (V_mode_nunits): Added V4BF, V8BF.
>>>  (q): Added V4BF, V8BF.
>>>  *config/arm/neon.md (vld2): Used new iterators.
>>>  (vld2_dup): Used new 

[PATCH 2/1] libstdc++: Add missing friend declaration to join_view::_Sentinel

2020-03-05 Thread Patrick Palka
The converting constructor of join_view::_Sentinel<true> needs to be able to
access the private members of join_view::_Sentinel<false>.

libstdc++-v3/ChangeLog:

* include/std/ranges (join_view::_Sentinel<_Const>): Befriend
join_view::_Sentinel.
* testsuite/std/ranges/adaptors/join.cc: Augment test.
---
 libstdc++-v3/include/std/ranges   |  2 ++
 .../testsuite/std/ranges/adaptors/join.cc | 23 +++
 2 files changed, 25 insertions(+)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 57edc895e3d..16195812b70 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -2593,6 +2593,8 @@ namespace views
  friend constexpr bool
  operator==(const _Iterator<_Const>& __x, const _Sentinel& __y)
  { return __y.__equal(__x); }
+
+ friend _Sentinel;
};
 
   _Vp _M_base = _Vp();
diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/join.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/join.cc
index d3e652da009..142c9feddcd 100644
--- a/libstdc++-v3/testsuite/std/ranges/adaptors/join.cc
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/join.cc
@@ -101,6 +101,28 @@ test05()
   VERIFY( i == v.end() );
 }
 
+void
+test06()
+{
+  std::vector x = {""};
+  auto i = std::counted_iterator(x.begin(), 1);
+  auto r = ranges::subrange{i, std::default_sentinel};
+  auto v = r | views::transform(std::identity{}) | views::join;
+
+  // Verify that _Iterator is implicitly convertible to _Iterator.
+  static_assert(!std::same_as);
+  auto a = ranges::cbegin(v);
+  a = ranges::begin(v);
+
+  // Verify that _Sentinel is implicitly convertible to _Sentinel.
+  static_assert(!ranges::common_range);
+  static_assert(!std::same_as);
+  auto b = ranges::cend(v);
+  b = ranges::end(v);
+}
+
 int
 main()
 {
@@ -109,4 +131,5 @@ main()
   test03();
   test04();
   test05();
+  test06();
 }
-- 
2.25.1.460.g2f268890c2



[committed] testsuite: Add testcase for already fixed PR [PR90311]

2020-03-05 Thread Jakub Jelinek
Hi!

This PR has been fixed by r10-3970.  Testcase tested with cross to
armv7hl-linux-gnueabi (all of r10-3969 (FAIL), r10-3970 and current trunk 
(PASS))
and x86_64-linux and i686-linux, committed to trunk as obvious.

2020-03-05  Jakub Jelinek  

PR target/90311
* gcc.c-torture/execute/pr90311.c: New test.

--- gcc/testsuite/gcc.c-torture/execute/pr90311.c.jj2020-03-05 
16:35:46.183005546 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr90311.c   2020-03-05 
16:35:33.390195187 +0100
@@ -0,0 +1,16 @@
+/* PR rtl-optimization/90311 */
+
+int a, b;
+
+int
+main ()
+{
+  unsigned long long x;
+  unsigned int c;
+  __builtin_add_overflow ((unsigned char) a, b, &c);
+  b -= c < (unsigned char) a;
+  x = b;
+  if (x)
+__builtin_abort ();
+  return 0;
+}

Jakub



[PATCH] libstdc++: Give ranges::empty() a concrete return type (PR 93978)

2020-03-05 Thread Patrick Palka
This works around PR 93978 by avoiding having to instantiate ranges::empty()
when checking the constraints of view_interface::operator bool().  When
ranges::empty() has an auto return type, then we must instantiate it in order to
determine whether the requires expression { ranges::empty(_M_derived()); } is
well-formed.  But this means instantiating view_interface::empty() and hence
view_interface::_M_derived(), all before we've yet deduced the return type of
join_view::end().  (The reason view_interface::operator bool() is needed in
join_view::end() in the first place is because in this function we perform
direct initialization of join_view::_Sentinel from a join_view, and so we try to
find a conversion sequence from the latter to the former that goes through this
conversion operator.)

Giving ranges::empty() a concrete return type of bool should be safe according
to [ranges.prim.empty]/4 which says "whenever ranges::empty(E) is a valid
expression, it has type bool."

This fixes the test case in PR 93978 when compiling without -Wall, but with 
-Wall
the test case still fails due to the issue described in PR c++/94038, I think.
I still don't quite understand why the test case doesn't fail without -O.

libstdc++-v3/ChangeLog:

PR libstdc++/93978
* include/bits/range_access.h (__cust_access::_Empty::operator()):
Declare return type to be bool instead of auto.
* testsuite/std/ranges/adaptors/93978.cc: New test.
---
 libstdc++-v3/include/bits/range_access.h  |  2 +-
 .../testsuite/std/ranges/adaptors/93978.cc| 34 +++
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 libstdc++-v3/testsuite/std/ranges/adaptors/93978.cc

diff --git a/libstdc++-v3/include/bits/range_access.h 
b/libstdc++-v3/include/bits/range_access.h
index c814694623c..eab8bb9721b 100644
--- a/libstdc++-v3/include/bits/range_access.h
+++ b/libstdc++-v3/include/bits/range_access.h
@@ -804,7 +804,7 @@ namespace ranges
   template
requires __member_empty<_Tp> || __size0_empty<_Tp>
|| __eq_iter_empty<_Tp>
-   constexpr auto
+   constexpr bool
operator()(_Tp&& __e) const noexcept(_S_noexcept<_Tp>())
{
  if constexpr (__member_empty<_Tp>)
diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/93978.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/93978.cc
new file mode 100644
index 000..62b5f5f9dac
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/93978.cc
@@ -0,0 +1,34 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++2a -O" }
+// { dg-do compile { target c++2a } }
+
+#include <ranges>
+#include <vector>
+
+namespace ranges = std::ranges;
+namespace views = std::views;
+
+void
+test()
+{
+  std::vector<std::string> x = {""};
+  auto i = std::counted_iterator(x.begin(), 1);
+  auto r = ranges::subrange{i, std::default_sentinel};
+  auto v = r | views::join;
+}
-- 
2.25.1.460.g2f268890c2



Re: ACLE intrinsics: BFloat16 store (vst{q}_bf16) intrinsics for AArch32

2020-03-05 Thread Delia Burduv

Hi,

This is the latest version of the patch. I am forcing -mfloat-abi=hard 
because the register allocator behaves differently depending on the 
float-abi used.


Thanks,
Delia

On 3/4/20 5:20 PM, Kyrill Tkachov wrote:

Hi Delia,

On 3/3/20 5:23 PM, Delia Burduv wrote:

Hi,

I noticed that the patch doesn't apply cleanly. I fixed it and this is 
the latest version.


Thanks,
Delia

On 3/3/20 4:23 PM, Delia Burduv wrote:

Sorry, I forgot the attachment.

On 3/3/20 4:20 PM, Delia Burduv wrote:

Hi,

I made a mistake in the previous patch. This is the latest version. 
Please let me know if it is ok.


Thanks,
Delia

On 2/21/20 3:18 PM, Delia Burduv wrote:

Hi Kyrill,

The arm_bf16.h is only used for scalar operations. That is how the 
aarch64 versions are implemented too.


Thanks,
Delia

On 2/21/20 2:06 PM, Kyrill Tkachov wrote:

Hi Delia,

On 2/19/20 5:25 PM, Delia Burduv wrote:

Hi,

Here is the latest version of the patch. It just has some minor
formatting changes that were brought up by Richard Sandiford in the
AArch64 patches

Thanks,
Delia

On 1/22/20 5:29 PM, Delia Burduv wrote:
> Ping.
>
> I will change the tests to use the exact input and output 
registers as

> Richard Sandiford suggested for the AArch64 patches.
>
> On 12/20/19 6:46 PM, Delia Burduv wrote:
>> This patch adds the ARMv8.6 ACLE BFloat16 store intrinsics
>> vst{q}_bf16 as part of the BFloat16 extension.
>> 
(https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics) 


>>
>> The intrinsics are declared in arm_neon.h .
>> A new test is added to check assembler output.
>>
>> This patch depends on the Arm back-end patch.
>> (https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01448.html)
>>
>> Tested for regression on arm-none-eabi and armeb-none-eabi. I 
don't
>> have commit rights, so if this is ok can someone please commit 
it for me?

>>
>> gcc/ChangeLog:
>>
>> 2019-11-14  Delia Burduv 
>>
>>  * config/arm/arm_neon.h (bfloat16_t): New typedef.
>>  (bfloat16x4x2_t): New typedef.
>>  (bfloat16x8x2_t): New typedef.
>>  (bfloat16x4x3_t): New typedef.
>>  (bfloat16x8x3_t): New typedef.
>>  (bfloat16x4x4_t): New typedef.
>>  (bfloat16x8x4_t): New typedef.
>>  (vst2_bf16): New.
>>  (vst2q_bf16): New.
>>  (vst3_bf16): New.
>>  (vst3q_bf16): New.
>>  (vst4_bf16): New.
>>  (vst4q_bf16): New.
>>  * config/arm/arm-builtins.c (E_V2BFmode): New mode.
>>  (VAR13): New.
>>  (arm_simd_types[Bfloat16x2_t]): New type.
>>  * config/arm/arm-modes.def (V2BF): New mode.
>>  * config/arm/arm-simd-builtin-types.def
>>  (Bfloat16x2_t): New entry.
>>  * config/arm/arm_neon_builtins.def
>>  (vst2): Changed to VAR13 and added v4bf, v8bf
>>  (vst3): Changed to VAR13 and added v4bf, v8bf
>>  (vst4): Changed to VAR13 and added v4bf, v8bf
>>  * config/arm/iterators.md (VDXBF): New iterator.
>>  (VQ2BF): New iterator.
>>  (V_elem): Added V4BF, V8BF.
>>  (V_sz_elem): Added V4BF, V8BF.
>>  (V_mode_nunits): Added V4BF, V8BF.
>>  (q): Added V4BF, V8BF.
>>  * config/arm/neon.md (vst2): Used new iterators.
>>  (vst3): Used new iterators.
>>  (vst3qa): Used new iterators.
>>  (vst3qb): Used new iterators.
>>  (vst4): Used new iterators.
>>  (vst4qa): Used new iterators.
>>  (vst4qb): Used new iterators.
>>
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2019-11-14  Delia Burduv 
>>
>>  * gcc.target/arm/simd/bf16_vstn_1.c: New test.


One thing I just noticed in this and the other arm bfloat16 
patches...


diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 
3c78f435009ab027f92693d00ab5b40960d5419d..fd81c18948db3a7f6e8e863d32511f75bf950e6a 
100644

--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -18742,6 +18742,89 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, 
float32x4_t __a, float32x4_t __b,

    return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index);
  }

+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+bf16")
+
+typedef struct bfloat16x4x2_t
+{
+  bfloat16x4_t val[2];
+} bfloat16x4x2_t;


These should be in a new arm_bf16.h file that gets included in the 
main arm_neon.h file, right?

I believe the aarch64 versions are implemented that way.

Otherwise the patch looks good to me.
Thanks!
Kyrill


  +
+typedef struct bfloat16x8x2_t
+{
+  bfloat16x8_t val[2];
+} bfloat16x8x2_t;
+



diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_vstn_1.c 
b/gcc/testsuite/gcc.target/arm/simd/bf16_vstn_1.c

new file mode 100644
index 
..b52ecfb959776fd04c7c33908cb7f8898ec3fe0b 


--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/bf16_vstn_1.c
@@ -0,0 +1,84 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { 

Re: [RFA/RFC] [tree-optimization/91890] [P1 Regression] Avoid clobbering useful location in Wrestrict code

2020-03-05 Thread Martin Sebor

On 3/5/20 12:51 AM, Richard Biener wrote:

On Thu, Mar 5, 2020 at 12:49 AM Jeff Law  wrote:


On Wed, 2020-03-04 at 09:22 -0700, Martin Sebor wrote:


I don't remember why the code in -Wrestrict unconditionally overwrites
the statement location rather than only when it's not available, but
I do remember adding conditional code like in your patch in r277076
to deal with missing location on the statement.  So either your fix
or something like the hunk below might be the right solution (if we
go with the code below, abstracting it into a utility function might
be nice).

So there's several chunks that are fairly similar to what you referenced in
maybe_warn_pointless_strcmp.  Factoring all of them into a single location is
pretty easy.

That also gives us a nice place where we can experiment with "does extraction of
location information from the expression ever help".  The answer is, it doesn't,
at least not within our testsuite when run on x86_64.

I'm hesitant to remove the code that extracts the location out of the 
expression,
but could be convinced to do so.

Thoughts?


Using anything but the actual stmt location is prone to end up at random places
due to tree sharing issues, CSE and copy propagation.  Simply consider

char one[50];
char two[50];

void
test_strncat (void)
{
   char *p = one;
   (void) __builtin_strcpy (p, "gh");
   (void) __builtin_strcpy (two, "ef");

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow="
#pragma GCC diagnostic ignored "-Warray-bounds"
   (void) __builtin_strncat (p, two, 99);


Interestingly, while the expression location points to p, the warning
points to the statement:

warning: ‘__builtin_strncat’ forming offset [50, 98] is out of the 
bounds [0, 50] of object ‘one’ with type ‘char[50]’ [-Warray-bounds]

   14 |   (void) __builtin_strncat (p, two, 99);
  |  ^~

As it happens, the %G directive in the warning_at() call replaces
the location passed to it with that of the Gimple call argument to
the %G directive.  Removing the %G directive turns the warning into:

warning: ‘__builtin_strncat’ forming offset [50, 98] is out of the 
bounds [0, 50] of object ‘one’ with type ‘char[50]’ [-Warray-bounds]

7 |   char *p = one;
  | ^~~

But the code that checks the scope of #pragma GCC diagnostic uses
the original location passed to warning_at, not the location set
subsequently by the %G directive, and so the two are out of synch.

We've discussed removing the %G/%K directives before and having
the diagnostic machinery always print the inlining context instead.
Let me look into it for GCC 11.

Martin


#pragma GCC diagnostic pop
}

where we happily forward p = &one[0] to both uses injecting
a "faulty" location.  Well, it's actually the correct location
computing &one[0] but irrelevant for the actual call.

So the question is why we end up with UNKNOWN_LOCATION
for such a call and, if we do, why we need to bother emitting a diagnostic
at all (and why emitting it for another possibly random location is a good idea
instead of maybe simply emitting it without location).

Richard.


Jeff




[pushed GCC9] c++: Fix SFINAE for invalid non-type tparm types.

2020-03-05 Thread Jason Merrill
Just missing the usual SFINAE pattern here.  This was fixed for GCC 10 as a
drive-by in r277902.

gcc/cp/ChangeLog
2020-03-05  Jason Merrill  

PR c++/90338
* pt.c (invalid_nontype_parm_type_p): Check complain for non-literal
and mutable errors.
---
 gcc/cp/pt.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 43d9660ebda..4787747b6ff 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -25341,15 +25341,20 @@ invalid_nontype_parm_type_p (tree type, 
tsubst_flags_t complain)
return true;
   if (!literal_type_p (type))
{
- error ("%qT is not a valid type for a template non-type parameter "
-"because it is not literal", type);
- explain_non_literal_class (type);
+ if (complain & tf_error)
+   {
+ auto_diagnostic_group d;
+ error ("%qT is not a valid type for a template non-type parameter 
"
+"because it is not literal", type);
+ explain_non_literal_class (type);
+   }
  return true;
}
   if (cp_has_mutable_p (type))
{
- error ("%qT is not a valid type for a template non-type parameter "
-"because it has a mutable member", type);
+ if (complain & tf_error)
+   error ("%qT is not a valid type for a template non-type parameter "
+  "because it has a mutable member", type);
  return true;
}
   /* FIXME check op<=> and strong structural equality once spaceship is
-- 
2.18.1



Re: Minor regression due to recent IRA changes

2020-03-05 Thread Jeff Law
On Sun, 2020-03-01 at 10:37 +0900, Oleg Endo wrote:
> On Sat, 2020-02-29 at 12:35 -0700, Jeff Law wrote:
> > Yup.  That was roughly what I was thinking and roughly the worry I had with
> > trying to squash out the quality regressions.  But it may ultimately be the
> > only way to really resolve these issues.
> 
> Another idea would be to let RA see R0, but ignore all the R0
> constraints.  Then try fixing up everything afterwards.  If R0 is
> removed from the allocatable reg list, there will be one register less
> for it to work with and I'd expect some code quality regressions.  But
> in order to fix up all the R0 cases after the regular RA/reload, I
> believe it will have to re-do a lot of (similar) work that has been
> done by the regular RA already.  One thing that comes instantly to mind
> are loops and the use of R0 as index/base register in memory addressing
> ... it just sounds like a lot of duplicate work in general.
> 
> > DJ's work on the m32c IIRC might be useful if you do try to chase this stuff
> > down.  Essentially there weren't really enough registers.  So he had the 
> > port
> > pretend to have more than it really did, then had a post-reload pass to do
> > the
> > final allocation into the target's actual register file.
> > 
> 
> AFAIK DJ did the same (or similar) thing for RL78.  IMHO that just
> shows that one type of RA/reload does not fit all.  Perhaps it'd be
> better to have the option of different RA/reload implementations, which
> implement different strategies for different needs and priorities.
> 
> Anyway, on SH the R0 problem seems to go away with LRA for the most
> part.  I don't know if anything has been put in LRA specifically to
> address such cases, or it works by general definition of the design, or
> it's just a mere coincidence.  If it's the latter case, I'm not sure
> what to expect in the future.  Perhaps it will start breaking again if
> changes for other targets are being made to LRA.
FWIW I've got an sh4/sh4eb bootstrap and regression test running with
HONOR_REG_ALLOC_ORDER defined.  As Vlad mentioned, that may be a viable
workaround.

Jeff
> 



Re: ACLE intrinsics: BFloat16 store (vst{q}_bf16) intrinsics for AArch32

2020-03-05 Thread Delia Burduv

Hi,

This is the latest version of the patch. I am forcing -mfloat-abi=hard 
because the register allocator behaves differently depending on which 
float-abi is used.


Thanks,
Delia

On 3/4/20 5:20 PM, Kyrill Tkachov wrote:

Hi Delia,

On 3/3/20 5:23 PM, Delia Burduv wrote:

Hi,

I noticed that the patch doesn't apply cleanly. I fixed it and this is 
the latest version.


Thanks,
Delia

On 3/3/20 4:23 PM, Delia Burduv wrote:

Sorry, I forgot the attachment.

On 3/3/20 4:20 PM, Delia Burduv wrote:

Hi,

I made a mistake in the previous patch. This is the latest version. 
Please let me know if it is ok.


Thanks,
Delia

On 2/21/20 3:18 PM, Delia Burduv wrote:

Hi Kyrill,

The arm_bf16.h is only used for scalar operations. That is how the 
aarch64 versions are implemented too.


Thanks,
Delia

On 2/21/20 2:06 PM, Kyrill Tkachov wrote:

Hi Delia,

On 2/19/20 5:25 PM, Delia Burduv wrote:

Hi,

Here is the latest version of the patch. It just has some minor
formatting changes that were brought up by Richard Sandiford in the
AArch64 patches

Thanks,
Delia

On 1/22/20 5:29 PM, Delia Burduv wrote:
> Ping.
>
> I will change the tests to use the exact input and output 
registers as

> Richard Sandiford suggested for the AArch64 patches.
>
> On 12/20/19 6:46 PM, Delia Burduv wrote:
>> This patch adds the ARMv8.6 ACLE BFloat16 store intrinsics
>> vst{q}_bf16 as part of the BFloat16 extension.
>> 
(https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics) 


>>
>> The intrinsics are declared in arm_neon.h .
>> A new test is added to check assembler output.
>>
>> This patch depends on the Arm back-end patch.
>> (https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01448.html)
>>
>> Tested for regression on arm-none-eabi and armeb-none-eabi. I 
don't
>> have commit rights, so if this is ok can someone please commit 
it for me?

>>
>> gcc/ChangeLog:
>>
>> 2019-11-14  Delia Burduv 
>>
>>  * config/arm/arm_neon.h (bfloat16_t): New typedef.
>>  (bfloat16x4x2_t): New typedef.
>>  (bfloat16x8x2_t): New typedef.
>>  (bfloat16x4x3_t): New typedef.
>>  (bfloat16x8x3_t): New typedef.
>>  (bfloat16x4x4_t): New typedef.
>>  (bfloat16x8x4_t): New typedef.
>>  (vst2_bf16): New.
>>  (vst2q_bf16): New.
>>  (vst3_bf16): New.
>>  (vst3q_bf16): New.
>>  (vst4_bf16): New.
>>  (vst4q_bf16): New.
>>  * config/arm/arm-builtins.c (E_V2BFmode): New mode.
>>  (VAR13): New.
>>  (arm_simd_types[Bfloat16x2_t]): New type.
>>  * config/arm/arm-modes.def (V2BF): New mode.
>>  * config/arm/arm-simd-builtin-types.def
>>  (Bfloat16x2_t): New entry.
>>  * config/arm/arm_neon_builtins.def
>>  (vst2): Changed to VAR13 and added v4bf, v8bf
>>  (vst3): Changed to VAR13 and added v4bf, v8bf
>>  (vst4): Changed to VAR13 and added v4bf, v8bf
>>  * config/arm/iterators.md (VDXBF): New iterator.
>>  (VQ2BF): New iterator.
>>  (V_elem): Added V4BF, V8BF.
>>  (V_sz_elem): Added V4BF, V8BF.
>>  (V_mode_nunits): Added V4BF, V8BF.
>>  (q): Added V4BF, V8BF.
>>  * config/arm/neon.md (vst2): Used new iterators.
>>  (vst3): Used new iterators.
>>  (vst3qa): Used new iterators.
>>  (vst3qb): Used new iterators.
>>  (vst4): Used new iterators.
>>  (vst4qa): Used new iterators.
>>  (vst4qb): Used new iterators.
>>
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2019-11-14  Delia Burduv 
>>
>>  * gcc.target/arm/simd/bf16_vstn_1.c: New test.


One thing I just noticed in this and the other arm bfloat16 
patches...


diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 
3c78f435009ab027f92693d00ab5b40960d5419d..fd81c18948db3a7f6e8e863d32511f75bf950e6a 
100644

--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -18742,6 +18742,89 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, 
float32x4_t __a, float32x4_t __b,

    return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index);
  }

+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+bf16")
+
+typedef struct bfloat16x4x2_t
+{
+  bfloat16x4_t val[2];
+} bfloat16x4x2_t;


These should be in a new arm_bf16.h file that gets included in the 
main arm_neon.h file, right?

I believe the aarch64 versions are implemented that way.

Otherwise the patch looks good to me.
Thanks!
Kyrill


  +
+typedef struct bfloat16x8x2_t
+{
+  bfloat16x8_t val[2];
+} bfloat16x8x2_t;
+



diff --git a/gcc/testsuite/gcc.target/arm/simd/bf16_vstn_1.c 
b/gcc/testsuite/gcc.target/arm/simd/bf16_vstn_1.c

new file mode 100644
index 
..b52ecfb959776fd04c7c33908cb7f8898ec3fe0b 


--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/bf16_vstn_1.c
@@ -0,0 +1,84 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { 

Re: [committed][ARM] Fix minor testsuite fallout on ARM due to recent IRA changes

2020-03-05 Thread Jeff Law
On Mon, 2020-03-02 at 16:40 +, Richard Earnshaw (lists) wrote:
> On 02/03/2020 15:46, Jeff Law wrote:
> > More minor fallout from Vlad's IRA changes.
> > 
> > Previously this test used r3 to hold a value across a call (it's an ipa-ra
> > test).  After Vlad's changes we're using r1 instead.
> > 
> > This patch makes the obvious change to the pattern we scan for, which should
> > bring the test back to a passing status.
> > 
> > There's a note about r3 being special on thumb1 and the pattern check is
> > skipped for thumb1.  That special casing may not be necessary anymore -- I
> > leave
> > that to the ARM maintainers to resolve one way or the other.
> > 
> > Committing on the trunk momentarily.
> > 
> > jeff
> > 
> 
> Any of r1, r2, r3 could be chosen for the 'save' register, so why not 
> put that in the regexp?
> 
> Something like:
> 
> +/* { dg-final { scan-assembler-times "mov\tr[123], r0" 1 { target { ! 
> arm_thumb1 } } } } */
> 
> And then we are future-proof.
Pushed to the trunk.  Thanks for the suggestion.

jeff



Re: [PATCH] re PR tree-optimization/90883 (Generated code is worse if returned struct is unnamed)

2020-03-05 Thread Jeff Law
On Tue, 2020-03-03 at 15:34 +0800, Kito Cheng wrote:
> After add --param max-inline-insns-size=1 all target will remove the
> redundant store at dse1, except some targets like AArch64 and MIPS will
> expand the struct initialization into loop due to CLEAR_RATIO.
> 
> Tested on cross compiler of riscv32, riscv64, x86, x86_64, mips, mips64,
> aarch64, nds32 and arm.
> 
> gcc/testsuite/ChangeLog
> 
>   PR tree-optimization/90883
>   * g++.dg/tree-ssa/pr90883.c: Add --param max-inline-insns-size=1.
>   Add aarch64-*-* mips*-*-* to XFAIL.
OK from me as well.
jeff
> 



Re: [PATCH PR94026] combine missed opportunity to simplify comparisons with zero

2020-03-05 Thread Jeff Law
On Wed, 2020-03-04 at 08:39 +, Yangfei (Felix) wrote:
> Hi,
> 
>   This is a simple fix for PR94026.  
>   With this fix, combine will try to make an extraction if we are in an equality
> comparison and this is an AND
>   with a constant which is power of two minus one.  Shift here should be a
> constant.  For example, combine
>   will transform (compare (and (lshiftrt x 8) 6) 0) to (compare (zero_extract
> (x 2 9)) 0).  
> 
>   Added one test case for this.  Bootstrap and tested on both x86_64 and
> aarch64 Linux platform.  
>   Any suggestion?  
> 
> Thanks,
> Felix
> 
> gcc:
> +2020-03-04  Felix Yang  
> +
> +   PR rtl-optimization/94026
> +   * combine.c (make_compound_operation_int): Make an extraction
> + if we are in a equality comparison and this is an AND with a
> + constant which is power of two minus one.
> +
> 
> gcc/testsuite:
> +2020-03-04  Felix Yang  
> +
> +   PR rtl-optimization/94026
> +   * gcc.dg/pr94026.c: New test.
Just a note.  We're in stage4 of our development cycle, meaning we focus on
regression bugfixes.  I've queued this for evaluation in gcc-11.
jeff



Re: [RFA/RFC] [tree-optimization/91890] [P1 Regression] Avoid clobbering useful location in Wrestrict code

2020-03-05 Thread Jeff Law
On Thu, 2020-03-05 at 08:51 +0100, Richard Biener wrote:
> On Thu, Mar 5, 2020 at 12:49 AM Jeff Law  wrote:
> > On Wed, 2020-03-04 at 09:22 -0700, Martin Sebor wrote:
> > > I don't remember why the code in -Wrestrict unconditionally overwrites
> > > the statement location rather than only when it's not available, but
> > > I do remember adding conditional code like in your patch in r277076
> > > to deal with missing location on the statement.  So either your fix
> > > or something like the hunk below might be the right solution (if we
> > > go with the code below, abstracting it into a utility function might
> > > be nice).
> > So there's several chunks that are fairly similar to what you referenced in
> > maybe_warn_pointless_strcmp.  Factoring all of them into a single location 
> > is
> > pretty easy.
> > 
> > That also gives us a nice place where we can experiment with "does 
> > extraction
> > of
> > location information from the expression ever help".  The answer is, it
> > doesn't,
> > at least not within our testsuite when run on x86_64.
> > 
> > I'm hesitant to remove the code that extracts the location out of the
> > expression,
> > but could be convinced to do so.
> > 
> > Thoughts?
> 
> Using anything but the actual stmt location is prone to end up at random 
> places
> due to tree sharing issues, CSE and copy propagation.  Simply consider
I'd tend to agree.  My conservatism is due to being in stage4 and not knowing
precisely why we have code to extract the location from the operand to begin
with.


> where we happily forward p = &one[0] to both uses injecting
> a "faulty" location.  Well, it's actually the correct location
> computing &one[0] but irrelevant for the actual call.
Exactly.

> 
> So the question is why we end up with UNKNOWN_LOCATION
> for such call and if why we need to bother emit a diagnostic
> at all (and why emitting it for another possibly random location is a good 
> idea
> instead of maybe simply emitting it without location).
One might argue that scenario should be a gcc_unreachable rather than extracting
a likely bogus location.  I'm even more hesitant to do that for gcc-10, but it
might make sense for gcc-11.

My first inclination would be to do the refactor, but leave in the code that
extracts a location from the expression.  We'd close out the regression BZ and
open a new one to remove the expression handling bits for gcc-11 (or turn them
into a gcc_unreachable)

Does that work for you, Richi?

jeff



Re: [PING PATCH coroutines] Do not strip cleanup_point when promote temporaries out of current stmt

2020-03-05 Thread Iain Sandoe

Hello JunMa,

JunMa  wrote:


Ping


Once again, sorry for taking time to review this.


在 2020/2/27 上午10:18, JunMa 写道:

在 2020/2/11 上午10:14, JunMa 写道:
Kindly ping

Regards
JunMa

Hi
In maybe_promote_captured_temps, the cleanup_point_stmt has been
stripped when handling temporaries captured by reference. However, there
may be non-reference temporaries in the current stmt which cause an ICE in
the gimplify pass.

This patch fixes this. The testcase comes from cppcoro and is reduced by
creduce.


With current trunk + Bin’s two approved patches.

I see no change in the testcase (lambda-09-capture-object.C) before / after  
the patch

 (it fails for me at -O0 only - in both cases).

please could you check?
thanks
Iain



Re: [PING PATCH coroutines] Set side effects flag for BIND_EXPR which build in maybe_promote_captured_temps

2020-03-05 Thread Iain Sandoe

Hello JunMa,

JunMa  wrote:


Ping


Thanks for your patch(es) and I am sorry this has taken some time to review.

(right now, we’re trying to ensure that we have the latest standard  
represented in

 GCC10, so updating to n4849).


在 2020/2/27 上午10:17, JunMa 写道:

在 2020/2/11 上午10:50, JunMa 写道:
Hi
kindly ping~

Regards
JunMa

Hi
As title. in maybe_promote_captured_temps, we promote captured  
temporaries

and co_await_expr into a new BIND_EXPR. As the BIND_EXPR contains
co_await_expr and maybe other function calls, the side effects flag  
should

be set.

This patch fixes one mismatch in cppcoro, the testcase comes from cppcoro
and is reduced by creduce.


With the following test conditions;

r10-7040-ga2ec7c4aafbcd517
 + the two approved patches by Bin Cheng applied.

 1/ the test case in this patch (lambda-10-co-await-lambda.C) fails both with 
and without the patch.
 2/ the patch regresses one of my local testcases.

So, it appears that the testcase might show a bug - but the fix is not the  
right one for current trunk?


Please could you re-check ?

thanks
Iain



[OG9][devel/omp/gcc-9] Merge from gcc-9-branch [was: devel/omp/gcc-9 branch]

2020-03-05 Thread Tobias Burnus

I have now merged the GCC 9 changes into the branch;
before it was up to date with 'releases/gcc-9.1.0',
and now it is with today's
r9-8340-g7beafc829c5b122298093ba517023015611aeca8

This was done in two pieces (9.1.0 to 9.2.0 changes,
and then to today's as only 1000 commits are permitted.)

The main reason for doing this merge is GCC 9's
commit r9-8063-g75003cdd23c310ec385344e8040d490e8dd6d2be
of Fri Dec 20 17:58:35 2019 +0100:
"backport: re PR sanitizer/92154 (new glibc breaks arm bootstrap due to 
libsanitizer)"

Despite the PR title, that commit also fixes bootstrap on x86-64-gnu-linux.

Cheers,

Tobias


-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH v2 0/3] Introduce a new GCC option, --record-gcc-command-line

2020-03-05 Thread Egeyar Bagcioglu




On 3/5/20 8:36 AM, Richard Biener wrote:

On Wed, Mar 4, 2020 at 5:28 PM Egeyar Bagcioglu
 wrote:



On 3/4/20 1:18 AM, Fangrui Song wrote:

On 2020-03-03, Joseph Myers wrote:

On Tue, 3 Mar 2020, Egeyar Bagcioglu wrote:


Although we discussed after the submission of the first version that
there are several other options performing similar tasks, I believe we
established that there is still a need for this specific functionality.
Therefore, I am skipping in this email the comparison between this
option and the existing options with similarities.

Mentioning -frecord-gcc-switches will be much appreciated.

How is the new .GCC.command.line different?

Does it still have the SHF_MERGE | SHF_STRINGS flag?
If you change the flags, the .GCC.command.line section may not play with
another object file (generated by -frecord-gcc-switches) whose
.GCC.command.line is
SHF_MERGE | SHF_STRINGS.

When both -frecord-gcc-switches and --record-command-line are specified,
is it an error?

This option is similar to -frecord-gcc-switches. However, they have
three fundamental differences: Firstly, -frecord-gcc-switches saves the
internal state after the argv is processed and passed by the driver. As
opposed to that, --record-gcc-command-line saves the command-line as
received by the driver, with the exception of extending @files first.
Secondly, -frecord-gcc-switches saves the switches as separate entries
into a mergeable string section. Therefore, the entries belonging to
different object files get mixed up after being linked. The new
--record-gcc-command-line, on the other hand, creates one entry per
invocation. By doing so, it makes it clear which options were used
together in a single gcc invocation. Lastly, --record-gcc-command-line
also adds the version of the gcc into this single entry to make it clear
which version of gcc was called with any given command line. This is
useful in cases where .comment section reports multiple versions.

While there are also similarities between the implementations of these
two options, those implementations are completely independent. These
commands can be used separately or together without issues. I used the
same section that -frecord-gcc-switches uses on purpose, so that they
can also be used together to save both the command line given to GCC and
the internal switches passed by GCC.

The option -grecord-gcc-switches is similar to -frecord-gcc-switches,
but saves the internal GCC switches into DWARF. Lastly, -fverbose-asm
option saves the switches into the assembly file but that information
never makes it to the object files.

-grecord-gcc-switches also allows to match the options used to the
actual generated code while both -frecord-gcc-switches and
--record-gcc-command-line
end up as ELF comment sections not associated with particular
code pieces.

So IMHO anything but -grecord-gcc-switches is quite useless in case options
used do not match for all object files.

Richard.


I hear that. I am definitely not arguing against the use cases where 
-grecord-gcc-switches is the best option. But this patch is coming from 
the point of view of a different use case at hand. We need to have the 
frontend command line saved in a way that it can be later extracted from 
the ELF object (including shared libraries and binaries) and be used to 
re-compile individual objects, producing more or less the same generated 
code. This patch is not enough to get the exact same output, but it is 
necessary for us.


Correct me if I am wrong, but none of the existing options are very 
helpful when it comes to asking gcc to do the same thing that it did 
while creating that object. They hint some things about the command line 
but do not help much reconstructing the command line. That is what I 
mean when I say this option is not about the internals of gcc. It is 
definitely not about knowing what's passed to the backend. As long as 
this option's output is concerned, gcc is a blackbox. This is about the 
command-line, about higher-level users' interaction with the driver and 
nothing more.


Best regards
Egeyar




We're now using git-style commit messages with self-contained
explanation
/ justification of the change being committed.

This means that one of the commit messages (not just message 0, whose
contents don't go in a commit message) for an individual patch should
have
the explanation, which should include the self-contained
justification by
reference to comparison with other existing similar options. People
should be able to find the relevant information in the commit without
needing to search the list archives for reviews of a previous patch
version.

Thanks for telling me. I will extend the above comparison according to
the questions I might receive. Then I'll add it, together with the
explanation in the cover letter, into the commit message of the second
patch.

Regards
Egeyar




Re: [PATCH v2 3/3] Keep .GCC.command.line sections of LTO objetcs.

2020-03-05 Thread Egeyar Bagcioglu





I'm sending the updated patch based on Egeyar's work.
It utilizes a new environmental variable and uses the currently
existing -frecord-gcc-switches option.

Thoughts?


I am leaving it to the more experienced to comment on redefining the 
functionality of -frecord-gcc-switches.


The code seems pretty neat to me. Thanks Martin!

Best regards
Egeyar


Re: [Committed 4/4] IBM Z: zTPF: Include glibc-stdint.h

2020-03-05 Thread Andreas Krebbel
On 3/5/20 12:34 AM, Joseph Myers wrote:
> On Wed, 4 Mar 2020, Andreas Krebbel wrote:
> 
>> Building a zTPF cross currently fails when building libstdc++
>> complaining about the __UINTPTR_TYPE__ to be missing.
>>
>> Fixed by including the glibc-stdint.h header.
> 
> To confirm: TPF provides a <stdint.h> header, which uses the same types 
> as glibc's <stdint.h> does?

TPF uses an older version of Glibc. So they have stdint.h and the C99 types. 
Only the *_WIDTH macros
appear to be missing in their version.

Andreas

> 
> If so, you can remove TPF from the list of targets for which bug 448 has 
> yet to be resolved.  (If TPF doesn't provide <stdint.h>, you should set 
> use_gcc_stdint=provide for TPF in config.gcc.  If it provides it but using 
> different types from glibc, you'll need to add a host-side header 
> describing those types instead of using glibc-stdint.h.)
> 



Re: [testsuite] Fix PR94019 to allow one vector char when !vect_hw_misalign

2020-03-05 Thread Richard Sandiford
"Kewen.Lin"  writes:
> on 2020/3/5 上午3:09, Richard Sandiford wrote:
>> "Kewen.Lin"  writes:
>>> Hi,
>>>
>>>
>>> --- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-17.c
>>> +++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-17.c
>>> @@ -41,6 +41,10 @@ main (void)
>>>  }
>>>
>>>  /* { dg-final { scan-tree-dump-not {vect_recog_over_widening_pattern: 
>>> detected} "vect" } } */
>>> -/* { dg-final { scan-tree-dump-not {vector[^\n]*char} "vect" } } */
>>> +/* { dg-final { scan-tree-dump-not {vector[^\n]*char} "vect" { target { { 
>>> ! powerpc*-*-* } || { powerpc*-*-* && vect_hw_misalign } } } } }
>>> +/* On Power, if there is no vect_hw_misalign support, unaligned vector 
>>> access
>>> +   adopts realign_load scheme.  It requires rs6000_builtin_mask_for_load to
>>> +   generate mask whose return type is vector char.  */
>>> +/* { dg-final { scan-tree-dump-times {vector[^\n]*char} 1 "vect" { target 
>>> { powerpc*-*-* && { ! vect_hw_misalign } } } } } */
>> 
>> Thanks for looking at this.  The patch is OK as-is.  However, since
>> vect-over-widen-17.c is a negative test for generic code, there probably
>> isn't much need for the new scan-tree-dump-times line, and it could start
>> failing if we make different optimisation decisions in future.  So the
>> patch is also OK with just the change to the scan-tree-dump-not line,
>> if you prefer that.  (Please keep the comment either way though --
>> it's really helpful.)
>> 
>
> Thanks for your suggestion!  The new patch is updated as below.  I removed
> the scan-tree-dump-times, as well as powerpc specific requirement.  
> Does it look good to you especially the later?  Thanks in advance!
>
> BR,
> Kewen
>
> gcc/testsuite/ChangeLog
>
> 2020-03-05  Kewen Lin  
>
>   PR testsuite/94019
>   * gcc.dg/vect/vect-over-widen-17.c: Don't expect vector char if it's
>   without misaligned vector access support.

OK, thanks.

Richard

>
> --
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-17.c 
> b/gcc/testsuite/gcc.dg/vect/vect-over-widen-17.c
> index 0448260..333d74a 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-17.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-17.c
> @@ -41,6 +41,9 @@ main (void)
>  }
>
>  /* { dg-final { scan-tree-dump-not {vect_recog_over_widening_pattern: 
> detected} "vect" } } */
> -/* { dg-final { scan-tree-dump-not {vector[^\n]*char} "vect" } } */
> +/* On Power, if there is no vect_hw_misalign support, unaligned vector access
> +   adopts realign_load scheme.  It requires rs6000_builtin_mask_for_load to
> +   generate mask whose return type is vector char.  */
> +/* { dg-final { scan-tree-dump-not {vector[^\n]*char} "vect" { target 
> vect_hw_misalign } } } */
>  /* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */


Re: [PING PATCH coroutines] Do not strip cleanup_point when promote temporaries out of current stmt

2020-03-05 Thread JunMa

Ping

Regards
JunMa
在 2020/2/27 上午10:18, JunMa 写道:

在 2020/2/11 上午10:14, JunMa 写道:
Kindly ping

Regards
JunMa

Hi
In maybe_promote_captured_temps, the cleanup_point_stmt is
stripped when handling temporaries captured by reference. However, there
may also be non-reference temporaries in the current stmt, which then
cause an ICE in the gimplify pass.

This patch fixes this. The testcase comes from cppcoro and was reduced
with creduce.

Bootstrap and test on X86_64, is it OK?

Regards
JunMa

gcc/cp
2020-02-11  Jun Ma 

    * coroutines.cc (maybe_promote_captured_temps): Do not strip
    cleanup_point_stmt.

gcc/testsuite
2020-02-11  Jun Ma 

    * g++.dg/coroutines/torture/lambda-09-capture-object.C: New 
test.







Re: [PING PATCH coroutines] Set side effects flag for BIND_EXPR which build in maybe_promote_captured_temps

2020-03-05 Thread JunMa

Ping

Regards
JunMa
在 2020/2/27 上午10:17, JunMa 写道:

在 2020/2/11 上午10:50, JunMa 写道:
Hi
kindly ping~

Regards
JunMa

Hi
As the title says: in maybe_promote_captured_temps, we promote captured
temporaries and the co_await_expr into a new BIND_EXPR. As the BIND_EXPR
contains the co_await_expr and possibly other function calls, the side
effects flag should be set.

This patch fixes one mismatch in cppcoro; the testcase comes from cppcoro
and was reduced with creduce.

Bootstrap and test on X86_64, is it OK?

Regards
JunMa

gcc/cp
2020-02-11  Jun Ma 

    * coroutines.cc (maybe_promote_captured_temps): Set side effects
    flag for BIND_EXPR.

gcc/testsuite
2020-02-11  Jun Ma 

    * g++.dg/coroutines/torture/lambda-10-co-await-lambda.C: New 
test.







Re: [GCC][PATCH][AArch32] ACLE intrinsics bfloat16 vmmla and vfma for AArch32 AdvSIMD

2020-03-05 Thread Kyrill Tkachov

Hi Delia,

On 3/4/20 5:20 PM, Delia Burduv wrote:

Hi,

This is the latest version of the patch.

Thanks,
Delia

On 2/21/20 11:41 AM, Kyrill Tkachov wrote:

Hi Delia,

On 2/19/20 5:23 PM, Delia Burduv wrote:

Hi,

Here is the latest version of the patch. It just has some minor 
formatting changes that were brought up by Richard Sandiford in the 
AArch64 patches


Thanks,
Delia

On 1/31/20 3:23 PM, Delia Burduv wrote:
Here is the updated patch. The changes are minor, so let me know if 
there is anything else to fix or if it can be committed.


Thank you,
Delia

On 1/30/20 2:55 PM, Kyrill Tkachov wrote:

Hi Delia,


On 1/28/20 4:44 PM, Delia Burduv wrote:

Ping.
 


*From:* Delia Burduv 
*Sent:* 22 January 2020 17:26
*To:* gcc-patches@gcc.gnu.org 
*Cc:* ni...@redhat.com ; Richard Earnshaw 
; Ramana Radhakrishnan 
; Kyrylo Tkachov 

*Subject:* Re: [GCC][PATCH][AArch32] ACLE intrinsics bfloat16 
vmmla and vfma for AArch32 AdvSIMD

Ping.

I have read Richard Sandiford's comments on the AArch64 patches 
and I
will apply what is relevant to this patch as well. Particularly, 
I will
change the tests to use the exact input and output registers and 
I will

change the types of the rtl patterns.



Please send the updated patches so that someone can commit them 
for you once they're reviewed.


Thanks,

Kyrill




On 12/20/19 6:44 PM, Delia Burduv wrote:
> This patch adds the ARMv8.6 ACLE intrinsics for vmmla, vfmab 
and vfmat

> as part of the BFloat16 extension.
> (https://developer.arm.com/docs/101028/latest.)
> The intrinsics are declared in arm_neon.h and the RTL patterns are
> defined in neon.md.
> Two new tests are added to check assembler output and lane 
indices.

>
> This patch depends on the Arm back-end patch.
> (https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01448.html)
>
> Tested for regression on arm-none-eabi and armeb-none-eabi. I 
don't have
> commit rights, so if this is ok can someone please commit it 
for me?

>
> gcc/ChangeLog:
>
> 2019-11-12  Delia Burduv 
>
>       * config/arm/arm_neon.h (vbfmmlaq_f32): New.
>       (vbfmlalbq_f32): New.
>       (vbfmlaltq_f32): New.
>       (vbfmlalbq_lane_f32): New.
>       (vbfmlaltq_lane_f32): New.
>       (vbfmlalbq_laneq_f32): New.
>       (vbfmlaltq_laneq_f32): New.
>       * config/arm/arm_neon_builtins.def (vbfmmla): New.
>       (vbfmab): New.
>       (vbfmat): New.
>       (vbfmab_lane): New.
>       (vbfmat_lane): New.
>       (vbfmab_laneq): New.
>       (vbfmat_laneq): New.
>       * config/arm/iterators.md (BF_MA): New int iterator.
>       (bt): New int attribute.
>       (VQXBF): Copy of VQX with V8BF.
>       (V_HALF): Added V8BF.
>       * config/arm/neon.md (neon_vbfmmlav8hi): New insn.
>       (neon_vbfmav8hi): New insn.
>       (neon_vbfma_lanev8hi): New insn.
>       (neon_vbfma_laneqv8hi): New expand.
>       (neon_vget_high): Changed iterator to VQXBF.
>       * config/arm/unspecs.md (UNSPEC_BFMMLA): New UNSPEC.
>       (UNSPEC_BFMAB): New UNSPEC.
>       (UNSPEC_BFMAT): New UNSPEC.
>
> 2019-11-12  Delia Burduv 
>
>       * gcc.target/arm/simd/bf16_ma_1.c: New test.
>       * gcc.target/arm/simd/bf16_ma_2.c: New test.
>       * gcc.target/arm/simd/bf16_mmla_1.c: New test.


This looks good, a few minor things though...


diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 
3c78f435009ab027f92693d00ab5b40960d5419d..81f8008ea6a5fb11eb09f6685ba24bb0c54fb248 
100644

--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -18742,6 +18742,64 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
   return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index);
 }

+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+bf16")
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmmlaq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
+{
+  return __builtin_neon_vbfmmlav8bf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbfmlalbq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b)
+{
+  return __builtin_neon_vbfmabv8bf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))

Re: Regression on 32-bit powerpc?

2020-03-05 Thread Richard Biener
On Thu, Mar 5, 2020 at 9:14 AM John Paul Adrian Glaubitz
 wrote:
>
> On 3/5/20 9:11 AM, Jakub Jelinek wrote:
> > On Thu, Mar 05, 2020 at 08:56:37AM +0100, John Paul Adrian Glaubitz wrote:
> >> The latest gcc-10 snapshot in Debian fails to build in Debian with:
> >
> > What is the problem?
> > All that is present in what you posted are warnings.
>
> Okay, I was confused by the "internal error: builtin function %qs already 
> processed".
>
> >> Full log in: 
> >> https://buildd.debian.org/status/fetch.php?pkg=gcc-10&arch=powerpc&ver=10-20200304-1&stamp=1583386777&raw=0
> >
> > And here it shows the buildbox was OOMed.
>
> Yeah, I suspected that as well, but I wanted to make sure which is
> why I asked. Thanks for the confirmation.
>
> > That might or might not be a GCC problem, guess it depends on how much
> > memory it actually needs and if it isn't excessive compared to other
> > targets.
>
> I have rescheduled the build now. Normally we haven't OOMs with GCC
> on 32-bit PowerPC.

Bootstrap is currently known to fail (for us):
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94042

> Thanks,
> Adrian
>
> --
>  .''`.  John Paul Adrian Glaubitz
> : :' :  Debian Developer - glaub...@debian.org
> `. `'   Freie Universitaet Berlin - glaub...@physik.fu-berlin.de
>   `-GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913


Re: [GCC][PATCH][AArch64] ACLE intrinsics for BFCVTN, BFCVTN2 (AArch64 AdvSIMD) and BFCVT (AArch64 FP)

2020-03-05 Thread Richard Sandiford
Hi,

Thanks for the update and sorry for the slow reply.

When I try the patch locally I get:

FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O0  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O1  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2 -flto 
-fno-use-linker-plugin -flto-partition=none  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2 -flto 
-fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O3 -g  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -Og -g  (test for 
excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -Os  (test for 
excess errors)

I think that's because:

Delia Burduv  writes:
> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
> index 
> 3759c0d1cb449a7f0125cc2a1433127564d66622..fa7080c2953bc3254f01d842a8afef917d469080
>  100644
> --- a/gcc/config/aarch64/arm_bf16.h
> +++ b/gcc/config/aarch64/arm_bf16.h
> @@ -27,6 +27,19 @@
>  #ifndef _AARCH64_BF16_H_
>  #define _AARCH64_BF16_H_
>  
> +#pragma GCC push_options
> +#pragma GCC target ("+nothing+bf16")
> +
>  typedef __bf16 bfloat16_t;
> +typedef float float32_t;
> +
> +__extension__ extern __inline bfloat16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vcvth_bf16_f32 (float32_t __a)
> +{
> +  return __builtin_aarch64_bfcvtbf (__a);
> +}
> +
> +#pragma GCC pop_options

"+bf16" implicitly enables "+simd", so functions guarded with
"+nothing+bf16" are only available when "+simd" is available.
I think we want "+nothing+bf16+nosimd" instead.  (Haven't tested
that though.)

Very minor, but: it might be clearer to leave the typedefs outside
of the #pragma block.  It doesn't make any difference to the behaviour,
but it emphasises that the typedefs really are available unconditionally.

Looks ready to go otherwise.

Thanks,
Richard


[PATCH][GCC]: Add myself to MAINTAINERS

2020-03-05 Thread Srinath Parvathaneni
Hello,

Add myself to MAINTAINERS file.

Regards,
SRI.

ChangeLog:

2020-03-05  Srinath Parvathaneni  

* MAINTAINERS (Write After Approval): Add myself.


### Attachment also inlined for ease of reply###


diff --git a/MAINTAINERS b/MAINTAINERS
index 
5e0ce37b6c511a6abfe0ff36dee0929f7d43e8be..be8016675bd6ccfebb6b78e893a3a840395f7535
 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -540,6 +540,7 @@ Peter O'Gorman  

 Andrea Ornstein
 Maxim Ostapenko

 Patrick Palka  
+Srinath Parvathaneni   
 Devang Patel   
 Andris Pavenis 
 Fernando Pereira   

diff --git a/MAINTAINERS b/MAINTAINERS
index 
5e0ce37b6c511a6abfe0ff36dee0929f7d43e8be..be8016675bd6ccfebb6b78e893a3a840395f7535
 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -540,6 +540,7 @@ Peter O'Gorman  

 Andrea Ornstein
 Maxim Ostapenko

 Patrick Palka  
+Srinath Parvathaneni   
 Devang Patel   
 Andris Pavenis 
 Fernando Pereira   



Re: [PATCH] [rs6000] Fix a wrong GC issue

2020-03-05 Thread Segher Boessenkool
Hi Bin Bin,

On Thu, Mar 05, 2020 at 10:31:39AM +0800, binbin wrote:
> On 2020/3/5 上午2:35, Segher Boessenkool wrote:
> >On Wed, Mar 04, 2020 at 03:08:41PM +0800, binbin wrote:
>   * config/rs6000/rs6000.h (MAX_MACHINE_MODE): Include the header file
>   for MAX_MACHINE_MODE.
> >>>
> >>>The changelog entry should say *what* file is included, and under what
> >>>condition.  It doesn't have to say why (that belongs in the commit
> >>>message).
> >>>
> >>>But, can't you just include it unconditionally?  Don't we already,
> >>>anyway, via coretypes.h -> machmode.h -> insn-modes.h?
> >>
> >>OK, change it to uncondition.  Thanks for your suggestion.
> >
> >What about the second part?  Shouldn't it already be included anyway?
> 
> If "insn-modes.h" is not included in rs6000.h, it reports error showing
> MAX_MACHINE_MODE’ undeclared here (not in a function) in file included 
> from
> ../../host-powerpc64le-unknown-linux-gnu/gcc/tm.h:25
> from ../.././libgcc/libgcc2.c:29
> ../.././libgcc/../gcc/config/rs6000/rs6000.h:2495:42.  Thanks.

Sure.  But why do we need to include that file at all here?  It should
already be included via coretypes.h (which includes machmode.h, which
includes insn-modes.h).

> +extern GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
> +extern GTY(()) tree altivec_builtin_mask_for_load;
>  
>  #ifndef USED_FOR_TARGET

Maybe it should be moved into here?  Or some other #ifdef trickery?


Segher


[PATCH] Backport to gcc-9: PR92398: Fix testcase failure of pr72804.c

2020-03-05 Thread luoxhu
From: Xionghu Luo 

Backport the patch to fix failures on P9 and P8BE, P7LE for PR94036.
Tested on P9/P8/P7 with all tests passing; OK to commit?
(gcc-8 does not need the backport, as the test doesn't exist there.)

P9LE generated instruction is not worse than P8LE.
mtvsrdd;xxlnot;stxv vs. not;not;std;std.
It can have longer latency, but latency via memory is not so critical,
and this does save decode and other resources.  It's hard to choose
which is best.  Update the test case to fix failures.

gcc/testsuite/ChangeLog:

2020-03-05  Luo Xiong Hu  

backport from master.
PR testsuite/94036

2019-12-02  Luo Xiong Hu  

PR testsuite/92398
* gcc.target/powerpc/pr72804.c: Split the store function to...
* gcc.target/powerpc/pr92398.h: ... this one.  New.
* gcc.target/powerpc/pr92398.p9+.c: New.
* gcc.target/powerpc/pr92398.p9-.c: New.
* lib/target-supports.exp (check_effective_target_p8): New.
(check_effective_target_p9+): New.
---
 gcc/testsuite/gcc.target/powerpc/pr72804.c| 16 +++
 gcc/testsuite/gcc.target/powerpc/pr92398.h| 17 
 .../gcc.target/powerpc/pr92398.p9+.c  | 12 +++
 .../gcc.target/powerpc/pr92398.p9-.c  | 10 ++
 gcc/testsuite/lib/target-supports.exp | 20 +++
 5 files changed, 62 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr92398.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c

diff --git a/gcc/testsuite/gcc.target/powerpc/pr72804.c 
b/gcc/testsuite/gcc.target/powerpc/pr72804.c
index b83b6350d75..c0711499ae5 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr72804.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr72804.c
@@ -9,17 +9,7 @@ foo (__int128_t *src)
   return ~*src;
 }
 
-void
-bar (__int128_t *dst, __int128_t src)
-{
-  *dst =  ~src;
-}
 
-/* { dg-final { scan-assembler-times "not " 4 } } */
-/* { dg-final { scan-assembler-times "std " 2 } } */
-/* { dg-final { scan-assembler-times "ld " 2 } } */
-/* { dg-final { scan-assembler-not "lxvd2x" } } */
-/* { dg-final { scan-assembler-not "stxvd2x" } } */
-/* { dg-final { scan-assembler-not "xxpermdi" } } */
-/* { dg-final { scan-assembler-not "mfvsrd" } } */
-/* { dg-final { scan-assembler-not "mfvsrd" } } */
+/* { dg-final { scan-assembler-times {\mld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mlxvd2x\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h 
b/gcc/testsuite/gcc.target/powerpc/pr92398.h
new file mode 100644
index 000..5a4a8bcab80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.h
@@ -0,0 +1,17 @@
+/* This test code is included into pr92398.p9-.c and pr92398.p9+.c.
+   The two files have the tests for the number of instructions generated for
+   P9- versus P9+.
+
+   store generates difference instructions as below:
+   P9+: mtvsrdd;xxlnot;stxv.
+   P8/P7/P6 LE: not;not;std;std.
+   P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x.
+   P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x.
+   P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected.  */
+
+void
+bar (__int128_t *dst, __int128_t src)
+{
+  *dst =  ~src;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
new file mode 100644
index 000..a819c3f16af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { lp64 && p9+ } } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mld\M} } } */
+/* { dg-final { scan-assembler-not {\mnot\M} } } */
+
+/* Source code for the test in pr92398.h */
+#include "pr92398.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
new file mode 100644
index 000..065ae73f267
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { lp64 && {! p9+} } } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */
+/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { p8 && be } } } } */
+
+/* Source code for the test in pr92398.h */
+#include "pr92398.h"
+
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index cda0f3d350b..ea9a50ccb27 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2555,6 +2555,26 @@ proc check_effective_target_le { } {
 }]
 }
 
+# Return 1 if we're generating code for only power8 

Re: GLIBC libmvec status

2020-03-05 Thread Segher Boessenkool
On Fri, Feb 28, 2020 at 05:31:56PM +0100, Jakub Jelinek wrote:
> On Fri, Feb 28, 2020 at 04:23:03PM +, GT wrote:
> > Do we want to change the name and title of the document since Segher 
> > doesn't believe it
> > is an ABI. My initial suggestion: "POWER Architecture Specification of 
> > Scalar Function
> > to Vector Function Mapping".
> 
> It is an ABI, similarly like e.g. the C++ Itanium ABI is an ABI, it specifies
> mangling of certain functions and how the function argument types and return
> types are transformed.

It does not say anything about the machine code generated, or about the
binary format generated, other than the naming of symbols.  It is
confusing to call this an "ABI": you still need to have an actual ABI
underneath, and this itself is not a "binary interface".  In some other
contexts similar things are called "binding", but that is not a very
good name either :-/


Segher


Re: [PATCH] testsuite: Compile asan_test.C with -Wno-alloc-size-larger-than

2020-03-05 Thread Jakub Jelinek
On Thu, Mar 05, 2020 at 10:05:43AM +0100, Uros Bizjak wrote:
> asan_test.cc tries to allocate 0xf0000000 bytes for 32bit targets in
> a disabled DISABLED_DemoOOM test.  Since the testcase is compiled with
> -Werror, the compilation fails with:
> 
> error: argument 1 value '4026531840' exceeds maximum object size 2147483647
> 
> Compile with -Wno-alloc-size-larger-than to avoid compilation failure.
> 
> 2020-03-05  Uroš Bizjak  
> 
> * g++.de/asan/asan_test.C (dg-options): Add

s/de/dg/

> -Wno-alloc-size-larger-than.

Ok, thanks.

Jakub



[PATCH] testsuite: Compile asan_test.C with -Wno-alloc-size-larger-than

2020-03-05 Thread Uros Bizjak
asan_test.cc tries to allocate 0xf0000000 bytes for 32bit targets in
a disabled DISABLED_DemoOOM test.  Since the testcase is compiled with
-Werror, the compilation fails with:

error: argument 1 value '4026531840' exceeds maximum object size 2147483647

Compile with -Wno-alloc-size-larger-than to avoid compilation failure.

2020-03-05  Uroš Bizjak  

* g++.de/asan/asan_test.C (dg-options): Add
-Wno-alloc-size-larger-than.

Tested on x86_64-linux-gnu {,-m32}.

unix/-m32 asan test results went from

# of unexpected failures        1
# of unresolved testcases       1
# of unsupported tests          6

to

# of expected passes            1193
# of unsupported tests          6

OK for mainline?

Uros.
diff --git a/gcc/testsuite/g++.dg/asan/asan_test.C 
b/gcc/testsuite/g++.dg/asan/asan_test.C
index f3f7626ef3be..a151979946ac 100644
--- a/gcc/testsuite/g++.dg/asan/asan_test.C
+++ b/gcc/testsuite/g++.dg/asan/asan_test.C
@@ -2,7 +2,7 @@
 // { dg-skip-if "" { *-*-* } { "*" } { "-O2" } }
 // { dg-skip-if "" { *-*-* } { "-flto" } { "" } }
 // { dg-additional-sources "asan_globals_test-wrapper.cc" }
-// { dg-options "-std=c++11 -fsanitize=address -fno-builtin -Wall -Werror -g 
-DASAN_UAR=0 -DASAN_HAS_EXCEPTIONS=1 -DASAN_HAS_BLACKLIST=0 
-DSANITIZER_USE_DEJAGNU_GTEST=1 -lasan -lpthread -ldl" }
+// { dg-options "-std=c++11 -fsanitize=address -fno-builtin -Wall -Werror 
-Wno-alloc-size-larger-than -g -DASAN_UAR=0 -DASAN_HAS_EXCEPTIONS=1 
-DASAN_HAS_BLACKLIST=0 -DSANITIZER_USE_DEJAGNU_GTEST=1 -lasan -lpthread -ldl" }
 // { dg-additional-options "-DASAN_NEEDS_SEGV=1" { target { ! arm*-*-* } } }
 // { dg-additional-options "-DASAN_LOW_MEMORY=1 -DASAN_NEEDS_SEGV=0" { target 
arm*-*-* } }
 // { dg-additional-options "-DASAN_AVOID_EXPENSIVE_TESTS=1" { target { ! 
run_expensive_tests } } }


Re: [PATCH v2 3/3] Keep .GCC.command.line sections of LTO objects.

2020-03-05 Thread Martin Liška

Hi.

I'm sending the updated patch based on Egeyar's work.
It utilizes a new environmental variable and uses the currently
existing -frecord-gcc-switches option.

Thoughts?
Martin
>From 9436d12e7a540691c6f2d6e2db4730a138e5c458 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 5 Mar 2020 09:39:17 +0100
Subject: [PATCH] Change semantics of -frecord-gcc-switches.

gcc/ChangeLog:

2020-03-05  Martin Liska  
	Egeyar Bagcioglu  

	* common.opt: Make flag_record_gcc_switches also
	a driver option.
	* doc/tm.texi: Regenerate.
	* gcc.c (set_driver_command_line_envvar): New.
	(driver_handle_option): Handle OPT_frecord_gcc_switches
	and export command line into a ENV variable.
	(driver::main): Save command line.
	(driver::set_commandline): New.
	* gcc.h (set_commandline): New.
	* target.def: Simplify documentation by removal
	of unused enum values.
	* target.h (elf_record_gcc_switches): Change function type.
	* toplev.c (init_asm_output): Update call.
	* varasm.c (elf_record_gcc_switches): Record GCC version
	string and content of GCC_DRIVER_COMMAND_LINE.
---
 gcc/common.opt  |  2 +-
 gcc/doc/tm.texi | 37 ++---
 gcc/gcc.c   | 41 +
 gcc/gcc.h   |  1 +
 gcc/target.def  | 37 ++---
 gcc/target.h|  2 +-
 gcc/toplev.c| 11 +--
 gcc/varasm.c| 46 ++
 8 files changed, 59 insertions(+), 118 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index fa9da505fc2..60ca5a521c5 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2246,7 +2246,7 @@ Common Joined RejectNegative Var(common_deferred_options) Defer
 ; records information in the assembler output file as comments, so
 ; they never reach the object file.
 frecord-gcc-switches
-Common Report Var(flag_record_gcc_switches)
+Common Driver Report Var(flag_record_gcc_switches)
 Record gcc command line switches in the object file.
 
 freg-struct-return
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 19985adac3e..30c71d60aff 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -8061,43 +8061,10 @@ need to override this if your target has special flags that might be
 set via @code{__attribute__}.
 @end deftypefn
 
-@deftypefn {Target Hook} int TARGET_ASM_RECORD_GCC_SWITCHES (print_switch_type @var{type}, const char *@var{text})
+@deftypefn {Target Hook} void TARGET_ASM_RECORD_GCC_SWITCHES (void)
 Provides the target with the ability to record the gcc command line
 switches that have been passed to the compiler, and options that are
-enabled.  The @var{type} argument specifies what is being recorded.
-It can take the following values:
-
-@table @gcctabopt
-@item SWITCH_TYPE_PASSED
-@var{text} is a command line switch that has been set by the user.
-
-@item SWITCH_TYPE_ENABLED
-@var{text} is an option which has been enabled.  This might be as a
-direct result of a command line switch, or because it is enabled by
-default or because it has been enabled as a side effect of a different
-command line switch.  For example, the @option{-O2} switch enables
-various different individual optimization passes.
-
-@item SWITCH_TYPE_DESCRIPTIVE
-@var{text} is either NULL or some descriptive text which should be
-ignored.  If @var{text} is NULL then it is being used to warn the
-target hook that either recording is starting or ending.  The first
-time @var{type} is SWITCH_TYPE_DESCRIPTIVE and @var{text} is NULL, the
-warning is for start up and the second time the warning is for
-wind down.  This feature is to allow the target hook to make any
-necessary preparations before it starts to record switches and to
-perform any necessary tidying up after it has finished recording
-switches.
-
-@item SWITCH_TYPE_LINE_START
-This option can be ignored by this target hook.
-
-@item  SWITCH_TYPE_LINE_END
-This option can be ignored by this target hook.
-@end table
-
-The hook's return value must be zero.  Other return values may be
-supported in the future.
+enabled.
 
 By default this hook is set to NULL, but an example implementation is
 provided for ELF based targets.  Called @var{elf_record_gcc_switches},
diff --git a/gcc/gcc.c b/gcc/gcc.c
index 9f790db0daf..673ba2935f9 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -235,6 +235,11 @@ static int verbose_only_flag;
 
 static int print_subprocess_help;
 
+/* argc and argv used to call gcc.  Necessary for
+   --record-gcc-command-line option.  */
+static unsigned int driver_gcc_argc;
+static const char **driver_gcc_argv;
+
 /* Linker suffix passed to -fuse-ld=... */
 static const char *use_ld;
 
@@ -3724,6 +3729,28 @@ set_source_date_epoch_envvar ()
   setenv ("SOURCE_DATE_EPOCH", source_date_epoch, 0);
 }
 
+/* Set GCC_DRIVER_COMMAND_LINE enviromental variable that is later
+   used by -frecord-gcc-switches option.  */
+
+static void
+set_driver_command_line_envvar ()
+{
+  unsigned int length = 0;
+  for (unsigned int i = 0; i < driver_gcc_argc; i++)
+length += 

Re: Regression on 32-bit powerpc?

2020-03-05 Thread John Paul Adrian Glaubitz
On 3/5/20 9:11 AM, Jakub Jelinek wrote:
> On Thu, Mar 05, 2020 at 08:56:37AM +0100, John Paul Adrian Glaubitz wrote:
>> The latest gcc-10 snapshot in Debian fails to build in Debian with:
> 
> What is the problem?
> All that is present in what you posted are warnings.

Okay, I was confused by the "internal error: builtin function %qs already 
processed".

>> Full log in: 
>> https://buildd.debian.org/status/fetch.php?pkg=gcc-10&arch=powerpc&ver=10-20200304-1&stamp=1583386777&raw=0
> 
> And here it shows the buildbox was OOMed.

Yeah, I suspected that as well, but I wanted to make sure which is
why I asked. Thanks for the confirmation.

> That might or might not be a GCC problem, guess it depends on how much
> memory it actually needs and if it isn't excessive compared to other
> targets.

I have rescheduled the build now. Normally we haven't OOMs with GCC
on 32-bit PowerPC.

Thanks,
Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaub...@debian.org
`. `'   Freie Universitaet Berlin - glaub...@physik.fu-berlin.de
  `-GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913


Re: Regression on 32-bit powerpc?

2020-03-05 Thread Jakub Jelinek
On Thu, Mar 05, 2020 at 08:56:37AM +0100, John Paul Adrian Glaubitz wrote:
> The latest gcc-10 snapshot in Debian fails to build in Debian with:

What is the problem?
All that is present in what you posted are warnings.

> Full log in: 
> https://buildd.debian.org/status/fetch.php?pkg=gcc-10&arch=powerpc&ver=10-20200304-1&stamp=1583386777&raw=0

And here it shows the buildbox was OOMed.
That might or might not be a GCC problem, guess it depends on how much
memory it actually needs and if it isn't excessive compared to other
targets.

Jakub



Re: [PATCH] print-rtl: Fix printing of CONST_STRING in DEBUG_INSNs [PR93399]

2020-03-05 Thread Richard Biener
On Thu, 5 Mar 2020, Jakub Jelinek wrote:

> On Thu, Mar 05, 2020 at 08:55:33AM +0100, Richard Biener wrote:
> > It looks like tree-pretty-print.c doesn't bother to truncate it
> > so why bother for RTL?
> 
> Ok, so I'll go then with the size_t argument, so that nothing needs to be
> truncated.
> Updated patch:

OK.

Richard.

> 2020-03-05  Jakub Jelinek  
> 
>   PR middle-end/93399
>   * tree-pretty-print.h (pretty_print_string): Declare.
>   * tree-pretty-print.c (pretty_print_string): Remove forward
>   declaration, no longer static.  Change nbytes parameter type
>   from unsigned to size_t.
>   * print-rtl.c (print_value) : Use
>   pretty_print_string and for shrink way too long strings.
> 
>   * gcc.dg/pr93399.c: New test.
> 
> --- gcc/tree-pretty-print.h.jj2020-03-05 07:57:41.875443726 +0100
> +++ gcc/tree-pretty-print.h   2020-03-05 08:59:34.053274560 +0100
> @@ -47,6 +47,7 @@ extern void print_declaration (pretty_pr
>  extern int op_code_prio (enum tree_code);
>  extern int op_prio (const_tree);
>  extern const char *op_symbol_code (enum tree_code);
> +extern void pretty_print_string (pretty_printer *, const char *, size_t);
>  extern void print_call_name (pretty_printer *, tree, dump_flags_t);
>  extern void percent_K_format (text_info *, location_t, tree);
>  extern void pp_tree_identifier (pretty_printer *, tree);
> --- gcc/tree-pretty-print.c.jj2020-03-05 07:57:41.833444349 +0100
> +++ gcc/tree-pretty-print.c   2020-03-05 08:59:18.821502109 +0100
> @@ -45,7 +45,6 @@ along with GCC; see the file COPYING3.
>  
>  /* Local functions, macros and variables.  */
>  static const char *op_symbol (const_tree);
> -static void pretty_print_string (pretty_printer *, const char*, unsigned);
>  static void newline_and_indent (pretty_printer *, int);
>  static void maybe_init_pretty_print (FILE *);
>  static void print_struct_decl (pretty_printer *, const_tree, int, 
> dump_flags_t);
> @@ -4216,8 +4215,8 @@ print_call_name (pretty_printer *pp, tre
>  /* Print the first N characters in the array STR, replacing non-printable
> characters (including embedded nuls) with unambiguous escape sequences.  
> */
>  
> -static void
> -pretty_print_string (pretty_printer *pp, const char *str, unsigned n)
> +void
> +pretty_print_string (pretty_printer *pp, const char *str, size_t n)
>  {
>if (str == NULL)
>  return;
> --- gcc/print-rtl.c.jj2020-03-05 07:57:41.801444821 +0100
> +++ gcc/print-rtl.c   2020-03-05 09:01:00.523982754 +0100
> @@ -1685,7 +1685,9 @@ print_value (pretty_printer *pp, const_r
>pp_string (pp, tmp);
>break;
>  case CONST_STRING:
> -  pp_printf (pp, "\"%s\"", XSTR (x, 0));
> +  pp_string (pp, "\"");
> +  pretty_print_string (pp, XSTR (x, 0), strlen (XSTR (x, 0)));
> +  pp_string (pp, "\"");
>break;
>  case SYMBOL_REF:
>pp_printf (pp, "`%s'", XSTR (x, 0));
> --- gcc/testsuite/gcc.dg/pr93399.c.jj 2020-03-05 08:58:19.177393148 +0100
> +++ gcc/testsuite/gcc.dg/pr93399.c2020-03-05 08:58:19.177393148 +0100
> @@ -0,0 +1,17 @@
> +/* PR middle-end/93399 */
> +/* { dg-do assemble } */
> +/* { dg-options "-fverbose-asm -dA -g -O3" } */
> +
> +extern inline __attribute__ ((__always_inline__, __gnu_inline__)) char *
> +strstr (const char *haystack, const char *needle)
> +{
> +  return __builtin_strstr (haystack, needle);
> +}
> +
> +int
> +main (int argc, const char **argv)
> +{
> +  char *substr = strstr (argv[0], "\n");
> +  char *another = strstr (argv[0], "\r\n");
> +  return 0;
> +}
> 
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH] print-rtl: Fix printing of CONST_STRING in DEBUG_INSNs [PR93399]

2020-03-05 Thread Jakub Jelinek
On Thu, Mar 05, 2020 at 08:55:33AM +0100, Richard Biener wrote:
> It looks like tree-pretty-print.c doesn't bother to truncate it
> so why bother for RTL?

Ok, so I'll go then with the size_t argument, so that nothing needs to be
truncated.
Updated patch:

2020-03-05  Jakub Jelinek  

PR middle-end/93399
* tree-pretty-print.h (pretty_print_string): Declare.
* tree-pretty-print.c (pretty_print_string): Remove forward
declaration, no longer static.  Change nbytes parameter type
from unsigned to size_t.
* print-rtl.c (print_value) <case CONST_STRING>: Use
pretty_print_string instead of pp_printf.

* gcc.dg/pr93399.c: New test.

--- gcc/tree-pretty-print.h.jj  2020-03-05 07:57:41.875443726 +0100
+++ gcc/tree-pretty-print.h 2020-03-05 08:59:34.053274560 +0100
@@ -47,6 +47,7 @@ extern void print_declaration (pretty_pr
 extern int op_code_prio (enum tree_code);
 extern int op_prio (const_tree);
 extern const char *op_symbol_code (enum tree_code);
+extern void pretty_print_string (pretty_printer *, const char *, size_t);
 extern void print_call_name (pretty_printer *, tree, dump_flags_t);
 extern void percent_K_format (text_info *, location_t, tree);
 extern void pp_tree_identifier (pretty_printer *, tree);
--- gcc/tree-pretty-print.c.jj  2020-03-05 07:57:41.833444349 +0100
+++ gcc/tree-pretty-print.c 2020-03-05 08:59:18.821502109 +0100
@@ -45,7 +45,6 @@ along with GCC; see the file COPYING3.
 
 /* Local functions, macros and variables.  */
 static const char *op_symbol (const_tree);
-static void pretty_print_string (pretty_printer *, const char*, unsigned);
 static void newline_and_indent (pretty_printer *, int);
 static void maybe_init_pretty_print (FILE *);
 static void print_struct_decl (pretty_printer *, const_tree, int, dump_flags_t);
@@ -4216,8 +4215,8 @@ print_call_name (pretty_printer *pp, tre
 /* Print the first N characters in the array STR, replacing non-printable
characters (including embedded nuls) with unambiguous escape sequences.  */
 
-static void
-pretty_print_string (pretty_printer *pp, const char *str, unsigned n)
+void
+pretty_print_string (pretty_printer *pp, const char *str, size_t n)
 {
   if (str == NULL)
 return;
--- gcc/print-rtl.c.jj  2020-03-05 07:57:41.801444821 +0100
+++ gcc/print-rtl.c 2020-03-05 09:01:00.523982754 +0100
@@ -1685,7 +1685,9 @@ print_value (pretty_printer *pp, const_r
   pp_string (pp, tmp);
   break;
 case CONST_STRING:
-  pp_printf (pp, "\"%s\"", XSTR (x, 0));
+  pp_string (pp, "\"");
+  pretty_print_string (pp, XSTR (x, 0), strlen (XSTR (x, 0)));
+  pp_string (pp, "\"");
   break;
 case SYMBOL_REF:
   pp_printf (pp, "`%s'", XSTR (x, 0));
--- gcc/testsuite/gcc.dg/pr93399.c.jj   2020-03-05 08:58:19.177393148 +0100
+++ gcc/testsuite/gcc.dg/pr93399.c  2020-03-05 08:58:19.177393148 +0100
@@ -0,0 +1,17 @@
+/* PR middle-end/93399 */
+/* { dg-do assemble } */
+/* { dg-options "-fverbose-asm -dA -g -O3" } */
+
+extern inline __attribute__ ((__always_inline__, __gnu_inline__)) char *
+strstr (const char *haystack, const char *needle)
+{
+  return __builtin_strstr (haystack, needle);
+}
+
+int
+main (int argc, const char **argv)
+{
+  char *substr = strstr (argv[0], "\n");
+  char *another = strstr (argv[0], "\r\n");
+  return 0;
+}


Jakub