Re: [PATCH] [PR c++/84979] improve auto handling in explicit tmpl args for concepts

2018-03-30 Thread Alexandre Oliva
On Mar 30, 2018, Jason Merrill  wrote:

> On Thu, Mar 29, 2018 at 6:24 PM, Alexandre Oliva  wrote:

>> AFAICT we wouldn't always know, within cp_parser_template_id, whether
>> the id is a type or a function.

> True, it looks like sometimes we build a TEMPLATE_ID_EXPR with an
> IDENTIFIER_NODE.  Looking at tsubst_copy_and_build, I see that we
> don't call finish_id_expression when substituting such a
> TEMPLATE_ID_EXPR.  So maybe lookup_template_function and
> lookup_template_variable are the right places for this test.

lookup_template_function is no good as it is, it's called twice with the
same (fns, arglist), from build_concept_check (within
cp_parser_maybe_partial_concept_id), and then directly from
cp_parser_template_id.  With the patchlet below, we get two copies of
the error message:

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 284eaf3cab66..b35f8076cfbd 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -8788,6 +8788,18 @@ lookup_template_function (tree fns, tree arglist)
   return error_mark_node;
 }
 
+  if (flag_concepts)
+{
+  tree xauto = type_uses_auto (arglist);
+  if (xauto)
+   {
+ error_at (DECL_SOURCE_LOCATION (TEMPLATE_TYPE_DECL (xauto)),
+   "invalid use of % in template argument"
+   " for template function");
+ return error_mark_node;
+   }
+}
+
   if (BASELINK_P (fns))
 {
   BASELINK_FUNCTIONS (fns) = build2 (TEMPLATE_ID_EXPR,
@@ -9395,6 +9407,19 @@ lookup_template_variable (tree templ, tree arglist)
   if (flag_concepts && variable_concept_p (templ))
 /* Except that concepts are always bool.  */
 type = boolean_type_node;
+
+  if (flag_concepts)
+{
+  tree xauto = type_uses_auto (arglist);
+  if (xauto)
+   {
+ error_at (DECL_SOURCE_LOCATION (TEMPLATE_TYPE_DECL (xauto)),
+   "invalid use of % in template argument"
+   " for template variable");
+ return error_mark_node;
+   }
+}
+
   return build2 (TEMPLATE_ID_EXPR, type, templ, arglist);
 }
 

So I tried the spot at cp_parser_template_id just after the spot that
tries a partial_concept_id, and then instead of two copies of the error
message, I got four:

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index e946d0b72292..f93b3bb21307 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -15856,6 +15856,13 @@ cp_parser_template_id (cp_parser *parser,
&& (template_id = (cp_parser_maybe_partial_concept_id
  (parser, templ, arguments
 return template_id;
+  else if (flag_concepts
+  && (template_id = type_uses_auto (arguments)))
+{
+  error_at (DECL_SOURCE_LOCATION (TEMPLATE_TYPE_DECL (template_id)),
+   "invalid use of % in template argument");
+  return error_mark_node;
+}
   else if (variable_template_p (templ))
 {
   template_id = lookup_template_variable (templ, arguments);


I guess this means we don't want to fail to parse the thing as what it
is and force the parser to try other things just because there's an auto
in the arglist.  Parsing it as what it is and then catching it later,
like I proposed before, seems to be working better.  It's not a parse
error, after all, it's rather a constraint on what otherwise legitimate
template arguments might contain.

Can you elaborate on any reasons to not proceed with the patch I
proposed?  (if the reason is the failure to call finish_id_expression
when tsubsting TEMPLATE_ID_EXPRs with identifiers in them, maybe we can
have the test there as well?)

-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


[PATCH] rs6000: Fix _mm_min_ps and _mm_max_ps (PR83315)

2018-03-30 Thread Segher Boessenkool
This makes _mm_{min,max}_ps work correctly for QNaNs.

Tested on powerpc64le-linux; committing.


Segher


2018-03-31  Segher Boessenkool  

PR target/83315
* config/rs6000/xmmintrin.h (_mm_set_ps, _mm_max_ps): Handle (quiet)
NaN inputs correctly.

gcc/testsuite/
PR target/83315
* gcc.target/powerpc/sse-maxps-2.c: New test.
* gcc.target/powerpc/sse-minps-2.c: New test.

---
 gcc/config/rs6000/xmmintrin.h  |  6 ++--
 gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c | 43 ++
 gcc/testsuite/gcc.target/powerpc/sse-minps-2.c | 43 ++
 3 files changed, 90 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse-minps-2.c

diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h
index 2cf2bf2..aa2823f 100644
--- a/gcc/config/rs6000/xmmintrin.h
+++ b/gcc/config/rs6000/xmmintrin.h
@@ -438,13 +438,15 @@ _mm_max_ss (__m128 __A, __m128 __B)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_min_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_min ((__v4sf)__A,(__v4sf) __B));
+  __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __B, (__v4sf) __A);
+  return vec_sel (__B, __A, m);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_max_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_max ((__v4sf)__A, (__v4sf)__B));
+  __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __A, (__v4sf) __B);
+  return vec_sel (__B, __A, m);
 }
 
 /* Perform logical bit-wise operations on 128-bit values.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c 
b/gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c
new file mode 100644
index 000..5cf9c3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+
+#ifndef CHECK_H
+#define CHECK_H "sse-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse_test_maxps_2
+#endif
+
+#include 
+
+static __m128
+__attribute__((noinline, unused))
+test (__m128 s1, __m128 s2)
+{
+  return _mm_max_ps (s1, s2);
+}
+
+static void
+TEST (void)
+{
+  union128 u, s1, s2;
+  float e[4];
+  int i;
+
+  s1.x = _mm_set_ps (24.43, __builtin_nanf("1"), __builtin_nanf("2"), 546.46);
+  s2.x = _mm_set_ps (__builtin_nanf("3"), __builtin_nanf("4"), 3.15, 4.14);
+  u.x = test (s1.x, s2.x);
+
+  for (i = 0; i < 4; i++)
+e[i] = s1.a[i] > s2.a[i] ? s1.a[i] : s2.a[i];
+
+  if (__builtin_memcmp (, e, 16))
+abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse-minps-2.c 
b/gcc/testsuite/gcc.target/powerpc/sse-minps-2.c
new file mode 100644
index 000..4cb4b73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse-minps-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+
+#ifndef CHECK_H
+#define CHECK_H "sse-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse_test_minps_2
+#endif
+
+#include 
+
+static __m128
+__attribute__((noinline, unused))
+test (__m128 s1, __m128 s2)
+{
+  return _mm_min_ps (s1, s2);
+}
+
+static void
+TEST (void)
+{
+  union128 u, s1, s2;
+  float e[4];
+  int i;
+
+  s1.x = _mm_set_ps (24.43, __builtin_nanf("1"), __builtin_nanf("2"), 546.46);
+  s2.x = _mm_set_ps (__builtin_nanf("3"), __builtin_nanf("4"), 3.15, 4.14);
+  u.x = test (s1.x, s2.x);
+
+  for (i = 0; i < 4; i++)
+e[i] = s1.a[i] < s2.a[i] ? s1.a[i] : s2.a[i];
+
+  if (__builtin_memcmp (, e, 16))
+abort ();
+}
-- 
1.8.3.1



Re: [PATCH, rs6000] Fix PR80546: FAIL: gcc.target/powerpc/bool3-p[78].c scan-assembler-not

2018-03-30 Thread Segher Boessenkool
Hi!

On Fri, Mar 30, 2018 at 02:29:59PM -0500, Peter Bergner wrote:
> +;; A mode attribute to disparage use of GPR registers, except for scalar
> +;; interger modes.

Typo ("integer").

> +(define_mode_attr ??r[(V16QI "??r")
> +  (V8HI  "??r")
> +  (V4SI  "??r")
> +  (V4SF  "??r")
> +  (V2DI  "??r")
> +  (V2DF  "??r")
> +  (DI"r")
> +  (DF"??r")
> +  (SF"??r")
> +  (V1TI  "??r")
> +  (TI"r")
> +  (TF"??r")
> +  (KF"??r")])

Ugly :-)

DI cannot happen, just remove it (it is not part of VSX_M).  Neither
DF or SF.

> @@ -1200,7 +1216,7 @@ (define_insn_and_split "*xxspltib_
>  (define_insn "*vsx_mov_64bit"
>[(set (match_operand:VSX_M 0 "nonimmediate_operand"
> "=ZwO,  , , r, we,?wQ,
> -?,   ??r,   ??Y,   ??r,   wo,v,
> +?,   ??r,   ??Y,   , wo,v,
>  ?,*r,v, ??r,   wZ,v")
>  
>   (match_operand:VSX_M 1 "input_operand" 
> @@ -1229,7 +1245,7 @@ (define_insn "*vsx_mov_64bit"
>  ;;  LVX (VMX)  STVX (VMX)
>  (define_insn "*vsx_mov_32bit"
>[(set (match_operand:VSX_M 0 "nonimmediate_operand"
> -   "=ZwO,  , , ??r,   ??Y,   ??r,
> +   "=ZwO,  , , ??r,   ??Y,   ,
>  wo,v, ?,*r,v, ??r,
>  wZ,v")

We should give the same treatment to (most or all of) the other ??r in
those patterns, or more likely, split TI off to a separate pattern.

But okay (with the fixes), and okay for 7.  Thanks!


Segher


Re: [PATCH, rs6000] Tidy implementation of vec_ldl

2018-03-30 Thread Segher Boessenkool
Hi!

On Thu, Mar 29, 2018 at 09:47:42AM -0500, Kelvin Nilsen wrote:
> In summary, this patch removes two prototypes:
> 
>   vector int vec_ldl (int, long int *)
>   vector unsigned int vec_ldl (int, unsigned long int *)
> 
> and adds eight:
> 
>   vector bool char vec_ldl (int, bool char *)
>   vector bool short vec_ldl (int, bool short *)
>   vector bool int vec_ldl (int, bool int *)
>   vector bool long long vec_ldl (int, bool long long *)
>   vector pixel vec_ldl (int, pixel *)

Is "pixel" a type?  "vector pixel" is, but plain "pixel"?

>   vector long long vec_ldl (int, long long *)
>   vector unsigned long long vec_ldl (int, unsigned long long *)

>     * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Remove
>     erroneous entries for
>     "vector int vec_ldl (int, long int *)", and
>     "vector    unsigned int vec_ldl (int, unsigned long int *)".

Whitespace is weird.  I'll just assume this is how you pasted it into
the mail and it will be correct in the changelog you commit.

> @@ -32855,7 +32855,7 @@ rs6000_mangle_type (const_tree type)
>    if (type == bool_short_type_node) return "U6__bools";
>    if (type == pixel_type_node) return "u7__pixel";
>    if (type == bool_int_type_node) return "U6__booli";
> -  if (type == bool_long_type_node) return "U6__booll";
> +  if (type == bool_long_long_type_node) return "U6__booll";

The mangled name now still says it is "bool long", not "bool long long".
Can you do this?  At the very least it needs a comment.

> --- gcc/config/rs6000/rs6000-c.c    (revision 258800)
> +++ gcc/config/rs6000/rs6000-c.c    (working copy)
> @@ -1656,29 +1656,50 @@ const struct altivec_builtin_types altivec_overloa
>  RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
>    { ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX,
>  RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 
> },
> +
> +  /* vector float vec_ldl (int, vector float *);
> +   * vector float vec_ldl (int, float *); */

No leading * on comment continuation lines.

> --- gcc/testsuite/gcc.target/powerpc/vec-ldl-1.c (nonexistent)
> +++ gcc/testsuite/gcc.target/powerpc/vec-ldl-1.c    (revision 258889)
> @@ -0,0 +1,214 @@
> +/* { dg-do run { target powerpc*-*-* } } */
> +/* { dg-require-effective-target vmx_hw } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-maltivec -O0 -Wall" } */

Does this need lp64?  Same question for the other tests.


Segher


Re: [C++ PATCH] P0329R4: Designated Initialization

2018-03-30 Thread Jason Merrill
On Mon, Nov 13, 2017 at 5:53 PM, Jakub Jelinek  wrote:
> +   /* If the element is an anonymous union object and the
> +  initializer list is a designated-initializer-list, the
> +  anonymous union object is initialized by the
> +  designated-initializer-list { D }, where D is the
> +  designated-initializer-clause naming a member of the
> +  anonymous union object.  */
> +   next = build_constructor_single (type, ce->index, ce->value);

We need to use init_list_type_node for the CONSTRUCTOR so that
digest_init works properly.  Applying to trunk.
commit 9c34bc519821ca90cb37dce05744f404bb3a9991
Author: Jason Merrill 
Date:   Fri Mar 30 15:30:45 2018 -0400

Fix designated initializer for anonymous union.

* typeck2.c (process_init_constructor_record): Use
init_list_type_node for the CONSTRUCTOR around an anonymous union
designated initializer.

diff --git a/gcc/cp/typeck2.c b/gcc/cp/typeck2.c
index 464e8a7c3b1..3aae0a362d5 100644
--- a/gcc/cp/typeck2.c
+++ b/gcc/cp/typeck2.c
@@ -1435,7 +1435,8 @@ process_init_constructor_record (tree type, tree init, int nested,
 		   designated-initializer-list { D }, where D is the
 		   designated-initializer-clause naming a member of the
 		   anonymous union object.  */
-		next = build_constructor_single (type, ce->index, ce->value);
+		next = build_constructor_single (init_list_type_node,
+		 ce->index, ce->value);
 	  else
 		{
 		  ce = NULL;
diff --git a/gcc/testsuite/g++.dg/cpp2a/desig9.C b/gcc/testsuite/g++.dg/cpp2a/desig9.C
new file mode 100644
index 000..990a2aa20a5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/desig9.C
@@ -0,0 +1,13 @@
+// { dg-do compile }
+// { dg-options "" }
+
+struct A {
+  union {
+int a;
+char b;
+  };
+};
+
+struct A x = {
+  .a = 5,
+};


[PATCH, rs6000] Fix PR80546: FAIL: gcc.target/powerpc/bool3-p[78].c scan-assembler-not

2018-03-30 Thread Peter Bergner
Currently, the vwx_mov_* move patterns diparage use of GPR registers.
This tends to force all modes handled by the move patterns to prefer using
VSX registers, even in cases where it doesn't make sense (ie, TImode).
The bool3-p[78].c:ptr() test cases are such an example.  The following patch
is a minimal fix to get test cases PASSing again, by not disparaging GPR
usage if the modes are DImode or TImode.

Comparing before & after for the ptr4() test case, we now see:

-   mtvsrd 0,10
-   mtvsrd 1,11
-   xxpermdi 12,0,1,0
-   xxlnor 0,12,12
-   mfvsrd 10,0
-   xxpermdi 0,0,0,3
-   mfvsrd 11,0
+   not 10,10
+   not 11,11

The plan is to revisit these move patterns again in GCC9 to see if more
cleanup and enhancements can be made, but it's too late for that in GCC8.

This passed bootstrap and regtesting on both powerpc64le-linux and
powerpc64-linux with no regressions.  Mike also did a spec comparison with
the patch and only found two benchmarks (povray and xalancbmk) with any
code differences.  The -mcpu=power8 changes for both benchmarks we mostly
just address differences.  The -mcpu=power9 differences did show a few
cases where we generate better code than before (ie, we don't load values
into VSX registers just use a direct move them into a GPR register).

Is this ok for trunk?  This is marked as a GCC7 and GCC8 regression,
so ok for GCC 7 too after it has baked on trunk for a while?

Peter

PR target/80546
* config/rs6000/vsx.md (??r): New mode attribute.
(*vsx_mov_64bit): Use it.
(*vsx_mov_32bit): Likewise.

Index: gcc/config/rs6000/vsx.md
===
--- gcc/config/rs6000/vsx.md(revision 258802)
+++ gcc/config/rs6000/vsx.md(working copy)
@@ -170,6 +170,22 @@ (define_mode_attr VSa  [(V16QI "wa")
 (TF"wp")
 (KF"wq")])
 
+;; A mode attribute to disparage use of GPR registers, except for scalar
+;; interger modes.
+(define_mode_attr ??r  [(V16QI "??r")
+(V8HI  "??r")
+(V4SI  "??r")
+(V4SF  "??r")
+(V2DI  "??r")
+(V2DF  "??r")
+(DI"r")
+(DF"??r")
+(SF"??r")
+(V1TI  "??r")
+(TI"r")
+(TF"??r")
+(KF"??r")])
+
 ;; Same size integer type for floating point data
 (define_mode_attr VSi [(V4SF  "v4si")
   (V2DF  "v2di")
@@ -1200,7 +1216,7 @@ (define_insn_and_split "*xxspltib_
 (define_insn "*vsx_mov_64bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO,  , , r, we,?wQ,
-?,   ??r,   ??Y,   ??r,   wo,v,
+?,   ??r,   ??Y,   , wo,v,
 ?,*r,v, ??r,   wZ,v")
 
(match_operand:VSX_M 1 "input_operand" 
@@ -1229,7 +1245,7 @@ (define_insn "*vsx_mov_64bit"
 ;;  LVX (VMX)  STVX (VMX)
 (define_insn "*vsx_mov_32bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
-   "=ZwO,  , , ??r,   ??Y,   ??r,
+   "=ZwO,  , , ??r,   ??Y,   ,
 wo,v, ?,*r,v, ??r,
 wZ,v")
 



C++ PATCH for c++/85093, excess args accepted with pack expansion

2018-03-30 Thread Jason Merrill
Here an earlier fix of mine related to -fnew-ttp-matching caused us to
silently discard a pack expansion argument that will be ill-formed if
it's ever non-empty.  It would be correct to reject this template
definition immediately, since a variadic template that is well-formed
only for empty packs is ill-formed (no diagnostic required), but my
attempts to do that while still allowing variadic-ttp3a.C have run
into trouble, so I don't think this is the time to keep poking at it.

But if we are going to allow this in the template, we need to remember
the pack expansion argument so we can reject it as needed at
instantiation time.

Tested x86_64-pc-linux-gnu, applied to trunk.
commit 8aaa042bd0a97034d42262ff0efeec9979f13fbe
Author: Jason Merrill 
Date:   Tue Mar 27 19:18:19 2018 -0400

PR c++/85093 - too many template args with pack expansion.
* pt.c (coerce_template_parms): Keep pack expansion args that will
need to be empty.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 40ddf9ec989..284eaf3cab6 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -8497,6 +8497,22 @@ coerce_template_parms (tree parms,
   goto bad_nargs;
 }
 
+  if (arg_idx < nargs)
+{
+  /* We had some pack expansion arguments that will only work if the packs
+	 are empty, but wait until instantiation time to complain.
+	 See variadic-ttp3.C.  */
+  int len = nparms + (nargs - arg_idx);
+  tree args = make_tree_vec (len);
+  int i = 0;
+  for (; i < nparms; ++i)
+	TREE_VEC_ELT (args, i) = TREE_VEC_ELT (new_inner_args, i);
+  for (; i < len; ++i, ++arg_idx)
+	TREE_VEC_ELT (args, i) = TREE_VEC_ELT (inner_args,
+	   arg_idx - pack_adjust);
+  new_inner_args = args;
+}
+
   if (lost)
 {
   gcc_assert (!(complain & tf_error) || seen_error ());
diff --git a/gcc/testsuite/g++.dg/cpp0x/variadic-empty1.C b/gcc/testsuite/g++.dg/cpp0x/variadic-empty1.C
new file mode 100644
index 000..42daeaa3dcb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/variadic-empty1.C
@@ -0,0 +1,13 @@
+// PR c++/85093
+// { dg-do compile { target c++11 } }
+
+template class A {};
+
+template class B {
+  typedef A AB;		// { dg-error "arguments" }
+  AB ab;
+};
+
+int main() {
+  B b;  
+}


[PATCH] Improve avx512{f,bw} vec_set (PR middle-end/85090)

2018-03-30 Thread Jakub Jelinek
Hi!

The code we emit on the following testcases is really terrible, with both
-mavx512f -mno-avx512bw as well as -mavx512bw, rather than doing e.g.
vpinsrw $0, %edi, %xmm0, %xmm1
vinserti32x4$0, %xmm1, %zmm0, %zmm0
when trying to insert into low 128-bits or
vextracti32x4   $1, %zmm0, %xmm1
vpinsrw $3, %edi, %xmm1, %xmm1
vinserti32x4$1, %xmm1, %zmm0, %zmm0
when trying to insert into other 128-bits, we emit:
pushq   %rbp
vmovq   %xmm0, %rax
movzwl  %di, %edi
xorw%ax, %ax
movq%rsp, %rbp
orq %rdi, %rax
andq$-64, %rsp
vmovdqa64   %zmm0, -64(%rsp)
movq%rax, -64(%rsp)
vmovdqa64   -64(%rsp), %zmm0
leave
and furthermore there is some RA bug it triggers, so we miscompile it.
All this is because while at least for AVX512BW we have ix86_expand_vector_set
implemented for V64QImode and V32QImode, we actually don't have a pattern
for it.  Fixed by adding those modes to V, which is used only by this
vec_set pattern and some xop pattern, which is misusing it anyway (it really
can't handle 512-bit vectors, so could result in assembly failures etc.
with -mxop -mavx512f).  Furthermore, for AVX512F we can use the above
extraction/insertion/insertion sequence, similarly to what we do for 256-bit
insertions, just we have halves there rather than quarters.
The splitters are added to match what vec_set_lo_* define_insn_and_split do,
if we are trying to extract low 128-bit lane of a 512-bit vector, we really
don't need vextracti32x4 instruction, we can use simple move or even nothing
at all (if source and destination are the same register, or post RA register
renaming can arrange that).

The RA bug still should be fixed, but at least this patch makes it latent
and improves a lot the code.  Bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk? 

2018-03-30  Jakub Jelinek  

PR middle-end/85090
* config/i386/sse.md (V): Add V64QI and V32HI for TARGET_AVX512F.
(V_128_256): New mode iterator.
(*avx512dq_vextract64x2_1 splitter): New define_split.
(*avx512f_vextract32x4_1 splitter): Likewise.
(xop_pcmov_): Use V_128_256 mode iterator instead
of V.
* config/i386/i386.c (ix86_expand_vector_set): Improve V32HImode and
V64QImode expansion for !TARGET_AVX512BW && TARGET_AVX512F.

* gcc.target/i386/avx512f-pr85090-1.c: New test.
* gcc.target/i386/avx512f-pr85090-2.c: New test.
* gcc.target/i386/avx512f-pr85090-3.c: New test.
* gcc.target/i386/avx512bw-pr85090-2.c: New test.
* gcc.target/i386/avx512bw-pr85090-3.c: New test.

--- gcc/config/i386/sse.md.jj   2018-03-29 12:38:32.088512220 +0200
+++ gcc/config/i386/sse.md  2018-03-30 14:56:19.949549653 +0200
@@ -229,8 +229,8 @@ (define_mode_iterator VI1_AVX512VL
 
 ;; All vector modes
 (define_mode_iterator V
-  [(V32QI "TARGET_AVX") V16QI
-   (V16HI "TARGET_AVX") V8HI
+  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
@@ -244,6 +244,10 @@ (define_mode_iterator V_128
 (define_mode_iterator V_256
   [V32QI V16HI V8SI V4DI V8SF V4DF])
 
+;; All 128bit and 256bit vector modes
+(define_mode_iterator V_128_256
+  [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
+
 ;; All 512bit vector modes
 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
 
@@ -7351,6 +7355,15 @@ (define_insn "avx512dq_vex
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
+(define_split
+  [(set (match_operand: 0 "nonimmediate_operand")
+   (vec_select:
+ (match_operand:V8FI 1 "register_operand")
+ (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_AVX512DQ && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+  "operands[1] = gen_lowpart (mode, operands[1]);")
+
 (define_insn "avx512f_vextract32x4_1"
   [(set (match_operand: 0 "" 
"=")
(vec_select:
@@ -7374,6 +7387,16 @@ (define_insn "avx512f_vext
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
+(define_split
+  [(set (match_operand: 0 "nonimmediate_operand")
+   (vec_select:
+ (match_operand:V16FI 1 "register_operand")
+ (parallel [(const_int 0) (const_int 1)
+(const_int 2) (const_int 3)])))]
+  "TARGET_AVX512F && reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+  "operands[1] = gen_lowpart (mode, operands[1]);")
+
 (define_mode_attr extract_type_2
   [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
 
@@ -16478,11 +16501,11 @@ (define_insn "xop_pwd"
 
 ;; XOP parallel XMM conditional moves
 (define_insn "xop_pcmov_"
-  [(set (match_operand:V 0 "register_operand" "=x,x")
-   

[committed] Fix OpenMP C++ reduction error recovery (PR c++/84791)

2018-03-30 Thread Jakub Jelinek
Hi!

The following patch removes errorneous reduction clause already during
parsing, so that we don't ICE on it during instantiation.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2018-03-30  Jakub Jelinek  

PR c++/84791
* semantics.c (finish_omp_reduction_clause): If
OMP_CLAUSE_REDUCTION_PLACEHOLDER is error_mark_node, return true
even if processing_template_decl.

* g++.dg/gomp/pr84791.C: New test.

--- gcc/cp/semantics.c.jj   2018-03-23 21:33:56.279959167 +0100
+++ gcc/cp/semantics.c  2018-03-29 14:42:05.759995585 +0200
@@ -5623,7 +5623,11 @@ finish_omp_reduction_clause (tree c, boo
   return false;
 }
   else if (processing_template_decl)
-return false;
+{
+  if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) == error_mark_node)
+   return true;
+  return false;
+}
 
   tree id = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
 
--- gcc/testsuite/g++.dg/gomp/pr84791.C.jj  2018-03-29 14:35:04.34216 
+0200
+++ gcc/testsuite/g++.dg/gomp/pr84791.C 2018-03-29 14:37:17.491839303 +0200
@@ -0,0 +1,15 @@
+// PR c++/84791
+// { dg-do compile }
+
+typedef int I;
+
+template 
+void
+foo ()
+{
+  I i;
+  #pragma omp parallel reduction (I::I: i) // { dg-error "'I' is not a 
class, namespace, or enumeration" "" { target c++11 } }
+;  // { dg-error "'I' is not a 
class or namespace" "" { target c++98_only } .-1 }
+}
+
+template void foo<0> ();

Jakub


Re: [PATCH] i386: Enable AVX/AVX512 features only if supported by OSXSAVE

2018-03-30 Thread Ilya Verbin
2018-03-30 20:56 GMT+03:00 H.J. Lu :
> On Fri, Mar 30, 2018 at 10:19 AM, Ilya Verbin  wrote:
>> This check will always disable AVX-512 on macOS, because they
>> implemented on-demand support:
>> https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176
>>
>
> Isn't xsaveopt designed for this?

Maybe the goal was to reduce the size of the area allocated by default
for each thread.

> --
> H.J.

  -- Ilya


Re: [PATCH] i386: Enable AVX/AVX512 features only if supported by OSXSAVE

2018-03-30 Thread H.J. Lu
On Fri, Mar 30, 2018 at 10:19 AM, Ilya Verbin  wrote:
> This check will always disable AVX-512 on macOS, because they
> implemented on-demand support:
> https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176
>

Isn't xsaveopt designed for this?

-- 
H.J.


Re: [PATCH] i386: Enable AVX/AVX512 features only if supported by OSXSAVE

2018-03-30 Thread Ilya Verbin
This check will always disable AVX-512 on macOS, because they
implemented on-demand support:
https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176

(I'm not against this change, just for information).

2018-03-29 16:05 GMT+03:00 Uros Bizjak :
> On Thu, Mar 29, 2018 at 2:43 PM, H.J. Lu  wrote:
>> Enable AVX and AVX512 features only if their states are supported by
>> OSXSAVE.

  -- Ilya


[PATCH, committed] Update my MAINTAINERS entries

2018-03-30 Thread Bill Schmidt
Just updating my email address and making it a little clearer which is which
between Will Schmidt and me.  Committed.

2018-03-30  Bill Schmidt  

* MAINTAINERS: Update my email address and disambiguate myself
a bit from Will Schmidt.

Index: MAINTAINERS
===
--- MAINTAINERS (revision 258979)
+++ MAINTAINERS (working copy)
@@ -244,7 +244,7 @@ testsuite   Rainer Orth 

 register allocationVladimir Makarov
 gdbhooks.pyDavid Malcolm   
-SLSR   Bill Schmidt
+SLSR   Bill Schmidt
 jitDavid Malcolm   
 pointer bounds checker Ilya Enkovich   
 i386 MPX   Ilya Enkovich   
@@ -570,8 +570,8 @@ Sujoy Saraswati 

 Trevor Saunders

 Aaron Sawdey   
 Roger Sayle
+Bill Schmidt   
 Will Schmidt   
-William Schmidt

 Tilo Schwarz   
 Martin Sebor   
 Svein Seldal   



Re: [PATCH][GCC][mid-end] Allow larger copies when target supports unaligned access [Patch (1/2)]

2018-03-30 Thread Peter Bergner
On 3/29/18 9:35 AM, Alan Modra wrote:
> On Thu, Nov 16, 2017 at 03:27:01PM +, Tamar Christina wrote:
>> --- a/gcc/expr.c
>> +++ b/gcc/expr.c
>> @@ -2769,7 +2769,9 @@ copy_blkmode_to_reg (machine_mode mode, tree src)
>>  
>>n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
>>dst_words = XALLOCAVEC (rtx, n_regs);
>> -  bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
>> +  bitsize = BITS_PER_WORD;
>> +  if (targetm.slow_unaligned_access (word_mode, TYPE_ALIGN (TREE_TYPE 
>> (src
>> +bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
>>  
>>/* Copy the structure BITSIZE bits at a time.  */
>>for (bitpos = 0, xbitpos = padding_correction;
> 
> I believe this patch is wrong.  Please revert.  See the PR84762 testcase.
> 
> There are two problems.  Firstly, if padding_correction is non-zero,
> then xbitpos % BITS_PER_WORD is non-zero and in
> 
>   store_bit_field (dst_word, bitsize, xbitpos % BITS_PER_WORD,
>  0, 0, word_mode,
>  extract_bit_field (src_word, bitsize,
> bitpos % BITS_PER_WORD, 1,
> NULL_RTX, word_mode, word_mode,
> false, NULL),
>  false);
> 
> the stored bit-field exceeds the destination register size.  You could
> fix that by making bitsize the gcd of bitsize and padding_correction.
> 
> However, that doesn't fix the second problem which is that the
> extracted bit-field can exceed the source size.  That will result in
> rubbish being read into a register.

FYI, I received an automated response saying Tamar is away on vacation
with no return date specified.  That means he won't be able to revert
the patch.  What do we do now?

Peter




Re: [PATCH][GCC][mid-end] Allow larger copies when target supports unaligned access [Patch (1/2)]

2018-03-30 Thread Peter Bergner
On 3/29/18 9:35 AM, Alan Modra wrote:
> On Thu, Nov 16, 2017 at 03:27:01PM +, Tamar Christina wrote:
>> --- a/gcc/expr.c
>> +++ b/gcc/expr.c
>> @@ -2769,7 +2769,9 @@ copy_blkmode_to_reg (machine_mode mode, tree src)
>>  
>>n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
>>dst_words = XALLOCAVEC (rtx, n_regs);
>> -  bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
>> +  bitsize = BITS_PER_WORD;
>> +  if (targetm.slow_unaligned_access (word_mode, TYPE_ALIGN (TREE_TYPE 
>> (src
>> +bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
>>  
>>/* Copy the structure BITSIZE bits at a time.  */
>>for (bitpos = 0, xbitpos = padding_correction;
> 
> I believe this patch is wrong.  Please revert.  See the PR84762 testcase.

I'm seeing this patch cause an error too on different test case:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85123#c2

I've closed PR85123 as a dup of PR84762, even though the test case
in PR84762 is failing for -m32 while the test case in PR85123 is
failing for -m64, since they're both caused by the same bug.

Peter



Re: [og7] vector_length extension part 2: Generalize state propagation and synchronization

2018-03-30 Thread Tom de Vries

On 03/30/2018 05:00 PM, Cesar Philippidis wrote:

I should
have checked that patch with the vector length fallback disabled.


Right. The patch series introduces a lot of code that is not exercised.

I've added an -mlong-vector-in-workers option in my local branch and 
added 3 test-cases to exercise the code with fallback disabled everytime 
I run the libgomp tests.


Thanks,
- Tom


Re: [PATCH] [PR c++/85039] no type definitions in builtin offsetof

2018-03-30 Thread Jason Merrill
On Fri, Mar 30, 2018 at 3:55 AM, Alexandre Oliva  wrote:
> Types defined within a __builtin_offsetof argument don't always get
> properly recorded as members of their context types, so if they're
> anonymous, we may fail to assign them an anon type index for mangling
> and ICE.
>
> We shouldn't allow types to be introduced in __builtin_offsetof, I
> think, so I've arranged for us to reject them.  Even then, we still
> parse the definitions and attempt to assign mangled names to its
> member functions, so the ICE remains.  Since we've already reported an
> error, we might as well complete the name assignment with an arbitrary
> index, thus avoiding the ICE.
>
>
> Regstrapped on i686- and x86_64-linux-gnu, regressing
> g++.dg/parse/semicolon3.C, which defines a (named) struct in
> builtin_offsetof.  I suppose this means I should look for another
> solution that doesn't involve rejecting these definitions, eh?

Hmm, I'm afraid so.  The C standard defines offsetof in terms of a
notional declaration

static /type/ t;

and a type definition is allowed in such a declaration.

Incidentally, it would be nice to replace all the
type_definition_forbidden stuff with defining-type-specifier as per DR
2141... (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=50169)

Jason


Re: [og7] vector_length extension part 2: Generalize state propagation and synchronization

2018-03-30 Thread Cesar Philippidis
On 03/30/2018 07:45 AM, Tom de Vries wrote:
> On 03/30/2018 03:07 AM, Tom de Vries wrote:
>> On 03/02/2018 05:55 PM, Cesar Philippidis wrote:
>>> As a follow up patch will show, the nvptx BE falls back to using
>>> vector_length = 32 when a vector loop is nested inside a worker loop.
>>
>> I disabled the fallback, and analyzed the vred2d-128.c illegal memory
>> access execution failure.
>>
>> I minimized that down to this ptx:
>> ...
>> .shared .align 8 .u8 __oacc_bcast[176];
>>
>> {
>>    {
>>  .reg .u32 %x;
>>  mov.u32 %x,%tid.x;
>>  setp.ne.u32 %r86,%x,0;
>>    }
>>
>>    {
>>  .reg .u32 %tidy;
>>  .reg .u64 %t_bcast;
>>  .reg .u64 %y64;
>>  mov.u32 %tidy,%tid.y;
>>  cvt.u64.u32 %y64,%tidy;
>>  add.u64 %y64,%y64,1;
>>  cvta.shared.u64 %t_bcast,__oacc_bcast;
>>  mad.lo.u64 %r66,%y64,88,%t_bcast;
>>    }
>>
>>    @ %r86 bra $L28;
>>    st.u32 [%r66+80],0;
>>   $L28:
>>    ret;
>> }
>> ...
>>
>> The ptx is called with 2 workers and 128 vector_length.
>>
>> So, 2 workers mean %tid.y has values 0 and 1.
>> Then %y64 has values 1 and 2.
>> Then %r66 has values __oacc_bcast + (1 * 88) and __oacc_bcast + (2 * 88).
>> Then the st.u32 accesss __oacc_bcast + (1 * 88) + 80 and __oacc_bcast
>> + (2 * 88) + 80.
>>
>> So we're accessing memory at location 256, while the __oacc_bcast is
>> only 176 bytes big.
>>
>> I formulated this assert that AFAIU detects this situation in the
>> compiler:
>> ...
>> @@ -1125,6 +1125,8 @@ nvptx_init_axis_predicate (FILE *file, int
>> regno, const char *name)
>>     fprintf (file, "\t}\n");
>>   }
>>
>> +static int nvptx_mach_max_workers ();
>> +
>>   /* Emit code to initialize OpenACC worker broadcast and synchronization
>>  registers.  */
>>
>> @@ -1148,6 +1150,7 @@ nvptx_init_oacc_workers (FILE *file)
>>     "// vector broadcast offset\n",
>>     REGNO (cfun->machine->bcast_partition),
>>     oacc_bcast_partition);
>> +  gcc_assert (oacc_bcast_partition * (nvptx_mach_max_workers () +
>> 1) <= oacc_bcast_size);
>>   }
>>     if (cfun->machine->sync_bar)
>>   fprintf (file, "\t\tadd.u32\t\t%%r%d, %%tidy, 1; "
>> ...
>>
>> The assert is not triggered when the fallback is used.
> 
> I've tracked the problem down to:
> ...
>> -  if (oacc_bcast_size <
>> data.offset) 
>> 
>> -   oacc_bcast_size =
>> data.offset; 
>>    
>> +  if (oacc_bcast_partition <
>> data.offset) 
>>    
>> +  
>> {
>>  
>> + int psize =
>> data.offset; 
>>    
>> + psize = (psize + oacc_bcast_align - 1) & ~(oacc_bcast_align
>> - 1);    +
>> oacc_bcast_partition =
>> psize;   
>> 
>> + oacc_bcast_size = psize * (nvptx_mach_max_workers () +
>> 1);   +  
>> }
>>  
> 
> ...
> 
> We hit this if clause for a first compiled function, with num_workers(1).
> 
> This sets oacc_bcast_partition and oacc_bcast_size as required for that
> functions.
> 
> Then we hit this if clause for a second compiled function, with
> num_workers (2).
> 
> We need oacc_bcast_size updated, but the 'oacc_bcast_partition <
> data.offset' is false, so the update doesn't happen.
> 
> I managed to fix this by making the code unconditional, and using MAX to
> update oacc_bcast_partition and oacc_bcast_size.

It looks like that's fallout from this patch
. I should
have checked that patch with the vector length fallback disabled.

Cesar


Re: [PATCH] [PR c++/85027] deal with baselink in save_expr in instantiate_type

2018-03-30 Thread Jason Merrill
OK.

On Fri, Mar 30, 2018 at 3:49 AM, Alexandre Oliva  wrote:
> We use SAVE_EXPRs in conditional expressions without the middle
> operand, to evaluate the first operand only once.  When the conversion
> of the first operand fails, we may call instantiate_type get a better
> error message.  We have code to peel off the SAVE_EXPR there, but then
> we may end up with a BASELINK, and we're past the code that deals with
> BASELINKs.  Reorder the tests so that we expose the saved expr first,
> and then deal with BASELINKs.
>
> Regstrapped on i686- and x86_64-linux-gnu.  Ok to install?
>
> for  gcc/cp/ChangeLog
>
> PR c++/85027
> * class.c (instantiate_type): Peel off SAVE_EXPR before
> BASELINK.
>
> for  gcc/testsuite/ChangeLog
>
> PR c++/85027
> * g++.dg/pr85027.C: New.
> ---
>  gcc/cp/class.c |   10 +-
>  gcc/testsuite/g++.dg/pr85027.C |8 
>  2 files changed, 13 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/pr85027.C
>
> diff --git a/gcc/cp/class.c b/gcc/cp/class.c
> index debcaf21cf76..0427d1224f74 100644
> --- a/gcc/cp/class.c
> +++ b/gcc/cp/class.c
> @@ -7971,6 +7971,11 @@ instantiate_type (tree lhstype, tree rhs, 
> tsubst_flags_t complain)
> }
>  }
>
> +  /* If we instantiate a template, and it is a A ?: C expression
> + with omitted B, look through the SAVE_EXPR.  */
> +  if (TREE_CODE (rhs) == SAVE_EXPR)
> +rhs = TREE_OPERAND (rhs, 0);
> +
>if (BASELINK_P (rhs))
>  {
>access_path = BASELINK_ACCESS_BINFO (rhs);
> @@ -7986,11 +7991,6 @@ instantiate_type (tree lhstype, tree rhs, 
> tsubst_flags_t complain)
>return error_mark_node;
>  }
>
> -  /* If we instantiate a template, and it is a A ?: C expression
> - with omitted B, look through the SAVE_EXPR.  */
> -  if (TREE_CODE (rhs) == SAVE_EXPR)
> -rhs = TREE_OPERAND (rhs, 0);
> -
>/* There are only a few kinds of expressions that may have a type
>   dependent on overload resolution.  */
>gcc_assert (TREE_CODE (rhs) == ADDR_EXPR
> diff --git a/gcc/testsuite/g++.dg/pr85027.C b/gcc/testsuite/g++.dg/pr85027.C
> new file mode 100644
> index ..01b1b291aecd
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr85027.C
> @@ -0,0 +1,8 @@
> +// { dg-do compile }
> +
> +// Avoid -pedantic-error default
> +// { dg-options "" }
> +
> +struct A { static int a; };
> +
> +int t = A::A ? : 0; // { dg-error "cannot resolve" }
>
>
> --
> Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
> You must be the change you wish to see in the world. -- Gandhi
> Be Free! -- http://FSFLA.org/   FSF Latin America board member
> Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: [PATCH] [PR c++/84979] improve auto handling in explicit tmpl args for concepts

2018-03-30 Thread Jason Merrill
On Thu, Mar 29, 2018 at 6:24 PM, Alexandre Oliva  wrote:
> On Mar 28, 2018, Jason Merrill  wrote:
>
>> On Wed, Mar 28, 2018 at 5:06 AM, Alexandre Oliva  wrote:
>>> On Mar 23, 2018, Jason Merrill  wrote:
>>>
 On Fri, Mar 23, 2018 at 3:38 PM, Alexandre Oliva  wrote:
> +  /* Concepts allows 'auto' in template arguments, even multiple
> + 'auto's in a single argument.
>>>
 I think that's only intended for class templates;
>>>
>>> Aah, that explains a lot!  Dang, it was so close!
>>>
>>> It actually makes sense; was there any discussion about standardizing
>>> this, with reasons to reject this construct, or is it just that we
>>> aren't there yet?  I wouldn't be surprised if it were to appear in some
>>> future version of C++.
>
>> If what were to appear?
>
> auto in explicit template arguments for template functions too.
>
>> We only want 'auto' in places where it could be deduced, and there's
>> no way in [temp.deduct.type] to deduce from explicit template
>> arguments to a non-type template.
>
> Well, it could be standardized in some way, e.g. autos could be deduced
> from types of function arguments and expected return types, just like
> function overloads, very much like typename template arguments are
> currently deduced.  Having all explicit autos resolved would be a
> requirement for a specialization to be viable.  Consider:
>
> template  A foo(B b, C c);
>
> ...
>
>   int x1 = foo("", 0); // ok, all args deduced
>   int x2 = foo("", 0); // ok, A is explicit, B and C are deduced
>
>   int x3 = foo("", 0); // B could be deduced as in x2
>   int x4 = foo("", 0); // all args deduced as in x1
>
>   auto x5 = foo("", 0); // deduction of A from context fails
>
> Cases in which auto appears in the top-level are the trivial ones, that
> our type deduction machinery could support right away (it has to; it's
> the same as deducing any typename argument left out).
>
> More involved cases, in which auto appears other than at the top level,
> could still be deduced in general, though we would need some more
> implementation work to get them right:
>
>   int *x6 = foo("", 0);
>   // deduced:   int const charint -> calls foo
>
>
> I seems to me it would be a natural extension to the existing draft
> specs; I guess the most common use would be to just tell the compiler to
> figure out one specific template argument, when you only wish to
> explicitly specify another after that: foo

I suppose that's possible.  Nobody has suggested such a thing to the
committee, however.

>>> Is this (in the patch below) the best spot to test for it?
>
>> This seems like a plausible spot, but is there a reason not to check
>> in cp_parser_template_id?
>
> AFAICT we wouldn't always know, within cp_parser_template_id, whether
> the id is a type or a function.

True, it looks like sometimes we build a TEMPLATE_ID_EXPR with an
IDENTIFIER_NODE.  Looking at tsubst_copy_and_build, I see that we
don't call finish_id_expression when substituting such a
TEMPLATE_ID_EXPR.  So maybe lookup_template_function and
lookup_template_variable are the right places for this test.

Jason


Re: [og7] vector_length extension part 2: Generalize state propagation and synchronization

2018-03-30 Thread Tom de Vries

On 03/30/2018 03:07 AM, Tom de Vries wrote:

On 03/02/2018 05:55 PM, Cesar Philippidis wrote:

As a follow up patch will show, the nvptx BE falls back to using
vector_length = 32 when a vector loop is nested inside a worker loop.


I disabled the fallback, and analyzed the vred2d-128.c illegal memory 
access execution failure.


I minimized that down to this ptx:
...
.shared .align 8 .u8 __oacc_bcast[176];

{
   {
     .reg .u32 %x;
     mov.u32 %x,%tid.x;
     setp.ne.u32 %r86,%x,0;
   }

   {
     .reg .u32 %tidy;
     .reg .u64 %t_bcast;
     .reg .u64 %y64;
     mov.u32 %tidy,%tid.y;
     cvt.u64.u32 %y64,%tidy;
     add.u64 %y64,%y64,1;
     cvta.shared.u64 %t_bcast,__oacc_bcast;
     mad.lo.u64 %r66,%y64,88,%t_bcast;
   }

   @ %r86 bra $L28;
   st.u32 [%r66+80],0;
  $L28:
   ret;
}
...

The ptx is called with 2 workers and 128 vector_length.

So, 2 workers mean %tid.y has values 0 and 1.
Then %y64 has values 1 and 2.
Then %r66 has values __oacc_bcast + (1 * 88) and __oacc_bcast + (2 * 88).
Then the st.u32 accesss __oacc_bcast + (1 * 88) + 80 and __oacc_bcast + 
(2 * 88) + 80.


So we're accessing memory at location 256, while the __oacc_bcast is 
only 176 bytes big.


I formulated this assert that AFAIU detects this situation in the compiler:
...
@@ -1125,6 +1125,8 @@ nvptx_init_axis_predicate (FILE *file, int regno, 
const char *name)

    fprintf (file, "\t}\n");
  }

+static int nvptx_mach_max_workers ();
+
  /* Emit code to initialize OpenACC worker broadcast and synchronization
     registers.  */

@@ -1148,6 +1150,7 @@ nvptx_init_oacc_workers (FILE *file)
    "// vector broadcast offset\n",
    REGNO (cfun->machine->bcast_partition),
    oacc_bcast_partition);
+  gcc_assert (oacc_bcast_partition * (nvptx_mach_max_workers () + 
1) <= oacc_bcast_size);

  }
    if (cfun->machine->sync_bar)
  fprintf (file, "\t\tadd.u32\t\t%%r%d, %%tidy, 1; "
...

The assert is not triggered when the fallback is used.


I've tracked the problem down to:
...
-  if (oacc_bcast_size < data.offset)  
-   oacc_bcast_size = data.offset; 
+  if (oacc_bcast_partition < data.offset) 
+   {  
+ int psize = data.offset; 
+ psize = (psize + oacc_bcast_align - 1) & ~(oacc_bcast_align - 1);
+ oacc_bcast_partition = psize;
+ oacc_bcast_size = psize * (nvptx_mach_max_workers () + 1);   
+   }  

...

We hit this if clause for a first compiled function, with num_workers(1).

This sets oacc_bcast_partition and oacc_bcast_size as required for that 
functions.


Then we hit this if clause for a second compiled function, with 
num_workers (2).


We need oacc_bcast_size updated, but the 'oacc_bcast_partition < 
data.offset' is false, so the update doesn't happen.


I managed to fix this by making the code unconditional, and using MAX to 
update oacc_bcast_partition and oacc_bcast_size.


Thanks,
- Tom


Re: [PATCH] [PR c++/84943] allow folding of array indexing indirect_ref

2018-03-30 Thread Jason Merrill
On Fri, Mar 30, 2018 at 3:48 AM, Alexandre Oliva  wrote:
> On Mar 29, 2018, Alexandre Oliva  wrote:
>
>> Here's a patch that should take care of the marking a namespace-scoped
>> or static member function as used when taking its address, thus working
>> around (fixing?) the reported problem.
>
>> Regstrapping now.  Ok to install if it passes?
>
> It regressed overload/template1.C, because we mark an erroneous template
> specialization as used when we attempt deduction.
>
> The following updated patch avoids that regression, and it has passed
> bootstrap and regression testing on i686- and x86_64-linux-gnu.  Ok to 
> install?
>
> [PR c++/84943] mark function as used when taking its address
>
> fn[0]() ICEd because we would fold the INDIRECT_REF used for the
> array indexing while building the address for the call, after not
> finding the decl hiding there at first.  But the decl would be exposed
> by the folding, and then lower layers would complain we had the decl,
> after all, but it wasn't one of the artificial or special functions
> that could be called without being marked as used.
>
> This patch arranges for a FUNCTION_DECL to be marked as used when
> taking its address, just like we already did when taking the address
> of a static function to call it as a member function (i.e. using the
> obj.fn() notation).  However, we shouldn't mark functions as used when
> just performing overload resolution, lest we might instantiate
> templates we shouldn't, as in g++.dg/overload/template1.C.
>
>
> for  gcc/cp/ChangeLog
>
> PR c++/84943
> * typeck.c (cp_build_addr_expr_1): Mark FUNCTION_DECL as
> used.
>
> for  gcc/testsuite/ChangeLog
>
> PR c++/84943
> * g++.dg/pr84943.C: New.
> * g++.dg/pr84943-2.C: New.
> ---
>  gcc/cp/typeck.c  |9 +
>  gcc/testsuite/g++.dg/pr84943-2.C |   64 
> ++
>  gcc/testsuite/g++.dg/pr84943.C   |8 +
>  3 files changed, 80 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/pr84943-2.C
>  create mode 100644 gcc/testsuite/g++.dg/pr84943.C
>
> diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
> index d3183b5321d3..f6b25c8a837d 100644
> --- a/gcc/cp/typeck.c
> +++ b/gcc/cp/typeck.c
> @@ -5831,6 +5831,8 @@ cp_build_addr_expr_1 (tree arg, bool strict_lvalue, 
> tsubst_flags_t complain)
>if (type_unknown_p (arg))
>  return build1 (ADDR_EXPR, unknown_type_node, arg);
>
> +  tree maybe_overloaded_arg = arg;

I don't think we need this; if arg is overloaded, we take the
type_unknown_p early exit, so the code lower down is always dealing
with a single function.

Jason


[og7, openacc, committed] Add vector-length-128-{1,2,3}.c test-cases

2018-03-30 Thread Tom de Vries

Hi,

this patch adds three testcases, setting vector length to 128 in three 
different ways:

1. the vector_length clause
2. the -fopenacc-dim option
3. the GOMP_OPENACC_DIM variable

The tests contains:
- a check of the dimensions that the compiler decides upon
- a check of the dimensions used at runtime by libgomp

[ The first check is made possible by the "Add scan-offload-tree-dump" 
patch I've just committed.


The second used setting environment variable GOMP_DEBUG to 1. We cannot 
do this in main using setenv, as we can for GOMP_OPENACC_DIM, so we have 
to use dg-set-target-env-var. This means the dg-output scans will fail 
in a remote test setup where dg-set-target-env-var is broken. This is 
annoying, but not as annoying as not having tests that DTRT in a local 
test setup.


Also annoying is the fact that GOMP_DEBUG=1 prints a lot of lines into 
libgomp.log. We could improve on that by not emitting the PTX code for 
GOMP_DEBUG=1, and maybe emit that depending on the value of another 
variable, say GOMP_OPENACC_DEBUG=n.

]

Currently, in all three test cases we don't use vector length 128, but 
fall back to warp_size, so checks test for 32, not 128 vector_length.


Tested libgomp on x86_64 build with nvptx accelerator.

Committed.

Thanks,
- Tom
[openacc] Add vector-length-128-{1,2,3}.c test-cases

2018-03-30  Tom de Vries  

	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c: New test.

---
 .../vector-length-128-1.c  | 39 
 .../vector-length-128-2.c  | 40 +
 .../vector-length-128-3.c  | 42 ++
 3 files changed, 121 insertions(+)

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
new file mode 100644
index 000..fab5b0d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include 
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+{
+  a[i] = i % 3;
+  b[i] = i % 5;
+}
+
+#pragma acc parallel vector_length (128) copyin (a,b) copyout (c)
+  {
+#pragma acc loop vector
+for (unsigned int i = 0; i < n; i++)
+  c[i] = a[i] + b[i];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+if (c[i] != (i % 3) + (i % 5))
+  abort ();
+
+  return 0;
+}
+/* { dg-prune-output "using vector_length \\(32\\), ignoring 128" } */
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
new file mode 100644
index 000..cc6fd55
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=-:-:128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include 
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+{
+  a[i] = i % 3;
+  b[i] = i % 5;
+}
+
+#pragma acc parallel copyin (a,b) copyout (c)
+  {
+#pragma acc loop vector
+for (unsigned int i = 0; i < n; i++)
+  c[i] = a[i] + b[i];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+if (c[i] != (i % 3) + (i % 5))
+  abort ();
+
+  return 0;
+}
+/* { dg-prune-output "using vector_length \\(32\\), ignoring 128" } */
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c
new file mode 100644
index 000..c403e74
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-3.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* We default to warp size 32 for the vector length, so the GOMP_OPENACC_DIM has
+   no effect.  */

[og7, testsuite, committed] Add scan-offload-tree-dump

2018-03-30 Thread Tom de Vries

Hi,

I've ported "Add scan-wpa-ipa-dump" (submitted here: 
https://gcc.gnu.org/ml/gcc-patches/2018-03/msg01529.html ) to the og7 
branch, and committed it. Changes are trivial, so I'm not reposting that 
patch.


This patch adds the possibility to scan tree dump files generated by the 
offloading lto1 invocation.


Tested libgomp on x86_64 build with nvptx accelerator.

Committed.

Thanks,
- Tom
[testsuite] Add scan-offload-tree-dump

2018-03-28  Tom de Vries  

	PR testsuite/85106
	* lib/scanoffloadtree.exp: New file.

	* testsuite/lib/libgomp-dg.exp (libgomp-dg-test): Add save-temps to
	extra_tool_flags if it contains an -foffload=-fdump-* flag.
	* testsuite/lib/libgomp.exp: Include scanoffloadtree.exp.

	* doc/sourcebuild.texi (Commands for use in dg-final, Scan optimization
	dump files): Add offload-tree.

---
 gcc/doc/sourcebuild.texi  |   4 +-
 gcc/testsuite/lib/scanoffloadtree.exp | 147 ++
 libgomp/testsuite/lib/libgomp-dg.exp  |   8 ++
 libgomp/testsuite/lib/libgomp.exp |   1 +
 4 files changed, 158 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index f92ec66..12c7197 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2378,8 +2378,8 @@ assembly output.
 
 @subsubsection Scan optimization dump files
 
-These commands are available for @var{kind} of @code{tree}, @code{rtl},
-@code{ipa}, and @code{wpa-ipa}.
+These commands are available for @var{kind} of @code{tree}, @code{offload-tree},
+@code{rtl}, @code{ipa}, and @code{wpa-ipa}.
 
 @table @code
 @item scan-@var{kind}-dump @var{regex} @var{suffix} [@{ target/xfail @var{selector} @}]
diff --git a/gcc/testsuite/lib/scanoffloadtree.exp b/gcc/testsuite/lib/scanoffloadtree.exp
new file mode 100644
index 000..26c98d3
--- /dev/null
+++ b/gcc/testsuite/lib/scanoffloadtree.exp
@@ -0,0 +1,147 @@
+#   Copyright (C) 2018 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# .
+
+# Various utilities for scanning offloading tree dump output, used by
+# libgomp.exp.
+
+load_lib scandump.exp
+
+# Utility for scanning compiler result, invoked via dg-final.
+# Call pass if pattern is present, otherwise fail.
+#
+# Argument 0 is the regexp to match
+# Argument 1 is the name of the dumped tree pass
+# Argument 2 handles expected failures and the like
+proc scan-offload-tree-dump { args } {
+
+if { [llength $args] < 2 } {
+	error "scan-offload-tree-dump: too few arguments"
+	return
+}
+if { [llength $args] > 3 } {
+	error "scan-offload-tree-dump: too many arguments"
+	return
+}
+if { [llength $args] >= 3 } {
+	scan-dump "offload-tree" [lindex $args 0] \
+		  "\[0-9\]\[0-9\]\[0-9]t.[lindex $args 1]" ".o" \
+		  [lindex $args 2]
+} else {
+	scan-dump "offload-tree" [lindex $args 0] \
+		  "\[0-9\]\[0-9\]\[0-9]t.[lindex $args 1]" ".o"
+}
+}
+
+# Call pass if pattern is present given number of times, otherwise fail.
+# Argument 0 is the regexp to match
+# Argument 1 is number of times the regexp must be found
+# Argument 2 is the name of the dumped tree pass
+# Argument 3 handles expected failures and the like
+proc scan-offload-tree-dump-times { args } {
+
+if { [llength $args] < 3 } {
+	error "scan-offload-tree-dump: too few arguments"
+	return
+}
+if { [llength $args] > 4 } {
+	error "scan-offload-tree-dump: too many arguments"
+	return
+}
+if { [llength $args] >= 4 } {
+	scan-dump-times "offload-tree" [lindex $args 0] [lindex $args 1] \
+			"\[0-9\]\[0-9\]\[0-9]t.[lindex $args 2]" ".o" \
+			[lindex $args 3]
+} else {
+	scan-dump-times "offload-tree" [lindex $args 0] [lindex $args 1] \
+			"\[0-9\]\[0-9\]\[0-9]t.[lindex $args 2]" ".o"
+}
+}
+
+# Call pass if pattern is not present, otherwise fail.
+#
+# Argument 0 is the regexp to match
+# Argument 1 is the name of the dumped tree pass
+# Argument 2 handles expected failures and the like
+proc scan-offload-tree-dump-not { args } {
+
+if { [llength $args] < 2 } {
+	error "scan-offload-tree-dump-not: too few arguments"
+	return
+}
+if { [llength $args] > 3 } {
+	error "scan-offload-tree-dump-not: too many arguments"
+	return
+}
+if { [llength $args] >= 3 } {
+	scan-dump-not "offload-tree" [lindex $args 0] \
+		  "\[0-9\]\[0-9\]\[0-9]t.[lindex $args 1]" ".o" \
+		  

[patch, fortran, testsuite, committed] Remove illegal code from substr_6.f90

2018-03-30 Thread Thomas Koenig

Hello world,

F2003, 6.1.1. "Substrings" states that

Both the starting point and the ending
point shall be within the range 1, 2, ..., n unless the starting point 
exceeds the ending point, in which

case the substring has length zero.

The attached patch, which corrects the test case, has
been committed as obvious.

What's left is the accepts-invalid bug.

Regards

Thomas

2018-03-30  Thomas Koenig  

PR fortran/85130
* gfortran.dg/substr_6.f90: Remove illegal test for
out-of-bounds substring.
Index: gfortran.dg/substr_6.f90
===
--- gfortran.dg/substr_6.f90	(Revision 258845)
+++ gfortran.dg/substr_6.f90	(Arbeitskopie)
@@ -6,8 +6,6 @@
 CHARACTER*5 c(1)
 CHARACTER(1), parameter :: c1(5) = (/ "1", "2", "3", ACHAR(0), "5" /)
 
-c = c0(1)(-5:-8)
-if (c(1) /= " ") STOP 1
 c = (/ c0(1)(1:5) /)
 do i=1,5
if (c(1)(i:i) /= c1(i)) STOP 2


Re: [Aarch64] Fix conditional branches with target far away.

2018-03-30 Thread Sameera Deshpande
Hi Richard,

The testcase is working with the patch you suggested, thanks for
pointing that out.

On 30 March 2018 at 16:54, Sameera Deshpande
 wrote:
> On 30 March 2018 at 16:39, Richard Sandiford
>  wrote:
>>> Hi Sudakshina,
>>>
>>> Thanks for pointing that out. Updated the conditions for attribute
>>> length to take care of boundary conditions for offset range.
>>>
>>> Please find attached the updated patch.
>>>
>>> I have tested it for gcc testsuite and the failing testcase. Ok for trunk?
>>>
>>> On 22 March 2018 at 19:06, Sudakshina Das  wrote:
 Hi Sameera

 On 22/03/18 02:07, Sameera Deshpande wrote:
>
> Hi Sudakshina,
>
> As per the ARMv8 ARM, for the offset range (-1048576 ,1048572), the
> far branch instruction offset is inclusive of both the offsets. Hence,
> I am using <=||=> and not <||>= as it was in previous implementation.


 I have to admit earlier I was only looking at the patch mechanically and
 found a difference with the previous implementation in offset comparison.
 After you pointed out, I looked up the ARMv8 ARM and I have a couple of
 doubts:

 1. My understanding is that any offset in [-1048576 ,1048572] both 
 inclusive
 qualifies as an 'in range' offset. However, the code for both attribute
 length and far_branch has been using [-1048576 ,1048572), that is, ( >= && 
 <
 ). If the far_branch was incorrectly calculated, then maybe the length
 calculations with similar magic numbers should also be corrected? Of 
 course,
 I am not an expert in this and maybe this was a conscience decision so I
 would ask Ramana to maybe clarify if he remembers.

 2. Now to come back to your patch, if my understanding is correct, I think 
 a
 far_branch would be anything outside of this range, that is,
 (offset < -1048576 || offset > 1048572), anything that can not be
 represented in the 21-bit range.

 Thanks
 Sudi
>>
>> [...]
>>
>>> @@ -466,14 +459,9 @@
>>>[(set_attr "type" "branch")
>>> (set (attr "length")
>>>   (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int 
>>> -1048576))
>>> -(lt (minus (match_dup 2) (pc)) (const_int 
>>> 1048572)))
>>> +(le (minus (match_dup 2) (pc)) (const_int 
>>> 1048572)))
>>> (const_int 4)
>>> -   (const_int 8)))
>>
>> Sorry for not replying earlier, but I think the use of "lt" rather than
>> "le" in the current length attribute is deliberate.  Distances measured
>> from (pc) in "length" are a bit special in that backward distances are
>> measured from the start of the instruction and forward distances are
>> measured from the end of the instruction:
>>
>>   /* The address of the current insn.  We implement this actually as the
>>  address of the current insn for backward branches, but the last
>>  address of the next insn for forward branches, and both with
>>  adjustments that account for the worst-case possible stretching of
>>  intervening alignments between this insn and its destination.  */
>>
>> This avoids the chicken-and-egg situation of the length of the branch
>> depending on the forward distance and the forward distance depending
>> on the length of the branch.
>>
>> In contrast, this code:
>>
>>> @@ -712,7 +695,11 @@
>>>{
>>>  if (get_attr_length (insn) == 8)
>>>{
>>> - if (get_attr_far_branch (insn) == 1)
>>> + long long int offset;
>>> + offset = INSN_ADDRESSES (INSN_UID (XEXP (operands[2], 0)))
>>> +   - INSN_ADDRESSES (INSN_UID (insn));
>>> +
>>> + if (offset < -1048576 || offset > 1048572)
>>> return aarch64_gen_far_branch (operands, 2, "Ltb",
>>>"\\t%0, %1, ");
>>>   else
>>
>> is reading the final computed addresses, so the code is right to use
>> the real instruction range.  (FWIW I agree with Kyrill that using
>> IN_RANGE with hex constants would be clearer.)
>>
>> That said... a possible problem comes from situations like:
>>
>>address length insn
>>..c  8 A
>>   ..align to 8 bytes...
>>..8B
>>..c  4 C
>>   ..align to 16 bytes...
>>..0D, branch to B
>>
>> when D is at the maximum extent of the branch range and when GCC's length
>> for A is only a conservative estimate.  If the length of A turns out to
>> be 4 rather than 8 at assembly time, the alignment to 8 bytes will be
>> a no-op and the address of B and C will be 8 less than we calculated.
>> But the alignment to 16 bytes will then insert 8 bytes of padding rather
>> than none, and the address of D won't change.  The branch will then be
>> out of range even though the addresses calculated by GCC showed it as
>> being in range.  

Re: [Aarch64] Fix conditional branches with target far away.

2018-03-30 Thread Sameera Deshpande
On 30 March 2018 at 16:39, Richard Sandiford
 wrote:
>> Hi Sudakshina,
>>
>> Thanks for pointing that out. Updated the conditions for attribute
>> length to take care of boundary conditions for offset range.
>>
>> Please find attached the updated patch.
>>
>> I have tested it for gcc testsuite and the failing testcase. Ok for trunk?
>>
>> On 22 March 2018 at 19:06, Sudakshina Das  wrote:
>>> Hi Sameera
>>>
>>> On 22/03/18 02:07, Sameera Deshpande wrote:

 Hi Sudakshina,

 As per the ARMv8 ARM, for the offset range (-1048576 ,1048572), the
 far branch instruction offset is inclusive of both the offsets. Hence,
 I am using <=||=> and not <||>= as it was in previous implementation.
>>>
>>>
>>> I have to admit earlier I was only looking at the patch mechanically and
>>> found a difference with the previous implementation in offset comparison.
>>> After you pointed out, I looked up the ARMv8 ARM and I have a couple of
>>> doubts:
>>>
>>> 1. My understanding is that any offset in [-1048576 ,1048572] both inclusive
>>> qualifies as an 'in range' offset. However, the code for both attribute
>>> length and far_branch has been using [-1048576 ,1048572), that is, ( >= && <
>>> ). If the far_branch was incorrectly calculated, then maybe the length
>>> calculations with similar magic numbers should also be corrected? Of course,
>>> I am not an expert in this and maybe this was a conscience decision so I
>>> would ask Ramana to maybe clarify if he remembers.
>>>
>>> 2. Now to come back to your patch, if my understanding is correct, I think a
>>> far_branch would be anything outside of this range, that is,
>>> (offset < -1048576 || offset > 1048572), anything that can not be
>>> represented in the 21-bit range.
>>>
>>> Thanks
>>> Sudi
>
> [...]
>
>> @@ -466,14 +459,9 @@
>>[(set_attr "type" "branch")
>> (set (attr "length")
>>   (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
>> -(lt (minus (match_dup 2) (pc)) (const_int 1048572)))
>> +(le (minus (match_dup 2) (pc)) (const_int 1048572)))
>> (const_int 4)
>> -   (const_int 8)))
>
> Sorry for not replying earlier, but I think the use of "lt" rather than
> "le" in the current length attribute is deliberate.  Distances measured
> from (pc) in "length" are a bit special in that backward distances are
> measured from the start of the instruction and forward distances are
> measured from the end of the instruction:
>
>   /* The address of the current insn.  We implement this actually as the
>  address of the current insn for backward branches, but the last
>  address of the next insn for forward branches, and both with
>  adjustments that account for the worst-case possible stretching of
>  intervening alignments between this insn and its destination.  */
>
> This avoids the chicken-and-egg situation of the length of the branch
> depending on the forward distance and the forward distance depending
> on the length of the branch.
>
> In contrast, this code:
>
>> @@ -712,7 +695,11 @@
>>{
>>  if (get_attr_length (insn) == 8)
>>{
>> - if (get_attr_far_branch (insn) == 1)
>> + long long int offset;
>> + offset = INSN_ADDRESSES (INSN_UID (XEXP (operands[2], 0)))
>> +   - INSN_ADDRESSES (INSN_UID (insn));
>> +
>> + if (offset < -1048576 || offset > 1048572)
>> return aarch64_gen_far_branch (operands, 2, "Ltb",
>>"\\t%0, %1, ");
>>   else
>
> is reading the final computed addresses, so the code is right to use
> the real instruction range.  (FWIW I agree with Kyrill that using
> IN_RANGE with hex constants would be clearer.)
>
> That said... a possible problem comes from situations like:
>
>address length insn
>..c  8 A
>   ..align to 8 bytes...
>..8B
>..c  4 C
>   ..align to 16 bytes...
>..0D, branch to B
>
> when D is at the maximum extent of the branch range and when GCC's length
> for A is only a conservative estimate.  If the length of A turns out to
> be 4 rather than 8 at assembly time, the alignment to 8 bytes will be
> a no-op and the address of B and C will be 8 less than we calculated.
> But the alignment to 16 bytes will then insert 8 bytes of padding rather
> than none, and the address of D won't change.  The branch will then be
> out of range even though the addresses calculated by GCC showed it as
> being in range.  insn_current_reference_address accounts for this, and
> so copes correctly with conservative lengths.
>
> The length can legitimately be conservative for things like asm
> statements, so I guess using the far_branch attribute is best
> after all.  Sorry for the wrong turn.
>
> On the face of it (without access to the testcase), the only problem
> with 

Re: [Aarch64] Fix conditional branches with target far away.

2018-03-30 Thread Richard Sandiford
> Hi Sudakshina,
>
> Thanks for pointing that out. Updated the conditions for attribute
> length to take care of boundary conditions for offset range.
>
> Please find attached the updated patch.
>
> I have tested it for gcc testsuite and the failing testcase. Ok for trunk?
>
> On 22 March 2018 at 19:06, Sudakshina Das  wrote:
>> Hi Sameera
>>
>> On 22/03/18 02:07, Sameera Deshpande wrote:
>>>
>>> Hi Sudakshina,
>>>
>>> As per the ARMv8 ARM, for the offset range (-1048576 ,1048572), the
>>> far branch instruction offset is inclusive of both the offsets. Hence,
>>> I am using <=||=> and not <||>= as it was in previous implementation.
>>
>>
>> I have to admit earlier I was only looking at the patch mechanically and
>> found a difference with the previous implementation in offset comparison.
>> After you pointed out, I looked up the ARMv8 ARM and I have a couple of
>> doubts:
>>
>> 1. My understanding is that any offset in [-1048576 ,1048572] both inclusive
>> qualifies as an 'in range' offset. However, the code for both attribute
>> length and far_branch has been using [-1048576 ,1048572), that is, ( >= && <
>> ). If the far_branch was incorrectly calculated, then maybe the length
>> calculations with similar magic numbers should also be corrected? Of course,
>> I am not an expert in this and maybe this was a conscience decision so I
>> would ask Ramana to maybe clarify if he remembers.
>>
>> 2. Now to come back to your patch, if my understanding is correct, I think a
>> far_branch would be anything outside of this range, that is,
>> (offset < -1048576 || offset > 1048572), anything that can not be
>> represented in the 21-bit range.
>>
>> Thanks
>> Sudi

[...]

> @@ -466,14 +459,9 @@
>[(set_attr "type" "branch")
> (set (attr "length")
>   (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
> -(lt (minus (match_dup 2) (pc)) (const_int 1048572)))
> +(le (minus (match_dup 2) (pc)) (const_int 1048572)))
> (const_int 4)
> -   (const_int 8)))

Sorry for not replying earlier, but I think the use of "lt" rather than
"le" in the current length attribute is deliberate.  Distances measured
from (pc) in "length" are a bit special in that backward distances are
measured from the start of the instruction and forward distances are
measured from the end of the instruction:

  /* The address of the current insn.  We implement this actually as the
 address of the current insn for backward branches, but the last
 address of the next insn for forward branches, and both with
 adjustments that account for the worst-case possible stretching of
 intervening alignments between this insn and its destination.  */

This avoids the chicken-and-egg situation of the length of the branch
depending on the forward distance and the forward distance depending
on the length of the branch.

In contrast, this code:

> @@ -712,7 +695,11 @@
>{
>  if (get_attr_length (insn) == 8)
>{
> - if (get_attr_far_branch (insn) == 1)
> + long long int offset;
> + offset = INSN_ADDRESSES (INSN_UID (XEXP (operands[2], 0)))
> +   - INSN_ADDRESSES (INSN_UID (insn));
> +
> + if (offset < -1048576 || offset > 1048572)
> return aarch64_gen_far_branch (operands, 2, "Ltb",
>"\\t%0, %1, ");
>   else

is reading the final computed addresses, so the code is right to use
the real instruction range.  (FWIW I agree with Kyrill that using
IN_RANGE with hex constants would be clearer.)

That said... a possible problem comes from situations like:

   address length insn
   ..c  8 A
  ..align to 8 bytes...
   ..8B
   ..c  4 C
  ..align to 16 bytes...
   ..0D, branch to B

when D is at the maximum extent of the branch range and when GCC's length
for A is only a conservative estimate.  If the length of A turns out to
be 4 rather than 8 at assembly time, the alignment to 8 bytes will be
a no-op and the address of B and C will be 8 less than we calculated.
But the alignment to 16 bytes will then insert 8 bytes of padding rather
than none, and the address of D won't change.  The branch will then be
out of range even though the addresses calculated by GCC showed it as
being in range.  insn_current_reference_address accounts for this, and
so copes correctly with conservative lengths.

The length can legitimately be conservative for things like asm
statements, so I guess using the far_branch attribute is best
after all.  Sorry for the wrong turn.

On the face of it (without access to the testcase), the only problem
with using far_branch in the output template is that insn_last_address
is not set, but needs to be for insn_current_reference_address to do
the right thing for forward branches.  Does the patch below work for
your testcase?

(As the 

[PATCH] [PR c++/85039] no type definitions in builtin offsetof

2018-03-30 Thread Alexandre Oliva
Types defined within a __builtin_offsetof argument don't always get
properly recorded as members of their context types, so if they're
anonymous, we may fail to assign them an anon type index for mangling
and ICE.

We shouldn't allow types to be introduced in __builtin_offsetof, I
think, so I've arranged for us to reject them.  Even then, we still
parse the definitions and attempt to assign mangled names to its
member functions, so the ICE remains.  Since we've already reported an
error, we might as well complete the name assignment with an arbitrary
index, thus avoiding the ICE.


Regstrapped on i686- and x86_64-linux-gnu, regressing
g++.dg/parse/semicolon3.C, which defines a (named) struct in
builtin_offsetof.  I suppose this means I should look for another
solution that doesn't involve rejecting these definitions, eh?


for  gcc/cp/ChangeLog

PR c++/85039
* parser.c (cp_parser_builtin_offset): Reject type definitions.
* mangle.c (nested_anon_class_index): Avoid crash returning -1
if we've seen errors.

for  gcc/testsuite/ChangeLog

PR c++/85039
* g++.dg/pr85039-1.C: New.
* g++.dg/pr85039-2.C: New.
---
 gcc/cp/mangle.c  |3 +++
 gcc/cp/parser.c  |8 +++-
 gcc/testsuite/g++.dg/pr85039-1.C |   17 +
 gcc/testsuite/g++.dg/pr85039-2.C |   10 ++
 4 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/pr85039-1.C
 create mode 100644 gcc/testsuite/g++.dg/pr85039-2.C

diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c
index 94c4bed28486..a7f9d686345d 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -1623,6 +1623,9 @@ nested_anon_class_index (tree type)
  ++index;
   }
 
+  if (seen_error ())
+return -1;
+
   gcc_unreachable ();
 }
 
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index e946d0b72292..135efb7eb2da 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -9833,7 +9833,13 @@ cp_parser_builtin_offsetof (cp_parser *parser)
   parens.require_open (parser);
   /* Parse the type-id.  */
   location_t loc = cp_lexer_peek_token (parser->lexer)->location;
-  type = cp_parser_type_id (parser);
+  {
+const char *saved_message = parser->type_definition_forbidden_message;
+parser->type_definition_forbidden_message
+  = G_("types may not be defined within __builtin_offsetof");
+type = cp_parser_type_id (parser);
+parser->type_definition_forbidden_message = saved_message;
+  }
   /* Look for the `,'.  */
   cp_parser_require (parser, CPP_COMMA, RT_COMMA);
   token = cp_lexer_peek_token (parser->lexer);
diff --git a/gcc/testsuite/g++.dg/pr85039-1.C b/gcc/testsuite/g++.dg/pr85039-1.C
new file mode 100644
index ..f57c8a261dee
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr85039-1.C
@@ -0,0 +1,17 @@
+// { dg-do compile { target c++14 } }
+
+constexpr int a() {
+ return
+  __builtin_offsetof(struct { // { dg-error "types may not be defined" }
+int i;
+short b {
+  __builtin_offsetof(struct { // { dg-error "types may not be defined" }
+   int j;
+struct c { // { dg-error "types may not be defined" }
+  void d() {
+  }
+};
+  }, j)
+};
+  }, i);
+}
diff --git a/gcc/testsuite/g++.dg/pr85039-2.C b/gcc/testsuite/g++.dg/pr85039-2.C
new file mode 100644
index ..e6d16325105b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr85039-2.C
@@ -0,0 +1,10 @@
+// { dg-do compile }
+
+struct d {
+  static d *b;
+} * d::b(__builtin_offsetof(struct { // { dg-error "types may not be defined" }
+  int i;
+  struct a { // { dg-error "types may not be defined" }
+int c() { return .1f; }
+  };
+}, i));


-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


[PATCH] [PR c++/85027] deal with baselink in save_expr in instantiate_type

2018-03-30 Thread Alexandre Oliva
We use SAVE_EXPRs in conditional expressions without the middle
operand, to evaluate the first operand only once.  When the conversion
of the first operand fails, we may call instantiate_type get a better
error message.  We have code to peel off the SAVE_EXPR there, but then
we may end up with a BASELINK, and we're past the code that deals with
BASELINKs.  Reorder the tests so that we expose the saved expr first,
and then deal with BASELINKs.

Regstrapped on i686- and x86_64-linux-gnu.  Ok to install?

for  gcc/cp/ChangeLog

PR c++/85027
* class.c (instantiate_type): Peel off SAVE_EXPR before
BASELINK.

for  gcc/testsuite/ChangeLog

PR c++/85027
* g++.dg/pr85027.C: New.
---
 gcc/cp/class.c |   10 +-
 gcc/testsuite/g++.dg/pr85027.C |8 
 2 files changed, 13 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/pr85027.C

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index debcaf21cf76..0427d1224f74 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -7971,6 +7971,11 @@ instantiate_type (tree lhstype, tree rhs, tsubst_flags_t 
complain)
}
 }
 
+  /* If we instantiate a template, and it is a A ?: C expression
+ with omitted B, look through the SAVE_EXPR.  */
+  if (TREE_CODE (rhs) == SAVE_EXPR)
+rhs = TREE_OPERAND (rhs, 0);
+
   if (BASELINK_P (rhs))
 {
   access_path = BASELINK_ACCESS_BINFO (rhs);
@@ -7986,11 +7991,6 @@ instantiate_type (tree lhstype, tree rhs, tsubst_flags_t 
complain)
   return error_mark_node;
 }
 
-  /* If we instantiate a template, and it is a A ?: C expression
- with omitted B, look through the SAVE_EXPR.  */
-  if (TREE_CODE (rhs) == SAVE_EXPR)
-rhs = TREE_OPERAND (rhs, 0);
-
   /* There are only a few kinds of expressions that may have a type
  dependent on overload resolution.  */
   gcc_assert (TREE_CODE (rhs) == ADDR_EXPR
diff --git a/gcc/testsuite/g++.dg/pr85027.C b/gcc/testsuite/g++.dg/pr85027.C
new file mode 100644
index ..01b1b291aecd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr85027.C
@@ -0,0 +1,8 @@
+// { dg-do compile }
+
+// Avoid -pedantic-error default
+// { dg-options "" }
+
+struct A { static int a; };
+
+int t = A::A ? : 0; // { dg-error "cannot resolve" }


-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: [PATCH] [PR c++/84943] allow folding of array indexing indirect_ref

2018-03-30 Thread Alexandre Oliva
On Mar 29, 2018, Alexandre Oliva  wrote:

> Here's a patch that should take care of the marking a namespace-scoped
> or static member function as used when taking its address, thus working
> around (fixing?) the reported problem.

> Regstrapping now.  Ok to install if it passes?

It regressed overload/template1.C, because we mark an erroneous template
specialization as used when we attempt deduction.

The following updated patch avoids that regression, and it has passed
bootstrap and regression testing on i686- and x86_64-linux-gnu.  Ok to install?

[PR c++/84943] mark function as used when taking its address

fn[0]() ICEd because we would fold the INDIRECT_REF used for the
array indexing while building the address for the call, after not
finding the decl hiding there at first.  But the decl would be exposed
by the folding, and then lower layers would complain we had the decl,
after all, but it wasn't one of the artificial or special functions
that could be called without being marked as used.

This patch arranges for a FUNCTION_DECL to be marked as used when
taking its address, just like we already did when taking the address
of a static function to call it as a member function (i.e. using the
obj.fn() notation).  However, we shouldn't mark functions as used when
just performing overload resolution, lest we might instantiate
templates we shouldn't, as in g++.dg/overload/template1.C.


for  gcc/cp/ChangeLog

PR c++/84943
* typeck.c (cp_build_addr_expr_1): Mark FUNCTION_DECL as
used.

for  gcc/testsuite/ChangeLog

PR c++/84943
* g++.dg/pr84943.C: New.
* g++.dg/pr84943-2.C: New.
---
 gcc/cp/typeck.c  |9 +
 gcc/testsuite/g++.dg/pr84943-2.C |   64 ++
 gcc/testsuite/g++.dg/pr84943.C   |8 +
 3 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/pr84943-2.C
 create mode 100644 gcc/testsuite/g++.dg/pr84943.C

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index d3183b5321d3..f6b25c8a837d 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -5831,6 +5831,8 @@ cp_build_addr_expr_1 (tree arg, bool strict_lvalue, 
tsubst_flags_t complain)
   if (type_unknown_p (arg))
 return build1 (ADDR_EXPR, unknown_type_node, arg);
 
+  tree maybe_overloaded_arg = arg;
+
   if (TREE_CODE (arg) == OFFSET_REF)
 /* We want a pointer to member; bypass all the code for actually taking
the address of something.  */
@@ -5971,6 +5973,10 @@ cp_build_addr_expr_1 (tree arg, bool strict_lvalue, 
tsubst_flags_t complain)
  so we can just form an ADDR_EXPR with the correct type.  */
   if (processing_template_decl || TREE_CODE (arg) != COMPONENT_REF)
 {
+  if (TREE_CODE (arg) == FUNCTION_DECL
+ && arg == maybe_overloaded_arg && !(complain & tf_conv)
+ && !mark_used (arg, complain) && !(complain & tf_error))
+   return error_mark_node;
   val = build_address (arg);
   if (TREE_CODE (arg) == OFFSET_REF)
PTRMEM_OK_P (val) = PTRMEM_OK_P (arg);
@@ -5983,7 +5989,8 @@ cp_build_addr_expr_1 (tree arg, bool strict_lvalue, 
tsubst_flags_t complain)
 function.  */
   gcc_assert (TREE_CODE (fn) == FUNCTION_DECL
  && DECL_STATIC_FUNCTION_P (fn));
-  if (!mark_used (fn, complain) && !(complain & tf_error))
+  if (arg == maybe_overloaded_arg && !(complain & tf_conv)
+ && !mark_used (fn, complain) && !(complain & tf_error))
return error_mark_node;
   val = build_address (fn);
   if (TREE_SIDE_EFFECTS (TREE_OPERAND (arg, 0)))
diff --git a/gcc/testsuite/g++.dg/pr84943-2.C b/gcc/testsuite/g++.dg/pr84943-2.C
new file mode 100644
index ..d1ef012b9155
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr84943-2.C
@@ -0,0 +1,64 @@
+// { dg-do run }
+
+// Avoid -pedantic-error default
+// { dg-options "" }
+
+// Make sure the functions referenced by various forms of
+// address-taking are marked as used and compiled in.
+
+static void ac() {}
+void a() {
+  ac[0](); // { dg-warning "arithmetic" }
+}
+
+static void bc() {}
+void b() {
+  (&*&*&*)();
+}
+
+template  U cc() {}
+void (*c())() {
+  return cc;
+}
+
+template 
+struct x {
+  void a(int);
+  template  static U a(x*) {}
+  static void a(long) {}
+  static void a(void *) {}
+  static void a() {
+void (*p0)(void*) = x().a;
+p0(0);
+void (*p1)(long) = a;
+p1(0);
+void (*p2)() = a;
+p2();
+void (*p3)(x*) = a;
+p3(0);
+  }
+};
+
+struct z {
+  void a(int);
+  template  static U a(z*) {}
+  static void a(long) {}
+  static void a(void *) {}
+  static void a() {
+void (*p0)(void*) = z().a;
+p0(0);
+void (*p1)(long) = a;
+p1(0);
+void (*p2)() = a;
+p2();
+void (*p3)(z*) = a;
+p3(0);
+  }
+};
+
+int main(int argc, char *argv[]) {
+  if (argc > 1) {
+x().a();
+z().a();
+  }
+}
diff --git a/gcc/testsuite/g++.dg/pr84943.C b/gcc/testsuite/g++.dg/pr84943.C
new