[PATCH] Fix PR84190

2018-02-15 Thread Richard Biener

The following restores behavior of GCC 6 for volatile accesses of
automatic vars that do not have their address taken.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk
and branch.

Richard.

2018-02-16  Richard Biener  

PR tree-optimization/84190
* tree-ssa.c (non_rewritable_mem_ref_base): Do not touch
volatile accesses if the decl isn't volatile.

* g++.dg/torture/pr84190.C: New testcase.

Index: gcc/tree-ssa.c
===
--- gcc/tree-ssa.c  (revision 257620)
+++ gcc/tree-ssa.c  (working copy)
@@ -1427,7 +1427,8 @@ non_rewritable_mem_ref_base (tree ref)
   if (! DECL_P (decl))
return NULL_TREE;
   if (! is_gimple_reg_type (TREE_TYPE (base))
- || VOID_TYPE_P (TREE_TYPE (base)))
+ || VOID_TYPE_P (TREE_TYPE (base))
+ || TREE_THIS_VOLATILE (decl) != TREE_THIS_VOLATILE (base))
return decl;
   if ((TREE_CODE (TREE_TYPE (decl)) == VECTOR_TYPE
   || TREE_CODE (TREE_TYPE (decl)) == COMPLEX_TYPE)
Index: gcc/testsuite/g++.dg/torture/pr84190.C
===
--- gcc/testsuite/g++.dg/torture/pr84190.C  (nonexistent)
+++ gcc/testsuite/g++.dg/torture/pr84190.C  (working copy)
@@ -0,0 +1,20 @@
+// { dg-do compile }
+// For slim LTO there's no optimized dump
+// { dg-skip-if "" { *-*-* } { "-flto" } { "" } }
+// { dg-additional-options "-fdump-tree-optimized" }
+
+typedef double T;
+static int equalfn (volatile T* x, volatile T* y);
+T gx, gy;
+int main ()
+{
+  T x = gx, y = gy;
+  return equalfn (&x, &y);
+}
+static int equalfn (volatile T* x, volatile T* y)
+{
+  return (*x == *y);
+}
+
+// There should be exactly two volatile accesses (ignoring clobbers).
+// { dg-final { scan-tree-dump-times " ={v} \[^\{\]" 2 "optimized" } }


Re: [Patch, fortran] PR84115] [8 Regression] ICE: tree check: expected tree that contains 'decl minimal' structure, have 'indirect_ref' in add_decl_as_local, at fortran/trans-decl.c:256

2018-02-15 Thread Paul Richard Thomas
> Oddly, the failing test in associate_35.f90 is the only one that works
> in 7-branch. I have left the PR open and changed the title
> accordingly.

The attached patch fixes this. OK for trunk?

Paul

2018-02-16  Paul Thomas  

PR fortran/84115
* resolve.c (resolve_assoc_var): If a non-constant target expr.
has no string length expression, make the associate variable
into a deferred length, allocatable symbol.
* trans-array.c (gfc_is_reallocatable_lhs): Add and use a ptr to
the symbol.
* trans-stmt.c (trans_associate_var): Null and free scalar
associate names that are allocatable. After assignment, remove
the allocatable attribute to prevent reallocation.

2018-02-16  Paul Thomas  

PR fortran/84115
* gfortran.dg/associate_35.f90: Remove error, add stop n's and
change to run.
Index: gcc/fortran/primary.c
===
*** gcc/fortran/primary.c   (revision 257682)
--- gcc/fortran/primary.c   (working copy)
*** gfc_match_varspec (gfc_expr *primary, in
*** 2082,2088 
  {
bool permissible;
  
!   /* These target expressions can ge resolved at any time.  */
permissible = tgt_expr && tgt_expr->symtree && tgt_expr->symtree->n.sym
&& (tgt_expr->symtree->n.sym->attr.use_assoc
|| tgt_expr->symtree->n.sym->attr.host_assoc
--- 2082,2088 
  {
bool permissible;
  
!   /* These target expressions can be resolved at any time.  */
permissible = tgt_expr && tgt_expr->symtree && tgt_expr->symtree->n.sym
&& (tgt_expr->symtree->n.sym->attr.use_assoc
|| tgt_expr->symtree->n.sym->attr.host_assoc
Index: gcc/fortran/resolve.c
===
*** gcc/fortran/resolve.c   (revision 257682)
--- gcc/fortran/resolve.c   (working copy)
*** resolve_assoc_var (gfc_symbol* sym, bool
*** 8635,8641 
if (sym->ts.type == BT_CHARACTER && !sym->attr.select_type_temporary)
  {
if (!sym->ts.u.cl)
!   sym->ts.u.cl = target->ts.u.cl;
  
if (!sym->ts.u.cl->length && !sym->ts.deferred)
{
--- 8635,8654 
if (sym->ts.type == BT_CHARACTER && !sym->attr.select_type_temporary)
  {
if (!sym->ts.u.cl)
!   {
! if (target->expr_type != EXPR_CONSTANT
! && !target->ts.u.cl->length)
!   {
! sym->ts.u.cl = gfc_get_charlen();
! sym->ts.deferred = 1;
! 
! /* This is reset in trans-stmt.c after the assignment
!of the target expression to the associate name.  */
! sym->attr.allocatable = 1;
!   }
! else
!   sym->ts.u.cl = target->ts.u.cl;
!   }
  
if (!sym->ts.u.cl->length && !sym->ts.deferred)
{
Index: gcc/fortran/trans-array.c
===
*** gcc/fortran/trans-array.c   (revision 257682)
--- gcc/fortran/trans-array.c   (working copy)
*** bool
*** 9470,9498 
  gfc_is_reallocatable_lhs (gfc_expr *expr)
  {
gfc_ref * ref;
  
if (!expr->ref)
  return false;
  
/* An allocatable class variable with no reference.  */
!   if (expr->symtree->n.sym->ts.type == BT_CLASS
!   && CLASS_DATA (expr->symtree->n.sym)->attr.allocatable
&& expr->ref && expr->ref->type == REF_COMPONENT
&& strcmp (expr->ref->u.c.component->name, "_data") == 0
&& expr->ref->next == NULL)
  return true;
  
/* An allocatable variable.  */
!   if (expr->symtree->n.sym->attr.allocatable
&& expr->ref
&& expr->ref->type == REF_ARRAY
&& expr->ref->u.ar.type == AR_FULL)
  return true;
  
/* All that can be left are allocatable components.  */
!   if ((expr->symtree->n.sym->ts.type != BT_DERIVED
!&& expr->symtree->n.sym->ts.type != BT_CLASS)
!   || !expr->symtree->n.sym->ts.u.derived->attr.alloc_comp)
  return false;
  
/* Find a component ref followed by an array reference.  */
--- 9470,9501 
  gfc_is_reallocatable_lhs (gfc_expr *expr)
  {
gfc_ref * ref;
+   gfc_symbol *sym;
  
if (!expr->ref)
  return false;
  
+   sym = expr->symtree->n.sym;
+ 
/* An allocatable class variable with no reference.  */
!   if (sym->ts.type == BT_CLASS
!   && CLASS_DATA (sym)->attr.allocatable
&& expr->ref && expr->ref->type == REF_COMPONENT
&& strcmp (expr->ref->u.c.component->name, "_data") == 0
&& expr->ref->next == NULL)
  return true;
  
/* An allocatable variable.  */
!   if (sym->attr.allocatable
&& expr->ref
&& expr->ref->type == REF_ARRAY
&& expr->ref->u.ar.type == AR_FULL)
  return true;
  
/* All that can be left are allocatable components.  */
!   if ((sym->ts.type != BT_DERIVED
!&& 

Re: [PING][PATCH v3] Disable reg offset in quad-word store for Falkor

2018-02-15 Thread Siddhesh Poyarekar
On Thursday 15 February 2018 07:50 PM, Wilco Dijkstra wrote:
> So it seems to me using existing cost mechanisms is always preferable, even 
> if you
> currently can't differentiate between loads and stores.

Luis is working on address cost adjustments among other things, so I
guess the path of least resistance for gcc8 is to have those adjustments
go in and then figure out how much improvement this patch (or separating
loads and stores) would get on top of that.  Would that be acceptable?

Siddhesh


C++ PATCH for c++/83227, C++17 ICE with list derived-to-base conversion

2018-02-15 Thread Jason Merrill
The code to avoid doing an extra copy of a list-initialized temporary
was incorrectly being used even when we actually want a
derived-to-base conversion.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit b2413566649273c0b7e172213ebf41c6269c947c
Author: Jason Merrill 
Date:   Thu Feb 15 17:22:35 2018 -0500

PR c++/83227 - C++17 ICE with init-list derived-to-base conversion.

* call.c (convert_like_real): Don't use the copy-list-initialization
shortcut for ck_base.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 7176e4afa15..5698ff60a4d 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -6938,6 +6938,11 @@ convert_like_real (conversion *convs, tree expr, tree 
fn, int argnum,
  && DECL_INHERITED_CTOR (current_function_decl))
return expr;
 
+  if (TREE_CODE (expr) == TARGET_EXPR
+ && TARGET_EXPR_LIST_INIT_P (expr))
+   /* Copy-list-initialization doesn't actually involve a copy.  */
+   return expr;
+
   /* Fall through.  */
 case ck_base:
   if (convs->kind == ck_base && !convs->need_temporary_p)
@@ -6964,10 +6969,6 @@ convert_like_real (conversion *convs, tree expr, tree 
fn, int argnum,
   if (convs->rvaluedness_matches_p)
/* standard_conversion got LOOKUP_PREFER_RVALUE.  */
flags |= LOOKUP_PREFER_RVALUE;
-  if (TREE_CODE (expr) == TARGET_EXPR
- && TARGET_EXPR_LIST_INIT_P (expr))
-   /* Copy-list-initialization doesn't actually involve a copy.  */
-   return expr;
   expr = build_temp (expr, totype, flags, &diag_kind, complain);
   if (diag_kind && complain)
{
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist98.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist98.C
new file mode 100644
index 000..4f2fcd20219
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist98.C
@@ -0,0 +1,17 @@
+// PR c++/83227
+// { dg-do compile { target c++11 } }
+
+#include <initializer_list>
+
+template <typename T> struct f {
+  f(std::initializer_list<T>) {}
+};
+
+struct h {};
+struct i : h {
+  i();
+};
+void foo(f<h>);
+int main() {
+  foo({i{}});
+}


Re: Missing Patch #2 from the RISC-V v3 Submission

2018-02-15 Thread Palmer Dabbelt

On Mon, 12 Feb 2018 15:18:39 PST (-0800), Jim Wilson wrote:

On 02/12/2018 03:23 AM, Andreas Schwab wrote:

On Feb 06 2017, Palmer Dabbelt  wrote:


+/* Because RISC-V only has word-sized atomics, it requries libatomic where
+   others do not.  So link libatomic by default, as needed.  */
+#undef LIB_SPEC
+#ifdef LD_AS_NEEDED_OPTION
+#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC \
+  " %{pthread:" LD_AS_NEEDED_OPTION " -latomic " LD_NO_AS_NEEDED_OPTION "}"
+#else
+#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic "
+#endif


Why is -latomic added only with -pthread if --as-needed is supported,
but unconditionally if not?  Wouldn't it make sense to add it
unconditionally in both cases?


I don't know the history here, but I do know that the most common atomic
related bug report we get is for people using pthread, so we were
probably thinking about that when this was written.


IIRC that's why it's done this way.


Re: [ patch, testsuite, fortran] Replace "call abort" by "stop n"

2018-02-15 Thread Steve Kargl
On Fri, Feb 16, 2018 at 12:25:11AM +0100, Thomas Koenig wrote:
> I wrote:
> 
> > OK for trunk?
> 
> Following a remark by Steve, here is an additional test case which
> checks if abort actually works.  I'll add that to the patch
> when it is committed.
> 

The patch is ok from my viewpoint.   You might want to
give others a chance to chime in.  

-- 
Steve


[PATCH] replace ICE with error for failed template deduction (PR 84355)

2018-02-15 Thread Martin Sebor

A failed template deduction in a template member of a template
triggers an ICE with -std=c++17 due to what seems like
missing handling of invalid input.  Replacing
the gcc_unreachable() call that causes the ICE with a return
statement indicating the deduction failure eliminates the ICE
and restores sane diagnostics.

Martin
PR c++/84355 - [7/8 Regression] ICE with failing template argument deduction

gcc/cp/ChangeLog:

	PR c++/84355
	* pt.c (unify): Return failure instead of asserting.

gcc/testsuite/ChangeLog:

	PR c++/84355
	* g++.dg/cpp1z/class-deduction48.C: New test.

Index: gcc/cp/pt.c
===
--- gcc/cp/pt.c	(revision 257713)
+++ gcc/cp/pt.c	(working copy)
@@ -20918,7 +20918,7 @@ unify (tree tparms, tree targs, tree parm, tree ar
 	   && TREE_CODE (tparm) != TYPE_DECL)
 	  || (TREE_CODE (parm) == TEMPLATE_TEMPLATE_PARM
 	  && TREE_CODE (tparm) != TEMPLATE_DECL))
-	gcc_unreachable ();
+	return unify_invalid (explain_p);
 
   if (TREE_CODE (parm) == BOUND_TEMPLATE_TEMPLATE_PARM)
 	{
Index: gcc/testsuite/g++.dg/cpp1z/class-deduction48.C
===
--- gcc/testsuite/g++.dg/cpp1z/class-deduction48.C	(nonexistent)
+++ gcc/testsuite/g++.dg/cpp1z/class-deduction48.C	(working copy)
@@ -0,0 +1,20 @@
+// PR c++/84355 - ICE with failing template argument deduction
+// { dg-do compile }
+// { dg-options "-std=c++17" }
+
+template 
+struct A
+{
+  template 
+  struct B
+  {
+B (T);
+  };
+
+  A () {
+B b (0);   // { dg-error "deduction failed" }
+   // { dg-error "no matching function" "" { target *-*-* } .-1 }
+  }
+};
+
+A a;


Re: [PATCH] Fix LRA ICE in lra_substitute_pseudo on DEBUG_INSN (PR rtl-optimization/83723)

2018-02-15 Thread Vladimir Makarov



On 02/14/2018 04:01 PM, Jakub Jelinek wrote:

Hi!

Unlike normal insns where SUBREGs must properly validate, in
DEBUG_INSNs we allow arbitrary SUBREGs, either the dwarf2out code
will be able to use it, or it will just punt.  The reason for it is
among other things that during analysis we usually need to ignore
debug insns, so we can't reject some optimization just because it would
create a subreg in a debug insn that doesn't validate, and resetting such
debug insns is too big a hammer.

On the following testcase on i?86 we ICE because we have a SFmode
pseudo and want to use a XFmode new_reg for it and such subreg doesn't
validate on i386.

Fixed by using gen_rtx_raw_SUBREG in DEBUG_INSNs as other passes do.
We don't have gen_lowpart_raw_SUBREG, so the patch inlines what
gen_lowpart_SUBREG does to compute the offset and uses gen_rtx_{,raw_}SUBREG
in all cases.  Bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

Yes.  Thank you, Jakub.

2018-02-14  Jakub Jelinek  

PR rtl-optimization/83723
* lra-int.h (lra_substitute_pseudo): Add DEBUG_P argument.
* lra.c (lra_substitute_pseudo): Likewise.  If true, use
gen_rtx_raw_SUBREG instead of gen_rtx_SUBREG.  Pass DEBUG_P to
recursive calls.
(lra_substitute_pseudo_within_insn): Adjust lra_substitute_pseudo
callers.
* lra-constraints.c (inherit_reload_reg, split_reg): Likewise.

* gcc.dg/pr83723.c: New test.





Re: [ patch, testsuite, fortran] Replace "call abort" by "stop n"

2018-02-15 Thread Thomas Koenig

I wrote:


OK for trunk?


Following a remark by Steve, here is an additional test case which
checks if abort actually works.  I'll add that to the patch
when it is committed.

Regards

Thomas
! { dg-do  run }
! { dg-shouldfail "Program aborted." }
program main
  call abort
end program main


[C++ Patch] PR 82468 ("[7/8 Regression] ICE with deduction guide template")

2018-02-15 Thread Paolo Carlini

Hi,

this issue is very easy to explain and the testcase minimal: 
check_special_function_return_type ICEs on a TEMPLATE_TEMPLATE_PARM as 
optype, because it tries to use CLASSTYPE_TI_TEMPLATE on it. Today, 
triangulating with other compilers too, I came to believe that here 
essentially we only have to provide sensible diagnostic and below is 
what I'm finishing testing on x86_64-linux (what about an additional 
inform using DECL_SOURCE_LOCATION (TYPE_STUB_DECL (optype))? I'm not 
sure, the rest of the function emits quite terse messages).


Thanks! Paolo.

/

/cp
2018-02-15  Paolo Carlini  

PR c++/82468
* decl.c (check_special_function_return_type): Reject template
template parameter in deduction guide.

/testsuite
2018-02-15  Paolo Carlini  

PR c++/82468
* g++.dg/cpp1z/class-deduction47.C: New.
Index: cp/decl.c
===
--- cp/decl.c   (revision 257712)
+++ cp/decl.c   (working copy)
@@ -9834,7 +9834,14 @@ check_special_function_return_type (special_functi
error_at (smallest_type_quals_location (type_quals, locations),
  "qualifiers are not allowed on declaration of "
  "deduction guide");
-  type = make_template_placeholder (CLASSTYPE_TI_TEMPLATE (optype));
+  if (TREE_CODE (optype) == TEMPLATE_TEMPLATE_PARM)
+   {
+ error ("template template parameter %qT in declaration of "
+"deduction guide", optype);
+ type = error_mark_node;
+   }
+  else
+   type = make_template_placeholder (CLASSTYPE_TI_TEMPLATE (optype));
   for (int i = 0; i < ds_last; ++i)
if (i != ds_explicit && locations[i])
  error_at (locations[i],
Index: testsuite/g++.dg/cpp1z/class-deduction47.C
===
--- testsuite/g++.dg/cpp1z/class-deduction47.C  (nonexistent)
+++ testsuite/g++.dg/cpp1z/class-deduction47.C  (working copy)
@@ -0,0 +1,5 @@
+// PR c++/82468
+// { dg-options -std=c++17 }
+
+template  class TT>
+TT(double) -> TT;  // { dg-error "template template" }


Re: [RFC] Tree loop unroller pass

2018-02-15 Thread Kugan Vivekanandarajah
Hi Wilco,

Thanks for your comments.

On 14 February 2018 at 00:05, Wilco Dijkstra  wrote:
> Hi Kugan,
>
>> Based on the previous discussions, I tried to implement a tree loop
>> unroller for partial unrolling. I would like to queue these RFC patches
>> for the next stage1 review.
>
> This is a great plan - GCC urgently requires a good unroller!
>
>> * Cost-model for selecting the loop uses the same params used
>> elsewhere in related optimizations. I was told that keeping this same
>> would allow better tuning for all the optimizations.
>
> I'd advise against using the existing params as is. Unrolling by 8x by 
> default is
> way too aggressive and counterproductive. It was perhaps OK for in-order cores
> 20 years ago, but not today. The goal of unrolling is to create more ILP in 
> small
> loops, not to generate huge blocks of repeated code which definitely won't 
> fit in
> micro-op caches and loop buffers...
>
OK, I will create separate params. It is possible that I misunderstood
it in the first place.


> Also we need to enable this by default, at least with -O3, maybe even for 
> small
> (or rather tiny) loops in -O2 like LLVM does.
It is enabled for -O3 and above now.

>
>> * I have also implemented an option to limit loops based on memory
>> streams. i.e., some micro-architectures where limiting the resulting
>> memory streams is preferred and used  to limit unrolling factor.
>
> I'm not convinced this is needed once you tune the parameters for unrolling.
> If you have say 4 read streams you must have > 10 instructions already so
> you may want to unroll this 2x in -O3, but definitely not 8x. So I see the 
> streams
> issue as a problem caused by too aggressive unroll settings. I think if you
> address that first, you're unlikely going to have an issue with too many 
> streams.
>

I will experiment with some microbenchmarks. I still think that it
will be useful for some micro-architectures. That's why it is not
enabled by default. If a back-end thinks that it is useful, it can
enable limiting the unroll factor based on memory streams.

>> * I expect that some cost-model changes might be needed
>> to handle (or provide the ability to handle) various loop preferences of
>> the micro-architectures. I am sending this patch for review early to
>> get feedback on this.
>
> Yes it should be feasible to have settings based on backend preference
> and optimization level (so O3/Ofast will unroll more than O2).
>
>> * Position of the pass in passes.def can also be changed. Example,
>> unrolling before SLP.
>
> As long as it runs before IVOpt so we get base+immediate addressing modes.
That's what I am doing now.

Thanks,
Kugan

>
> Wilco


Re: [PATCH/RFC] tree-if-conv.c: fix two ICEs seen with -fno-tree-forwprop (PR tree-optimization/84178)

2018-02-15 Thread David Malcolm
On Fri, 2018-02-09 at 12:02 +0100, Richard Biener wrote:
> On Thu, Feb 8, 2018 at 11:23 PM, David Malcolm 
> wrote:
> > PR tree-optimization/84178 reports a couple of source files that
> > ICE inside
> > ifcvt when compiled with -O3 -fno-tree-forwprop (trunk and gcc 7).
> > 
> > Both cases involve problems with ifcvt's per-BB gimplified
> > predicates.
> > 
> > Testcase 1 fails this assertion within release_bb_predicate during
> > cleanup:
> > 
> > 283   if (flag_checking)
> > 284 for (gimple_stmt_iterator i = gsi_start (stmts);
> > 285  !gsi_end_p (i); gsi_next (&i))
> > 286   gcc_assert (! gimple_use_ops (gsi_stmt (i)));
> > 
> > The testcase contains a division in the loop, which leads to
> > if_convertible_loop_p returning false (due to gimple_could_trap_p
> > being true
> > for the division).  This happens *after* the per-BB gimplified
> > predicates
> > have been created in predicate_bbs (loop).
> > Hence tree_if_conversion bails out to "cleanup", but the gimplified
> > predicates
> > exist and make use of SSA names; for example this conjunction for
> > two BB
> > conditions:
> > 
> >   _4 = h4.1_112 != 0;
> >   _175 = (signed char) _117;
> >   _176 = _175 >= 0;
> >   _174 = _4 & _176;
> > 
> > is using SSA names.
> 
> But then this shouldn't cause any stmt operands to be created - who
> is calling
> update_stmt () on a stmt using the SSA names?  Maybe something calls
> force_gimple_operand_gsi to add to the sequence?


The immediate use is created deep within folding when the gimplified
predicate is created.

Here's the backtrace of exactly where:

(gdb) bt
#0  link_imm_use_stmt (linknode=0x71a0b8d0, def=, 
stmt=)
at ../../src/gcc/ssa-iterators.h:307
#1  0x012531c5 in add_use_op (fn=0x71a03000, stmt=, op=0x71a236d8, 
last=0x7fffcb10) at ../../src/gcc/tree-ssa-operands.c:307
#2  0x01253607 in finalize_ssa_uses (fn=0x71a03000, 
stmt=)
at ../../src/gcc/tree-ssa-operands.c:410
#3  0x0125368b in finalize_ssa_stmt_operands (fn=0x71a03000, 
stmt=)
at ../../src/gcc/tree-ssa-operands.c:436
#4  0x01254b62 in build_ssa_operands (fn=0x71a03000, 
stmt=)
at ../../src/gcc/tree-ssa-operands.c:948
#5  0x012550df in update_stmt_operands (fn=0x71a03000, 
stmt=)
at ../../src/gcc/tree-ssa-operands.c:1081
#6  0x00c10642 in update_stmt_if_modified (s=) at ../../src/gcc/gimple-ssa.h:185
#7  0x00c10e82 in update_modified_stmts (seq=0x71a23690) at 
../../src/gcc/gimple-iterator.c:58
#8  0x00c111f1 in gsi_insert_seq_before (i=0x7fffcfb0, 
seq=0x71a23690, mode=GSI_SAME_STMT)
at ../../src/gcc/gimple-iterator.c:217
#9  0x00c241d0 in replace_stmt_with_simplification (gsi=0x7fffcfb0, 
rcode=..., ops=0x7fffcdb0, 
seq=0x7fffcdd8, inplace=false) at ../../src/gcc/gimple-fold.c:4473
#10 0x00c25a63 in fold_stmt_1 (gsi=0x7fffcfb0, inplace=false, 
valueize=0xc2663b )
at ../../src/gcc/gimple-fold.c:4775
#11 0x00c266b7 in fold_stmt (gsi=0x7fffcfb0) at 
../../src/gcc/gimple-fold.c:4996
#12 0x00c552b1 in maybe_fold_stmt (gsi=0x7fffcfb0) at 
../../src/gcc/gimplify.c:3193
#13 0x00c5f1e9 in gimplify_modify_expr (expr_p=0x7fffd328, 
pre_p=0x7fffd910, post_p=0x7fffd1e0, 
want_value=false) at ../../src/gcc/gimplify.c:5803
#14 0x00c7b461 in gimplify_expr (expr_p=0x7fffd328, 
pre_p=0x7fffd910, post_p=0x7fffd1e0, 
gimple_test_f=0xc5d723 , fallback=0) at 
../../src/gcc/gimplify.c:11434
#15 0x00c62661 in gimplify_stmt (stmt_p=0x7fffd328, 
seq_p=0x7fffd910) at ../../src/gcc/gimplify.c:6658
#16 0x00c4c449 in gimplify_and_add (t=, 
seq_p=0x7fffd910) at ../../src/gcc/gimplify.c:441
#17 0x00c4cc89 in internal_get_tmp_var (val=, 
pre_p=0x7fffd910, post_p=0x0, is_formal=true, 
allow_ssa=true) at ../../src/gcc/gimplify.c:597
#18 0x00c4ccd2 in get_formal_tmp_var (val=, 
pre_p=0x7fffd910) at ../../src/gcc/gimplify.c:618
#19 0x00c7ee6a in gimplify_expr (expr_p=0x71a261b0, 
pre_p=0x7fffd910, post_p=0x7fffd790, 
gimple_test_f=0xc0f6d0 , fallback=1) at 
../../src/gcc/gimplify.c:12383
#20 0x00c7e2e9 in gimplify_expr (expr_p=0x7fffd8b8, 
pre_p=0x7fffd910, post_p=0x7fffd790, 
gimple_test_f=0xc0ef75 , fallback=1) at 
../../src/gcc/gimplify.c:12160
#21 0x00c83de5 in force_gimple_operand_1 (expr=, stmts=0x7fffd910, 
gimple_test_f=0xc0ef75 , var=) at 
../../src/gcc/gimplify-me.c:78
#22 0x010c6387 in add_to_predicate_list (loop=0x71a0a330, 
bb=, 
nc=) at ../../src/gcc/tree-if-conv.c:535
#23 0x010c6480 in add_to_dst_predicate_list (loop=0x71a0a330, 
e= 10)>, 
prev_cond=, 

C++ PATCH for c++/84045, dependent typedefs and noexcept

2018-02-15 Thread Jason Merrill
This bug is a throwback to 50852 and such, where we treated a typedef
variant from dependent scope as equivalent to the underlying type,
which leads to crashes when we later try to instantiate that typedef
outside its scope.  To deal with that, I introduced strip_typedefs to
remove such offending typedefs from template arguments; it seems we
need to do the same things for exception-specifications.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit f98a61af4fc4d981b40eba7000bd3585003bf226
Author: Jason Merrill 
Date:   Thu Feb 15 16:06:02 2018 -0500

PR c++/84045 - ICE with typedef and noexcept.
* except.c (build_noexcept_spec): Use strip_typedefs_expr.

diff --git a/gcc/cp/except.c b/gcc/cp/except.c
index 669bf9f6eaf..0b46698b974 100644
--- a/gcc/cp/except.c
+++ b/gcc/cp/except.c
@@ -1217,6 +1217,10 @@ build_noexcept_spec (tree expr, int complain)
 {
   gcc_assert (processing_template_decl
  || TREE_CODE (expr) == DEFERRED_NOEXCEPT);
+  if (TREE_CODE (expr) != DEFERRED_NOEXCEPT)
+   /* Avoid problems with a function type built with a dependent typedef
+  being reused in another scope (c++/84045).  */
+   expr = strip_typedefs_expr (expr);
   return build_tree_list (expr, NULL_TREE);
 }
 }
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept32.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept32.C
new file mode 100644
index 000..9a435049599
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept32.C
@@ -0,0 +1,14 @@
+// PR c++/84045
+// { dg-do compile { target c++11 } }
+
+template  struct K {
+  static const bool d = true;
+};
+template  struct B {
+  typedef K D;
+  void foo () noexcept (D::d);
+};
+template  struct P {
+  P () noexcept (K::d);
+};
+P p;


[PATCH, rs6000] PR84371 Update builtins-3*.c tests for power9 codegen

2018-02-15 Thread Will Schmidt
Hi,
   This fixes the scan-assembler errors as seen on power9. While verifying
that fix and ensuring coverage, cleaned up some nearby bits as well.

builtins-3.c:  Add/update options and skip-if stanzas, this test specifically
targets vsx/power6.
builtins-3-p9.c:  Update stanzas to require -mcpu=power9 capability.  For better
coverage, added tests here (from builtins-3.c) that have different code-gen
when targeting p9.
builtins-3-p8.c:  Update stanzas to require -mcpu=power8 capability.
builtins-3-runnable.c: Update requires to indicate p8vector_hw is required
for running the test.   Fixed the do-not-override stanza typo so this test
can run.
builtins-3-vec_reve-runnable.c:  Update stanzas to require vsx_hw capability
to run.  Removed the skip-if stanza (by direct inspection, this runs OK on
P8,P9, and vsx_hw requirement prevents run attempts on earlier systems).

Sniff-testing across systems looks good.

Will kick off a regtest in clean environments shortly.

OK for trunk?  (and gcc-7 backport)?

Thanks
-Will

[testsuite]
2018-02-15  Will Schmidt  

target/pr84371
* gcc.target/powerpc/builtins-3-p8.c: Update stanzas.
* gcc.target/powerpc/builtins-3-p9.c: Update stanzas, Enhance test.
* gcc.target/powerpc/builtins-3-runnable.c: Update stanzas.
* gcc.target/powerpc/builtins-3-vec_reve-runnable.c: Same.
* gcc.target/powerpc/builtins-3.c: Same.

diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c
index 83d538b..a586805 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c
@@ -1,8 +1,9 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target powerpc_p8vector_ok } */
-/* { dg-options "-mcpu=power8" } */
+/* { dg-options "-maltivec -mcpu=power8" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power8" } } */
 
 #include <altivec.h>
 
 vector bool long long
 test_eq_long_long (vector bool long long x, vector bool long long y)
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
index 9dc53da..146f8b7 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
@@ -1,8 +1,9 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mcpu=power9 -O1" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power9" } } */
 
 #include <altivec.h>
 
 vector bool char
 test_ne_char (vector bool char x, vector bool char y)
@@ -45,28 +46,78 @@ test_vull_bperm_vull_vuc (vector unsigned long long x,
   vector unsigned char y)
 {
return vec_bperm (x, y);
 }
 
+vector signed char
+test_nabs_char (vector signed char x)
+{
+   return vec_nabs (x);
+}
+
+vector short
+test_nabs_short (vector short x)
+{
+  return vec_nabs (x);
+}
+
+vector int
+test_nabs_int (vector int x)
+{
+  return vec_nabs (x);
+}
+
+
+vector signed char
+test_neg_char (vector signed char x)
+{
+   return vec_neg (x);
+}
+
+vector short
+test_neg_short (vector short x)
+{
+   return vec_neg (x);
+}
+
+vector int
+test_neg_int (vector int x)
+{
+   return vec_neg (x);
+}
+
 /* Expected test results:
 
  test_ne_char  1 vcmpneb
  test_ne_short 1 vcmpneh
  test_ne_int   1 vcmpnew
  test_ne_long  1 vcmpequd, 1 xxlnor inst
  test_neg_long_long1 vnegd
  test_vull_bperm_vull_vuc  1 vbpermd
  test_nabs_long_long (-O0) 1 xxspltib, 1 vsubudm, 1 vminsd
  test_nabs_long_long (-O1) 1 vnegd, vminsd
+ test_nabs_char (P9)   1 xxspltib, 1 vsububm, 1 vminsb
+ test_nabs_short (P9)  1 xxspltib, 1 vsubuhm, 1 vminsh
+ test_nabs_int (P9)1 vnegw, 1 vminsw
+ test_neg_char (P9)1 xxspltib, 1 vsububm
+ test_neg_short (P9)   1 xxspltib, 1 vsubuhm
+ test_neg_int (P9) 1 vnegw
 */
 
 /* { dg-final { scan-assembler-times "vcmpneb"  1 } } */
 /* { dg-final { scan-assembler-times "vcmpneh"  1 } } */
 /* { dg-final { scan-assembler-times "vcmpnew"  1 } } */
 /* { dg-final { scan-assembler-times "vcmpequd" 1 } } */
 /* { dg-final { scan-assembler-times "xxlnor"   1 } } */
-/* { dg-final { scan-assembler-times "xxspltib" 0 } } */
+/* { dg-final { scan-assembler-times "xxspltib" 4 } } */
 /* { dg-final { scan-assembler-times "vsubudm"  0 } } */
+/* { dg-final { scan-assembler-times "vsububm"  2 } } */
+/* { dg-final { scan-assembler-times "vsubuhm"  2 } } */
+/* { dg-final { scan-assembler-times "vsubuwm"  0 } } */
+/* { dg-final { scan-assembler-times "vminsb"   1 } } */
+/* { dg-final { scan-assembler-times "vminsh"   1 } } */
+/* { dg-final { scan-assembler-times "vminsw"   1 } } */
 /* { dg-final { scan-assembler-times "vminsd"   1 } } */
 /* { dg-final { scan-assembler-times 

Re: plugin-api.h patch to add a new interface for linker plugins

2018-02-15 Thread Sriraman Tallam via gcc-patches
Ping,  this patch was approved for binutils by Cary:
https://sourceware.org/ml/binutils/2017-12/msg00023.html

Is it ok to apply this to GCC include/plugin-api.h now?  It is a
pretty small change. Patch attached.

* plugin-api.h (ld_plugin_get_wrap_symbols): New
  plugin interface.


Thanks
Sri

On Fri, Dec 8, 2017 at 11:02 AM, Sriraman Tallam  wrote:
> Patch attached.
>
> * plugin-api.h (ld_plugin_get_wrap_symbols): New
>   plugin interface.
>
> On Fri, Dec 8, 2017 at 11:01 AM, Sriraman Tallam  wrote:
>> Hi,
>>
>>This patch was approved for binutils by Cary:
>> https://sourceware.org/ml/binutils/2017-12/msg00023.html
>>
>>Is it ok to apply this to GCC include/plugin-api.h ?
>>
>> Thanks
>> Sri
* plugin-api.h (ld_plugin_get_wrap_symbols): New
  plugin interface.

Index: include/plugin-api.h
===
--- include/plugin-api.h(revision 255515)
+++ include/plugin-api.h(working copy)
@@ -378,7 +378,15 @@
 enum ld_plugin_status
 (*ld_plugin_register_new_input) (ld_plugin_new_input_handler handler);
 
+/* The linker's interface for getting the list of wrapped symbols using the
+   --wrap option. This sets *NUM_SYMBOLS to number of wrapped symbols and
+   *WRAP_SYMBOL_LIST to the list of wrapped symbols. */
 
+typedef
+enum ld_plugin_status
+(*ld_plugin_get_wrap_symbols) (uint64_t *num_symbols,
+   const char ***wrap_symbol_list);
+
 enum ld_plugin_level
 {
   LDPL_INFO,
@@ -422,7 +430,8 @@
   LDPT_GET_SYMBOLS_V3 = 28,
   LDPT_GET_INPUT_SECTION_ALIGNMENT = 29,
   LDPT_GET_INPUT_SECTION_SIZE = 30,
-  LDPT_REGISTER_NEW_INPUT_HOOK = 31
+  LDPT_REGISTER_NEW_INPUT_HOOK = 31,
+  LDPT_GET_WRAP_SYMBOLS = 32
 };
 
 /* The plugin transfer vector.  */
@@ -457,6 +466,7 @@
 ld_plugin_get_input_section_alignment tv_get_input_section_alignment;
 ld_plugin_get_input_section_size tv_get_input_section_size;
 ld_plugin_register_new_input tv_register_new_input;
+ld_plugin_get_wrap_symbols tv_get_wrap_symbols;
   } tv_u;
 };
 


RE: PR84239, Reimplement CET intrinsics for rdssp/incssp insn

2018-02-15 Thread Joseph Myers
On Thu, 15 Feb 2018, Tsimbalist, Igor V wrote:

> I haven't managed to run it through 
> ./glibc/glibc.sourceware/scripts/build-many-glibcs.py. I did bootstrap 
> and CET tests.
> 
> Ok for trunk?

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [Patch, Fortran, F03] PR 84409: check DTIO arguments for character len

2018-02-15 Thread Janus Weil
2018-02-15 22:03 GMT+01:00 Steve Kargl :
> On Thu, Feb 15, 2018 at 09:55:58PM +0100, Janus Weil wrote:
>> 2018-02-15 21:16 GMT+01:00 Steve Kargl :
>> >
>> > If the above is a numbered constraint in the Standard,
>> > can you add a comment of the form /* F03:C.  */
>> > above the if-statement.
>>
>> Well, the if statement that I'm adding is not covered by a single
>> clause in the standard. Instead the DTIO interfaces are described as a
>> whole in a chapter, whose number I'm now adding in the comment above
>> the function:
>
> Thanks.  The source code has become sufficiently complicated
> that a small comment references the Standard simply aids (me)
> in debugging/reading the code.

Absolutely. It's important to provide clear links between the standard
and the implementation.

I have committed the patch as r257711.

Cheers,
Janus


PR libstdc++/81797 Add .NOTPARALLEL to include/Makefile for darwin

2018-02-15 Thread Jonathan Wakely
This adds the special .NOTPARALLEL target to include/Makefile when
building on darwin, to work around an apparent bug in the APFS
filesystem.


PR libstdc++/81797
* configure.ac (INCLUDE_DIR_NOTPARALLEL): Define.
* configure: Regenerate.
* include/Makefile.am (INCLUDE_DIR_NOTPARALLEL): Add .NOTPARALLEL when
defined.
* include/Makefile.in: Regenerate.


Tested powerpc64le-linux, and FX reports it works on Darwin. I'll
backport this to the active branches too.
commit 35204421965912b09146585a143189d0d61d23ec
Author: Jonathan Wakely 
Date:   Fri Oct 27 01:09:52 2017 +0100

PR libstdc++/81797 Add .NOTPARALLEL to include/Makefile for darwin

PR libstdc++/81797
* configure.ac (INCLUDE_DIR_NOTPARALLEL): Define.
* configure: Regenerate.
* include/Makefile.am (INCLUDE_DIR_NOTPARALLEL): Add .NOTPARALLEL 
when
defined.
* include/Makefile.in: Regenerate.

diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
index e110f579917..0ef96270c9c 100644
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -473,6 +473,12 @@ AM_CONDITIONAL(BUILD_PDF,
   test $ac_cv_prog_DBLATEX = "yes" &&
   test $ac_cv_prog_PDFLATEX = "yes")
 
+case "$build" in
+ *-*-darwin* ) glibcxx_include_dir_notparallel=yes ;;
+ * ) glibcxx_include_dir_notparallel=no ;;
+esac
+AM_CONDITIONAL(INCLUDE_DIR_NOTPARALLEL,
+   test $glibcxx_include_dir_notparallel = "yes")
 
 # Propagate the target-specific source directories through the build chain.
 ATOMICITY_SRCDIR=config/${atomicity_dir}
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 7cf0ef93eb4..70a662fa2f9 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -1479,3 +1479,8 @@ $(decimal_headers): ; @:
 $(ext_headers): ; @:
 $(experimental_headers): ; @:
 $(experimental_bits_headers): ; @:
+
+if INCLUDE_DIR_NOTPARALLEL
+# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81797
+.NOTPARALLEL:
+endif


Re: RFA: Sanitize deprecation messages (PR 84195)

2018-02-15 Thread David Malcolm
On Fri, 2018-02-09 at 13:01 +, Nick Clifton wrote:
> Hi David, Hi Martin,
> 
>   OK, take 3 of the patch is attached.  In this version:
> 
>   * The escaping is handled by a new class.
>   * Self-tests have been added (and they helped find a bug - yay!)
>   * The documentation has been extended to mention -fmessage-length's
> effect on the #-directives, and to add documentation for #pragma
> GCC error and #pragma GCC warning.  (Which appeared to be
> missing).
> 
>   I did try to add backslash characters to the regexp in the new
> testcase
>   but I just could not find a way to persuade dejagnu to accept them.
> 
>   OK to apply ?
> 
>   (Apologies for the poor C++ coding - it is not my strong point.  I
> am
>   an assembler level programmer at heart).
> 
> Cheers
>   Nick

Thanks Nick, sorry for inflicting C++ on you.

This looks much better.  Out of interest, what bug did the selftests
help find?

Various nitpicks below

> gcc/ChangeLog
> 2018-02-09  Nick Clifton  
> 
>   PR 84195
>   * tree.c (escaped_string): New class.  Converts an unescaped
>   string into its escaped equivalent.
>   (warn_deprecated_use): Use the new class to convert the
>   deprecation message, if present.
>   (test_escaped_strings): New self test.
>   (test_c_tests): Add test_escaped_strings.

This doesn't mention the changes to the docs.

> gcc/testsuite/ChangeLog
> 2018-02-07  Nick Clifton  
> 
>   * gcc.c-torture/compile/pr84195.c: New test.

> Index: gcc/tree.c
> ===
> --- gcc/tree.c(revision 257519)
> +++ gcc/tree.c(working copy)
> @@ -12416,11 +12416,99 @@
>return is_typedef_decl (TYPE_NAME (type));
>  }
>  
> +// A class to handle converting a string that might contain
> +// control characters, (eg newline, form-feed, etc), into one
> +// in which contains escape sequences instead.

We're still mostly using C-style comments for blocks, though I don't
think we have an actual rule about this.

> +class escaped_string
> +{
> + public:
> +  escaped_string () { m_owned = false; m_str = NULL; };
> +  ~escaped_string () { if (m_owned) free (m_str); }
> +  operator const char *() const { return (const char *) m_str; }
> +  void escape (const char *);
> + private:
> +  char * m_str;
> +  bool   m_owned;
> +};

I'd hoped that instead of this we could have an escape_string function
return an instance of a class that has responsibility for the "if
(ownership) free" dtor, but I don't think C++98 supports that (I had a
go at implementing it, but I think we'd need C++11's move semantics,
rather than just the NRVO).

So the approach above is OK by me (which, given that I suggested it,
may seem redundant :) )

> +void
> +escaped_string::escape (const char * unescaped)
> +{
> +  /* PR 84195: Replace control characters in "unescaped" with their
> + escaped equivalents.  Allow newlines if -fmessage-length has
> + been set to a non-zero value.  This is done here, rather than
> + where the attribute is recorded as the message length can
> + change between these two locations.  */

Move this comment to outside the function, as a descriptive comment for
the function as a whole.

> +  char * escaped;
> +  size_t i, new_i, len;
> +
> +  if (m_owned)
> +free (m_str);
> +
> +  m_str = (char *) unescaped;
> +  m_owned = false;
> +
> +  if (unescaped == NULL || *unescaped == 0)
> +return;

Is NULL a valid input here?  If so, please add test coverage for that
to the selftest.

The "*unescaped == 0" is presumably a micro-optimization for the
strlen(unescaped) == 0 case, as it appears the code already handles this
case.

> +  len = strlen (unescaped);
> +  escaped = NULL;
> +  new_i = 0;
> +
> +  for (i = 0; i < len; i++)
> +{
> +  char c = unescaped[i];
> +
> +  if (! ISCNTRL (c))
> + {
> +   if (escaped)
> + escaped[new_i++] = c;
> +   continue;
> + }
> +
> +  if (c != '\n' || ! pp_is_wrapping_line (global_dc->printer))
> + {
> +   if (escaped == NULL)
> + {
> +   /* We only allocate space for a new string if we
> +  actually encounter a control character that
> +  needs replacing.  */
> +   escaped = (char *) xmalloc (len * 2 + 1);
> +   strncpy (escaped, unescaped, i);
> +   new_i = i;
> + }
> +
> +   escaped [new_i++] = '\\';

Some of the spacing in this function looks a bit odd to my eyes, but
you're much more familiar with the GNU standards than me, and I don't
want to be too nitpicky  (e.g. the space before an array access, and
the space after '!').

> +
> +   switch (c)
> + {
> + case '\a': escaped[new_i++] = 'a'; break;
> + case '\b': escaped[new_i++] = 'b'; break;
> + case '\f': escaped[new_i++] = 'f'; break;
> + case '\n': escaped[new_i++] = 'n'; break;
> + case '\r': escaped[new_i++] = 

Re: [Patch, Fortran, F03] PR 84409: check DTIO arguments for character len

2018-02-15 Thread Steve Kargl
On Thu, Feb 15, 2018 at 09:55:58PM +0100, Janus Weil wrote:
> 2018-02-15 21:16 GMT+01:00 Steve Kargl :
> >
> > If the above is a numbered constraint in the Standard,
> > can you add a comment of the form /* F03:C.  */
> > above the if-statement.
> 
> Well, the if statement that I'm adding is not covered by a single
> clause in the standard. Instead the DTIO interfaces are described as a
> whole in a chapter, whose number I'm now adding in the comment above
> the function:

Thanks.  The source code has become sufficiently complicated
that a small comment referencing the Standard simply aids (me)
in debugging/reading the code.

-- 
Steve


Re: [Patch, Fortran, F03] PR 84409: check DTIO arguments for character len

2018-02-15 Thread Janus Weil
2018-02-15 21:16 GMT+01:00 Steve Kargl :
> On Thu, Feb 15, 2018 at 09:03:55PM +0100, Janus Weil wrote:
>>
>> Regtests cleanly on x86_64-linux-gnu. Ok for trunk?
>>
>
> Looks good to me with a question below.

Thanks for the feedback, Steve.


>> Index: gcc/fortran/interface.c
>> ===
>> --- gcc/fortran/interface.c   (revision 257672)
>> +++ gcc/fortran/interface.c   (working copy)
>> @@ -4702,6 +4702,10 @@ check_dtio_arg_TKR_intent (gfc_symbol *fsym, bool
>>  gfc_error ("DTIO dummy argument at %L must be an "
>>  "ASSUMED SHAPE ARRAY", >declared_at);
>>
>> +  if (type == BT_CHARACTER && fsym->ts.u.cl->length != NULL)
>> +gfc_error ("DTIO character argument at %L must have assumed length",
>> +   >declared_at);
>> +
>
> If the above is a numbered constraint in the Standard,
> can you add a comment of the form /* F03:C.  */
> above the if-statement.

Well, the if statement that I'm adding is not covered by a single
clause in the standard. Instead the DTIO interfaces are described as a
whole in a chapter, whose number I'm now adding in the comment above
the function:


Index: gcc/fortran/interface.c
===
--- gcc/fortran/interface.c(revision 257672)
+++ gcc/fortran/interface.c(working copy)
@@ -4673,7 +4673,7 @@ gfc_check_typebound_override (gfc_symtree* proc, g

 /* The following three functions check that the formal arguments
of user defined derived type IO procedures are compliant with
-   the requirements of the standard.  */
+   the requirements of the standard, see F03:9.5.3.7.2 (F08:9.6.4.8.3).  */

 static void
 check_dtio_arg_TKR_intent (gfc_symbol *fsym, bool typebound, bt type,
@@ -4702,6 +4702,10 @@ check_dtio_arg_TKR_intent (gfc_symbol *fsym, bool
 gfc_error ("DTIO dummy argument at %L must be an "
"ASSUMED SHAPE ARRAY", >declared_at);

+  if (type == BT_CHARACTER && fsym->ts.u.cl->length != NULL)
+gfc_error ("DTIO character argument at %L must have assumed length",
+   >declared_at);
+
   if (fsym->attr.intent != intent)
 gfc_error ("DTIO dummy argument at %L must have INTENT %s",
>declared_at, gfc_code2string (intents, (int)intent));



Will commit this shortly.

Cheers,
Janus


2018-02-15 21:16 GMT+01:00 Steve Kargl :
> On Thu, Feb 15, 2018 at 09:03:55PM +0100, Janus Weil wrote:
>>
>> Regtests cleanly on x86_64-linux-gnu. Ok for trunk?
>>
>
> Looks good to me with a question below.
>
>> Index: gcc/fortran/interface.c
>> ===
>> --- gcc/fortran/interface.c   (revision 257672)
>> +++ gcc/fortran/interface.c   (working copy)
>> @@ -4702,6 +4702,10 @@ check_dtio_arg_TKR_intent (gfc_symbol *fsym, bool
>>  gfc_error ("DTIO dummy argument at %L must be an "
>>  "ASSUMED SHAPE ARRAY", >declared_at);
>>
>> +  if (type == BT_CHARACTER && fsym->ts.u.cl->length != NULL)
>> +gfc_error ("DTIO character argument at %L must have assumed length",
>> +   >declared_at);
>> +
>
> If the above is a numbered constraint in the Standard,
> can you add a comment of the form /* F03:C.  */
> above the if-statement.
>
> --
> Steve


Re: [RFC][AARCH64] Machine reorg pass for aarch64/Falkor to handle prefetcher tag collision

2018-02-15 Thread Kugan Vivekanandarajah
Hi,

On 14 February 2018 at 09:47, Kugan Vivekanandarajah
 wrote:
> Hi Kyrill,
>
> On 13 February 2018 at 20:47, Kyrill  Tkachov
>  wrote:
>> Hi Kugan,
>>
>> On 12/02/18 23:58, Kugan Vivekanandarajah wrote:
>>>
>>> Implements a machine reorg pass for aarch64/Falkor to handle
>>> prefetcher tag collision. This is strictly not part of the loop
>>> unroller but for Falkor, unrolling can make h/w prefetcher performing
>>> badly if there are too much tag collisions based on the discussions in
>>> https://gcc.gnu.org/ml/gcc/2017-10/msg00178.html.
>>>
>>
>> Could you expand a bit more on what transformation exactly this pass does?
>
> This is similar to what LLVM does in https://reviews.llvm.org/D35366.
>
> Falkor hardware prefetcher works well when signature of the prefetches
> (or tags as computed in the patch - similar to LLVM) are different for
> different memory streams. If different memory streams  have the same
> signature, it can result in bad performance. This machine reorg pass
> tries to change the signature of memory loads by changing the base
> register with a free register.
>
>> From my understanding the loads that use the same base
>> register and offset and have the same destination register
>> are considered part of the same stream by the hardware prefetcher, so for
>> example:
>> ldr x0, [x1, 16] (load1)
>> ... (set x1 to something else)
>> ldr x0, [x1, 16] (load2)
>>
>> will cause the prefetcher to think that both loads are part of the same
>> stream,
>> so this pass tries to rewrite the sequence into:
>> ldr x0, [x1, 16]
>> ... (set x1 to something else)
>> mov tmp, x1
>> ldr x0, [tmp, 16]
>>
>> Where the tag/signature is the combination of destination x0, base x1 and
>> offset 16.
>> Is this a fair description?
>
> This is precisely what is happening.
>
>>
>> I've got some comments on the patch itself
>>
>>> gcc/ChangeLog:
>>>
>>> 2018-02-12  Kugan Vivekanandarajah 
>>>
>>> * config/aarch64/aarch64.c (iv_p): New.
>>> (strided_load_p): Likwise.
>>> (make_tag): Likesie.
>>> (get_load_info): Likewise.
>>> (aarch64_reorg): Likewise.
>>> (TARGET_MACHINE_DEPENDENT_REORG): Implement new target hook.
>>
>>
>> New functions need function comments describing the arguments at least.
>> Functions like make_tag, get_load_info etc can get tricky to maintain
>> without
>> some documentation on what they are supposed to accept and return.
>
> I wil add the comments.
>
>>
>> I think the pass should be enabled at certain optimisation levels, say -O2?
>> I don't think it would be desirable at -Os since it creates extra moves that
>> increase code size.
>
> Ok, I will change this.
>
>>
>> That being said, I would recommend you implement this as an aarch64-specific
>> pass,
>> in a similar way to cortex-a57-fma-steering.c. That way you can register it
>> in
>> aarch64-passes.def and have flexibility as to when exactly the pass gets to
>> run
>> (i.e. you wouldn't be limited by when machine_reorg gets run).
>>
>> Also, I suggest you don't use the "if (aarch64_tune != falkor) return;" way
>> of
>> gating this pass. Do it in a similar way to the FMA steering pass that is,
>> define a new flag in aarch64-tuning-flags.def and use it in the tune_flags
>> field
>> of the falkor tuning struct.
>
> Ok, I will revise the patch.

Here is the revised patch.

Thanks,
Kugan

gcc/ChangeLog:

2018-02-15  Kugan Vivekanandarajah  

* config.gcc: Add falkor-tag-collision-avoidance.o to extra_objs for
aarch64-*-*.
* config/aarch64/aarch64-protos.h
(make_pass_tag_collision_avoidance): Declare.
* config/aarch64/aarch64-passes.def: Insert tag collision avoidance pass.
* config/aarch64/aarch64-tuning-flags.def
(AARCH64_EXTRA_TUNE_AVOID_PREFETCH_TAG_COLLISION): Define.
* config/aarch64/aarch64.c (qdf24xx_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PREFETCH_TAG_COLLISION.
* config/aarch64/falkor-tag-collision-avoidance.c: New file.
* config/aarch64/t-aarch64: Add falkor-tag-collision-avoidance.o.


>
>
> Thanks,
> Kugan
>
>>
>> Hope this helps,
>> Kyrill
diff --git a/gcc/config.gcc b/gcc/config.gcc
index eca156a..c3f3e1a 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -303,7 +303,7 @@ aarch64*-*-*)
extra_headers="arm_fp16.h arm_neon.h arm_acle.h"
c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
-   extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o"
+   extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o 
falkor-tag-collision-avoidance.o"
target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c"
target_has_targetm_common=yes
;;
diff --git a/gcc/config/aarch64/aarch64-passes.def 
b/gcc/config/aarch64/aarch64-passes.def
index 87747b4..d4b6a43 100644
--- a/gcc/config/aarch64/aarch64-passes.def
+++ b/gcc/config/aarch64/aarch64-passes.def
@@ -19,3 +19,4 @@

Re: update LTO test harness README

2018-02-15 Thread David Malcolm
On Thu, 2018-02-15 at 10:54 +0100, Richard Biener wrote:
> On Wed, 14 Feb 2018, Martin Sebor wrote:
> 
> > I was excited to find out about the recent enhancement to
> > the LTO test harness to support the new dg-lto-warning and
> > dg-lto-message directives (thanks, David).
> > 
> > To make them easier to find and use (there is a C++ LTO test
> > that uses them but no C tests yet) the attached patch updates
> > the README to document them.  While I was at it I made a few
> > minor cosmetic improvements to the README as well.
> > 
> > Let me know if I didn't get something quite right or if there
> > is something else that might be worth mentioning in the README.
> 
> Looks good to me.
> 
> Btw, do the new dg-lto-warning/message directives handle
> fat LTO objects correctly where diagnostics usually appear twice,
> once for the fat part generation and once for the ltrans part?

I didn't implement any logic for that, so I suspect not.

> I wonder if we can build upon those new directives to finally
> add scan-lto-tree-dump for dg-final as well... (the complication
> here is to get at the dump file name but if we know the link
> stage is executed we can provide a prefix to the driver IIRC).
> 
> Richard.


Re: [Patch, Fortran, F03] PR 84409: check DTIO arguments for character len

2018-02-15 Thread Steve Kargl
On Thu, Feb 15, 2018 at 09:03:55PM +0100, Janus Weil wrote:
> 
> Regtests cleanly on x86_64-linux-gnu. Ok for trunk?
> 

Looks good to me with a question below.

> Index: gcc/fortran/interface.c
> ===
> --- gcc/fortran/interface.c   (revision 257672)
> +++ gcc/fortran/interface.c   (working copy)
> @@ -4702,6 +4702,10 @@ check_dtio_arg_TKR_intent (gfc_symbol *fsym, bool
>  gfc_error ("DTIO dummy argument at %L must be an "
>  "ASSUMED SHAPE ARRAY", >declared_at);
>  
> +  if (type == BT_CHARACTER && fsym->ts.u.cl->length != NULL)
> +gfc_error ("DTIO character argument at %L must have assumed length",
> +   >declared_at);
> +

If the above is a numbered constraint in the Standard,
can you add a comment of the form /* F03:C.  */
above the if-statement.

-- 
Steve


[PATCHv2][PR target/81535] Fix tests on Power

2018-02-15 Thread Yury Gribov
Hi all,

This is an updated version of patch for PR 81535.  The patch was last
discussed in November.  The new version addresses Segher's comments in
https://gcc.gnu.org/ml/gcc-patches/2017-08/msg00558.html

The patch was tested on
  x86_64-pc-linux-gnu
  powerpc64-unknown-linux-gnu
  powerpc64le-unknown-linux-gnu
by running
  $ make check-gcc RUNTESTFLAGS='powerpc.exp=pr79439-*.c dg.exp=pr56727-*.c '
  $ make check-c RUNTESTFLAGS='powerpc.exp=pr79439-*.c
dg.exp=pr56727-*.c --target_board="unix/-m32"'
(I didn't perform full testing as I don't change anything in compiler).

FTR here's an excerpt from
https://gcc.gnu.org/ml/gcc-patches/2017-11/msg02231.html which
explains the reasoning behind PR 56727 (which introduced regression in
PR 81535):

>> Is it correct that current GCC does not do the call via the PLT?
>
> Well, it was decided in
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56727 that it would be a
> valid optimization because the only way to expose the difference would
> be through dlsym hackery.  Note that original PowerPC use-case
> (reported in https://sourceware.org/bugzilla/show_bug.cgi?id=21116)
> would benefit from this optimization as because PLT call + indirection
> would be replaced by normal PC-relative call.

-Y


pr81535-2.patch
Description: Binary data


[Patch, Fortran, F03] PR 84409: check DTIO arguments for character len

2018-02-15 Thread Janus Weil
Hi all,

attached is another simple patch for an accepts-invalid problem (this
time concerning DTIO), also curing an invalid test case.

Regtests cleanly on x86_64-linux-gnu. Ok for trunk?

Cheers,
Janus



2018-02-15  Janus Weil  

PR fortran/84409
* interface.c (check_dtio_arg_TKR_intent): Add a check for character
length.


2018-02-15  Janus Weil  

PR fortran/84409
* gfortran.dg/dtio_21.f90: Add an error message.
* gfortran.dg/dtio_22.f90: Fix invalid test case.
Index: gcc/fortran/interface.c
===
--- gcc/fortran/interface.c (revision 257672)
+++ gcc/fortran/interface.c (working copy)
@@ -4702,6 +4702,10 @@ check_dtio_arg_TKR_intent (gfc_symbol *fsym, bool
 gfc_error ("DTIO dummy argument at %L must be an "
   "ASSUMED SHAPE ARRAY", >declared_at);
 
+  if (type == BT_CHARACTER && fsym->ts.u.cl->length != NULL)
+gfc_error ("DTIO character argument at %L must have assumed length",
+   >declared_at);
+
   if (fsym->attr.intent != intent)
 gfc_error ("DTIO dummy argument at %L must have INTENT %s",
   >declared_at, gfc_code2string (intents, (int)intent));
Index: gcc/testsuite/gfortran.dg/dtio_21.f90
===
--- gcc/testsuite/gfortran.dg/dtio_21.f90   (revision 257672)
+++ gcc/testsuite/gfortran.dg/dtio_21.f90   (working copy)
@@ -19,10 +19,10 @@ program p
allocate(z2)
print *, z2
   contains
-   subroutine wf2(this, a, b, c, d, e)
+   subroutine wf2(this, a, b, c, d, e)  ! { dg-error "must have assumed 
length" }
   class(t2), intent(in) :: this
   integer, intent(in) :: a
-  character, intent(in) :: b
+  character(*), intent(in) :: b
   integer, intent(in) :: c(:)
   integer, intent(out) :: d
   character, intent(inout) :: e
Index: gcc/testsuite/gfortran.dg/dtio_22.f90
===
--- gcc/testsuite/gfortran.dg/dtio_22.f90   (revision 257672)
+++ gcc/testsuite/gfortran.dg/dtio_22.f90   (working copy)
@@ -15,10 +15,10 @@ contains
   subroutine wf(this, unit, b, c, iostat, iomsg)
 class(t), intent(in) :: this
 integer, intent(in) :: unit
-character, intent(in) :: b
+character(*), intent(in) :: b
 integer, intent(in) :: c(:)
 integer, intent(out) :: iostat
-character, intent(inout) :: iomsg
+character(*), intent(inout) :: iomsg
 write (unit, "(i3)", IOSTAT=iostat, IOMSG=iomsg) this%i
   end subroutine
 end


Re: [fortran, test case, committed]

2018-02-15 Thread Janus Weil
2018-02-15 20:53 GMT+01:00 Thomas Koenig :
> Hi Janus,
>
>> apparently your commit only includes the ChangeLog addition, but not
>> the new test case itself. I guess you missed an "svn add"? :)
>
>
> Actually, I messed up the ChangeLog entry in r257702 and corrected that
> one in r257703 :-)
>
> The test case is there.

Ah, ok. Only saw r257703 in the PR. Thanks for the clarification!


Cheers,
Janus


Re: [fortran, test case, committed]

2018-02-15 Thread Thomas Koenig

Hi Janus,


apparently your commit only includes the ChangeLog addition, but not
the new test case itself. I guess you missed an "svn add"? :)


Actually, I messed up the ChangeLog entry in r257702 and corrected that
one in r257703 :-)

The test case is there.

Regards

Thomas


C++ PATCH for c++/84376, ICE with missing template arguments

2018-02-15 Thread Jason Merrill
Here we are asked deduction_guide_p about a TEMPLATE_ID_EXPR; give the
right answer.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 6ef5c80e09542b55e5dc4e66db87c78adfc0a266
Author: Jason Merrill 
Date:   Thu Feb 15 13:37:58 2018 -0500

PR c++/84376 - ICE with omitted template arguments.

* pt.c (dguide_name_p): Check for IDENTIFIER_NODE.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index cd1aed8d677..268cfe5a454 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -25454,7 +25454,8 @@ dguide_name (tree tmpl)
 bool
 dguide_name_p (tree name)
 {
-  return (TREE_TYPE (name)
+  return (TREE_CODE (name) == IDENTIFIER_NODE
+ && TREE_TYPE (name)
  && !strncmp (IDENTIFIER_POINTER (name), dguide_base,
   strlen (dguide_base)));
 }
diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction47.C 
b/gcc/testsuite/g++.dg/cpp1z/class-deduction47.C
new file mode 100644
index 000..3e47f58e698
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction47.C
@@ -0,0 +1,7 @@
+// PR c++/84376
+
+template struct A {};
+
+template T foo() { return T(); }
+
+template<> A foo(); // { dg-error "A" }


Re: [fortran, test case, committed]

2018-02-15 Thread Janus Weil
Hi Thomas,

apparently your commit only includes the ChangeLog addition, but not
the new test case itself. I guess you missed an "svn add"? :)

Cheers,
Janus


2018-02-15 19:47 GMT+01:00 Thomas Koenig :
> Hi,
>
> I just committed the test case below as obvious, after testing.
> Just to make sure that, when we convert all test cases from
> CALL ABORT to "STOP 1" (or whatever), we actually get a failure
> when we have a regression.
>
> Regards
>
> Thomas
>
> 2018-02-15  Thomas Koenig  
>
> PR fortran/84381
> * gfortran.dg/stop_shouldfail.f90: New test.
>


Re: [PATCH, libgomp, testsuite] Move tests to libgomp.c-c++-common

2018-02-15 Thread Jason Merrill
OK.

On Fri, Dec 15, 2017 at 4:06 AM, Tom de Vries  wrote:
> [ was: Re: [PATCH, PR81844] Fix condition folding in c_parser_omp_for_loop ]
>
> On 09/14/2017 09:38 PM, Jakub Jelinek wrote:
>>
>> On Thu, Sep 14, 2017 at 07:34:14PM +, de Vries, Tom wrote:
>>
>>> --- a/libgomp/testsuite/libgomp.c++/c++.exp
>>> +++ b/libgomp/testsuite/libgomp.c++/c++.exp
>>> @@ -22,6 +22,11 @@ dg-init
>>>   # Turn on OpenMP.
>>>   lappend ALWAYS_CFLAGS "additional_flags=-fopenmp"
>>>   +# Switch into C++ mode.  Otherwise, the libgomp.c-c++-common/*.c
>>> +# files would be compiled as C files.
>>> +set SAVE_GCC_UNDER_TEST "$GCC_UNDER_TEST"
>>> +set GCC_UNDER_TEST "$GCC_UNDER_TEST -x c++"
>>> +
>>>   set blddir [lookfor_file [get_multilibs] libgomp]
>>> @@ -47,7 +52,9 @@ if { $blddir != "" } {
>>> if { $lang_test_file_found } {
>>>   # Gather a list of all tests.
>>> -set tests [lsort [find $srcdir/$subdir *.C]]
>>> +set tests [lsort [concat \
>>> + [find $srcdir/$subdir *.C] \
>>> + [find $srcdir/$subdir/../libgomp.c-c++-common
>>> *.c]]]
>>> if { $blddir != "" } {
>>>   set ld_library_path
>>> "$always_ld_library_path:${blddir}/${lang_library_path}"
>>
>>
>> I don't see SAVE_GCC_UNDER_TEST being used anywhere after it is set.
>> Did you mean to set GCC_UNDER_TEST back to SAVE_GCC_UNDER_TEST at the end
>> of
>> c++.exp?
>> libgomp.oacc-c++/c++.exp has:
>> # See above.
>> set GCC_UNDER_TEST "$SAVE_GCC_UNDER_TEST"
>>
>> Otherwise LGTM, thanks.
>
>
> Hi,
>
> now that we have libgomp.c-c++-common, this patch moves tests from libgomp.c
> to libgomp.c-c++-common and removes the corresponding libgomp.c++ tests that
> have been made redundant.
>
> Tested on x86_64, no issues found.
>
> OK for trunk?
>
> Thanks,
> - Tom
>
> [ For reference, the git commit output related to renaming:
>
>  rename {libgomp.c => libgomp.c-c++-common}/atomic-18.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/cancel-taskgroup-2.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/for-1.c (82%)
>  rename {libgomp.c => libgomp.c-c++-common}/for-1.h (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/for-2.c (87%)
>  rename {libgomp.c => libgomp.c-c++-common}/for-2.h (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/for-3.c (95%)
>  rename {libgomp.c => libgomp.c-c++-common}/for-4.c (84%)
>  rename {libgomp.c => libgomp.c-c++-common}/for-5.c (96%)
>  rename {libgomp.c => libgomp.c-c++-common}/for-6.c (95%)
>  rename {libgomp.c => libgomp.c-c++-common}/loop-13.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/loop-14.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/loop-15.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/monotonic-1.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/monotonic-2.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/nonmonotonic-1.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/nonmonotonic-2.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/ordered-4.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr45784.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr64824.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr64868.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr66199-1.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr66199-2.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr66199-3.c (96%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr66199-4.c (97%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr66199-5.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr66199-6.c (96%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr66199-7.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr66199-8.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr66199-9.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/pr69389.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/simd-14.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/simd-15.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/simd-16.c (96%)
>  rename {libgomp.c => libgomp.c-c++-common}/simd-17.c (96%)
>  rename {libgomp.c => libgomp.c-c++-common}/target-1.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/target-10.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/target-13.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/target-2.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/taskgroup-1.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/taskloop-1.c (89%)
>  rename {libgomp.c => libgomp.c-c++-common}/taskloop-2.c (97%)
>  rename {libgomp.c => libgomp.c-c++-common}/taskloop-3.c (95%)
>  rename {libgomp.c => libgomp.c-c++-common}/taskloop-4.c (100%)
>  rename {libgomp.c => libgomp.c-c++-common}/udr-1.c (100%)
> ]


Re: [C++] [PR84231] overload on cond_expr in template

2018-02-15 Thread Jason Merrill
On Thu, Feb 8, 2018 at 9:09 PM, Alexandre Oliva  wrote:
> + /* If it was supposed to be an rvalue but it's not, adjust
> +one of the operands so that any overload resolution
> +taking this COND_EXPR as an operand makes the correct
> +decisions.  See c++/84231.  */
> + TREE_OPERAND (min, 2) = build1_loc (loc, NON_LVALUE_EXPR,
> + TREE_TYPE (min),
> + TREE_OPERAND (min, 2));
> + EXPR_LOCATION_WRAPPER_P (TREE_OPERAND (min, 2)) = 1;

But that's not true, this isn't a location wrapper, it has semantic
effect.  And would be the first such use of NON_LVALUE_EXPR in a
template.

Since we're already using the type of the COND_EXPR to indicate a
glvalue, maybe lvalue_kind should say that within a template, a
COND_EXPR which got past the early check for reference type is a
prvalue.

Jason


[fortran, test case, committed]

2018-02-15 Thread Thomas Koenig

Hi,

I just committed the test case below as obvious, after testing.
Just to make sure that, when we convert all test cases from
CALL ABORT to "STOP 1" (or whatever), we actually get a failure
when we have a regression.

Regards

Thomas

2018-02-15  Thomas Koenig  

PR fortran/84381
* gfortran.dg/stop_shouldfail.f90: New test.

! { dg-do  run }
! { dg-shouldfail "STOP 1" }
program main
  stop 1
end program main


Re: [PATCH v6] aarch64: Add split-stack support

2018-02-15 Thread Adhemerval Zanella


On 13/02/2018 13:13, Szabolcs Nagy wrote:
> On 07/02/18 18:07, Adhemerval Zanella wrote:
>  5. The TCB support on GLIBC is meant to be included in version 2.28.
>>
> ...
>> +/* -fsplit-stack uses a TCB field available on glibc-2.27.  GLIBC also
>> +   exports symbol, __tcb_private_ss, to signal it has the field available
>> +   on TCB bloc.  This aims to prevent binaries linked against newer
>> +   GLIBC to run on non-supported ones.  */
> 
> 
> i suspect this needs to be updated since the glibc patch
> is not committed yet.
> 
> (i'll review the glibc patch, if it looks ok then it can
> be committed after the gcc side is accepted.)

I fixed the commit message locally, thanks for checking on this.

> 
>> +
>> +static bool
>> +aarch64_supports_split_stack (bool report ATTRIBUTE_UNUSED,
>> +  struct gcc_options *opts ATTRIBUTE_UNUSED)
>> +{
>> +#ifndef TARGET_GLIBC_MAJOR
>> +#define TARGET_GLIBC_MAJOR 0
>> +#endif
>> +#ifndef TARGET_GLIBC_MINOR
>> +#define TARGET_GLIBC_MINOR 0
>> +#endif
>> +  /* Note: Can't test DEFAULT_ABI here, it isn't set until later.  */
>> +  if (TARGET_GLIBC_MAJOR * 1000 + TARGET_GLIBC_MINOR >= 2026)
>> +    return true;
>> +
>> +  if (report)
>> +    error ("%<-fsplit-stack%> currently only supported on AArch64 GNU/Linux 
>> with glibc-2.27 or later");
>> +  return false;
>> +}
>> +
>> +#undef TARGET_SUPPORTS_SPLIT_STACK
>> +#define TARGET_SUPPORTS_SPLIT_STACK aarch64_supports_split_stack
>> +


C++ PATCH to update __cpp_deduction_guides

2018-02-15 Thread Jason Merrill
I implemented P0512 back in July, but forgot to update the value of
the feature-test macro.
commit 1d440f3731f7350426a019be7bbb636d1db47808
Author: Jason Merrill 
Date:   Thu Feb 15 13:11:52 2018 -0500

* c-cppbuiltin.c (c_cpp_builtins): Update __cpp_deduction_guides.

diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index 0624c006278..3fc4fa9cd68 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -970,7 +970,7 @@ c_cpp_builtins (cpp_reader *pfile)
  cpp_define (pfile, "__cpp_capture_star_this=201603");
  cpp_define (pfile, "__cpp_inline_variables=201606");
  cpp_define (pfile, "__cpp_aggregate_bases=201603");
- cpp_define (pfile, "__cpp_deduction_guides=201606");
+ cpp_define (pfile, "__cpp_deduction_guides=201611");
  cpp_define (pfile, "__cpp_noexcept_function_type=201510");
  cpp_define (pfile, "__cpp_template_auto=201606");
  cpp_define (pfile, "__cpp_structured_bindings=201606");
diff --git a/gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C 
b/gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C
index b2f046b5c91..2ab1b4e3e86 100644
--- a/gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C
+++ b/gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C
@@ -358,8 +358,8 @@
 
 #ifndef __cpp_deduction_guides
 #  error "__cpp_deduction_guides"
-#elif __cpp_deduction_guides != 201606
-#  error "__cpp_deduction_guides != 201606"
+#elif __cpp_deduction_guides != 201611
+#  error "__cpp_deduction_guides != 201611"
 #endif
 
 #ifndef __cpp_if_constexpr


C++ PATCH for c++/84368, wrong error with local in variadic lambda

2018-02-15 Thread Jason Merrill
I started adding non-pack locals to PACK_EXPANSION_EXTRA_ARGS, but was
still only looking up specializations for the packs...

Tested x86_64-pc-linux-gnu, applying to trunk.
commit c61b45ad2d0886dc53dde7ba95382efa29288b2d
Author: Jason Merrill 
Date:   Thu Feb 15 12:56:47 2018 -0500

PR c++/84368 - wrong error with local variable in variadic lambda.

* pt.c (tsubst_pack_expansion): Fix handling of non-packs in
local_specializations.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 3ac7adba00c..cd1aed8d677 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -11521,8 +11521,9 @@ tsubst_pack_expansion (tree t, tree args, 
tsubst_flags_t complain,
 context.  */
  tree gen = TREE_PURPOSE (elt);
  tree inst = TREE_VALUE (elt);
- if (DECL_PACK_P (inst))
-   inst = retrieve_local_specialization (inst);
+ if (DECL_P (inst))
+   if (tree local = retrieve_local_specialization (inst))
+ inst = local;
  /* else inst is already a full instantiation of the pack.  */
  register_local_specialization (inst, gen);
}
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-variadic14.C 
b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-variadic14.C
new file mode 100644
index 000..76567966293
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-variadic14.C
@@ -0,0 +1,17 @@
+// PR c++/84368
+// { dg-do compile { target c++14 } }
+
+template < typename ... T >
+void sink(T ...){}
+
+template < typename ... T >
+void foo(T ... v){
+[](auto ... v){
+auto bar = [](auto, auto){ return 0; };
+sink(bar(v, T{}) ...);
+}(v ...);
+}
+
+int main(){
+foo(0);
+}


Re: [testsuite, committed] Require et alloca for Wstringop-overflow-3.c

2018-02-15 Thread Martin Sebor

Thanks.  I've seen these fixes before but I still keep forgetting
to add the directive.  It would be nice if the test harness could
detect this automatically for us and remind us to add the directive
even on targets where alloca is supported.

Some sort of a script (or even DejaGnu itself) would do it for
explicit calls to alloca but I can't think of a solution for
VLAs.  Do you have any idea if that might be doable and how?

Martin

On 02/15/2018 07:10 AM, Tom de Vries wrote:

Hi,

this patch requires effective target alloca for test-case
Wstringop-overflow-3.c.

Committed.

Thanks,
- Tom




Re: [C++ Patch] PR 84330 ("[6/7/8 Regression] [concepts] ICE with broken constraint")

2018-02-15 Thread Jason Merrill
OK.

On Thu, Feb 15, 2018 at 9:29 AM, Paolo Carlini  wrote:
> Hi,
>
> we have been accumulating quite a few bugs under the [concepts] meta-bug,
> most of which of course aren't regressions. This one is a low hanging fruit,
> an error recovery issue where, after some meaningful diagnostic,
> tsubst_constraint doesn't know how to handle an error_mark_node. I believe
> that ideally we should do better, we should be able to issue only the first
> error - I think a TODO in diagnose_constraint hints at that too - but the
> below should do for now (well, in 6.1.0 we issued *3* errors ;-)
>
> Thanks, Paolo.
>
> 
>


Re: [PATCH] correct -Wrestrict handling of arrays of arrays (PR 84095)

2018-02-15 Thread Martin Sebor

On 02/13/2018 11:14 PM, Jeff Law wrote:

On 02/01/2018 04:45 PM, Martin Sebor wrote:

The previous patch didn't resolve all the false positives
in the Linux kernel.  The attached is an update that fixes
the remaining one having to do with multidimensional array
members:

  struct S { char a[2][4]; };

  void f (struct S *p, int i)
  {
strcpy (p->a[0], "012");
strcpy (p->a[i] + 1, p->a[0]);   // false positive here
  }

In the process of fixing this I also made a couple of minor
restructuring changes to the builtin_memref constructor
in order to make the code easier to follow: I broke it out
into a couple of helper functions and called those.

As with the first revision of the patch, this one is also
meant to be applied on top of

  https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01488.html

Sorry about the late churn.  Even though I tested the original
implementation with the Linux kernel the bugs were only exposed in
non-default configurations that I didn't build.

Jakub, you had concerns about the code in the constructor
and about interpreting the offsets in the diagnostics.
I tried to address those in the patch.  Please review
the changes and let me know if you have any further comments.

Thanks
Martin

On 01/30/2018 04:19 PM, Martin Sebor wrote:

Testing GCC 8 with recent Linux kernel sources has uncovered
a bug in the handling of arrays of arrays by the -Wrestrict
checker where it fails to take references to different array
elements into consideration, issuing false positives.

The attached patch corrects this mistake.

In addition, to make warnings involving excessive offset bounds
more meaningful (less confusing), I've made a cosmetic change
to constrain them to the bounds of the accessed object.  I've
done this in response to multiple comments indicating that
the warnings are hard to interpret.  This change is meant to
be applied on top of the patch for bug 83698 (submitted mainly
to improve the readability of the offsets):

  https://gcc.gnu.org/ml/gcc-patches/2018-01/msg01488.html

Martin



gcc-84095.diff


PR middle-end/84095 - false-positive -Wrestrict warnings for memcpy within array

gcc/ChangeLog:

PR middle-end/84095
* gimple-ssa-warn-restrict.c (builtin_memref::extend_offset_range): New.
(builtin_memref::set_base_and_offset): Same.  Handle inner references.
(builtin_memref::builtin_memref): Factor out parts into
set_base_and_offset and call it.

gcc/testsuite/ChangeLog:

PR middle-end/84095
* c-c++-common/Warray-bounds-3.c: Adjust text of expected warnings.
* c-c++-common/Wrestrict.c: Same.
* gcc.dg/Wrestrict-6.c: Same.
* gcc.dg/Warray-bounds-27.c: New test.
* gcc.dg/Wrestrict-8.c: New test.
* gcc.dg/Wrestrict-9.c: New test.
* gcc.dg/pr84095.c: New test.

diff --git a/gcc/gimple-ssa-warn-restrict.c b/gcc/gimple-ssa-warn-restrict.c
index 528eb5b..367e05f 100644
--- a/gcc/gimple-ssa-warn-restrict.c
+++ b/gcc/gimple-ssa-warn-restrict.c



+  else if (gimple_nop_p (stmt))
+   expr = SSA_NAME_VAR (expr);
+  else
+   {
+ base = expr;
+ return;
}

This looks odd.  Can you explain what you're trying to do here?

I'm not sure offhand why you'd ever want to extract SSA_NAME_VAR.  In general
its primary use is for dumps and debugging info.  I won't quite go so
far as to say using it for anything else is wrong, but it's certainly
something you ought to explain.


It appears to be dead code.  Nothing in the GCC test suite hits
this code.  It's most likely a vestige of an approach I tried
that didn't work and that I ended up doing differently and forgot
to remove.  I'll remove it before committing.


The rest looks fairly reasonable.  It's a bit hard to follow, but I
don't think we should do another round of refactoring at this stage.


Is the patch good to commit then with the unused code above
removed?

Martin



Re: [PATCH] __VA_OPT__ fixes (PR preprocessor/83063, PR preprocessor/83708)

2018-02-15 Thread Jason Merrill
On Thu, Feb 15, 2018 at 4:27 AM, Jakub Jelinek  wrote:
> On Thu, Feb 15, 2018 at 01:12:08AM -0500, Jason Merrill wrote:
>> > This is just a partial fix, one thing this patch doesn't change is that
>> > the standard says that __VA_OPT__ ( contents ) should be treated as
>> > parameter, which means that #__VA_OPT__ ( contents ) should stringify it,
>> > which we right now reject.  My preprocessor knowledge is too limited to
>> > handle this right myself, including all the corner cases, e.g. one can have
>> > #define f(x, ...) #__VA_OPT__(#x x ## x) etc..  I presume
>> > m_flags = token->flags & (PREV_FALLTHROUGH | PREV_WHITE);
>> > could be changed into:
>> > m_flags = token->flags & (PREV_FALLTHROUGH | PREV_WHITE | STRINGIFY_ARG);
>> > and when handling the PADDING result from update, we could just emit a
>> > "" token, but for INCLUDE_FIRST with this we'd need something complex,
>> > probably a new routine similar to stringify_arg to some extent.
>>
>> Yes, I think long term we really need to treat __VA_OPT__ more like an
>> argument.
>>
>> The first patch below makes your testcases work in what seems to me a
>> simpler way: pad when we see __VA_OPT__ if we aren't pasting to the left,
>> and fix up the end of the body if we're pasting to the right.
>>
>> The second further patch below makes the rest of the clang testcase work the
>> way it does in clang, apart from stringification.  But it feels more
>> kludgey.
>>
>> Thoughts?
>
> Both patches LGTM, thanks for looking at this.  If you apply the second patch,
> you might want to apply also following incremental patch with some additional
> tests from my (failed) attempt to extend the patch further (this passes with
> your second patch).

Great, thanks.  I kept poking at it this morning; instead of checking
the last emitted token for PASTE_LEFT, this version checks whether
it's at the beginning of __VA_OPT__, which seems safer.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 4a0e6c48c397f4cd6bb654c770f4d8d97de015cc
Author: Jason Merrill 
Date:   Wed Feb 14 13:59:22 2018 -0500

PR preprocessor/83063 - __VA_OPT__ and ##

PR preprocessor/83708
* macro.c (vaopt_state): Reorder m_last_was_paste before m_state.
(vaopt_state::vaopt_state): Adjust.
(vaopt_state::update_flags): Add BEGIN and END.
(vaopt_state::update): Return them.
(copy_paste_flag): Factor out of replace_args.
(last_token_is): New.
(replace_args): Handle BEGIN and END.  Avoid padding there.
(tokens_buff_last_token_ptr): Return NULL if no tokens.

diff --git a/libcpp/macro.c b/libcpp/macro.c
index f994ac584cc..776af7bd00e 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -105,8 +105,8 @@ class vaopt_state {
 : m_pfile (pfile),
 m_allowed (any_args),
 m_variadic (is_variadic),
-m_state (0),
 m_last_was_paste (false),
+m_state (0),
 m_paste_location (0),
 m_location (0)
   {
@@ -116,7 +116,9 @@ class vaopt_state {
   {
 ERROR,
 DROP,
-INCLUDE
+INCLUDE,
+BEGIN,
+END
   };
 
   /* Given a token, update the state of this tracker and return a
@@ -139,7 +141,7 @@ class vaopt_state {
  }
++m_state;
m_location = token->src_loc;
-   return DROP;
+   return BEGIN;
   }
 else if (m_state == 1)
   {
@@ -191,7 +193,7 @@ class vaopt_state {
return ERROR;
  }
 
-   return DROP;
+   return END;
  }
  }
return m_allowed ? INCLUDE : DROP;
@@ -220,6 +222,9 @@ class vaopt_state {
   bool m_allowed;
   /* True if the macro is variadic.  */
   bool m_variadic;
+  /* If true, the previous token was ##.  This is used to detect when
+ a paste occurs at the end of the sequence.  */
+  bool m_last_was_paste;
 
   /* The state variable:
  0 means not parsing
@@ -228,9 +233,6 @@ class vaopt_state {
  >= 3 means looking for ")", the number encodes the paren depth.  */
   int m_state;
 
-  /* If true, the previous token was ##.  This is used to detect when
- a paste occurs at the end of the sequence.  */
-  bool m_last_was_paste;
   /* The location of the paste token.  */
   source_location m_paste_location;
 
@@ -1701,6 +1703,30 @@ expanded_token_index (cpp_reader *pfile, cpp_macro 
*macro,
   return cur_replacement_token - macro->exp.tokens;
 }
 
+/* Copy whether PASTE_LEFT is set from SRC to *PASTE_FLAG.  */
+
+static void
+copy_paste_flag (cpp_reader *pfile, const cpp_token **paste_flag,
+const cpp_token *src)
+{
+  cpp_token *token = _cpp_temp_token (pfile);
+  token->type = (*paste_flag)->type;
+  token->val = (*paste_flag)->val;
+  if (src->flags & PASTE_LEFT)
+token->flags = (*paste_flag)->flags | PASTE_LEFT;
+  else
+token->flags = (*paste_flag)->flags & ~PASTE_LEFT;
+  *paste_flag = token;
+}
+
+/* True IFF the last token emitted 

RE: PR84239, Reimplement CET intrinsics for rdssp/incssp insn

2018-02-15 Thread Tsimbalist, Igor V


Igor


> -Original Message-
> From: Joseph Myers [mailto:jos...@codesourcery.com]
> Sent: Thursday, February 15, 2018 1:24 AM
> To: Tsimbalist, Igor V 
> Cc: Sandra Loosemore ; gcc-
> patc...@gcc.gnu.org; Uros Bizjak 
> Subject: RE: PR84239, Reimplement CET intrinsics for rdssp/incssp insn
> 
> This patch has broken bootstrap of a cross toolchain for x86_64 (the case
> where inhibit_libc is defined because there is no libc for the target
> available at that stage in the bootstrap process).
> 
> In file included from
> /scratch/jmyers/glibc-bot/build/compilers/x86_64-linux-gnu/gcc-
> first/gcc/include/xmmintrin.h:34,
>  from
> /scratch/jmyers/glibc-bot/build/compilers/x86_64-linux-gnu/gcc-
> first/gcc/include/x86intrin.h:33,
>  from
> /scratch/jmyers/glibc-bot/src/gcc/libgcc/config/i386/shadow-stack-
> unwind.h:25,
>  from ./md-unwind-support.h:27,
>  from 
> /scratch/jmyers/glibc-bot/src/gcc/libgcc/unwind-dw2.c:411:
> ../../.././gcc/mm_malloc.h:27:10: fatal error: stdlib.h: No such file or 
> directory
>  #include <stdlib.h>
>   ^~
> 
> https://sourceware.org/ml/libc-testresults/2018-q1/msg00307.html
> 
> The patch makes shadow-stack-unwind.h include <x86intrin.h>, which ends
> up
> including <mm_malloc.h>, which includes <stdlib.h> and <errno.h>
> unconditionally.  You can't include any libc system headers
> unconditionally from libgcc (only when inhibit_libc is not defined - and
> <mm_malloc.h>, being an installed header, can't test inhibit_libc because
> it's in the user's namespace).  So I think you need to avoid the
> mm_malloc.h include here somehow (without adding any inhibit_libc
> conditionals to installed headers).

Here is a proposed patch

diff --git a/libgcc/config/i386/shadow-stack-unwind.h 
b/libgcc/config/i386/shadow-stack-unwind.h
index 416e061..b7c3d98 100644
--- a/libgcc/config/i386/shadow-stack-unwind.h
+++ b/libgcc/config/i386/shadow-stack-unwind.h
@@ -22,7 +22,14 @@ a copy of the GCC Runtime Library Exception along with this 
program;
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */

-#include <x86intrin.h>
+/* NB: We need _get_ssp and _inc_ssp from <cetintrin.h>.  But we can't
+   include <x86intrin.h> which ends up including <mm_malloc.h>, which
+   includes <stdlib.h> and <errno.h> unconditionally.  But we can't
+   include any libc system headers unconditionally from libgcc.  Avoid
+   including <mm_malloc.h> here by defining _IMMINTRIN_H_INCLUDED.  */
+#define _IMMINTRIN_H_INCLUDED
+#include <cetintrin.h>
+#undef _IMMINTRIN_H_INCLUDED

 /* Unwind the shadow stack for EH.  */
 #undef _Unwind_Frames_Extra

I haven't managed to run it through 
./glibc/glibc.sourceware/scripts/build-many-glibcs.py. I did bootstrap and CET 
tests.

Ok for trunk?

Igor

> --
> Joseph S. Myers
> jos...@codesourcery.com


Re: [SFN+LVU+IEPM v4 9/9] [IEPM] Introduce inline entry point markers

2018-02-15 Thread Szabolcs Nagy

On 13/02/18 13:43, Alexandre Oliva wrote:

On Feb 12, 2018, Alexandre Oliva  wrote:


This patch supersedes the previous one.  Testing underway...  Ok if it
succeeds?


I failed to update the patch I posted after making a correction to symbol
poisoning, that had caused builds to fail right away, sorry.  Thanks,
Rainer, for catching the error.

Here's the patch that actually passed regstrap on native i686 and
x86_64-linux-gnu, and fixed numerous regressions on cross builds.
Ok to install?


[LVU, IEPM] several new controlling options

Given that the minimum insn length is not generally reliable to tell
whether an insn actually advances PC, this patch disables the locview
list optimizations that can only be done when we can tell it.

The preexisting logic is retained, however, and can be enabled with
the newly-introduced -ginternal-reset-location-view.  This is now
enabled by default only if the target defines a hook that may override
or defer to the preexisting logic.  The negated command line option
can then be used should errors still be encountered.


We also introduce options to control whether to assume .loc and view
support in the assembler, and to control whether to output inline
entry points (and views) from markers.



i see assembler slow downs with these location view patches
i opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84408


Re: RFA: PATCH to build_type_attribute_qual_variant for c++/84314, ICE with fastcall

2018-02-15 Thread Richard Biener
On Wed, Feb 14, 2018 at 6:10 PM, Jason Merrill  wrote:
> This testcase involves a fastcall-qualified function type.  During
> mangling, we use build_type_attribute_qual_variant to look up an
> attribute-unqualified version of that type.
> build_type_attribute_qual_variant calls type_hash_canon and finds the
> original unqualified type, but then clobbers its TYPE_CANONICAL
> because it's incompatible with the fastcall-qualified type.
>
> Fixed by leaving TYPE_CANONICAL of a previously existing type alone.
>
> Tested x86_64-pc-linux-gnu.  OK for trunk?

Ok.


[C++ Patch] PR 84330 ("[6/7/8 Regression] [concepts] ICE with broken constraint")

2018-02-15 Thread Paolo Carlini

Hi,

we have been accumulating quite a few bugs under the [concepts] 
meta-bug, most of which of course aren't regressions. This one is a low 
hanging fruit, an error recovery issue where, after some meaningful 
diagnostic, tsubst_constraint doesn't know how to handle an 
error_mark_node. I believe that ideally we should do better, we should 
be able to issue only the first error - I think a TODO in 
diagnose_constraint hints at that too - but the below should do for now 
(well, in 6.1.0 we issued *3* errors ;-)


Thanks, Paolo.



/cp
2018-02-15  Paolo Carlini  

PR c++/84330
* constraint.cc (tsubst_constraint_info): Handle an error_mark_node
as first argument.

/testsuite
2018-02-15  Paolo Carlini  

PR c++/84330
* g++.dg/concepts/pr84330.C: New.
Index: cp/constraint.cc
===
--- cp/constraint.cc(revision 257682)
+++ cp/constraint.cc(working copy)
@@ -1918,7 +1918,7 @@ tsubst_constraint_info (tree t, tree args,
 tree
 tsubst_constraint (tree t, tree args, tsubst_flags_t complain, tree in_decl)
 {
-  if (t == NULL_TREE)
+  if (t == NULL_TREE || t == error_mark_node)
 return t;
   switch (TREE_CODE (t))
   {
Index: testsuite/g++.dg/concepts/pr84330.C
===
--- testsuite/g++.dg/concepts/pr84330.C (nonexistent)
+++ testsuite/g++.dg/concepts/pr84330.C (working copy)
@@ -0,0 +1,12 @@
+// PR c++/84330
+// { dg-options "-fconcepts" }
+
+struct A
+{
+  template<typename T> requires sizeof(T) >> 0 void foo(T);  // { dg-error 
"predicate constraint" }
+
+  void bar()
+  {
+foo(0);  // { dg-error "no matching" }
+  }
+};


Re: [PING][PATCH v3] Disable reg offset in quad-word store for Falkor

2018-02-15 Thread Wilco Dijkstra
Hi Siddhesh,

I still don't like the idea of disabling a whole class of instructions in the 
md file.
It seems much better to adjust the costs here so that you get most of the
improvement now, and fine tune it once we can differentiate between
loads and stores.

Taking your example, adding -funroll-loops generates this for Falkor:

ldr q7, [x2, x18]
add x5, x18, 16
add x4, x1, x18
add x10, x18, 32
add x11, x1, x5
add x3, x18, 48
add x12, x1, x10
add x9, x18, 64
add x14, x1, x3
add x8, x18, 80
add x15, x1, x9
add x7, x18, 96
add x16, x1, x8
str q7, [x4]
ldr q16, [x2, x5]
add x6, x18, 112
add x17, x1, x7
add x18, x18, 128
add x5, x1, x6
cmp x18, x13
str q16, [x11]
ldr q17, [x2, x10]
str q17, [x12]
ldr q18, [x2, x3]
str q18, [x14]
ldr q19, [x2, x9]
str q19, [x15]
ldr q20, [x2, x8]
str q20, [x16]
ldr q21, [x2, x7]
str q21, [x17]
ldr q22, [x2, x6]
str q22, [x5]
bne .L25

If you adjust costs however you'd get this:

.L25:
ldr q7, [x14]
add x14, x14, 128
add x4, x4, 128
str q7, [x4, -128]
ldr q16, [x14, -112]
str q16, [x4, -112]
ldr q17, [x14, -96]
str q17, [x4, -96]
ldr q18, [x14, -80]
str q18, [x4, -80]
ldr q19, [x14, -64]
str q19, [x4, -64]
ldr q20, [x14, -48]
str q20, [x4, -48]
ldr q21, [x14, -32]
str q21, [x4, -32]
ldr q22, [x14, -16]
cmp x14, x9
str q22, [x4, -16]
bne .L25

So it seems to me using existing cost mechanisms is always preferable, even if 
you
currently can't differentiate between loads and stores.

Wilco

[testsuite, committed] Require et alloca for Wstringop-overflow-3.c

2018-02-15 Thread Tom de Vries

Hi,

this patch requires effective target alloca for test-case 
Wstringop-overflow-3.c.


Committed.

Thanks,
- Tom
[testsuite] Require et alloca for Wstringop-overflow-3.c

2018-02-15  Tom de Vries  

	* gcc.dg/Wstringop-overflow-3.c: Require effective target alloca.

---
 gcc/testsuite/gcc.dg/Wstringop-overflow-3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-3.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-3.c
index 5901844..6c8cbf3 100644
--- a/gcc/testsuite/gcc.dg/Wstringop-overflow-3.c
+++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-3.c
@@ -1,6 +1,7 @@
 /* PR tree-optimization/84238 */
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
+/* { dg-require-effective-target alloca } */
 
 char a[1];
 int b;


[testsuite, committed] Require et indirect_jumps for pr84136.c

2018-02-15 Thread Tom de Vries

Hi,

this patch requires effective target indirect_jumps for test-case pr84136.c.

Committed.

Thanks,
- Tom
[testsuite] Require et indirect_jumps for pr84136.c

2018-02-15  Tom de Vries  

	* gcc.c-torture/compile/pr84136.c: Require effective target
	indirect_jumps.

---
 gcc/testsuite/gcc.c-torture/compile/pr84136.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr84136.c b/gcc/testsuite/gcc.c-torture/compile/pr84136.c
index 0a70e4e..aa6afb8 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr84136.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr84136.c
@@ -1,3 +1,5 @@
+/* { dg-require-effective-target indirect_jumps } */
+
 void* a;
 
 void foo() {


Re: [PATCH] RX TARGET_RTX_COSTS function

2018-02-15 Thread Oleg Endo
On Wed, 2018-02-14 at 01:06 +0900, Oleg Endo wrote:
> 
> Do you happen to have any other numbers on the resulting code
> size/speed?  Looking at the new costs that the patch introduces, I'd
> expect there to be some more changes than just the 1/x...
> 

I've checked your proposed patch with the CSiBE set for code size
changes.

With your patch, the code size of the whole set:
sum:  2806044 -> 2801346    -4698 / -0.167424 %


Taking out this piece

    case IF_THEN_ELSE:
  *total = COSTS_N_INSNS (3);
  return true;

from the rx_rtx_costs results in:
sum:  2806044 -> 2801099    -4945 / -0.176227 %


Taking out another piece 

      if (GET_CODE (XEXP (x, 0)) == MEM
 || GET_CODE (XEXP (x, 1)) == MEM)
   *total = COSTS_N_INSNS (3);
  else

results in:
sum:  2806044 -> 2800315    -5729 / -0.204166 %

So I'd like to propose the attached patch instead, as it eliminates 1
KByte of code more from the whole set.

Just in case, I'm testing it now with
  "make -k check" on rx-sim for c and c++

OK for trunk if it passes?

Cheers,
Oleg

gcc/ChangeLog:
* config/rx/rx.c (rx_rtx_costs): New function.
(TARGET_RTX_COSTS): Override to use rx_rtx_costs.

Index: gcc/config/rx/rx.c
===
--- gcc/config/rx/rx.c	(revision 257655)
+++ gcc/config/rx/rx.c	(working copy)
@@ -2976,6 +2976,62 @@
 }
 
 static bool
+rx_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
+	  int opno ATTRIBUTE_UNUSED, int* total, bool speed)
+{
+  if (x == const0_rtx)
+{
+  *total = 0;
+  return true;
+}
+
+  switch (GET_CODE (x))
+{
+case MULT:
+  if (mode == DImode)
+	{
+	  *total = COSTS_N_INSNS (2);
+	  return true;
+	}
+  /* fall through */
+
+case PLUS:
+case MINUS:
+case AND:
+case COMPARE:
+case IOR:
+case XOR:
+  *total = COSTS_N_INSNS (1);
+  return true;
+
+case DIV:
+  if (speed)
+	/* This is the worst case for a division.  Pessimize divisions when
+	   not optimizing for size and allow reciprocal optimizations which
+	   produce bigger code.  */
+	*total = COSTS_N_INSNS (20);
+  else
+	*total = COSTS_N_INSNS (3);
+  return true;
+
+case UDIV:
+  if (speed)
+	/* This is the worst case for a division.  Pessimize divisions when
+	   not optimizing for size and allow reciprocal optimizations which
+	   produce bigger code.  */
+	*total = COSTS_N_INSNS (18);
+  else
+	*total = COSTS_N_INSNS (3);
+  return true;
+
+default:
+  break;
+}
+
+  return false;
+}
+
+static bool
 rx_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
 {
   /* We can always eliminate to the frame pointer.
@@ -3709,6 +3765,9 @@
 #undef  TARGET_MODES_TIEABLE_P
 #define TARGET_MODES_TIEABLE_P			rx_modes_tieable_p
 
+#undef  TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS rx_rtx_costs
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-rx.h"


Re: [Patch][aarch64][PR target/83335] Fix regression, ICE on gcc.target/aarch64/asm-2.c

2018-02-15 Thread Richard Earnshaw (lists)
On 05/01/18 22:14, Steve Ellcey wrote:
> This is a fix for PR target/83335.  We are asserting in
> aarch64_print_address_internal because we have a non Pmode
> address coming from an asm instruction.  My fix is to 
> just allow this by checking this_is_asm_operands.  This is
> what it was doing before the assert was added that caused
> the ICE.
> 
> Verified that it fixed gcc.target/aarch64/asm-2.c in ILP32
> mode and that it caused no regressions.
> 
> Steve Ellcey
> sell...@cavium.com
> 
> 
> 2018-01-05  Steve Ellcey  
> 
>   PR target/83335
>   * config/aarch64/aarch64.c (aarch64_print_address_internal):
>   Allow non Pmode address in asm statements.
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index a189605..af74212 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -5670,8 +5670,9 @@ aarch64_print_address_internal (FILE *f, machine_mode 
> mode, rtx x,
>  {
>    struct aarch64_address_info addr;
>  
> -  /* Check all addresses are Pmode - including ILP32.  */
> -  gcc_assert (GET_MODE (x) == Pmode);
> +  /* Check all addresses are Pmode - including ILP32,
> + unless this is coming from an asm statement.  */
> +  gcc_assert (GET_MODE (x) == Pmode || this_is_asm_operands);
>  
>    if (aarch64_classify_address (&addr, x, mode, true, type))
>  switch (addr.type)
> 

Wouldn't it be better to call output_operand_lossage() with a suitable
diagnostic message?  If the operand isn't in Pmode assembly will
(should) fail anyway.

R.


Re: [PATCH] __VA_OPT__ fixes (PR preprocessor/83063, PR preprocessor/83708)

2018-02-15 Thread Jakub Jelinek
On Thu, Feb 15, 2018 at 01:12:08AM -0500, Jason Merrill wrote:
> > This is just a partial fix, one thing this patch doesn't change is that
> > the standard says that __VA_OPT__ ( contents ) should be treated as
> > parameter, which means that #__VA_OPT__ ( contents ) should stringify it,
> > which we right now reject.  My preprocessor knowledge is too limited to
> > handle this right myself, including all the corner cases, e.g. one can have
> > #define f(x, ...) #__VA_OPT__(#x x ## x) etc..  I presume
> > m_flags = token->flags & (PREV_FALLTHROUGH | PREV_WHITE);
> > could be changed into:
> > m_flags = token->flags & (PREV_FALLTHROUGH | PREV_WHITE | STRINGIFY_ARG);
> > and when handling the PADDING result from update, we could just emit a
> > "" token, but for INCLUDE_FIRST with this we'd need something complex,
> > probably a new routine similar to stringify_arg to some extent.
> 
> Yes, I think long term we really need to treat __VA_OPT__ more like an
> argument.
> 
> The first patch below makes your testcases work in what seems to me a
> simpler way: pad when we see __VA_OPT__ if we aren't pasting to the left,
> and fix up the end of the body if we're pasting to the right.
> 
> The second further patch below makes the rest of the clang testcase work the
> way it does in clang, apart from stringification.  But it feels more
> kludgey.
> 
> Thoughts?

Both patches LGTM, thanks for looking at this.  If you apply the second patch,
you might want to apply also following incremental patch with some additional
tests from my (failed) attempt to extend the patch further (this passes with
your second patch).

--- gcc/testsuite/c-c++-common/cpp/va-opt-3.c   2018-01-09 17:20:14.985142201 
+0100
+++ gcc/testsuite/c-c++-common/cpp/va-opt-3.c   2018-01-09 17:54:17.564372639 
+0100
@@ -19,6 +19,15 @@
 #define f13(...) __VA_OPT__(a)__VA_OPT__(b)c
 #define f14(a, b, c, ...) __VA_OPT__(a)__VA_OPT__(b)c
 #define f15(a, b, c, ...) __VA_OPT__(a b)__VA_OPT__(b c)a/**/__VA_OPT__(c a)a
+#define m1 (
+#define f16() f17 m1 )
+#define f17() f18 m1 )
+#define f18() m2 m1 )
+#define m3f17() g
+#define f19(x, ...) m3 ## __VA_OPT__(x x f16() #x)
+#define f20(x, ...) __VA_OPT__(x x)##m4()
+#define f21() f17
+#define f17m4() h
 t1 f1 (1, 2, 3);
 /* { dg-final { scan-file va-opt-3.i "t1 bc;" } } */
 t2 f1 ();
@@ -69,0 +79,4 @@
+t25 f19 (f16 (), 1);
+/* { dg-final { scan-file va-opt-3.i "t25 g f18 \\( \\) f17 \\( \\) \"f16 
\\(\\)\";" } } */
+t26 f20 (f21 (), 2);
+/* { dg-final { scan-file va-opt-3.i "t26 f17 h;" } } */


Jakub


Re: [PATCH] Handle PowerPC64 ELFv1 function descriptors in libbacktrace (PR other/82368)

2018-02-15 Thread Szabolcs Nagy

On 14/02/18 11:41, Jakub Jelinek wrote:

Hi!

As mentioned in detail in the PR, PowerPC64 ELFv1 function symbols
point to function descriptors in .opd section rather than actual
code, and one needs to read the code address from the .opd section
in order to associate symbols with .text addresses.

Fixed thusly, bootstrapped/regtested on powerpc64-linux (-m32/-m64
testing) and powerpc64le-linux, ok for trunk?

2018-02-14  Jakub Jelinek  

PR other/82368
* elf.c (EM_PPC64, EF_PPC64_ABI): Undefine and define.
(struct elf_ppc64_opd_data): New type.
(elf_initialize_syminfo): Add opd argument, handle symbols
pointing into the PowerPC64 ELFv1 .opd section.
(elf_add): Read .opd section on PowerPC64 ELFv1, pass pointer
to structure with .opd data to elf_initialize_syminfo.

--- libbacktrace/elf.c.jj   2018-02-08 20:46:10.671242369 +
+++ libbacktrace/elf.c  2018-02-14 08:39:06.674088951 +

...

@@ -2857,6 +2889,23 @@ elf_add (struct backtrace_state *state,
  debuglink_crc = *(const uint32_t*)(debuglink_data + crc_offset);
}
}
+
+  /* Read the .opd section on PowerPC64 ELFv1.  */
+  if (ehdr.e_machine == EM_PPC64
+ && (ehdr.e_flags & EF_PPC64_ABI) < 2
+ && shdr->sh_type == SHT_PROGBITS


this broke baremetal arm targets (e.g. aarch64-none-elf with newlib)

...src/gcc/libbacktrace/elf.c: In function 'elf_add':
...src/gcc/libbacktrace/elf.c:2896:24: error: 'SHT_PROGBITS' undeclared (first 
use in this function)

&& shdr->sh_type == SHT_PROGBITS
^~~~



+ && strcmp (name, ".opd") == 0)
+   {
+ if (!backtrace_get_view (state, descriptor, shdr->sh_offset,
+  shdr->sh_size, error_callback, data,
+  &opd_data.view))
+   goto fail;
+
+ opd = &opd_data;
+ opd->addr = shdr->sh_addr;
+ opd->data = (const char *) opd_data.view.data;
+ opd->size = shdr->sh_size;
+   }
  }
  
if (symtab_shndx == 0)


Re: [PATCH] Fix endless match.pd recursion on cst1 + cst2 + cst3 (PR tree-optimization/84334, take 2)

2018-02-15 Thread Richard Biener
On Wed, 14 Feb 2018, Jakub Jelinek wrote:

> On Tue, Feb 13, 2018 at 07:04:09PM +0100, Richard Biener wrote:
> > On February 13, 2018 6:51:29 PM GMT+01:00, Jakub Jelinek  
> > wrote:
> > >On the following testcase, we recurse infinitely, because
> > >we have float re-association enabled, but also rounding-math, so
> > >we try to optimize (cst1 + cst2) + cst3 as (cst2 + cst3) + cst1
> > >but (cst2 + cst3) doesn't simplify and we try again and optimize
> > >it as (cst3 + cst1) + cst2 and then (cst1 + cst2) + cst3 and so on
> > >forever.  If @0 is not a CONSTANT_CLASS_P, there is not a problem,
> > >if it is, the code just checks if we can actually simplify the
> > >operation between cst2 and cst3 into a constant.
> > 
> > Is there a reason to try simplifying at all for constant @0?  I'd rather 
> > not try to avoid all the complex code. 
> 
> So like this?  Bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?

Yes.

Thanks,
Richard.

> 2018-02-14  Jakub Jelinek  
> 
>   PR tree-optimization/84334
>   * match.pd ((A +- CST1) +- CST2 -> A + CST3): If A is
>   also a CONSTANT_CLASS_P, punt.
> 
>   * gcc.dg/pr84334.c: New test.
> 
> --- gcc/match.pd.jj   2018-02-13 21:22:19.565979401 +0100
> +++ gcc/match.pd  2018-02-14 13:55:06.584668049 +0100
> @@ -1733,9 +1733,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  CONSTANT_CLASS_P@2)
>   /* If one of the types wraps, use that one.  */
>   (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
> -  (if (outer_op == PLUS_EXPR)
> -   (plus (view_convert @0) (inner_op @2 (view_convert @1)))
> -   (minus (view_convert @0) (neg_inner_op @2 (view_convert @1))))
> +  /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse
> +  forever if something doesn't simplify into a constant.  */
> +  (if (!CONSTANT_CLASS_P (@0))
> +   (if (outer_op == PLUS_EXPR)
> + (plus (view_convert @0) (inner_op @2 (view_convert @1)))
> + (minus (view_convert @0) (neg_inner_op @2 (view_convert @1)))))
>(if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
>  || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> (if (outer_op == PLUS_EXPR)
> --- gcc/testsuite/gcc.dg/pr84334.c.jj 2018-02-14 13:53:36.816683512 +0100
> +++ gcc/testsuite/gcc.dg/pr84334.c2018-02-14 13:53:36.815683512 +0100
> @@ -0,0 +1,12 @@
> +/* PR tree-optimization/84334 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -frounding-math" } */
> +
> +float
> +foo (void)
> +{
> +  float a = 9.99974752427078783512115478515625e-7f;
> +  float b = 1.4950485415756702423095703125e-6f;
> +  float c = 4.99873689375817775726318359375e-6f;
> +  return a + b + c;
> +}
> 
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH] Fix ICE in maybe_diag_stxncpy_trunc (PR tree-optimization/84383)

2018-02-15 Thread Richard Biener
On Thu, 15 Feb 2018, Jakub Jelinek wrote:

> Hi!
> 
> The function calls get_addr_base_and_unit_offset on 2 trees, but
> that can return NULL if the unit offset is not constant.
> The conditional tests just one of them for non-NULL and operand_equal_p
> ICEs if one argument is NULL, so depending on the uninitialized poly_int64
> (get_addr_base_and_unit_offset doesn't touch it if it returns NULL),
> we either ICE in operand_equal_p or are lucky and dstoff is equal to lhsoff
> and just valgrind complains.
> 
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?

Ok.

Richard.

> 2018-02-15  Jakub Jelinek  
> 
>   PR tree-optimization/84383
>   * tree-ssa-strlen.c (maybe_diag_stxncpy_trunc): Don't look at
>   dstoff nor call operand_equal_p if dstbase is NULL.
> 
>   * gcc.c-torture/compile/pr84383.c: New test.
> 
> --- gcc/tree-ssa-strlen.c.jj  2018-02-09 06:44:29.993809176 +0100
> +++ gcc/tree-ssa-strlen.c 2018-02-14 16:38:36.981713666 +0100
> @@ -1878,6 +1878,7 @@ maybe_diag_stxncpy_trunc (gimple_stmt_it
>poly_int64 lhsoff;
>tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
>if (lhsbase
> +   && dstbase
> && known_eq (dstoff, lhsoff)
> && operand_equal_p (dstbase, lhsbase, 0))
>   return false;
> --- gcc/testsuite/gcc.c-torture/compile/pr84383.c.jj  2018-02-14 
> 17:33:21.972803287 +0100
> +++ gcc/testsuite/gcc.c-torture/compile/pr84383.c 2018-02-14 
> 17:32:37.639803918 +0100
> @@ -0,0 +1,14 @@
> +/* PR tree-optimization/84383 */
> +
> +struct S { char *s; };
> +void bar (struct S *);
> +
> +void
> +foo (int a, char *b)
> +{
> +  struct S c[4];
> +  bar (c);
> +  __builtin_strncpy (c[a].s, b, 32);
> +  c[a].s[31] = '\0';
> +  bar (c);
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: update LTO test harness README

2018-02-15 Thread Richard Biener
On Wed, 14 Feb 2018, Martin Sebor wrote:

> I was excited to find out about the recent enhancement to
> the LTO test harness to support the new dg-lto-warning and
> dg-lto-message directives (thanks, David).
> 
> To make them easier to find and use (there is a C++ LTO test
> that uses them but no C tests yet) the attached patch updates
> the README to document them.  While I was at it I made a few
> minor cosmetic improvements to the README as well.
> 
> Let me know if I didn't get something quite right or if there
> is something else that might be worth mentioning in the README.

Looks good to me.

Btw, do the new dg-lto-warning/message directives handle
fat LTO objects correctly where diagnostics usually appear twice,
once for the fat part generation and once for the ltrans part?

I wonder if we can build upon those new directives to finally
add scan-lto-tree-dump for dg-final as well... (the complication
here is to get at the dump file name but if we know the link
stage is executed we can provide a prefix to the driver IIRC).

Richard.


[PING][PATCH v3] Disable reg offset in quad-word store for Falkor

2018-02-15 Thread Siddhesh Poyarekar
Ping!

On Friday 09 February 2018 01:02 PM, Siddhesh Poyarekar wrote:
> Hi,
> 
> Here's v3 of the patch to disable register offset addressing mode for
> stores of 128-bit values on Falkor because they're very costly.
> Following Kyrill's suggestion, I compared the codegen for a53 and
> found that the codegen was quite different.  Jim's original patch is
> the most minimal compromise for this and is also a cleaner temporary
> fix before I attempt to split address costs into loads and stores for
> gcc9.
> 
> So v3 is essentially a very slightly cleaned up version of v1 again,
> this time with confirmation that there are no codegen changes in
> CPU2017 on non-falkor builds; only the codegen for -mcpu=falkor is
> different.
> 
> 
> 
> On Falkor, because of an idiosyncrasy of how the pipelines are
> designed, a quad-word store using a reg+reg addressing mode is almost
> twice as slow as an add followed by a quad-word store with a single
> reg addressing mode.  So we get better performance if we disallow
> addressing modes using register offsets with quad-word stores.  This
> is the most minimal change for gcc8, I will volunteer to make a more
> lasting change for gcc9 where I split the addressing mode costs into
> loads and stores wherever possible and needed.
> 
> This patch improves fpspeed by 0.17% and intspeed by 0.62% in CPU2017,
> with xalancbmk_s (3.84%) wrf_s (1.46%) and mcf_s (1.62%) being the
> biggest winners.  There were no regressions beyond 0.4%.
> 
> 2018-xx-xx  Jim Wilson  
> Kugan Vivenakandarajah  
>   Siddhesh Poyarekar  
> 
>   gcc/
>   * config/aarch64/aarch64-protos.h (aarch64_movti_target_operand_p):
>   New.
>   * config/aarch64/aarch64-simd.md (aarch64_simd_mov): Use Utf.
>   * config/aarch64/aarch64-tuning-flags.def
>   (SLOW_REGOFFSET_QUADWORD_STORE): New.
>   * config/aarch64/aarch64.c (qdf24xx_tunings): Add
>   SLOW_REGOFFSET_QUADWORD_STORE to tuning flags.
>   (aarch64_movti_target_operand_p): New.
>   * config/aarch64/aarch64.md (movti_aarch64): Use Utf.
>   (movtf_aarch64): Likewise.
>   * config/aarch64/constraints.md (Utf): New.
> 
>   gcc/testsuite
>   * gcc.target/aarch64/pr82533.c: New test case.
> ---
>  gcc/config/aarch64/aarch64-protos.h |  1 +
>  gcc/config/aarch64/aarch64-simd.md  |  4 ++--
>  gcc/config/aarch64/aarch64-tuning-flags.def |  4 
>  gcc/config/aarch64/aarch64.c| 14 +-
>  gcc/config/aarch64/aarch64.md   |  8 
>  gcc/config/aarch64/constraints.md   |  6 ++
>  gcc/testsuite/gcc.target/aarch64/pr82533.c  | 11 +++
>  7 files changed, 41 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr82533.c
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index cda2895d28e..5a0323deb1e 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -433,6 +433,7 @@ bool aarch64_simd_mem_operand_p (rtx);
>  bool aarch64_sve_ld1r_operand_p (rtx);
>  bool aarch64_sve_ldr_operand_p (rtx);
>  bool aarch64_sve_struct_memory_operand_p (rtx);
> +bool aarch64_movti_target_operand_p (rtx);
>  rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool);
>  rtx aarch64_tls_get_addr (void);
>  tree aarch64_fold_builtin (tree, int, tree *, bool);
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 3d1f6a01cb7..f7daac3e28d 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -131,9 +131,9 @@
>  
>  (define_insn "*aarch64_simd_mov"
>[(set (match_operand:VQ 0 "nonimmediate_operand"
> - "=w, Umq,  m,  w, ?r, ?w, ?r, w")
> + "=w, Umq, Utf,  w, ?r, ?w, ?r, w")
>   (match_operand:VQ 1 "general_operand"
> - "m,  Dz, w,  w,  w,  r,  r, Dn"))]
> + "m,  Dz,w,  w,  w,  r,  r, Dn"))]
>"TARGET_SIMD
> && (register_operand (operands[0], mode)
> || aarch64_simd_reg_or_zero (operands[1], mode))"
> diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def 
> b/gcc/config/aarch64/aarch64-tuning-flags.def
> index ea9ead234cb..04baf5b6de6 100644
> --- a/gcc/config/aarch64/aarch64-tuning-flags.def
> +++ b/gcc/config/aarch64/aarch64-tuning-flags.def
> @@ -41,4 +41,8 @@ AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", 
> SLOW_UNALIGNED_LDPW)
> are not considered cheap.  */
>  AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
>  
> +/* Don't use a register offset in a memory address for a quad-word store.  */
> +AARCH64_EXTRA_TUNING_OPTION ("slow_regoffset_quadword_store",
> +  SLOW_REGOFFSET_QUADWORD_STORE)
> +
>  #undef AARCH64_EXTRA_TUNING_OPTION
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 

AArch64 patch ping

2018-02-15 Thread Jakub Jelinek
Hi!

I'd like to ping this patch from Steve.

On Fri, Jan 05, 2018 at 02:14:26PM -0800, Steve Ellcey wrote:
> This is a fix for PR target/83335.  We are asserting in
> aarch64_print_address_internal because we have a non Pmode
> address coming from an asm instruction.  My fix is to 
> just allow this by checking this_is_asm_operands.  This is
> what it was doing before the assert was added that caused
> the ICE.
> 
> Verified that it fixed gcc.target/aarch64/asm-2.c in ILP32
> mode and that it caused no regressions.
> 
> Steve Ellcey
> sell...@cavium.com
> 
> 
> 2018-01-05  Steve Ellcey  
> 
>   PR target/83335
>   * config/aarch64/aarch64.c (aarch64_print_address_internal):
>   Allow non Pmode address in asm statements.
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index a189605..af74212 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -5670,8 +5670,9 @@ aarch64_print_address_internal (FILE *f, machine_mode 
> mode, rtx x,
>  {
>    struct aarch64_address_info addr;
>  
> -  /* Check all addresses are Pmode - including ILP32.  */
> -  gcc_assert (GET_MODE (x) == Pmode);
> +  /* Check all addresses are Pmode - including ILP32,
> + unless this is coming from an asm statement.  */
> +  gcc_assert (GET_MODE (x) == Pmode || this_is_asm_operands);
>  
>    if (aarch64_classify_address (&addr, x, mode, true, type))
>  switch (addr.type)

Jakub


RE: [PATCH][i386] Adjust vec_construct cost for AVX256/512, penaltize elementwise load vectorization

2018-02-15 Thread Shalnov, Sergey
Richard,
I've benchmarked your patch on Skylake with SPEC CPU 20[06|17][fp|int]rate 
and other smaller benchmark suites. I found that it doesn't regress 
any benchmark beyond noise but improves 525.x264 by 1.8%, 526.blender by 1.9% and 
465.tonto by 3.2%.
I think this is a good reason to merge the patch.
Sergey

-Original Message-
From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-ow...@gcc.gnu.org] On 
Behalf Of Richard Biener
Sent: Wednesday, February 14, 2018 11:27 AM
To: gcc-patches@gcc.gnu.org
Cc: Jan Hubicka ; kirill.yuk...@gmail.com
Subject: [PATCH][i386] Adjust vec_construct cost for AVX256/512, penaltize 
elementwise load vectorization


The following tries to account for the fact that when constructing
AVX256 or AVX512 vectors from elements we can only use insertps to insert into 
the low 128bits of a vector but have to use
vinserti128 or vinserti64x4 to build larger AVX256/512 vectors.
Those operations also have higher latency (Agner documents
3 cycles for Broadwell for reg-reg vinserti128 while insertps has one cycle 
latency).  Agner doesn't have tables for AVX512 yet but I guess the story is 
similar for vinserti64x4.

Latency is similar for FP adds so I re-used ix86_cost->addss for this cost.

This works towards fixing the referenced PRs below where we end up vectorizing 
a lot of loads via elementwise construction, mostly "enabled" by the new 
support for alias versioning for variable strides.  Here, analyzed for PR84037, 
the large number of scalar loads and vector builds before any meaningful 
computation means the CPU is bottlenecked with AGU and load ops and doesn't get 
any meaningful work done thus the vectorization should end up being not 
profitable (with some more massaging in the vectorizer and using SLP which 
reduces the number of loads a lot I only can get into same-speed as not 
vectorized territory).

So the real fix for those issues is to account for those microarchitectural 
issues in the backend costing.  I've decided to plumb this onto the vector 
construction op if that happens to be fed by loads, scaling this cost by the 
number of vector elements (overall latency should grow with the number of 
dependences).

Bootstrap/regtest running on x86_64-unknown-linux-gnu.

I've benchmarked this on Haswell with SPEC CPU 2006 and a three-run reveals 
that it doesn't regress any benchmark off-noise but improves 416.gamess by 7%, 
465.tonto by 6% and 481.wrf by 2%.  It also fixes the Polyhedron capacita 
regression (which is what I "tuned" the factoring with).  I've mentioned the 
bugs referring to any of the above affected benchmarks in the ChangeLog but it 
still has to be verified if the bugs are fully fixed (84037 is).

Ok for trunk?

Any confirmation of the microarchitectural bottleneck in, say, Capacita from 
people with access to cycle-accurate simulators is welcome ;)  Performance 
counters only help so much (not much...), so my guesses are based on Agner and 
finger-counting.

Thanks,
Richard.

2018-02-13  Richard Biener  

PR tree-optimization/84037
PR tree-optimization/84016
PR target/82862
* config/i386/i386.c (ix86_builtin_vectorization_cost):
Adjust vec_construct for the fact we need additional higher latency
128bit inserts for AVX256 and AVX512 vector builds.
(ix86_add_stmt_cost): Scale vector construction cost for
elementwise loads.

Index: gcc/config/i386/i386.c
===
--- gcc/config/i386/i386.c  (revision 257620)
+++ gcc/config/i386/i386.c  (working copy)
@@ -45904,7 +45904,18 @@ ix86_builtin_vectorization_cost (enum ve
  ix86_cost->sse_op, true);
 
   case vec_construct:
-   return ix86_vec_cost (mode, ix86_cost->sse_op, false);
+   {
+ /* N element inserts.  */
+ int cost = ix86_vec_cost (mode, ix86_cost->sse_op, false);
+ /* One vinserti128 for combining two SSE vectors for AVX256.  */
+ if (GET_MODE_BITSIZE (mode) == 256)
+   cost += ix86_vec_cost (mode, ix86_cost->addss, true);
+ /* One vinserti64x4 and two vinserti128 for combining SSE
+and AVX256 vectors to AVX512.  */
+ else if (GET_MODE_BITSIZE (mode) == 512)
+   cost += 3 * ix86_vec_cost (mode, ix86_cost->addss, true);
+ return cost;
+   }
 
   default:
 gcc_unreachable ();
@@ -50243,6 +50254,18 @@ ix86_add_stmt_cost (void *data, int coun
  break;
}
 }
+  /* If we do elementwise loads into a vector then we are bound by
+ latency and execution resources for the many scalar loads
+ (AGU and load ports).  Try to account for this by scaling the
+ construction cost by the number of elements involved.  */
+  if (kind == vec_construct
+  && stmt_info
+  && stmt_info->type == load_vec_info_type
+  && stmt_info->memory_access_type == VMAT_ELEMENTWISE)
+{