[PATCH PR81408]Turn TREE level unsafe loop optimizations warning to missed optimization message

2017-07-18 Thread Bin Cheng
Hi,
I removed unsafe loop optimization on TREE level last year, so GCC doesn't do 
unsafe
loop optimizations on TREE now.  All "unsafe loop optimizations" warnings 
reported by
TREE optimizers are simply missed optimizations.  This patch turns such warning 
into
missed optimization messages.  I didn't change when this will be dumped, for 
now it is
when called from ivopts.
Bootstrap and test on x86_64 and AArch64.  Is it OK?

Thanks,
bin
2017-07-13  Bin Cheng  

PR target/81408
* tree-ssa-loop-niter.c (number_of_iterations_exit): Dump missed
optimization for loop niter analysis.

gcc/testsuite/ChangeLog
2017-07-13  Bin Cheng  

PR target/81408
* g++.dg/tree-ssa/pr81408.C: New.
* gcc.dg/tree-ssa/pr19210-1.c: Check dump message rather than warning.diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr81408.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr81408.C
new file mode 100644
index 000..354d362
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr81408.C
@@ -0,0 +1,93 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=gnu++11 -fdump-tree-ivopts-missed 
-Wunsafe-loop-optimizations" } */
+
+namespace a {
+void b () __attribute__ ((__noreturn__));
+template  struct d;
+template  struct d
+{
+  typedef e f;
+};
+struct g
+{
+  template  using i = h *;
+};
+}
+using a::d;
+template  class k
+{
+  j l;
+
+public:
+  typename d::f operator* () {}
+  void operator++ () { ++l; }
+  j
+  aa ()
+  {
+return l;
+  }
+};
+template 
+bool
+operator!= (k o, k p2)
+{
+  return o.aa () != p2.aa ();
+}
+struct p;
+namespace a {
+struct F
+{
+  struct q
+  {
+using ai = g::i;
+  };
+  using r = q::ai;
+};
+class H
+{
+public:
+  k begin ();
+  k end ();
+};
+int s;
+class I
+{
+public:
+  void
+  aq (char)
+  {
+if (s)
+  b ();
+  }
+};
+class u : public I
+{
+public:
+  void
+  operator<< (u o (u))
+  {
+o (*this);
+  }
+  u operator<< (void *);
+};
+template 
+at
+av (au o)
+{
+  o.aq ('\n');
+}
+u ax;
+}
+struct p
+{
+  char *ay;
+};
+a::H t;
+void
+ShowHelpListCommands ()
+{
+  for (auto c : t)
+a::ax << c.ay << a::av;
+}
+
+/* { dg-final { scan-tree-dump "note: missed loop optimization: niters 
analysis ends up with assumptions." "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c
index 3c8ee06..3c18470 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c
@@ -1,15 +1,15 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -Wunsafe-loop-optimizations" } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details -Wunsafe-loop-optimizations" } 
*/
 extern void g(void);
 
 void
 f (unsigned n)
 {
   unsigned k;
-  for(k = 0;k <= n;k++) /* { dg-warning "missed loop optimization.*overflow" } 
*/
+  for(k = 0;k <= n;k++) /* missed optimization for this loop.  */
 g();
 
-  for(k = 0;k <= n;k += 4) /* { dg-warning "missed loop 
optimization.*overflow" } */
+  for(k = 0;k <= n;k += 4) /* missed optimization for this loop.  */
 g();
 
   /* We used to get warning for this loop.  However, since then # of iterations
@@ -21,9 +21,14 @@ f (unsigned n)
 g();
 
   /* So we need the following loop, instead.  */
-  for(k = 4;k <= n;k += 5) /* { dg-warning "missed loop 
optimization.*overflow" } */
+  for(k = 4;k <= n;k += 5) /* missed optimization for this loop.  */
 g();
   
-  for(k = 15;k >= n;k--) /* { dg-warning "missed loop optimization.*overflow" 
} */
+  for(k = 15;k >= n;k--) /* missed optimization for this loop.  */
 g();
 }
+
+/* { dg-final { scan-tree-dump "pr19210-1.c:9:.*: missed loop optimization: 
niters analysis ends up with assumptions." "ivopts" } } */
+/* { dg-final { scan-tree-dump "pr19210-1.c:12:.*: missed loop optimization: 
niters analysis ends up with assumptions." "ivopts" } } */
+/* { dg-final { scan-tree-dump "pr19210-1.c:24:.*: missed loop optimization: 
niters analysis ends up with assumptions." "ivopts" } } */
+/* { dg-final { scan-tree-dump "pr19210-1.c:27:.*: missed loop optimization: 
niters analysis ends up with assumptions." "ivopts" } } */
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 5a7cab5..1421002 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -2378,9 +2378,9 @@ number_of_iterations_exit (struct loop *loop, edge exit,
 return true;
 
   if (warn)
-warning_at (gimple_location_safe (stmt),
-   OPT_Wunsafe_loop_optimizations,
-   "missed loop optimization, the loop counter may overflow");
+dump_printf_loc (MSG_MISSED_OPTIMIZATION, gimple_location_safe (stmt),
+"missed loop optimization: niters analysis ends up "
+"with assumptions.\n");
 
   return false;
 }


Re: [PATCH] match.pd: reassociate multiplications with constants

2017-07-18 Thread Richard Biener
On Fri, Jul 14, 2017 at 7:59 AM, Marc Glisse  wrote:
> On Thu, 13 Jul 2017, Alexander Monakov wrote:
>
>> On Thu, 13 Jul 2017, Marc Glisse wrote:
>>>
>>> I notice that we do not turn (X*10)*10 into X*100 in GIMPLE.
>>
>>
>> Sorry, could you clarify what you mean here?  I think we certainly do
>> that,
>> just not via match.pd, but in 'associate:' case of fold_binary_loc.
>
>
> fold_binary_loc is for GENERIC, so mostly for front-end time optimization of
> expressions.
>
> int f(int a){
>   int b=a*10;
>   return b*10;
> }
>
> $ gcc-snapshot -O3 -S -fdump-tree-optimized a.c
> $ cat a.c.228t.optimized
> [...]
>   b_2 = a_1(D) * 10;
>   _3 = b_2 * 10;
>   return _3;

Yeah, but I think we best address this by adding support to re-associate
expressions with !TYPE_OVERFLOW_WRAPS to the reassoc pass.
It's not going to be an easy task if you want to avoid re-writing everything
to unsigned arithmetic.

Simple cases might be worth doing with patterns (like the case above).

Richard.

>>> Relying on inner expressions being folded can be slightly dangerous,
>>> especially for generic IIRC. It seems easy enough to check that @1 is
>>> neither
>>> 0 nor -1 for safety.
>>
>>
>> I wanted to add a gcc_checking_assert to that effect, but it's not used in
>> match.pd anywhere.  Is there a nice way to do that?
>
>
> You can use (with { arbitrary C++ code } ... ) to add an assertion (you can
> use @0 in the block of C++ code).
>
> I was more thinking of an "if" than an assertion though.
>
> --
> Marc Glisse


Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]

2017-07-18 Thread Richard Biener
On Mon, 10 Jul 2017, Tamar Christina wrote:

> Hi All,
> 
> I've re-spun the patch with the changes requested.
> 
> 
> This is only done when not honoring signaling NaNs.
> This transformation is done at expand time by using
> a new optab "xorsign". If the optab is not available
> then copysign is expanded as normal.
> 
> Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
> Regression done on aarch64-none-linux-gnu and no regressions.
> 
> Ok for trunk?

+static rtx
+maybe_expand_mult_copysign (tree treeop0, tree treeop1, rtx target)
+{
+  tree type = TREE_TYPE (treeop0);
+  rtx op0, op1;
+
+  if (HONOR_SNANS (type))
+return NULL_RTX;
+
+  if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
+{
+  gimple *call0 = SSA_NAME_DEF_STMT (treeop0);

you can't lookup arbitrary def stmts during RTL expansion but you
have to go through get_gimple_for_ssa_name which may return NULL
if SSA name coalescing makes doing so unsafe.

Why's this now done during RTL expansion rather than during late
GIMPLE, using match.pd and an internal function for xorsign?

Thanks,
Richard.

> 
> gcc/
> 2017-07-10  Tamar Christina  
>   Andrew Pinski 
> 
>   PR middle-end/19706
>   * expr.c (is_copysign_call_with_1): New.
>   (maybe_expand_mult_copysign): Likewise.
>   (expand_expr_real_2): Expand copysign.
>   * optabs.def (xorsign_optab): New.
> 
> 
> From: Andrew Pinski 
> Sent: Monday, July 10, 2017 12:21:29 AM
> To: Tamar Christina
> Cc: GCC Patches; nd; l...@redhat.com; i...@airs.com; rguent...@suse.de
> Subject: Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch 
> (1/2)]
> 
> On Mon, Jun 12, 2017 at 12:56 AM, Tamar Christina
>  wrote:
> > Hi All,
> >
> > this patch implements a optimization rewriting
> >
> > x * copysign (1.0, y) and
> > x * copysign (-1.0, y)
> >
> > to:
> >
> > x ^ (y & (1 << sign_bit_position))
> >
> > This is done by creating a special builtin during matching and generate the
> > appropriate instructions during expand. This new builtin is called XORSIGN.
> >
> > The expansion of xorsign depends on if the backend has an appropriate optab
> > available. If this is not the case then we use a modified version of the 
> > existing
> > copysign which does not take the abs value of the first argument as a fall 
> > back.
> >
> > This patch is a revival of a previous patch
> > https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html
> >
> > Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
> > Regression done on aarch64-none-linux-gnu and no regressions.
> 
> 
> Note this is also PR 19706.
> 
> Thanks,
> Andrew
> 
> >
> > Ok for trunk?
> >
> > gcc/
> > 2017-06-07  Tamar Christina  
> >
> > * builtins.def (BUILT_IN_XORSIGN, BUILT_IN_XORSIGNF): New.
> > (BUILT_IN_XORSIGNL, BUILT_IN_XORSIGN_FLOAT_NX): Likewise.
> > * match.pd (mult (COPYSIGN:s real_onep @0) @1): New simplifier.
> > (mult (COPYSIGN:s real_mus_onep @0) @1): Likewise.
> > (copysigns @0 (negate @1)): Likewise.
> > * builtins.c (expand_builtin_copysign): Promoted local to argument.
> > (expand_builtin): Added CASE_FLT_FN_FLOATN_NX (BUILT_IN_XORSIGN) and
> > CASE_FLT_FN (BUILT_IN_XORSIGN).
> > (BUILT_IN_COPYSIGN): Updated function call.
> > * optabs.h (expand_copysign): New bool.
> > (expand_xorsign): New.
> > * optabs.def (xorsign_optab): New.
> > * optabs.c (expand_copysign): New parameter.
> > * fortran/f95-lang.c (xorsignl, xorsign, xorsignf): New.
> > * fortran/mathbuiltins.def (XORSIGN): New.
> >
> > gcc/testsuite/
> > 2017-06-07  Tamar Christina  
> >
> > * gcc.dg/tree-ssa/xorsign.c: New.
> > * gcc.dg/xorsign_exec.c: New.
> > * gcc.dg/vec-xorsign_exec.c: New.
> > * gcc.dg/tree-ssa/reassoc-39.c (f2, f3): Updated constant to 2.
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


[PATCH] Fix PR81418

2017-07-18 Thread Richard Biener

The following fixes a missed check in vectorizable-reduction.  We cannot
handle the case where we have a lane-reducing reduction operation like
DOT_PROD_EXPR that does not use a single def-use cycle, because we need
individual reduction vector elements in other vector stmts.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2017-06-18  Richard Biener  

PR tree-optimization/81418
* tree-vect-loop.c (vectorizable_reduction): Properly compute
vectype_in.  Verify that with lane-reducing reduction operations
we have a single def-use cycle.

* gcc.dg/torture/pr81418.c: New testcase.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 250270)
+++ gcc/tree-vect-loop.c(working copy)
@@ -5642,7 +5642,10 @@ vectorizable_reduction (gimple *stmt, gi
  if (k == 1
  && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR)
continue;
- vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
+ tem = get_vectype_for_scalar_type (TREE_TYPE (op));
+ if (! vectype_in
+ || TYPE_VECTOR_SUBPARTS (tem) < TYPE_VECTOR_SUBPARTS (vectype_in))
+   vectype_in = tem;
  break;
}
   gcc_assert (vectype_in);
@@ -6213,26 +6216,6 @@ vectorizable_reduction (gimple *stmt, gi
}
 }
 
-  if (!vec_stmt) /* transformation not required.  */
-{
-  if (first_p)
-   vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
-  STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
-  return true;
-}
-
-  /* Transform.  */
-
-  if (dump_enabled_p ())
-dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
-
-  /* FORNOW: Multiple types are not supported for condition.  */
-  if (code == COND_EXPR)
-gcc_assert (ncopies == 1);
-
-  /* Create the destination vector  */
-  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
-
   /* In case the vectorization factor (VF) is bigger than the number
  of elements that we can fit in a vectype (nunits), we have to generate
  more than one vector stmt - i.e - we need to "unroll" the
@@ -6276,6 +6259,41 @@ vectorizable_reduction (gimple *stmt, gi
   else
 epilog_copies = ncopies;
 
+  /* If the reduction stmt is one of the patterns that have lane
+ reduction embedded we cannot handle the case of ! single_defuse_cycle.  */
+  if ((ncopies > 1
+   && ! single_defuse_cycle)
+  && (code == DOT_PROD_EXPR
+ || code == WIDEN_SUM_EXPR
+ || code == SAD_EXPR))
+{
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"multi def-use cycle not possible for lane-reducing "
+"reduction operation\n");
+  return false;
+}
+
+  if (!vec_stmt) /* transformation not required.  */
+{
+  if (first_p)
+   vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
+  STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+  return true;
+}
+
+  /* Transform.  */
+
+  if (dump_enabled_p ())
+dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
+
+  /* FORNOW: Multiple types are not supported for condition.  */
+  if (code == COND_EXPR)
+gcc_assert (ncopies == 1);
+
+  /* Create the destination vector  */
+  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+
   prev_stmt_info = NULL;
   prev_phi_info = NULL;
   if (slp_node)
Index: gcc/testsuite/gcc.dg/torture/pr81418.c
===
--- gcc/testsuite/gcc.dg/torture/pr81418.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr81418.c  (working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-loop-optimize" } */
+
+int
+ol (int ku)
+{
+  int zq = 0;
+
+  while (ku < 1)
+{
+  int y6;
+
+  for (y6 = 0; y6 < 3; ++y6)
+   zq += (char)ku;
+  ++ku;
+}
+
+  return zq;
+}


Re: PATCH v2][Aarch64] Add vectorized mersenne twister

2017-07-18 Thread Ulrich Drepper
On Tue, Jul 18, 2017 at 7:57 AM, Michael Collison
 wrote:
> This is the second version of a patch for AArch64 to add a vectorized mersenne 
> twister to libstdc++. The first version used intrinsics and included 
> "arm_neon.h". After feedback from the community this version uses only GCC 
> vector extensions and Aarch64 simd data types.

Looks OK.  Just stylistically, why do you have

+#ifdef __ARM_NEON
+#ifdef __aarch64__

(in more than one place) instead of one preprocessor line?


Fix Eh delivery in partitioned functions

2017-07-18 Thread Jan Hubicka
Hi,
this patch fixes a wrong-code issue with BB partitioning where sometimes EH
is not delivered.  This is a very old issue that affects all release branches
with -fprofile-use, but recently surfaced as a libstdc++ testsuite regression
because we now partition functions based on static profile prediction.

The problem is that EH tables are stored as offsets from start of functions.
In the record however value 0 is special and means that there is no landing
pad for a given region.  Normally this is safe because landing pads never
appear as very first label in function.  This is however no longer true with
partitioning where the cold partition is actually quite likely to start by landing
pad.

The change in except.c adds sanity check that no EH landing pads are very first
in the insn stream.  The change in bb-reorder makes the reordering pass choose a
non-landing-pad BB as first trace for the cold partition. Such BB always exists
because landing pads must be in same partition as the instruction throwing them
and we never make a BB both a landing pad and reachable by normal control flow.
However I am not thrilled by the fix as it is a bit fragile in case some
optimization happens after bb partitioning and code is moved away.  Also the
logic can be confused by asm statement which may result in no code (again
however the BB reachable from outside world should contain something that
produce EH that is a real instruction).

Ideas for better fix would be welcome then.  If the assert I added triggers
for valid reasons, we may just end up adding a NOP in the rare case we do
not succeed in arranging the cold partition to not start with a landing pad.

Bootstrapped/regtested x86_64-linux, looks sane?

Honza

PR middle-end/81331 
* except.c (first_in_partition): New function.
(dw2_output_call_site_table): Sanity check that landing pads are not
very first in the partition.
* bb-reorder.c (ok_to_be_first): New function.
(connect_traces): Avoid traces that are !ok_to_be_first to start
partitions.
Index: except.c
===
--- except.c(revision 250226)
+++ except.c(working copy)
@@ -2724,6 +2724,23 @@ sjlj_size_of_call_site_table (void)
   return size;
 }
 
+/* Return true if L will appear as very first in its partition.  */
+
+bool
+first_in_partition (rtx_insn *l)
+{
+  while (l != NULL_RTX)
+{
+  if (active_insn_p (l))
+   return false;
+  else if (GET_CODE (l) == NOTE
+  && NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+   return true;
+  l = PREV_INSN (l);
+}
+  return true;
+}
+
 static void
 dw2_output_call_site_table (int cs_format, int section)
 {
@@ -2749,8 +2766,14 @@ dw2_output_call_site_table (int cs_forma
   ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + i);
 
   if (cs->landing_pad)
-   ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
-CODE_LABEL_NUMBER (cs->landing_pad));
+   {
+ ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
+  CODE_LABEL_NUMBER (cs->landing_pad));
+ /* Be sure that the offset will not be 0 as that would make EH
+delivery code to think that there is no landing pad.  */
+ gcc_checking_assert (!first_in_partition
+  (as_a  (cs->landing_pad)));
+   }
 
   /* ??? Perhaps use insn length scaling if the assembler supports
 generic arithmetic.  */
Index: bb-reorder.c
===
--- bb-reorder.c(revision 250226)
+++ bb-reorder.c(working copy)
@@ -1066,6 +1066,21 @@ connect_better_edge_p (const_edge e, boo
   return is_better_edge;
 }
 
+/* If we place EH landing pad as very first BB in the partition, its offset
+   from start of function is 0 which is special cased by the eh table to mean
+   no landing pad.  For this reason such BBs can not appear as very first in
+   the partition.  */
+static bool
+ok_to_be_first (struct trace *t)
+{
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, t->first->preds)
+if (e->flags & EDGE_EH)
+  return false;
+  return true;
+}
+
 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
 
 static void
@@ -1080,6 +1095,7 @@ connect_traces (int n_traces, struct tra
   int freq_threshold;
   gcov_type count_threshold;
   bool for_size = optimize_function_for_size_p (cfun);
+  bool first_in_partition;
 
   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
   if (max_entry_count.to_gcov_type () < INT_MAX / 1000)
@@ -1092,6 +1108,7 @@ connect_traces (int n_traces, struct tra
   current_pass = 1;
   current_partition = BB_PARTITION (traces[0].first);
   two_passes = false;
+  first_in_partition = true;
 
   if (crtl->has_bb_partition)
 for (i = 0; i < n_traces && !two_passes; i++)
@@ -1116,6 +1133,7 @@ connect_traces (int 

RE: PATCH v2][Aarch64] Add vectorized mersenne twister

2017-07-18 Thread Michael Collison
No particular reason; I can merge it into one preprocessor line.

-Original Message-
From: drep...@gmail.com [mailto:drep...@gmail.com] On Behalf Of Ulrich Drepper
Sent: Tuesday, July 18, 2017 12:24 AM
To: Michael Collison 
Cc: GCC Patches ; nd 
Subject: Re: PATCH v2][Aarch64] Add vectorized mersenne twister

On Tue, Jul 18, 2017 at 7:57 AM, Michael Collison  
wrote:
> This is the second version of a patch for AArch64 to add a vectorized mersenne 
> twister to libstdc++. The first version used intrinsics and included 
> "arm_neon.h". After feedback from the community this version uses only GCC 
> vector extensions and Aarch64 simd data types.

Looks OK.  Just stylistically, why do you have

+#ifdef __ARM_NEON
+#ifdef __aarch64__

(in more than one place) instead of one preprocessor line?


Re: [PATCH][PR 59521] Respect probabilities when expanding switch statement

2017-07-18 Thread Jan Hubicka
> Hi all,
> 
> Currently all cases in switch statement are treated as having equal
> probabilities which causes suboptimal code as demonstrated in
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59521 . This patch
> modifies expander to select pivot point for decision tree so that
> probabilities of cases on the left are roughly equal to probabilities
> on the right.
> 
> Patch survives bootstrap and regtesting on x64 but has some issues:
> * tests are fragile but I'm not sure how to make them better
> * I haven't done any performance measurements - would these be needed?
> I don't have access to SPEC these days, any other suggestions?

I think we could just check whether the daily testers show some regressions after
the patch is committed. It seems the right thing to do.
> 
> Patch is jointly authored with Martin.

2017-06-29  Yury Gribov  
Martin Liska  

PR middle-end/59521
gcc/
* predict.c (set_even_probabilities): Handle case of a single
likely edge.

I have made some changes to this function to fix another PR. So you may need
to update the patch.  What exactly is set_even_probabilities and 
combine_predictions_for_bb shooting for?

@@ -2451,7 +2484,30 @@ tree_predict_by_opcode (basic_block bb)
   edge_iterator ei;
   enum br_predictor predictor;
 
-  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
+  if (!stmt)
+return;
+
+  if (gswitch *sw = dyn_cast  (stmt))
+{
+  tree index = gimple_switch_index (sw);
+  tree val = expr_expected_value (index, auto_bitmap (),
+ );
+  if (val && TREE_CODE (val) == INTEGER_CST)
+   {
+ edge e = find_taken_edge_switch_expr (sw, bb, val);
+ if (predictor == PRED_BUILTIN_EXPECT)
+   {
+ int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
+ gcc_assert (percent >= 0 && percent <= 100);
+ predict_edge (e, PRED_BUILTIN_EXPECT,
+   HITRATE (percent));
+   }
+ else
+   predict_edge_def (e, predictor, TAKEN);
+   }
+}
+
+  if (gimple_code (stmt) != GIMPLE_COND)

I think this change can go in separately and is OK
(along with a testcase that checks that tree profile is right).

I will look into the RTL bits next.

Honza


Re: Fix Eh delivery in partitioned functions

2017-07-18 Thread Richard Biener
On Tue, 18 Jul 2017, Jan Hubicka wrote:

> > On Tue, 18 Jul 2017, Jan Hubicka wrote:
> > 
> > > Hi,
> > > this patch fixes wrong code issue with BB partitioning where sometimes EH
> > > is not delivered.  This is very old issue that affect all release branches
> > > with -fprofile-use, but recently surfaced as libstdc++ testsuite 
> > > regression
> > > because we now partition functions based on static profile prediction.
> > > 
> > > The problem is that EH tables are stored as offsets from start of 
> > > functions.
> > > In the record however value 0 is special and means that there is no 
> > > landing
> > > pad for a given region.  Normally this is safe because landing pads never
> > > appear as very first label in function.  This is however no longer true 
> > > with
> > > partitining where cold partition is actually quite likely to start by 
> > > landing
> > > pad.
> > > 
> > > The change in except.c adds sanity check that no EH landing pads are very 
> > > first
> > > in the insn stream.  The change in bb-reorder makes reorder to chose
> > > non-landing-pad BB as first trace for the cold partition. Such BB always 
> > > exists
> > > because landing pads must be in same partition as the instruction 
> > > throwing them
> > > and we never make BB both landing pad and reachable by normal control 
> > > folow.
> > > However I am not thrilled by the fix as it is bit fragile in case some
> > > optimization happends after bb partitioning and code is moved away.  Also 
> > > the
> > > logic can be confused by asm statement which may result in no code (again
> > > however the BB reachable from outside world should contain something that
> > > produce EH that is a real instruction).
> > > 
> > > Ideas for better fix would be welcome then.  If the assert I added 
> > > triggers
> > > for valid reasons, we may just end up adding a NOP in the rare case we do
> > > not suceed arranging cold partition to not start with landing pad.
> > 
> > Yeah, I'd rather pad the function start with a nop if it starts with a
> > landing pad.  How difficult would it be to arrange for this?  I suppose
> > we'd need to touch each and every target to accomplish this?  Or end up
> > using gen_nop in generic code?
> 
> I think we could just output from generic code - I think it can be done by
> final_scan_insn. I don't know however if we have a way to tell if the section
> starts with a landing pad?

Not sure either -- some insn note / bb note?  Some flag on the label?
At least the latter should be easy to add if it's not there already.

Richard.

> Honza
> > 
> > Richard.
> > 
> > > Bootstrapped/regtested x86_64-linux, looks sane?
> > > 
> > > Honza
> > > 
> > >   PR middle-end/81331 
> > >   * except.c (first_in_partition): New function.
> > >   (dw2_output_call_site_table): Sanity check that landing pads are not
> > >   very first in the partition.
> > >   * bb-reorder.c (ok_to_be_first): New function.
> > >   (connect_traces): Avoid traces that are !ok_to_be_first to start
> > >   partitions.
> > > Index: except.c
> > > ===
> > > --- except.c  (revision 250226)
> > > +++ except.c  (working copy)
> > > @@ -2724,6 +2724,23 @@ sjlj_size_of_call_site_table (void)
> > >return size;
> > >  }
> > >  
> > > +/* Return true if L will appear as very first in its partition.  */
> > > +
> > > +bool
> > > +first_in_partition (rtx_insn *l)
> > > +{
> > > +  while (l != NULL_RTX)
> > > +{
> > > +  if (active_insn_p (l))
> > > + return false;
> > > +  else if (GET_CODE (l) == NOTE
> > > +&& NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
> > > + return true;
> > > +  l = PREV_INSN (l);
> > > +}
> > > +  return true;
> > > +}
> > > +
> > >  static void
> > >  dw2_output_call_site_table (int cs_format, int section)
> > >  {
> > > @@ -2749,8 +2766,14 @@ dw2_output_call_site_table (int cs_forma
> > >ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + 
> > > i);
> > >  
> > >if (cs->landing_pad)
> > > - ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> > > -  CODE_LABEL_NUMBER (cs->landing_pad));
> > > + {
> > > +   ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> > > +CODE_LABEL_NUMBER (cs->landing_pad));
> > > +   /* Be sure that the offset will not be 0 as that would make EH
> > > +  delivery code to think that there is no landing pad.  */
> > > +   gcc_checking_assert (!first_in_partition
> > > +(as_a  (cs->landing_pad)));
> > > + }
> > >  
> > >/* ??? Perhaps use insn length scaling if the assembler supports
> > >generic arithmetic.  */
> > > Index: bb-reorder.c
> > > ===
> > > --- bb-reorder.c  (revision 250226)
> > > +++ bb-reorder.c  (working copy)
> > > @@ -1066,6 +1066,21 @@ connect_better_edge_p (const_edge e, boo
> > >return 

Re: [PATCHv2][PING^2][PR 56727] Bypass PLT for recursive calls

2017-07-18 Thread Yuri Gribov
On Mon, Jul 17, 2017 at 10:27 AM, Jan Hubicka  wrote:
>> Hi all,
>>
>> This is a new version of previous patch
>> (https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00020.html), fixed
>> after Rainer's remarks.
> Hi,
> the patch looks OK, but I wonder why you included can_be_discarded check?
> If function is in comdat I believe the optimization still can happen.
> Perhaps you only want to check DECL_EXTERNAL?

TBH I was inspired by can_replace_by_local_alias which prohibits local
alias for discardable functions.  But I agree that situation here is
different and it's indeed not needed (if function is discarded, it
does not matter whether we optimized recursive calls).

Could you elaborate why we still need DECL_EXTERNAL though?

-Y


Re: [PATCH] Fix PR81362: Vector peeling

2017-07-18 Thread Andreas Krebbel
On 07/12/2017 05:11 PM, Robin Dapp wrote:
> gcc/ChangeLog:
>
> 2017-07-12  Robin Dapp  
>
> * (vect_enhance_data_refs_alignment):
>   Remove body_cost_vec from _vect_peel_extended_info.
>   tree-vect-data-refs.c (vect_peeling_hash_get_lowest_cost):
>   Do not set body_cost_vec.
> (vect_peeling_hash_choose_best_peeling): Remove body_cost_vec
>   and npeel.

Applied with a fixed changelog entry. Thanks!

-Andreas-



[PATCH] Fix PR81403

2017-07-18 Thread Richard Biener

The following fixes VN simplification during PHI translation where we
can assign value-numbers to new expressions in different context and thus
have to reset flow-sensitive SSA info like we do during VN.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2017-07-18  Richard Biener  

PR tree-optimization/80620
PR tree-optimization/81403
* tree-ssa-pre.c (phi_translate_1): Clear range and points-to
info when re-using a VN table entry.

* gcc.dg/torture/pr80620.c: New testcase.
* gcc.dg/torture/pr81403.c: Likewise.

Index: gcc/tree-ssa-pre.c
===
--- gcc/tree-ssa-pre.c  (revision 250270)
+++ gcc/tree-ssa-pre.c  (working copy)
@@ -1489,6 +1489,45 @@ phi_translate_1 (pre_expr expr, bitmap_s
PRE_EXPR_NARY (expr) = nary;
new_val_id = nary->value_id;
get_or_alloc_expression_id (expr);
+   /* When we end up re-using a value number make sure that
+  doesn't have unrelated (which we can't check here)
+  range or points-to info on it.  */
+   if (result
+   && INTEGRAL_TYPE_P (TREE_TYPE (result))
+   && SSA_NAME_RANGE_INFO (result)
+   && ! SSA_NAME_IS_DEFAULT_DEF (result))
+ {
+   if (! VN_INFO (result)->info.range_info)
+ {
+   VN_INFO (result)->info.range_info
+ = SSA_NAME_RANGE_INFO (result);
+   VN_INFO (result)->range_info_anti_range_p
+ = SSA_NAME_ANTI_RANGE_P (result);
+ }
+   if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+   fprintf (dump_file, "clearing range info of ");
+   print_generic_expr (dump_file, result);
+   fprintf (dump_file, "\n");
+ }
+   SSA_NAME_RANGE_INFO (result) = NULL;
+ }
+   else if (result
+&& POINTER_TYPE_P (TREE_TYPE (result))
+&& SSA_NAME_PTR_INFO (result)
+&& ! SSA_NAME_IS_DEFAULT_DEF (result))
+ {
+   if (! VN_INFO (result)->info.ptr_info)
+ VN_INFO (result)->info.ptr_info
+   = SSA_NAME_PTR_INFO (result);
+   if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+   fprintf (dump_file, "clearing points-to info of ");
+   print_generic_expr (dump_file, result);
+   fprintf (dump_file, "\n");
+ }
+   SSA_NAME_PTR_INFO (result) = NULL;
+ }
  }
else
  {
Index: gcc/testsuite/gcc.dg/torture/pr80620.c
===
--- gcc/testsuite/gcc.dg/torture/pr80620.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr80620.c  (working copy)
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32plus } */
+
+long long int a = -465274079317386463LL;
+int b = 856872806;
+int c = -1940894202;
+int d = 1718449211;
+int e = -392681565;
+unsigned long long int f = 13521452247506316486ULL;
+int g = -13194608;
+
+__attribute__((noinline, noclone))
+void foo ()
+{
+  if (!a - a)
+c = b = 0;
+  else
+d = 3UL * a == 0;
+  if (g / a)
+e = 0 < -a + 500849970701012771LL + (unsigned long) -a;
+  else
+f = 4081116982543369LL & a;
+}
+
+int
+main ()
+{
+  asm volatile ("" : : : "memory");
+  foo ();
+  if (f != 2818598057803777LL)
+__builtin_abort ();
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/torture/pr81403.c
===
--- gcc/testsuite/gcc.dg/torture/pr81403.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr81403.c  (working copy)
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32plus } */
+
+short var_9 = 19581;
+unsigned char var_33 = 21;
+long long int var_55 = 286697804684061197LL;
+long long int var_59 = -1962393262513510540LL;
+long long int var_71 = 4731868609112929952LL;
+long long int var_773 = -478463345624769LL;
+short var_776 = 5894;
+long long int var_1321 = 7573221950916697355LL;
+unsigned char uc = 217;
+
+void foo()
+{
+  if (var_55)
+var_71 = 0;
+  if (var_9 != ~(0 < uc))
+var_773 = 0;
+  else
+var_776 = 1 / ~var_9 * -1;
+  if (var_33)
+var_59 = ~var_9 & 10393;
+  var_1321 = ~var_9;
+}
+int main()
+{
+  foo();
+  if (var_59 != 8320)
+__builtin_abort ();
+  return 0;
+}


Fix ICE in estimate_bb_frequencies

2017-07-18 Thread Jan Hubicka
Hi,
this patch fixes ICE in estimate_bb_frequencies which triggers because we forget
to compute probability for blocks whose count is earlier statically determined 
to be
0.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

Honza

PR middle-end/81462
* predict.c (set_even_probabilities): Cleanup; do not affect
probabilities that are already known.
(combine_predictions_for_bb): Call even when count is set.

* g++.dg/torture/pr81462.C: New testcase.

Index: predict.c
===
--- predict.c   (revision 250295)
+++ predict.c   (working copy)
@@ -817,16 +817,25 @@ static void
 set_even_probabilities (basic_block bb,
hash_set *unlikely_edges = NULL)
 {
-  unsigned nedges = 0;
+  unsigned nedges = 0, unlikely_count = 0;
   edge e = NULL;
   edge_iterator ei;
+  profile_probability all = profile_probability::always ();
 
   FOR_EACH_EDGE (e, ei, bb->succs)
-if (!unlikely_executed_edge_p (e))
-  nedges ++;
+if (e->probability.initialized_p ())
+  all -= e->probability;
+else if (!unlikely_executed_edge_p (e))
+  {
+nedges ++;
+if (unlikely_edges != NULL && unlikely_edges->contains (e))
+ {
+   all -= profile_probability::very_unlikely ();
+   unlikely_count++;
+ }
+  }
 
   /* Make the distribution even if all edges are unlikely.  */
-  unsigned unlikely_count = unlikely_edges ? unlikely_edges->elements () : 0;
   if (unlikely_count == nedges)
 {
   unlikely_edges = NULL;
@@ -836,13 +845,14 @@ set_even_probabilities (basic_block bb,
   unsigned c = nedges - unlikely_count;
 
   FOR_EACH_EDGE (e, ei, bb->succs)
-if (!unlikely_executed_edge_p (e))
+if (e->probability.initialized_p ())
+  ;
+else if (!unlikely_executed_edge_p (e))
   {
if (unlikely_edges != NULL && unlikely_edges->contains (e))
  e->probability = profile_probability::very_unlikely ();
else
- e->probability = profile_probability::guessed_always ()
-   .apply_scale (1, c);
+ e->probability = all.apply_scale (1, c).guessed ();
   }
 else
   e->probability = profile_probability::never ();
@@ -1151,7 +1161,7 @@ combine_predictions_for_bb (basic_block
  if (pred->ep_probability <= PROB_VERY_UNLIKELY)
unlikely_edges.add (pred->ep_edge);
 
-  if (!bb->count.initialized_p () && !dry_run)
+  if (!dry_run)
set_even_probabilities (bb, _edges);
   clear_bb_predictions (bb);
   if (dump_file)
Index: testsuite/g++.dg/torture/pr81462.C
===
--- testsuite/g++.dg/torture/pr81462.C  (revision 0)
+++ testsuite/g++.dg/torture/pr81462.C  (working copy)
@@ -0,0 +1,17 @@
+// { dg-do compile }
+// { dg-options "-O1 -fno-ipa-pure-const" }
+struct B {
+B* self;
+B() : self( this ) { self->f(); }
+virtual void f() = 0;
+};
+
+struct D : B
+{
+void f() {}
+};
+
+int main()
+{
+D d;
+}


[GCC ARM]Remove unused variable in arm

2017-07-18 Thread Bin Cheng
Hi,
This leftover unused variable breaks arm bootstrap.  Simply remove it.

Thanks,
bin
2017-07-18  Bin Cheng  

* config/arm/arm.c (arm_emit_store_exclusive): Remove unused var.diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1b7b382..139ab70 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -28268,8 +28268,6 @@ arm_emit_store_exclusive (machine_mode mode, rtx bval, 
rtx rval,
 static void
 emit_unlikely_jump (rtx insn)
 {
-  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
-
   rtx_insn *jump = emit_jump_insn (insn);
   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
 }


Re: [PATCH PR81408]Turn TREE level unsafe loop optimizations warning to missed optimization message

2017-07-18 Thread Richard Biener
On Tue, Jul 18, 2017 at 10:00 AM, Bin Cheng  wrote:
> Hi,
> I removed unsafe loop optimization on TREE level last year, so GCC doesn't do 
> unsafe
> loop optimizations on TREE now.  All "unsafe loop optimizations" warnings 
> reported by
> TREE optimizers are simply missed optimizations.  This patch turns such 
> warning into
> missed optimization messages.  I didn't change when this will be dumped, for 
> now it is
> when called from ivopts.
> Bootstrap and test on x86_64 and AArch64.  Is it OK?

Ok but can you change the testcase to not scan the ivopts dump but use
-fopt-info-loop-missed?
You should be able to match the output with dg-message.

Thanks,
Richard.

> Thanks,
> bin
> 2017-07-13  Bin Cheng  
>
> PR target/81408
> * tree-ssa-loop-niter.c (number_of_iterations_exit): Dump missed
> optimization for loop niter analysis.
>
> gcc/testsuite/ChangeLog
> 2017-07-13  Bin Cheng  
>
> PR target/81408
> * g++.dg/tree-ssa/pr81408.C: New.
> * gcc.dg/tree-ssa/pr19210-1.c: Check dump message rather than warning.


Re: [PATCH] vec_merge + vec_duplicate + vec_concat simplification

2017-07-18 Thread Kyrill Tkachov


On 05/07/17 16:14, Kyrill Tkachov wrote:


On 27/06/17 23:28, Jeff Law wrote:

On 06/06/2017 02:35 AM, Kyrill Tkachov wrote:

Hi all,

Another vec_merge simplification that's missing is transforming:
(vec_merge (vec_duplicate x) (vec_concat (y) (z)) (const_int N))
into
(vec_concat x z) if N == 1 (0b01) or
(vec_concat y x) if N == 2 (0b10)

For the testcase in this patch on aarch64 this allows us to try matching
during combine the pattern:
(set (reg:V2DI 78 [ x ])
 (vec_concat:V2DI
 (mem:DI (reg/v/f:DI 76 [ y ]) [1 *y_4(D)+0 S8 A64])
 (mem:DI (plus:DI (reg/v/f:DI 76 [ y ])
 (const_int 8 [0x8])) [1 MEM[(long long int *)y_4(D) +
8B]+0 S8 A64])))

rather than the more complex:
(set (reg:V2DI 78 [ x ])
 (vec_merge:V2DI (vec_duplicate:V2DI (mem:DI (plus:DI (reg/v/f:DI 76
[ y ])
 (const_int 8 [0x8])) [1 MEM[(long long int *)y_4(D)
+ 8B]+0 S8 A64]))
 (vec_duplicate:V2DI (mem:DI (reg/v/f:DI 76 [ y ]) [1 *y_4(D)+0
S8 A64]))
 (const_int 2 [0x2])))

We don't actually have an aarch64 pattern for the simplified version
above, but it's a simple enough
form to add, so this patch adds such a pattern that performs a
concatenated load of two 64-bit vectors
in adjacent memory locations as a single Q-register LDR. The new aarch64
pattern is needed to demonstrate
the effectiveness of the simplify-rtx change, so I've kept them together
as one patch.

Now for the testcase in the patch we can generate:
construct_lanedi:
 ldr q0, [x0]
 ret

construct_lanedf:
 ldr q0, [x0]
 ret

instead of:
construct_lanedi:
 ld1r{v0.2d}, [x0]
 ldr x0, [x0, 8]
 ins v0.d[1], x0
 ret

construct_lanedf:
 ld1r{v0.2d}, [x0]
 ldr d1, [x0, 8]
 ins v0.d[1], v1.d[0]
 ret

The new memory constraint Utq is needed because we need to allow only
the Q-register addressing modes but
the MEM expressions in the RTL pattern have 64-bit vector modes, and if
we don't constrain them they will
allow the D-register addressing modes during register allocation/address
mode selection, which will produce
invalid assembly.

Bootstrapped and tested on aarch64-none-linux-gnu.
Ok for trunk?

Thanks,
Kyrill

2017-06-06  Kyrylo Tkachov  

 * simplify-rtx.c (simplify_ternary_operation, VEC_MERGE):
 Simplify vec_merge of vec_duplicate and vec_concat.
 * config/aarch64/constraints.md (Utq): New constraint.
 * config/aarch64/aarch64-simd.md (load_pair_lanes): New
 define_insn.

2017-06-06  Kyrylo Tkachov  

 * gcc.target/aarch64/load_v2vec_lanes_1.c: New test.

OK for the simplify-rtx bits.


Thanks Jeff.
I'd like to ping the aarch64 bits:
https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00273.html



Ping.

Thanks,
Kyrill


I've re-bootstrapped and re-tested these patches on aarch64 with today's trunk.

Kyrill


jeff







Re: [PATCH] Simplify vec_merge of vec_duplicate with const_vector

2017-07-18 Thread Kyrill Tkachov


On 05/07/17 16:14, Kyrill Tkachov wrote:


On 27/06/17 23:29, Jeff Law wrote:

On 06/06/2017 02:25 AM, Kyrill Tkachov wrote:

Hi all,

I'm trying to improve some of the RTL-level handling of vector lane
operations on aarch64 and that
involves dealing with a lot of vec_merge operations. One simplification
that I noticed missing
from simplify-rtx are combinations of vec_merge with vec_duplicate.
In this particular case:
(vec_merge (vec_duplicate (X)) (const_vector [A, B]) (const_int N))

which can be replaced with

(vec_concat (X) (B)) if N == 1 (0b01) or
(vec_concat (A) (X)) if N == 2 (0b10).

For the aarch64 testcase in this patch this simplifications allows us to
try to combine:
(set (reg:V2DI 77 [ x ])
 (vec_concat:V2DI (mem:DI (reg:DI 0 x0 [ y ]) [1 *y_3(D)+0 S8 A64])
 (const_int 0 [0])))

instead of the more complex:
(set (reg:V2DI 77 [ x ])
 (vec_merge:V2DI (vec_duplicate:V2DI (mem:DI (reg:DI 0 x0 [ y ]) [1
*y_3(D)+0 S8 A64]))
 (const_vector:V2DI [
 (const_int 0 [0])
 (const_int 0 [0])
 ])
 (const_int 1 [0x1])))


For the simplified form above we already have an aarch64 pattern:
*aarch64_combinez which
is missing a DI/DFmode version due to an oversight, so this patch
extends that pattern as well to
use the VDC mode iterator that includes DI and DFmode (as well as V2HF
which VD_BHSI was missing).
The aarch64 hunk is needed to see the benefit of the simplify-rtx.c
hunk, so I didn't split them
into separate patches.

Before this for the testcase we'd generate:
construct_lanedi:
 moviv0.4s, 0
 ldr x0, [x0]
 ins v0.d[0], x0
 ret

construct_lanedf:
 moviv0.2d, 0
 ldr d1, [x0]
 ins v0.d[0], v1.d[0]
 ret

but now we can generate:
construct_lanedi:
 ldr d0, [x0]
 ret

construct_lanedf:
 ldr d0, [x0]
 ret

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?

Thanks,
Kyrill

2017-06-06  Kyrylo Tkachov  

 * simplify-rtx.c (simplify_ternary_operation, VEC_MERGE):
 Simplify vec_merge of vec_duplicate and const_vector.
 * config/aarch64/predicates.md (aarch64_simd_or_scalar_imm_zero):
 New predicate.
 * config/aarch64/aarch64-simd.md (*aarch64_combinez): Use VDC
 mode iterator.  Update predicate on operand 1 to
 handle non-const_vec constants.  Delete constraints.
 (*aarch64_combinez_be): Likewise for operand 2.

2017-06-06  Kyrylo Tkachov  

 * gcc.target/aarch64/construct_lane_zero_1.c: New test.

OK for the simplify-rtx parts.


Thanks Jeff.
Pinging the aarch64 parts at:
https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00272.html


Ping.

Thanks,
Kyrill


I've re-bootstrapped and re-tested the patches on top of current trunk.

Kyrill


jeff







Re: [PATCH PR81408]Turn TREE level unsafe loop optimizations warning to missed optimization message

2017-07-18 Thread Bin.Cheng
On Tue, Jul 18, 2017 at 9:31 AM, Richard Biener
 wrote:
> On Tue, Jul 18, 2017 at 10:00 AM, Bin Cheng  wrote:
>> Hi,
>> I removed unsafe loop optimization on TREE level last year, so GCC doesn't 
>> do unsafe
>> loop optimizations on TREE now.  All "unsafe loop optimizations" warnings 
>> reported by
>> TREE optimizers are simply missed optimizations.  This patch turns such 
>> warning into
>> missed optimization messages.  I didn't change when this will be dumped, for 
>> now it is
>> when called from ivopts.
>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>
> Ok but can you change the testcase to not scan the ivopts dump but use
> -fopt-info-loop-missed?
> You should be able to match the output with dg-message.
Thanks for reviewing.  New patch with test case updated accordingly.  Is it OK?

Thanks,
bin
>
> Thanks,
> Richard.
>
>> Thanks,
>> bin
>> 2017-07-13  Bin Cheng  
>>
>> PR target/81408
>> * tree-ssa-loop-niter.c (number_of_iterations_exit): Dump missed
>> optimization for loop niter analysis.
>>
>> gcc/testsuite/ChangeLog
>> 2017-07-13  Bin Cheng  
>>
>> PR target/81408
>> * g++.dg/tree-ssa/pr81408.C: New.
>> * gcc.dg/tree-ssa/pr19210-1.c: Check dump message rather than 
>> warning.
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr81408.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr81408.C
new file mode 100644
index 000..f94544b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr81408.C
@@ -0,0 +1,92 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=gnu++11 -fopt-info-loop-missed 
-Wunsafe-loop-optimizations" } */
+
+namespace a {
+void b () __attribute__ ((__noreturn__));
+template  struct d;
+template  struct d
+{
+  typedef e f;
+};
+struct g
+{
+  template  using i = h *;
+};
+}
+using a::d;
+template  class k
+{
+  j l;
+
+public:
+  typename d::f operator* () {}
+  void operator++ () { ++l; }
+  j
+  aa ()
+  {
+return l;
+  }
+};
+template 
+bool
+operator!= (k o, k p2)
+{
+  return o.aa () != p2.aa ();
+}
+struct p;
+namespace a {
+struct F
+{
+  struct q
+  {
+using ai = g::i;
+  };
+  using r = q::ai;
+};
+class H
+{
+public:
+  k begin ();
+  k end ();
+};
+int s;
+class I
+{
+public:
+  void
+  aq (char)
+  {
+if (s)
+  b ();
+  }
+};
+class u : public I
+{
+public:
+  void
+  operator<< (u o (u))
+  {
+o (*this);
+  }
+  u operator<< (void *);
+};
+template 
+at
+av (au o)
+{
+  o.aq ('\n');
+}
+u ax;
+}
+struct p
+{
+  char *ay;
+};
+a::H t;
+void
+ShowHelpListCommands ()
+{
+  for (auto c : t) /* { dg-message "note: missed loop optimization: niters 
analysis .*" } */
+a::ax << c.ay << a::av;
+}
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c
index 3c8ee06..0fa5600 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c
@@ -1,15 +1,15 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -Wunsafe-loop-optimizations" } */
+/* { dg-options "-O2 -fopt-info-loop-missed -Wunsafe-loop-optimizations" } */
 extern void g(void);
 
 void
 f (unsigned n)
 {
   unsigned k;
-  for(k = 0;k <= n;k++) /* { dg-warning "missed loop optimization.*overflow" } 
*/
+  for(k = 0;k <= n;k++) /* { dg-message "note: missed loop optimization: 
niters analysis .*" } */
 g();
 
-  for(k = 0;k <= n;k += 4) /* { dg-warning "missed loop 
optimization.*overflow" } */
+  for(k = 0;k <= n;k += 4) /* { dg-message "note: missed loop optimization: 
niters analysis .*" } */
 g();
 
   /* We used to get warning for this loop.  However, since then # of iterations
@@ -21,9 +21,9 @@ f (unsigned n)
 g();
 
   /* So we need the following loop, instead.  */
-  for(k = 4;k <= n;k += 5) /* { dg-warning "missed loop 
optimization.*overflow" } */
+  for(k = 4;k <= n;k += 5) /* { dg-message "note: missed loop optimization: 
niters analysis .*" } */
 g();
   
-  for(k = 15;k >= n;k--) /* { dg-warning "missed loop optimization.*overflow" 
} */
+  for(k = 15;k >= n;k--) /* { dg-message "note: missed loop optimization: 
niters analysis .*" } */
 g();
 }
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 5a7cab5..1421002 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -2378,9 +2378,9 @@ number_of_iterations_exit (struct loop *loop, edge exit,
 return true;
 
   if (warn)
-warning_at (gimple_location_safe (stmt),
-   OPT_Wunsafe_loop_optimizations,
-   "missed loop optimization, the loop counter may overflow");
+dump_printf_loc (MSG_MISSED_OPTIMIZATION, gimple_location_safe (stmt),
+"missed loop optimization: niters analysis ends up "
+"with assumptions.\n");
 
   return false;
 }


Re: Fix Eh delivery in partitioned functions

2017-07-18 Thread Jan Hubicka
> On Tue, 18 Jul 2017, Jan Hubicka wrote:
> 
> > Hi,
> > this patch fixes wrong code issue with BB partitioning where sometimes EH
> > is not delivered.  This is very old issue that affect all release branches
> > with -fprofile-use, but recently surfaced as libstdc++ testsuite regression
> > because we now partition functions based on static profile prediction.
> > 
> > The problem is that EH tables are stored as offsets from start of functions.
> > In the record however value 0 is special and means that there is no landing
> > pad for a given region.  Normally this is safe because landing pads never
> > appear as very first label in function.  This is however no longer true with
> > partitining where cold partition is actually quite likely to start by 
> > landing
> > pad.
> > 
> > The change in except.c adds sanity check that no EH landing pads are very 
> > first
> > in the insn stream.  The change in bb-reorder makes reorder to chose
> > non-landing-pad BB as first trace for the cold partition. Such BB always 
> > exists
> > because landing pads must be in same partition as the instruction throwing 
> > them
> > and we never make BB both landing pad and reachable by normal control flow.
> > However I am not thrilled by the fix as it is a bit fragile in case some
> > optimization happens after bb partitioning and code is moved away.  Also 
> > the
> > logic can be confused by asm statement which may result in no code (again
> > however the BB reachable from outside world should contain something that
> > produce EH that is a real instruction).
> > 
> > Ideas for better fix would be welcome then.  If the assert I added triggers
> > for valid reasons, we may just end up adding a NOP in the rare case we do
> > not succeed arranging cold partition to not start with landing pad.
> 
> Yeah, I'd rather pad the function start with a nop if it starts with a
> landing pad.  How difficult would it be to arrange for this?  I suppose
> we'd need to touch each and every target to accomplish this?  Or end up
> using gen_nop in generic code?

I think we could just output the nop from generic code - I think it can be done by
final_scan_insn. I don't know however if we have a way to tell if the section
starts with a landing pad?

Honza
> 
> Richard.
> 
> > Bootstrapped/regtested x86_64-linux, looks sane?
> > 
> > Honza
> > 
> > PR middle-end/81331 
> > * except.c (first_in_partition): New function.
> > (dw2_output_call_site_table): Sanity check that landing pads are not
> > very first in the partition.
> > * bb-reorder.c (ok_to_be_first): New function.
> > (connect_traces): Avoid traces that are !ok_to_be_first to start
> > partitions.
> > Index: except.c
> > ===
> > --- except.c(revision 250226)
> > +++ except.c(working copy)
> > @@ -2724,6 +2724,23 @@ sjlj_size_of_call_site_table (void)
> >return size;
> >  }
> >  
> > +/* Return true if L will appear as very first in its partition.  */
> > +
> > +bool
> > +first_in_partition (rtx_insn *l)
> > +{
> > +  while (l != NULL_RTX)
> > +{
> > +  if (active_insn_p (l))
> > +   return false;
> > +  else if (GET_CODE (l) == NOTE
> > +  && NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
> > +   return true;
> > +  l = PREV_INSN (l);
> > +}
> > +  return true;
> > +}
> > +
> >  static void
> >  dw2_output_call_site_table (int cs_format, int section)
> >  {
> > @@ -2749,8 +2766,14 @@ dw2_output_call_site_table (int cs_forma
> >ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + 
> > i);
> >  
> >if (cs->landing_pad)
> > -   ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> > -CODE_LABEL_NUMBER (cs->landing_pad));
> > +   {
> > + ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> > +  CODE_LABEL_NUMBER (cs->landing_pad));
> > + /* Be sure that the offset will not be 0 as that would make EH
> > +delivery code to think that there is no landing pad.  */
> > + gcc_checking_assert (!first_in_partition
> > +  (as_a  (cs->landing_pad)));
> > +   }
> >  
> >/* ??? Perhaps use insn length scaling if the assembler supports
> >  generic arithmetic.  */
> > Index: bb-reorder.c
> > ===
> > --- bb-reorder.c(revision 250226)
> > +++ bb-reorder.c(working copy)
> > @@ -1066,6 +1066,21 @@ connect_better_edge_p (const_edge e, boo
> >return is_better_edge;
> >  }
> >  
> > +/* If we place EH landing pad as very first BB in the partition, its offset
> > +   from start of function is 0 which is special cased by the eh table to 
> > mean
> > +   no landing pad.  For this reason such BBs can not appear as very first 
> > in
> > +   the partition.  */
> > +static bool
> > +ok_to_be_first (struct trace *t)
> > +{
> > +  edge e;
> > +  

Re: [PATCH] gcc: ada: delete old $(P) reference

2017-07-18 Thread Arnaud Charlet
> From: Mike Frysinger 
> 
> The P variable was deleted back in Nov 2015 (svn rev 231062),
> but its expansion was missed.  Delete those now too.
> 
> 2017-07-18  Mike Frysinger  
> 
>   * gcc-interface/Makefile.in ($(P)): Delete

OK, thanks.


Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]

2017-07-18 Thread Tamar Christina
Ping.

From: Tamar Christina
Sent: Monday, July 10, 2017 4:47 PM
To: Andrew Pinski
Cc: GCC Patches; nd; l...@redhat.com; i...@airs.com; rguent...@suse.de
Subject: Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]

Hi All,

I've re-spun the patch with the changes requested.


This is only done when not honoring signaling NaNs.
This transformation is done at expand time by using
a new optab "xorsign". If the optab is not available
then copysign is expanded as normal.

Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
Regression done on aarch64-none-linux-gnu and no regressions.

Ok for trunk?

gcc/
2017-07-10  Tamar Christina  
Andrew Pinski 

PR middle-end/19706
* expr.c (is_copysign_call_with_1): New.
(maybe_expand_mult_copysign): Likewise.
(expand_expr_real_2): Expand copysign.
* optabs.def (xorsign_optab): New.


From: Andrew Pinski 
Sent: Monday, July 10, 2017 12:21:29 AM
To: Tamar Christina
Cc: GCC Patches; nd; l...@redhat.com; i...@airs.com; rguent...@suse.de
Subject: Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]

On Mon, Jun 12, 2017 at 12:56 AM, Tamar Christina
 wrote:
> Hi All,
>
> this patch implements a optimization rewriting
>
> x * copysign (1.0, y) and
> x * copysign (-1.0, y)
>
> to:
>
> x ^ (y & (1 << sign_bit_position))
>
> This is done by creating a special builtin during matching and generate the
> appropriate instructions during expand. This new builtin is called XORSIGN.
>
> The expansion of xorsign depends on if the backend has an appropriate optab
> available. If this is not the case then we use a modified version of the 
> existing
> copysign which does not take the abs value of the first argument as a fall 
> back.
>
> This patch is a revival of a previous patch
> https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html
>
> Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
> Regression done on aarch64-none-linux-gnu and no regressions.


Note this is also PR 19706.

Thanks,
Andrew

>
> Ok for trunk?
>
> gcc/
> 2017-06-07  Tamar Christina  
>
> * builtins.def (BUILT_IN_XORSIGN, BUILT_IN_XORSIGNF): New.
> (BUILT_IN_XORSIGNL, BUILT_IN_XORSIGN_FLOAT_NX): Likewise.
> * match.pd (mult (COPYSIGN:s real_onep @0) @1): New simplifier.
> (mult (COPYSIGN:s real_mus_onep @0) @1): Likewise.
> (copysigns @0 (negate @1)): Likewise.
> * builtins.c (expand_builtin_copysign): Promoted local to argument.
> (expand_builtin): Added CASE_FLT_FN_FLOATN_NX (BUILT_IN_XORSIGN) and
> CASE_FLT_FN (BUILT_IN_XORSIGN).
> (BUILT_IN_COPYSIGN): Updated function call.
> * optabs.h (expand_copysign): New bool.
> (expand_xorsign): New.
> * optabs.def (xorsign_optab): New.
> * optabs.c (expand_copysign): New parameter.
> * fortran/f95-lang.c (xorsignl, xorsign, xorsignf): New.
> * fortran/mathbuiltins.def (XORSIGN): New.
>
> gcc/testsuite/
> 2017-06-07  Tamar Christina  
>
> * gcc.dg/tree-ssa/xorsign.c: New.
> * gcc.dg/xorsign_exec.c: New.
> * gcc.dg/vec-xorsign_exec.c: New.
> * gcc.dg/tree-ssa/reassoc-39.c (f2, f3): Updated constant to 2.


Re: [GCC ARM]Remove unused variable in arm

2017-07-18 Thread Kyrill Tkachov

Hi Bin,

On 18/07/17 09:11, Bin Cheng wrote:

Hi,
This leftover unused variable breaks arm bootstrap.  Simply remove it.

Thanks,
bin
2017-07-18  Bin Cheng  

* config/arm/arm.c (arm_emit_store_exclusive): Remove unused var.


The ChangeLog entry should say emit_unlikely_jump rather than 
arm_emit_store_exclusive.
Ok with fixed ChangeLog entry.

Kyrill


Re: [PATCH] Move static chain and non-local goto init after NOTE_INSN_FUNCTION_BEG (PR sanitize/81186).

2017-07-18 Thread Martin Liška
On 07/17/2017 03:15 PM, Michael Matz wrote:
> Hello,
> 
> On Mon, 17 Jul 2017, Martin Liška wrote:
> 
>> which does all the stack preparation (including the problematic call to 
>> __asan_stack_malloc_N).
>>
>> Note that this code still should be placed before parm_birth_note as we 
>> can't say that params are ready before a fake stack is prepared.
> 
> Yes, understood.
> 
>> Then we generate code that loads the implicit chain argument:
>>
>> (gdb) p debug_rtx_list(get_insns(), 100)
>> (note 1 0 37 NOTE_INSN_DELETED)
>>
>> (note 37 1 38 NOTE_INSN_FUNCTION_BEG)
>>
>> (insn 38 37 39 (set (reg/f:DI 94 [ CHAIN.1 ])
>> (reg:DI 39 r10 [ CHAIN.1 ])) 
>> "/home/marxin/Programming/gcc/gcc/testsuite/gcc.dg/asan/pr81186.c":9 -1
>>  (nil))
>>
>> (insn 39 38 0 (set (mem/c:DI (plus:DI (reg/f:DI 82 virtual-stack-vars)
>> (const_int -584 [0xfdb8])) [0  S8 A64])
>> (reg:DI 39 r10 [ CHAIN.1 ])) 
>> "/home/marxin/Programming/gcc/gcc/testsuite/gcc.dg/asan/pr81186.c":9 -1
>>  (nil))
>>
>> Which is problematic as using virtual-stack-vars which should point to 
>> fake stack done by AddressSanitizer in __asan_stack_malloc_N.
> 
> If anything, then only the stack access is problematic, i.e. the last 
> instruction.  I don't understand why that should be problematic, though.  

Hi.

Thanks one more time; this PR and the whole topic of function prologues have been
really educational.
So short answer for your email: marking parm_birth_insn after static chain init 
solves the problem :)
It's because:

(insn 2 1 3 (set (reg/f:DI 100 [ CHAIN.2 ])
(reg:DI 39 r10 [ CHAIN.2 ])) "/tmp/nested.c":6 -1
 (nil))

(insn 3 2 4 (set (mem/c:DI (plus:DI (reg/f:DI 82 virtual-stack-vars)
(const_int -8 [0xfff8])) [0  S8 A64])
(reg:DI 39 r10 [ CHAIN.2 ])) "/tmp/nested.c":6 -1
 (nil))

is just storage of the static chain pointer from the caller, where the content of
the FRAME struct lives on stack (and thus on
shadow stack). That said, it's perfectly fine to store the chain pointer to the real
stack of the callee.

Thus I'm going to test attached patch.

P.S. One interesting side effect of how static chain is implemented:

Consider:

int
main ()
{
  __label__ l;
  int buffer[100];
  void f ()
  {
int a[123];
*(&a[0] - 4) = 123;

goto l;
  }

  f ();
l:
  return 0;
}

It's funny that *(&a[0] - 4) actually corrupts __nl_goto_buf and we end up 
with a
dead signal:

ASAN:DEADLYSIGNAL
=
==30888==ERROR: AddressSanitizer: SEGV on unknown address 0x (pc 
0x bp 0x sp 0x7ffe049b T0)

Thanks,
Martin

> Probably because I don't know much about the ASAN implementation.  But why 
> should there be something magic about using the non-asan stack?  Most 
> local variable accesses are rewritten to be in terms of the fake stack, 
> but those that aren't could use the normal stack just fine, can't they?
> 
> If that really is a problem then that could also be rectified by splitting 
> the static_chain_decl in expand_function_start a bit, ala this:
> 
>   if (cfun->static_chain_decl) {
> all code except the last "if (!optimize) store-into-stack"
>   }
>   emit_note; parm_birth_insn = ...
>   if (cfun->static_chain_decl && !optimize) {
> store into assign_stack_local
>   }
> 
> (requires moving some local variable to an outer scope, but hey).
> 
> But what you say above mystifies me.  You claim that access via 
> virtual-stack-vars is problematic before the shadow stack is created by 
> ASAN.  But the whole parameter setup always uses such local stack storage 
> whenever it needs.  And those definitely happen before the ASAN setup.  
> See the subroutines of assign_parms, (e.g. assign_parm_setup_block and 
> assign_parm_setup_stack).  You might need to use special function argument 
> types or special ABIs to trigger this, though you should be able to find 
> some cases to trigger also on i386 or x86_64.
> 
> So, if the stack access for the static chain is problematic I don't see 
> why the stack accesses for the parameters are not.  And if they indeed are 
> problematic, then something is confused within ASAN, and the fix for that 
> confusion is not to move parm_birth_insn, but something else (I can't say 
> what, as I don't know much about how ASAN is supposed to work in such 
> situations).
> 
> 
> Ciao,
> Michael.
> 

>From 13d08eb4c7d1ff7cddd130acad405ec343cb826f Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 13 Jul 2017 13:37:47 +0200
Subject: [PATCH] Move static chain and non-local goto init after
 NOTE_INSN_FUNCTION_BEG

gcc/ChangeLog:

2017-06-27  Martin Liska  

PR sanitizer/81186
	* function.c (expand_function_start): Set parm_birth_insn after
	static chain is initialized.

gcc/testsuite/ChangeLog:

2017-06-27  Martin Liska  

PR sanitizer/81186
	* gcc.dg/asan/pr81186.c: New test.
---
 gcc/function.c  | 20 

Re: [PATCH][PR 59521] Respect probabilities when expanding switch statement

2017-07-18 Thread Yuri Gribov
On Tue, Jul 18, 2017 at 8:45 AM, Jan Hubicka  wrote:
>> Hi all,
>>
>> Currently all cases in switch statement are treated as having equal
>> probabilities which causes suboptimal code as demonstrated in
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59521 . This patch
>> modifies expander to select pivot point for decision tree so that
>> probabilities of cases on the left are roughly equal to probabilities
>> on the right.
>>
>> Patch survives bootstrap and regtesting on x64 but has some issues:
>> * tests are fragile but I'm not sure how to make them better
>> * I haven't done any performance measurements - would these be needed?
>> I don't have access to SPEC these days, any other suggestions?
>
> I think we could just check if daily testers shows some regressions after
> patch is committed. It seems the right thing to do.

You mean gcc.opensuse.org? Makes sense.

>> Patch is jointly authored with Martin.
>
> 2017-06-29  Yury Gribov  
> Martin Liska  
>
> PR middle-end/59521
> gcc/
> * predict.c (set_even_probabilities): Handle case of a single
> likely edge.
>
> I have made some changes to this function to fix another PR. So you may need
> to update the patch.

Will do.

> What exactly is set_even_probabilities and combine_predictions_for_bb 
> shooting for?

combine_predictions_for_bb calculates final probability for edges of
if-else or switch statements.

For if-elses this is done by combining values computed by different
predictors using Dempster-Shafer theory.  For switch statement DS is
not used, mainly because we do not have heuristics for predicting
which case will be taken (paper by Larus concluded that using if-else
heuristics does not give good results).

So until this patch we just used set_even_probabilities. The name of
this function is misleading, in addition to setting even probabilities
it can also understand that some edges are very unlikely and set
unlikely probs for those.  With patch it now also understands that one
edge is very likely.

> @@ -2451,7 +2484,30 @@ tree_predict_by_opcode (basic_block bb)
>edge_iterator ei;
>enum br_predictor predictor;
>
> -  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
> +  if (!stmt)
> +return;
> +
> +  if (gswitch *sw = dyn_cast  (stmt))
> +{
> +  tree index = gimple_switch_index (sw);
> +  tree val = expr_expected_value (index, auto_bitmap (),
> + );
> +  if (val && TREE_CODE (val) == INTEGER_CST)
> +   {
> + edge e = find_taken_edge_switch_expr (sw, bb, val);
> + if (predictor == PRED_BUILTIN_EXPECT)
> +   {
> + int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
> + gcc_assert (percent >= 0 && percent <= 100);
> + predict_edge (e, PRED_BUILTIN_EXPECT,
> +   HITRATE (percent));
> +   }
> + else
> +   predict_edge_def (e, predictor, TAKEN);
> +   }
> +}
> +
> +  if (gimple_code (stmt) != GIMPLE_COND)
>
> I think this change can go in separately and is OK
> (along with a testcase that checks that tree profile is right).

Ok.

> I will look into the RTL bits next.
>
> Honza


RE: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]

2017-07-18 Thread Tamar Christina
> 
> Why's this now done during RTL expansion rather than during late GIMPLE,
> using match.pd and an internal function for xorsign?
> 

Mainly because of Andrew's email on the 10th which stated:

> But you should get the general idea.  I would like to see more of these 
> special expand patterns really.

And there were no objections so I figured this was also an acceptable solution.

> Thanks,
> Richard.
> 
> >
> > gcc/
> > 2017-07-10  Tamar Christina  
> > Andrew Pinski 
> >
> > PR middle-end/19706
> > * expr.c (is_copysign_call_with_1): New.
> > (maybe_expand_mult_copysign): Likewise.
> > (expand_expr_real_2): Expand copysign.
> > * optabs.def (xorsign_optab): New.
> >
> > 
> > From: Andrew Pinski 
> > Sent: Monday, July 10, 2017 12:21:29 AM
> > To: Tamar Christina
> > Cc: GCC Patches; nd; l...@redhat.com; i...@airs.com; rguent...@suse.de
> > Subject: Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y)
> > [Patch (1/2)]
> >
> > On Mon, Jun 12, 2017 at 12:56 AM, Tamar Christina
> >  wrote:
> > > Hi All,
> > >
> > > this patch implements a optimization rewriting
> > >
> > > x * copysign (1.0, y) and
> > > x * copysign (-1.0, y)
> > >
> > > to:
> > >
> > > x ^ (y & (1 << sign_bit_position))
> > >
> > > This is done by creating a special builtin during matching and
> > > generate the appropriate instructions during expand. This new builtin is
> called XORSIGN.
> > >
> > > The expansion of xorsign depends on if the backend has an
> > > appropriate optab available. If this is not the case then we use a
> > > modified version of the existing copysign which does not take the abs
> value of the first argument as a fall back.
> > >
> > > This patch is a revival of a previous patch
> > > https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html
> > >
> > > Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no
> issues.
> > > Regression done on aarch64-none-linux-gnu and no regressions.
> >
> >
> > Note this is also PR 19706.
> >
> > Thanks,
> > Andrew
> >
> > >
> > > Ok for trunk?
> > >
> > > gcc/
> > > 2017-06-07  Tamar Christina  
> > >
> > > * builtins.def (BUILT_IN_XORSIGN, BUILT_IN_XORSIGNF): New.
> > > (BUILT_IN_XORSIGNL, BUILT_IN_XORSIGN_FLOAT_NX): Likewise.
> > > * match.pd (mult (COPYSIGN:s real_onep @0) @1): New simplifier.
> > > (mult (COPYSIGN:s real_mus_onep @0) @1): Likewise.
> > > (copysigns @0 (negate @1)): Likewise.
> > > * builtins.c (expand_builtin_copysign): Promoted local to 
> > > argument.
> > > (expand_builtin): Added CASE_FLT_FN_FLOATN_NX
> (BUILT_IN_XORSIGN) and
> > > CASE_FLT_FN (BUILT_IN_XORSIGN).
> > > (BUILT_IN_COPYSIGN): Updated function call.
> > > * optabs.h (expand_copysign): New bool.
> > > (expand_xorsign): New.
> > > * optabs.def (xorsign_optab): New.
> > > * optabs.c (expand_copysign): New parameter.
> > > * fortran/f95-lang.c (xorsignl, xorsign, xorsignf): New.
> > > * fortran/mathbuiltins.def (XORSIGN): New.
> > >
> > > gcc/testsuite/
> > > 2017-06-07  Tamar Christina  
> > >
> > > * gcc.dg/tree-ssa/xorsign.c: New.
> > > * gcc.dg/xorsign_exec.c: New.
> > > * gcc.dg/vec-xorsign_exec.c: New.
> > > * gcc.dg/tree-ssa/reassoc-39.c (f2, f3): Updated constant to 2.
> >
> 
> --
> Richard Biener 
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton,
> HRB 21284 (AG Nuernberg)


[PATCH][PR 59521] Respect probabilities when expanding switch statement

2017-07-18 Thread Yuri Gribov
Hi all,

Currently all cases in switch statement are treated as having equal
probabilities which causes suboptimal code as demonstrated in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59521 . This patch
modifies expander to select pivot point for decision tree so that
probabilities of cases on the left are roughly equal to probabilities
on the right.

Patch survives bootstrap and regtesting on x64 but has some issues:
* tests are fragile but I'm not sure how to make them better
* I haven't done any performance measurements - would these be needed?
I don't have access to SPEC these days, any other suggestions?

Patch is jointly authored with Martin.

-Y


pr59521-1.patch
Description: Binary data


Re: Fix Eh delivery in partitioned functions

2017-07-18 Thread Richard Biener
On Tue, 18 Jul 2017, Jan Hubicka wrote:

> Hi,
> this patch fixes wrong code issue with BB partitioning where sometimes EH
> is not delivered.  This is very old issue that affect all release branches
> with -fprofile-use, but recently surfaced as libstdc++ testsuite regression
> because we now partition functions based on static profile prediction.
> 
> The problem is that EH tables are stored as offsets from start of functions.
> In the record however value 0 is special and means that there is no landing
> pad for a given region.  Normally this is safe because landing pads never
> appear as very first label in function.  This is however no longer true with
> partitining where cold partition is actually quite likely to start by landing
> pad.
> 
> The change in except.c adds sanity check that no EH landing pads are very 
> first
> in the insn stream.  The change in bb-reorder makes reorder to chose
> non-landing-pad BB as first trace for the cold partition. Such BB always 
> exists
> because landing pads must be in same partition as the instruction throwing 
> them
> and we never make a BB both a landing pad and reachable by normal control flow.
> However I am not thrilled by the fix as it is a bit fragile in case some
> optimization happens after bb partitioning and code is moved away.  Also the
> logic can be confused by an asm statement which may result in no code (again,
> however, the BB reachable from the outside world should contain something that
> produces EH, which is a real instruction).
> 
> Ideas for a better fix would be welcome then.  If the assert I added triggers
> for valid reasons, we may just end up adding a NOP in the rare case we do
> not succeed in arranging the cold partition to not start with a landing pad.

Yeah, I'd rather pad the function start with a nop if it starts with a
landing pad.  How difficult would it be to arrange for this?  I suppose
we'd need to touch each and every target to accomplish this?  Or end up
using gen_nop in generic code?

Richard.

> Bootstrapped/regtested x86_64-linux, looks sane?
> 
> Honza
> 
>   PR middle-end/81331 
>   * except.c (first_in_partition): New function.
>   (dw2_output_call_site_table): Sanity check that landing pads are not
>   very first in the partition.
>   * bb-reorder.c (ok_to_be_first): New function.
>   (connect_traces): Avoid traces that are !ok_to_be_first to start
>   partitions.
> Index: except.c
> ===
> --- except.c  (revision 250226)
> +++ except.c  (working copy)
> @@ -2724,6 +2724,23 @@ sjlj_size_of_call_site_table (void)
>return size;
>  }
>  
> +/* Return true if L will appear as very first in its partition.  */
> +
> +bool
> +first_in_partition (rtx_insn *l)
> +{
> +  while (l != NULL_RTX)
> +{
> +  if (active_insn_p (l))
> + return false;
> +  else if (GET_CODE (l) == NOTE
> +&& NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
> + return true;
> +  l = PREV_INSN (l);
> +}
> +  return true;
> +}
> +
>  static void
>  dw2_output_call_site_table (int cs_format, int section)
>  {
> @@ -2749,8 +2766,14 @@ dw2_output_call_site_table (int cs_forma
>ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + i);
>  
>if (cs->landing_pad)
> - ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> -  CODE_LABEL_NUMBER (cs->landing_pad));
> + {
> +   ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> +CODE_LABEL_NUMBER (cs->landing_pad));
> +   /* Be sure that the offset will not be 0 as that would make EH
> +  delivery code to think that there is no landing pad.  */
> +   gcc_checking_assert (!first_in_partition
> +(as_a  (cs->landing_pad)));
> + }
>  
>/* ??? Perhaps use insn length scaling if the assembler supports
>generic arithmetic.  */
> Index: bb-reorder.c
> ===
> --- bb-reorder.c  (revision 250226)
> +++ bb-reorder.c  (working copy)
> @@ -1066,6 +1066,21 @@ connect_better_edge_p (const_edge e, boo
>return is_better_edge;
>  }
>  
> +/* If we place EH landing pad as very first BB in the partition, its offset
> +   from start of function is 0 which is special cased by the eh table to mean
> +   no landing pad.  For this reason such BBs can not appear as very first in
> +   the partition.  */
> +static bool
> +ok_to_be_first (struct trace *t)
> +{
> +  edge e;
> +  edge_iterator ei;
> +  FOR_EACH_EDGE (e, ei, t->first->preds)
> +if (e->flags & EDGE_EH)
> +  return false;
> +  return true;
> +}
> +
>  /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
>  
>  static void
> @@ -1080,6 +1095,7 @@ connect_traces (int n_traces, struct tra
>int freq_threshold;
>gcov_type count_threshold;
>bool for_size = optimize_function_for_size_p 

Re: [PATCH GCC][13/13]Distribute loop with loop versioning under runtime alias check

2017-07-18 Thread Bin.Cheng
On Mon, Jul 17, 2017 at 1:09 PM, Christophe Lyon
 wrote:
> On 17 July 2017 at 12:06, Bin.Cheng  wrote:
>> On Mon, Jul 10, 2017 at 10:32 AM, Christophe Lyon
>>  wrote:
>>> Hi Bin,
>>>
>>> On 30 June 2017 at 12:43, Bin.Cheng  wrote:
 On Wed, Jun 28, 2017 at 2:09 PM, Bin.Cheng  wrote:
> On Wed, Jun 28, 2017 at 1:29 PM, Richard Biener
>  wrote:
>> On Wed, Jun 28, 2017 at 1:46 PM, Bin.Cheng  wrote:
>>> On Wed, Jun 28, 2017 at 11:58 AM, Richard Biener
>>>  wrote:
 On Tue, Jun 27, 2017 at 4:07 PM, Bin.Cheng  
 wrote:
> On Tue, Jun 27, 2017 at 1:44 PM, Richard Biener
>  wrote:
>> On Fri, Jun 23, 2017 at 12:30 PM, Bin.Cheng  
>> wrote:
>>> On Tue, Jun 20, 2017 at 10:22 AM, Bin.Cheng  
>>> wrote:
 On Mon, Jun 12, 2017 at 6:03 PM, Bin Cheng  
 wrote:
> Hi,
>>> Rebased V3 for changes in previous patches.  Bootstap and test on
>>> x86_64 and aarch64.
>>
>> why is ldist-12.c no longer distributed?  your comment says it 
>> doesn't expose
>> more "parallelism" but the point is to reduce memory bandwith 
>> requirements
>> which it clearly does.
>>
>> Likewise for -13.c, -14.c.  -4.c may be a questionable case but the 
>> wording
>> of "parallelism" still confuses me.
>>
>> Can you elaborate on that.  Now onto the patch:
> Given we don't model data locality or memory bandwidth, whether
> distribution enables loops that can be executed paralleled becomes the
> major criteria for distribution.  BTW, I think a good memory stream
> optimization model shouldn't consider small loops as in ldist-12.c,
> etc., appropriate for distribution.

 True.  But what means "parallel" here?  ldist-13.c if partitioned into 
 two loops
 can be executed "in parallel"
>>> So if a loop by itself can be vectorized (or, so to say, can be executed
>>> in parallel), we tend to not distribute it into small ones.  But there
>>> is one exception here, if the distributed small loops are recognized
>>> as builtin functions, we still distribute it.  I assume it's generally
>>> better to call builtin memory functions than vectorize it by GCC?
>>
>> Yes.
>>

>>
>> +   Loop distribution is the dual of loop fusion.  It separates 
>> statements
>> +   of a loop (or loop nest) into multiple loops (or loop nests) 
>> with the
>> +   same loop header.  The major goal is to separate statements 
>> which may
>> +   be vectorized from those that can't.  This pass implements 
>> distribution
>> +   in the following steps:
>>
>> misses the goal of being a memory stream optimization, not only a 
>> vectorization
>> enabler.  distributing a loop can also reduce register pressure.
> I will revise the comment, but as explained, enabling more
> vectorization is the major criteria for distribution to some extend
> now.

 Yes, I agree -- originally it was written to optimize the stream 
 benchmark IIRC.
>>> Let's see if any performance drop will be reported against this patch.
>>> Let's see if we can create a cost model for it.
>>
>> Fine.
> I will run some benchmarks to see if there is breakage.
>>

>>
>> You introduce ldist_alias_id in struct loop (probably in 01/n which I
>> didn't look
>> into yet).  If you don't use that please introduce it separately.
> Hmm, yes it is introduced in patch [01/n] and set in this patch.
>
>>
>> + /* Be conservative.  If data references are not well 
>> analyzed,
>> +or the two data references have the same base 
>> address and
>> +offset, add dependence and consider it alias to 
>> each other.
>> +In other words, the dependence can not be resolved 
>> by
>> +runtime alias check.  */
>> + if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
>> + || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
>> + || !DR_INIT (dr1) || !DR_INIT (dr2)
>> + || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP 
>> (dr1))
>> + || !DR_STEP (dr2) || !tree_fits_uhwi_p (DR_STEP 
>> (dr2))
>> +

[PATCH] gcc: ada: delete old $(P) reference

2017-07-18 Thread Mike Frysinger
From: Mike Frysinger 

The P variable was deleted back in Nov 2015 (svn rev 231062),
but its expansion was missed.  Delete those now too.

2017-07-18  Mike Frysinger  

* gcc-interface/Makefile.in ($(P)): Delete
---
 gcc/ada/gcc-interface/Makefile.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/gcc-interface/Makefile.in 
b/gcc/ada/gcc-interface/Makefile.in
index 1c172037d927..b485c18ec21e 100644
--- a/gcc/ada/gcc-interface/Makefile.in
+++ b/gcc/ada/gcc-interface/Makefile.in
@@ -2643,10 +2643,10 @@ gnatlink-re: ../stamp-tools gnatmake-re
 #  stamp target in the parent directory whenever gnat1 is rebuilt
 
 # Likewise for the tools
-../../gnatmake$(exeext): $(P) b_gnatm.o $(GNATMAKE_OBJS)
+../../gnatmake$(exeext): b_gnatm.o $(GNATMAKE_OBJS)
+$(GCC_LINK) $(ALL_CFLAGS) -o $@ b_gnatm.o $(GNATMAKE_OBJS) 
$(TOOLS_LIBS) $(TOOLS1_LIBS)
 
-../../gnatlink$(exeext): $(P) b_gnatl.o $(GNATLINK_OBJS)
+../../gnatlink$(exeext): b_gnatl.o $(GNATLINK_OBJS)
+$(GCC_LINK) $(ALL_CFLAGS) -o $@ b_gnatl.o $(GNATLINK_OBJS) 
$(TOOLS_LIBS) $(TOOLS1_LIBS)
 
 ../stamp-gnatlib-$(RTSDIR):
-- 
2.12.0



Re: [PATCH] Fix an UBSAN test-case (PR sanitizer/63361).

2017-07-18 Thread Jakub Jelinek
On Tue, Jul 18, 2017 at 01:44:21PM +0200, Martin Liška wrote:
> gcc/testsuite/ChangeLog:
> 
> 2017-07-17  Martin Liska  
> 
>   PR sanitizer/63361
>   * c-c++-common/ubsan/float-cast-overflow-1.c: Add either
>   -ffloat-store or -mieee for targets that need it.
> ---
>  gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c 
> b/gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c
> index cd6941c9d30..a25e8dea29e 100644
> --- a/gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c
> +++ b/gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c
> @@ -1,6 +1,7 @@
>  /* { dg-do run { target { lp64 || ilp32 } } } */
> +/* { dg-additional-options "-mfloat-store" { target { ia32 m68k-*-* } } } */

-ffloat-store, not -mfloat-store; the latter doesn't exist.

I wonder if one can mix effective target names with target triplets this
way.  If it works, fine, otherwise I guess you'd need { { ia32 } || { m68k-*-* 
} }
or something similar.  Can you please test it?
First make sure that for RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} 
ubsan.exp=float-cast-overflow-1.c'
you get -ffloat-store for 32-bit and not 64-bit.
I guess you don't have m68k-*-* target around, so next step I'd temporarily
change that m68k to x86_64 and retry and see if it enabled on both.

> +/* { dg-additional-options "-mieee" { target { alpha* sh* } } } */

I'd prefer alpha*-*-* sh*-*-*

>  /* { dg-options "-fsanitize=float-cast-overflow" } */
> -/* { dg-additional-options "-msse2 -mfpmath=sse" { target { sse2_runtime && 
> ia32 } } } */

Jakub


Re: RFC/A: Early predictive commoning pass

2017-07-18 Thread Richard Biener
On Mon, Jul 3, 2017 at 10:45 AM, Richard Sandiford
 wrote:
> General predictive commoning would play havoc with loop vectorisation,
> so the current pass order is clearly the right one.  But running a very
> limited form of predictive commoning before vectorisation would allow us
> to vectorise things like:
>
>  for (int i = 1; i < n; ++i)
>x[i] = x[i - 1] + 1;

In principle PRE can handle this case (if we weren't taming it down so much).

I'm not too sympathetic of adding yet another predcom pass given it is
expensive to do dependence analysis.  To make PRE do the transform you
need

Index: gcc/tree-ssa-pre.c
===
--- gcc/tree-ssa-pre.c  (revision 250297)
+++ gcc/tree-ssa-pre.c  (working copy)
@@ -1458,9 +1458,11 @@ phi_translate_1 (pre_expr expr, bitmap_s
   to be inserted and increased register pressure.
   See PR77498 - this avoids doing predcoms work in
   a less efficient way.  */
+#if 0
if (find_edge (pred, phiblock)->flags & EDGE_DFS_BACK)
  ;
else
+#endif
  {
unsigned value_id = get_expr_value_id (constant);
constant = find_leader_in_sets (value_id, set1, set2,
@@ -4377,7 +4379,7 @@ eliminate_dom_walker::before_dom_childre
  if (sprime
  && TREE_CODE (sprime) == SSA_NAME
  && do_pre
- && (flag_tree_loop_vectorize || flag_tree_parallelize_loops > 1)
+ && 0 && (flag_tree_loop_vectorize ||
flag_tree_parallelize_loops > 1)
  && loop_outer (b->loop_father)
  && has_zero_uses (sprime)
  && bitmap_bit_p (inserted_exprs, SSA_NAME_VERSION (sprime))

that is, the challenge is to identify the cases that help
vectorization as opposed
to blocking it ... (here you create a vectorizable induction which is fine).

> This patch adds an extra pass that is restricted to cases that should
> help (or at least not hinder) vectorisation.  It gives some nice
> improvements on some internal benchmarks.

Are you sure you gated off things properly?  It sounds like if we didn't tame
down PRE by use of the PHIs but by use of stmts inserted in the latch we'd
arrive at similar heuristics?

Richard.

> I compared the output for SPEC 2k6 before and after the patch.  For some
> benchmarks it led to a trivial register renaming, but had no effect on
> those benchmarks beyond that.  The only benchmark that changed in a
> significant way was 416.gamess, where we were able to vectorise some
> simple loops that we weren't previously.  None of those loops seem to
> be hot though, so there was no measurable difference in the score.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  Thoughts?  Is this
> too much of a special case to support a new pass?  OTOH, other compilers
> do vectorise the loop above, so it would be nice if we could too...
>
> Richard
>
>
> 2017-07-03  Richard Sandiford  
>
> gcc/
> * passes.def (pass_early_predcom): New.
> * tree-pass.h (make_pass_early_predcom): Declare.
> * tree-predcom.c (MAX_DISTANCE): Turn into an inclusive rather than
> exclusive upper bound.
> (only_simple_p): New variable.
> (max_distance): Likewise.
> (add_ref_to_chain): Use MAX_DISTANCE rather than max_distance
> and treat it as an inclusive upper bound.  Require the store to
> come after the load at the maximum distance if only_simple_p.
> (add_looparound_copies): Do nothing if only_simple_p.
> (determine_roots_comp): Use MAX_DISTANCE rather than max_distance
> and treat it as an inclusive upper bound.  Require the start of
> a chain to be a store if only_simple_p.
> (determine_unroll_factor): Return 1 if only_simple_p.
> (tree_predictive_commoning): Add an early_p parameter.  Set up
> only_simple_p and max_distance.
> (run_tree_predictive_commoning): Add an early_p parameter.
> Update call to tree_predictive_commoning.
> (pass_data_early_predcom): New descriptor.
> (pass_early_predcom): New class.
> (pass_data_predcom::execute): Update call to
> run_tree_predictive_commoning.
> (make_pass_early_predcom): New function.
>
> gcc/testsuite/
> * gnat.dg/vect18.adb: Turn off predictive commoning.
>
> Index: gcc/passes.def
> ===
> --- gcc/passes.def  2017-06-22 12:22:55.989380389 +0100
> +++ gcc/passes.def  2017-07-03 09:17:28.626495661 +0100
> @@ -290,6 +290,7 @@ along with GCC; see the file COPYING3.
>   NEXT_PASS (pass_parallelize_loops, false /* oacc_kernels_p */);
>   NEXT_PASS (pass_expand_omp_ssa);
>   NEXT_PASS (pass_ch_vect);
> + 

[C++ PATCH] move ctors and assign (again)

2017-07-18 Thread Nathan Sidwell
In addition to the user-declared checkers, we also have a couple of 'do 
we have at all' predicates.  Used exactly once when completing a struct 
and determining the state of the appropriate LAZY flags.


Merged with the attached patch committed to trunk.  Now that the two 
functions are stunningly similar, a further merging may happen in the near 
future ...


nathan
--
Nathan Sidwell
2017-07-18  Nathan Sidwell  

	* class.c (classtype_has_move_assign_or_move_ctor): Declare.
	(add_implicitly_declared_members): Use it.
	(type_has_move_constructor, type_has_move_assign): Merge into ...
	(classtype_has_move_assign_or_move_ctor): ... this new function.
	* cp-tree.h (type_has_move_constructor, type_has_move_assign): Delete.

Index: class.c
===
--- class.c	(revision 250281)
+++ class.c	(working copy)
@@ -150,6 +150,7 @@ static void build_base_fields (record_la
 static void check_methods (tree);
 static void remove_zero_width_bit_fields (tree);
 static bool accessible_nvdtor_p (tree);
+static bool classtype_has_move_assign_or_move_ctor (tree);
 
 /* Used by find_flexarrays and related functions.  */
 struct flexmems_t;
@@ -3384,7 +3385,7 @@ add_implicitly_declared_members (tree t,
   bool move_ok = false;
   if (cxx_dialect >= cxx11 && CLASSTYPE_LAZY_DESTRUCTOR (t)
   && !TYPE_HAS_COPY_CTOR (t) && !TYPE_HAS_COPY_ASSIGN (t)
-  && !type_has_move_constructor (t) && !type_has_move_assign (t))
+  && !classtype_has_move_assign_or_move_ctor (t))
 move_ok = true;
 
   /* [class.ctor]
@@ -5456,38 +5457,19 @@ type_has_virtual_destructor (tree type)
   return (dtor && DECL_VIRTUAL_P (dtor));
 }
 
-/* Returns true iff class T has a move constructor.  */
+/* Returns true iff class T has move assignment or move constructor.  */
 
-bool
-type_has_move_constructor (tree t)
+static bool
+classtype_has_move_assign_or_move_ctor (tree t)
 {
-  if (CLASSTYPE_LAZY_MOVE_CTOR (t))
-{
-  gcc_assert (COMPLETE_TYPE_P (t));
-  lazily_declare_fn (sfk_move_constructor, t);
-}
+  gcc_assert (!CLASSTYPE_LAZY_MOVE_CTOR (t)
+	  && !CLASSTYPE_LAZY_MOVE_ASSIGN (t));
 
-  if (!CLASSTYPE_METHOD_VEC (t))
-return false;
-
-  for (ovl_iterator iter (CLASSTYPE_CONSTRUCTORS (t)); iter; ++iter)
+  for (ovl_iterator iter (lookup_fnfields_slot_nolazy
+			  (t, ctor_identifier)); iter; ++iter)
 if (move_fn_p (*iter))
   return true;
 
-  return false;
-}
-
-/* Returns true iff class T has a move assignment operator.  */
-
-bool
-type_has_move_assign (tree t)
-{
-  if (CLASSTYPE_LAZY_MOVE_ASSIGN (t))
-{
-  gcc_assert (COMPLETE_TYPE_P (t));
-  lazily_declare_fn (sfk_move_assignment, t);
-}
-
   for (ovl_iterator iter (lookup_fnfields_slot_nolazy
 			  (t, cp_assignment_operator_id (NOP_EXPR)));
iter; ++iter)
Index: cp-tree.h
===
--- cp-tree.h	(revision 250280)
+++ cp-tree.h	(working copy)
@@ -6023,8 +6023,6 @@ extern tree default_init_uninitialized_p
 extern bool trivial_default_constructor_is_constexpr (tree);
 extern bool type_has_constexpr_default_constructor (tree);
 extern bool type_has_virtual_destructor		(tree);
-extern bool type_has_move_constructor		(tree);
-extern bool type_has_move_assign		(tree);
 extern bool classtype_has_user_move_assign_or_move_ctor_p (tree);
 extern bool type_build_ctor_call		(tree);
 extern bool type_build_dtor_call		(tree);


backport nvptx patch to gcc-7-branch

2017-07-18 Thread Tom de Vries
[ was: Re: [nvptx, committed, PR81069] Insert diverging jump alap in 
nvptx_single ]


On 07/17/2017 10:41 AM, Tom de Vries wrote:

Hi,

Consider nvptx_single:
...
/* Single neutering according to MASK.  FROM is the incoming block and
TO is the outgoing block.  These may be the same block. Insert at
start of FROM:

  if (tid.) goto end.

and insert before ending branch of TO (if there is such an insn):

  end:
  
  

We currently only use different FROM and TO when skipping an entire
loop.  We could do more if we detected superblocks.  */

static void
nvptx_single (unsigned mask, basic_block from, basic_block to)
...

When compiling libgomp.oacc-fortran/nested-function-1.f90 at -O1, we 
observed the following pattern:

...
:
  goto bb3;

: (with single predecessor)
  
  
...

which was translated by nvptx_single into:
...

  if (tid.) goto end.
  goto bb3;

:
  
  end:
  
  
...

There is no benefit to be gained from doing the goto bb3 in neutered 
mode, and there is no need to, so we might as well insert the neutering 
branch as late as possible:

...

  goto bb3;

:
  if (tid.) goto end.
  
  end:
  
  
...

This patch implements inserting the neutering branch as late as possible.

[ As it happens, the actual code for 
libgomp.oacc-fortran/nested-function-1.f90 at -O1 was more complicated: 
there were other bbs inbetween bb2 and bb3. While this doesn't change 
anything from a control flow graph point of view, it did trigger a bug 
in the ptx JIT compiler where it inserts the synchronization point for 
the diverging branch later than the immediate post-dominator point at 
the end label. Consequently, the condition broadcast was executed in 
divergent mode (which is known to give undefined results), resulting in 
a hang.
This patch also works around this ptx JIT compiler bug, for this 
test-case. ]


Build and tested on x86_64 with nvptx accelerator.

Committed.


Jakub,

I'd like to backport this nvptx patch to the gcc-7-branch.

The patch doesn't trivially fit into the category of regression or 
documentation fix.


Without this patch, when building an nvptx offloading compiler and 
running the libgomp testsuite for the gcc-7-branch, the GPU hangs, and 
I've had a report from a colleague who experienced system crashes 
because of it.


However, in principle gcc is not doing anything wrong: the generated 
code is according to the ptx spec. It's just that the patch makes it 
less likely to run into a ptx JIT bug.


Then again, it's an nvptx patch, neither a primary nor secondary target.

I'll commit the backport some time this week, unless there are objections.

Thanks,
- Tom


0001-Insert-diverging-jump-alap-in-nvptx_single.patch


Insert diverging jump alap in nvptx_single

2017-07-17  Tom de Vries  

PR target/81069
* config/nvptx/nvptx.c (nvptx_single): Insert diverging branch as late
as possible.

---
  gcc/config/nvptx/nvptx.c | 22 +++---
  1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index daeec27..cb11686 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -3866,9 +3866,25 @@ nvptx_single (unsigned mask, basic_block from, 
basic_block to)
rtx_insn *tail = BB_END (to);
unsigned skip_mask = mask;
  
-  /* Find first insn of from block */

-  while (head != BB_END (from) && !INSN_P (head))
-head = NEXT_INSN (head);
+  while (true)
+{
+  /* Find first insn of from block.  */
+  while (head != BB_END (from) && !INSN_P (head))
+   head = NEXT_INSN (head);
+
+  if (from == to)
+   break;
+
+  if (!(JUMP_P (head) && single_succ_p (from)))
+   break;
+
+  basic_block jump_target = single_succ (from);
+  if (!single_pred_p (jump_target))
+   break;
+
+  from = jump_target;
+  head = BB_HEAD (from);
+}
  
/* Find last insn of to block */

rtx_insn *limit = from == to ? head : BB_HEAD (to);





Re: [PATCH] Fix pr80044, -static and -pie insanity, and pr81170

2017-07-18 Thread H.J. Lu
On Mon, Jul 17, 2017 at 9:02 PM, Alan Modra  wrote:
> On Mon, Jul 17, 2017 at 06:01:47AM -0700, H.J. Lu wrote:
>> On Mon, Jul 17, 2017 at 5:33 AM, Alan Modra  wrote:
>> > On Sat, Jul 15, 2017 at 06:32:40AM -0700, H.J. Lu wrote:
>> >> On Thu, Jun 22, 2017 at 8:28 AM, Alan Modra  wrote:
>> >> > PR80044 notes that -static and -pie together behave differently when
>> >> > gcc is configured with --enable-default-pie as compared to configuring
>> >> > without (or --disable-default-pie).  This patch removes that
>> >> > difference.  In both cases you now will have -static completely
>> >> > overriding -pie.
>> >> >
>> >> > Fixing this wasn't quite as simple as you'd expect, due to poor
>> >> > separation of functionality.  PIE_SPEC didn't just mean that -pie was
>> >> > on explicitly or by default, but also -r and -shared were *not* on.
>> >> > Fortunately the three files touched by this patch are the only places
>> >> > PIE_SPEC and NO_PIE_SPEC are used, so it isn't too hard to see that
>> >> > the reason PIE_SPEC and NO_PIE_SPEC are not inverses is the use of
>> >> > PIE_SPEC in LINK_PIE_SPEC.  So, move the inelegant symmetry breaking
>> >> > addition, to LINK_PIE_SPEC where it belongs.  Doing that showed
>> >> > another problem in gnu-user.h, with PIE_SPEC and NO_PIE_SPEC selection
>> >> > of crtbegin*.o not properly hooked into a chain of if .. elseif ..
>> >> > conditions, which required both PIE_SPEC and NO_PIE_SPEC to exclude
>> >> > -static and -shared.  Fixing that particular problem finally allows
>> >> > PIE_SPEC to serve just one purpose, and NO_PIE_SPEC to disappear.
>> >> >
>> >> > Bootstrapped and regression tested powerpc64le-linux c,c++.  No
>> >> > regressions and a bunch of --enable-default-pie failures squashed.
>> >> > OK mainline and active branches?
>> >> >
>> >> > Incidentally, there is a fairly strong case to be made for adding
>> >> > -static to the -shared, -pie, -no-pie chain of RejectNegative's in
>> >> > common.opt.  Since git 0d6378a9e (svn r48039) 2001-11-15, -static has
>> >> > done more than just the traditional "prevent linking with dynamic
>> >> > libraries", as -static selects crtbeginT.o rather than crtbegin.o
>> >> > on GNU systems.  Realizing this is what led me to close pr80044, which
>> >> > I'd opened with the aim of making -pie -static work together (with the
>> >> > traditional meaning of -static).  I don't think that is worth doing, but
>> >> > mention pr80044 in the changelog due to fixing the insane output
>> >> > produced by -pie -static with --disable-default-pie.
>> >> >
>> >>
>> >> On x86-64, without --enable-default-pie, "-static -pie" and "-pie -static"
>> >> never worked since both -static and -pie are passed to linker, which
>> >> uses libc.a to build PIE.
>> >
>> > Yes, it's broken.
>>
>> This behavior may be useful for static PIE when libc.a is compiled with
>> -fPIE.
>
> Building a PIE from static archives using -static -pie or -pie -static
> right now is broken, even if the archives are compiled -fpie/PIE.
> I've looked into fixing it, and decided it wasn't worth the effort.
> There are multiple problems.  See
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80044#c1
>
> One reason why I believe it isn't worth fixing is that the meaning of
> -static has changed over the years, from "link using static archives"
> to "produce a static executable", and most certainly the meaning of
> -static and -pie together is not clear.  I'll cite gold behaviour as
> evidence: -static with -pie results in an error from gold.  See
> https://sourceware.org/ml/binutils/2012-02/msg00119.html and following
> discussion.
>
>> >>  With --enable-default-pie, -static and -pie
>> >> override each other.
>> >
>> > No they don't.  -static overrides -pie.
>> >
>> >>  What does your patch do on x86-64?  Make
>> >> with and without --enable-default-pie behave the same?
>> >
>> > Yes, as I said in my original post first paragraph.
>> >
>> >>  Does it
>> >> mean that both fail to create executable?
>> >
>> > I try to leave that sort of patch to those better qualified.
>> > Bootstrap and regression testing on x86_64-linux both
>> > --enable-default-pie and --disable-default-pie was complete June 23.
>> >
>>
>> What is the new behavior?  The old  --disable-default-pie or old
>> --enable-default-pie?
>
> You are asking questions to which the answer is given in the very
> first paragraph posted in this thread, if you knew the current
> --enable-default-pie behaviour.  -static overrides -pie.  ie. current
> --enable-default-pie behaviour is unchanged.
>
>> Will static PIE be supported if libc is
>> compiled with -fPIE by default?
>
> I covered this above, if you're asking about -static and -pie
> together.  Unsupported both before and after my patch.  You *can* link
> a working PIE from -fPIE archives, if that is what you want, with
> "-pie -Wl,-Bstatic", both before and after my patch.

I am working on compiling libc.a in glibc with -fPIE and 

Re: [PATCH] PR libstdc++/81064 fix versioned namespace

2017-07-18 Thread Ville Voutilainen
On 18 July 2017 at 16:31, Jonathan Wakely  wrote:
> This is quite a huge change, so I'd like to wait and see if anyone
> else has any opinion on it.
>
> Personally I think it's necessary (assuming I understand the PR
> correctly) and so if nobody objects I think we should go with this
> change for GCC 8. Let's give it a few days for comments (and I'll
> finish going through the patch carefully).


Looks like the right approach to me. I haven't looked at the patch in
detail, but the main gist
of it is something that we should certainly do for GCC 8. The Elf says "aye".


Re: [PATCH] Fix pr80044, -static and -pie insanity, and pr81170

2017-07-18 Thread Alan Modra
On Tue, Jul 18, 2017 at 05:36:49AM -0700, H.J. Lu wrote:
> I am working on compiling libc.a in glibc with -fPIE and building static
> PIE.  This creates static executable with PIE:
> 
> gcc -nostdlib -nostartfiles -static -o
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/sln -pie
> -Wl,--no-dynamic-linker
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crt1.o
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crti.o `gcc
> --print-file-name=crtbeginS.o`
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/sln.o
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/static-stubs.o
>  -Wl,--start-group
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/libc.a -lgcc
> -Wl,--end-group `gcc  --print-file-name=crtendS.o`
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crtn.o
> 
> Currently, it only works with gcc configured with -disable-default-pie.
> With --enable-default-pie, I got
> 
> [hjl@gnu-tools-1 build-x86_64-linux]$ /usr/gcc-7.1.1-x32-pie/bin/gcc
> -nostdlib -nostartfiles -static -o /tmp/sln -pie
> -Wl,--no-dynamic-linker
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crt1.o
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crti.o `gcc
> --print-file-name=crtbeginS.o`
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/sln.o
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/static-stubs.o
>  -Wl,--start-group
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/libc.a -lgcc
> -Wl,--end-group `gcc  --print-file-name=crtendS.o`
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crtn.o
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/libc.a(dl-support.o):
> In function `elf_machine_load_address':
> /export/gnu/import/git/sources/glibc/elf/../sysdeps/x86_64/dl-machine.h:59:
> undefined reference to `_DYNAMIC'
> /export/build/gnu/glibc-static-pie/build-x86_64-linux/libc.a(dl-support.o):
> In function `elf_get_dynamic_info':
> /export/gnu/import/git/sources/glibc/elf/get-dynamic-info.h:48:
> undefined reference to `_DYNAMIC'
> collect2: error: ld returned 1 exit status
> [hjl@gnu-tools-1 build-x86_64-linux]$
> 
> Will your change fix it?

You have got to be joking!  How should I know whether something will
work with some hare-brained scheme of yours?  One that seemingly
requires you to specify startup files by hand!

-- 
Alan Modra
Australia Development Lab, IBM


[PATCH] Implement one optimization from build_range_check in match.pd (PR tree-optimization/81346)

2017-07-18 Thread Jakub Jelinek
Hi!

The following patch implements the:
  /* Optimize (c>=1) && (c<=127) into (signed char)c > 0.  */
  if (integer_onep (low) && TREE_CODE (high) == INTEGER_CST)
{
  int prec = TYPE_PRECISION (etype);

  if (wi::mask (prec - 1, false, prec) == high)
{
  if (TYPE_UNSIGNED (etype))
{
  tree signed_etype = signed_type_for (etype);
  if (TYPE_PRECISION (signed_etype) != TYPE_PRECISION (etype))
etype
  = build_nonstandard_integer_type (TYPE_PRECISION (etype), 0);
  else
etype = signed_etype;
  exp = fold_convert_loc (loc, etype, exp);
}
  return fold_build2_loc (loc, GT_EXPR, type, exp,
  build_int_cst (etype, 0));
}
}
optimization from build_range_check in match.pd if we already have the
less efficient x-1U <= 127U-1U.  If somebody writes the range test
as x>=1 && x <= 127, then it is already optimized well, but if somebody
writes it as x-1U <= 126U, then it is not without this patch.

Bootstrapped/regtested on x86_64-linux and i686-linux.

In the PR Marc noted that the optimization might be useful even for
constants other than 1, by transforming
x+C1 <= C2 if unsigned and C2-C1==INT_MAX into (int)x > (int)(-1-C1).
Shall I do that immediately, or incrementally?  Shall we also change
build_range_check to do that (i.e. drop the integer_onep above and use
the right etype constant)?  Also, I think the build_nonstandard_integer_type
above is unnecessary, I think signed_type_for does already call that.

2017-07-18  Jakub Jelinek  

PR tree-optimization/81346
* match.pd: Optimize (X - 1U) <= INT_MAX-1U into (int) X > 0.

* gcc.dg/tree-ssa/pr81346-5.c: New test.

--- gcc/match.pd.jj 2017-07-17 16:25:20.0 +0200
+++ gcc/match.pd2017-07-18 12:32:52.896924558 +0200
@@ -1125,6 +1125,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& wi::neg_p (@1, TYPE_SIGN (TREE_TYPE (@1
 (cmp @2 @0))
 
+/* (X - 1U) <= INT_MAX-1U into (int) X > 0.  */
+(for cmp (le gt)
+ icmp (gt le)
+ (simplify
+  (cmp (plus @0 integer_minus_onep@1) INTEGER_CST@2)
+   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+   && TYPE_UNSIGNED (TREE_TYPE (@0))
+   && TYPE_PRECISION (TREE_TYPE (@0)) > 1
+   && wi::eq_p (@2, wi::max_value (TYPE_PRECISION (TREE_TYPE (@0)),
+   SIGNED) - 1))
+(with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+ (icmp (convert:stype @0) { build_int_cst (stype, 0); })
+
 /* X / 4 < Y / 4 iff X < Y when the division is known to be exact.  */
 (for cmp (simple_comparison)
  (simplify
--- gcc/testsuite/gcc.dg/tree-ssa/pr81346-5.c.jj2017-07-18 
12:35:27.406063800 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr81346-5.c   2017-07-18 12:37:04.460894965 
+0200
@@ -0,0 +1,17 @@
+/* PR tree-optimization/81346 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "\\(signed int\\) x" 10 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 0;" 5 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " > 0;" 5 "optimized" } } */
+
+int f1 (unsigned x) { return x - 1 <= __INT_MAX__ - 1; }
+int f2 (unsigned x) { unsigned a = 1, b = __INT_MAX__ - 1; return x - a <= b; }
+int f3 (unsigned x) { return x - 1 < __INT_MAX__; }
+int f4 (unsigned x) { unsigned a = 1, b = __INT_MAX__; return x - a < b; }
+int f5 (unsigned x) { return x >= 1 && x <= __INT_MAX__; }
+int f6 (unsigned x) { return x - 1 > __INT_MAX__ - 1; }
+int f7 (unsigned x) { unsigned a = 1, b = __INT_MAX__ - 1; return x - a > b; }
+int f8 (unsigned x) { return x - 1 >= __INT_MAX__; }
+int f9 (unsigned x) { unsigned a = 1, b = __INT_MAX__; return x - a >= b; }
+int f10 (unsigned x) { return x < 1 || x > __INT_MAX__; }

Jakub


[PATCH] Fix PR81410

2017-07-18 Thread Richard Biener

The following fixes SLP loads with gaps in the case of no permutation.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk
sofar.

Richard.

2017-06-18  Richard Biener  

PR tree-optimization/81410
* tree-vect-stmts.c (vectorizable_load): Properly adjust for
the gap in the ! slp_perm SLP case after each group.

* gcc.dg/vect/pr81410.c: New testcase.

Index: gcc/tree-vect-stmts.c
===
*** gcc/tree-vect-stmts.c   (revision 250296)
--- gcc/tree-vect-stmts.c   (working copy)
*** vectorizable_load (gimple *stmt, gimple_
*** 7118,7123 
--- 7118,7124 
  {
first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+   int group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
/* For SLP vectorization we directly vectorize a subchain
   without permutation.  */
if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
*** vectorizable_load (gimple *stmt, gimple_
*** 7153,7162 
 not only the number of vector stmts the permutation result
 fits in.  */
  if (slp_perm)
!   vec_num = (group_size * vf + nunits - 1) / nunits;
  else
!   vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! group_gap_adj = vf * group_size - nunits * vec_num;
}
else
vec_num = group_size;
--- 7154,7168 
 not only the number of vector stmts the permutation result
 fits in.  */
  if (slp_perm)
!   {
! vec_num = (group_size * vf + nunits - 1) / nunits;
! group_gap_adj = vf * group_size - nunits * vec_num;
!   }
  else
!   {
! vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! group_gap_adj = group_gap;
!   }
}
else
vec_num = group_size;
*** vectorizable_load (gimple *stmt, gimple_
*** 7316,7321 
--- 7322,7328 
  aggr_type = vectype;
  
prev_stmt_info = NULL;
+   int group_elt = 0;
for (j = 0; j < ncopies; j++)
  {
/* 1. Create the vector or array pointer update chain.  */
*** vectorizable_load (gimple *stmt, gimple_
*** 7603,7612 
  /* Store vector loads in the corresponding SLP_NODE.  */
  if (slp && !slp_perm)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
  /* Bump the vector pointer to account for a gap or for excess
 elements loaded for a permuted SLP load.  */
! if (group_gap_adj != 0)
{
  bool ovf;
  tree bump
--- 7610,7636 
  /* Store vector loads in the corresponding SLP_NODE.  */
  if (slp && !slp_perm)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ 
+ /* With SLP permutation we load the gaps as well, without
+we need to skip the gaps after we manage to fully load
+all elements.  group_gap_adj is GROUP_SIZE here.  */
+ group_elt += nunits;
+ if (group_gap_adj != 0 && ! slp_perm
+ && group_elt == group_size - group_gap_adj)
+   {
+ bool ovf;
+ tree bump
+   = wide_int_to_tree (sizetype,
+   wi::smul (TYPE_SIZE_UNIT (elem_type),
+ group_gap_adj, ));
+ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+stmt, bump);
+ group_elt = 0;
+   }
}
  /* Bump the vector pointer to account for a gap or for excess
 elements loaded for a permuted SLP load.  */
! if (group_gap_adj != 0 && slp_perm)
{
  bool ovf;
  tree bump
Index: gcc/testsuite/gcc.dg/vect/pr81410.c
===
*** gcc/testsuite/gcc.dg/vect/pr81410.c (nonexistent)
--- gcc/testsuite/gcc.dg/vect/pr81410.c (working copy)
***
*** 0 
--- 1,38 
+ /* { dg-do run } */
+ /* { dg-require-effective-target vect_long_long } */
+ 
+ #include "tree-vect.h"
+ 
+ typedef long long uint64_t;
+ uint64_t x[24];
+ uint64_t y[16];
+ uint64_t z[8];
+ 
+ void __attribute__((noinline)) foo()
+ {
+   for (int i = 0; i < 8; ++i)
+ {
+   y[2*i] = x[3*i];
+   y[2*i + 1] = x[3*i + 1];
+   z[i] = 1;
+ }
+ }
+ 
+ int main()
+ {
+   check_vect ();
+ 
+   for (int i = 0; i < 24; ++i)
+ {
+   x[i] = i;
+   __asm__ volatile ("" : : : "memory");
+ }
+   foo ();
+   for (int i = 0; i < 8; ++i)
+ if (y[2*i] != 3*i || y[2*i+1] != 3*i + 1)
+   __builtin_abort ();
+ 
+   return 0;
+ 

RE: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]

2017-07-18 Thread Richard Biener
On Tue, 18 Jul 2017, Tamar Christina wrote:

> > 
> > Why's this now done during RTL expansion rather than during late GIMPLE,
> > using match.pd and an internal function for xorsign?
> > 
> 
> Mainly because of Andrew's email on the 10th which stated:
> 
> > But you should get the general idea.  I would like to see more of 
> > these special expand patterns really.
> 
> And there were no objections so I figured this was also an acceptable 
> solution.

I see.  But the implementation challenge is that this interacts badly
with SSA coalescing done before this and thus should really happen
on GIMPLE before that.

And yes, I also like to see more of this, it's basically doing some
instruction selection on (late) GIMPLE.  Ideally we'd be able to
generate an expand.pd match.pd variant from the machine
description (named) define_insns, creating IFNs that we know how
to expand.

Think of a combine pass combining GIMPLE stmts to (recognized)
RTL insn (sequences).  Until RTL expansion the RTL insn (sequence)
would be represented by an internal function call (or alternatively
for multi-output cases an GIMPLE ASM with enumerated asm text).

Richard.

> > Thanks,
> > Richard.
> > 
> > >
> > > gcc/
> > > 2017-07-10  Tamar Christina  
> > >   Andrew Pinski 
> > >
> > >   PR middle-end/19706
> > >   * expr.c (is_copysign_call_with_1): New.
> > >   (maybe_expand_mult_copysign): Likewise.
> > >   (expand_expr_real_2): Expand copysign.
> > >   * optabs.def (xorsign_optab): New.
> > >
> > > 
> > > From: Andrew Pinski 
> > > Sent: Monday, July 10, 2017 12:21:29 AM
> > > To: Tamar Christina
> > > Cc: GCC Patches; nd; l...@redhat.com; i...@airs.com; rguent...@suse.de
> > > Subject: Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y)
> > > [Patch (1/2)]
> > >
> > > On Mon, Jun 12, 2017 at 12:56 AM, Tamar Christina
> > >  wrote:
> > > > Hi All,
> > > >
> > > > this patch implements an optimization rewriting
> > > >
> > > > x * copysign (1.0, y) and
> > > > x * copysign (-1.0, y)
> > > >
> > > > to:
> > > >
> > > > x ^ (y & (1 << sign_bit_position))
> > > >
> > > > This is done by creating a special builtin during matching and
> > > > generate the appropriate instructions during expand. This new builtin is
> > called XORSIGN.
> > > >
> > > > The expansion of xorsign depends on if the backend has an
> > > > appropriate optab available. If this is not the case then we use a
> > > > modified version of the existing copysign which does not take the abs
> > value of the first argument as a fall back.
> > > >
> > > > This patch is a revival of a previous patch
> > > > https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html
> > > >
> > > > Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no
> > issues.
> > > > Regression done on aarch64-none-linux-gnu and no regressions.
> > >
> > >
> > > Note this is also PR 19706.
> > >
> > > Thanks,
> > > Andrew
> > >
> > > >
> > > > Ok for trunk?
> > > >
> > > > gcc/
> > > > 2017-06-07  Tamar Christina  
> > > >
> > > > * builtins.def (BUILT_IN_XORSIGN, BUILT_IN_XORSIGNF): New.
> > > > (BUILT_IN_XORSIGNL, BUILT_IN_XORSIGN_FLOAT_NX): Likewise.
> > > > * match.pd (mult (COPYSIGN:s real_onep @0) @1): New simplifier.
> > > > (mult (COPYSIGN:s real_mus_onep @0) @1): Likewise.
> > > > (copysigns @0 (negate @1)): Likewise.
> > > > * builtins.c (expand_builtin_copysign): Promoted local to 
> > > > argument.
> > > > (expand_builtin): Added CASE_FLT_FN_FLOATN_NX
> > (BUILT_IN_XORSIGN) and
> > > > CASE_FLT_FN (BUILT_IN_XORSIGN).
> > > > (BUILT_IN_COPYSIGN): Updated function call.
> > > > * optabs.h (expand_copysign): New bool.
> > > > (expand_xorsign): New.
> > > > * optabs.def (xorsign_optab): New.
> > > > * optabs.c (expand_copysign): New parameter.
> > > > * fortran/f95-lang.c (xorsignl, xorsign, xorsignf): New.
> > > > * fortran/mathbuiltins.def (XORSIGN): New.
> > > >
> > > > gcc/testsuite/
> > > > 2017-06-07  Tamar Christina  
> > > >
> > > > * gcc.dg/tree-ssa/xorsign.c: New.
> > > > * gcc.dg/xorsign_exec.c: New.
> > > > * gcc.dg/vec-xorsign_exec.c: New.
> > > > * gcc.dg/tree-ssa/reassoc-39.c (f2, f3): Updated constant to 2.
> > >
> > 
> > --
> > Richard Biener 
> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton,
> > HRB 21284 (AG Nuernberg)
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: Fix ICE in estimate_bb_frequencies

2017-07-18 Thread Markus Trippelsdorf
On 2017.07.18 at 09:54 +0200, Jan Hubicka wrote:
> Hi,
> this patch fixes ICE in estimate_bb_frequencies which triggers because we 
> forget
> to compute probability for blocks whose count is earlier statically 
> determined to be
> 0.
> 
> Bootstrapped/regtested x86_64-linux, will commit it shortly.

It also fixes both testcases from PR81318.

However the following testcase still ICEs:

trippels@gcc2-power8 linux % cat main.i
int a;
extern void fn4();
__attribute__((__cold__)) void fn1();
void fn2() { fn1(); }
void fn3() {
  fn2();
  if (a)
fn4();
}

trippels@gcc2-power8 linux % gcc -O2 -c main.i
during GIMPLE pass: profile_estimate
main.i: In function ‘fn3’:
main.i:9:1: internal compiler error: in to_reg_br_prob_base, at 
profile-count.h:189

-- 
Markus


RE: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]

2017-07-18 Thread Tamar Christina
> > It should be a match.pd rule that uses a match predicate, so expand in
> > gimple-match.c. but don't do this if the target doesn't have the
> > xorsign optab and don't do it if honouring SNAN.
> 
> Note that this will trigger too early (IMHO), so unless you feel like 
> inventing
> new infrastructure I'd put manual pattern matching in tree-ssa-math-opts.c
> pass_optimize_widening_mul where we currently do this kind of "late
> GIMPLE instruction selection".
> 

Alright, I'll do that then, thanks!

> Richard.
> 
> > I'll make the changes then.
> > Thanks,
> > Tamar
> >
> > >
> > > Think of a combine pass combining GIMPLE stmts to (recognized) RTL
> > > insn (sequences).  Until RTL expansion the RTL insn (sequence) would
> > > be represented by an internal function call (or alternatively for
> > > multi-output cases an GIMPLE ASM with enumerated asm text).
> > >
> > > Richard.
> > >
> > > > > Thanks,
> > > > > Richard.
> > > > >
> > > > > >
> > > > > > gcc/
> > > > > > 2017-07-10  Tamar Christina  
> > > > > > Andrew Pinski 
> > > > > >
> > > > > > PR middle-end/19706
> > > > > > * expr.c (is_copysign_call_with_1): New.
> > > > > > (maybe_expand_mult_copysign): Likewise.
> > > > > > (expand_expr_real_2): Expand copysign.
> > > > > > * optabs.def (xorsign_optab): New.
> > > > > >
> > > > > > 
> > > > > > From: Andrew Pinski 
> > > > > > Sent: Monday, July 10, 2017 12:21:29 AM
> > > > > > To: Tamar Christina
> > > > > > Cc: GCC Patches; nd; l...@redhat.com; i...@airs.com;
> > > > > > rguent...@suse.de
> > > > > > Subject: Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0,
> > > > > > y) [Patch (1/2)]
> > > > > >
> > > > > > On Mon, Jun 12, 2017 at 12:56 AM, Tamar Christina
> > > > > >  wrote:
> > > > > > > Hi All,
> > > > > > >
> > > > > > > this patch implements an optimization rewriting
> > > > > > >
> > > > > > > x * copysign (1.0, y) and
> > > > > > > x * copysign (-1.0, y)
> > > > > > >
> > > > > > > to:
> > > > > > >
> > > > > > > x ^ (y & (1 << sign_bit_position))
> > > > > > >
> > > > > > > This is done by creating a special builtin during matching
> > > > > > > and generate the appropriate instructions during expand.
> > > > > > > This new builtin is
> > > > > called XORSIGN.
> > > > > > >
> > > > > > > The expansion of xorsign depends on if the backend has an
> > > > > > > appropriate optab available. If this is not the case then we
> > > > > > > use a modified version of the existing copysign which does
> > > > > > > not take the abs
> > > > > value of the first argument as a fall back.
> > > > > > >
> > > > > > > This patch is a revival of a previous patch
> > > > > > > https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html
> > > > > > >
> > > > > > > Bootstrapped on both aarch64-none-linux-gnu and x86_64 with
> > > > > > > no
> > > > > issues.
> > > > > > > Regression done on aarch64-none-linux-gnu and no regressions.
> > > > > >
> > > > > >
> > > > > > Note this is also PR 19706.
> > > > > >
> > > > > > Thanks,
> > > > > > Andrew
> > > > > >
> > > > > > >
> > > > > > > Ok for trunk?
> > > > > > >
> > > > > > > gcc/
> > > > > > > 2017-06-07  Tamar Christina  
> > > > > > >
> > > > > > > * builtins.def (BUILT_IN_XORSIGN, BUILT_IN_XORSIGNF):
> New.
> > > > > > > (BUILT_IN_XORSIGNL, BUILT_IN_XORSIGN_FLOAT_NX):
> > > Likewise.
> > > > > > > * match.pd (mult (COPYSIGN:s real_onep @0) @1): New
> > > simplifier.
> > > > > > > (mult (COPYSIGN:s real_mus_onep @0) @1): Likewise.
> > > > > > > (copysigns @0 (negate @1)): Likewise.
> > > > > > > * builtins.c (expand_builtin_copysign): Promoted
> > > > > > > local to
> > > argument.
> > > > > > > (expand_builtin): Added CASE_FLT_FN_FLOATN_NX
> > > > > (BUILT_IN_XORSIGN) and
> > > > > > > CASE_FLT_FN (BUILT_IN_XORSIGN).
> > > > > > > (BUILT_IN_COPYSIGN): Updated function call.
> > > > > > > * optabs.h (expand_copysign): New bool.
> > > > > > > (expand_xorsign): New.
> > > > > > > * optabs.def (xorsign_optab): New.
> > > > > > > * optabs.c (expand_copysign): New parameter.
> > > > > > > * fortran/f95-lang.c (xorsignl, xorsign, xorsignf): New.
> > > > > > > * fortran/mathbuiltins.def (XORSIGN): New.
> > > > > > >
> > > > > > > gcc/testsuite/
> > > > > > > 2017-06-07  Tamar Christina  
> > > > > > >
> > > > > > > * gcc.dg/tree-ssa/xorsign.c: New.
> > > > > > > * gcc.dg/xorsign_exec.c: New.
> > > > > > > * gcc.dg/vec-xorsign_exec.c: New.
> > > > > > > * gcc.dg/tree-ssa/reassoc-39.c (f2, f3): Updated constant 
> > > > > > > to 2.
> > > > > >
> > > > >
> > > > > --
> > > > > Richard Biener  SUSE LINUX GmbH, GF: Felix
> > > > > Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG
> > > 

[patch,avr.applied] Fix PR81473: Don't use INT8_MIN etc.

2017-07-18 Thread Georg-Johann Lay

https://gcc.gnu.org/r250301
https://gcc.gnu.org/r250302

Removed usage of INT8_MIN and friends as they are not available
in older C++ which leads to build failure.

Applied to trunk and v7 branch.

Johann


gcc/
Backport from 2017-07-18 trunk r250301.
PR target/81473
* config/avr/avr.c (avr_optimize_casesi): Don't use
INT8_MIN, INT8_MAX, UINT8_MAX, INT16_MIN, INT16_MAX, UINT16_MAX.

Index: config/avr/avr.c
===
--- config/avr/avr.c	(revision 250258)
+++ config/avr/avr.c	(working copy)
@@ -553,9 +553,9 @@ avr_optimize_casesi (rtx_insn *insns[6],
   HOST_WIDE_INT hig_idx = low_idx + num_idx;
 
   // Maximum ranges of (un)signed QImode resp. HImode.
-  int imin = QImode == mode ? INT8_MIN : INT16_MIN;
-  int imax = QImode == mode ? INT8_MAX : INT16_MAX;
-  unsigned umax = QImode == mode ? UINT8_MAX : UINT16_MAX;
+  unsigned umax = QImode == mode ? 0xff : 0x;
+  int imax = QImode == mode ? 0x7f : 0x7fff;
+  int imin = -imax - 1;
 
   // Testing the case range and whether it fits into the range of the
   // (un)signed mode.  This test should actually always pass because it


[PATCH] [ARC] Check the assembler for dwarf2 support.

2017-07-18 Thread Claudiu Zissulescu
From: claziss 

Our configure file is missing a check for whether the assembler supports gdwarf2.

Ok to apply?
Claudiu

gcc/
2017-06-21  Claudiu Zissulescu  

* configure.ac: Add arc and check if assembler supports gdwarf2.
* configure: Regenerate.
---
 gcc/configure| 6 +++---
 gcc/configure.ac | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/configure b/gcc/configure
index cc542ac..bc38022 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -27588,9 +27588,9 @@ esac
 # ??? Once 2.11 is released, probably need to add first known working
 # version to the per-target configury.
 case "$cpu_type" in
-  aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \
-  | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \
-  | tilepro | visium | xstormy16 | xtensa)
+  aarch64 | alpha | arc | arm | avr | bfin | cris | i386 | m32c | m68k \
+  | microblaze | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu \
+  | tilegx | tilepro | visium | xstormy16 | xtensa)
 insn="nop"
 ;;
   ia64 | s390)
diff --git a/gcc/configure.ac b/gcc/configure.ac
index b54f797..406709f 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -4788,9 +4788,9 @@ esac
 # ??? Once 2.11 is released, probably need to add first known working
 # version to the per-target configury.
 case "$cpu_type" in
-  aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \
-  | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu | tilegx \
-  | tilepro | visium | xstormy16 | xtensa)
+  aarch64 | alpha | arc | arm | avr | bfin | cris | i386 | m32c | m68k \
+  | microblaze | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu \
+  | tilegx | tilepro | visium | xstormy16 | xtensa)
 insn="nop"
 ;;
   ia64 | s390)
-- 
1.9.1



New Ukrainian PO file for 'gcc' (version 7.1.0)

2017-07-18 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the Ukrainian team of translators.  The file is available at:

http://translationproject.org/latest/gcc/uk.po

(This file, 'gcc-7.1.0.uk.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

http://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

http://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




RE: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]

2017-07-18 Thread Tamar Christina

> I see.  But the implementation challenge is that this interacts badly with SSA
> coalescing done before this and thus should really happen on GIMPLE before
> that.
> 
> And yes, I also like to see more of this, it's basically doing some 
> instruction
> selection on (late) GIMPLE.  Ideally we'd be able to generate an expand.pd
> match.pd variant from the machine description (named) define_insns,
> creating IFNs that we know how to expand.

Fair enough, Just to check I understood correctly.

It should be a match.pd rule that uses a match predicate, so expand in 
gimple-match.c.
but don't do this if the target doesn't have the xorsign optab and don't do it 
if honouring SNAN.

I'll make the changes then.
Thanks,
Tamar

> 
> Think of a combine pass combining GIMPLE stmts to (recognized) RTL insn
> (sequences).  Until RTL expansion the RTL insn (sequence) would be
> represented by an internal function call (or alternatively for multi-output
> cases an GIMPLE ASM with enumerated asm text).
> 
> Richard.
> 
> > > Thanks,
> > > Richard.
> > >
> > > >
> > > > gcc/
> > > > 2017-07-10  Tamar Christina  
> > > > Andrew Pinski 
> > > >
> > > > PR middle-end/19706
> > > > * expr.c (is_copysign_call_with_1): New.
> > > > (maybe_expand_mult_copysign): Likewise.
> > > > (expand_expr_real_2): Expand copysign.
> > > > * optabs.def (xorsign_optab): New.
> > > >
> > > > 
> > > > From: Andrew Pinski 
> > > > Sent: Monday, July 10, 2017 12:21:29 AM
> > > > To: Tamar Christina
> > > > Cc: GCC Patches; nd; l...@redhat.com; i...@airs.com;
> > > > rguent...@suse.de
> > > > Subject: Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y)
> > > > [Patch (1/2)]
> > > >
> > > > On Mon, Jun 12, 2017 at 12:56 AM, Tamar Christina
> > > >  wrote:
> > > > > Hi All,
> > > > >
> > > > > this patch implements an optimization rewriting
> > > > >
> > > > > x * copysign (1.0, y) and
> > > > > x * copysign (-1.0, y)
> > > > >
> > > > > to:
> > > > >
> > > > > x ^ (y & (1 << sign_bit_position))
> > > > >
> > > > > This is done by creating a special builtin during matching and
> > > > > generate the appropriate instructions during expand. This new
> > > > > builtin is
> > > called XORSIGN.
> > > > >
> > > > > The expansion of xorsign depends on if the backend has an
> > > > > appropriate optab available. If this is not the case then we use
> > > > > a modified version of the existing copysign which does not take
> > > > > the abs
> > > value of the first argument as a fall back.
> > > > >
> > > > > This patch is a revival of a previous patch
> > > > > https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html
> > > > >
> > > > > Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no
> > > issues.
> > > > > Regression done on aarch64-none-linux-gnu and no regressions.
> > > >
> > > >
> > > > Note this is also PR 19706.
> > > >
> > > > Thanks,
> > > > Andrew
> > > >
> > > > >
> > > > > Ok for trunk?
> > > > >
> > > > > gcc/
> > > > > 2017-06-07  Tamar Christina  
> > > > >
> > > > > * builtins.def (BUILT_IN_XORSIGN, BUILT_IN_XORSIGNF): New.
> > > > > (BUILT_IN_XORSIGNL, BUILT_IN_XORSIGN_FLOAT_NX):
> Likewise.
> > > > > * match.pd (mult (COPYSIGN:s real_onep @0) @1): New
> simplifier.
> > > > > (mult (COPYSIGN:s real_mus_onep @0) @1): Likewise.
> > > > > (copysigns @0 (negate @1)): Likewise.
> > > > > * builtins.c (expand_builtin_copysign): Promoted local to
> argument.
> > > > > (expand_builtin): Added CASE_FLT_FN_FLOATN_NX
> > > (BUILT_IN_XORSIGN) and
> > > > > CASE_FLT_FN (BUILT_IN_XORSIGN).
> > > > > (BUILT_IN_COPYSIGN): Updated function call.
> > > > > * optabs.h (expand_copysign): New bool.
> > > > > (expand_xorsign): New.
> > > > > * optabs.def (xorsign_optab): New.
> > > > > * optabs.c (expand_copysign): New parameter.
> > > > > * fortran/f95-lang.c (xorsignl, xorsign, xorsignf): New.
> > > > > * fortran/mathbuiltins.def (XORSIGN): New.
> > > > >
> > > > > gcc/testsuite/
> > > > > 2017-06-07  Tamar Christina  
> > > > >
> > > > > * gcc.dg/tree-ssa/xorsign.c: New.
> > > > > * gcc.dg/xorsign_exec.c: New.
> > > > > * gcc.dg/vec-xorsign_exec.c: New.
> > > > > * gcc.dg/tree-ssa/reassoc-39.c (f2, f3): Updated constant to 
> > > > > 2.
> > > >
> > >
> > > --
> > > Richard Biener 
> > > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham
> > > Norton, HRB 21284 (AG Nuernberg)
> >
> >
> 
> --
> Richard Biener 
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton,
> HRB 21284 (AG Nuernberg)


RE: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y) [Patch (1/2)]

2017-07-18 Thread Richard Biener
On Tue, 18 Jul 2017, Tamar Christina wrote:

> 
> > I see.  But the implementation challenge is that this interacts badly with 
> > SSA
> > coalescing done before this and thus should really happen on GIMPLE before
> > that.
> > 
> > And yes, I also like to see more of this, it's basically doing some 
> > instruction
> > selection on (late) GIMPLE.  Ideally we'd be able to generate an expand.pd
> > match.pd variant from the machine description (named) define_insns,
> > creating IFNs that we know how to expand.
> 
> Fair enough, Just to check I understood correctly.
> 
> It should be a match.pd rule that uses a match predicate, so expand in 
> gimple-match.c. but don't do this if the target doesn't have the xorsign 
> optab and don't do it if honouring SNAN.

Note that this will trigger too early (IMHO), so unless you feel
like inventing new infrastructure I'd put manual pattern matching in
tree-ssa-math-opts.c pass_optimize_widening_mul where we currently
do this kind of "late GIMPLE instruction selection".

Richard.

> I'll make the changes then.
> Thanks,
> Tamar
> 
> > 
> > Think of a combine pass combining GIMPLE stmts to (recognized) RTL insn
> > (sequences).  Until RTL expansion the RTL insn (sequence) would be
> > represented by an internal function call (or alternatively for multi-output
> > cases an GIMPLE ASM with enumerated asm text).
> > 
> > Richard.
> > 
> > > > Thanks,
> > > > Richard.
> > > >
> > > > >
> > > > > gcc/
> > > > > 2017-07-10  Tamar Christina  
> > > > >   Andrew Pinski 
> > > > >
> > > > >   PR middle-end/19706
> > > > >   * expr.c (is_copysign_call_with_1): New.
> > > > >   (maybe_expand_mult_copysign): Likewise.
> > > > >   (expand_expr_real_2): Expand copysign.
> > > > >   * optabs.def (xorsign_optab): New.
> > > > >
> > > > > 
> > > > > From: Andrew Pinski 
> > > > > Sent: Monday, July 10, 2017 12:21:29 AM
> > > > > To: Tamar Christina
> > > > > Cc: GCC Patches; nd; l...@redhat.com; i...@airs.com;
> > > > > rguent...@suse.de
> > > > > Subject: Re: [GCC][PATCH][mid-end] Optimize x * copysign (1.0, y)
> > > > > [Patch (1/2)]
> > > > >
> > > > > On Mon, Jun 12, 2017 at 12:56 AM, Tamar Christina
> > > > >  wrote:
> > > > > > Hi All,
> > > > > >
> > > > > > this patch implements an optimization rewriting
> > > > > >
> > > > > > x * copysign (1.0, y) and
> > > > > > x * copysign (-1.0, y)
> > > > > >
> > > > > > to:
> > > > > >
> > > > > > x ^ (y & (1 << sign_bit_position))
> > > > > >
> > > > > > This is done by creating a special builtin during matching and
> > > > > > generate the appropriate instructions during expand. This new
> > > > > > builtin is
> > > > called XORSIGN.
> > > > > >
> > > > > > The expansion of xorsign depends on if the backend has an
> > > > > > appropriate optab available. If this is not the case then we use
> > > > > > a modified version of the existing copysign which does not take
> > > > > > the abs
> > > > value of the first argument as a fall back.
> > > > > >
> > > > > > This patch is a revival of a previous patch
> > > > > > https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html
> > > > > >
> > > > > > Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no
> > > > issues.
> > > > > > Regression done on aarch64-none-linux-gnu and no regressions.
> > > > >
> > > > >
> > > > > Note this is also PR 19706.
> > > > >
> > > > > Thanks,
> > > > > Andrew
> > > > >
> > > > > >
> > > > > > Ok for trunk?
> > > > > >
> > > > > > gcc/
> > > > > > 2017-06-07  Tamar Christina  
> > > > > >
> > > > > > * builtins.def (BUILT_IN_XORSIGN, BUILT_IN_XORSIGNF): New.
> > > > > > (BUILT_IN_XORSIGNL, BUILT_IN_XORSIGN_FLOAT_NX):
> > Likewise.
> > > > > > * match.pd (mult (COPYSIGN:s real_onep @0) @1): New
> > simplifier.
> > > > > > (mult (COPYSIGN:s real_mus_onep @0) @1): Likewise.
> > > > > > (copysigns @0 (negate @1)): Likewise.
> > > > > > * builtins.c (expand_builtin_copysign): Promoted local to
> > argument.
> > > > > > (expand_builtin): Added CASE_FLT_FN_FLOATN_NX
> > > > (BUILT_IN_XORSIGN) and
> > > > > > CASE_FLT_FN (BUILT_IN_XORSIGN).
> > > > > > (BUILT_IN_COPYSIGN): Updated function call.
> > > > > > * optabs.h (expand_copysign): New bool.
> > > > > > (expand_xorsign): New.
> > > > > > * optabs.def (xorsign_optab): New.
> > > > > > * optabs.c (expand_copysign): New parameter.
> > > > > > * fortran/f95-lang.c (xorsignl, xorsign, xorsignf): New.
> > > > > > * fortran/mathbuiltins.def (XORSIGN): New.
> > > > > >
> > > > > > gcc/testsuite/
> > > > > > 2017-06-07  Tamar Christina  
> > > > > >
> > > > > > * gcc.dg/tree-ssa/xorsign.c: New.
> > > > > > * gcc.dg/xorsign_exec.c: New.
> > > > > > * 

[PATCH] Remove a Java-specific hunk.

2017-07-18 Thread Martin Liška
Hello.

After we reverted both hunks in gimplify.c, I've just tested with 
--enable-languages=all
that the attached patch works.

May I ask you Eric to fix comment in:

  /* The operand may be a void-valued expression such as SAVE_EXPRs
 generated by the Java frontend for class initialization.  It is
 being executed only for its side-effects.  */
  if (TREE_TYPE (val) == void_type_node)
{
  ret = gimplify_expr (_OPERAND (*expr_p, 0), pre_p, post_p,
   is_gimple_stmt, fb_none);
  val = NULL;
}

which is used by Ada and should be described how.

Ready for trunk?
Martin
>From ded1981fb5942b420ef35a0eecf2119940bcd664 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Mon, 17 Jul 2017 14:49:29 +0200
Subject: [PATCH] Remove a Java-specific hunk.

gcc/ChangeLog:

2017-07-18  Martin Liska  

	* gimplify.c (mostly_copy_tree_r): Remove Java specific hunk.
---
 gcc/gimplify.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 641a8210dad..250dedbc44a 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -818,12 +818,7 @@ mostly_copy_tree_r (tree *tp, int *walk_subtrees, void *data)
   /* Stop at types, decls, constants like copy_tree_r.  */
   else if (TREE_CODE_CLASS (code) == tcc_type
 	   || TREE_CODE_CLASS (code) == tcc_declaration
-	   || TREE_CODE_CLASS (code) == tcc_constant
-	   /* We can't do anything sensible with a BLOCK used as an
-	  expression, but we also can't just die when we see it
-	  because of non-expression uses.  So we avert our eyes
-	  and cross our fingers.  Silly Java.  */
-	   || code == BLOCK)
+	   || TREE_CODE_CLASS (code) == tcc_constant)
 *walk_subtrees = 0;
 
   /* Cope with the statement expression extension.  */
-- 
2.13.2



Re: [PATCH PR81408]Turn TREE level unsafe loop optimizations warning to missed optimization message

2017-07-18 Thread Richard Biener
On Tue, Jul 18, 2017 at 11:13 AM, Bin.Cheng  wrote:
> On Tue, Jul 18, 2017 at 9:31 AM, Richard Biener
>  wrote:
>> On Tue, Jul 18, 2017 at 10:00 AM, Bin Cheng  wrote:
>>> Hi,
>>> I removed unsafe loop optimization on TREE level last year, so GCC doesn't 
>>> do unsafe
>>> loop optimizations on TREE now.  All "unsafe loop optimizations" warnings 
>>> reported by
>>> TREE optimizers are simply missed optimizations.  This patch turns such 
>>> warning into
>>> missed optimization messages.  I didn't change when this will be dumped, 
>>> for now it is
>>> when called from ivopts.
>>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>
>> Ok but can you change the testcase to not scan the ivopts dump but use
>> -fopt-info-loop-missed?
>> You should be able to match the output with dg-message.
> Thanks for reviewing.  New patch with test case updated accordingly.  Is it 
> OK?

Ok.

Richard.

> Thanks,
> bin
>>
>> Thanks,
>> Richard.
>>
>>> Thanks,
>>> bin
>>> 2017-07-13  Bin Cheng  
>>>
>>> PR target/81408
>>> * tree-ssa-loop-niter.c (number_of_iterations_exit): Dump missed
>>> optimization for loop niter analysis.
>>>
>>> gcc/testsuite/ChangeLog
>>> 2017-07-13  Bin Cheng  
>>>
>>> PR target/81408
>>> * g++.dg/tree-ssa/pr81408.C: New.
>>> * gcc.dg/tree-ssa/pr19210-1.c: Check dump message rather than 
>>> warning.


Re: [PATCH] Implement std::pointer_traits::to_address as per P0653R0

2017-07-18 Thread Jonathan Wakely

On 16/07/17 17:54 -0400, Glen Fernandes wrote:

Implement pointer_traits::to_address as in P0653r0

   * include/bits/allocated_ptr.h (allocated_ptr): Use
pointer_traits::to_address.
   * include/bits/ptr_traits.h (pointer_traits): Implement to_address.
   * include/ext/pointer.h (pointer_traits): Define to_address in
pointer_traits specialization.
   * 
testsuite/20_util/pointer_traits/requirements/explicit_instantiation.cc:
Define operator->.
   * testsuite/20_util/pointer_traits/to_address.cc: New tests.

Tested i686-pc-linux-gnu.



Thanks, Glen.

As discussed offlist, Glen is completing a copyright assignment and
we'll deal with this patch shortly.



[AArch64], patch] PR71727 fix -mstrict-align

2017-07-18 Thread Christophe Lyon
Hello,

I've received a complaint that GCC for AArch64 would generate
vectorized code relying on unaligned memory accesses even when using
-mstrict-align. This is a problem for code where such accesses lead to
memory faults.

A previous patch (r24) introduced
aarch64_builtin_support_vector_misalignment, which rejects such
accesses when the element size is 64 bits, and accept them otherwise,
which I think it shouldn't. The testcase added at that time only used
64 bits elements, and therefore didn't fully test the patch.

The report I received is about vectorized accesses to an array of
unsigned chars, whose start address is not aligned on a 128 bits
boundary.

The attached patch fixes the problem by making
aarch64_builtin_support_vector_misalignment always return false when
the misalignment is not known at compile time.

I've also added a testcase, which tries to check if the array start
address alignment is checked (using %16, and-ing with #15), so that
loop peeling is performed *before* using vectorized accesses. Without
the patch, vectorized accesses are used at the beginning of the array,
and byte accesses are used for the remainder at the end, and there is
not such 'and wX,wX,15'.

BTW, I'm not sure about the same hook for arm... it seems to me it has
a similar problem.

OK?

Thanks,

Christophe
2017-07-18  Christophe Lyon  

PR target/71727
gcc/
* config/aarch64/aarch64.c
(aarch64_builtin_support_vector_misalignment): Always return false
when misalignment is unknown.

gcc/testsuite/
* gcc.target/aarch64/pr71727-2.c: New test.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 799989a..12a9fbe 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11758,18 +11758,7 @@ aarch64_builtin_support_vector_misalignment 
(machine_mode mode,
 return false;
 
   if (misalignment == -1)
-   {
- /* Misalignment factor is unknown at compile time but we know
-it's word aligned.  */
- if (aarch64_simd_vector_alignment_reachable (type, is_packed))
-{
-  int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
-
-  if (element_size != 64)
-return true;
-}
- return false;
-   }
+   return false;
 }
   return default_builtin_support_vector_misalignment (mode, type, misalignment,
  is_packed);
diff --git a/gcc/testsuite/gcc.target/aarch64/pr71727-2.c 
b/gcc/testsuite/gcc.target/aarch64/pr71727-2.c
new file mode 100644
index 000..8935a72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr71727-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mstrict-align -O3" } */
+
+unsigned char foo(const unsigned char *buffer, unsigned int length)
+{
+  unsigned char sum;
+  unsigned int  count;
+
+  for (sum = 0, count = 0; count < length; count++) {
+sum = (unsigned char) (sum + *(buffer + count));
+  }
+
+  return sum;
+}
+
+/* { dg-final { scan-assembler-times "and\tw\[0-9\]+, w\[0-9\]+, 15" 1 } } */


Re: Default std::list default and move constructors

2017-07-18 Thread Jonathan Wakely

On 12/07/17 22:12 +0200, François Dumont wrote:

On 05/07/2017 17:22, Jonathan Wakely wrote:

It's mostly good, but I'd like to make a few suggestions ...


diff --git a/libstdc++-v3/include/bits/stl_list.h 
b/libstdc++-v3/include/bits/stl_list.h

index 232885a..7e5 100644
--- a/libstdc++-v3/include/bits/stl_list.h
+++ b/libstdc++-v3/include/bits/stl_list.h
@@ -82,6 +82,17 @@ namespace std _GLIBCXX_VISIBILITY(default)
 _List_node_base* _M_next;
 _List_node_base* _M_prev;

+#if __cplusplus >= 201103L
+  _List_node_base() = default;
+#else
+  _List_node_base()
+  { }
+#endif
+
+  _List_node_base(_List_node_base* __next, _List_node_base* __prev)
+: _M_next(__next), _M_prev(__prev)
+  { }
+


I think I'd prefer to leave this struct with no user-defined
constructors, instead of adding these.


My goal was to make sure that as many instances as possible are 
initialized through their initializer list. But it is still 
possible without it, so I made the change.





 static void
 swap(_List_node_base& __x, _List_node_base& __y) 
_GLIBCXX_USE_NOEXCEPT;


@@ -99,6 +110,79 @@ namespace std _GLIBCXX_VISIBILITY(default)
 _M_unhook() _GLIBCXX_USE_NOEXCEPT;
   };

+/// The %list node header.
+struct _List_node_header : public _List_node_base
+{
+private:
+#if _GLIBCXX_USE_CXX11_ABI
+  std::size_t _M_size;
+#endif


I don't think this needs to be private, because we don't have to worry
about users accessing this member. It's an internal-only type, and the
_M_next and _M_prev members are already public.


It's not internal-only as those members are exposed on std::list 
through inheritance. But I agree that consistency is more important 
here so I made it public.


Looks like it's only used as a base class of _List_impl, which is
private to std::list, and in the __distance overload. Am I missing
something?




If it's public then the _List_base::_M_inc_size, _M_dec_size etc.
could access it directly, and we don't need to add duplicates of those
functions to _List_impl.


+
+  _List_node_base* _M_base() { return this; }


Is this function necessary?


It is a nice replacement for calls to __addressof.


OK, that does make it more readable.


-#if _GLIBCXX_USE_CXX11_ABI
-  size_t _M_get_size() const { return 
*_M_impl._M_node._M_valptr(); }

+  size_t
+  _M_get_size() const { return _M_impl._M_node._M_get_size(); }

-  void _M_set_size(size_t __n) { *_M_impl._M_node._M_valptr() 
= __n; }

+  void
+  _M_set_size(size_t __n) { _M_impl._M_node._M_set_size(__n); }

-  void _M_inc_size(size_t __n) { *_M_impl._M_node._M_valptr() 
+= __n; }

+  void
+  _M_inc_size(size_t __n) { _M_impl._M_node._M_inc_size(__n); }

-  void _M_dec_size(size_t __n) { *_M_impl._M_node._M_valptr() 
-= __n; }

+  void
+  _M_dec_size(size_t __n) { _M_impl._M_node._M_dec_size(__n); }


These functions could just access _M_impl._M_size directly if it was
public. Introducing new functions to _List_impl to be used here seems
like unnecessary complication. We don't get rid of the #if because
it's still needed for these functions anyway:

Yes, I wanted to manage as much as possible usage of C++11 abi in the 
new _List_node_header type.


N.B. the ABI is called "cxx11" not C++11. It's a bad name, I should
have called it something else, but please don't make it worse by
saying "C++11". That ABI is also the default for C++98 so saying
"C++11" just causes more confusion than necessary.

You're not getting rid of the #if entirely, so some of the cxx11 ABI
specialization already lives in _List_base. Adding several new member
functions and still failing to remove the #if#else from _List_base
didn't seem like an improvement. The new patch looks much better to
me.

So here is the new patch limited to this evolution. Optimization for 
always equal allocator will come after along with another 
simplification to replace _S_distance with std::distance.


OK.


Tests running, ok to commit if successful ?


OK for trunk with one tiny tweak, while you're already changing the
function ...


@@ -1983,12 +2011,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   _GLIBCXX_STD_C::_List_const_iterator<_Tp> __last,
   input_iterator_tag)
{
-  typedef _GLIBCXX_STD_C::_List_node _Sentinel;
+  typedef __detail::_List_node_header _Sentinel;
  _GLIBCXX_STD_C::_List_const_iterator<_Tp> __beyond = __last;
  ++__beyond;
  bool __whole = __first == __beyond;


Could you please make __whole const?

Thanks!



[PATCH] Move fold_div_compare optimization to match.pd (PR tree-optimization/81346)

2017-07-18 Thread Jakub Jelinek
Hi!

The following patch moves the fold_div_compare optimization from
generic folding only in fold-const.c to match.pd, so that it can be
excersized also when optimizing GIMPLE.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-07-18  Jakub Jelinek  

PR tree-optimization/81346
* fold-const.h (fold_div_compare, range_check_type): Declare.
* fold-const.c (range_check_type): New function.
(build_range_check): Use range_check_type.
(fold_div_compare): No longer static, rewritten into
a match.pd helper function.
(fold_comparison): Don't call fold_div_compare here.
* match.pd (X / C1 op C2): New optimization using fold_div_compare
as helper function.

* gcc.dg/tree-ssa/pr81346-1.c: New test.
* gcc.dg/tree-ssa/pr81346-2.c: New test.
* gcc.dg/tree-ssa/pr81346-3.c: New test.
* gcc.dg/tree-ssa/pr81346-4.c: New test.
* gcc.target/i386/umod-3.c: Hide comparison against 1 from the
compiler to avoid X / C1 op C2 optimization to trigger.

--- gcc/fold-const.h.jj 2017-07-17 10:08:28.575727191 +0200
+++ gcc/fold-const.h2017-07-17 12:27:27.071729458 +0200
@@ -88,6 +88,8 @@ extern void fold_undefer_overflow_warnin
 extern void fold_undefer_and_ignore_overflow_warnings (void);
 extern bool fold_deferring_overflow_warnings_p (void);
 extern void fold_overflow_warning (const char*, enum 
warn_strict_overflow_code);
+extern enum tree_code fold_div_compare (enum tree_code, tree, tree,
+   tree *, tree *, bool *);
 extern int operand_equal_p (const_tree, const_tree, unsigned int);
 extern int multiple_of_p (tree, const_tree, const_tree);
 #define omit_one_operand(T1,T2,T3)\
@@ -175,6 +177,7 @@ extern bool tree_expr_nonnegative_warnv_
 extern tree make_range (tree, int *, tree *, tree *, bool *);
 extern tree make_range_step (location_t, enum tree_code, tree, tree, tree,
 tree *, tree *, int *, bool *);
+extern tree range_check_type (tree);
 extern tree build_range_check (location_t, tree, tree, int, tree, tree);
 extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
  tree, tree);
--- gcc/fold-const.c.jj 2017-07-17 10:08:28.564727328 +0200
+++ gcc/fold-const.c2017-07-17 18:29:06.473195833 +0200
@@ -132,7 +132,6 @@ static tree fold_binary_op_with_conditio
 enum tree_code, tree,
 tree, tree,
 tree, tree, int);
-static tree fold_div_compare (location_t, enum tree_code, tree, tree, tree);
 static tree fold_negate_const (tree, tree);
 static tree fold_not_const (const_tree, tree);
 static tree fold_relational_const (enum tree_code, tree, tree, tree);
@@ -4787,6 +4786,39 @@ maskable_range_p (const_tree low, const_
   return true;
 }
 
+/* Helper routine for build_range_check and match.pd.  Return the type to
+   perform the check or NULL if it shouldn't be optimized.  */
+
+tree
+range_check_type (tree etype)
+{
+  /* First make sure that arithmetics in this type is valid, then make sure
+ that it wraps around.  */
+  if (TREE_CODE (etype) == ENUMERAL_TYPE || TREE_CODE (etype) == BOOLEAN_TYPE)
+etype = lang_hooks.types.type_for_size (TYPE_PRECISION (etype),
+   TYPE_UNSIGNED (etype));
+
+  if (TREE_CODE (etype) == INTEGER_TYPE && !TYPE_OVERFLOW_WRAPS (etype))
+{
+  tree utype, minv, maxv;
+
+  /* Check if (unsigned) INT_MAX + 1 == (unsigned) INT_MIN
+for the type in question, as we rely on this here.  */
+  utype = unsigned_type_for (etype);
+  maxv = fold_convert (utype, TYPE_MAX_VALUE (etype));
+  maxv = range_binop (PLUS_EXPR, NULL_TREE, maxv, 1,
+ build_int_cst (TREE_TYPE (maxv), 1), 1);
+  minv = fold_convert (utype, TYPE_MIN_VALUE (etype));
+
+  if (integer_zerop (range_binop (NE_EXPR, integer_type_node,
+ minv, 1, maxv, 1)))
+   etype = utype;
+  else
+   return NULL_TREE;
+}
+  return etype;
+}
+
 /* Given a range, LOW, HIGH, and IN_P, an expression, EXP, and a result
type, TYPE, return an expression to test if EXP is in (or out of, depending
on IN_P) the range.  Return 0 if the test couldn't be created.  */
@@ -4869,31 +4901,10 @@ build_range_check (location_t loc, tree
 }
 
   /* Optimize (c>=low) && (c<=high) into (c-low>=0) && (c-low<=high-low).
- This requires wrap-around arithmetics for the type of the expression.
- First make sure that arithmetics in this type is valid, then make sure
- that it wraps around.  */
-  if (TREE_CODE (etype) == ENUMERAL_TYPE || TREE_CODE (etype) == BOOLEAN_TYPE)
-etype = lang_hooks.types.type_for_size (TYPE_PRECISION (etype),
-   TYPE_UNSIGNED 

Re: [PATCH] PR libstdc++/81064 fix versioned namespace

2017-07-18 Thread Jonathan Wakely

On 18/07/17 14:31 +0100, Jonathan Wakely wrote:

On 17/07/17 22:54 +0200, François Dumont wrote:
This is quite a huge change, so I'd like to wait and see if anyone
else has any opinion on it.

Personally I think it's necessary (assuming I understand the PR
correctly) and so if nobody objects I think we should go with this
change for GCC 8. Let's give it a few days for comments (and I'll
finish going through the patch carefully).


P.S. thanks very much for doing it!




[PATCH] Fix an UBSAN test-case (PR sanitizer/63361).

2017-07-18 Thread Martin Liška
Hello.

This should address the issue spotted in the PR, where I basically implemented
what was recommended to me in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63361#c14.

Ready for trunk?
Thanks,
Martin
>From 92ca98e49112cc164739ebd1f1a00bcc13704331 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Mon, 17 Jul 2017 15:22:25 +0200
Subject: [PATCH] Fix an UBSAN test-case (PR sanitizer/63361).

gcc/testsuite/ChangeLog:

2017-07-17  Martin Liska  

	PR sanitizer/63361
	* c-c++-common/ubsan/float-cast-overflow-1.c: Add either
	-ffloat-store or -mieee for targets that need it.
---
 gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c b/gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c
index cd6941c9d30..a25e8dea29e 100644
--- a/gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c
+++ b/gcc/testsuite/c-c++-common/ubsan/float-cast-overflow-1.c
@@ -1,6 +1,7 @@
 /* { dg-do run { target { lp64 || ilp32 } } } */
+/* { dg-additional-options "-mfloat-store" { target { ia32 m68k-*-* } } } */
+/* { dg-additional-options "-mieee" { target { alpha* sh* } } } */
 /* { dg-options "-fsanitize=float-cast-overflow" } */
-/* { dg-additional-options "-msse2 -mfpmath=sse" { target { sse2_runtime && ia32 } } } */
 
 #include 
 #include "float-cast.h"
-- 
2.13.2



Re: backport nvptx patch to gcc-7-branch

2017-07-18 Thread Jakub Jelinek
On Tue, Jul 18, 2017 at 02:37:38PM +0200, Tom de Vries wrote:
> I'd like to backport this nvptx patch to the gcc-7-branch.
> 
> The patch doesn't trivially fit into the category of regression or
> documentation fix.
> 
> Without this patch, when building an nvptx offloading compiler and running
> the libgomp testsuite for the gcc-7-branch, the GPU hangs, and I've had a
> report from a colleague who experienced system crashes because of it.
> 
> However, in principle gcc is not doing anything wrong: the generated code is
> according to the ptx spec. It's just that the patch makes it less likely to
> run into a ptx JIT bug.
> 
> Then again, it's an nvptx patch, neither a primary nor secondary target.
> 
> I'll commit the backport some time this week, unless there are objections.

Ok, thanks.

> > 0001-Insert-diverging-jump-alap-in-nvptx_single.patch
> > 
> > 
> > Insert diverging jump alap in nvptx_single
> > 
> > 2017-07-17  Tom de Vries  
> > 
> > PR target/81069
> > * config/nvptx/nvptx.c (nvptx_single): Insert diverging branch as late
> > as possible.

Jakub


[PATCH] TYPE_{MIN,MAX}VAL and TYPE_{MIN,MAX}_VALUE

2017-07-18 Thread Nathan Sidwell
We currently have both TYPE_{MIN,MAX}VAL and TYPE_{MIN,MAX}_VALUE pairs 
of accessors.  This is confusing.  The former is the tree-agnostic raw 
field accessor, which I propose renaming TYPE_{MIN,MAX}VAL_RAW, as is 
common with other raw accessors.


The latter pair are for accessing numeric types.  I've committed this 
patch where the former pair were used when the latter pair should have 
been.  That's the obvious cleanup.  I'll post renaming patch shortly.


nathan

--
Nathan Sidwell
2017-07-18  Nathan Sidwell  

   gcc/
	* tree-parloops.c (try_transform_to_exit_first_loop_alt): Use
	TYPE_MAX_VALUE.

   gcc/c-family/
	* c-warn.c (warn_for_memset): Use TYPE_{MIN,MAX}_VALUE.

   gcc/c/
	* c-parser.c (c_parser_array_notation): Use TYPE_{MIN,MAX}_VALUE.

   gcc/cp/ 
	* cp-array-notation.c (build_array_notation_ref): Use
	TYPE_{MIN,MAX}_VALUE.

   gcc/fortran/
	* trans.c (gfc_build_array_ref): Use TYPE_MAX_VALUE.

Index: c/c-parser.c
===
--- c/c-parser.c	(revision 250272)
+++ c/c-parser.c	(working copy)
@@ -18238,18 +18238,18 @@ c_parser_array_notation (location_t loc,
 	  return error_mark_node;
 	}
 
-	  start_index = TYPE_MINVAL (array_type_domain);
+	  start_index = TYPE_MIN_VALUE (array_type_domain);
 	  start_index = fold_build1 (CONVERT_EXPR, ptrdiff_type_node,
  start_index);
-	  if (!TYPE_MAXVAL (array_type_domain)
-	  || !TREE_CONSTANT (TYPE_MAXVAL (array_type_domain)))
+	  if (!TYPE_MAX_VALUE (array_type_domain)
+	  || !TREE_CONSTANT (TYPE_MAX_VALUE (array_type_domain)))
 	{
 	  error_at (loc, "start-index and length fields necessary for "
 			"using array notations in variable-length arrays");
 	  c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, NULL);
 	  return error_mark_node;
 	}
-	  end_index = TYPE_MAXVAL (array_type_domain);
+	  end_index = TYPE_MAX_VALUE (array_type_domain);
 	  end_index = fold_build2 (PLUS_EXPR, TREE_TYPE (end_index),
    end_index, integer_one_node);
 	  end_index = fold_build1 (CONVERT_EXPR, ptrdiff_type_node, end_index);
Index: c-family/c-warn.c
===
--- c-family/c-warn.c	(revision 250272)
+++ c-family/c-warn.c	(working copy)
@@ -1799,12 +1799,12 @@ warn_for_memset (location_t loc, tree ar
 	  tree domain = TYPE_DOMAIN (type);
 	  if (!integer_onep (TYPE_SIZE_UNIT (elt_type))
 	  && domain != NULL_TREE
-	  && TYPE_MAXVAL (domain)
-	  && TYPE_MINVAL (domain)
-	  && integer_zerop (TYPE_MINVAL (domain))
+	  && TYPE_MAX_VALUE (domain)
+	  && TYPE_MIN_VALUE (domain)
+	  && integer_zerop (TYPE_MIN_VALUE (domain))
 	  && integer_onep (fold_build2 (MINUS_EXPR, domain,
 	arg2,
-	TYPE_MAXVAL (domain))))
+	TYPE_MAX_VALUE (domain))))
 	warning_at (loc, OPT_Wmemset_elt_size,
 			"%<memset%> used with length equal to "
 			"number of elements without multiplication "
Index: cp/cp-array-notation.c
===
--- cp/cp-array-notation.c	(revision 250272)
+++ cp/cp-array-notation.c	(working copy)
@@ -1375,8 +1375,8 @@ build_array_notation_ref (location_t loc
 		"using array notation with array of unknown bound");
 	  return error_mark_node;
 	}
-  start = cp_fold_convert (ptrdiff_type_node, TYPE_MINVAL (domain));
-  length = size_binop (PLUS_EXPR, TYPE_MAXVAL (domain), size_one_node);
+  start = cp_fold_convert (ptrdiff_type_node, TYPE_MIN_VALUE (domain));
+  length = size_binop (PLUS_EXPR, TYPE_MAX_VALUE (domain), size_one_node);
   length = cp_fold_convert (ptrdiff_type_node, length);
 }
 
Index: fortran/trans.c
===
--- fortran/trans.c	(revision 250272)
+++ fortran/trans.c	(working copy)
@@ -334,15 +334,15 @@ gfc_build_array_ref (tree base, tree off
   /* Use pointer arithmetic for deferred character length array
  references.  */
   if (type && TREE_CODE (type) == ARRAY_TYPE
-  && TYPE_MAXVAL (TYPE_DOMAIN (type)) != NULL_TREE
-  && (VAR_P (TYPE_MAXVAL (TYPE_DOMAIN (type)))
-	  || TREE_CODE (TYPE_MAXVAL (TYPE_DOMAIN (type))) == INDIRECT_REF)
+  && TYPE_MAX_VALUE (TYPE_DOMAIN (type)) != NULL_TREE
+  && (VAR_P (TYPE_MAX_VALUE (TYPE_DOMAIN (type)))
+	  || TREE_CODE (TYPE_MAX_VALUE (TYPE_DOMAIN (type))) == INDIRECT_REF)
   && decl
-  && (TREE_CODE (TYPE_MAXVAL (TYPE_DOMAIN (type))) == INDIRECT_REF
+  && (TREE_CODE (TYPE_MAX_VALUE (TYPE_DOMAIN (type))) == INDIRECT_REF
 	  || TREE_CODE (decl) == FUNCTION_DECL
-	  || DECL_CONTEXT (TYPE_MAXVAL (TYPE_DOMAIN (type)))
-	== DECL_CONTEXT (decl)))
-span = TYPE_MAXVAL (TYPE_DOMAIN (type));
+	  || (DECL_CONTEXT (TYPE_MAX_VALUE (TYPE_DOMAIN (type)))
+	      == DECL_CONTEXT (decl)))
+span = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
   else
 span = NULL_TREE;
 
Index: tree-parloops.c

Re: [PATCH] PR libstdc++/81064 fix versioned namespace

2017-07-18 Thread Jonathan Wakely

On 17/07/17 22:54 +0200, François Dumont wrote:

Hi

   Here is the patch to fix libstdc++ versioned namespace.

   Now versioned namespace is only at std and __gnu_cxx levels, not 
anymore in nested namespaces.


  PR libstdc++/81064

   * include/bits/algorithmfwd.h: Reorganize versioned namespace.
   * include/bits/basic_string.h: Likewise.
   * include/bits/c++config: Likewise.
   * include/bits/deque.tcc: Likewise.
   * include/bits/forward_list.h: Likewise.
   * include/bits/forward_list.tcc: Likewise.
   * include/bits/hashtable_policy.h: Likewise.
   * include/bits/list.tcc: Likewise.
   * include/bits/move.h: Likewise.
   * include/bits/quoted_string.h: Likewise.
   * include/bits/random.h: Likewise.
   * include/bits/random.tcc: Likewise.
   * include/bits/regex.h: Likewise.
   * include/bits/regex.tcc: Likewise.
   * include/bits/regex_automaton.h: Likewise.
   * include/bits/regex_automaton.tcc: Likewise.
   * include/bits/regex_compiler.h: Likewise.
   * include/bits/regex_compiler.tcc: Likewise.
   * include/bits/regex_constants.h: Likewise.
   * include/bits/regex_error.h: Likewise.
   * include/bits/regex_executor.h: Likewise.
   * include/bits/regex_executor.tcc: Likewise.
   * include/bits/regex_scanner.h: Likewise.
   * include/bits/regex_scanner.tcc: Likewise.
   * include/bits/specfun.h: Likewise.
   * include/bits/stl_algo.h: Likewise.
   * include/bits/stl_algobase.h: Likewise.
   * include/bits/stl_bvector.h: Likewise.
   * include/bits/stl_deque.h: Likewise.
   * include/bits/stl_iterator.h: Likewise.
   * include/bits/stl_iterator_base_funcs.h: Likewise.
   * include/bits/stl_list.h: Likewise.
   * include/bits/stl_map.h: Likewise.
   * include/bits/stl_multimap.h: Likewise.
   * include/bits/stl_multiset.h: Likewise.
   * include/bits/stl_relops.h: Likewise.
   * include/bits/stl_set.h: Likewise.
   * include/bits/stl_vector.h: Likewise.
   * include/bits/uniform_int_dist.h: Likewise.
   * include/bits/unordered_map.h: Likewise.
   * include/bits/unordered_set.h: Likewise.
   * include/bits/vector.tcc: Likewise.
   * include/c_global/cmath: Likewise.
   * include/c_std/cmath: Likewise.
   * include/decimal/decimal: Likewise.
   * include/decimal/decimal.h: Likewise.
   * include/experimental/algorithm: Likewise.
   * include/experimental/any: Likewise.
   * include/experimental/array: Likewise.
   * include/experimental/bits/erase_if.h: Likewise.
   * include/experimental/bits/fs_dir.h: Likewise.
   * include/experimental/bits/fs_fwd.h: Likewise.
   * include/experimental/bits/fs_ops.h: Likewise.
   * include/experimental/bits/fs_path.h: Likewise.
   * include/experimental/bits/shared_ptr.h: Likewise.
   * include/experimental/bits/string_view.tcc: Likewise.
   * include/experimental/chrono: Likewise.
   * include/experimental/deque: Likewise.
   * include/experimental/filesystem: Likewise.
   * include/experimental/forward_list: Likewise.
   * include/experimental/functional: Likewise.
   * include/experimental/iterator: Likewise.
   * include/experimental/list: Likewise.
   * include/experimental/map: Likewise.
   * include/experimental/memory: Likewise.
   * include/experimental/memory_resource: Likewise.
   * include/experimental/numeric: Likewise.
   * include/experimental/optional: Likewise.
   * include/experimental/propagate_const: Likewise.
   * include/experimental/random: Likewise.
   * include/experimental/ratio: Likewise.
   * include/experimental/regex: Likewise.
   * include/experimental/set: Likewise.
   * include/experimental/source_location: Likewise.
   * include/experimental/string: Likewise.
   * include/experimental/string_view: Likewise.
   * include/experimental/system_error: Likewise.
   * include/experimental/tuple: Likewise.
   * include/experimental/type_traits: Likewise.
   * include/experimental/unordered_map: Likewise.
   * include/experimental/unordered_set: Likewise.
   * include/experimental/utility: Likewise.
   * include/experimental/vector: Likewise.
   * include/ext/bitmap_allocator.h: Likewise.
   * include/ext/codecvt_specializations.h: Likewise.
   * include/ext/rope: Likewise.
   * include/ext/typelist.h: Likewise.
   * include/std/chrono: Likewise.
   * include/std/complex: Likewise.
   * include/std/functional: Likewise.
   * include/std/numeric: Likewise.
   * include/std/string_view: Likewise.
   * include/std/thread: Likewise.
   * include/std/variant: Likewise.
   * include/tr1/array: Likewise.
   * include/tr1/bessel_function.tcc: Likewise.
   * include/tr1/beta_function.tcc: Likewise.
   * include/tr1/cmath: Likewise.
   * include/tr1/complex: Likewise.
   * include/tr1/ell_integral.tcc: Likewise.
   * include/tr1/exp_integral.tcc: Likewise.
   * include/tr1/functional: Likewise.
   * include/tr1/functional_hash.h: Likewise.
   * include/tr1/gamma.tcc: Likewise.
   * include/tr1/hashtable.h: Likewise.
   * include/tr1/hashtable_policy.h: Likewise.
   * include/tr1/hypergeometric.tcc: Likewise.
   * 

Re: [PATCH] Fix pr80044, -static and -pie insanity, and pr81170

2017-07-18 Thread H.J. Lu
On Tue, Jul 18, 2017 at 7:09 AM, Alan Modra  wrote:
> On Tue, Jul 18, 2017 at 05:36:49AM -0700, H.J. Lu wrote:
>> I am working on compiling libc.a in glibc with -fPIE and building static
>> PIE.  This creates static executable with PIE:
>>
>> gcc -nostdlib -nostartfiles -static -o
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/sln -pie
>> -Wl,--no-dynamic-linker
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crt1.o
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crti.o `gcc
>> --print-file-name=crtbeginS.o`
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/sln.o
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/static-stubs.o
>>  -Wl,--start-group
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/libc.a -lgcc
>> -Wl,--end-group `gcc  --print-file-name=crtendS.o`
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crtn.o
>>
>> Currently, it only works with gcc configured with -disable-default-pie.
>> With --enable-default-pie, I got
>>
>> [hjl@gnu-tools-1 build-x86_64-linux]$ /usr/gcc-7.1.1-x32-pie/bin/gcc
>> -nostdlib -nostartfiles -static -o /tmp/sln -pie
>> -Wl,--no-dynamic-linker
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crt1.o
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crti.o `gcc
>> --print-file-name=crtbeginS.o`
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/sln.o
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/elf/static-stubs.o
>>  -Wl,--start-group
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/libc.a -lgcc
>> -Wl,--end-group `gcc  --print-file-name=crtendS.o`
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/csu/crtn.o
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/libc.a(dl-support.o):
>> In function `elf_machine_load_address':
>> /export/gnu/import/git/sources/glibc/elf/../sysdeps/x86_64/dl-machine.h:59:
>> undefined reference to `_DYNAMIC'
>> /export/build/gnu/glibc-static-pie/build-x86_64-linux/libc.a(dl-support.o):
>> In function `elf_get_dynamic_info':
>> /export/gnu/import/git/sources/glibc/elf/get-dynamic-info.h:48:
>> undefined reference to `_DYNAMIC'
>> collect2: error: ld returned 1 exit status
>> [hjl@gnu-tools-1 build-x86_64-linux]$
>>
>> Will your change fix it?
>
> You have got to be joking!  How should I know whether something will
> work with some hare-brained scheme of yours?  One that seemingly
> requires you to specify startup files by hand!
>

The difference is with --enable-default-pie, the gcc driver doesn't pass
both -pie and -static to ld when "-static -pie" is used.   Does your change
pass both -pie and -static to ld when "-static -pie" is used?

-- 
H.J.


[PATCH] Rename TYPE_{MIN,MAX}VAL

2017-07-18 Thread Nathan Sidwell
As I mentioned in my previous patch, we currently have 
TYPE_{MIN,MAX}_VALUES for numeric types and TYPE_{MIN,MAX}VAL for 
type-agnostic access.


This patch renames the latter to TYPE_{MIN,MAX}VAL_RAW, matching 
TYPE_VALUES_RAW, which had a similar problem.


While renaming the macros, I reordered them in tree.h to be grouped by 
the underlying field.  I think that makes more sense here, as the only 
case when grouping as min/max makes most sense is for the numeric types. 
 And mostly when looking at this, I want to discover what things might 
use this field.


Because of that reordering, I'm hesitant to apply the obvious rule.  I'd 
appreciate review.  thanks.


(This patch is not dependent on the TYPE_METHODS removal patch)

nathan

--
Nathan Sidwell
2017-07-18  Nathan Sidwell  

	gcc/ 
	* tree.h (TYPE_MINVAL, TYPE_MAXVAL): Rename to ...
	(TYPE_MINVAL_RAW, TYPE_MAXVAL_RAW): ... these.
	* tree.c (find_decls_types_r, verify_type): Use TYPE_{MIN,MAX}VAL_RAW.
	* lto-streamer-out.c (DFS::DFS_write_tree_body): Likewise.
	(hash_tree): Likewise.
	* tree-streamer-in.c (lto_input_ts_type_non_common_tree_pointers):
	Likewise.
	* tree-streamer-out.c (write_ts_type_non_common_tree_pointers):
	Likewise.

	cp/
	* cp-tree.h (PACK_EXPANSION_PARAMETER_PACKS,
	PACK_EXPANSION_EXTRA_ARGS): Use TYPE_{MIN,MAX}VAL_RAW.

	lto/
	* lto.c (mentions_vars_p_type): Use TYPE_{MIN,MAX}VAL_RAW.
	(compare_tree_sccs_1, lto_fixup_prevailing_decls): Likewise.

	objc/
	* objc-act.h (CLASS_NST_METHODS, CLASS_CLS_METHODS): Use
	TYPE_{MIN,MAX}VAL_RAW.

Index: cp/cp-tree.h
===
--- cp/cp-tree.h	(revision 250309)
+++ cp/cp-tree.h	(working copy)
@@ -3522,13 +3522,13 @@ extern void decl_shadowed_for_var_insert
 #define PACK_EXPANSION_PARAMETER_PACKS(NODE)		\
   *(TREE_CODE (NODE) == EXPR_PACK_EXPANSION		\
     ? &TREE_OPERAND (NODE, 1)			\
-    : &TYPE_MINVAL (TYPE_PACK_EXPANSION_CHECK (NODE)))
+    : &TYPE_MINVAL_RAW (TYPE_PACK_EXPANSION_CHECK (NODE)))
 
 /* Any additional template args to be applied when substituting into
    the pattern, set by tsubst_pack_expansion for partial instantiations.  */
 #define PACK_EXPANSION_EXTRA_ARGS(NODE)		\
   *(TREE_CODE (NODE) == TYPE_PACK_EXPANSION	\
-    ? &TYPE_MAXVAL (NODE)			\
+    ? &TYPE_MAXVAL_RAW (NODE)			\
     : &TREE_OPERAND ((NODE), 2))
 
 /* True iff this pack expansion is within a function context.  */
Index: lto/lto.c
===
--- lto/lto.c	(revision 250309)
+++ lto/lto.c	(working copy)
@@ -646,8 +646,8 @@ mentions_vars_p_type (tree t)
   CHECK_NO_VAR (TYPE_ATTRIBUTES (t));
   CHECK_NO_VAR (TYPE_NAME (t));
 
-  CHECK_VAR (TYPE_MINVAL (t));
-  CHECK_VAR (TYPE_MAXVAL (t));
+  CHECK_VAR (TYPE_MINVAL_RAW (t));
+  CHECK_VAR (TYPE_MAXVAL_RAW (t));
 
   /* Accessor is for derived node types only. */
   CHECK_NO_VAR (t->type_non_common.binfo);
@@ -1414,9 +1414,10 @@ compare_tree_sccs_1 (tree t1, tree t2, t
   else if (code == FUNCTION_TYPE
 	   || code == METHOD_TYPE)
 	compare_tree_edges (TYPE_ARG_TYPES (t1), TYPE_ARG_TYPES (t2));
+
   if (!POINTER_TYPE_P (t1))
-	compare_tree_edges (TYPE_MINVAL (t1), TYPE_MINVAL (t2));
-  compare_tree_edges (TYPE_MAXVAL (t1), TYPE_MAXVAL (t2));
+	compare_tree_edges (TYPE_MINVAL_RAW (t1), TYPE_MINVAL_RAW (t2));
+  compare_tree_edges (TYPE_MAXVAL_RAW (t1), TYPE_MAXVAL_RAW (t2));
 }
 
   if (CODE_CONTAINS_STRUCT (code, TS_LIST))
@@ -2580,8 +2581,8 @@ lto_fixup_prevailing_decls (tree t)
   LTO_NO_PREVAIL (TYPE_ATTRIBUTES (t));
   LTO_NO_PREVAIL (TYPE_NAME (t));
 
-  LTO_SET_PREVAIL (TYPE_MINVAL (t));
-  LTO_SET_PREVAIL (TYPE_MAXVAL (t));
+  LTO_SET_PREVAIL (TYPE_MINVAL_RAW (t));
+  LTO_SET_PREVAIL (TYPE_MAXVAL_RAW (t));
   LTO_NO_PREVAIL (t->type_non_common.binfo);
 
   LTO_SET_PREVAIL (TYPE_CONTEXT (t));
Index: lto-streamer-out.c
===
--- lto-streamer-out.c	(revision 250309)
+++ lto-streamer-out.c	(working copy)
@@ -835,8 +835,8 @@ DFS::DFS_write_tree_body (struct output_
 	DFS_follow_tree_edge (TYPE_ARG_TYPES (expr));
 
   if (!POINTER_TYPE_P (expr))
-	DFS_follow_tree_edge (TYPE_MINVAL (expr));
-  DFS_follow_tree_edge (TYPE_MAXVAL (expr));
+	DFS_follow_tree_edge (TYPE_MINVAL_RAW (expr));
+  DFS_follow_tree_edge (TYPE_MAXVAL_RAW (expr));
   if (RECORD_OR_UNION_TYPE_P (expr))
 	DFS_follow_tree_edge (TYPE_BINFO (expr));
 }
@@ -1271,8 +1271,8 @@ hash_tree (struct streamer_tree_cache_d
 	   || code == METHOD_TYPE)
 	visit (TYPE_ARG_TYPES (t));
   if (!POINTER_TYPE_P (t))
-	visit (TYPE_MINVAL (t));
-  visit (TYPE_MAXVAL (t));
+	visit (TYPE_MINVAL_RAW (t));
+  visit (TYPE_MAXVAL_RAW (t));
   if (RECORD_OR_UNION_TYPE_P (t))
 	visit (TYPE_BINFO (t));
 }
Index: objc/objc-act.h
===
--- objc/objc-act.h	(revision 250309)
+++ objc/objc-act.h	(working 

Re: [PATCH] Move fold_div_compare optimization to match.pd (PR tree-optimization/81346)

2017-07-18 Thread Jakub Jelinek
On Tue, Jul 18, 2017 at 05:21:42PM +0200, Marc Glisse wrote:
> On Tue, 18 Jul 2017, Jakub Jelinek wrote:
> 
> > +/* X / C1 op C2 into a simple range test.  */
> > +(for cmp (simple_comparison)
> > + (simplify
> > +  (cmp (trunc_div:s @0 INTEGER_CST@1) INTEGER_CST@2)
> > +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> > +   && integer_nonzerop (@1)
> > +   && !TREE_OVERFLOW (@1)
> > +   && !TREE_OVERFLOW (@2))
> 
> (not specific to this patch)
> I wonder if we should check TREE_OVERFLOW for the input that way in many
> more transformations in match.pd, or never, or how to decide in which
> cases to do it...

The reason for putting it here was that: 1) fold_div_compare did that
2) it relies on TREE_OVERFLOW to detect if the optimization is ok or not,
if there are some TREE_OVERFLOW on the inputs, then it might misbehave.

> > + (with
> > +  {
> > +   tree etype = range_check_type (TREE_TYPE (@0));
> > +   if (etype)
> > + {
> > +   if (! TYPE_UNSIGNED (etype))
> > + etype = unsigned_type_for (etype);
> 
> Now that you enforce unsignedness, can you think of cases where going
> through range_check_type is useful compared to
>   tree etype = unsigned_type_for (TREE_TYPE (@0));
> ? I can propose that trivial patch as a follow-up if you like.

I couldn't convince myself it is safe.  While enums and bool are handled
early, aren't there e.g. Ada integral types with weirdo min/max values and
similar stuff where the range_check_type test would fail?  If it never
fails, why we do it there?  The reason I've added unsigned_type_for
afterwards is that that build_range_check actually does something like
that too.
With -fwrapv it will perform the subtraction of lo in whatever type
range_check_type returns (could be e.g. int for -fwrapv) but then
recurses and runs into:
  if (integer_zerop (low))
{
  if (! TYPE_UNSIGNED (etype))
{
  etype = unsigned_type_for (etype);
  high = fold_convert_loc (loc, etype, high);
  exp = fold_convert_loc (loc, etype, exp);
}
  return build_range_check (loc, type, exp, 1, 0, high);
}
I was thinking whether e.g. range_check_type shouldn't have an extra
argument which would be false for build_range_check and true for
the use in match.pd, and if that arg is false, it would use
!TYPE_OVERFLOW_WRAPS (etype) and if that arg is true, it would
use !TYPE_UNSIGNED (etype) instead.

> > +   hi = fold_convert (etype, hi);
> > +   lo = fold_convert (etype, lo);
> > +   hi = const_binop (MINUS_EXPR, etype, hi, lo);
> > + }
> > +  }
> > +  (if (etype && hi && !TREE_OVERFLOW (hi))
> 
> I don't think you can have an overflow here anymore, now that etype is
> always unsigned and since you check the input (doesn't hurt though).

If const_binop for unsigned etype will never return NULL nor TREE_OVERFLOW
on the result, then that can surely go.  But again, I'm not 100% sure.
> 
> > +   (if (code == EQ_EXPR)
> > +   (le (minus (convert:etype @0) { lo; }) { hi; })
> > +   (gt (minus (convert:etype @0) { lo; }) { hi; })

Jakub


Re: [PATCH] Move fold_div_compare optimization to match.pd (PR tree-optimization/81346)

2017-07-18 Thread Marc Glisse

On Tue, 18 Jul 2017, Jakub Jelinek wrote:


+/* X / C1 op C2 into a simple range test.  */
+(for cmp (simple_comparison)
+ (simplify
+  (cmp (trunc_div:s @0 INTEGER_CST@1) INTEGER_CST@2)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+   && integer_nonzerop (@1)
+   && !TREE_OVERFLOW (@1)
+   && !TREE_OVERFLOW (@2))


(not specific to this patch)
I wonder if we should check TREE_OVERFLOW for the input that way in many
more transformations in match.pd, or never, or how to decide in which
cases to do it...


+   (with { tree lo, hi; bool neg_overflow;
+	   enum tree_code code = fold_div_compare (cmp, @1, @2, &lo, &hi,
+						   &neg_overflow); }
+(switch
+ (if (code == LT_EXPR || code == GE_EXPR)
+   (if (TREE_OVERFLOW (lo))
+   { build_int_cst (type, (code == LT_EXPR) ^ neg_overflow); }
+   (if (code == LT_EXPR)
+(lt @0 { lo; })
+(ge @0 { lo; }
+ (if (code == LE_EXPR || code == GT_EXPR)
+   (if (TREE_OVERFLOW (hi))
+   { build_int_cst (type, (code == LE_EXPR) ^ neg_overflow); }
+   (if (code == LE_EXPR)
+(le @0 { hi; })
+(gt @0 { hi; }
+ (if (!lo && !hi)
+  { build_int_cst (type, code == NE_EXPR); })
+ (if (code == EQ_EXPR && !hi)
+  (ge @0 { lo; }))
+ (if (code == EQ_EXPR && !lo)
+  (le @0 { hi; }))
+ (if (code == NE_EXPR && !hi)
+  (lt @0 { lo; }))
+ (if (code == NE_EXPR && !lo)
+  (gt @0 { hi; }))
+ (if (GENERIC)
+  { build_range_check (UNKNOWN_LOCATION, type, @0, code == EQ_EXPR,
+  lo, hi); })
+ (with
+  {
+   tree etype = range_check_type (TREE_TYPE (@0));
+   if (etype)
+ {
+   if (! TYPE_UNSIGNED (etype))
+ etype = unsigned_type_for (etype);


Now that you enforce unsignedness, can you think of cases where going
through range_check_type is useful compared to
  tree etype = unsigned_type_for (TREE_TYPE (@0));
? I can propose that trivial patch as a follow-up if you like.


+   hi = fold_convert (etype, hi);
+   lo = fold_convert (etype, lo);
+   hi = const_binop (MINUS_EXPR, etype, hi, lo);
+ }
+  }
+  (if (etype && hi && !TREE_OVERFLOW (hi))


I don't think you can have an overflow here anymore, now that etype is
always unsigned and since you check the input (doesn't hurt though).


+   (if (code == EQ_EXPR)
+   (le (minus (convert:etype @0) { lo; }) { hi; })
+   (gt (minus (convert:etype @0) { lo; }) { hi; })


--
Marc Glisse


Re: [PATCH] Implement one optimization from build_range_check in match.pd (PR tree-optimization/81346)

2017-07-18 Thread Martin Sebor

--- gcc/match.pd.jj 2017-07-17 16:25:20.0 +0200
+++ gcc/match.pd2017-07-18 12:32:52.896924558 +0200
@@ -1125,6 +1125,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& wi::neg_p (@1, TYPE_SIGN (TREE_TYPE (@1
 (cmp @2 @0))

+/* (X - 1U) <= INT_MAX-1U into (int) X > 0.  */


Since the transformation applies to other types besides int I
suggest to make it clear in the comment.  E.g., something like:

  /* (X - 1U) <= TYPE_MAX - 1U into (TYPE) X > 0 for any integer
 TYPE.  */

(with spaces around all the operators as per GCC coding style).

Martin



Re: [PATCH] Implement one optimization from build_range_check in match.pd (PR tree-optimization/81346)

2017-07-18 Thread Marc Glisse

On Tue, 18 Jul 2017, Jakub Jelinek wrote:


In the PR Marc noted that the optimization might be useful even for
constants other than 1, by transforming
x+C1 <= C2 if unsigned and C2-C1==INT_MAX into (int)x > (int)(-1-C1).


(int)x >= (int)(-C1) might be easier (and more valid, except that the only 
case where that makes a difference seems to be when C2==UINT_MAX, in which 
case we could hope not to reach this transformation).



Shall I do that immediately, or incrementally?


I vote for "incremental", unless someone finds an issue with your current 
patch.


Shall we also change build_range_check to do that (i.e. drop the 
integer_onep above and use right etype constant?


I would rather consider build_range_check legacy and avoid modifying it 
too much, but if you are motivated...



+   && TYPE_PRECISION (TREE_TYPE (@0)) > 1


I see you've been bitten in the past ;-)

--
Marc Glisse


Re: [PATCH] Implement one optimization from build_range_check in match.pd (PR tree-optimization/81346)

2017-07-18 Thread Martin Sebor

On 07/18/2017 09:43 AM, Jakub Jelinek wrote:

On Tue, Jul 18, 2017 at 09:31:11AM -0600, Martin Sebor wrote:

--- gcc/match.pd.jj 2017-07-17 16:25:20.0 +0200
+++ gcc/match.pd2017-07-18 12:32:52.896924558 +0200
@@ -1125,6 +1125,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& wi::neg_p (@1, TYPE_SIGN (TREE_TYPE (@1
 (cmp @2 @0))

+/* (X - 1U) <= INT_MAX-1U into (int) X > 0.  */


Since the transformation applies to other types besides int I
suggest to make it clear in the comment.  E.g., something like:

  /* (X - 1U) <= TYPE_MAX - 1U into (TYPE) X > 0 for any integer
 TYPE.  */

(with spaces around all the operators as per GCC coding style).


I think many of the match.pd comments are also not fully generic
to describe what it does, just to give an idea what it does.

...

Examples of other comments that "suffer" from similar lack of sufficient
genericity, but perhaps are good enough to let somebody understand it
quickly:


Sure, but that doesn't make them a good example to follow.  As
someone pointed out to me in code reviews, existing deviations
from the preferred style, whether documented or not, or lack of
clarity, aren't a license to add more.  Please take my suggestion
here in the same constructive spirit.

Martin


[PATCH, PR81464] Handle equal-argument loop exit phi in expand_omp_for_static_chunk

2017-07-18 Thread Tom de Vries

Hi,

this patch fixes PR81464, an ICE in ompexpssa.

The ICE occurs in expand_omp_for_static_chunk when we're trying to fix 
up a loop exit phi:

...
  # .MEM_88 = PHI <.MEM_86(46), .MEM_86(71)>
...

It's a loop exit phi with equal arguments, which means that the variable 
has the same value when the loop is executed, and when the loop is 
skipped, in other words, it's not modified in the loop.


The fixup code ICEs when it cannot find a loop header phi corresponding 
to the loop exit phi. But it's expected that there's no loop header phi, 
given that the variable is not modified in the loop.


The patch fixes the ICE by not trying to fix up this particular kind of 
loop exit phi.


Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom
Handle equal-argument loop exit phi in expand_omp_for_static_chunk

2017-07-18  Tom de Vries  

	PR middle-end/81464
	* omp-expand.c (expand_omp_for_static_chunk): Handle equal-argument loop
	exit phi.

	* gfortran.dg/pr81464.f90: New test.

---
 gcc/omp-expand.c  |  4 
 gcc/testsuite/gfortran.dg/pr81464.f90 | 19 +++
 2 files changed, 23 insertions(+)

diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index 929c530..63b91d7 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -4206,6 +4206,10 @@ expand_omp_for_static_chunk (struct omp_region *region,
 	  source_location locus;
 
 	  phi = psi.phi ();
+	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
+			   redirect_edge_var_map_def (vm), 0))
+	  continue;
+
 	  t = gimple_phi_result (phi);
 	  gcc_assert (t == redirect_edge_var_map_result (vm));
 
diff --git a/gcc/testsuite/gfortran.dg/pr81464.f90 b/gcc/testsuite/gfortran.dg/pr81464.f90
new file mode 100644
index 000..425cae9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr81464.f90
@@ -0,0 +1,19 @@
+! { dg-do compile }
+! { dg-options "--param parloops-chunk-size=2 -ftree-parallelize-loops=2 -O1" }
+
+program main
+  implicit none
+  real, dimension(:,:),allocatable :: a, b, c
+  real :: sm
+
+  allocate (a(2,2), b(2,2), c(2,2))
+
+  call random_number(a)
+  call random_number(b)
+
+  c = matmul(a,b)
+  sm = sum(c)
+
+  deallocate(a,b,c)
+
+end program main


Re: [PATCH] Implement one optimization from build_range_check in match.pd (PR tree-optimization/81346)

2017-07-18 Thread Jakub Jelinek
On Tue, Jul 18, 2017 at 10:47:37AM -0600, Martin Sebor wrote:
> On 07/18/2017 09:43 AM, Jakub Jelinek wrote:
> > On Tue, Jul 18, 2017 at 09:31:11AM -0600, Martin Sebor wrote:
> > > > --- gcc/match.pd.jj 2017-07-17 16:25:20.0 +0200
> > > > +++ gcc/match.pd2017-07-18 12:32:52.896924558 +0200
> > > > @@ -1125,6 +1125,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > > && wi::neg_p (@1, TYPE_SIGN (TREE_TYPE (@1
> > > >  (cmp @2 @0))
> > > > 
> > > > +/* (X - 1U) <= INT_MAX-1U into (int) X > 0.  */
> > > 
> > > Since the transformation applies to other types besides int I
> > > suggest to make it clear in the comment.  E.g., something like:
> > > 
> > >   /* (X - 1U) <= TYPE_MAX - 1U into (TYPE) X > 0 for any integer
> > >  TYPE.  */
> > > 
> > > (with spaces around all the operators as per GCC coding style).
> > 
> > I think many of the match.pd comments are also not fully generic
> > to describe what it does, just to give an idea what it does.
> ...
> > Examples of other comments that "suffer" from similar lack of sufficient
> > genericity, but perhaps are good enough to let somebody understand it
> > quickly:
> 
> Sure, but that doesn't make them a good example to follow.  As
> someone pointed out to me in code reviews, existing deviations
> from the preferred style, whether documented or not, or lack of
> clarity, aren't a license to add more.  Please take my suggestion
> here in the same constructive spirit.

The point I'm trying to make is that in order to make the
comments generic enough they will be too large and too hard to parse.
IMHO sometimes it is better to just give an example of what it
does, and those who want to read all the details on what exactly it does,
there is the simplify below it with all the details.
Consider another randomly chosen comment:
 /* hypot(x,x) -> fabs(x)*sqrt(2).  */
This also isn't describing generically what it does, because
it handles not just hypot ->fabs*sqrt, but also hypotl ->fabsl*sqrtl,
hypotf ->fabsf*sqrtf, maybe others.

In the end, it is Richard's call on what he wants to have in match.pd
comments.

Jakub


Re: [PATCH, PR81464] Handle equal-argument loop exit phi in expand_omp_for_static_chunk

2017-07-18 Thread Jakub Jelinek
On Tue, Jul 18, 2017 at 06:48:56PM +0200, Tom de Vries wrote:
> Hi,
> 
> this patch fixes PR81464, an ICE in ompexpssa.
> 
> The ICE occurs in expand_omp_for_static_chunk when we're trying to fix up a
> loop exit phi:
> ...
>   # .MEM_88 = PHI <.MEM_86(46), .MEM_86(71)>
> ...

That is something that should be cleaned up by some phi opt, but if it has
been introduced during the parloops pass or too early before that, we
probably should deal with it.

> --- a/gcc/omp-expand.c
> +++ b/gcc/omp-expand.c
> @@ -4206,6 +4206,10 @@ expand_omp_for_static_chunk (struct omp_region *region,
> source_location locus;
>  
> phi = psi.phi ();
> +   if (operand_equal_p (gimple_phi_arg_def (phi, 0),
> +redirect_edge_var_map_def (vm), 0))
> +   continue;

Wrong formatting, please remove 2 spaces before continue;

Otherwise LGTM.

Jakub


Re: [PATCH] match.pd: reassociate multiplications with constants

2017-07-18 Thread Alexander Monakov
On Mon, 17 Jul 2017, Alexander Monakov wrote:
> On Mon, 17 Jul 2017, Marc Glisse wrote:
> > > +/* Combine successive multiplications.  Similar to above, but handling
> > > +   overflow is different.  */
> > > +(simplify
> > > + (mult (mult @0 INTEGER_CST@1) INTEGER_CST@2)
> > > + (with {
> > > +   bool overflow_p;
> > > +   wide_int mul = wi::mul (@1, @2, TYPE_SIGN (type), &overflow_p);
> > > +  }
> > > +  (if (!overflow_p || TYPE_OVERFLOW_WRAPS (type))
> > 
> > I wonder if there are cases where this would cause trouble for saturating
> > integers. The only case I can think of is when @2 is -1, but that's likely
> > simplified to NEGATE_EXPR first.
> 
> Ah, yes, I think if @2 is -1 or 0 then we should not attempt this transform 
> for
> either saturating or sanitized types, just like in the first patch. I think
> wrapping the 'with' with 'if (!integer_minus_onep (@2) && !integer_zerop 
> (@2))'
> works, since as you say it should become a negate/zero anyway?

Updated patch:

* match.pd ((X * CST1) * CST2): Simplify to X * (CST1 * CST2).
testsuite:
* gcc.dg/tree-ssa/assoc-2.c: Enhance.
* gcc.dg/tree-ssa/slsr-4.c: Adjust.

diff --git a/gcc/match.pd b/gcc/match.pd
index 36045f1..0bb5541 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -283,6 +283,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 || mul != wi::min_value (TYPE_PRECISION (type), SIGNED))
  { build_zero_cst (type); })

+/* Combine successive multiplications.  Similar to above, but handling
+   overflow is different.  */
+(simplify
+ (mult (mult @0 INTEGER_CST@1) INTEGER_CST@2)
+ /* More specific rules can handle 0 and -1; skip them here to avoid
+wrong transformations for sanitized and saturating types.  */
+ (if (!integer_zerop (@2) && !integer_minus_onep (@2))
+  (with {
+bool overflow_p;
+    wide_int mul = wi::mul (@1, @2, TYPE_SIGN (type), &overflow_p);
+   }
+   (if (!overflow_p || TYPE_OVERFLOW_WRAPS (type))
+(mult @0 { wide_int_to_tree (type, mul); })
+
 /* Optimize A / A to 1.0 if we don't care about
NaNs or Infinities.  */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/assoc-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/assoc-2.c
index a92c882..cc0e9d4 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/assoc-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/assoc-2.c
@@ -5,4 +5,15 @@ int f0(int a, int b){
   return a * 33 * b * 55;
 }

-/* { dg-final { scan-tree-dump-times "mult_expr" 2 "gimple" } } */
+int f1(int a){
+  a *= 33;
+  return a * 55;
+}
+
+int f2(int a, int b){
+  a *= 33;
+  return a * b * 55;
+}
+
+/* { dg-final { scan-tree-dump-times "mult_expr" 7 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "mult_expr" 5 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/slsr-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/slsr-4.c
index 17d7b4c..1e943b7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/slsr-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/slsr-4.c
@@ -23,13 +23,9 @@ f (int i)
   foo (y);
 }
 
-/* { dg-final { scan-tree-dump-times "\\* 4" 1 "slsr" } } */
-/* { dg-final { scan-tree-dump-times "\\* 10" 1 "slsr" } } */
-/* { dg-final { scan-tree-dump-times "\\+ 20;" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\* 40" 1 "slsr" } } */
 /* { dg-final { scan-tree-dump-times "\\+ 200" 1 "slsr" } } */
-/* { dg-final { scan-tree-dump-times "\\- 16;" 1 "slsr" } } */
 /* { dg-final { scan-tree-dump-times "\\- 160" 1 "slsr" } } */
-/* { dg-final { scan-tree-dump-times "\\* 4" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "\\* 10" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\* 40" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "\\+ 200" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "\\+ 40" 1 "optimized" } } */


Re: [PATCH][RFA/RFC] Stack clash mitigation 0/9

2017-07-18 Thread Jeff Law
On 07/13/2017 03:26 AM, Christophe Lyon wrote:
> I have executed a validation of your patch series on aarch64 and arm
> targets, and I have minor comments.
> 
> On arm, all new tests are unsupported, as expected.
Good.

> On aarch64-linux, the new tests pass, but they fail on aarch64-elf:
>   - FAIL appears  [ => FAIL]:
That's really strange.  I just tried that here and the only two failures
I got were stack-check-7 and stack-check-8 which failed because I didn't
have a cross assembler installed.

> 
> 
> As I noticed that you used dg-require-effective-target
> stack_clash_protected instead of
> dg-require-stack-check "clash" that I recently committed, I also tried
> with the later.
Yea.  Ultimately I decided that unless the target had explicitly added
support for stack clash protection that the tests should be considered
UNRESOLVED, even if the port had partial protection (as is the case with
ARM).  Thus I ended up with a new effective target test.  I should have
mentioned that in the cover letter.

Thanks,


Jeff


Re: [PATCH] Implement one optimization from build_range_check in match.pd (PR tree-optimization/81346)

2017-07-18 Thread Marc Glisse

On Tue, 18 Jul 2017, Jakub Jelinek wrote:


On Tue, Jul 18, 2017 at 05:35:54PM +0200, Marc Glisse wrote:

On Tue, 18 Jul 2017, Jakub Jelinek wrote:


In the PR Marc noted that the optimization might be useful even for
constants other than 1, by transforming
x+C1 <= C2 if unsigned and C2-C1==INT_MAX into (int)x > (int)(-1-C1).


(int)x >= (int)(-C1) might be easier (and more valid, except that the only
case where that makes a difference seems to be when C2==UINT_MAX, in which
case we could hope not to reach this transformation).


Don't we canonicalize that (int)x >= (int)(-C1) to (int)x > (int)(-1-C1)
immediately though?


We probably don't canonicalize (int)x >= INT_MIN to (int)x > INT_MAX ;-) 
(what I was suggesting essentially delegates the check for INT_MIN or 
overflow to the canonicalization code)


--
Marc Glisse


Re: [PATCH] Add self as maintainer of D front-end and libphobos

2017-07-18 Thread Gerald Pfeifer
On Thu, 13 Jul 2017, Iain Buclaw wrote:
>> As per message on the D language being accepted, this adds myself as a
>> maintainer of the D front-end and libphobos runtime library.
> I may have to request a ping here.

I would commit this when the first bits of D go in.  (Technically you 
could commit this now, though, I guess; that's just my recommendation.)

Gerald


Re: [PATCH] Implement one optimization from build_range_check in match.pd (PR tree-optimization/81346)

2017-07-18 Thread Jakub Jelinek
On Tue, Jul 18, 2017 at 09:31:11AM -0600, Martin Sebor wrote:
> > --- gcc/match.pd.jj 2017-07-17 16:25:20.0 +0200
> > +++ gcc/match.pd2017-07-18 12:32:52.896924558 +0200
> > @@ -1125,6 +1125,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > && wi::neg_p (@1, TYPE_SIGN (TREE_TYPE (@1
> >  (cmp @2 @0))
> > 
> > +/* (X - 1U) <= INT_MAX-1U into (int) X > 0.  */
> 
> Since the transformation applies to other types besides int I
> suggest to make it clear in the comment.  E.g., something like:
> 
>   /* (X - 1U) <= TYPE_MAX - 1U into (TYPE) X > 0 for any integer
>  TYPE.  */
> 
> (with spaces around all the operators as per GCC coding style).

I think many of the match.pd comments are also not fully generic
to describe what it does, just to give an idea what it does.
The above isn't correct either, because it isn't for any integer TYPE,
there needs to be a signed and corresponding unsigned type involved,
X is of the unsigned type, so is the 1.  And TYPE_MAX is actually
the signed type's maximum cast to unsigned type.  And the reason for not putting
spaces around - in the second case was an attempt to give a hint that
it is comparison against a INT_MAX-1U constant, not another subtraction.
After all, the pattern doesn't handle subtraction, because that isn't
what is in the IL, but addition, i.e. X + -1U.
And, the <= -> > is just one possibility, the pattern also handles
> -> <=.

Examples of other comments that "suffer" from similar lack of sufficient
genericity, but perhaps are good enough to let somebody understand it
quickly:
/* Avoid this transformation if C is INT_MIN, i.e. C == -C.  */
  /* Avoid this transformation if X might be INT_MIN or
 Y might be -1, because we would then change valid
 INT_MIN % -(-1) into invalid INT_MIN % -1.  */
   /* If the constant operation overflows we cannot do the transform
  directly as we would introduce undefined overflow, for example
  with (a - 1) + INT_MIN.  */
  /* X+INT_MAX+1 is X-INT_MIN.  */

Jakub


Re: [PATCH] Implement one optimization from build_range_check in match.pd (PR tree-optimization/81346)

2017-07-18 Thread Jakub Jelinek
On Tue, Jul 18, 2017 at 05:35:54PM +0200, Marc Glisse wrote:
> On Tue, 18 Jul 2017, Jakub Jelinek wrote:
> 
> > In the PR Marc noted that the optimization might be useful even for
> > constants other than 1, by transforming
> > x+C1 <= C2 if unsigned and C2-C1==INT_MAX into (int)x > (int)(-1-C1).
> 
> (int)x >= (int)(-C1) might be easier (and more valid, except that the only
> case where that makes a difference seems to be when C2==UINT_MAX, in which
> case we could hope not to reach this transformation).

Don't we canonicalize that (int)x >= (int)(-C1) to (int)x > (int)(-1-C1)
immediately though?

> > +   && TYPE_PRECISION (TREE_TYPE (@0)) > 1
> 
> I see you've been bitten in the past ;-)

Many times ;)

Jakub


[PATCH] Fix infinite recursion with div-by-zero (PR middle-end/70992)

2017-07-18 Thread Marek Polacek
We ended up in infinite recursion between extract_muldiv_1 and
fold_plusminus_mult_expr, because one turns this expression into the other
and the other does the reverse:

((2147483648 / 0) * 2) + 2 <-> 2 * (2147483648 / 0 + 1)

I tried (unsuccessfully) to fix it in either extract_muldiv_1 or
fold_plusminus_mult_expr, but in the end I went with just turning (x / 0) + A
to x / 0 (and similarly for %), because with that undefined division we can do
anything and this fixes the issue.  Any better ideas?

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2017-07-18  Marek Polacek  

PR middle-end/70992
* fold-const.c (fold_binary_loc): Fold (x / 0) + A to x / 0,
and (x % 0) + A to x % 0.

* gcc.dg/torture/pr70992.c: New test.
* gcc.dg/torture/pr70992-2.c: New test.

diff --git gcc/fold-const.c gcc/fold-const.c
index 1bcbbb58154..9abdc9a8c20 100644
--- gcc/fold-const.c
+++ gcc/fold-const.c
@@ -9387,6 +9387,12 @@ fold_binary_loc (location_t loc,
  TREE_TYPE (arg0), arg0,
  cst0));
}
+ /* Adding anything to a division-by-zero makes no sense and
+can confuse extract_muldiv and fold_plusminus_mult_expr.  */
+ else if ((TREE_CODE (arg0) == TRUNC_DIV_EXPR
+   || TREE_CODE (arg0) == TRUNC_MOD_EXPR)
+  && integer_zerop (TREE_OPERAND (arg0, 1)))
+   return fold_convert_loc (loc, type, arg0);
}
 
   /* Handle (A1 * C1) + (A2 * C2) with A1, A2 or C1, C2 being the same or
diff --git gcc/testsuite/gcc.dg/torture/pr70992-2.c 
gcc/testsuite/gcc.dg/torture/pr70992-2.c
index e69de29bb2d..c5d2c5f2683 100644
--- gcc/testsuite/gcc.dg/torture/pr70992-2.c
+++ gcc/testsuite/gcc.dg/torture/pr70992-2.c
@@ -0,0 +1,9 @@
+/* PR middle-end/70992 */
+/* { dg-do compile } */
+
+unsigned int *od;
+int
+fn (void)
+{
+  return (0 % 0 + 1) * *od * 2; /* { dg-warning "division by zero" } */
+}
diff --git gcc/testsuite/gcc.dg/torture/pr70992.c 
gcc/testsuite/gcc.dg/torture/pr70992.c
index e69de29bb2d..56728e09d1b 100644
--- gcc/testsuite/gcc.dg/torture/pr70992.c
+++ gcc/testsuite/gcc.dg/torture/pr70992.c
@@ -0,0 +1,41 @@
+/* PR middle-end/70992 */
+/* { dg-do compile } */
+
+typedef unsigned int uint32_t;
+typedef int int32_t;
+
+uint32_t
+fn (uint32_t so)
+{
+  return (so + so) * (0x8000 / 0 + 1); /* { dg-warning "division by zero" 
} */
+}
+
+uint32_t
+fn5 (uint32_t so)
+{
+  return (0x8000 / 0 + 1) * (so + so); /* { dg-warning "division by zero" 
} */
+}
+
+uint32_t
+fn6 (uint32_t so)
+{
+  return (0x8000 / 0 - 1) * (so + so); /* { dg-warning "division by zero" 
} */
+}
+
+uint32_t
+fn2 (uint32_t so)
+{
+  return (so + so) * (0x8000 / 0 - 1); /* { dg-warning "division by zero" 
} */
+}
+
+int32_t
+fn3 (int32_t so)
+{
+  return (so + so) * (0x8000 / 0 + 1); /* { dg-warning "division by zero" 
} */
+}
+
+int32_t
+fn4 (int32_t so)
+{
+  return (so + so) * (0x8000 / 0 - 1); /* { dg-warning "division by zero" 
} */
+}

Marek


[PATCH, i386]: Fix PR 81471, ICE in curr_insn_transform

2017-07-18 Thread Uros Bizjak
Hello!

Attached patch tightens rorx operand 2 predicate to allow only
const_int RTXes that are also allowed by the operand constraint. This
prevents combine to propagate unsupported const_ints to the pattern.

2017-07-18  Uros Bizjak  

PR target/81471
* config/i386/i386.md (rorx_immediate_operand): New mode attribute.
(*bmi2_rorx<mode>3_1): Use rorx_immediate_operand as
operand 2 predicate.
(*bmi2_rorxsi3_1_zext): Use const_0_to_31_operand as
operand 2 predicate.
(ror,rol -> rorx splitters): Use const_int_operand as
operand 2 predicate.

testsuite/ChangeLog:

2017-07-18  Uros Bizjak  

PR target/81471
* gcc.target/i386/pr81471.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN, will be backported to other release branches.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 250278)
+++ config/i386/i386.md (working copy)
@@ -10732,10 +10732,15 @@
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
 })
 
+(define_mode_attr rorx_immediate_operand
+   [(SI "const_0_to_31_operand")
+(DI "const_0_to_63_operand")])
+
 (define_insn "*bmi2_rorx<mode>3_1"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
-   (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
-   (match_operand:QI 2 "immediate_operand" "")))]
+   (rotatert:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (match_operand:QI 2 "<rorx_immediate_operand>" "")))]
   "TARGET_BMI2"
   "rorx\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "rotatex")
@@ -10778,7 +10783,7 @@
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
- (match_operand:QI 2 "immediate_operand")))
+ (match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_BMI2 && reload_completed"
   [(set (match_dup 0)
@@ -10792,7 +10797,7 @@
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
-   (match_operand:QI 2 "immediate_operand")))
+   (match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_BMI2 && reload_completed"
   [(set (match_dup 0)
@@ -10802,7 +10807,7 @@
   [(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
-  (match_operand:QI 2 "immediate_operand" "I"]
+  (match_operand:QI 2 "const_0_to_31_operand" "I"]
   "TARGET_64BIT && TARGET_BMI2"
   "rorx\t{%2, %1, %k0|%k0, %1, %2}"
   [(set_attr "type" "rotatex")
@@ -10846,7 +10851,7 @@
   [(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
  (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
-(match_operand:QI 2 "immediate_operand"
+(match_operand:QI 2 "const_int_operand"
(clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && TARGET_BMI2 && reload_completed"
   [(set (match_dup 0)
@@ -10861,7 +10866,7 @@
   [(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
-  (match_operand:QI 2 "immediate_operand"
+  (match_operand:QI 2 "const_int_operand"
(clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && TARGET_BMI2 && reload_completed"
   [(set (match_dup 0)
Index: testsuite/gcc.target/i386/pr81471.c
===
--- testsuite/gcc.target/i386/pr81471.c (nonexistent)
+++ testsuite/gcc.target/i386/pr81471.c (working copy)
@@ -0,0 +1,13 @@
+/* PR target/81471 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2" } */
+
+static inline unsigned int rotl (unsigned int x, int k)
+{
+  return (x << k) | (x >> (32 - k));
+}
+
+unsigned long long test (unsigned int z)
+{
+  return rotl (z, 55);
+}


Re: [PATCH 1/3] matching tokens: c-family parts

2017-07-18 Thread Marek Polacek
On Tue, Jul 11, 2017 at 11:24:43AM -0400, David Malcolm wrote:
> OK for trunk? (assuming the rest is approved)
 
This is ok.  I'll have to play with this some more before I approve the C part.

Thanks,

Marek


Re: [PATCH] Kill TYPE_METHODS debug 1/9

2017-07-18 Thread Jim Wilson

On 07/14/2017 09:48 AM, Nathan Sidwell wrote:

This changes dbxout and dwarf2out.



Oh, the patch series survived a bootstrap on x86_64-linux.


Changes to the debug info files requires a gdb make check with and 
without the patch to check for regressions.  Since you are changing both 
dbxout and dwarf2out, you would need to do this twice, once for each 
debug info type.  Testing dbxout may be a little tricky, since few 
systems still use this by default.  Maybe you can hack an x86_64-linux 
build to use dbxout by default to test it.


Otherwise, this looks OK.

Jim




Re: RFC: stack/heap collision vulnerability and mitigation with GCC

2017-07-18 Thread Florian Weimer
* Jeff Law:

> On 06/28/2017 12:45 AM, Florian Weimer wrote:
>> * Richard Earnshaw:
>> 
>>> I can't help but feel there's a bit of a goode olde mediaeval witch hunt
>>> going on here.  As Wilco points out, we can never defend against a
>>> function that is built without probe operations but skips the entire
>>> guard zone.  The only defence there is a larger guard zone, but how big
>>> do you make it?
>> 
>> Right.  And in the exploitable cases we have seen, there is a
>> dynamically sized allocation which the attacker can influence, so it
>> seems fairly likely that in a partially hardended binary, there could
>> be another call stack which is exploitable, with a non-hardened
>> function at the top.
>> 
>> I think a probing scheme which assumes that if the caller moves the
>> stack pointer into more than half of the guard area, that's the
>> callers fault would be totally appropriate in practice.  If possible,
>> callee-only probing for its own stack usage is preferable, but not if
>> it means instrumenting all functions which use the stack.

> That position is a surprise Florian :-)  I would have expected a full
> protection position, particularly after the discussions we've had about
> noreturn functions.

I might have gotten carried away on that one.

I really want stack probing to be enabled by default across the board,
so this becomes a non-issue because the caller has been compiled with
probing as well.  However, in order to get there, we need extremely
cheap instrumentation, and if we cover any hypothetical corner case,
this might force us to instrument all functions, and that again
defeats the goal of enabling it by default.

Does that make sense?

> I guess the difference in your position is driven by the relatively high
> frequency of probing worst case assumptions are going to have on aarch64
> with a relatively small vulnerability surface?

Right, and I expect that the limited form of probing can be enabled by
default, so that eventually, the caller will take care of its share of
probing (i.e., it has never moved the stack pointer more than half
into the guard page, or whatever caller/callee split of
responsibilities we come up with).

> Which is a fairly stark contrast to the noreturn situation where it
> rarely, if ever comes up in practice and never on a hot path?

I've since researched the noreturn situation a bit more.  We never
turn noreturn functions into tail calls because the intent is to
preserve the call stack, in the expectation that either the noreturn
function itself performs a backtrace, or that someone later looks at
the coredump.  So the noreturn risk just doesn't seem to be there.


Re: [PATCH] PR libstdc++/81064 fix versioned namespace

2017-07-18 Thread François Dumont

On 18/07/2017 16:03, Ville Voutilainen wrote:

On 18 July 2017 at 16:31, Jonathan Wakely  wrote:

This is quite a huge change, so I'd like to wait and see if anyone
else has any opinion on it.

Personally I think it's necessary (assuming I understand the PR
correctly) and so if nobody objects I think we should go with this
change for GCC 8. Let's give it a few days for comments (and I'll
finish going through the patch carefully).


Looks like the right approach to me. I haven't looked at the patch in
detail, but the main gist
of it is something that we should certainly do for GCC 8. The Elf says "aye".


Thanks for the feedbacks.

However I've been a little bit too confident regarding its validation. 
There are unexpected failures when versioned namespace is activated.


Most of them are related to its usage with experimental namespace. I 
haven't yet fully consider it but just in case: do we really need to 
have versioned namespace on top of experimental namespace ?


François




Re: [PATCH, PR81464] Handle equal-argument loop exit phi in expand_omp_for_static_chunk

2017-07-18 Thread Tom de Vries

On 07/18/2017 06:59 PM, Jakub Jelinek wrote:

On Tue, Jul 18, 2017 at 06:48:56PM +0200, Tom de Vries wrote:

Hi,

this patch fixes PR81464, an ICE in ompexpssa.

The ICE occurs in expand_omp_for_static_chunk when we're trying to fix up a
loop exit phi:
...
   # .MEM_88 = PHI <.MEM_86(46), .MEM_86(71)>
...


That is something that should be cleaned up by some phi opt, but if it has
been introduced during the parloops pass or too early before that, we
probably should deal with it.



I checked, it's introduced during the parloops pass.


--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -4206,6 +4206,10 @@ expand_omp_for_static_chunk (struct omp_region *region,
  source_location locus;
  
  	  phi = psi.phi ();

+ if (operand_equal_p (gimple_phi_arg_def (phi, 0),
+  redirect_edge_var_map_def (vm), 0))
+ continue;


Wrong formatting, please remove 2 spaces before continue;

Otherwise LGTM.



Updated and committed.

Thanks,
- Tom


Re: [PATCH 4/6] lra-assigns.c: give up on qsort checking in assign_by_spills

2017-07-18 Thread Yuri Gribov
On Sat, Jul 15, 2017 at 9:47 PM, Alexander Monakov  wrote:
> The reload_pseudo_compare_func comparator, when used from assign_by_spills,
> can be non-transitive, indicating A < B < C < A if both A and C satisfy
> !bitmap_bit_p (_reload_pseudos, rAC), but B does not.
>
> This function was originally a proper comparator, and the problematic
> clause was added to fix PR 57878:
> https://gcc.gnu.org/ml/gcc-patches/2013-07/msg00732.html
>
> That the comparator is invalid implies that that PR, if it still exists,
> can reappear (but probably under more complicated circumstances).
>
> This looks like a sensitive area, so disabling checking is the only
> obvious approach.

May make sense to add PR rtl-optimization/68988 annotation to changelog.

> * lra-assigns.c (reload_pseudo_compare_func): Add a FIXME.
> (assign_by_spills): Use non-checking qsort.
> ---
>  gcc/lra-assigns.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/lra-assigns.c b/gcc/lra-assigns.c
> index 2aadeef..a67d1a6 100644
> --- a/gcc/lra-assigns.c
> +++ b/gcc/lra-assigns.c
> @@ -217,6 +217,7 @@ reload_pseudo_compare_func (const void *v1p, const void 
> *v2p)
>/* The code below executes rarely as nregs == 1 in most cases.
>  So we should not worry about using faster data structures to
>  check reload pseudos.  */
> +  /* FIXME this makes comparator non-transitive and thus invalid.  */
>&& ! bitmap_bit_p (&non_reload_pseudos, r1)
>&& ! bitmap_bit_p (&non_reload_pseudos, r2))
>  return diff;
> @@ -1384,7 +1385,7 @@ assign_by_spills (void)
>bitmap_ior_into (&non_reload_pseudos, &lra_optional_reload_pseudos);
>for (iter = 0; iter <= 1; iter++)
>  {
> -  qsort (sorted_pseudos, n, sizeof (int), reload_pseudo_compare_func);
> +  qsort_nochk (sorted_pseudos, n, sizeof (int), 
> reload_pseudo_compare_func);
>nfails = 0;
>for (i = 0; i < n; i++)
> {
> --
> 1.8.3.1
>


[PATCH 2/2] combine: Fix for PR81423

2017-07-18 Thread Segher Boessenkool
We here have an AND of a SUBREG of an LSHIFTRT.  If that SUBREG is
paradoxical, the extraction we form is the length of the size of the
inner mode, which includes some bits that should not be in the result.
Just give up in that case.

Tested on powerpc64-linux {-m32,-m64} and on x86_64-linux.  Committing
to trunk.


Segher


2017-07-18  Segher Boessenkool  

PR rtl-optimization/81423
* combine.c (make_compound_operation_int): Don't try to optimize
the AND of a SUBREG of an LSHIFTRT if that SUBREG is paradoxical.

---
 gcc/combine.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/gcc/combine.c b/gcc/combine.c
index c5200db..c486f12 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7990,18 +7990,9 @@ make_compound_operation_int (machine_mode mode, rtx 
*x_ptr,
 XEXP (inner_x0, 1),
 i, 1, 0, in_code == COMPARE);
 
- if (new_rtx)
-   {
- /* If we narrowed the mode when dropping the subreg, then
-we must zero-extend to keep the semantics of the AND.  */
- if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
-   ;
- else if (SCALAR_INT_MODE_P (inner_mode))
-   new_rtx = simplify_gen_unary (ZERO_EXTEND, mode,
- new_rtx, inner_mode);
- else
-   new_rtx = NULL;
-   }
+ /* If we narrowed the mode when dropping the subreg, then we lose.  */
+ if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode))
+   new_rtx = NULL;
 
  /* If that didn't give anything, see if the AND simplifies on
 its own.  */
-- 
1.9.3



[PATCH 1/2] simplify-rtx: The truncation of an IOR can have all bits set (PR81423)

2017-07-18 Thread Segher Boessenkool
... if it is an IOR with a constant with all bits set in the mode
that is truncated to, for example.  Handle that case.

With this patch the problematic situation for the PR81423 testcase
isn't even reached; but the next patch fixes that anyway.

Bootstrapped and tested on powerpc64-linux {-m32,-m64} and on
x86_64-linux.  Is this okay for trunk?


Segher


2017-07-18  Segher Boessenkool  

PR rtl-optimization/81423
* simplify-rtx.c (simplify_truncation): Handle truncating an IOR
with a constant that is -1 in the truncated to mode.

---
 gcc/simplify-rtx.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 3bce329..ef41479 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -857,6 +857,15 @@ simplify_truncation (machine_mode mode, rtx op,
 return simplify_gen_unary (TRUNCATE, mode, XEXP (op, 0),
   GET_MODE (XEXP (op, 0)));
 
+  /* (truncate:A (ior X C)) is (const_int -1) if C is equal to that already,
+ in mode A.  */
+  if (GET_CODE (op) == IOR
+  && SCALAR_INT_MODE_P (mode)
+  && SCALAR_INT_MODE_P (op_mode)
+  && CONST_INT_P (XEXP (op, 1))
+  && trunc_int_for_mode (INTVAL (XEXP (op, 1)), mode) == -1)
+return constm1_rtx;
+
   return NULL_RTX;
 }
 
-- 
1.9.3



[PATCH], Update cpu-builtin-1.c test on PowerPC

2017-07-18 Thread Michael Meissner
This patch modifies the change I made on July 12th.  It modifies the test for
the __builtin_cpu_is and __builtin_cpu_supports built-in functions to use an
#ifdef instead of target-requires for doing the tests.  One motavation is to
make the back port to GCC 6/7 easier, as I won't have to back port the change
to add the target option ppc_cpu_supports_hw.

I've checked the trunk with compilers built with a new GLIBC and without, and
it passes both compilers.  I also checked the back port to GCC 6/7 and both
work fine as well.

Can I check this patch into the trunk and backports to GCC 6 and 7?

2017-07-18  Michael Meissner  

PR target/81193
* gcc.target/powerpc/cpu-builtin-1.c: Change test to use #ifdef
__BUILTIN_CPU_SUPPORTS to see if the GLIBC is new enough that
__builtin_cpu_is and __builtin_cpu_supports are supported.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/testsuite/gcc.target/powerpc/cpu-builtin-1.c
===
--- gcc/testsuite/gcc.target/powerpc/cpu-builtin-1.c(revision 250316)
+++ gcc/testsuite/gcc.target/powerpc/cpu-builtin-1.c(working copy)
@@ -1,10 +1,14 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
-/* { dg-require-effective-target ppc_cpu_supports_hw } */
 
 void
 use_cpu_is_builtins (unsigned int *p)
 {
+  /* If GCC was configured to use an old GLIBC (before 2.23), the
+ __builtin_cpu_is and __builtin_cpu_supports built-in functions return 0,
+ and the compiler issues a warning that you need a newer glibc to use them.
+ Use #ifdef to avoid the warning.  */
+#ifdef __BUILTIN_CPU_SUPPORTS__
   p[0] = __builtin_cpu_is ("power9");
   p[1] = __builtin_cpu_is ("power8");
   p[2] = __builtin_cpu_is ("power7");
@@ -20,11 +24,15 @@ use_cpu_is_builtins (unsigned int *p)
   p[12] = __builtin_cpu_is ("ppc440");
   p[13] = __builtin_cpu_is ("ppc405");
   p[14] = __builtin_cpu_is ("ppc-cell-be");
+#else
+  p[0] = 0;
+#endif
 }
 
 void
 use_cpu_supports_builtins (unsigned int *p)
 {
+#ifdef __BUILTIN_CPU_SUPPORTS__
   p[0] = __builtin_cpu_supports ("4xxmac");
   p[1] = __builtin_cpu_supports ("altivec");
   p[2] = __builtin_cpu_supports ("arch_2_05");
@@ -63,4 +71,7 @@ use_cpu_supports_builtins (unsigned int
   p[35] = __builtin_cpu_supports ("ucache");
   p[36] = __builtin_cpu_supports ("vcrypto");
   p[37] = __builtin_cpu_supports ("vsx");
+#else
+  p[0] = 0;
+#endif
 }


Re: [PATCH] PR libstdc++/81395 fix crash when write follows large read

2017-07-18 Thread Jonathan Wakely

On 19/07/17 01:17 +0100, Jonathan Wakely wrote:

This fixes a crash that happens in std::filebuf when a large read
consumes the entire get area and is followed by a write, which is then
synced to the file by a call to overflow.

The problem is that xsgetn calls _M_set_buffer(0) after reading from
the file (i.e. when in 'read' mode). As the comments on _M_set_buffer
say, an argument of 0 is used for 'write' mode. This causes the
filebuf to have an active put area while in 'read' mode, so that the
next write inserts straight into that put area, rather than performing
the required seek to leave 'read' mode.

The next overflow then tries to leave 'read' mode by doing a seek, but
that then tries to flush the non-empty put area by calling overflow,
which goes into a loop until we overflow the stack.

The solution is to simply remove the call to _M_set_buffer(0). It's
not needed because the buffers are already set up appropriately after
xsgetn has read from the file: there's no active putback, no put area,
and setg(eback(), egptr(), egptr()) has been called so there's nothing
available in the get area. All we need to do is set _M_reading = true
so that a following write knows it needs to perform a seek.

The new testcase passes with GCC 4.5, so this is technically a
regression. However, I have a more demanding test that fails even with
GCC 4.5, so I don't think mixing reads and writes without intervening
seeks was ever working completely. I hope it is now.

I spent a LOT of time checking the make check-performance results
before and after this patch (and with various other attempted fixes)
and any difference seemed to be noise.

PR libstdc++/81395
* include/bits/fstream.tcc (basic_filebuf::xsgetn): Don't set buffer
pointers for write mode after reading.
* testsuite/27_io/basic_filebuf/sgetn/char/81395.cc: New.


The new test needs this dg-require so it doesn't FAIL on target boards
with no file I/O, and the dg-do is redundant.

Committed to trunk.


commit e868d4e4a67faa9b889720b5fcdd10f5eb0f4fa8
Author: Jonathan Wakely 
Date:   Wed Jul 19 01:19:20 2017 +0100

Use dg-require-fileio in new test

	* testsuite/27_io/basic_filebuf/sgetn/char/81395.cc: Add dg-require.

diff --git a/libstdc++-v3/testsuite/27_io/basic_filebuf/sgetn/char/81395.cc b/libstdc++-v3/testsuite/27_io/basic_filebuf/sgetn/char/81395.cc
index 4985628..ea8dbc1 100644
--- a/libstdc++-v3/testsuite/27_io/basic_filebuf/sgetn/char/81395.cc
+++ b/libstdc++-v3/testsuite/27_io/basic_filebuf/sgetn/char/81395.cc
@@ -15,7 +15,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-do run }
+// { dg-require-fileio "" }
 
 // PR libstdc++/81395
 


libgo patch committed: Ignore ptrace_pseeksiginfo_args from

2017-07-18 Thread Ian Lance Taylor
This patch should fix PR 81324 filed against libgo.  With some
versions of glibc and GNU/Linux ptrace_peeksiginfo_args is defined in
both <sys/ptrace.h> and <linux/ptrace.h>. We don't actually care about
the struct, so use a #define to avoid a redefinition error.
Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 250217)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-2ae6bf76f97f7d4c63a1f0ad0683b9ba62baaf06
+21775ae119830810d9e415a02e85349f4190c68c
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/sysinfo.c
===
--- libgo/sysinfo.c (revision 249799)
+++ libgo/sysinfo.c (working copy)
@@ -106,9 +106,13 @@
 /* Avoid https://sourceware.org/bugzilla/show_bug.cgi?id=762 .  */
 #define ia64_fpreg pt_ia64_fpreg
 #define pt_all_user_regs pt_ia64_all_user_regs
+/* Avoid redefinition of ptrace_peeksiginfo from <linux/ptrace.h>.
+   https://gcc.gnu.org/PR81324 .  */
+#define ptrace_peeksiginfo_args ignore_ptrace_peeksiginfo_args
 #include <linux/ptrace.h>
 #undef ia64_fpreg
 #undef pt_all_user_regs
+#undef ptrace_peeksiginfo_args
 #endif
 #if defined(HAVE_LINUX_RTNETLINK_H)
 #include <linux/rtnetlink.h>


Re: [PATCH, rs6000] Rev 2, 1/2 Add x86 MMX <mmintrin,h> intrinsics to GCC PPC64LE target

2017-07-18 Thread Segher Boessenkool
Hi!

On Mon, Jul 17, 2017 at 02:15:00PM -0500, Steven Munroe wrote:
> Correct the problems Segher found in review and added a changes to deal
> with the fallout from the __builtin_cpu_supports warning for older
> distros.
> 
> Tested on P8 LE and P6/P7/P8 BE. No new tests failures.
> 
> ./gcc/ChangeLog:
> 
> 2017-07-17  Steven Munroe  
> 
>   * config.gcc (powerpc*-*-*): Add mmintrin.h.
>   * config/rs6000/mmintrin.h: New file.
>   * config/rs6000/x86intrin.h [__ALTIVEC__]: Include mmintrin.h.

Okay for trunk.  Thanks,


Segher


Re: [PATCH rs6000] Fix up BMI/BMI2 intrinsic DG tests

2017-07-18 Thread Steven Munroe
On Tue, 2017-07-18 at 16:54 -0500, Segher Boessenkool wrote:
> Hi!
> 
> On Mon, Jul 17, 2017 at 01:28:20PM -0500, Steven Munroe wrote:
> > After a resent GCC change the previously submitted BMI/BMI2 intrinsic
> > test started to fail with the following warning/error.
> > 
> > ppc_cpu_supports_hw_available122373.c: In function 'main':
> > ppc_cpu_supports_hw_available122373.c:9:10: warning:
> > __builtin_cpu_supports need
> > s GLIBC (2.23 and newer) that exports hardware capability bits
> > 
> > The does not occur on systems with the newer (2.23) GLIBC but is common
> > on older (stable) distos.
> > 
> > As this is coming from the bmi-check.h and bmi2-check.h includes (and
> > not the tests directly) it seems simpler to simply skip the test unless
> > __BUILTIN_CPU_SUPPORTS__ is defined.
> 
> So this will skip on most current systems; is there no reasonable
> way around that?
> 
The work around would be to add an #else leg where we obtain the address
of the auxv then scan for the AT_PLATFOM, AT_HWCAP, and AT_HWCAP2
entries. Then perform the required string compares and / or bit tests.

> Okay otherwise.  One typo thing:
> 
> > 2017-07-17  Steven Munroe  
> > 
> > *gcc.target/powerpc/bmi-check.h (main): Skip unless
> > __BUILTIN_CPU_SUPPORTS__ defined.
> > *gcc.target/powerpc/bmi2-check.h (main): Skip unless
> > __BUILTIN_CPU_SUPPORTS__ defined.
> 
> There should be a space after the asterisks.
> 
> 
> Segher
> 




Go patch committed: Pass correct 'function' flag to circular_pointer_type

2017-07-18 Thread Ian Lance Taylor
The code in Named_type::do_get_backend in the Go frontend was not
passing the correct flag value for circular function types to
Backend::circular_pointer_type (it was always setting this flag to
false). This patch by Than McIntosh passes a true value if the type
being converted is a function type.  Bootstrapped and ran Go testsuite
on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 250324)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-21775ae119830810d9e415a02e85349f4190c68c
+0036bd04d077f8bbe5aa9a62fb8830c53068209e
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/types.cc
===
--- gcc/go/gofrontend/types.cc  (revision 249799)
+++ gcc/go/gofrontend/types.cc  (working copy)
@@ -10994,13 +10994,13 @@ Named_type::do_get_backend(Gogo* gogo)
   if (this->seen_in_get_backend_)
{
  this->is_circular_ = true;
- return gogo->backend()->circular_pointer_type(bt, false);
+ return gogo->backend()->circular_pointer_type(bt, true);
}
   this->seen_in_get_backend_ = true;
   bt1 = Type::get_named_base_btype(gogo, base);
   this->seen_in_get_backend_ = false;
   if (this->is_circular_)
-   bt1 = gogo->backend()->circular_pointer_type(bt, false);
+   bt1 = gogo->backend()->circular_pointer_type(bt, true);
   if (!gogo->backend()->set_placeholder_pointer_type(bt, bt1))
bt = gogo->backend()->error_type();
   return bt;


libgo patch committed: Inline runtime.osinit

2017-07-18 Thread Ian Lance Taylor
Libgo had two identical copies of runtime_osinit. They set
runtime_ncpu, a variable that is no longer used. Removing that leaves
us with two lines. Inline those two lines in the two places the
function was called.  This fixes GCC PR 81451.  Bootstrapped and ran
Go testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 250325)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-0036bd04d077f8bbe5aa9a62fb8830c53068209e
+c49ddc84f3ce89310585aad23ab6e51ef5523748
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/Makefile.am
===
--- libgo/Makefile.am   (revision 250217)
+++ libgo/Makefile.am   (working copy)
@@ -399,12 +399,6 @@ rtems_task_variable_add_file =
 endif
 
 if LIBGO_IS_LINUX
-runtime_thread_files = runtime/thread-linux.c
-else
-runtime_thread_files = runtime/thread-sema.c
-endif
-
-if LIBGO_IS_LINUX
 runtime_getncpu_file = runtime/getncpu-linux.c
 else
 if LIBGO_IS_DARWIN
@@ -469,7 +463,6 @@ runtime_files = \
runtime/runtime_c.c \
runtime/stack.c \
runtime/thread.c \
-   $(runtime_thread_files) \
runtime/yield.c \
$(rtems_task_variable_add_file) \
$(runtime_getncpu_file)
Index: libgo/go/runtime/stubs.go
===
--- libgo/go/runtime/stubs.go   (revision 249799)
+++ libgo/go/runtime/stubs.go   (working copy)
@@ -422,13 +422,13 @@ func getPanicking() uint32 {
return panicking
 }
 
-// Temporary for gccgo until we initialize ncpu in Go.
+// Called by C code to set the number of CPUs.
 //go:linkname setncpu runtime.setncpu
 func setncpu(n int32) {
ncpu = n
 }
 
-// Temporary for gccgo until we reliably initialize physPageSize in Go.
+// Called by C code to set the page size.
 //go:linkname setpagesize runtime.setpagesize
 func setpagesize(s uintptr) {
if physPageSize == 0 {
Index: libgo/runtime/go-libmain.c
===
--- libgo/runtime/go-libmain.c  (revision 249799)
+++ libgo/runtime/go-libmain.c  (working copy)
@@ -105,7 +105,8 @@ gostart (void *arg)
 
   runtime_check ();
   runtime_args (a->argc, (byte **) a->argv);
-  runtime_osinit ();
+  setncpu (getproccount ());
+  setpagesize (getpagesize ());
   runtime_sched = runtime_getsched();
   runtime_schedinit ();
   __go_go (runtime_main, NULL);
Index: libgo/runtime/go-main.c
===
--- libgo/runtime/go-main.c (revision 249799)
+++ libgo/runtime/go-main.c (working copy)
@@ -51,7 +51,8 @@ main (int argc, char **argv)
   runtime_cpuinit ();
   runtime_check ();
   runtime_args (argc, (byte **) argv);
-  runtime_osinit ();
+  setncpu (getproccount ());
+  setpagesize (getpagesize ());
   runtime_sched = runtime_getsched();
   runtime_schedinit ();
   __go_go (runtime_main, NULL);
Index: libgo/runtime/proc.c
===
--- libgo/runtime/proc.c(revision 249799)
+++ libgo/runtime/proc.c(working copy)
@@ -370,7 +370,6 @@ extern G* allocg(void)
   __asm__ (GOSYM_PREFIX "runtime.allocg");
 
 Sched* runtime_sched;
-int32  runtime_ncpu;
 
 bool   runtime_isarchive;
 
Index: libgo/runtime/runtime.h
===
--- libgo/runtime/runtime.h (revision 249799)
+++ libgo/runtime/runtime.h (working copy)
@@ -217,7 +217,6 @@ extern  M*  runtime_getallm(void)
 extern Sched*  runtime_sched;
 extern uint32  runtime_panicking(void)
   __asm__ (GOSYM_PREFIX "runtime.getPanicking");
-extern int32   runtime_ncpu;
 extern struct debugVars runtime_debug;
 
 extern boolruntime_isstarted;
@@ -237,7 +236,6 @@ voidruntime_gogo(G*)
 struct __go_func_type;
 void   runtime_args(int32, byte**)
   __asm__ (GOSYM_PREFIX "runtime.args");
-void   runtime_osinit();
 void   runtime_alginit(void)
   __asm__ (GOSYM_PREFIX "runtime.alginit");
 void   runtime_goargs(void)
Index: libgo/runtime/thread-linux.c
===
--- libgo/runtime/thread-linux.c(revision 249799)
+++ libgo/runtime/thread-linux.c(working copy)
@@ -1,20 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs.h"
-
-// Linux futex.
-
-#include 
-#include 
-#include 
-
-void
-runtime_osinit(void)
-{
-   runtime_ncpu = getproccount();
-   setncpu(runtime_ncpu);
-   setpagesize(getpagesize());
-}
Index: libgo/runtime/thread-sema.c

Go patch committed: Insert type conversion for closure function value

2017-07-18 Thread Ian Lance Taylor
This patch by Than McIntosh changes the Go frontend, in
Func_expression::do_get_backend, when creating the backend
representation for a closure, to create a backend type conversion to
account for potential differences between the closure struct type
(where the number of fields is dependent on the number of values
referenced in the closure) and the generic function descriptor type
(struct with single function pointer field).  Bootstrapped and ran Go
testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 250326)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-c49ddc84f3ce89310585aad23ab6e51ef5523748
+3d9ff9bc339942922f1be3bef07c6fe2978ad81a
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/expressions.cc
===
--- gcc/go/gofrontend/expressions.cc(revision 249799)
+++ gcc/go/gofrontend/expressions.cc(working copy)
@@ -1204,7 +1204,14 @@ Func_expression::do_get_backend(Translat
   // expression.  It is a pointer to a struct whose first field points
   // to the function code and whose remaining fields are the addresses
   // of the closed-over variables.
-  return this->closure_->get_backend(context);
+  Bexpression *bexpr = this->closure_->get_backend(context);
+
+  // Introduce a backend type conversion, to account for any differences
+  // between the argument type (function descriptor, struct with a
+  // single field) and the closure (struct with multiple fields).
+  Gogo* gogo = context->gogo();
+  Btype *btype = this->type()->get_backend(gogo);
+  return gogo->backend()->convert_expression(btype, bexpr, this->location());
 }
 
 // Ast dump for function.


Re: [PATCH rs6000] Fix up BMI/BMI2 intrinsic DG tests

2017-07-18 Thread Segher Boessenkool
Hi!

On Mon, Jul 17, 2017 at 01:28:20PM -0500, Steven Munroe wrote:
> After a recent GCC change the previously submitted BMI/BMI2 intrinsic
> test started to fail with the following warning/error.
> 
> ppc_cpu_supports_hw_available122373.c: In function 'main':
> ppc_cpu_supports_hw_available122373.c:9:10: warning:
> __builtin_cpu_supports need
> s GLIBC (2.23 and newer) that exports hardware capability bits
> 
> This does not occur on systems with the newer (2.23) GLIBC but is common
> on older (stable) distros.
> 
> As this is coming from the bmi-check.h and bmi2-check.h includes (and
> not the tests directly) it seems simpler to simply skip the test unless
> __BUILTIN_CPU_SUPPORTS__ is defined.

So this will skip on most current systems; is there no reasonable
way around that?

Okay otherwise.  One typo thing:

> 2017-07-17  Steven Munroe  
> 
>   *gcc.target/powerpc/bmi-check.h (main): Skip unless
>   __BUILTIN_CPU_SUPPORTS__ defined.
>   *gcc.target/powerpc/bmi2-check.h (main): Skip unless
>   __BUILTIN_CPU_SUPPORTS__ defined.

There should be a space after the asterisks.


Segher


[PATCH] PR libstdc++/81395 fix crash when write follows large read

2017-07-18 Thread Jonathan Wakely

This fixes a crash that happens in std::filebuf when a large read
consumes the entire get area and is followed by a write, which is then
synced to the file by a call to overflow.

The problem is that xsgetn calls _M_set_buffer(0) after reading from
the file (i.e. when in 'read' mode). As the comments on _M_set_buffer
say, an argument of 0 is used for 'write' mode. This causes the
filebuf to have an active put area while in 'read' mode, so that the
next write inserts straight into that put area, rather than performing
the required seek to leave 'read' mode.

The next overflow then tries to leave 'read' mode by doing a seek, but
that then tries to flush the non-empty put area by calling overflow,
which goes into a loop until we overflow the stack.

The solution is to simply remove the call to _M_set_buffer(0). It's
not needed because the buffers are already set up appropriately after
xsgetn has read from the file: there's no active putback, no put area,
and setg(eback(), egptr(), egptr()) has been called so there's nothing
available in the get area. All we need to do is set _M_reading = true
so that a following write knows it needs to perform a seek.

The new testcase passes with GCC 4.5, so this is technically a
regression. However, I have a more demanding test that fails even with
GCC 4.5, so I don't think mixing reads and writes without intervening
seeks was ever working completely. I hope it is now.

I spent a LOT of time checking the make check-performance results
before and after this patch (and with various other attempted fixes)
and any difference seemed to be noise.

PR libstdc++/81395
* include/bits/fstream.tcc (basic_filebuf::xsgetn): Don't set buffer
pointers for write mode after reading.
* testsuite/27_io/basic_filebuf/sgetn/char/81395.cc: New.

Tested powerpc64le-linux, committed to trunk.

commit 535a7ea29b4d6724519c0f472bcfe3eb9d79070a
Author: Jonathan Wakely 
Date:   Tue Jul 18 15:20:25 2017 +0100

PR libstdc++/81395 fix crash when write follows large read

PR libstdc++/81395
* include/bits/fstream.tcc (basic_filebuf::xsgetn): Don't set buffer
pointers for write mode after reading.
* testsuite/27_io/basic_filebuf/sgetn/char/81395.cc: New.

diff --git a/libstdc++-v3/include/bits/fstream.tcc 
b/libstdc++-v3/include/bits/fstream.tcc
index b1beff86..ef51a84 100644
--- a/libstdc++-v3/include/bits/fstream.tcc
+++ b/libstdc++-v3/include/bits/fstream.tcc
@@ -699,7 +699,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  
   if (__n == 0)
 {
-  _M_set_buffer(0);
+  // Set _M_reading. Buffer is already in initial 'read' mode.
   _M_reading = true;
 }
   else if (__len == 0)
diff --git a/libstdc++-v3/testsuite/27_io/basic_filebuf/sgetn/char/81395.cc 
b/libstdc++-v3/testsuite/27_io/basic_filebuf/sgetn/char/81395.cc
new file mode 100644
index 000..4985628
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/basic_filebuf/sgetn/char/81395.cc
@@ -0,0 +1,46 @@
+// Copyright (C) 2017 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-do run }
+
+// PR libstdc++/81395
+
+#include 
+#include  // for std::memset
+#include   // For BUFSIZ
+
+using std::memset;
+
+int main()
+{
+  {
+std::filebuf fb;
+fb.open("test.txt", std::ios::out);
+char data[BUFSIZ];
+memset(data, 'A', sizeof(data));
+fb.sputn(data, sizeof(data));
+  }
+
+  std::filebuf fb;
+  fb.open("test.txt", std::ios::in|std::ios::out);
+  char buf[BUFSIZ];
+  memset(buf, 0, sizeof(buf));
+  fb.sgetn(buf, sizeof(buf));
+  // Switch from reading to writing without seeking first:
+  fb.sputn("B", 1);
+  fb.pubsync();
+}


Re: [PATCH] Fix pr80044, -static and -pie insanity, and pr81170

2017-07-18 Thread Alan Modra
On Tue, Jul 18, 2017 at 07:49:48AM -0700, H.J. Lu wrote:
> The difference is with --enable-default-pie, the gcc driver doesn't pass
> both -pie and -static ld when "-static -pie" is used.   Does your change
> pass both -pie and -static ld when "-static -pie" is used?

Again, as I said in the original post: "In both cases you now will
have -static completely overriding -pie".

That means "gcc -pie -static" and "gcc -static -pie" just pass
"-static" to ld, and select the appropriate startup files for a static
executable, when configured with --disable-default-pie.  Which is what
happens currently for --enable-default-pie.

None of this is rocket science.  I know what I'm doing where the
linker and startup files are concerned, and I'm comfortable with the
gcc specs language.  The patch is simple!  It should be easy to
review, except for trying to understand the "-" lines.  Yet it has sat
unreviewed for nearly four weeks.  And it fixes a powerpc
--enable-default-pie bootstrap failure (pr81295).

Joseph, would you please take a look?
https://gcc.gnu.org/ml/gcc-patches/2017-06/msg01678.html

I know there is more to do in this area, for example, it seems to me
that the HAVE_LD_PIE definition of GNU_USER_TARGET_STARTFILE_SPEC is
good for !HAVE_LD_PIE, and similarly for GNU_USER_TARGET_ENDFILE_SPEC.
And yes, I propagated that duplication into rs6000/sysv4.h, which
needs some serious tidying.  rs6000/sysv4.h linux support ought to be
using the gnu-user.h defines rather than copying them, something I've
told Segher I'll look at after this patch goes in.

-- 
Alan Modra
Australia Development Lab, IBM


[PATCH][RFA/RFC] Stack clash mitigation patch 00/08 V2

2017-07-18 Thread Jeff Law

So later than I wanted, here's the V2 of the stack clash mitigation work.

Probably the biggest change in this version was moving the protection
out of -fstack-check= and into its own option (-fstack-clash-protection)

This has been bootstrapped and regression tested on the same set of
targets {x86_64, powerpc, powerpc64le, aarch64, s390x}-linux-gnu.  I've
also enabled -fstack-clash-protection and eyeballed test results
relative to the baselines to ensure nothing unexpected was failing.

Since this patch hits other targets that are not protected from
stack-clash a little harder, I also tested things like {alpha, mips,
ia64, hppa}-linux-gnu through building all-gcc.

As with the prior patch, comments, flames, questions are welcomed.

Jeff


[PATCH][RFA/RFC] Stack clash mitigation patch 03/08 V2 -- right patch attached

2017-07-18 Thread Jeff Law

Oops, I clearly attached the wrong file.

--

I don't think this patch changed in any significant way since V1.
--

One of the painful aspects of all this code is the amount of target
dependent bits that have to be written and tested.

I didn't want to be scanning assembly code or RTL for prologues.  Each
target would have to have its own scanner which was too painful to
contemplate.

So instead I settled on having a routine that the target dependent
prologue expanders could call to dump information about what they were
doing.

This greatly simplifies the testing side of things by having a standard
way to dump decisions.  When combined with the dejagnu routines from
patch #1 which describe key attributes of the target's prologue
generation I can write tests in a fairly generic way.

This will be used by every target dependent prologue expander in this
series.

OK for the trunk?

* function.c (dump_stack_clash_frame_info): New function.
* function.h (dump_stack_clash_frame_info): Prototype.
(enum stack_clash_probes): New enum.

diff --git a/gcc/function.c b/gcc/function.c
index f625489..ca48b3f 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -5695,6 +5695,58 @@ get_arg_pointer_save_area (void)
   return ret;
 }
 
+
+/* If debugging dumps are requested, dump information about how the
+   target handled -fstack-check=clash for the prologue.
+
+   PROBES describes what if any probes were emitted.
+
+   RESIDUALS indicates if the prologue had any residual allocation
+   (i.e. total allocation was not a multiple of PROBE_INTERVAL).  */
+
+void
+dump_stack_clash_frame_info (enum stack_clash_probes probes, bool residuals)
+{
+  if (!dump_file)
+return;
+
+  switch (probes)
+{
+case NO_PROBE_NO_FRAME:
+  fprintf (dump_file,
+  "Stack clash no probe no stack adjustment in prologue.\n");
+  break;
+case NO_PROBE_SMALL_FRAME:
+  fprintf (dump_file,
+  "Stack clash no probe small stack adjustment in prologue.\n");
+  break;
+case PROBE_INLINE:
+  fprintf (dump_file, "Stack clash inline probes in prologue.\n");
+  break;
+case PROBE_LOOP:
+  fprintf (dump_file, "Stack clash probe loop in prologue.\n");
+  break;
+}
+
+  if (residuals)
+fprintf (dump_file, "Stack clash residual allocation in prologue.\n");
+  else
+fprintf (dump_file, "Stack clash no residual allocation in prologue.\n");
+
+  if (frame_pointer_needed)
+fprintf (dump_file, "Stack clash frame pointer needed.\n");
+  else
+fprintf (dump_file, "Stack clash no frame pointer needed.\n");
+
+  if (TREE_THIS_VOLATILE (cfun->decl))
+fprintf (dump_file,
+"Stack clash noreturn prologue, assuming no implicit"
+" probes in caller.\n");
+  else
+fprintf (dump_file,
+"Stack clash not noreturn prologue.\n");
+}
+
 /* Add a list of INSNS to the hash HASHP, possibly allocating HASHP
for the first time.  */
 
diff --git a/gcc/function.h b/gcc/function.h
index 0f34bcd..87dac80 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -553,6 +553,14 @@ do {   
\
   ((TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn)  \
? MAX (FUNCTION_BOUNDARY, 2 * BITS_PER_UNIT) : FUNCTION_BOUNDARY)
 
+enum stack_clash_probes {
+  NO_PROBE_NO_FRAME,
+  NO_PROBE_SMALL_FRAME,
+  PROBE_INLINE,
+  PROBE_LOOP
+};
+
+extern void dump_stack_clash_frame_info (enum stack_clash_probes, bool);
 
 
 extern void push_function_context (void);


Re: [patch] Fix Unwind support on DragonFly BSD after sigtramp move

2017-07-18 Thread Jeff Law
On 07/07/2017 05:17 PM, John Marino wrote:
> Right after DragonFly 4.8 was released (27 Mar 2017), the signal
> trampoline was moved (twice) in response to a Ryzen bug.  This broke
> GCC's unwind support for DragonFly.
> 
> To avoid hardcoding the sigtramp location to avoid issues like this in
> the future, a new sysctl was added to DragonFly to return the signal
> trampoline address range (FreeBSD has a similar sysctl for similar
> reasons).  The attached patch fixes DragonFly unwind support for current
> DragonFly, and maintains support for Release 4.8 and earlier.
> 
> This patch has been in use for a few months and works fine.  It is
> similar in function to the FreeBSD Aarch64 unwind support I submitted
> through Andreas T. a few months ago.
> 
> I believe the patch can be applied to trunk and release 7 branch.
> I am the closest thing to a maintainer for DragonFly, so I don't know if
> additional approval is needed.  This patch is purely DragonFly-specific
> and cannot affect other platforms in any way.
> 
> If agreed, it would be great if somebody could commit this for me
> against the trunk and GCC-7-branch.
> 
> Thanks!
> John
> 
> P.S.  Yes, my copyright assignment is on file (I've contributed a few
> patches already).
> 
> suggested log entry of libgcc/ChangeLog:
> 
> 2017-07-XX  John Marino  
>* config/i386/dragonfly-unwind.h: Handle sigtramp relocation.

This is fine.  Sorry it's taken so long for me to get to this.

jeff


Re: [PING 5] [PATCH] [AArch64] vec_pack_trunc_ should split after register allocator

2017-07-18 Thread Hurugalawadi, Naveen
Hi,  

Please consider this as a personal reminder to review the patch
at following link and let me know your comments on the same.  

https://gcc.gnu.org/ml/gcc-patches/2017-04/msg01334.html

Thanks,
Naveen



    

Re: [PING} [PATCH] Transform (m1 > m2) * d into m1 > m2 ? d : 0

2017-07-18 Thread Hurugalawadi, Naveen
Hi,  

Please consider this as a personal reminder to review the patch
at following link and let me know your comments on the same.  

https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00178.html

Thanks,
Naveen




[PATCH][RFA/RFC] Stack clash mitigation patch 07/08 V2

2017-07-18 Thread Jeff Law

So this patch has changed considerably since V1 as well.

First, we no longer track the bulk of the register stores in the
prologue.  Those may be separately shrink wrapped and thus not executed
on all paths and as such are not candidates for implicit probes.

Second, per the discussions we've had on-list, we're less aggressive at
probing.  We assume the caller has not pushed us more than 1kbyte into
the stack guard.  Thus stacks of < 3kbytes in the callee need no probes.

Third, the implicit probe tracking is simplified.  I'm exceedingly happy
to find out that we can never have a nonzero initial_adjust and
callee_adjust at the same time.  That's a significant help.

We still use the save of lr/fp as an implicit probe.

This ought to be much more efficient than the prior version.


Hopefully this is closer to something the aarch64 maintainers are
comfortable with.

--

* config/aarch/aarch64.c (aarch64_output_probe_stack_range): Handle
-fstack-clash-protection probing too.
(aarch64_allocate_and_probe_stack_space): New function.
(aarch64_expand_prologue): Assert we never have both an initial
adjustment and callee save adjustment.  Track distance between SP and
most recent probe.  Use aarch64_allocate_and_probe_stack_space
when -fstack-clash-protect is enabled rather than just adjusting sp.
Dump actions via dump_stack_clash_frame_info.

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0a8b40a..8764d62 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2830,6 +2830,9 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
   char loop_lab[32];
   rtx xops[2];
 
+  if (flag_stack_clash_protection)
+reg1 = stack_pointer_rtx;
+
   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
 
   /* Loop.  */
@@ -2841,7 +2844,14 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
   output_asm_insn ("sub\t%0, %0, %1", xops);
 
   /* Probe at TEST_ADDR.  */
-  output_asm_insn ("str\txzr, [%0]", xops);
+  if (flag_stack_clash_protection)
+{
+  gcc_assert (xops[0] == stack_pointer_rtx);
+  xops[1] = GEN_INT (PROBE_INTERVAL - 8);
+  output_asm_insn ("str\txzr, [%0, %1]", xops);
+}
+  else
+output_asm_insn ("str\txzr, [%0]", xops);
 
   /* Test if TEST_ADDR == LAST_ADDR.  */
   xops[1] = reg2;
 static void
 aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
@@ -3605,6 +3617,68 @@ aarch64_set_handled_components (sbitmap components)
   cfun->machine->reg_is_wrapped_separately[regno] = true;
 }
 
+/* Allocate SIZE bytes of stack space using SCRATCH_REG as a scratch
+   register.
+
+   LAST_PROBE_OFFSET contains the offset between the stack pointer and
+   the last known probe.  As LAST_PROBE_OFFSET crosses PROBE_INTERVAL
+   emit a probe and adjust LAST_PROBE_OFFSET.  */
+static void
+aarch64_allocate_and_probe_stack_space (int scratchreg, HOST_WIDE_INT size,
+   HOST_WIDE_INT *last_probe_offset)
+{
+  rtx temp = gen_rtx_REG (word_mode, scratchreg);
+
+  HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
+  HOST_WIDE_INT residual = size - rounded_size;
+
+  /* We can handle a small number of allocations/probes inline.  Otherwise
+ punt to a loop.  */
+  if (rounded_size && rounded_size <= 4 * PROBE_INTERVAL)
+{
+  for (HOST_WIDE_INT i = 0; i < rounded_size; i += PROBE_INTERVAL)
+   {
+ /* We should never need a scratch register for this adjustment.  */
+ aarch64_sub_sp (-1, PROBE_INTERVAL, true);
+
+ /* We just allocated PROBE_INTERVAL bytes.  Thus, a probe is
+mandatory.  Note that LAST_PROBE_OFFSET does not change here.  */
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+  (PROBE_INTERVAL
+   - GET_MODE_SIZE (word_mode;
+   }
+  dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
+}
+  else if (rounded_size)
+{
+  /* Compute the ending address.  */
+  emit_move_insn (temp, GEN_INT (-rounded_size));
+  emit_insn (gen_add3_insn (temp, stack_pointer_rtx, temp));
+
+  /* This allocates and probes the stack.  Like the inline version above
+it does not need to change LAST_PROBE_OFFSET.
+
+It almost certainly does not update CFIs correctly.  */
+  emit_insn (gen_probe_stack_range (temp, temp, temp));
+  dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
+}
+
+  /* Handle any residuals.  */
+  if (residual)
+{
+  aarch64_sub_sp (-1, residual, true);
+  *last_probe_offset += residual;
+  if (*last_probe_offset >= PROBE_INTERVAL)
+   {
+ *last_probe_offset -= PROBE_INTERVAL;
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+  (residual
+   - 

  1   2   >