Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Marc Glisse


+(simplify
+ (plus (convert? @0) (convert? (xdivamulminusa @0 @1)))
+  (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
+   && tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (trunc_mod (convert @0) (convert @1

See PR 67953.

+(match (abitandnotb @0 @1)
+ (bit_and:c @0 (bit_not INTEGER_CST@1)))

Does that work?

+/* Fold (a * (1 << b)) into (a << b)  */
+(simplify
+ (mult:c @0 (convert? (lshift integer_onep@1 @2)))
+  (if (! FLOAT_TYPE_P (type)
+&& tree_nop_conversion_p (type, TREE_TYPE (@2)))
+   (lshift @0 (convert @2

You don't need/want to convert @2 (fold-const doesn't convert, does it?), 
and you don't need to check for tree_nop_conversion_p.



--
Marc Glisse


Re: Do not use TYPE_CANONICAL in useless_type_conversion

2015-10-13 Thread Jan Hubicka
> On Oct 13, 2015, Eric Botcazou  wrote:
> 
> > Note that this is PR middle-end/67912.
> 
> Thanks.  I added this piece of information to the ChangeLog entry, and
> checked the patch in.
Thanks, Alexandre. That indeed looks better than my variant of the patch.
Does it also fix the IA-64 issue?

Honza
> 
> -- 
> Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
> You must be the change you wish to see in the world. -- Gandhi
> Be Free! -- http://FSFLA.org/   FSF Latin America board member
> Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Hurugalawadi, Naveen
Hi.

>> please adjust also according to these comments.
Adjusted the patch as per your comments.

Please find attached the patch as per your comments.
Please review the patch and let me know if any further modifications 
are required.

Thanks,
Naveen

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index de45a2c..2d81b2c 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -9232,26 +9232,6 @@ fold_binary_loc (location_t loc,
   return NULL_TREE;
 
 case PLUS_EXPR:
-  if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
-	{
-	  /* X + (X / CST) * -CST is X % CST.  */
-	  if (TREE_CODE (arg1) == MULT_EXPR
-	  && TREE_CODE (TREE_OPERAND (arg1, 0)) == TRUNC_DIV_EXPR
-	  && operand_equal_p (arg0,
-  TREE_OPERAND (TREE_OPERAND (arg1, 0), 0), 0))
-	{
-	  tree cst0 = TREE_OPERAND (TREE_OPERAND (arg1, 0), 1);
-	  tree cst1 = TREE_OPERAND (arg1, 1);
-	  tree sum = fold_binary_loc (loc, PLUS_EXPR, TREE_TYPE (cst1),
-  cst1, cst0);
-	  if (sum && integer_zerop (sum))
-		return fold_convert_loc (loc, type,
-	 fold_build2_loc (loc, TRUNC_MOD_EXPR,
-		  TREE_TYPE (arg0), arg0,
-		  cst0));
-	}
-	}
-
   /* Handle (A1 * C1) + (A2 * C2) with A1, A2 or C1, C2 being the same or
 	 one.  Make sure the type is not saturating and has the signedness of
 	 the stripped operands, as fold_plusminus_mult_expr will re-associate.
@@ -9692,28 +9672,6 @@ fold_binary_loc (location_t loc,
 			fold_convert_loc (loc, type,
 	  TREE_OPERAND (arg0, 0)));
 
-  if (! FLOAT_TYPE_P (type))
-	{
-	  /* Fold (A & ~B) - (A & B) into (A ^ B) - B, where B is
-	 any power of 2 minus 1.  */
-	  if (TREE_CODE (arg0) == BIT_AND_EXPR
-	  && TREE_CODE (arg1) == BIT_AND_EXPR
-	  && operand_equal_p (TREE_OPERAND (arg0, 0),
-  TREE_OPERAND (arg1, 0), 0))
-	{
-	  tree mask0 = TREE_OPERAND (arg0, 1);
-	  tree mask1 = TREE_OPERAND (arg1, 1);
-	  tree tem = fold_build1_loc (loc, BIT_NOT_EXPR, type, mask0);
-
-	  if (operand_equal_p (tem, mask1, 0))
-		{
-		  tem = fold_build2_loc (loc, BIT_XOR_EXPR, type,
- TREE_OPERAND (arg0, 0), mask1);
-		  return fold_build2_loc (loc, MINUS_EXPR, type, tem, mask1);
-		}
-	}
-	}
-
   /* Fold __complex__ ( x, 0 ) - __complex__ ( 0, y ) to
 	 __complex__ ( x, -y ).  This is not the same for SNaNs or if
 	 signed zeros are involved.  */
@@ -9803,20 +9761,6 @@ fold_binary_loc (location_t loc,
   goto associate;
 
 case MULT_EXPR:
-  /* (-A) * (-B) -> A * B  */
-  if (TREE_CODE (arg0) == NEGATE_EXPR && negate_expr_p (arg1))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			fold_convert_loc (loc, type,
-	  TREE_OPERAND (arg0, 0)),
-			fold_convert_loc (loc, type,
-	  negate_expr (arg1)));
-  if (TREE_CODE (arg1) == NEGATE_EXPR && negate_expr_p (arg0))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			fold_convert_loc (loc, type,
-	  negate_expr (arg0)),
-			fold_convert_loc (loc, type,
-	  TREE_OPERAND (arg1, 0)));
-
   if (! FLOAT_TYPE_P (type))
 	{
 	  /* Transform x * -C into -x * C if x is easily negatable.  */
@@ -9830,16 +9774,6 @@ fold_binary_loc (location_t loc,
 		  negate_expr (arg0)),
 tem);
 
-	  /* (a * (1 << b)) is (a << b)  */
-	  if (TREE_CODE (arg1) == LSHIFT_EXPR
-	  && integer_onep (TREE_OPERAND (arg1, 0)))
-	return fold_build2_loc (loc, LSHIFT_EXPR, type, op0,
-TREE_OPERAND (arg1, 1));
-	  if (TREE_CODE (arg0) == LSHIFT_EXPR
-	  && integer_onep (TREE_OPERAND (arg0, 0)))
-	return fold_build2_loc (loc, LSHIFT_EXPR, type, op1,
-TREE_OPERAND (arg0, 1));
-
 	  /* (A + A) * C -> A * 2 * C  */
 	  if (TREE_CODE (arg0) == PLUS_EXPR
 	  && TREE_CODE (arg1) == INTEGER_CST
@@ -9882,21 +9816,6 @@ fold_binary_loc (location_t loc,
 	}
   else
 	{
-	  /* Convert (C1/X)*C2 into (C1*C2)/X.  This transformation may change
- the result for floating point types due to rounding so it is applied
- only if -fassociative-math was specify.  */
-	  if (flag_associative_math
-	  && TREE_CODE (arg0) == RDIV_EXPR
-	  && TREE_CODE (arg1) == REAL_CST
-	  && TREE_CODE (TREE_OPERAND (arg0, 0)) == REAL_CST)
-	{
-	  tree tem = const_binop (MULT_EXPR, TREE_OPERAND (arg0, 0),
-  arg1);
-	  if (tem)
-		return fold_build2_loc (loc, RDIV_EXPR, type, tem,
-TREE_OPERAND (arg0, 1));
-	}
-
   /* Strip sign operations from X in X*X, i.e. -Y*-Y -> Y*Y.  */
 	  if (operand_equal_p (arg0, arg1, 0))
 	{
@@ -10013,28 +9932,6 @@ fold_binary_loc (location_t loc,
 arg1);
 	}
 
-  /* (X & ~Y) | (~X & Y) is X ^ Y */
-  if (TREE_CODE (arg0) == BIT_AND_EXPR
-	  && TREE_CODE (arg1) == BIT_AND_EXPR)
-{
-	  tree a0, a1, l0, l1, n0, n1;
-
-	  a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0));
-	  a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1));
-
-	  l0 = fold_convert_loc (loc, type, 

Re: [PR67891] drop is_gimple_reg test from set_parm_rtl

2015-10-13 Thread Alexandre Oliva
On Oct 12, 2015, Richard Biener  wrote:

> On Sat, Oct 10, 2015 at 3:16 PM, Alexandre Oliva  wrote:
>> On Oct  9, 2015, Richard Biener  wrote:
>> 
>>> Ok.  Note that I think emit_block_move shouldn't mess with the addressable 
>>> flag.
>> 
>> I have successfully tested a patch that stops it from doing so,
>> reverting https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49429#c11 but
>> according to bugs 49429 and 49454, it looks like removing it would mess
>> with escape analysis introduced in r175063 for bug 44194.  The thread
>> that introduces the mark_addressable calls suggests some discomfort with
>> this solution, and even a suggestion that the markings should be
>> deferred past the end of expand, but in the end there was agreement to
>> go with it.  https://gcc.gnu.org/ml/gcc-patches/2011-06/msg01746.html

> Aww, indeed.  Of course the issue is that we don't track pointers to the
> stack introduced during RTL properly.

> Thanks for checking.  Might want to add a comment before that
> addressable setting now that you've done the archeology.

I decided to give the following approach a try instead.  The following
patch was regstrapped on x86_64-linux-gnu and i686-linux-gnu.
Ok to install?

Would anyone with access to hpux (pa and ia64 are both affected) give it
a spin?


defer mark_addressable calls during expand till the end of expand

From: Alexandre Oliva 

for  gcc/ChangeLog

* gimple-expr.c: Include hash-set.h and rtl.h.
(mark_addressable_queue): New var.
(mark_addressable): Factor actual marking into...
(mark_addressable_1): ... this.  Queue it up during expand.
(mark_addressable_2): New.
(flush_mark_addressable_queue): New.
* gimple-expr.h (flush_mark_addressable_queue): Declare.
* cfgexpand.c: Include gimple-expr.h.
(pass_expand::execute): Flush mark_addressable queue.
---
 gcc/cfgexpand.c   |3 +++
 gcc/gimple-expr.c |   50 --
 gcc/gimple-expr.h |1 +
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index eaad859..a362e17 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "tree-eh.h"
 #include "gimple-iterator.h"
+#include "gimple-expr.h"
 #include "gimple-walk.h"
 #include "cgraph.h"
 #include "tree-cfg.h"
@@ -6373,6 +6374,8 @@ pass_expand::execute (function *fun)
   /* We're done expanding trees to RTL.  */
   currently_expanding_to_rtl = 0;
 
+  flush_mark_addressable_queue ();
+
   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun)->next_bb,
  EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
 {
diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c
index 2a6ba1a..db249a3 100644
--- a/gcc/gimple-expr.c
+++ b/gcc/gimple-expr.c
@@ -35,6 +35,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify.h"
 #include "stor-layout.h"
 #include "demangle.h"
+#include "hash-set.h"
+#include "rtl.h"
 
 /* - Type related -  */
 
@@ -823,6 +825,50 @@ is_gimple_mem_ref_addr (tree t)
 || decl_address_invariant_p (TREE_OPERAND (t, 0)));
 }
 
+/* Hold trees marked addressable during expand.  */
+
+static hash_set<tree> *mark_addressable_queue;
+
+/* Mark X as addressable or queue it up if called during expand.  */
+
+static void
+mark_addressable_1 (tree x)
+{
+  if (!currently_expanding_to_rtl)
+{
+  TREE_ADDRESSABLE (x) = 1;
+  return;
+}
+
+  if (!mark_addressable_queue)
+mark_addressable_queue = new hash_set<tree> ();
+  mark_addressable_queue->add (x);
+}
+
+/* Adaptor for mark_addressable_1 for use in hash_set traversal.  */
+
+bool
mark_addressable_2 (tree const &x, void * ATTRIBUTE_UNUSED = NULL)
+{
+  mark_addressable_1 (x);
+  return false;
+}
+
+/* Mark all queued trees as addressable, and empty the queue.  To be
+   called right after clearing CURRENTLY_EXPANDING_TO_RTL.  */
+
+void
+flush_mark_addressable_queue ()
+{
+  gcc_assert (!currently_expanding_to_rtl);
+  if (mark_addressable_queue)
+{
+  mark_addressable_queue->traverse<void *, mark_addressable_2> (NULL);
+  delete mark_addressable_queue;
+  mark_addressable_queue = NULL;
+}
+}
+
 /* Mark X addressable.  Unlike the langhook we expect X to be in gimple
form and we don't do any syntax checking.  */
 
@@ -838,7 +884,7 @@ mark_addressable (tree x)
   && TREE_CODE (x) != PARM_DECL
   && TREE_CODE (x) != RESULT_DECL)
 return;
-  TREE_ADDRESSABLE (x) = 1;
+  mark_addressable_1 (x);
 
   /* Also mark the artificial SSA_NAME that points to the partition of X.  */
   if (TREE_CODE (x) == VAR_DECL
@@ -849,7 +895,7 @@ mark_addressable (tree x)
 {
   tree *namep = cfun->gimple_df->decls_to_pointers->get (x);
   if (namep)
-   TREE_ADDRESSABLE (*namep) = 1;
+   

Re: [PATCH][AArch64] Update patterns to support FP zero

2015-10-13 Thread James Greenhalgh
On Thu, Oct 08, 2015 at 02:16:16PM +0100, Wilco Dijkstra wrote:
> This patch improves support for instructions that allow FP zero immediate. 
> All FP compares generated
> by various patterns should use aarch64_fp_compare_operand. LDP/STP uses 
> aarch64_reg_or_fp_zero.
> Passes regression on AArch64.
> 
> OK for commit?

OK.

Thanks,
James

> 
> ChangeLog:
> 2015-10-08  Wilco Dijkstra  
> 
>   * gcc/config/aarch64/aarch64.md (cbranch4):
>   Use aarch64_fp_compare_operand.
>   (store_pairsf): Use aarch64_reg_or_fp_zero.
>   (store_pairdf): Likewise.
>   (cstore4): Use aarch64_fp_compare_operand.
>   (cmov6): Likewise.
>   * gcc/config/aarch64/aarch64-ldpstp.md: Use aarch64_reg_or_fp_zero.
> 



[gomp4.1] Disallow modifiers on linear clause except in declare simd

2015-10-13 Thread Jakub Jelinek
Hi!

Modifiers are only meaningful in declare simd construct, therefore
latest OpenMP 4.5 disallows them on simd/for constructs.

2015-10-13  Jakub Jelinek  

c/
* c-typeck.c (c_finish_omp_clauses): Disallow modifiers on simd/for
constructs.
cp/
* semantics.c (finish_omp_clauses): Disallow modifiers on simd/for
constructs.
testsuite/
* c-c++-common/gomp/linear-1.c: New test.
* g++.dg/gomp/linear-1.C: New test.

--- gcc/c/c-typeck.c.jj 2015-10-09 11:17:17.0 +0200
+++ gcc/c/c-typeck.c2015-10-09 19:05:48.433813759 +0200
@@ -12470,6 +12470,14 @@ c_finish_omp_clauses (tree clauses, bool
  if (!declare_simd)
need_implicitly_determined = true;
  t = OMP_CLAUSE_DECL (c);
+ if (!declare_simd
+ && OMP_CLAUSE_LINEAR_KIND (c) != OMP_CLAUSE_LINEAR_DEFAULT)
+   {
+ error_at (OMP_CLAUSE_LOCATION (c),
+   "modifier should not be specified in %<linear%> "
+   "clause on %<simd%> or %<for%> constructs");
+ OMP_CLAUSE_LINEAR_KIND (c) = OMP_CLAUSE_LINEAR_DEFAULT;
+   }
  if (!INTEGRAL_TYPE_P (TREE_TYPE (t))
  && TREE_CODE (TREE_TYPE (t)) != POINTER_TYPE)
{
--- gcc/cp/semantics.c.jj   2015-10-09 11:19:37.0 +0200
+++ gcc/cp/semantics.c  2015-10-09 19:07:12.604636561 +0200
@@ -5725,6 +5725,14 @@ finish_omp_clauses (tree clauses, bool a
case OMP_CLAUSE_LINEAR:
  field_ok = allow_fields;
  t = OMP_CLAUSE_DECL (c);
+ if (!declare_simd
+ && OMP_CLAUSE_LINEAR_KIND (c) != OMP_CLAUSE_LINEAR_DEFAULT)
+   {
+ error_at (OMP_CLAUSE_LOCATION (c),
+   "modifier should not be specified in %<linear%> "
+   "clause on %<simd%> or %<for%> constructs");
+ OMP_CLAUSE_LINEAR_KIND (c) = OMP_CLAUSE_LINEAR_DEFAULT;
+   }
  if ((VAR_P (t) || TREE_CODE (t) == PARM_DECL)
  && !type_dependent_expression_p (t))
{
--- gcc/testsuite/c-c++-common/gomp/linear-1.c.jj   2015-10-12 
10:59:33.058013750 +0200
+++ gcc/testsuite/c-c++-common/gomp/linear-1.c  2015-10-12 11:25:31.633491633 
+0200
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp" } */
+
+int i;
+
+#pragma omp declare simd linear (val (x) : 1) linear (y : 2)
+int bar (int x, int y, int z);
+
+void
+foo (int x, int y)
+{
+  #pragma omp simd linear (i: 3)
+  for (i = 0; i < 33; i += 3)
+;
+  #pragma omp simd linear (val (i): 3) /* { dg-error "modifier should 
not be specified in" } */
+  for (i = 0; i < 33; i += 3)
+;
+  #pragma omp simd linear (x: y + 1)
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp simd linear (val (x): y + 1) /* { dg-error "modifier should 
not be specified in" } */
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp for linear (x: y + 1)
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp for linear (val (x): y + 1)  /* { dg-error "modifier should 
not be specified in" } */
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp for simd linear (i: 3)
+  for (i = 0; i < 33; i += 3)
+;
+  #pragma omp for simd linear (val (i): 3) /* { dg-error "modifier should 
not be specified in" } */
+  for (i = 0; i < 33; i += 3)
+;
+  #pragma omp for simd linear (x: y + 1)
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp for simd linear (val (x): y + 1) /* { dg-error "modifier should 
not be specified in" } */
+  for (i = 0; i < 10; i++)
+x += y + 1;
+}
--- gcc/testsuite/g++.dg/gomp/linear-1.C.jj 2015-10-12 11:37:13.327880757 
+0200
+++ gcc/testsuite/g++.dg/gomp/linear-1.C2015-10-12 11:39:16.052024819 
+0200
@@ -0,0 +1,48 @@
+// { dg-do compile }
+// { dg-options "-fopenmp" }
+
+int i;
+
+#pragma omp declare simd linear (ref (x) : 1) linear (uval (y) : 2)
+int bar (int &x, int &y, int z);
+
+void
+foo (int &x, int &y)
+{
+  #pragma omp simd linear (x: y + 1)
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp simd linear (val (x): y + 1) // { dg-error "modifier should 
not be specified in" }
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp simd linear (ref (x): y + 1) // { dg-error "modifier should 
not be specified in" }
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp simd linear (uval (x): y + 1)// { dg-error "modifier should 
not be specified in" }
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp for linear (x: y + 1)
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp for linear (val (x): y + 1)  // { dg-error "modifier should 
not be specified in" }
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp for linear (ref (x): y + 1)  // { dg-error "modifier should 
not be specified in" }
+  for (i = 0; i < 10; i++)
+x += y + 1;
+  #pragma omp for linear (uval (x): y + 1) // { dg-error "modifier should 

[PATCH] More vectorizer TLC

2015-10-13 Thread Richard Biener

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-10-13  Richard Biener  

* tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Allocate
the data dependence vector.
(vect_peeling_hash_insert): Get the peeling hash table as argument.
(vect_peeling_hash_get_lowest_cost): Likewise.
(vect_enhance_data_refs_alignment): Adjust.
(struct _vect_peel_info, struct _vect_peel_extended_info,
struct peel_info_hasher): Move from ...
* tree-vectorizer.h: ... here.
(LOOP_VINFO_COST_MODEL_MIN_ITERS): Remove.
(LOOP_VINFO_PEELING_HTAB): Likewise.
(struct _loop_vec_info): Remove min_profitable_iters and
peeling_htab members.
* tree-vect-loop.c (new_loop_vec_info): Do not allocate vectors
here.
(destroy_loop_vec_info): Adjust.
(vect_analyze_loop_2): Do not set LOOP_VINFO_COST_MODEL_MIN_ITERS.
(vect_estimate_min_profitable_iters): Use LOOP_VINFO_COMP_ALIAS_DDRS
to estimate alias versioning cost.
* tree-vect-slp.c (vect_analyze_slp_cost): Dump header.


Index: gcc/tree-vect-data-refs.c
===
*** gcc/tree-vect-data-refs.c   (revision 228709)
--- gcc/tree-vect-data-refs.c   (working copy)
*** vect_analyze_data_ref_dependences (loop_
*** 468,473 
--- 468,476 
  dump_printf_loc (MSG_NOTE, vect_location,
   "=== vect_analyze_data_ref_dependences ===\n");
  
+   LOOP_VINFO_DDRS (loop_vinfo)
+ .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
+* LOOP_VINFO_DATAREFS (loop_vinfo).length ());
LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
 &LOOP_VINFO_DDRS (loop_vinfo),
*** vect_get_data_access_cost (struct data_r
*** 1039,1048 
  }
  
  
  /* Insert DR into peeling hash table with NPEEL as key.  */
  
  static void
! vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
int npeel)
  {
struct _vect_peel_info elem, *slot;
--- 1042,1089 
  }
  
  
+ typedef struct _vect_peel_info
+ {
+   int npeel;
+   struct data_reference *dr;
+   unsigned int count;
+ } *vect_peel_info;
+ 
+ typedef struct _vect_peel_extended_info
+ {
+   struct _vect_peel_info peel_info;
+   unsigned int inside_cost;
+   unsigned int outside_cost;
+   stmt_vector_for_cost body_cost_vec;
+ } *vect_peel_extended_info;
+ 
+ 
+ /* Peeling hashtable helpers.  */
+ 
+ struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
+ {
+   static inline hashval_t hash (const _vect_peel_info *);
+   static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
+ };
+ 
+ inline hashval_t
+ peel_info_hasher::hash (const _vect_peel_info *peel_info)
+ {
+   return (hashval_t) peel_info->npeel;
+ }
+ 
+ inline bool
+ peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
+ {
+   return (a->npeel == b->npeel);
+ }
+ 
+ 
  /* Insert DR into peeling hash table with NPEEL as key.  */
  
  static void
! vect_peeling_hash_insert (hash_table <peel_info_hasher> *peeling_htab,
! loop_vec_info loop_vinfo, struct data_reference *dr,
int npeel)
  {
struct _vect_peel_info elem, *slot;
*** vect_peeling_hash_insert (loop_vec_info
*** 1050,1056 
bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
  
elem.npeel = npeel;
!   slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo)->find (&elem);
if (slot)
  slot->count++;
else
--- 1091,1097 
bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
  
elem.npeel = npeel;
!   slot = peeling_htab->find (&elem);
if (slot)
  slot->count++;
else
*** vect_peeling_hash_insert (loop_vec_info
*** 1059,1066 
slot->npeel = npeel;
slot->dr = dr;
slot->count = 1;
!   new_slot
!   = LOOP_VINFO_PEELING_HTAB (loop_vinfo)->find_slot (slot, 
INSERT);
*new_slot = slot;
  }
  
--- 1100,1106 
slot->npeel = npeel;
slot->dr = dr;
slot->count = 1;
!   new_slot = peeling_htab->find_slot (slot, INSERT);
*new_slot = slot;
  }
  
*** vect_peeling_hash_get_lowest_cost (_vect
*** 1164,1170 
 option that aligns as many accesses as possible.  */
  
  static struct data_reference *
! vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
 unsigned int *npeel,
   stmt_vector_for_cost *body_cost_vec)
  {
--- 1204,1211 
 option that aligns as many accesses as possible.  */
  
  static struct data_reference *
! vect_peeling_hash_choose_best_peeling (hash_table <peel_info_hasher>
! *peeling_htab,
!  

[gomp4.1] Add testcase for negative linear step

2015-10-13 Thread Jakub Jelinek
Hi!

The spec has been confusing and in one spot said that a linear step
on declare simd has to be constant positive integer, which doesn't
make sense, negative steps are just fine.  As that is what we had
implemented, this patch just adds a testcase for it.

2015-10-13  Jakub Jelinek  

* gcc.dg/vect/vect-simd-clone-15.c: New test.

--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-15.c.jj   2015-10-12 
14:02:05.672431442 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-15.c  2015-10-12 
14:07:13.383734571 +0200
@@ -0,0 +1,39 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int array[N];
+
+#pragma omp declare simd linear(val(b):-3), notinbranch
+__attribute__((noinline)) int
+foo (int a, int b)
+{
+  return a + b;
+}
+
+__attribute__((noinline, noclone)) void
+bar ()
+{
+  int i;
+#pragma omp simd
+  for (i = 0; i < N; ++i)
+array[i] = foo (i >> 1, -i * 3);
+}
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  bar ();
+  for (i = 0; i < N; i++)
+if (array[i] != ((i >> 1) + (-3 * i)))
+  abort ();
+  return 0;
+}

Jakub


Re: Do not use TYPE_CANONICAL in useless_type_conversion

2015-10-13 Thread Richard Biener
On Tue, 13 Oct 2015, Alexandre Oliva wrote:

> On Oct  9, 2015, Jan Hubicka  wrote:
> 
> > ... we initialize mode to be non-VOIDmode only if the field is not 
> > bitfield. I missed
> > the flag while looking at the dump.  Indeed the DECL_MODE if FIELD_DECL is 
> > SImode,
> > but it is ignored.
> 
> > Hmm, it seems that for CALL_EXPR the register is supposed to be non-BLKmode
> > already.  So I guess only what we need to do is to consider bifields when 
> > TEMP is blk mode and then we want to convert? what about this?
> 
> How about using in store_bit_field the same logic you added to
> store_expr_with_bounds to get input MEMs to have a compatible mode?
> This patch was regstrapped on i686-linux-gnu and x86_64-linux-gnu.  Ok
> to install?

Ok.

Thanks,
Richard.

> support BLKmode inputs for store_bit_field
> 
> From: Alexandre Oliva 
> 
> Revision 228586 changed useless_type_conversion_p and added mode
> changes for MEM:BLKmode inputs in store_expr_with_bounds, but it
> missed store_bit_field.  This caused ada/rts/s-regpat.ads to fail
> compilation on x86_64-linux-gnu.
> 
> for  gcc/ChangeLog
> 
>   * expmed.c (store_bit_field_1): Adjust mode of BLKmode inputs.
> ---
>  gcc/expmed.c |8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/gcc/expmed.c b/gcc/expmed.c
> index 93cf508..69ea511 100644
> --- a/gcc/expmed.c
> +++ b/gcc/expmed.c
> @@ -757,6 +757,14 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT 
> bitsize,
>}
>}
>  
> +  /* We allow move between structures of same size but different mode.
> + If source is in memory and the mode differs, simply change the memory.  
> */
> +  if (GET_MODE (value) == BLKmode && GET_MODE (op0) != BLKmode)
> +{
> +  gcc_assert (MEM_P (value));
> +  value = adjust_address_nv (value, GET_MODE (op0), 0);
> +}
> +
>/* Storing an lsb-aligned field in a register
>   can be done with a movstrict instruction.  */
>  
> 
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


[gomp4.1] Fixup nesting diagnostics for ordered depend, allow ordered without param on for simd

2015-10-13 Thread Jakub Jelinek
Hi!

For ordered depend we unfortunately need to diagnose incorrect length or
contents of the sink vec before OpenMP region nesting is diagnosed,
so this patch diagnoses the incorrect nesting before the diagnostics,
to avoid confusing errors.  Also the patch allows ordered clause
on for simd, but only if ordered has no parameter.  To be used with
#pragma omp ordered threads simd construct.

2015-10-13  Jakub Jelinek  

* gimplify.c (gimplify_omp_ordered): Diagnose ordered depend
inside loop without ordered(n) clause.
* omp-low.c (check_omp_nesting_restrictions): Use a different
wording if ordered clause is present, but without parameter.
c/
* c-parser.c (c_parser_omp_simd): Allow ordered clause on
for simd, but ensure no parameter is specified for it.
cp/
* parser.c (cp_parser_omp_simd): Allow ordered clause on
for simd, but ensure no parameter is specified for it.
testsuite/
* c-c++-common/gomp/ordered-3.c: New test.

--- gcc/gimplify.c.jj   2015-10-12 12:37:32.0 +0200
+++ gcc/gimplify.c  2015-10-12 13:43:30.732469344 +0200
@@ -8749,7 +8749,18 @@ gimplify_omp_ordered (tree expr, gimple_
   if (gimplify_omp_ctxp)
 for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
   if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
- && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
+ && gimplify_omp_ctxp->loop_iter_var.is_empty ()
+ && (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK
+ || OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE))
+   {
+ error_at (OMP_CLAUSE_LOCATION (c),
+   "% clause must be closely nested "
+   "inside a loop with % clause with "
+   "a parameter");
+ failures++;
+   }
+  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
+  && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
{
  bool fail = false;
  for (decls = OMP_CLAUSE_DECL (c), i = 0;
--- gcc/omp-low.c.jj2015-10-12 12:21:32.0 +0200
+++ gcc/omp-low.c   2015-10-12 13:40:39.750083213 +0200
@@ -3375,14 +3375,21 @@ check_omp_nesting_restrictions (gimple s
  || gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
  || (oclause
= find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
-  OMP_CLAUSE_ORDERED)) == NULL_TREE
- || OMP_CLAUSE_ORDERED_EXPR (oclause) == NULL_TREE)
+  OMP_CLAUSE_ORDERED)) == NULL_TREE)
{
  error_at (OMP_CLAUSE_LOCATION (c),
"% clause must be closely nested "
"inside an ordered loop");
  return false;
}
+ else if (OMP_CLAUSE_ORDERED_EXPR (oclause) == NULL_TREE)
+   {
+ error_at (OMP_CLAUSE_LOCATION (c),
+   "% clause must be closely nested "
+   "inside a loop with % clause with "
+   "a parameter");
+ return false;
+   }
}
  else
{
--- gcc/c/c-parser.c.jj 2015-10-09 09:26:12.0 +0200
+++ gcc/c/c-parser.c2015-10-12 13:26:52.275752160 +0200
@@ -14009,13 +14009,21 @@ c_parser_omp_simd (location_t loc, c_par
 
   strcat (p_name, " simd");
   mask |= OMP_SIMD_CLAUSE_MASK;
-  mask &= ~(OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ORDERED);
 
   clauses = c_parser_omp_all_clauses (parser, mask, p_name, cclauses == NULL);
   if (cclauses)
 {
   omp_split_clauses (loc, OMP_SIMD, mask, clauses, cclauses);
   clauses = cclauses[C_OMP_CLAUSE_SPLIT_SIMD];
+  tree c = find_omp_clause (cclauses[C_OMP_CLAUSE_SPLIT_FOR],
+   OMP_CLAUSE_ORDERED);
+  if (c && OMP_CLAUSE_ORDERED_EXPR (c))
+   {
+ error_at (OMP_CLAUSE_LOCATION (c),
+   "% clause with parameter may not be specified "
+   "on %qs construct", p_name);
+ OMP_CLAUSE_ORDERED_EXPR (c) = NULL_TREE;
+   }
 }
 
   block = c_begin_compound_stmt (true);
--- gcc/cp/parser.c.jj  2015-10-09 09:27:22.0 +0200
+++ gcc/cp/parser.c 2015-10-12 13:27:43.600966307 +0200
@@ -32527,7 +32527,6 @@ cp_parser_omp_simd (cp_parser *parser, c
 
   strcat (p_name, " simd");
   mask |= OMP_SIMD_CLAUSE_MASK;
-  mask &= ~(OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ORDERED);
 
   clauses = cp_parser_omp_all_clauses (parser, mask, p_name, pragma_tok,
   cclauses == NULL);
@@ -32535,6 +32534,15 @@ cp_parser_omp_simd (cp_parser *parser, c
 {
   cp_omp_split_clauses (loc, OMP_SIMD, mask, clauses, cclauses);
   clauses = cclauses[C_OMP_CLAUSE_SPLIT_SIMD];
+  tree c 

[gomp4.1] Various ordered, linear and copyin tweaks

2015-10-13 Thread Jakub Jelinek
Hi!

So, OpenMP 4.5 says that:
1) linear can be only specified for the loop iterator on
   distribute {, parallel for} simd (because distribute can't
   do firstprivate + lastprivate)
2) linear can't be specified at all on distribute parallel for
3) linear can't be specified on doacross loops (ordered(n) clause)
4) ordered can't be specified on distribute parallel for{, simd}
   (as no synchronization exists between contention groups)
5) copyin can't be specified on target parallel{, for, for simd}
   (as threadprivate is not supported in target regions)

This patch adds diagnostics for those restrictions and tweaks the testsuite.

2015-10-13  Jakub Jelinek  

gcc/
* gimplify.c (gimplify_scan_omp_clauses): Diagnose linear
clause on combined distribute {, parallel for} simd construct,
unless it is the loop iterator.
gcc/c/
* c-parser.c (c_parser_omp_for_loop): Disallow linear clause
if ordered(n) is present.
(c_parser_omp_for): Disallow ordered clause if combined with
distribute.  Disallow linear clause if not combined with
simd and combined with distribute.
(c_parser_omp_parallel): Disallow copyin clause on
target parallel{, for, for simd}.
gcc/cp/
* parser.c (cp_parser_omp_for_loop): Disallow linear clause
if ordered(n) is present.
(cp_parser_omp_for): Disallow ordered clause if combined with
distribute.  Disallow linear clause if not combined with
simd and combined with distribute.
(cp_parser_omp_parallel): Disallow copyin clause on
target parallel{, for, for simd}.
gcc/testsuite/
* c-c++-common/gomp/clauses-1.c (bar): Remove linear
and/or ordered clauses where they are no longer allowed.
* c-c++-common/gomp/clauses-4.c: New test.
* c-c++-common/gomp/pr61486-1.c (foo): Remove linear clause
on non-iterator.
* c-c++-common/gomp/pr61486-2.c (test, test2): Remove ordered
clause and ordered construct where no longer allowed.
libgomp/
* testsuite/libgomp.c/pr66199-2.c (f2): Adjust for linear clause
only allowed on the loop iterator.
* testsuite/libgomp.c/pr66199-4.c (f2): Adjust for linear clause
no longer allowed.
* testsuite/libgomp.c/linear-2.c: Remove.
* testsuite/libgomp.c++/linear-2.C: Remove.

--- gcc/gimplify.c.jj   2015-10-12 13:43:30.0 +0200
+++ gcc/gimplify.c  2015-10-12 19:19:34.563575391 +0200
@@ -6340,6 +6340,36 @@ gimplify_scan_omp_clauses (tree *list_p,
}
  else
{
+ if (code == OMP_SIMD
+ && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
+   {
+ struct gimplify_omp_ctx *octx = outer_ctx;
+ if (octx
+ && octx->region_type == ORT_WORKSHARE
+ && octx->combined_loop
+ && !octx->distribute)
+   {
+ if (octx->outer_context
+ && (octx->outer_context->region_type
+ == ORT_COMBINED_PARALLEL))
+   octx = octx->outer_context->outer_context;
+ else
+   octx = octx->outer_context;
+   }
+ if (octx
+ && octx->region_type == ORT_WORKSHARE
+ && octx->combined_loop
+ && octx->distribute
+ && !lang_GNU_Fortran ())
+   {
+ error_at (OMP_CLAUSE_LOCATION (c),
+   "% clause for variable other than "
+   "loop iterator specified on construct "
+   "combined with %");
+ remove = true;
+ break;
+   }
+   }
  /* For combined #pragma omp parallel for simd, need to put
 lastprivate and perhaps firstprivate too on the
 parallel.  Similarly for #pragma omp for simd.  */
--- gcc/c/c-parser.c.jj 2015-10-12 13:26:52.0 +0200
+++ gcc/c/c-parser.c2015-10-12 18:27:03.321992027 +0200
@@ -13662,6 +13662,19 @@ c_parser_omp_for_loop (location_t loc, c
= build_int_cst (NULL_TREE, collapse);
   ordered = collapse;
 }
+  if (ordered)
+{
+  for (tree *pc = &clauses; *pc; )
+   if (OMP_CLAUSE_CODE (*pc) == OMP_CLAUSE_LINEAR)
+ {
+   error_at (OMP_CLAUSE_LOCATION (*pc),
+ "%<linear%> clause may not be specified together "
+ "with %<ordered%> clause with a parameter");
+   *pc = OMP_CLAUSE_CHAIN (*pc);
+ }
+   else
+ pc = &OMP_CLAUSE_CHAIN (*pc);
+}
 
   gcc_assert (collapse >= 1 && ordered >= 0);
   count = ordered ? ordered : collapse;
@@ -14066,6 +14079,9 @@ c_parser_omp_for (location_t loc, c_pars
   mask 

Re: Do not use TYPE_CANONICAL in useless_type_conversion

2015-10-13 Thread Alexandre Oliva
On Oct  9, 2015, Jan Hubicka  wrote:

> ... we initialize mode to be non-VOIDmode only if the field is not bitfield. 
> I missed
> the flag while looking at the dump.  Indeed the DECL_MODE if FIELD_DECL is 
> SImode,
> but it is ignored.

> Hmm, it seems that for CALL_EXPR the register is supposed to be non-BLKmode
> already.  So I guess only what we need to do is to consider bifields when 
> TEMP is blk mode and then we want to convert? what about this?

How about using in store_bit_field the same logic you added to
store_expr_with_bounds to get input MEMs to have a compatible mode?
This patch was regstrapped on i686-linux-gnu and x86_64-linux-gnu.  Ok
to install?


support BLKmode inputs for store_bit_field

From: Alexandre Oliva 

Revision 228586 changed useless_type_conversion_p and added mode
changes for MEM:BLKmode inputs in store_expr_with_bounds, but it
missed store_bit_field.  This caused ada/rts/s-regpat.ads to fail
compilation on x86_64-linux-gnu.

for  gcc/ChangeLog

* expmed.c (store_bit_field_1): Adjust mode of BLKmode inputs.
---
 gcc/expmed.c |8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/expmed.c b/gcc/expmed.c
index 93cf508..69ea511 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -757,6 +757,14 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT 
bitsize,
   }
   }
 
+  /* We allow move between structures of same size but different mode.
+ If source is in memory and the mode differs, simply change the memory.  */
+  if (GET_MODE (value) == BLKmode && GET_MODE (op0) != BLKmode)
+{
+  gcc_assert (MEM_P (value));
+  value = adjust_address_nv (value, GET_MODE (op0), 0);
+}
+
   /* Storing an lsb-aligned field in a register
  can be done with a movstrict instruction.  */
 


-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


[gomp4.1] Add testcase for map clause bit-field diagnostics

2015-10-13 Thread Jakub Jelinek
Hi!

Bit-fields are for obvious reasons disallowed in map clause (one can't take
address of them, it would be too complicated and racy to move them to and
from device), but we didn't have a testcase for it.

2015-10-13  Jakub Jelinek  

* c-c++-common/gomp/map-3.c: New test.

--- gcc/testsuite/c-c++-common/gomp/map-3.c.jj  2015-10-12 13:59:09.872114839 
+0200
+++ gcc/testsuite/c-c++-common/gomp/map-3.c 2015-10-12 13:58:25.0 
+0200
@@ -0,0 +1,21 @@
+struct S { int i : 1; int j : 4; long long k : 25; };
+void bar (struct S, int);
+#pragma omp declare target to (bar)
+
+void
+foo (struct S a, struct S b, struct S c, struct S d)
+{
+  #pragma omp target map (a)
+  bar (a, 0);
+  #pragma omp target map (a) map (b.i) /* { dg-error "bit-field 
.b.\(S::\|\)i. in .map. clause" } */
+  bar (a, b.i);
+  #pragma omp target map (a) map (b.j) /* { dg-error "bit-field 
.b.\(S::\|\)j. in .map. clause" } */
+  bar (a, b.j);
+  #pragma omp target map (a) map (b.k) /* { dg-error "bit-field 
.b.\(S::\|\)k. in .map. clause" } */
+  bar (a, b.k);
+  #pragma omp target data map (a) map (b.i)/* { dg-error "bit-field 
.b.\(S::\|\)i. in .map. clause" } */
+  {
+#pragma omp target enter data map (alloc: a) map (to: c.j) /* { 
dg-error "bit-field .c.\(S::\|\)j. in .map. clause" } */
+#pragma omp target exit data map (release: a) map (from: d.k)  /* { 
dg-error "bit-field .d.\(S::\|\)k. in .map. clause" } */
+  }
+}

Jakub


Re: [PATCH ARM]: PR67745: Fix function alignment after __attribute__ 2/2

2015-10-13 Thread Ramana Radhakrishnan

> 
> yes I see, I was hoping to avoid a new hook, but as you said it seems 
> mandatory for the mere declaration case.
> 
> Here is one proposal, it defaults to nothing and the ARM implementation does 
> not need to handle the vptr bit setting. so that simplifies a lot the things.
> 
> The hook is called from rest_of_decl_compilation for mere declarations and 
> allocate_struct_function for definitions.


I'm not sure we have testsuite coverage for this -  can you add a test or 2 ?

> 
> 
> 
> 
> 
> 
> 
> align_hook.patch
> 
> 
> 2015-09-29  Christian Bruel  
> 
>   PR target/67745
>   * config/arm/arm.h (FUNCTION_BOUNDARY): Use FUNCTION_BOUNDARY_P.
>   (FUNCTION_BOUNDARY_P): New macro:
>   * config/arm/arm.c (TARGET_RELAYOUT_FUNCTION, arm_relayout_function): 
>   New hook.

The ARM changes look ok to me. Please as usual watch out for any regressions.



>   * doc/tm.texi.in (TARGET_RELAYOUT_FUNCTION): Document.
>   * doc/tm.texi (TARGET_RELAYOUT_FUNCTION): New hook.
>   * gcc/target.def (TARGET_RELAYOUT_FUNCTION): Likewise.
>   * gcc/function.c (allocate_struct_function): Call relayout_function 
> hook.
>   * gcc/passes.c (rest_of_decl_compilation): Likewise.
>   * gcc/targhooks.c (default_relayout_function): New function.
>   * gcc/targhooks.h (default_relayout_function): Declare.
> 


> --- gnu_trunk.p0/gcc/gcc/config/arm/arm.c 2015-10-12 10:34:27.599740376 
> +0200
> +++ gnu_trunk.devs/gcc/gcc/config/arm/arm.c   2015-10-12 12:26:51.607398021 
> +0200
> @@ -250,6 +250,7 @@ static void arm_override_options_after_c
>  static void arm_option_print (FILE *, int, struct cl_target_option *);
>  static void arm_set_current_function (tree);
>  static bool arm_can_inline_p (tree, tree);
> +static void arm_relayout_function (tree);
>  static bool arm_valid_target_attribute_p (tree, tree, tree, int);
>  static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
>  static bool arm_macro_fusion_p (void);
> @@ -405,6 +406,9 @@ static const struct attribute_spec arm_a
>  #undef TARGET_CAN_INLINE_P
>  #define TARGET_CAN_INLINE_P arm_can_inline_p
>  
> +#undef TARGET_RELAYOUT_FUNCTION
> +#define TARGET_RELAYOUT_FUNCTION arm_relayout_function
> +
>  #undef  TARGET_OPTION_OVERRIDE
>  #define TARGET_OPTION_OVERRIDE arm_option_override
>  
> @@ -29825,6 +29829,23 @@ arm_can_inline_p (tree caller ATTRIBUTE_
>return true;
>  }
>  
> +/* Hook to fix function's alignment affected by target attribute.  */
> +
> +static void
> +arm_relayout_function (tree fndecl)
> +{
> +  if (DECL_USER_ALIGN (fndecl))
> +return;
> +
> +  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
> +
> +  if (!callee_tree)
> +callee_tree = target_option_default_node;
> +
> +  DECL_ALIGN (fndecl) =
> +FUNCTION_BOUNDARY_P (TREE_TARGET_OPTION (callee_tree)->x_target_flags);
> +}
> +
>  /* Inner function to process the attribute((target(...))), take an argument 
> and
> set the current options from the argument.  If we have a list, recursively
> go over the list.  */
> diff '--exclude=.svn' '--exclude=*~' -r -up 
> gnu_trunk.p0/gcc/gcc/config/arm/arm.h gnu_trunk.devs/gcc/gcc/config/arm/arm.h
> --- gnu_trunk.p0/gcc/gcc/config/arm/arm.h 2015-10-12 10:34:27.607740391 
> +0200
> +++ gnu_trunk.devs/gcc/gcc/config/arm/arm.h   2015-10-12 12:27:55.507546958 
> +0200
> @@ -565,7 +565,8 @@ extern int arm_arch_crc;
>  #define PREFERRED_STACK_BOUNDARY \
>  (arm_abi == ARM_ABI_ATPCS ? 64 : STACK_BOUNDARY)
>  
> -#define FUNCTION_BOUNDARY   (TARGET_THUMB ? 16 : 32)
> +#define FUNCTION_BOUNDARY_P(flags)  (TARGET_THUMB_P (flags) ? 16 : 32)
> +#define FUNCTION_BOUNDARY   (FUNCTION_BOUNDARY_P (target_flags))
>  
>  /* The lowest bit is used to indicate Thumb-mode functions, so the
> vbit must go into the delta field of pointers to member
> diff '--exclude=.svn' '--exclude=*~' -r -up gnu_trunk.p0/gcc/gcc/doc/tm.texi 
> gnu_trunk.devs/gcc/gcc/doc/tm.texi
> --- gnu_trunk.p0/gcc/gcc/doc/tm.texi  2015-10-12 10:33:29.907630642 +0200
> +++ gnu_trunk.devs/gcc/gcc/doc/tm.texi2015-10-12 12:33:33.880332253 
> +0200
> @@ -9985,6 +9985,10 @@ default, inlining is not allowed if the
>  specific target options and the caller does not use the same options.
>  @end deftypefn
>  
> +@deftypefn {Target Hook} void TARGET_RELAYOUT_FUNCTION (tree @var{fndecl})
> +This target hook fixes function @var{fndecl} after attributes are processed. 
> Default does nothing. On ARM, the default function's alignment is updated 
> with the attribute target.
> +@end deftypefn
> +
>  @node Emulated TLS
>  @section Emulating TLS
>  @cindex Emulated TLS
> diff '--exclude=.svn' '--exclude=*~' -r -up 
> gnu_trunk.p0/gcc/gcc/doc/tm.texi.in gnu_trunk.devs/gcc/gcc/doc/tm.texi.in
> --- gnu_trunk.p0/gcc/gcc/doc/tm.texi.in   2015-10-12 10:33:29.919630666 
> +0200
> +++ gnu_trunk.devs/gcc/gcc/doc/tm.texi.in 2015-10-12 11:28:16.350590629 
> +0200
> @@ -7274,6 

Re: [PATCH][AArch64] Improve comparison with complex immediates followed by branch/cset

2015-10-13 Thread Kyrill Tkachov

On 08/10/15 09:54, Kyrill Tkachov wrote:

Hi all,

This patch slightly improves sequences where we want to compare against a 
complex immediate and branch against the result
or perform a cset on it.
This means transforming sequences of mov+movk+cmp+branch into sub+subs+branch.
Similar for cset. Unfortunately I can't just do this by simply matching a 
(compare (reg) (const_int)) rtx because
this transformation is only valid for equal/not equal comparisons, not greater 
than/less than ones but the compare instruction
pattern only has the general CC mode. We need to also match the use of the 
condition code.

I've done this by creating a splitter for the conditional jump where the 
condition is the comparison between the register
and the complex immediate and splitting it into the sub+subs+condjump sequence. 
Similar for the cstore pattern.
Thankfully we don't split immediate moves until later in the optimization 
pipeline so combine can still try the right patterns.
With this patch for the example code:
void g(void);
void f8(int x)
{
 if (x != 0x123456) g();
}

I get:
f8:
  sub w0, w0, #1191936
  subsw0, w0, #1110
  beq .L1
  b   g
  .p2align 3
.L1:
  ret

instead of the previous:
f8:
  mov w1, 13398
  movkw1, 0x12, lsl 16
  cmp w0, w1
  beq .L1
  b   g
  .p2align 3
.L1:
  ret


The condjump case triggered 130 times across all of SPEC2006 which is, 
admittedly, not much
whereas the cstore case didn't trigger at all. However, the included testcase 
in the patch
demonstrates the kind of code that it would trigger on.

Bootstrapped and tested on aarch64.

Ok for trunk?


There's a few changes I'd like to make to this patch and I'll post an updated 
version when it's ready.
So no need to review this version, besides getting the general idea of the 
transformation...

Sorry for the noise,
Kyrill



Thanks,
Kyrill


2015-10-08  Kyrylo Tkachov  

  * config/aarch64/aarch64.md (*condjump): Rename to...
  (condjump): ... This.
  (*compare_condjump): New define_insn_and_split.
  (*compare_cstore_insn): Likewise.
  (*cstore_insn): Rename to...
  (cstore_insn): ... This.
  * config/aarch64/iterators.md (CMP): Handle ne code.
  * config/aarch64/predicates.md (aarch64_imm24): New predicate.

2015-10-08  Kyrylo Tkachov  

  * gcc.target/aarch64/cmpimm_branch_1.c: New test.
  * gcc.target/aarch64/cmpimm_cset_1.c: Likewise.




Fix prototype for print_insn in rtl.h

2015-10-13 Thread Nikolai Bozhenov
Currently the prototype for print_insn in rtl.h doesn't match its
definition in sched-vis.c. The patch fixes this mismatch.


Thanks,
Nikolai
2015-10-13  Nikolai Bozhenov  

* gcc/rtl.h (print_insn): fix prototype

diff --git a/gcc/rtl.h b/gcc/rtl.h
index a592a1e..d6edc71 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3574,7 +3574,7 @@ extern void dump_rtl_slim (FILE *, const rtx_insn *, 
const rtx_insn *,
   int, int);
 extern void print_value (pretty_printer *, const_rtx, int);
 extern void print_pattern (pretty_printer *, const_rtx, int);
-extern void print_insn (pretty_printer *, const_rtx, int);
+extern void print_insn (pretty_printer *, const rtx_insn *, int);
 extern void rtl_dump_bb_for_graph (pretty_printer *, basic_block);
 extern const char *str_pattern_slim (const_rtx);
 


[gomp4.1] Disallow multiple depend(source) or mixing source + sink

2015-10-13 Thread Jakub Jelinek
Hi!

Multiple depend(source) is useless and it has been agreed on that we should
disallow it.  Similarly, when mixing sink and source on the same ordered
construct, we'd either need to define behavior for it, or disallow it, where
the latter is what we've done for clarity reasons.

The testcase also verifies diagnostics for the case where ordered is less
than collapse.

2015-10-13  Jakub Jelinek  

* gimplify.c (gimplify_omp_ordered): Disallow multiple
depend(source) clauses on the same construct.  Disallow depend(source)
combined with depend(sink:vec) on the same construct.
* omp-low.c (check_omp_nesting_restrictions): Remove pointless
asserts.
testsuite/
* c-c++-common/gomp/doacross-1.c: New test.

--- gcc/gimplify.c.jj   2015-10-09 09:28:04.0 +0200
+++ gcc/gimplify.c  2015-10-12 12:37:32.065883694 +0200
@@ -8743,6 +8743,8 @@ gimplify_omp_ordered (tree expr, gimple_
   tree c, decls;
   int failures = 0;
   unsigned int i;
+  tree source_c = NULL_TREE;
+  tree sink_c = NULL_TREE;
 
   if (gimplify_omp_ctxp)
 for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
@@ -8772,13 +8774,33 @@ gimplify_omp_ordered (tree expr, gimple_
  if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length () / 2)
{
  error_at (OMP_CLAUSE_LOCATION (c),
-   "number of variables in depend(sink) "
+   "number of variables in %<depend(sink)%> "
"clause does not match number of "
"iteration variables");
- fail = true;
  failures++;
}
+ sink_c = c;
}
+  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
+  && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
+   {
+ if (source_c)
+   {
+ error_at (OMP_CLAUSE_LOCATION (c),
+   "more than one %<depend(source)%> clause on an "
+   "%<ordered%> construct");
+ failures++;
+   }
+ else
+   source_c = c;
+   }
+  if (source_c && sink_c)
+{
+  error_at (OMP_CLAUSE_LOCATION (source_c),
+   "%<depend(source)%> clause specified together with "
+   "%<depend(sink:)%> clauses on the same construct");
+  failures++;
+}
 
   if (failures)
 return gimple_build_nop ();
--- gcc/omp-low.c.jj2015-10-02 11:38:40.0 +0200
+++ gcc/omp-low.c   2015-10-12 12:21:32.081523021 +0200
@@ -3348,8 +3348,6 @@ check_omp_nesting_restrictions (gimple s
|| OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK))
  {
enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
-   gcc_assert (kind == OMP_CLAUSE_DEPEND_SOURCE
-   || kind == OMP_CLAUSE_DEPEND_SINK);
error_at (OMP_CLAUSE_LOCATION (c),
  "%<depend(%s)%> is only allowed in %<omp ordered%>",
  kind == OMP_CLAUSE_DEPEND_SOURCE ? "source" : "sink");
@@ -3455,8 +3453,6 @@ check_omp_nesting_restrictions (gimple s
|| OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK))
  {
enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
-   gcc_assert (kind == OMP_CLAUSE_DEPEND_SOURCE
-   || kind == OMP_CLAUSE_DEPEND_SINK);
error_at (OMP_CLAUSE_LOCATION (c),
  "%<depend(%s)%> is only allowed in %<omp ordered%>",
  kind == OMP_CLAUSE_DEPEND_SOURCE ? "source" : "sink");
--- gcc/testsuite/c-c++-common/gomp/doacross-1.c.jj 2015-10-12 
12:01:39.528659576 +0200
+++ gcc/testsuite/c-c++-common/gomp/doacross-1.c2015-10-12 
12:46:57.691262361 +0200
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp" } */
+
+void
+foo (void)
+{
+  int i, j, k;
+  #pragma omp for ordered (1)
+  for (i = 0; i < 64; i++)
+{
+  #pragma omp ordered depend (sink: i - 1)
+  #pragma omp ordered depend (source)
+}
+  #pragma omp for ordered (1) collapse (1)
+  for (i = 0; i < 64; i++)
+{
+  #pragma omp ordered depend (sink: i - 1)
+  #pragma omp ordered depend (source)
+}
+  #pragma omp for collapse (2) ordered (1) /* { dg-error "clause 
parameter is less than" } */
+  for (i = 0; i < 64; i++)
+for (j = 0; j < 64; j++)
+  {
+   #pragma omp ordered depend (sink: i - 1)/* { dg-error "does not 
match number" } */
+   #pragma omp ordered depend (source)
+  }
+  #pragma omp for ordered (2) collapse (3) /* { dg-error "clause 
parameter is less than" } */
+  for (i = 0; i < 64; i++)
+for (j = 0; j < 64; j++)
+  for (k = 0; k < 64; k++)
+   {
+ #pragma omp ordered depend (sink: i - 1, j - 2) /* { dg-error "does 
not match number" } */
+ #pragma omp ordered depend (source)
+   }
+  #pragma omp ordered depend (sink: 

[PATCH] mark libstdc++ tests unsupported if they fail with "relocation truncated"

2015-10-13 Thread Szabolcs Nagy

Dejagnu tweak:
Check the compiler output in libstdc++-dg-test using
${tool}_check_unsupported_p and mark the output
unsupported accordingly to avoid "relocation truncated"
failures cluttering the test results on aarch64-none-elf
with -mcmodel=tiny. (gcc torture tests already use this
predicate for this reason.)

This is a revision of
https://gcc.gnu.org/ml/libstdc++/2015-01/msg00198.html
which had problems because the unsupported result was
propagated to and handled by dg-runtest differently
than expected.

Is it OK?

libstdc++-v3/Changelog:

2015-10-13  Szabolcs Nagy  

* testsuite/lib/libstdc++.exp (libstdc++-dg-test): Check for
unsupported compiler output.
diff --git a/libstdc++-v3/testsuite/lib/libstdc++.exp b/libstdc++-v3/testsuite/lib/libstdc++.exp
index 88738b7..769ac94 100644
--- a/libstdc++-v3/testsuite/lib/libstdc++.exp
+++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
@@ -409,6 +409,11 @@ proc libstdc++-dg-test { prog do_what extra_tool_flags } {
 # and-target file), but the defaults are lacking in goodness.
 set comp_output [$select_compile "$prog" "$output_file" "$compile_type" $options];
 
+set unsupported_message [libstdc++_check_unsupported_p $comp_output]
+if { $unsupported_message != "" } {
+	set comp_output "::unsupported::$unsupported_message"
+}
+
 return [list $comp_output $output_file]
 }
 


Re: [PATCH] Random shuffle moveable: container size

2015-10-13 Thread Jonathan Wakely

On 08/10/15 10:35 -0300, Aurelio Remonda wrote:

This patch reduces the size of the array A (the array that contains
the values being shuffled) so the test can pass while running the
stdlibc++ testsuite.


Ahem! The project's name is libstdc++ !!! :-)



It also make some minor changes such as:
*Deleting a useless call to fill_ascending function on test02.
*Changing N from const int to const unsigned int.
I have a company-wide copyright assignment, but I don't have commit access.


OK, I will commit this (without the unnecessary whitespace changes).

Thanks.



Re: [RFC VTV] Fix VTV for targets that have section anchors.

2015-10-13 Thread Marcus Shawcroft

On 09/10/15 10:17, Ramana Radhakrishnan wrote:

This started as a Friday afternoon project ...

It turned out enabling VTV for AArch64 and ARM was a matter of fixing PR67868 
which essentially comes from building libvtv with section anchors turned on. 
The problem was that the flow of control from output_object_block through to 
switch_section did not have the same special casing for the vtable section that 
exists in assemble_variable.

Once this was done, I managed to build and test aarch64-none-linux-gnu with 
--enable-vtable-verify, a similar test was done for armhf.

Testing showed no regressions in the gcc/ g++ testsuites for aarch64 and armhf
Testing showed no failures in libvtv testsuite for aarch64 but a few more 
failures - see below.
Testing showed 2 failures in libstdc++-v3 testsuite compared to without vtable 
verification.

FAIL: libstdc++-abi/abi_check
FAIL: experimental/filesystem/iterators/directory_iterator.cc execution test

However both these failures also occur on x86_64 - so I'm content to declare 
victory on AArch64
as far as basic enablement goes.

On ARM I see the following failures that I still need to debug - I can see that 
the failure is because the write to _ZN4_VTVI1BE12__vtable_mapE does not elicit 
a SEGV but I need to go further than that.

FAIL: libvtv.cc/thunk_vtable_map_attack.cc -O0 -fvtable-verify=std execution 
test
FAIL: libvtv.cc/thunk_vtable_map_attack.cc -O2 -fvtable-verify=std execution 
test
FAIL: libvtv.cc/thunk_vtable_map_attack.cc -O0 -fvtable-verify=preinit 
execution test
FAIL: libvtv.cc/thunk_vtable_map_attack.cc -O2 -fvtable-verify=preinit 
execution test


Questions -

1. Are the generic changes to varasm.c ok ?
2. Can we take the AArch64 support in now, given this amount of testing ? 
Marcus / Caroline ?


+  aarch64*-*-linux*)
+   VTV_SUPPORTED=yes
;;

Ramana, go ahead and add the aarch64 enable once you have the 
pre-requisite varasm changes approved.


Cheers
/Marcus


3. Any suggestions / helpful debug hints for VTV debugging (other than turning 
VTV_DEBUG on and inspecting trace) ?

There's an arm*-*-* hunk there but I'm easy about applying that right now and 
figuring out the issues
over time. In case we don't fix it for 6.0 we can rip the support out before 
release.


Thanks,
Ramana

P.S. (Yes, I'll provide a Changelog :) )





Re: [PATCH, sparc]: Use ROUND_UP and ROUND_DOWN macros

2015-10-13 Thread Uros Bizjak
On Tue, Oct 13, 2015 at 12:10 PM, Eric Botcazou  wrote:
>> Two functional changes I'd like to point out:
>>
>>  /* ALIGN FRAMES on double word boundaries */
>> -#define SPARC_STACK_ALIGN(LOC) \
>> -  (TARGET_ARCH64 ? (((LOC)+15) & ~15) : (((LOC)+7) & ~7))
>> +#define SPARC_STACK_ALIGN(LOC) ROUND_UP ((LOC), UNITS_PER_WORD * 2)
>>
>> The one above uses UNITS_PER_WORD in stack alignment calculation
>
> OK.
>
>>/* Always preserve double-word alignment.  */
>> -  offset = (offset + 8) & -8;
>> +  offset = ROUND_UP (offset, 8);
>>
>> The one above looks like off-by-one bug, but this needs a confirmation.
>
> No, it's correct, it's a bump of 8 followed by a ROUND_DOWN (the offset may or
> may not have been bumped by 4 already in the code just above).

In this case, I think it is better to write this part as:

--cut here--
offset += 8;

/* Always preserve double-word alignment.  */
offset = ROUND_DOWN (offset, 8);
--cut here--

WDYT?

Uros.


Re: Fix 61441

2015-10-13 Thread Sujoy Saraswati
Hi,
 This is another modified version of the patch, incorporating the
previous comments.

Bootstrap and regression tests on x86_64-linux-gnu and
aarch64-unknown-linux-gnu passed with changes done on trunk.

Is this fine ?

Regards,
Sujoy

2015-10-13  Sujoy Saraswati 

PR tree-optimization/61441
* builtins.c (integer_valued_real_p): Return true for
NaN values.
(fold_builtin_trunc, fold_builtin_pow): Avoid the operation
if flag_signaling_nans is on and the operand is a NaN.
(fold_builtin_powi): Same.
* fold-const.c (const_binop): Convert sNaN to qNaN when
flag_signaling_nans is off.
(const_unop): Avoid the operation, other than NEGATE and
ABS, if flag_signaling_nans is on and the operand is a NaN.
(fold_convert_const_real_from_real): Avoid the operation if
flag_signaling_nans is on and the operand is a NaN.
* real.c (do_add): Make resulting NaN value to be qNaN.
(do_multiply, do_divide, do_fix_trunc): Same.
(real_arithmetic, real_ldexp): Same
* simplify-rtx.c (simplify_const_unary_operation): Avoid the
operation if flag_signaling_nans is on and the operand is a NaN.
* tree-ssa-math-opts.c (gimple_expand_builtin_pow): Same.

PR tree-optimization/61441
* gcc.dg/pr61441.c: New testcase.

Index: gcc/builtins.c
===
--- gcc/builtins.c  (revision 228700)
+++ gcc/builtins.c  (working copy)
@@ -7357,7 +7357,11 @@ integer_valued_real_p (tree t)
 && integer_valued_real_p (TREE_OPERAND (t, 2));

 case REAL_CST:
-  return real_isinteger (TREE_REAL_CST_PTR (t), TYPE_MODE (TREE_TYPE (t)));
+  /* Return true for NaN values, since real_isinteger would
+ return false if the value is sNaN.  */
+  return (REAL_VALUE_ISNAN (TREE_REAL_CST (t))
+  || real_isinteger (TREE_REAL_CST_PTR (t),
+ TYPE_MODE (TREE_TYPE (t))));

 CASE_CONVERT:
   {
@@ -7910,8 +7914,13 @@ fold_builtin_trunc (location_t loc, tree fndecl, t
   tree type = TREE_TYPE (TREE_TYPE (fndecl));

   x = TREE_REAL_CST (arg);
-  real_trunc (&r, TYPE_MODE (type), &x);
-  return build_real (type, r);
+  /* Avoid the folding if flag_signaling_nans is on.  */
+  if (!(HONOR_SNANS (TYPE_MODE (type))
+&& REAL_VALUE_ISNAN (x)))
+  {
+real_trunc (&r, TYPE_MODE (type), &x);
+return build_real (type, r);
+  }
 }

   return fold_trunc_transparent_mathfn (loc, fndecl, arg);
@@ -8297,9 +8306,15 @@ fold_builtin_pow (location_t loc, tree fndecl, tre
  bool inexact;

  x = TREE_REAL_CST (arg0);
+
	  inexact = real_powi (&x, TYPE_MODE (type), &x, n);
- if (flag_unsafe_math_optimizations || !inexact)
-   return build_real (type, x);
+
+  /* Avoid the folding if flag_signaling_nans is on.  */
+ if (flag_unsafe_math_optimizations
+  || (!inexact
+  && !(HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg0)))
+   && REAL_VALUE_ISNAN (x
+ return build_real (type, x);
}

  /* Strip sign ops from even integer powers.  */
@@ -8388,8 +8403,14 @@ fold_builtin_powi (location_t loc, tree fndecl ATT
{
  REAL_VALUE_TYPE x;
  x = TREE_REAL_CST (arg0);
- real_powi (&x, TYPE_MODE (type), &x, c);
- return build_real (type, x);
+
+  /* Avoid the folding if flag_signaling_nans is on.  */
+  if (!(HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg0)))
+&& REAL_VALUE_ISNAN (x)))
+  {
+   real_powi (&x, TYPE_MODE (type), &x, c);
+   return build_real (type, x);
+  }
}

   /* Optimize pow(x,0) = 1.0.  */
Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 228700)
+++ gcc/fold-const.c(working copy)
@@ -1185,9 +1185,21 @@ const_binop (enum tree_code code, tree arg1, tree
   /* If either operand is a NaN, just return it.  Otherwise, set up
 for floating-point trap; we return an overflow.  */
   if (REAL_VALUE_ISNAN (d1))
-   return arg1;
+  {
+/* Make resulting NaN value to be qNaN when flag_signaling_nans
+   is off.  */
+d1.signalling = 0;
+t = build_real (type, d1);
+   return t;
+  }
   else if (REAL_VALUE_ISNAN (d2))
-   return arg2;
+  {
+/* Make resulting NaN value to be qNaN when flag_signaling_nans
+   is off.  */
+d2.signalling = 0;
+t = build_real (type, d2);
+   return t;
+  }

   inexact = real_arithmetic (&value, code, &d1, &d2);
   real_convert (&result, mode, &value);
@@ -1557,6 +1569,15 @@ const_binop (enum tree_code code, tree type, tree
 tree
 const_unop (enum tree_code code, 

Re: [PATCH 8/9] Add TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID

2015-10-13 Thread Richard Biener
On Tue, Oct 13, 2015 at 1:27 AM, Richard Henderson  wrote:
> On 10/12/2015 09:10 PM, Richard Biener wrote:

 The check_loadstore change should instead have adjusted the
 flag_delete_null_pointer_checks guard in
 infer_nonnull_range_by_dereference.
>>>
>>>
>>>
>>> Nope, that doesn't work.  You have to wait until you see the actual MEM
>>> being dereferenced before you can look at it's address space.
>>
>>
>> Well, as we are explicitely looking for the pointer 'op' we know the
>> address-space
>> beforehand, no?  TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (op)))?
>
>
> No.  We don't even know what type we're looking for; we're merely looking
> for any use of NULL within any memory reference within STMT.
>
> Specifically, when we're not looking for a specific SSA_NAME (which would be
> properly typed), we always pass in a plain (void *)0:
>
>   bool by_dereference
> = infer_nonnull_range_by_dereference (stmt, null_pointer_node);

Ick.

Richard.

>
>
> r~


Re: Do not use TYPE_CANONICAL in useless_type_conversion

2015-10-13 Thread Eric Botcazou
> How about using in store_bit_field the same logic you added to
> store_expr_with_bounds to get input MEMs to have a compatible mode?
> This patch was regstrapped on i686-linux-gnu and x86_64-linux-gnu.  Ok
> to install?
> 
> 
> support BLKmode inputs for store_bit_field
> 
> From: Alexandre Oliva 
> 
> Revision 228586 changed useless_type_conversion_p and added mode
> changes for MEM:BLKmode inputs in store_expr_with_bounds, but it
> missed store_bit_field.  This caused ada/rts/s-regpat.ads to fail
> compilation on x86_64-linux-gnu.

Thanks for fixing it.  Note that this is PR middle-end/67912.

-- 
Eric Botcazou


Re: [PATCH, sparc]: Use ROUND_UP and ROUND_DOWN macros

2015-10-13 Thread Eric Botcazou
> Two functional changes I'd like to point out:
> 
>  /* ALIGN FRAMES on double word boundaries */
> -#define SPARC_STACK_ALIGN(LOC) \
> -  (TARGET_ARCH64 ? (((LOC)+15) & ~15) : (((LOC)+7) & ~7))
> +#define SPARC_STACK_ALIGN(LOC) ROUND_UP ((LOC), UNITS_PER_WORD * 2)
> 
> The one above uses UNITS_PER_WORD in stack alignment calculation

OK.

>/* Always preserve double-word alignment.  */
> -  offset = (offset + 8) & -8;
> +  offset = ROUND_UP (offset, 8);
> 
> The one above looks like off-by-one bug, but this needs a confirmation.

No, it's correct, it's a bump of 8 followed by a ROUND_DOWN (the offset may or 
may not have been bumped by 4 already in the code just above).

-- 
Eric Botcazou



Re: [PATCH, sparc]: Use ROUND_UP and ROUND_DOWN macros

2015-10-13 Thread Eric Botcazou
> In this case, I think it is better to write this part as:
> 
> --cut here--
> offset += 8;
> 
> /* Always preserve double-word alignment.  */
> offset = ROUND_DOWN (offset, 8);
> --cut here--

Not convinced, having offset == 12 after the first line doesn't make sense.

I'd just beef up the comment:

/* Bump and round down to double word in case we already bumped by 4.  */
offset = ROUND_DOWN (offset + 8, 8);

-- 
Eric Botcazou


Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Hurugalawadi, Naveen
Hi Richard,

Thanks for the comments. Sorry, I was confused with handling the const and 
variable 
together part. Have modified them.
Also, considered that both (X & Y) can be const or variable in those cases
for which match patterns have been added.
Please let me know whether its correct or only "Y" should be both const and 
variable
whereas the "X" should be variable always.

Please find attached the patch as per your comments.
Please review the patch and let me know if any further modifications 
are required.

Am learning lots of useful stuff while porting these patches. 
Thanks for all the help again.

>> Looks like I really need to make 'match' handle these kind of things.
I assume that its for bit ops, and binary operations like (A & B) and so on.
Should I try doing that part? Also, how do we know which patterns should
be const or variable or supports both?

Thanks,
Naveen

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index de45a2c..2d81b2c 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -9232,26 +9232,6 @@ fold_binary_loc (location_t loc,
   return NULL_TREE;
 
 case PLUS_EXPR:
-  if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
-	{
-	  /* X + (X / CST) * -CST is X % CST.  */
-	  if (TREE_CODE (arg1) == MULT_EXPR
-	  && TREE_CODE (TREE_OPERAND (arg1, 0)) == TRUNC_DIV_EXPR
-	  && operand_equal_p (arg0,
-  TREE_OPERAND (TREE_OPERAND (arg1, 0), 0), 0))
-	{
-	  tree cst0 = TREE_OPERAND (TREE_OPERAND (arg1, 0), 1);
-	  tree cst1 = TREE_OPERAND (arg1, 1);
-	  tree sum = fold_binary_loc (loc, PLUS_EXPR, TREE_TYPE (cst1),
-  cst1, cst0);
-	  if (sum && integer_zerop (sum))
-		return fold_convert_loc (loc, type,
-	 fold_build2_loc (loc, TRUNC_MOD_EXPR,
-		  TREE_TYPE (arg0), arg0,
-		  cst0));
-	}
-	}
-
   /* Handle (A1 * C1) + (A2 * C2) with A1, A2 or C1, C2 being the same or
 	 one.  Make sure the type is not saturating and has the signedness of
 	 the stripped operands, as fold_plusminus_mult_expr will re-associate.
@@ -9692,28 +9672,6 @@ fold_binary_loc (location_t loc,
 			fold_convert_loc (loc, type,
 	  TREE_OPERAND (arg0, 0)));
 
-  if (! FLOAT_TYPE_P (type))
-	{
-	  /* Fold (A & ~B) - (A & B) into (A ^ B) - B, where B is
-	 any power of 2 minus 1.  */
-	  if (TREE_CODE (arg0) == BIT_AND_EXPR
-	  && TREE_CODE (arg1) == BIT_AND_EXPR
-	  && operand_equal_p (TREE_OPERAND (arg0, 0),
-  TREE_OPERAND (arg1, 0), 0))
-	{
-	  tree mask0 = TREE_OPERAND (arg0, 1);
-	  tree mask1 = TREE_OPERAND (arg1, 1);
-	  tree tem = fold_build1_loc (loc, BIT_NOT_EXPR, type, mask0);
-
-	  if (operand_equal_p (tem, mask1, 0))
-		{
-		  tem = fold_build2_loc (loc, BIT_XOR_EXPR, type,
- TREE_OPERAND (arg0, 0), mask1);
-		  return fold_build2_loc (loc, MINUS_EXPR, type, tem, mask1);
-		}
-	}
-	}
-
   /* Fold __complex__ ( x, 0 ) - __complex__ ( 0, y ) to
 	 __complex__ ( x, -y ).  This is not the same for SNaNs or if
 	 signed zeros are involved.  */
@@ -9803,20 +9761,6 @@ fold_binary_loc (location_t loc,
   goto associate;
 
 case MULT_EXPR:
-  /* (-A) * (-B) -> A * B  */
-  if (TREE_CODE (arg0) == NEGATE_EXPR && negate_expr_p (arg1))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			fold_convert_loc (loc, type,
-	  TREE_OPERAND (arg0, 0)),
-			fold_convert_loc (loc, type,
-	  negate_expr (arg1)));
-  if (TREE_CODE (arg1) == NEGATE_EXPR && negate_expr_p (arg0))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			fold_convert_loc (loc, type,
-	  negate_expr (arg0)),
-			fold_convert_loc (loc, type,
-	  TREE_OPERAND (arg1, 0)));
-
   if (! FLOAT_TYPE_P (type))
 	{
 	  /* Transform x * -C into -x * C if x is easily negatable.  */
@@ -9830,16 +9774,6 @@ fold_binary_loc (location_t loc,
 		  negate_expr (arg0)),
 tem);
 
-	  /* (a * (1 << b)) is (a << b)  */
-	  if (TREE_CODE (arg1) == LSHIFT_EXPR
-	  && integer_onep (TREE_OPERAND (arg1, 0)))
-	return fold_build2_loc (loc, LSHIFT_EXPR, type, op0,
-TREE_OPERAND (arg1, 1));
-	  if (TREE_CODE (arg0) == LSHIFT_EXPR
-	  && integer_onep (TREE_OPERAND (arg0, 0)))
-	return fold_build2_loc (loc, LSHIFT_EXPR, type, op1,
-TREE_OPERAND (arg0, 1));
-
 	  /* (A + A) * C -> A * 2 * C  */
 	  if (TREE_CODE (arg0) == PLUS_EXPR
 	  && TREE_CODE (arg1) == INTEGER_CST
@@ -9882,21 +9816,6 @@ fold_binary_loc (location_t loc,
 	}
   else
 	{
-	  /* Convert (C1/X)*C2 into (C1*C2)/X.  This transformation may change
- the result for floating point types due to rounding so it is applied
- only if -fassociative-math was specify.  */
-	  if (flag_associative_math
-	  && TREE_CODE (arg0) == RDIV_EXPR
-	  && TREE_CODE (arg1) == REAL_CST
-	  && TREE_CODE (TREE_OPERAND (arg0, 0)) == REAL_CST)
-	{
-	  tree tem = const_binop (MULT_EXPR, TREE_OPERAND (arg0, 0),
-  arg1);
-	  if (tem)
-	

Re: [ARM] Add ARMv8.1 command line options.

2015-10-13 Thread Matthew Wahab

Some of the command line options may be unnecessary so I'll drop this patch.
Matthew

On 08/10/15 12:00, Matthew Wahab wrote:

Ping.

Updated patch attached, I've broken the over-long lines added to arm-arches.def 
and
arm-fpus.def.

Matthew

On 17/09/15 18:54, Matthew Wahab wrote:

Hello,

ARMv8.1 is a set of architectural extensions to ARMv8. Support has been
enabled in binutils for ARMv8.1 for the architecture, using the name
"armv8.1-a".

This patch adds support to gcc for specifying an ARMv8.1 architecture
using options "-march=armv8.1-a" and "-march=armv8.1-a+crc". It also
adds the FPU options "-mfpu=neon-fp-armv8.1" and
"-mfpu=crypto-neon-fp-armv8.1", to specify the ARMv8.1 Adv.SIMD
instruction set.  The changes set the appropriate architecture and fpu
options for binutils but don't otherwise change the code generated by
gcc.

Tested for arm-none-linux-gnueabihf with native bootstrap and make
check.

Ok for trunk?
Matthew

2015-09-17  Matthew Wahab  

 * config/arm/arm-arches.def: Add "armv8.1-a" and "armv8.1-a+crc".
 * config/arm/arm-fpus.def: Add "neon-fp-armv8.1" and
 "crypto-neon-fp-armv8.1".
 * config/arm/arm-protos.h (FL2_ARCH8_1): New.
 (FL2_FOR_ARCH8_1A): New.
 * config/arm/arm-tables.opt: Regenerate.
 * config/arm/arm.h (FPU_FL_RDMA): New.
 * doc/invoke.texi (ARM -march): Add "armv8.1-a" and
 "armv8.1-a+crc".
 (ARM -mfpu): Add "neon-fp-armv8.1" and "crypto-neon-fp-armv8.1".






[gomp4.1] Fix up gimple_copy for GIMPLE_OMP_ORDERED clauses

2015-10-13 Thread Jakub Jelinek
Hi!

We forgot to update gimple_copy when clauses were added to
GIMPLE_OMP_ORDERED.  Fixed thusly:

2015-10-13  Jakub Jelinek  

* gimple.c (gimple_copy): Unshare clauses on GIMPLE_OMP_ORDERED.

--- gcc/gimple.c.jj 2015-10-13 09:39:21.0 +0200
+++ gcc/gimple.c2015-10-13 11:25:54.086967126 +0200
@@ -1792,6 +1792,12 @@ gimple_copy (gimple *stmt)
  gimple_omp_critical_set_clauses (as_a  (copy), t);
  goto copy_omp_body;
 
+   case GIMPLE_OMP_ORDERED:
+ t = unshare_expr (gimple_omp_ordered_clauses
+   (as_a  (stmt)));
+ gimple_omp_ordered_set_clauses (as_a  (copy), t);
+ goto copy_omp_body;
+
case GIMPLE_OMP_SECTIONS:
  t = unshare_expr (gimple_omp_sections_clauses (stmt));
  gimple_omp_sections_set_clauses (copy, t);
@@ -1805,7 +1811,6 @@ gimple_copy (gimple *stmt)
case GIMPLE_OMP_SECTION:
case GIMPLE_OMP_MASTER:
case GIMPLE_OMP_TASKGROUP:
-   case GIMPLE_OMP_ORDERED:
copy_omp_body:
  new_seq = gimple_seq_copy (gimple_omp_body (stmt));
  gimple_omp_set_body (copy, new_seq);

Jakub


Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Dominik Vogt
On Tue, Oct 13, 2015 at 02:28:37PM +0200, Bernd Schmidt wrote:
> On 10/13/2015 02:02 PM, Dominik Vogt wrote:
> >When "#pragma GCC pop_options" is used on a platform without
> >support for "#pragma GCC target", Gcc emits a warning.  As
> >pop_options is useful on targets without the target pragma to
> >restore optimizations flags, the warning should be removed.
> >
> >The attached patch does that rather inelegantly by checking if the
> >pragma_parse hook points to the default implementation.  I could't
> >think of a similarly terse but less clumsy way.  Suggestions for a
> >better test are very welcome.
> 
> Why not just remove the code that emits the warning message? Are
> there situations where the warning is justified?

Removing the warning would also affect "#pragma GCC target("foo")
But then, "#pragma GCC asdfg" doesn't produce a warning either, so
what's the point warning about an undefined "target" pragma, but
not about other undefined pragmas.  For me, either way to do this
is good.

By the way, the background is that Glibc used pop_options and the
warning broke building with -Werror (they have solved that in a
different way now).

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany



Re: [Boolean Vector, patch 1/5] Introduce boolean vector to be used as a vector comparison type

2015-10-13 Thread Ilya Enkovich
2015-10-13 16:17 GMT+03:00 Richard Biener :
> On Fri, Oct 9, 2015 at 10:43 PM, Jeff Law  wrote:
>> On 10/02/2015 07:59 AM, Ilya Enkovich wrote:
>>>
>>> 2015-10-02  Ilya Enkovich  
>>>
>>> * doc/tm.texi: Regenerated.
>>> * doc/tm.texi.in (TARGET_VECTORIZE_GET_MASK_MODE): New.
>>> * stor-layout.c (layout_type): Use mode to get vector mask size.
>>> * target.def (get_mask_mode): New.
>>> * targhooks.c (default_get_mask_mode): New.
>>> * targhooks.h (default_get_mask_mode): New.
>>> * gcc/tree-vect-stmts.c (get_same_sized_vectype): Add special case
>>> for boolean vector.
>>> * tree.c (MAX_BOOL_CACHED_PREC): New.
>>> (nonstandard_boolean_type_cache): New.
>>> (build_nonstandard_boolean_type): New.
>>> (make_vector_type): Vector mask has no canonical type.
>>> (build_truth_vector_type): New.
>>> (build_same_sized_truth_vector_type): New.
>>> (truth_type_for): Support vector masks.
>>> * tree.h (VECTOR_BOOLEAN_TYPE_P): New.
>>> (build_truth_vector_type): New.
>>> (build_same_sized_truth_vector_type): New.
>>> (build_nonstandard_boolean_type): New.
>>>
>>>
>>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>>> index eb495a8..098213e 100644
>>> --- a/gcc/doc/tm.texi
>>> +++ b/gcc/doc/tm.texi
>>> @@ -5688,6 +5688,11 @@ mode returned by
>>> @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
>>>   The default is zero which means to not iterate over other vector sizes.
>>>   @end deftypefn
>>>
>>> +@deftypefn {Target Hook} machine_mode TARGET_VECTORIZE_GET_MASK_MODE
>>> (unsigned @var{nunits}, unsigned @var{length})
>>> +This hook returns mode to be used for a mask to be used for a vector
>>> +of specified @var{length} with @var{nunits} elements.
>>> +@end deftypefn
>>
>> Does it make sense to indicate the default used if the target does not
>> provide a definition for this hook?
>>
>>
>>
>>
>>> diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
>>> index 938e54b..58ecd7b 100644
>>> --- a/gcc/stor-layout.c
>>> +++ b/gcc/stor-layout.c
>>> @@ -2184,10 +2184,16 @@ layout_type (tree type)
>>>
>>> TYPE_SATURATING (type) = TYPE_SATURATING (TREE_TYPE (type));
>>>   TYPE_UNSIGNED (type) = TYPE_UNSIGNED (TREE_TYPE (type));
>>> -   TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
>>> -TYPE_SIZE_UNIT
>>> (innertype),
>>> -size_int (nunits));
>>> -   TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE
>>> (innertype),
>>> +   /* Several boolean vector elements may fit in a single unit.  */
>>> +   if (VECTOR_BOOLEAN_TYPE_P (type))
>>> + TYPE_SIZE_UNIT (type)
>>> +   = size_int (GET_MODE_SIZE (type->type_common.mode));
>>
>> Shouldn't this be TYPE_MODE rather than accessing the internals of the tree
>> node directly?
>
> Probably not because of TYPE_MODE interfering for vector types.

Seems I need to roll it back then. I don't think I want scalar mode to
be used for cases when proper integer vector mode is unsupported by
target but returned by default get_mask_mode hook. Such cases just
should be lowered into scalars.

>
> But...
>
> +/* Builds a boolean type of precision PRECISION.
> +   Used for boolean vectors to choose proper vector element size.  */
> +tree
> +build_nonstandard_boolean_type (unsigned HOST_WIDE_INT precision)
> +{
> +  tree type;
> +
> +  if (precision <= MAX_BOOL_CACHED_PREC)
> +{
> +  type = nonstandard_boolean_type_cache[precision];
> +  if (type)
> +   return type;
> +}
> +
> +  type = make_node (BOOLEAN_TYPE);
> +  TYPE_PRECISION (type) = precision;
> +  fixup_unsigned_type (type);
>
> do we really need differing _precision_ boolean types?  I think we only
> need differing size (aka mode) boolean types, no?  Thus, keep precision == 1
> but "only" adjust the mode (possibly by simply setting precision to 1 after
> fixup_unsigned_type ...)?

The reason for that was -1 value of a proper size which may be used as
vector element value. I'm not sure if something breaks in the compiler
if I set 1 precision for all created boolean types, but I assumed it's
reasonable to match precision and actually stored values.

Ilya

>
> Richard.
>
>>
>>> diff --git a/gcc/tree.c b/gcc/tree.c
>>> index 84fd34d..0cb8361 100644
>>> --- a/gcc/tree.c
>>> +++ b/gcc/tree.c
>>> @@ -11067,9 +11130,10 @@ truth_type_for (tree type)
>>>   {
>>> if (TREE_CODE (type) == VECTOR_TYPE)
>>>   {
>>> -  tree elem = lang_hooks.types.type_for_size
>>> -(GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type))), 0);
>>> -  return build_opaque_vector_type (elem, TYPE_VECTOR_SUBPARTS
>>> (type));
>>> +  if (VECTOR_BOOLEAN_TYPE_P (type))
>>> +   return type;
>>> +  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (type),
>>> +  

Re: [vec-cmp, patch 3/6] Vectorize comparison

2015-10-13 Thread Richard Biener
On Thu, Oct 8, 2015 at 5:03 PM, Ilya Enkovich  wrote:
> Hi,
>
> This patch supports comparison statements vectrization basing on introduced 
> optabs.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-08  Ilya Enkovich  
>
> * tree-vect-data-refs.c (vect_get_new_vect_var): Support 
> vect_mask_var.
> (vect_create_destination_var): Likewise.
> * tree-vect-stmts.c (vectorizable_comparison): New.
> (vect_analyze_stmt): Add vectorizable_comparison.
> (vect_transform_stmt): Likewise.
> * tree-vectorizer.h (enum vect_var_kind): Add vect_mask_var.
> (enum stmt_vec_info_type): Add comparison_vec_info_type.
> (vectorizable_comparison): New.
>
>
> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> index 3befa38..9edc663 100644
> --- a/gcc/tree-vect-data-refs.c
> +++ b/gcc/tree-vect-data-refs.c
> @@ -3849,6 +3849,9 @@ vect_get_new_vect_var (tree type, enum vect_var_kind 
> var_kind, const char *name)
>case vect_scalar_var:
>  prefix = "stmp";
>  break;
> +  case vect_mask_var:
> +prefix = "mask";
> +break;
>case vect_pointer_var:
>  prefix = "vectp";
>  break;
> @@ -4403,7 +4406,11 @@ vect_create_destination_var (tree scalar_dest, tree 
> vectype)
>tree type;
>enum vect_var_kind kind;
>
> -  kind = vectype ? vect_simple_var : vect_scalar_var;
> +  kind = vectype
> +? VECTOR_BOOLEAN_TYPE_P (vectype)
> +? vect_mask_var
> +: vect_simple_var
> +: vect_scalar_var;
>type = vectype ? vectype : TREE_TYPE (scalar_dest);
>
>gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 8eda8e9..6949c71 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -7525,6 +7525,211 @@ vectorizable_condition (gimple *stmt, 
> gimple_stmt_iterator *gsi,
>return true;
>  }
>
> +/* vectorizable_comparison.
> +
> +   Check if STMT is comparison expression that can be vectorized.
> +   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
> +   comparison, put it in VEC_STMT, and insert it at GSI.
> +
> +   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
> +
> +bool
> +vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
> +gimple **vec_stmt, tree reduc_def,
> +slp_tree slp_node)
> +{
> +  tree lhs, rhs1, rhs2;
> +  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> +  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
> +  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> +  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
> +  tree vec_compare;
> +  tree new_temp;
> +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> +  tree def;
> +  enum vect_def_type dt, dts[4];
> +  unsigned nunits;
> +  int ncopies;
> +  enum tree_code code;
> +  stmt_vec_info prev_stmt_info = NULL;
> +  int i, j;
> +  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
> +  vec vec_oprnds0 = vNULL;
> +  vec vec_oprnds1 = vNULL;
> +  tree mask_type;
> +  tree mask;
> +
> +  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
> +return false;
> +
> +  mask_type = vectype;
> +  nunits = TYPE_VECTOR_SUBPARTS (vectype);
> +
> +  if (slp_node || PURE_SLP_STMT (stmt_info))
> +ncopies = 1;
> +  else
> +ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
> +
> +  gcc_assert (ncopies >= 1);
> +  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> +return false;
> +
> +  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
> +  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
> +  && reduc_def))
> +return false;
> +
> +  if (STMT_VINFO_LIVE_P (stmt_info))
> +{
> +  if (dump_enabled_p ())
> +   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +"value used after loop.\n");
> +  return false;
> +}
> +
> +  if (!is_gimple_assign (stmt))
> +return false;
> +
> +  code = gimple_assign_rhs_code (stmt);
> +
> +  if (TREE_CODE_CLASS (code) != tcc_comparison)
> +return false;
> +
> +  rhs1 = gimple_assign_rhs1 (stmt);
> +  rhs2 = gimple_assign_rhs2 (stmt);
> +
> +  if (TREE_CODE (rhs1) == SSA_NAME)
> +{
> +  gimple *rhs1_def_stmt = SSA_NAME_DEF_STMT (rhs1);
> +  if (!vect_is_simple_use_1 (rhs1, stmt, loop_vinfo, bb_vinfo,
> +_def_stmt, , , ))
> +   return false;
> +}
> +  else if (TREE_CODE (rhs1) != INTEGER_CST && TREE_CODE (rhs1) != REAL_CST
> +  && TREE_CODE (rhs1) != FIXED_CST)
> +return false;

I think vect_is_simple_use_1 handles constants just fine an def_stmt
is an output,
you don't need to initialize it.

> +
> +  if (TREE_CODE (rhs2) == SSA_NAME)
> +{
> +  gimple *rhs2_def_stmt = SSA_NAME_DEF_STMT (rhs2);
> +  if (!vect_is_simple_use_1 (rhs2, stmt, loop_vinfo, bb_vinfo,
> +_def_stmt, , , ))
> +   return 

[hsa] Fix bitfield alignment ICEs

2015-10-13 Thread Martin Jambor
Hi,

yesterday I did not notice that I introduced an ICE on testcases with
bit-field memory accesses.  The following fixes the issue.  A better
solution would be to expand the bit-field parts of memory expressions
separately, which would often allow us to use better aligned accesses,
but that is not a priority at the moment.

Committed to the branch.
Thanks,

Martin


2015-10-13  Martin Jambor  

* hsa-gen.c (hsa_bitmemref_alignment): New function.
(gen_hsa_insns_for_load): Use it.
(gen_hsa_insns_for_store): Likewise.

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 8f707b5..7f713f6 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -2028,6 +2028,38 @@ gen_hsa_insns_for_bitfield_load (hsa_op_reg *dest, 
hsa_op_address *addr,
   gen_hsa_insns_for_bitfield (dest, value_reg, bitsize, bitpos, hbb);
 }
 
+/* Return the alignment of base memory accesses we issue to perform bit-field
+   memory access REF.  */
+
+static BrigAlignment8_t
+hsa_bitmemref_alignment (tree ref)
+{
+  unsigned HOST_WIDE_INT bit_offset = 0;
+
+  while (true)
+{
+  if (TREE_CODE (ref) == BIT_FIELD_REF)
+   {
+ if (!tree_fits_uhwi_p (TREE_OPERAND (ref, 2)))
+   return BRIG_ALIGNMENT_1;
+ bit_offset += tree_to_uhwi (TREE_OPERAND (ref, 2));
+   }
+  else if (TREE_CODE (ref) == COMPONENT_REF
+  && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
+   bit_offset += int_bit_position (TREE_OPERAND (ref, 1));
+  else
+   break;
+  ref = TREE_OPERAND (ref, 0);
+}
+
+  unsigned HOST_WIDE_INT bits = bit_offset % BITS_PER_UNIT;
+  unsigned HOST_WIDE_INT byte_bits = bit_offset - bits;
+  BrigAlignment8_t base = hsa_alignment_encoding (get_object_alignment (ref));
+  if (byte_bits == 0)
+return base;
+  return MIN (base, hsa_alignment_encoding (byte_bits & -byte_bits));
+}
+
 /* Generate HSAIL instructions loading something into register DEST.  RHS is
tree representation of the loaded data, which are loaded as type TYPE.  Add
instructions to HBB, use SSA_MAP for HSA SSA lookup.  */
@@ -2145,11 +2177,9 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree 
type, hsa_bb *hbb,
  return;
}
 
-  BrigAlignment8_t req_align;
-  req_align = hsa_alignment_encoding (get_object_alignment (rhs));
   if (bitsize || bitpos)
gen_hsa_insns_for_bitfield_load (dest, addr, bitsize, bitpos, hbb,
-req_align);
+hsa_bitmemref_alignment (rhs));
   else
{
  BrigType16_t mtype;
@@ -2158,7 +2188,7 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree 
type, hsa_bb *hbb,
false));
  hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dest,
addr);
- mem->set_align (req_align);
+ mem->set_align (hsa_alignment_encoding (get_object_alignment (rhs)));
  hbb->append_insn (mem);
}
 }
@@ -2194,6 +2224,7 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, 
hsa_bb *hbb,
 vec  *ssa_map)
 {
   HOST_WIDE_INT bitsize = 0, bitpos = 0;
+  BrigAlignment8_t req_align;
   BrigType16_t mtype;
   mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
false));
@@ -2227,10 +2258,11 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, 
hsa_bb *hbb,
 
   hsa_op_reg *value_reg = new hsa_op_reg (mem_type);
 
+  req_align = hsa_bitmemref_alignment (lhs);
   /* Load value from memory.  */
   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mem_type,
value_reg, addr);
-  mem->set_align (hsa_alignment_encoding (get_object_alignment (lhs)));
+  mem->set_align (req_align);
   hbb->append_insn (mem);
 
   /* AND the loaded value with prepared mask.  */
@@ -2271,9 +2303,11 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, 
hsa_bb *hbb,
   src = prepared_reg;
   mtype = mem_type;
 }
+  else
+req_align = hsa_alignment_encoding (get_object_alignment (lhs));
 
   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src, addr);
-  mem->set_align (hsa_alignment_encoding (get_object_alignment (lhs)));
+  mem->set_align (req_align);
 
   /* XXX The HSAIL disasm has another constraint: if the source
  is an immediate then it must match the destination type.  If


Re: [vec-cmp, patch 4/6] Support vector mask invariants

2015-10-13 Thread Richard Biener
On Thu, Oct 8, 2015 at 5:11 PM, Ilya Enkovich  wrote:
> Hi,
>
> This patch adds a special handling of boolean vector invariants.  We need 
> additional code to determine type of generated invariant.  For VEC_COND_EXPR 
> case we even provide this type directly because statement vectype doesn't 
> allow us to compute it.  Separate code is used to generate and expand such 
> vectors.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-08  Ilya Enkovich  
>
> * expr.c (const_vector_mask_from_tree): New.
> (const_vector_from_tree): Use const_vector_mask_from_tree
> for boolean vectors.
> * tree-vect-stmts.c (vect_init_vector): Support boolean vector
> invariants.
> (vect_get_vec_def_for_operand): Add VECTYPE arg.
> (vectorizable_condition): Directly provide vectype for invariants
> used in comparison.
> * tree-vectorizer.h (vect_get_vec_def_for_operand): Add VECTYPE
> arg.
>
>
> diff --git a/gcc/expr.c b/gcc/expr.c
> index 88da8cb..a624a34 100644
> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -11320,6 +11320,40 @@ try_tablejump (tree index_type, tree index_expr, 
> tree minval, tree range,
>return 1;
>  }
>
> +/* Return a CONST_VECTOR rtx representing vector mask for
> +   a VECTOR_CST of booleans.  */
> +static rtx
> +const_vector_mask_from_tree (tree exp)
> +{
> +  rtvec v;
> +  unsigned i;
> +  int units;
> +  tree elt;
> +  machine_mode inner, mode;
> +
> +  mode = TYPE_MODE (TREE_TYPE (exp));
> +  units = GET_MODE_NUNITS (mode);
> +  inner = GET_MODE_INNER (mode);
> +
> +  v = rtvec_alloc (units);
> +
> +  for (i = 0; i < VECTOR_CST_NELTS (exp); ++i)
> +{
> +  elt = VECTOR_CST_ELT (exp, i);
> +
> +  gcc_assert (TREE_CODE (elt) == INTEGER_CST);
> +  if (integer_zerop (elt))
> +   RTVEC_ELT (v, i) = CONST0_RTX (inner);
> +  else if (integer_onep (elt)
> +  || integer_minus_onep (elt))
> +   RTVEC_ELT (v, i) = CONSTM1_RTX (inner);
> +  else
> +   gcc_unreachable ();
> +}
> +
> +  return gen_rtx_CONST_VECTOR (mode, v);
> +}
> +
>  /* Return a CONST_VECTOR rtx for a VECTOR_CST tree.  */
>  static rtx
>  const_vector_from_tree (tree exp)
> @@ -11335,6 +11369,9 @@ const_vector_from_tree (tree exp)
>if (initializer_zerop (exp))
>  return CONST0_RTX (mode);
>
> +  if (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (exp)))
> +  return const_vector_mask_from_tree (exp);
> +
>units = GET_MODE_NUNITS (mode);
>inner = GET_MODE_INNER (mode);
>
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 6949c71..337ea7b 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1308,27 +1308,61 @@ vect_init_vector_1 (gimple *stmt, gimple *new_stmt, 
> gimple_stmt_iterator *gsi)
>  tree
>  vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator 
> *gsi)
>  {
> +  tree val_type = TREE_TYPE (val);
> +  machine_mode mode = TYPE_MODE (type);
> +  machine_mode val_mode = TYPE_MODE(val_type);
>tree new_var;
>gimple *init_stmt;
>tree vec_oprnd;
>tree new_temp;
>
>if (TREE_CODE (type) == VECTOR_TYPE
> -  && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
> -{
> -  if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
> +  && TREE_CODE (val_type) != VECTOR_TYPE)
> +{
> +  /* Handle vector of bool represented as a vector of
> +integers here rather than on expand because it is
> +a default mask type for targets.  Vector mask is
> +built in a following way:
> +
> +tmp = (int)val
> +vec_tmp = {tmp, ..., tmp}
> +vec_cst = VIEW_CONVERT_EXPR(vec_tmp);  */
> +  if (TREE_CODE (val_type) == BOOLEAN_TYPE
> + && VECTOR_MODE_P (mode)
> + && SCALAR_INT_MODE_P (GET_MODE_INNER (mode))
> + && GET_MODE_INNER (mode) != val_mode)
> {
> - if (CONSTANT_CLASS_P (val))
> -   val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
> - else
> + unsigned size = GET_MODE_BITSIZE (GET_MODE_INNER (mode));
> + tree stype = build_nonstandard_integer_type (size, 1);
> + tree vectype = get_vectype_for_scalar_type (stype);
> +
> + new_temp = make_ssa_name (stype);
> + init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
> + vect_init_vector_1 (stmt, init_stmt, gsi);
> +
> + val = make_ssa_name (vectype);
> + new_temp = build_vector_from_val (vectype, new_temp);
> + init_stmt = gimple_build_assign (val, new_temp);
> + vect_init_vector_1 (stmt, init_stmt, gsi);
> +
> + val = build1 (VIEW_CONVERT_EXPR, type, val);

So I don't quite understand - why don't we want to build

   tmp = (bool-element-type)val;
   vec_cst = {tmp, tmp, tmp ... };

?

> +   }
> +  else
> +   {
> + if (!types_compatible_p (TREE_TYPE (type), val_type))
> {
> - new_temp = 

Re: [Boolean Vector, patch 1/5] Introduce boolean vector to be used as a vector comparison type

2015-10-13 Thread Ilya Enkovich
On 09 Oct 14:43, Jeff Law wrote:
> On 10/02/2015 07:59 AM, Ilya Enkovich wrote:
> >+This hook returns mode to be used for a mask to be used for a vector
> >+of specified @var{length} with @var{nunits} elements.
> >+@end deftypefn
> Does it make sense to indicate the default used if the target does not
> provide a definition for this hook?
> 
> 

Sure

> 
> 
> >diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
> >index 938e54b..58ecd7b 100644
> >--- a/gcc/stor-layout.c
> >+++ b/gcc/stor-layout.c
> >@@ -2184,10 +2184,16 @@ layout_type (tree type)
> >
> > TYPE_SATURATING (type) = TYPE_SATURATING (TREE_TYPE (type));
> >  TYPE_UNSIGNED (type) = TYPE_UNSIGNED (TREE_TYPE (type));
> >-TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
> >- TYPE_SIZE_UNIT (innertype),
> >- size_int (nunits));
> >-TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE (innertype),
> >+/* Several boolean vector elements may fit in a single unit.  */
> >+if (VECTOR_BOOLEAN_TYPE_P (type))
> >+  TYPE_SIZE_UNIT (type)
> >+= size_int (GET_MODE_SIZE (type->type_common.mode));
> Shouldn't this be TYPE_MODE rather than accessing the internals of the tree
> node directly?

Previous version of this patch had changes in vector_type_mode and seems I 
copy-pasted this field access from there.
Will fix it here.

> 
> 
> >diff --git a/gcc/tree.c b/gcc/tree.c
> >index 84fd34d..0cb8361 100644
> >--- a/gcc/tree.c
> >+++ b/gcc/tree.c
> >@@ -11067,9 +11130,10 @@ truth_type_for (tree type)
> >  {
> >if (TREE_CODE (type) == VECTOR_TYPE)
> >  {
> >-  tree elem = lang_hooks.types.type_for_size
> >-(GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type))), 0);
> >-  return build_opaque_vector_type (elem, TYPE_VECTOR_SUBPARTS (type));
> >+  if (VECTOR_BOOLEAN_TYPE_P (type))
> >+return type;
> >+  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (type),
> >+  GET_MODE_SIZE (TYPE_MODE (type)));
> Presumably you're not building an opaque type anymore because you want
> warnings if somethings tries to do a conversion?  I'm going to assume this
> was intentional.

Right.  I don't expect front-end to cast boolean vector to anything.  Its usage 
should be limited by VEC_COND_EXPR.

> 
> 
> With the doc update and the fix to use TYPE_MODE (assuming there's not a
> good reason to be looking at the underlying type directly) this is OK.
> 
> jeff

Here is an updated version.

Thanks,
Ilya
--
2015-10-13  Ilya Enkovich  

* doc/tm.texi: Regenerated.
* doc/tm.texi.in (TARGET_VECTORIZE_GET_MASK_MODE): New.
* stor-layout.c (layout_type): Use mode to get vector mask size.
* target.def (get_mask_mode): New.
* targhooks.c (default_get_mask_mode): New.
* targhooks.h (default_get_mask_mode): New.
* gcc/tree-vect-stmts.c (get_same_sized_vectype): Add special case
for boolean vector.
* tree.c (MAX_BOOL_CACHED_PREC): New.
(nonstandard_boolean_type_cache): New.
(build_nonstandard_boolean_type): New.
(make_vector_type): Vector mask has no canonical type.
(build_truth_vector_type): New.
(build_same_sized_truth_vector_type): New.
(truth_type_for): Support vector masks.
* tree.h (VECTOR_BOOLEAN_TYPE_P): New.
(build_truth_vector_type): New.
(build_same_sized_truth_vector_type): New.
(build_nonstandard_boolean_type): New.


diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 33939ec..914cfea 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4225,6 +4225,8 @@ address;  but often a machine-dependent strategy can 
generate better code.
 
 @hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
 
+@hook TARGET_VECTORIZE_GET_MASK_MODE
+
 @hook TARGET_VECTORIZE_INIT_COST
 
 @hook TARGET_VECTORIZE_ADD_STMT_COST
diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
index 938e54b..d2289d9 100644
--- a/gcc/stor-layout.c
+++ b/gcc/stor-layout.c
@@ -2184,10 +2184,16 @@ layout_type (tree type)
 
TYPE_SATURATING (type) = TYPE_SATURATING (TREE_TYPE (type));
 TYPE_UNSIGNED (type) = TYPE_UNSIGNED (TREE_TYPE (type));
-   TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
-TYPE_SIZE_UNIT (innertype),
-size_int (nunits));
-   TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE (innertype),
+   /* Several boolean vector elements may fit in a single unit.  */
+   if (VECTOR_BOOLEAN_TYPE_P (type))
+ TYPE_SIZE_UNIT (type)
+   = size_int (GET_MODE_SIZE (TYPE_MODE (type)));
+   else
+ TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
+  TYPE_SIZE_UNIT (innertype),
+  size_int (nunits));

Re: [Boolean Vector, patch 1/5] Introduce boolean vector to be used as a vector comparison type

2015-10-13 Thread Richard Biener
On Fri, Oct 9, 2015 at 10:43 PM, Jeff Law  wrote:
> On 10/02/2015 07:59 AM, Ilya Enkovich wrote:
>>
>> 2015-10-02  Ilya Enkovich  
>>
>> * doc/tm.texi: Regenerated.
>> * doc/tm.texi.in (TARGET_VECTORIZE_GET_MASK_MODE): New.
>> * stor-layout.c (layout_type): Use mode to get vector mask size.
>> * target.def (get_mask_mode): New.
>> * targhooks.c (default_get_mask_mode): New.
>> * targhooks.h (default_get_mask_mode): New.
>> * gcc/tree-vect-stmts.c (get_same_sized_vectype): Add special case
>> for boolean vector.
>> * tree.c (MAX_BOOL_CACHED_PREC): New.
>> (nonstandard_boolean_type_cache): New.
>> (build_nonstandard_boolean_type): New.
>> (make_vector_type): Vector mask has no canonical type.
>> (build_truth_vector_type): New.
>> (build_same_sized_truth_vector_type): New.
>> (truth_type_for): Support vector masks.
>> * tree.h (VECTOR_BOOLEAN_TYPE_P): New.
>> (build_truth_vector_type): New.
>> (build_same_sized_truth_vector_type): New.
>> (build_nonstandard_boolean_type): New.
>>
>>
>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>> index eb495a8..098213e 100644
>> --- a/gcc/doc/tm.texi
>> +++ b/gcc/doc/tm.texi
>> @@ -5688,6 +5688,11 @@ mode returned by
>> @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
>>   The default is zero which means to not iterate over other vector sizes.
>>   @end deftypefn
>>
>> +@deftypefn {Target Hook} machine_mode TARGET_VECTORIZE_GET_MASK_MODE
>> (unsigned @var{nunits}, unsigned @var{length})
>> +This hook returns mode to be used for a mask to be used for a vector
>> +of specified @var{length} with @var{nunits} elements.
>> +@end deftypefn
>
> Does it make sense to indicate the default used if the target does not
> provide a definition for this hook?
>
>
>
>
>> diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
>> index 938e54b..58ecd7b 100644
>> --- a/gcc/stor-layout.c
>> +++ b/gcc/stor-layout.c
>> @@ -2184,10 +2184,16 @@ layout_type (tree type)
>>
>> TYPE_SATURATING (type) = TYPE_SATURATING (TREE_TYPE (type));
>>   TYPE_UNSIGNED (type) = TYPE_UNSIGNED (TREE_TYPE (type));
>> -   TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
>> -TYPE_SIZE_UNIT
>> (innertype),
>> -size_int (nunits));
>> -   TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE
>> (innertype),
>> +   /* Several boolean vector elements may fit in a single unit.  */
>> +   if (VECTOR_BOOLEAN_TYPE_P (type))
>> + TYPE_SIZE_UNIT (type)
>> +   = size_int (GET_MODE_SIZE (type->type_common.mode));
>
> Shouldn't this be TYPE_MODE rather than accessing the internals of the tree
> node directly?

Probably not because of TYPE_MODE interfering for vector types.

But...

+/* Builds a boolean type of precision PRECISION.
+   Used for boolean vectors to choose proper vector element size.  */
+tree
+build_nonstandard_boolean_type (unsigned HOST_WIDE_INT precision)
+{
+  tree type;
+
+  if (precision <= MAX_BOOL_CACHED_PREC)
+{
+  type = nonstandard_boolean_type_cache[precision];
+  if (type)
+   return type;
+}
+
+  type = make_node (BOOLEAN_TYPE);
+  TYPE_PRECISION (type) = precision;
+  fixup_unsigned_type (type);

do we really need differing _precision_ boolean types?  I think we only
need differing size (aka mode) boolean types, no?  Thus, keep precision == 1
but "only" adjust the mode (possibly by simply setting precision to 1 after
fixup_unsigned_type ...)?

Richard.

>
>> diff --git a/gcc/tree.c b/gcc/tree.c
>> index 84fd34d..0cb8361 100644
>> --- a/gcc/tree.c
>> +++ b/gcc/tree.c
>> @@ -11067,9 +11130,10 @@ truth_type_for (tree type)
>>   {
>> if (TREE_CODE (type) == VECTOR_TYPE)
>>   {
>> -  tree elem = lang_hooks.types.type_for_size
>> -(GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type))), 0);
>> -  return build_opaque_vector_type (elem, TYPE_VECTOR_SUBPARTS
>> (type));
>> +  if (VECTOR_BOOLEAN_TYPE_P (type))
>> +   return type;
>> +  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (type),
>> + GET_MODE_SIZE (TYPE_MODE (type)));
>
> Presumably you're not building an opaque type anymore because you want
> warnings if something tries to do a conversion?  I'm going to assume this
> was intentional.
>
>
> With the doc update and the fix to use TYPE_MODE (assuming there's not a
> good reason to be looking at the underlying type directly) this is OK.
>
> jeff


Re: [vec-cmp, patch 2/6] Vectorization factor computation

2015-10-13 Thread Richard Biener
On Thu, Oct 8, 2015 at 4:59 PM, Ilya Enkovich  wrote:
> Hi,
>
> This patch handles statements with boolean result in vectorization factor 
> computation.  For comparison, its operands' type is used instead of the
> result type to compute VF.  Other boolean statements are ignored for VF.
>
> Vectype for comparison is computed using type of compared values.  Computed 
> type is propagated into other boolean operations.

This feels rather ad-hoc, mixing up the existing way of computing
vector type and VF.  I'd rather have turned the whole
vector type computation around to the scheme working on the operands
rather than on the lhs and then searching
for smaller/larger types on the rhs'.

I know this is a tricky function (heh, but you make it even worse...).
And it needs a helper with knowledge about operations
so one can compute the result vector type for an operation on its
operands.  The seeds should be PHIs (handled like now)
and loads, and yes, externals need special handling.

Ideally we'd do things in two stages, first compute vector types in a
less constrained manner (not forcing a single vector size)
and then in a 2nd run promote to a common size also computing the VF to do that.

Btw, I think you "mishandle" bool b = boolvar != 0;

Richard.

> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-08  Ilya Enkovich  
>
> * tree-vect-loop.c (vect_determine_vectorization_factor):  Ignore mask
> operations for VF.  Add mask type computation.
> * tree-vect-stmts.c (get_mask_type_for_scalar_type): New.
> * tree-vectorizer.h (get_mask_type_for_scalar_type): New.
>
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 63e29aa..c7e8067 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -183,19 +183,21 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>  {
>struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
> -  int nbbs = loop->num_nodes;
> +  unsigned nbbs = loop->num_nodes;
>unsigned int vectorization_factor = 0;
>tree scalar_type;
>gphi *phi;
>tree vectype;
>unsigned int nunits;
>stmt_vec_info stmt_info;
> -  int i;
> +  unsigned i;
>HOST_WIDE_INT dummy;
>gimple *stmt, *pattern_stmt = NULL;
>gimple_seq pattern_def_seq = NULL;
>gimple_stmt_iterator pattern_def_si = gsi_none ();
>bool analyze_pattern_stmt = false;
> +  bool bool_result;
> +  auto_vec mask_producers;
>
>if (dump_enabled_p ())
>  dump_printf_loc (MSG_NOTE, vect_location,
> @@ -414,6 +416,8 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>   return false;
> }
>
> + bool_result = false;
> +
>   if (STMT_VINFO_VECTYPE (stmt_info))
> {
>   /* The only case when a vectype had been already set is for 
> stmts
> @@ -434,6 +438,32 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
> scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
>   else
> scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
> +
> + /* Bool ops don't participate in vectorization factor
> +computation.  For comparison use compared types to
> +compute a factor.  */
> + if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> +   {
> + mask_producers.safe_push (stmt_info);
> + bool_result = true;
> +
> + if (gimple_code (stmt) == GIMPLE_ASSIGN
> + && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
> +== tcc_comparison
> + && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
> +!= BOOLEAN_TYPE)
> +   scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
> + else
> +   {
> + if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
> +   {
> + pattern_def_seq = NULL;
> + gsi_next ();
> +   }
> + continue;
> +   }
> +   }
> +
>   if (dump_enabled_p ())
> {
>   dump_printf_loc (MSG_NOTE, vect_location,
> @@ -456,7 +486,8 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>   return false;
> }
>
> - STMT_VINFO_VECTYPE (stmt_info) = vectype;
> + if (!bool_result)
> +   STMT_VINFO_VECTYPE (stmt_info) = vectype;
>
>   if (dump_enabled_p ())
> {
> @@ -469,8 +500,9 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>   /* The vectorization factor is according to the smallest
>  scalar type (or the largest vector size, but we only
>  support 

[PATCH 4/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This is Jakub's reapplied patch for disabling ODR violation detection. 
More details can be found here 
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63888).
2015-10-12  Maxim Ostapenko  

	PR bootstrap/63888
	Reapply:
	2015-02-20  Jakub Jelinek  

	* asan/asan_globals.cc (RegisterGlobal): Disable detect_odr_violation
	support until it is rewritten upstream.

	* c-c++-common/asan/pr63888.c: New test.

Index: libsanitizer/asan/asan_globals.cc
===
--- libsanitizer/asan/asan_globals.cc	(revision 250059)
+++ libsanitizer/asan/asan_globals.cc	(working copy)
@@ -146,7 +146,9 @@
   CHECK(AddrIsInMem(g->beg));
   CHECK(AddrIsAlignedByGranularity(g->beg));
   CHECK(AddrIsAlignedByGranularity(g->size_with_redzone));
-  if (flags()->detect_odr_violation) {
+  // This "ODR violation" detection is fundamentally incompatible with
+  // how GCC registers globals.  Disable as useless until rewritten upstream.
+  if (0 && flags()->detect_odr_violation) {
 // Try detecting ODR (One Definition Rule) violation, i.e. the situation
 // where two globals with the same name are defined in different modules.
 if (__asan_region_is_poisoned(g->beg, g->size_with_redzone)) {


Re: [PATCH] PR66870 PowerPC64 Enable gold linker with split stack

2015-10-13 Thread Matthias Klose

On 13.10.2015 00:53, Alan Modra wrote:

On Mon, Oct 12, 2015 at 10:15:04AM -0500, Lynn A. Boger wrote:

Thanks for doing this Alan.  I agree this looks better to me.

I assume by "etc" you mean you did biarch builds for your bootstraps on BE?


By "etc" I meant "and regression tested".

I built four configurations, powerpc-linux 32-bit only,
powerpc64le-linux 64-bit only, biarch powerpc-linux with 32-bit
default, and biarch powerpc64-linux with 64-bit default.


thanks, that works for me as well (biarch powerpc-linux-gnu).



Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Marc Glisse

On Tue, 13 Oct 2015, Hurugalawadi, Naveen wrote:


Please find attached the patch as per your comments.


(hmm, maybe you missed the email I sent with other comments?)

+(simplify
+ (plus (convert? @0) (convert? (xdivamulminusa @0 @1)))
+  (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
+   && tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (trunc_mod (convert @0) (convert @1

Is that true when the conversion changes from signed to unsigned? The 
existing transformation X - (X / Y) * Y appears to be broken as well.


(the version in fold-const is hard to trigger because of canonicalization, 
but it was slightly more general in that it allowed for VECTOR_CST)


+/* Fold (a * (1 << b)) into (a << b)  */
+(simplify
+ (mult:c @0 (convert? (lshift integer_onep@1 @2)))
+  (if (! FLOAT_TYPE_P (type)
+&& tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (lshift @0 (convert @2

Wrong test, did you mean TREE_TYPE (@1) maybe?

--
Marc Glisse


Re: [PATCH] x86 interrupt attribute

2015-10-13 Thread Yulia Koval
Here is the current version of the patch with all the fixes.
Regtested/bootstrapped it on 64 bit.

We need a pointer since interrupt handler will update data pointing
to by frame.  Since error_code isn't at the normal location where the
parameter is passed on stack and frame isn't in a hard register, we
changed ix86_function_arg:

+  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
+{
+  /* The first argument of interrupt handler is a pointer and
+points to the return address slot on stack.  The optional
+second argument is an integer for error code on stack.  */
+  gcc_assert (type != NULL_TREE);
+  if (POINTER_TYPE_P (type))
+   {
+ if (cfun->machine->func_type == TYPE_EXCEPTION)
+   /* (AP) in the current frame in exception handler.  */
+   arg = arg_pointer_rtx;
+ else
+   /* -WORD(AP) in the current frame in interrupt handler.  */
+   arg = force_reg (Pmode,
+plus_constant (Pmode, arg_pointer_rtx,
+   -UNITS_PER_WORD));
+ if (mode != Pmode)
+   arg = convert_to_mode (mode, arg, 1);
+   }
+  else
+   {
+ gcc_assert (TREE_CODE (type) == INTEGER_TYPE
+ && cfun->machine->func_type == TYPE_EXCEPTION
+ && mode == word_mode);
+ /* The error code is at -WORD(AP) in the current frame in
+exception handler.  */
+ arg = gen_rtx_MEM (word_mode,
+plus_constant (Pmode, arg_pointer_rtx,
+   -UNITS_PER_WORD));
+   }
+
+  return arg;
+}
+

to return a pseudo register.  It violates

   Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

Register allocator has no problem with parameters in pseudo registers.
But GCC crashes when it tries to access DECL_INCOMING_RTL as a hard
register when generating debug information.  We worked around it by
doing

+
+  if (cfun->machine->func_type != TYPE_NORMAL)
+{
+  /* Since the pointer argument of interrupt handler isn't a real
+ argument, adjust DECL_INCOMING_RTL for debug output.  */
+  tree arg = DECL_ARGUMENTS (current_function_decl);
+  gcc_assert (arg != NULL_TREE
+ && POINTER_TYPE_P (TREE_TYPE (arg)));
+  if (cfun->machine->func_type == TYPE_EXCEPTION)
+   /* (AP) in the current frame in exception handler.  */
+   DECL_INCOMING_RTL (arg) = arg_pointer_rtx;
+  else
+   /* -WORD(AP) in the current frame in interrupt handler.  */
+   DECL_INCOMING_RTL (arg) = plus_constant (Pmode,
+arg_pointer_rtx,
+-UNITS_PER_WORD);
+}


On Mon, Oct 5, 2015 at 12:29 PM, Uros Bizjak  wrote:
> On Mon, Oct 5, 2015 at 1:17 AM, H.J. Lu  wrote:
>
>>> Looking a bit deeper into the code, it looks that we want to realign
>>> the stack in the interrupt handler. Let's assume that interrupt
>>> handler is calling some other function that saves SSE vector regs to
>>> the stack. According to the x86 ABI, incoming stack of the called
>>> function is assumed to be aligned to 16 bytes. But, interrupt handler
>>> violates this assumption, since the stack could be aligned to only 4
>>> bytes for 32bit and 8 bytes for 64bit targets. Entering the called
>>> function with stack, aligned to less than 16 bytes will certainly
>>> violate ABI.
>>>
>>> So, it looks to me that we need to realign the stack in the interrupt
>>> handler unconditionally to 16bytes. In this case, we also won't need
>>> the following changes:
>>>
>>
>> Current stack alignment implementation requires at least
>> one, maybe two, scratch registers:
>>
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67841
>>
>> Extend it to the interrupt handler, which doesn't have any scratch
>> registers may require significant changes in backend as well as
>> register allocator.
>
> But without realignment, the handler is unusable for anything but
> simple functions. The handler will crash when called function will try
> to save vector reg to stack.
>

 We can use unaligned load and store to avoid crash.
>>>
>>> Oh, sorry, I meant "called function will crash", like:
>>>
>>> -> interrupt when %rsp = 0x...8 ->
>>> -> interrupt handler ->
>>> -> calls some function that tries to save xmm reg to stack
>>> -> crash in the called function
>>>
>>
>> It should be fixed by this patch.   But we need to fix stack
>> alignment in interrupt handler to avoid scratch register.
>>
>>
>> --
>> H.J.
>> ---
>> commit 15f48be1dc7ff48207927d0b835e593d058f695b
>> Author: H.J. Lu 
>> Date:   Sun Oct 4 

Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Bernd Schmidt

On 10/13/2015 02:02 PM, Dominik Vogt wrote:

When "#pragma GCC pop_options" is used on a platform without
support for "#pragma GCC target", Gcc emits a warning.  As
pop_options is useful on targets without the target pragma to
restore optimizations flags, the warning should be removed.

The attached patch does that rather inelegantly by checking if the
pragma_parse hook points to the default implementation.  I couldn't
think of a similarly terse but less clumsy way.  Suggestions for a
better test are very welcome.

gcc/ChangeLog:

* c-pragma.c: Include targhooks.h.
(handle_pragma_pop_options): Do not call
default_target_option_pragma_parse to prevent its warning when using
"#pragma GCC pop_options" on platforms that do not support
"#pragma GCC target".


Why not just remove the code that emits the warning message? Are there 
situations where the warning is justified?


A testcase would be good.


Bernd



Re: [PR debug/67192] Fix C loops' back-jump location

2015-10-13 Thread Bernd Schmidt

On 10/12/2015 04:04 PM, Andreas Arnez wrote:

Since r223098 ("Implement -Wmisleading-indentation") the backward-jump
generated for a C while- or for-loop can get the wrong line number.
This is because the check for misleading indentation peeks ahead one
token, advancing input_location to after the loop, and then
c_finish_loop() creates the back-jump and calls add_stmt(), which
assigns input_location to the statement by default.

This patch swaps the check for misleading indentation with the finishing
of the loop, such that input_location still has the right value at the
time of any invocations of add_stmt().


One could argue that peek_token should not have an effect on 
input_location, and in fact cpp_peek_token seems to take steps to ensure that this 
does not happen, but it looks like c_parser_peek_token does not use that 
mechanism. Still,


gcc/testsuite/ChangeLog:

PR debug/67192
* gcc.dg/guality/pr67192.c: New test.

gcc/c/ChangeLog:

PR debug/67192
* c-parser.c (c_parser_while_statement): Finish the loop before
parsing ahead for misleading indentation.
(c_parser_for_statement): Likewise.


This fix looks simple enough. Ok. (Might want to add noclone to the 
testcase attributes).



Bernd


Re: [PATCH PR67909 PR67947]

2015-10-13 Thread Yuri Rumyantsev
Here is an updated patch with the long line split.
The patch is attached.

Yuri.

2015-10-13 15:38 GMT+03:00 H.J. Lu :
> On Tue, Oct 13, 2015 at 4:57 AM, Yuri Rumyantsev  wrote:
>> Hi All,
>>
>> Here is a simple patch for unswitching outer loop through guard-edge
>> hoisting. The check that guard-edge is around the inner loop was
>> missed.
>>
>> Bootstrapping and regression testing did not show new failures.
>>
>> Is it OK for trunk?
>>
>> ChangeLog:
>> 2014-10-13  Yuri Rumyantsev  
>>
>> PR tree-optimization/67909, 67947
>> * tree-ssa-loop-unswitch.c (find_loop_guard): Add check that GUARD_EDGE
>> really skip the inner loop.
>>
>> gcc/testsuite/ChangeLog
>> * gcc.dg/torture/pr67947.c: New test.
>
> +  /* Guard edge must skip inner loop.  */
> +  if (!dominated_by_p (CDI_DOMINATORS, loop->inner->header,
> +  guard_edge == fe ? te->dest : fe->dest))
>   It should line up with "CDI_DOMINATORS".
>
> + fprintf (dump_file, "Guard edge %d --> %d is not around the
> loop!\n",guard_edge->src->index,guard_edge->dest->index);
>
> Please break long line.
>
> --
> H.J.


patch1
Description: Binary data


[gomp4.1] jit and ada fixes

2015-10-13 Thread Jakub Jelinek
Hi!

When bootstrapping/regtesting, I found small omissions, fixed thusly:

2015-10-13  Jakub Jelinek  

gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10,
DEF_FUNCTION_TYPE_11): Define.
gcc/jit/
* jit-builtins.h (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_11): Define.

--- gcc/ada/gcc-interface/utils.c.jj2015-10-13 10:34:05.0 +0200
+++ gcc/ada/gcc-interface/utils.c   2015-10-13 13:27:49.780598533 +0200
@@ -5369,6 +5369,12 @@ enum c_builtin_type
ARG6, ARG7) NAME,
 #define DEF_FUNCTION_TYPE_8(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8) NAME,
+#define DEF_FUNCTION_TYPE_9(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+   ARG6, ARG7, ARG8, ARG9) NAME,
+#define DEF_FUNCTION_TYPE_10(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+ARG6, ARG7, ARG8, ARG9, ARG10) NAME,
+#define DEF_FUNCTION_TYPE_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
 #define DEF_FUNCTION_TYPE_VAR_0(NAME, RETURN) NAME,
 #define DEF_FUNCTION_TYPE_VAR_1(NAME, RETURN, ARG1) NAME,
 #define DEF_FUNCTION_TYPE_VAR_2(NAME, RETURN, ARG1, ARG2) NAME,
@@ -5392,6 +5398,9 @@ enum c_builtin_type
 #undef DEF_FUNCTION_TYPE_6
 #undef DEF_FUNCTION_TYPE_7
 #undef DEF_FUNCTION_TYPE_8
+#undef DEF_FUNCTION_TYPE_9
+#undef DEF_FUNCTION_TYPE_10
+#undef DEF_FUNCTION_TYPE_11
 #undef DEF_FUNCTION_TYPE_VAR_0
 #undef DEF_FUNCTION_TYPE_VAR_1
 #undef DEF_FUNCTION_TYPE_VAR_2
@@ -5493,6 +5502,18 @@ install_builtin_function_types (void)
ARG6, ARG7, ARG8)   \
   def_fn_type (ENUM, RETURN, 0, 8, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \
   ARG7, ARG8);
+#define DEF_FUNCTION_TYPE_9(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+   ARG6, ARG7, ARG8, ARG9) \
+  def_fn_type (ENUM, RETURN, 0, 9, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \
+  ARG7, ARG8, ARG9);
+#define DEF_FUNCTION_TYPE_10(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5,\
+ARG6, ARG7, ARG8, ARG9, ARG10) \
+  def_fn_type (ENUM, RETURN, 0, 10, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6,
\
+  ARG7, ARG8, ARG9, ARG10);
+#define DEF_FUNCTION_TYPE_11(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5,\
+ARG6, ARG7, ARG8, ARG9, ARG10, ARG11)  \
+  def_fn_type (ENUM, RETURN, 0, 11, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6,
\
+  ARG7, ARG8, ARG9, ARG10, ARG11);
 #define DEF_FUNCTION_TYPE_VAR_0(ENUM, RETURN) \
   def_fn_type (ENUM, RETURN, 1, 0);
 #define DEF_FUNCTION_TYPE_VAR_1(ENUM, RETURN, ARG1) \
@@ -5526,6 +5547,9 @@ install_builtin_function_types (void)
 #undef DEF_FUNCTION_TYPE_6
 #undef DEF_FUNCTION_TYPE_7
 #undef DEF_FUNCTION_TYPE_8
+#undef DEF_FUNCTION_TYPE_9
+#undef DEF_FUNCTION_TYPE_10
+#undef DEF_FUNCTION_TYPE_11
 #undef DEF_FUNCTION_TYPE_VAR_0
 #undef DEF_FUNCTION_TYPE_VAR_1
 #undef DEF_FUNCTION_TYPE_VAR_2
--- gcc/jit/jit-builtins.h.jj   2015-10-13 13:13:02.0 +0200
+++ gcc/jit/jit-builtins.h  2015-10-13 13:31:09.150670112 +0200
@@ -43,8 +43,12 @@ enum jit_builtin_type
ARG6, ARG7) NAME,
 #define DEF_FUNCTION_TYPE_8(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8) NAME,
+#define DEF_FUNCTION_TYPE_9(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+   ARG6, ARG7, ARG8, ARG9) NAME,
 #define DEF_FUNCTION_TYPE_10(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
 ARG6, ARG7, ARG8, ARG9, ARG10) NAME,
+#define DEF_FUNCTION_TYPE_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
 #define DEF_FUNCTION_TYPE_VAR_0(NAME, RETURN) NAME,
 #define DEF_FUNCTION_TYPE_VAR_1(NAME, RETURN, ARG1) NAME,
 #define DEF_FUNCTION_TYPE_VAR_2(NAME, RETURN, ARG1, ARG2) NAME,
@@ -68,7 +72,9 @@ enum jit_builtin_type
 #undef DEF_FUNCTION_TYPE_6
 #undef DEF_FUNCTION_TYPE_7
 #undef DEF_FUNCTION_TYPE_8
+#undef DEF_FUNCTION_TYPE_9
 #undef DEF_FUNCTION_TYPE_10
+#undef DEF_FUNCTION_TYPE_11
 #undef DEF_FUNCTION_TYPE_VAR_0
 #undef DEF_FUNCTION_TYPE_VAR_1
 #undef DEF_FUNCTION_TYPE_VAR_2

Jakub


[PATCH 3/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This is just a reapplied patch for SPARC by David S. Miller. I was unable 
to test this, so could anyone help me here?
2015-10-12  Maxim Ostapenko  

	PR sanitizer/63958
	Reapply:
	2015-03-09  Jakub Jelinek  

	PR sanitizer/63958
	Reapply:
	2014-10-14  David S. Miller  

	* sanitizer_common/sanitizer_platform_limits_linux.cc (time_t):
	Define at __kernel_time_t, as needed for sparc.
	(struct __old_kernel_stat): Don't check if __sparc__ is defined.
	* libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h
	(__sanitizer): Define struct___old_kernel_stat_sz,
	struct_kernel_stat_sz, and struct_kernel_stat64_sz for sparc.
	(__sanitizer_ipc_perm): Adjust for sparc targets.
	(__sanitizer_shmid_ds): Likewsie.
	(__sanitizer_sigaction): Likewise.
	(IOC_SIZE): Likewsie.

Index: libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cc
===
--- libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cc	(revision 250059)
+++ libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cc	(working copy)
@@ -38,6 +38,7 @@
 #define uid_t __kernel_uid_t
 #define gid_t __kernel_gid_t
 #define off_t __kernel_off_t
+#define time_t __kernel_time_t
 // This header seems to contain the definitions of _kernel_ stat* structs.
 #include 
 #undef ino_t
@@ -62,7 +63,7 @@
 }  // namespace __sanitizer
 
 #if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)\
-&& !defined(__mips__)
+&& !defined(__mips__) && !defined(__sparc__)
 COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat));
 #endif
 
Index: libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h
===
--- libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h	(revision 250059)
+++ libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h	(working copy)
@@ -83,6 +83,14 @@
   const unsigned struct_kernel_stat_sz = 144;
   #endif
   const unsigned struct_kernel_stat64_sz = 104;
+#elif defined(__sparc__) && defined(__arch64__)
+  const unsigned struct___old_kernel_stat_sz = 0;
+  const unsigned struct_kernel_stat_sz = 104;
+  const unsigned struct_kernel_stat64_sz = 144;
+#elif defined(__sparc__) && !defined(__arch64__)
+  const unsigned struct___old_kernel_stat_sz = 0;
+  const unsigned struct_kernel_stat_sz = 64;
+  const unsigned struct_kernel_stat64_sz = 104;
 #endif
   struct __sanitizer_perf_event_attr {
 unsigned type;
@@ -105,7 +113,7 @@
 
 #if defined(__powerpc64__)
   const unsigned struct___old_kernel_stat_sz = 0;
-#else
+#elif !defined(__sparc__)
   const unsigned struct___old_kernel_stat_sz = 32;
 #endif
 
@@ -184,6 +192,18 @@
 unsigned short __pad1;
 unsigned long __unused1;
 unsigned long __unused2;
+#elif defined(__sparc__)
+# if defined(__arch64__)
+unsigned mode;
+unsigned short __pad1;
+# else
+unsigned short __pad1;
+unsigned short mode;
+unsigned short __pad2;
+# endif
+unsigned short __seq;
+unsigned long long __unused1;
+unsigned long long __unused2;
 #else
 unsigned short mode;
 unsigned short __pad1;
@@ -201,6 +221,26 @@
 
   struct __sanitizer_shmid_ds {
 __sanitizer_ipc_perm shm_perm;
+  #if defined(__sparc__)
+  # if !defined(__arch64__)
+u32 __pad1;
+  # endif
+long shm_atime;
+  # if !defined(__arch64__)
+u32 __pad2;
+  # endif
+long shm_dtime;
+  # if !defined(__arch64__)
+u32 __pad3;
+  # endif
+long shm_ctime;
+uptr shm_segsz;
+int shm_cpid;
+int shm_lpid;
+unsigned long shm_nattch;
+unsigned long __glibc_reserved1;
+unsigned long __glibc_reserved2;
+  #else
   #ifndef __powerpc__
 uptr shm_segsz;
   #elif !defined(__powerpc64__)
@@ -238,6 +278,7 @@
 uptr __unused4;
 uptr __unused5;
   #endif
+#endif
   };
 #elif SANITIZER_FREEBSD
   struct __sanitizer_ipc_perm {
@@ -555,9 +596,13 @@
 #else
 __sanitizer_sigset_t sa_mask;
 #ifndef __mips__
+#if defined(__sparc__)
+unsigned long sa_flags;
+#else
 int sa_flags;
 #endif
 #endif
+#endif
 #if SANITIZER_LINUX
 void (*sa_restorer)();
 #endif
@@ -799,7 +844,7 @@
 
 #define IOC_NRBITS 8
 #define IOC_TYPEBITS 8
-#if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__)
+#if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__) || defined(__sparc__)
 #define IOC_SIZEBITS 13
 #define IOC_DIRBITS 3
 #define IOC_NONE 1U
@@ -829,7 +874,17 @@
 #define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK)
 #define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK)
 #define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK)
+
+#if defined(__sparc__)
+// In sparc the 14 bits SIZE field overlaps with the
+// least significant bit of DIR, so either IOC_READ or
+// IOC_WRITE shall be 1 in order to get a non-zero SIZE.
+# 

[PATCH 7/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This is the final patch. Force libsanitizer to use an old ABI for ubsan 
float cast data descriptors, because for some exprs (e.g. those of type 
tcc_declaration) we can't get the right location for now. I'm not sure 
about this, perhaps it should be fixed in GCC somehow.
2015-10-13  Maxim Ostapenko  

	* ubsan/ubsan_handlers.cc (looksLikeFloatCastOverflowDataV1): Always
	return true for now.

Index: libsanitizer/ubsan/ubsan_handlers.cc
===
--- libsanitizer/ubsan/ubsan_handlers.cc	(revision 250059)
+++ libsanitizer/ubsan/ubsan_handlers.cc	(working copy)
@@ -307,6 +307,9 @@
 }
 
 static bool looksLikeFloatCastOverflowDataV1(void *Data) {
+  // (TODO): propagate SourceLocation into DataDescriptor and use this
+  // heuristic than.
+  return true;
   // First field is either a pointer to filename or a pointer to a
   // TypeDescriptor.
   u8 *FilenameOrTypeDescriptor;


[PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Dominik Vogt
When "#pragma GCC pop_options" is used on a platform without
support for "#pragma GCC target", Gcc emits a warning.  As
pop_options is useful on targets without the target pragma to
restore optimizations flags, the warning should be removed.

The attached patch does that rather inelegantly by checking if the
pragma_parse hook points to the default implementation.  I couldn't
think of a similarly terse but less clumsy way.  Suggestions for a
better test are very welcome.

gcc/ChangeLog:

* c-pragma.c: Include targhooks.h.
(handle_pragma_pop_options): Do not call
default_target_option_pragma_parse to prevent its warning when using
"#pragma GCC pop_options" on platforms that do not support
"#pragma GCC target".

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
>From d149dd8b9d6c9f720809de3839f2ad5a6825f7e5 Mon Sep 17 00:00:00 2001
From: Dominik Vogt 
Date: Tue, 13 Oct 2015 12:55:21 +0100
Subject: [PATCH] Fix "#pragma GCC pop_options" warning.

---
 gcc/c-family/c-pragma.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 3c34800..b209b7b 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm_p.h"		/* For REGISTER_TARGET_PRAGMAS (why is
    this not a target hook?).  */
 #include "target.h"
+#include "targhooks.h"
 #include "diagnostic.h"
 #include "opts.h"
 #include "plugin.h"
@@ -997,7 +998,9 @@ handle_pragma_pop_options (cpp_reader *ARG_UNUSED(dummy))
 
   if (p->target_binary != target_option_current_node)
 {
-  (void) targetm.target_option.pragma_parse (NULL_TREE, p->target_binary);
+  if (targetm.target_option.pragma_parse
+	  != default_target_option_pragma_parse)
+	(void) targetm.target_option.pragma_parse (NULL_TREE, p->target_binary);
   target_option_current_node = p->target_binary;
 }
 
-- 
2.3.0



[PATCH] Allow FSM to thread single block cases too

2015-10-13 Thread Jeff Law


One of the cases that was missing in the FSM support is threading when 
the path is a single block.  ie, a control statement's output can be 
statically determined just by looking at PHIs in the control statement's 
block for one or incoming edges.


This is necessary to fix a regression if I turn off the old jump 
threader's backedge support.  Just as important, Jan has in the past 
asked about a trivial jump threader to be run during early 
optimizations.  Limiting the FSM bits to this case would likely satisfy 
that need in the future.


Bootstrapped and regression tested on x86_64-linux-gnu.  Installed on 
the trunk.


Jeff
commit a53bb29a1dffd329aa6235b88b0c2a830aa5a59e
Author: Jeff Law 
Date:   Tue Oct 13 06:19:20 2015 -0600

[PATCH] Allow FSM to thread single block cases too

* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
Allow single block jump threading paths.

* gcc.dg/tree-ssa/ssa-thread-13.c: New test.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d71bcd2..caab533 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2015-10-13  Jeff Law  
+
+   * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
+   Allow single block jump threading paths.
+
 2015-10-13  Tom de Vries  
 
PR tree-optimization/67476
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4a08f0f..acf6df5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2015-10-13  Jeff Law  
+
+   * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
+
 2015-10-12  Jeff Law  
 
* gcc.dg/tree-ssa/ssa-thread-12.c: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
new file mode 100644
index 000..5051d11
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
+/* { dg-final { scan-tree-dump "FSM" "vrp1" } } */
+
+typedef struct rtx_def *rtx;
+typedef const struct rtx_def *const_rtx;
+enum rtx_code
+{
+  UNKNOWN, VALUE, DEBUG_EXPR, EXPR_LIST, INSN_LIST, SEQUENCE, ADDRESS,
+DEBUG_INSN, INSN, JUMP_INSN, CALL_INSN, BARRIER, CODE_LABEL, NOTE,
+COND_EXEC, PARALLEL, ASM_INPUT, ASM_OPERANDS, UNSPEC, UNSPEC_VOLATILE,
+ADDR_VEC, ADDR_DIFF_VEC, PREFETCH, SET, USE, CLOBBER, CALL, RETURN,
+EH_RETURN, TRAP_IF, CONST_INT, CONST_FIXED, CONST_DOUBLE, CONST_VECTOR,
+CONST_STRING, CONST, PC, REG, SCRATCH, SUBREG, STRICT_LOW_PART, CONCAT,
+CONCATN, MEM, LABEL_REF, SYMBOL_REF, CC0, IF_THEN_ELSE, COMPARE, PLUS,
+MINUS, NEG, MULT, SS_MULT, US_MULT, DIV, SS_DIV, US_DIV, MOD, UDIV, UMOD,
+AND, IOR, XOR, NOT, ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, ROTATERT, SMIN,
+SMAX, UMIN, UMAX, PRE_DEC, PRE_INC, POST_DEC, POST_INC, PRE_MODIFY,
+POST_MODIFY, NE, EQ, GE, GT, LE, LT, GEU, GTU, LEU, LTU, UNORDERED,
+ORDERED, UNEQ, UNGE, UNGT, UNLE, UNLT, LTGT, SIGN_EXTEND, ZERO_EXTEND,
+TRUNCATE, FLOAT_EXTEND, FLOAT_TRUNCATE, FLOAT, FIX, UNSIGNED_FLOAT,
+UNSIGNED_FIX, FRACT_CONVERT, UNSIGNED_FRACT_CONVERT, SAT_FRACT,
+UNSIGNED_SAT_FRACT, ABS, SQRT, BSWAP, FFS, CLZ, CTZ, POPCOUNT, PARITY,
+SIGN_EXTRACT, ZERO_EXTRACT, HIGH, LO_SUM, VEC_MERGE, VEC_SELECT,
+VEC_CONCAT, VEC_DUPLICATE, SS_PLUS, US_PLUS, SS_MINUS, SS_NEG, US_NEG,
+SS_ABS, SS_ASHIFT, US_ASHIFT, US_MINUS, SS_TRUNCATE, US_TRUNCATE, FMA,
+VAR_LOCATION, DEBUG_IMPLICIT_PTR, ENTRY_VALUE, LAST_AND_UNUSED_RTX_CODE
+};
+union rtunion_def
+{
+  rtx rt_rtx;
+};
+typedef union rtunion_def rtunion;
+struct rtx_def
+{
+  __extension__ enum rtx_code code:16;
+  union u
+  {
+rtunion fld[1];
+  }
+  u;
+};
+
+unsigned int rtx_cost (rtx, enum rtx_code, unsigned char);
+rtx single_set_2 (const_rtx, rtx);
+
+unsigned
+seq_cost (const_rtx seq, unsigned char speed)
+{
+  unsigned cost = 0;
+  rtx set;
+  for (; seq; seq = (((seq)->u.fld[2]).rt_rtx))
+{
+  set =
+   (enum rtx_code) (seq)->code) == INSN)
+ || (((enum rtx_code) (seq)->code) == DEBUG_INSN)
+ || (((enum rtx_code) (seq)->code) == JUMP_INSN)
+ || (((enum rtx_code) (seq)->code) ==
+ CALL_INSN)) ? (((enum rtx_code) seq)->u.fld[4]).rt_rtx))->
+ code) ==
+SET ? (((seq)->u.fld[4]).
+   rt_rtx) : single_set_2 (seq,
+   (((seq)->u.
+ fld[4]).
+rt_rtx))) : (rtx)
+0);
+  if (set)
+   cost += rtx_cost (set, SET, speed);
+}
+}
diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 5be6ee4..9128094 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ 

Re: Fix prototype for print_insn in rtl.h

2015-10-13 Thread Nikolai Bozhenov

On 10/13/2015 03:22 PM, Jeff Law wrote:

On 10/13/2015 02:21 AM, Nikolai Bozhenov wrote:

Currently the prototype for print_insn in rtl.h doesn't match its
definition in sched-vis.c. The patch fixes this mismatch.
I'll run this through the usual bootstrap & regression testing before 
installing later today.

jeff


I've bootstrapped it on x86_64, but I don't see much sense in regression
testing this patch cause it's so small. Though, if you think it's necessary,
I can test it myself and write to you when I get the results.

Thanks,
Nikolai


Re: [RFC VTV] Fix VTV for targets that have section anchors.

2015-10-13 Thread Ramana Radhakrishnan



On 12/10/15 21:44, Jeff Law wrote:
> On 10/09/2015 03:17 AM, Ramana Radhakrishnan wrote:
>> This started as a Friday afternoon project ...
>>
>> It turned out enabling VTV for AArch64 and ARM was a matter of fixing
>> PR67868 which essentially comes from building libvtv with section
>> anchors turned on. The problem was that the flow of control from
>> output_object_block through to switch_section did not have the same
>> special casing for the vtable section that exists in
>> assemble_variable.
> That's some ugly code.  You might consider factoring that code into a 
> function and just calling it from both places.  Your version doesn't seem to 
> handle PECOFF, so I'd probably refactor from assemble_variable.
> 

I was a bit lazy as I couldn't immediately think of a target that would want 
PECOFF, section anchors and VTV. That combination seems to be quite rare, 
anyway point taken on the refactor.

Ok if no regressions ?

>>
>> However both these failures also occur on x86_64 - so I'm content to
>> declare victory on AArch64 as far as basic enablement goes.
> Cool.
> 
>>
>> 1. Are the generic changes to varasm.c ok ? 2. Can we take the
>> AArch64 support in now, given this amount of testing ? Marcus /
>> Caroline ? 3. Any suggestions / helpful debug hints for VTV debugging
>> (other than turning VTV_DEBUG on and inspecting trace) ?
> I think that with refactoring they'd be good to go.  No opinions on the 
> AArch64 specific question -- call for the AArch64 maintainers.
> 
> Good to see someone hacking on vtv.  It's in my queue to look at as well.

Yeah figuring out more about vtv is also in my background queue.

regards
Ramana

PR other/67868

* varasm.c (assemble_variable): Move special vtv handling to..
(handle_vtv_comdat_sections): .. here. New function.
(output_object_block): Handle vtv sections.

libvtv/Changelog

* configure.tgt: Support aarch64 and arm.
diff --git a/gcc/varasm.c b/gcc/varasm.c
index f1564bc..62ad863 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -127,6 +127,7 @@ static void asm_output_aligned_bss (FILE *, tree, const 
char *,
 #endif /* BSS_SECTION_ASM_OP */
 static void mark_weak (tree);
 static void output_constant_pool (const char *, tree);
+static void handle_vtv_comdat_section (section *, const_tree);
 
 /* Well-known sections, each one associated with some sort of *_ASM_OP.  */
 section *text_section;
@@ -2230,56 +2231,10 @@ assemble_variable (tree decl, int top_level 
ATTRIBUTE_UNUSED,
 assemble_noswitch_variable (decl, name, sect, align);
   else
 {
-  /* The following bit of code ensures that vtable_map 
- variables are not only in the comdat section, but that
- each variable has its own unique comdat name.  If this
- code is removed, the variables end up in the same section
- with a single comdat name.
-
- FIXME:  resolve_unique_section needs to deal better with
- decls with both DECL_SECTION_NAME and DECL_ONE_ONLY.  Once
- that is fixed, this if-else statement can be replaced with
- a single call to "switch_to_section (sect)".  */
+  /* Special-case handling of vtv comdat sections.  */
   if (sect->named.name
  && (strcmp (sect->named.name, ".vtable_map_vars") == 0))
-   {
-#if defined (OBJECT_FORMAT_ELF)
-  targetm.asm_out.named_section (sect->named.name,
-sect->named.common.flags
-| SECTION_LINKONCE,
-DECL_NAME (decl));
-  in_section = sect;
-#elif defined (TARGET_PECOFF)
-  /* Neither OBJECT_FORMAT_PE, nor OBJECT_FORMAT_COFF is set here.
- Therefore the following check is used.
- In case a the target is PE or COFF a comdat group section
- is created, e.g. .vtable_map_vars$foo. The linker places
- everything in .vtable_map_vars at the end.
-
- A fix could be made in
- gcc/config/i386/winnt.c: i386_pe_unique_section. */
-  if (TARGET_PECOFF)
-  {
-char *name;
-
-if (TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE)
-  name = ACONCAT ((sect->named.name, "$",
-   IDENTIFIER_POINTER (DECL_NAME (decl)), NULL));
-else
-  name = ACONCAT ((sect->named.name, "$",
-IDENTIFIER_POINTER (DECL_COMDAT_GROUP (DECL_NAME (decl))),
-NULL));
-
-targetm.asm_out.named_section (name,
-   sect->named.common.flags
-   | SECTION_LINKONCE,
-   DECL_NAME (decl));
-in_section = sect;
-}
-#else
-  switch_to_section (sect);
-#endif
-}
+   handle_vtv_comdat_section (sect, decl);
   else
switch_to_section (sect);
   if (align > BITS_PER_UNIT)
@@ 

Re: [AArch64_be] Fix vtbl[34] and vtbx4

2015-10-13 Thread Christophe Lyon
On 12 October 2015 at 15:30, James Greenhalgh  wrote:
> On Fri, Oct 09, 2015 at 05:16:05PM +0100, Christophe Lyon wrote:
>> On 8 October 2015 at 11:12, James Greenhalgh  
>> wrote:
>> > On Wed, Oct 07, 2015 at 09:07:30PM +0100, Christophe Lyon wrote:
>> >> On 7 October 2015 at 17:09, James Greenhalgh  
>> >> wrote:
>> >> > On Tue, Sep 15, 2015 at 05:25:25PM +0100, Christophe Lyon wrote:
>> >> >
>> >> > Why do we want this for vtbx4 rather than putting out a VTBX instruction
>> >> > directly (as in the inline asm versions you replace)?
>> >> >
>> >> I just followed the pattern used for vtbx3.
>> >>
>> >> > This sequence does make sense for vtbx3.
>> >> In fact, I don't see why vtbx3 and vtbx4 should be different?
>> >
>> > The difference between TBL and TBX is in their handling of a request to
>> > select an out-of-range value. For TBL this returns zero, for TBX this
>> > returns the value which was already in the destination register.
>> >
>> > Because the byte-vectors used by the TBX instruction in aarch64 are 128-bit
>> > (so two of them togather allow selecting elements in the range 0-31), and
>> > vtbx3 needs to emulate the AArch32 behaviour of picking elements from 
>> > 3x64-bit
>> > vectors (allowing elements in the range 0-23), we need to manually check 
>> > for
>> > values which would have been out-of-range on AArch32, but are not out
>> > of range for AArch64 and handle them appropriately. For vtbx4 on the other
>> > hand, 2x128-bit registers give the range 0..31 and 4x64-bit registers give
>> > the range 0..31, so we don't need the special masked handling.
>> >
>> > You can find the suggested instruction sequences for the Neon intrinsics
>> > in this document:
>> >
>> >   
>> > http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
>> >
>>
>> Hi James,
>>
>> Please find attached an updated version which hopefully addresses your 
>> comments.
>> Tested on aarch64-none-elf and aarch64_be-none-elf using the Foundation 
>> Model.
>>
>> OK?
>
> Looks good to me,
>
> Thanks,
> James
>

I commited this as r228716, and noticed later that
gcc.target/aarch64/table-intrinsics.c failed because of this patch.

This is because that testcase scans the assembly for 'tbl v' or 'tbx
v', but since I replaced some asm statements,
the space is now a tab.

I plan to commit this (probably obvious?):
2015-10-13  Christophe Lyon  

* gcc/testsuite/gcc.target/aarch64/table-intrinsics.c: Fix regexp
after r228716 (Fix vtbl[34] and vtbx4).

Index: gcc/testsuite/gcc.target/aarch64/table-intrinsics.c
===
--- gcc/testsuite/gcc.target/aarch64/table-intrinsics.c	(revision 228759)
+++ gcc/testsuite/gcc.target/aarch64/table-intrinsics.c	(working copy)
@@ -435,5 +435,5 @@
   return vqtbx4q_p8 (r, tab, idx);
 }
 
-/* { dg-final { scan-assembler-times "tbl v" 42} }  */
-/* { dg-final { scan-assembler-times "tbx v" 30} }  */
+/* { dg-final { scan-assembler-times "tbl\[ |\t\]*v" 42} }  */
+/* { dg-final { scan-assembler-times "tbx\[ |\t\]*v" 30} }  */


Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Richard Biener
On Tue, Oct 13, 2015 at 2:18 PM, Marc Glisse  wrote:
> On Tue, 13 Oct 2015, Richard Biener wrote:
>
>> +/* Simplify ~X & X as zero.  */
>> +(simplify
>> + (bit_and:c (convert? @0) (convert? (bit_not @0)))
>> +  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
>
>
> The test seems unnecessary for this specific transformation.
>
>> +  { build_zero_cst (TREE_TYPE (@0)); }))
>
>
> I'd rather build_zero_cst (type) directly.
>
>> +/* (-A) * (-B) -> A * B  */
>> +(simplify
>> + (mult:c (convert? (negate @0)) (convert? negate_expr_p@1))
>> +  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
>> +   (mult (convert @0) (convert (negate @1)
>>
>> this one is ok with using convert1? and convert2?
>
>
> Is it? Maybe if it also checked tree_nop_conversion_p for @1...

Sorry, your comments are of course correct.  Naveen, please adjust
also according
to these comments.

Richard.

> --
> Marc Glisse


Re: [AArch64_be] Fix vtbl[34] and vtbx4

2015-10-13 Thread James Greenhalgh
On Tue, Oct 13, 2015 at 02:05:01PM +0100, Christophe Lyon wrote:
> I commited this as r228716, and noticed later that
> gcc.target/aarch64/table-intrinsics.c failed because of this patch.
> 
> This is because that testcase scans the assembly for 'tbl v' or 'tbx
> v', but since I replaced some asm statements,
> the space is now a tab.
> 
> I plan to commit this (probably obvious?):

> 2015-10-13  Christophe Lyon  
> 
>   * gcc/testsuite/gcc.target/aarch64/table-intrinsics.c: Fix regexp
>   after r228716 (Fix vtbl[34] and vtbx4).

Bad luck. This is fine (and yes, obvious).

Thanks,
James

> Index: gcc/testsuite/gcc.target/aarch64/table-intrinsics.c
> ===
> --- gcc/testsuite/gcc.target/aarch64/table-intrinsics.c   (revision 
> 228759)
> +++ gcc/testsuite/gcc.target/aarch64/table-intrinsics.c   (working copy)
> @@ -435,5 +435,5 @@
>return vqtbx4q_p8 (r, tab, idx);
>  }
>  
> -/* { dg-final { scan-assembler-times "tbl v" 42} }  */
> -/* { dg-final { scan-assembler-times "tbx v" 30} }  */
> +/* { dg-final { scan-assembler-times "tbl\[ |\t\]*v" 42} }  */
> +/* { dg-final { scan-assembler-times "tbx\[ |\t\]*v" 30} }  */



[PATCH 2/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This patch introduces required compiler changes. Now, we don't version 
asan_init, we have a special __asan_version_mismatch_check_v[n] symbol 
for this.


Also, asan_stack_malloc_[n] doesn't take a local stack as a second 
parameter anymore, so don't pass it.
2015-10-12  Maxim Ostapenko  

config/

	* bootstrap-asan.mk: Replace ASAN_OPTIONS=detect_leaks with
	LSAN_OPTIONS=detect_leaks

gcc/

	* asan.c (asan_emit_stack_protection): Don't pass local stack to
	asan_stack_malloc_[n] anymore.
	(asan_finish_file): Insert __asan_version_mismatch_check_v[n] call.
	* sanitizer.def (BUILT_IN_ASAN_INIT): Rename to __asan_init.
	(BUILT_IN_ASAN_VERSION_MISMATCH_CHECK): Add new builtin call.

gcc/testsuite/

	g++.dg/asan/default-options-1.C: Adjust testcase.

Index: gcc/asan.c
===
--- gcc/asan.c	(revision 228704)
+++ gcc/asan.c	(working copy)
@@ -1132,12 +1132,10 @@
   snprintf (buf, sizeof buf, "__asan_stack_malloc_%d",
 		use_after_return_class);
   ret = init_one_libfunc (buf);
-  rtx addr = convert_memory_address (ptr_mode, base);
-  ret = emit_library_call_value (ret, NULL_RTX, LCT_NORMAL, ptr_mode, 2,
+  ret = emit_library_call_value (ret, NULL_RTX, LCT_NORMAL, ptr_mode, 1,
  GEN_INT (asan_frame_size
 	  + base_align_bias),
- TYPE_MODE (pointer_sized_int_node),
- addr, ptr_mode);
+ TYPE_MODE (pointer_sized_int_node));
   ret = convert_memory_address (Pmode, ret);
   emit_move_insn (base, ret);
   emit_label (lab);
@@ -2470,6 +2468,8 @@
 {
   tree fn = builtin_decl_implicit (BUILT_IN_ASAN_INIT);
   append_to_statement_list (build_call_expr (fn, 0), _ctor_statements);
+  fn = builtin_decl_implicit (BUILT_IN_ASAN_VERSION_MISMATCH_CHECK);
+  append_to_statement_list (build_call_expr (fn, 0), _ctor_statements);
 }
   FOR_EACH_DEFINED_VARIABLE (vnode)
 if (TREE_ASM_WRITTEN (vnode->decl)
Index: gcc/sanitizer.def
===
--- gcc/sanitizer.def	(revision 228704)
+++ gcc/sanitizer.def	(working copy)
@@ -27,8 +27,11 @@
for other FEs by asan.c.  */
 
 /* Address Sanitizer */
-DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_INIT, "__asan_init_v4",
+DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_INIT, "__asan_init",
 		  BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST)
+DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_VERSION_MISMATCH_CHECK,
+		  "__asan_version_mismatch_check_v6",
+		  BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST)
 /* Do not reorder the BUILT_IN_ASAN_{REPORT,CHECK}* builtins, e.g. cfgcleanup.c
relies on this order.  */
 DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_LOAD1, "__asan_report_load1",
Index: gcc/testsuite/g++.dg/asan/default-options-1.C
===
--- gcc/testsuite/g++.dg/asan/default-options-1.C	(revision 228704)
+++ gcc/testsuite/g++.dg/asan/default-options-1.C	(working copy)
@@ -12,4 +12,4 @@
   return 0;
 }
 
-// { dg-output "Using the defaults from __asan_default_options:.* foo=bar.*(\n|\r\n|\r)" }
+// { dg-output "WARNING: found 1 unrecognized flag\\(s\\):(\n|\r\n|\r).*foo(\n|\r\n|\r)" }


[PATCH 5/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This patch removes UBSan stubs from ASan and TSan code. We don't embed 
UBSan into ASan and TSan because that would lead to undefined references 
to C++ stuff when linking with -static-libasan. AFAIK, sanitizer 
developers use different libraries for C and CXX runtimes, but I think 
this is out of scope of this merge.
2015-10-13  Maxim Ostapenko  

	* tsan/tsan_defs.h: Define TSAN_CONTAINS_UBSAN to 0.
	* asan/asan_flags.cc (InitializeFlags): Do not initialize UBSan flags.
	* asan/asan_rtl.cc (AsanInitInternal): Do not init UBSan.

Index: libsanitizer/asan/asan_flags.cc
===
--- libsanitizer/asan/asan_flags.cc	(revision 250059)
+++ libsanitizer/asan/asan_flags.cc	(working copy)
@@ -86,15 +86,6 @@
   RegisterCommonFlags(_parser);
 #endif
 
-#if CAN_SANITIZE_UB
-  __ubsan::Flags *uf = __ubsan::flags();
-  uf->SetDefaults();
-
-  FlagParser ubsan_parser;
-  __ubsan::RegisterUbsanFlags(_parser, uf);
-  RegisterCommonFlags(_parser);
-#endif
-
   // Override from ASan compile definition.
   const char *asan_compile_def = MaybeUseAsanDefaultOptionsCompileDefinition();
   asan_parser.ParseString(asan_compile_def);
@@ -102,20 +93,11 @@
   // Override from user-specified string.
   const char *asan_default_options = MaybeCallAsanDefaultOptions();
   asan_parser.ParseString(asan_default_options);
-#if CAN_SANITIZE_UB
-  const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions();
-  ubsan_parser.ParseString(ubsan_default_options);
-#endif
-
   // Override from command line.
   asan_parser.ParseString(GetEnv("ASAN_OPTIONS"));
 #if CAN_SANITIZE_LEAKS
   lsan_parser.ParseString(GetEnv("LSAN_OPTIONS"));
 #endif
-#if CAN_SANITIZE_UB
-  ubsan_parser.ParseString(GetEnv("UBSAN_OPTIONS"));
-#endif
-
   // Let activation flags override current settings. On Android they come
   // from a system property. On other platforms this is no-op.
   if (!flags()->start_deactivated) {
Index: libsanitizer/asan/asan_rtl.cc
===
--- libsanitizer/asan/asan_rtl.cc	(revision 250059)
+++ libsanitizer/asan/asan_rtl.cc	(working copy)
@@ -513,10 +513,6 @@
   }
 #endif  // CAN_SANITIZE_LEAKS
 
-#if CAN_SANITIZE_UB
-  __ubsan::InitAsPlugin();
-#endif
-
   InitializeSuppressions();
 
   VReport(1, "AddressSanitizer Init done\n");
Index: libsanitizer/tsan/rtl/tsan_defs.h
===
--- libsanitizer/tsan/tsan_defs.h	(revision 250059)
+++ libsanitizer/tsan/tsan_defs.h	(working copy)
@@ -29,7 +29,7 @@
 #endif
 
 #ifndef TSAN_CONTAINS_UBSAN
-# define TSAN_CONTAINS_UBSAN (CAN_SANITIZE_UB && !defined(SANITIZER_GO))
+# define TSAN_CONTAINS_UBSAN 0
 #endif
 
 namespace __tsan {


[PATCH 6/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This patch adjusts the fix for 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61771 to extract the last 
PC from the stack frame if no valid FP is available for ARM.
2015-10-13  Maxim Ostapenko  

	* sanitizer_common/sanitizer_stacktrace.cc (GetCanonicFrame): Assume we
	compiled code with GCC when extracting the caller PC for ARM if no
	valid frame pointer is available.

Index: libsanitizer/sanitizer_common/sanitizer_stacktrace.cc
===
--- libsanitizer/sanitizer_common/sanitizer_stacktrace.cc	(revision 250059)
+++ libsanitizer/sanitizer_common/sanitizer_stacktrace.cc	(working copy)
@@ -62,8 +62,8 @@
   // Nope, this does not look right either. This means the frame after next does
   // not have a valid frame pointer, but we can still extract the caller PC.
   // Unfortunately, there is no way to decide between GCC and LLVM frame
-  // layouts. Assume LLVM.
-  return bp_prev;
+  // layouts. Assume GCC.
+  return bp_prev - 1;
 #else
   return (uhwptr*)bp;
 #endif


Re: Fix prototype for print_insn in rtl.h

2015-10-13 Thread Jeff Law

On 10/13/2015 02:21 AM, Nikolai Bozhenov wrote:

Currently the prototype for print_insn in rtl.h doesn't match its
definition in sched-vis.c. The patch fixes this mismatch.
I'll run this through the usual bootstrap & regression testing before 
installing later today.

jeff


Re: [PATCH PR67909 PR67947]

2015-10-13 Thread H.J. Lu
On Tue, Oct 13, 2015 at 4:57 AM, Yuri Rumyantsev  wrote:
> Hi All,
>
> Here is a simple patch for unswitching outer loop through guard-edge
> hoisting. The check that guard-edge is around the inner loop was
> missed.
>
> Bootstrapping and regression testing did not show new failures.
>
> Is it OK for trunk?
>
> ChangeLog:
> 2015-10-13  Yuri Rumyantsev  
>
> PR tree-optimization/67909, 67947
> * tree-ssa-loop-unswitch.c (find_loop_guard): Add check that GUARD_EDGE
> really skip the inner loop.
>
> gcc/testsuite/ChangeLog
> * gcc.dg/torture/pr67947.c: New test.

+  /* Guard edge must skip inner loop.  */
+  if (!dominated_by_p (CDI_DOMINATORS, loop->inner->header,
+  guard_edge == fe ? te->dest : fe->dest))
  It should line up with "CDI_DOMINATORS".

+ fprintf (dump_file, "Guard edge %d --> %d is not around the
loop!\n",guard_edge->src->index,guard_edge->dest->index);

Please break the long line.

-- 
H.J.


Re: [PATCH] Allow FSM to thread single block cases too

2015-10-13 Thread Richard Biener
On Tue, Oct 13, 2015 at 2:21 PM, Jeff Law  wrote:
>
> One of the cases that was missing in the FSM support is threading when the
> path is a single block.  ie, a control statement's output can be statically
> determined just by looking at PHIs in the control statement's block for one
> or more incoming edges.
>
> This is necessary to fix a regression if I turn off the old jump threader's
> backedge support.  Just as important, Jan has in the past asked about a
> trivial jump threader to be run during early optimizations.  Limiting the
> FSM bits to this case would likely satisfy that need in the future.

I think he asked for trivial forward threads though due to repeated tests.
I hacked FRE to do this (I think), but maybe some trivial cleanup opportunities
are still left here.  Honza?

Richard.

> Bootstrapped and regression tested on x86_64-linux-gnu.  Installed on the
> trunk.
>
> Jeff
>
> commit a53bb29a1dffd329aa6235b88b0c2a830aa5a59e
> Author: Jeff Law 
> Date:   Tue Oct 13 06:19:20 2015 -0600
>
> [PATCH] Allow FSM to thread single block cases too
>
> * tree-ssa-threadbackward.c
> (fsm_find_control_statement_thread_paths):
> Allow single block jump threading paths.
>
> * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index d71bcd2..caab533 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,8 @@
> +2015-10-13  Jeff Law  
> +
> +   * tree-ssa-threadbackward.c
> (fsm_find_control_statement_thread_paths):
> +   Allow single block jump threading paths.
> +
>  2015-10-13  Tom de Vries  
>
> PR tree-optimization/67476
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index 4a08f0f..acf6df5 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,7 @@
> +2015-10-13  Jeff Law  
> +
> +   * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
> +
>  2015-10-12  Jeff Law  
>
> * gcc.dg/tree-ssa/ssa-thread-12.c: New test.
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
> new file mode 100644
> index 000..5051d11
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
> @@ -0,0 +1,70 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
> +/* { dg-final { scan-tree-dump "FSM" "vrp1" } } */
> +
> +typedef struct rtx_def *rtx;
> +typedef const struct rtx_def *const_rtx;
> +enum rtx_code
> +{
> +  UNKNOWN, VALUE, DEBUG_EXPR, EXPR_LIST, INSN_LIST, SEQUENCE, ADDRESS,
> +DEBUG_INSN, INSN, JUMP_INSN, CALL_INSN, BARRIER, CODE_LABEL, NOTE,
> +COND_EXEC, PARALLEL, ASM_INPUT, ASM_OPERANDS, UNSPEC, UNSPEC_VOLATILE,
> +ADDR_VEC, ADDR_DIFF_VEC, PREFETCH, SET, USE, CLOBBER, CALL, RETURN,
> +EH_RETURN, TRAP_IF, CONST_INT, CONST_FIXED, CONST_DOUBLE, CONST_VECTOR,
> +CONST_STRING, CONST, PC, REG, SCRATCH, SUBREG, STRICT_LOW_PART, CONCAT,
> +CONCATN, MEM, LABEL_REF, SYMBOL_REF, CC0, IF_THEN_ELSE, COMPARE, PLUS,
> +MINUS, NEG, MULT, SS_MULT, US_MULT, DIV, SS_DIV, US_DIV, MOD, UDIV,
> UMOD,
> +AND, IOR, XOR, NOT, ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, ROTATERT, SMIN,
> +SMAX, UMIN, UMAX, PRE_DEC, PRE_INC, POST_DEC, POST_INC, PRE_MODIFY,
> +POST_MODIFY, NE, EQ, GE, GT, LE, LT, GEU, GTU, LEU, LTU, UNORDERED,
> +ORDERED, UNEQ, UNGE, UNGT, UNLE, UNLT, LTGT, SIGN_EXTEND, ZERO_EXTEND,
> +TRUNCATE, FLOAT_EXTEND, FLOAT_TRUNCATE, FLOAT, FIX, UNSIGNED_FLOAT,
> +UNSIGNED_FIX, FRACT_CONVERT, UNSIGNED_FRACT_CONVERT, SAT_FRACT,
> +UNSIGNED_SAT_FRACT, ABS, SQRT, BSWAP, FFS, CLZ, CTZ, POPCOUNT, PARITY,
> +SIGN_EXTRACT, ZERO_EXTRACT, HIGH, LO_SUM, VEC_MERGE, VEC_SELECT,
> +VEC_CONCAT, VEC_DUPLICATE, SS_PLUS, US_PLUS, SS_MINUS, SS_NEG, US_NEG,
> +SS_ABS, SS_ASHIFT, US_ASHIFT, US_MINUS, SS_TRUNCATE, US_TRUNCATE, FMA,
> +VAR_LOCATION, DEBUG_IMPLICIT_PTR, ENTRY_VALUE, LAST_AND_UNUSED_RTX_CODE
> +};
> +union rtunion_def
> +{
> +  rtx rt_rtx;
> +};
> +typedef union rtunion_def rtunion;
> +struct rtx_def
> +{
> +  __extension__ enum rtx_code code:16;
> +  union u
> +  {
> +rtunion fld[1];
> +  }
> +  u;
> +};
> +
> +unsigned int rtx_cost (rtx, enum rtx_code, unsigned char);
> +rtx single_set_2 (const_rtx, rtx);
> +
> +unsigned
> +seq_cost (const_rtx seq, unsigned char speed)
> +{
> +  unsigned cost = 0;
> +  rtx set;
> +  for (; seq; seq = (((seq)->u.fld[2]).rt_rtx))
> +{
> +  set =
> +   (enum rtx_code) (seq)->code) == INSN)
> + || (((enum rtx_code) (seq)->code) == DEBUG_INSN)
> + || (((enum rtx_code) (seq)->code) == JUMP_INSN)
> + || (((enum rtx_code) (seq)->code) ==
> + CALL_INSN)) ? (((enum rtx_code) seq)->u.fld[4]).rt_rtx))->
> + code) ==
> +SET ? (((seq)->u.fld[4]).
> +   rt_rtx) : 

Re: [PATCH] Random shuffle moveable: container size

2015-10-13 Thread Aurelio Remonda
On Tue, Oct 13, 2015 at 6:26 AM, Jonathan Wakely  wrote:
> On 08/10/15 10:35 -0300, Aurelio Remonda wrote:
>>
>> This patch reduces the size of the array A (the array that contains
>> the values being shuffled) so the test can pass while running the
>> stdlibc++ testsuite.
>
>
> Ahem! The project's name is libstdc++ !!! :-)

:) My bad! Sorry about that!

>> It also make some minor changes such as:
>> *Deleting a useless call to fill_ascending function on test02.
>> *Changing N from const int to const unsigned int.
>> I have a company-wide copyright assignment, but I don't have commit
>> access.
>
>
> OK, I will commit this (without the unnecessary whitespace changes).
>
> Thanks.

Thank you!
Regards



-- 
Aurelio Remonda

Software Engineer

San Lorenzo 47, 3rd Floor, Office 5
Córdoba, Argentina
Phone: +54-351-4217888 / 4218211


Re: Fix prototype for print_insn in rtl.h

2015-10-13 Thread Jeff Law

On 10/13/2015 06:41 AM, Nikolai Bozhenov wrote:

On 10/13/2015 03:22 PM, Jeff Law wrote:

On 10/13/2015 02:21 AM, Nikolai Bozhenov wrote:

Currently the prototype for print_insn in rtl.h doesn't match its
definition in sched-vis.c. The patch fixes this mismatch.

I'll run this through the usual bootstrap & regression testing before
installing later today.
jeff


I've bootstrapped it on x86_64, but I don't see much sense in regression
testing this patch cause it's so small. Though, if you think it's
necessary,
I can test it myself and write to you when I get the results.
It's standard procedure.  While I agree that a bootstrap is almost 
certainly sufficient here, it's not a big deal to add this to the 
regression run I set up to run while getting the kids ready for school :-)


jeff



[gomp4.1] Testsuite tweaks

2015-10-13 Thread Jakub Jelinek
Hi!

This patch fixes issues in tests I'm not running all the time during
gomp branch development (basically I do just RUNTESTFLAGS=gomp.exp
in gcc subdir and check-target-libgomp).

2015-10-13  Jakub Jelinek  

* c-c++-common/cpp/openmp-define-3.c: Adjust for the new
value of _OPENMP macro.
* c-c++-common/cilk-plus/PS/body.c (foo): Adjust expected diagnostics.
* c-c++-common/goacc-gomp/nesting-fail-1.c (f_acc_parallel,
f_acc_kernels, f_acc_data, f_acc_loop): Add map clause to target data.

--- gcc/testsuite/c-c++-common/cpp/openmp-define-3.c.jj 2015-04-24 
12:32:01.0 +0200
+++ gcc/testsuite/c-c++-common/cpp/openmp-define-3.c2015-10-13 
14:58:40.968654734 +0200
@@ -6,6 +6,6 @@
 # error _OPENMP not defined
 #endif
 
-#if _OPENMP != 201307
+#if _OPENMP != 201511
 # error _OPENMP defined to wrong value
 #endif
--- gcc/testsuite/c-c++-common/cilk-plus/PS/body.c.jj   2015-04-24 
12:32:01.0 +0200
+++ gcc/testsuite/c-c++-common/cilk-plus/PS/body.c  2015-10-13 
15:00:00.946495358 +0200
@@ -27,7 +27,7 @@ void foo()
 #pragma simd
   for (int i=0; i < 1000; ++i)
 {
-#pragma omp for /* { dg-error "OpenMP constructs may not" } */
+#pragma omp for /* { dg-error "OpenMP constructs other than" } */
   for (j=0; j < 1000; ++j)
a[i] = b[i];
 }
--- gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c.jj   2015-04-24 
12:32:01.0 +0200
+++ gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c  2015-10-13 
15:02:18.549500635 +0200
@@ -230,7 +230,7 @@ f_acc_parallel (void)
   {
 #pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
 ;
-#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
+#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
 ;
 #pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
   }
@@ -300,7 +300,7 @@ f_acc_kernels (void)
   {
 #pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
 ;
-#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
+#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
 ;
 #pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
   }
@@ -370,7 +370,7 @@ f_acc_data (void)
   {
 #pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
 ;
-#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
+#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
 ;
 #pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
   }
@@ -450,7 +450,7 @@ f_acc_loop (void)
 {
 #pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
   ;
-#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
+#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
   ;
 #pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
 }

Jakub


[PATCH 0/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko

Hi,

it's been a while since the last libsanitizer merge from upstream into 
GCC happened and the library has significantly changed since that time. 
The main features to be ported are:


-New common strings interceptors were added.
-Various allocator improvements were performed.
-Improvements for ASan deactivated start were performed.
-TSan and LSan were enabled for Aarch64.
-Fast unwinding was enabled for Aarch64.
-New tsan_unaligned_{load, store}_[n] functions were introduced.
-asan_stack_malloc_[n] doesn't take a local stack as a second parameter 
anymore.

-sanitization for std containers is supported now.
-New interface functions for dynamic allocas and VLA's 
poisoning/unpoisoning were introduced.


Some features are not ported for now, but might be enabled in the future:

-Embedded UBSan runtime into ASan and TSan ones. I don't enable this 
now, because of errors during ASan static linkage: GCC uses 
-whole-archive option that would lead to undefined references to C++ stuff.
-UBSan data descriptors for float-cast conversion support location 
propagation now. But sometimes we have loc == UNKNOWN_LOCATION in 
ubsan_instrument_float_cast, so use old ABI for now. See below for details.


The first patch of the series is the merge itself.

The second one introduces corresponding compiler changes.

Other patches are applied to library and they are GCC-specific:

Patches 3 and 4 are just reapplied David's and Jakub's patches for SPARC 
and disabling ODR violation detection respectively.


Patch 5 removes UBSan stubs from ASan and TSan code since we don't 
support embedded UBSan runtime into ASan and TSan.


Patch 6 changes heuristic for extracting last PC from stack frame for 
ARM in fast unwind routine. More details can be found here 
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61771).


Patch 7 forces libsanitizer to use an old ABI for ubsan float cast data 
descriptors, because sometimes we can have loc == UNKNOWN_LOCATION in 
ubsan_instrument_float_cast e.g. in a such case:


..
volatile double foo; // ubsan_instrument_float_cast is called by convert 
function.

..

Since foo is a tcc_declaration, loc is UNKNOWN_LOCATION. I'm actually 
not sure about this, perhaps we can fix this in GCC somehow.


I've regtested and {A, UB}San bootstrapped these patches on 
x86-64-unknown-linux-gnu and aarch64-linux-gnueabi (Juno board, 39 bit 
VA space) and tested for ARM under QEMU-ARM.
Testing ASan under QEMU-AARCH64 revealed many test failures due to LSan 
was enabled. In particular, it tries to call internal_clone function in 
LSan internals, that in turn calls _NR_clone syscall and than QEMU exits 
with EINTR error code (that might be expected, AFAIK QEMU is not very 
good with threads). So, I wonder, if I should disable LSan for AArch64 now?


I'm also asking the community to help me with testing these patches on 
various targets (ARM, PPC, etc.) that I lack access to, so could you help 
me with this please?


-Maxim


Re: [patch 2/6] scalar-storage-order merge: C front-end

2015-10-13 Thread Jeff Law

On 10/06/2015 05:02 AM, Eric Botcazou wrote:

This is the C front-end + C family part.

* doc/extend.texi (type attributes): Document scalar_storage_order.
(Structure-Packing Pragmas): Rename into...
(Structure-Layout Pragmas): ...this.  Document scalar_storage_order.
* doc/invoke.texi (C Dialect Options): Document -fsso-struct
(Warnings): Document -Wno-scalar-storage-order.
* flag-types.h (enum scalar_storage_order_kind): New enumeration.
c-family/
* c-common.c (c_common_attributes): Add scalar_storage_order.
(handle_scalar_storage_order_attribute): New function.
* c-pragma.c (global_sso): New variable.
(maybe_apply_pragma_scalar_storage_order): New function.
(handle_pragma_scalar_storage_order): Likewise.
(init_pragma): Register scalar_storage_order.
* c-pragma.h (maybe_apply_pragma_scalar_storage_order): Declare.
* c.opt (Wscalar-storage-order): New warning.
(fsso-struct=): New option.
c/
* c-decl.c (finish_struct): If the structure has reverse storage
order, rewrite the type of array fields with scalar component.  Call
maybe_apply_pragma_scalar_storage_order on entry.
* c-typeck.c (build_unary_op) : Remove left-overs.  Issue
errors on bit-fields and reverse SSO here and not...
(c_mark_addressable): ...here.
(output_init_element): Adjust call to initializer_constant_valid_p.
(c_build_qualified_type): Propagate TYPE_REVERSE_STORAGE_ORDER.

  doc/extend.texi |   69 ++
  doc/invoke.texi |   22 +++-
  flag-types.h|9 +-
  c-family/c.opt  |   17 
  c-family/c-common.c |   47 ++-
  c-family/c-pragma.c |   50 +
  c-family/c-pragma.h |1
  c/c-typeck.c|   66 ++---
  c/c-decl.c  |   48 +---
  8 files changed, 273 insertions(+), 47 deletions(-)

-- Eric Botcazou


sso-c.diff


Index: doc/extend.texi
===
--- doc/extend.texi (.../trunk/gcc) (revision 228112)
+++ doc/extend.texi (.../branches/scalar-storage-order/gcc) (revision 
228133)
@@ -6310,6 +6310,42 @@ of the structure or union is placed to m
+@itemize
+@item Taking the address of a scalar field of a @code{union} or a
+@code{struct} with reverse scalar storage order is not permitted and will
+yield an error
Seems reasonable.  Certainly avoids a host of problems tracking this 
stuff later I bet.




+static tree
+handle_scalar_storage_order_attribute (tree *node, tree name, tree args,
+  int flags, bool *no_add_attrs)
+{
+  tree id = TREE_VALUE (args);
+  tree type;
+
+  if (TREE_CODE (*node) == TYPE_DECL
+  && ! (flags & ATTR_FLAG_CXX11))
+node = &TREE_TYPE (*node);
+  type = *node;
+
+  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+error ("scalar_storage_order is not supported");
You might want to consider indicating why it's not supported.  Not that 
I expect folks to be using this on a pdp11 :-)





Index: c/c-typeck.c
===
--- c/c-typeck.c(.../trunk/gcc) (revision 228112)
+++ c/c-typeck.c(.../branches/scalar-storage-order/gcc) (revision 
228133)
@@ -4173,18 +4173,10 @@ build_unary_op (location_t location,
  goto return_build_unary_op;
}

-  /* For &x[y], return x+y */
-  if (TREE_CODE (arg) == ARRAY_REF)
-   {
- tree op0 = TREE_OPERAND (arg, 0);
- if (!c_mark_addressable (op0))
-   return error_mark_node;
-   }
Do we still get a proper diagnostic for &x[y] where x isn't something we 
can mark addressable?


Our testsuites aren't particularly good (IMHO) at ensuring we're getting 
diags in all the cases where they're required.


No real objections, assuming that &x[y] diagnostics are still handled 
correctly somewhere.


Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Richard Biener
On Tue, Oct 13, 2015 at 12:52 PM, Hurugalawadi, Naveen
 wrote:
> Hi Richard,
>
> Thanks for the comments. Sorry, I was confused with handling the const and 
> variable
> together part. Have modified them.
> Also, considered that both (X & Y) can be const or variable in those cases
> for which match patterns have been added.

Both can't be constant as (bit_and INTEGER_CST INTEGER_CST) will have been
simplified to a INTEGER_CST already.  Likewise (bit_not INTEGER_CST) will
never appear (that is the problem we are trying to solve!).

> Please let me know whether its correct or only "Y" should be both const and 
> variable
> whereas the "X" should be variable always.
>
> Please find attached the patch as per your comments.
> Please review the patch and let me know if any further modifications
> are required.
>
> Am learning lots of useful stuff while porting these patches.
> Thanks for all the help again.
>
>>> Looks like I really need to make 'match' handle these kind of things.
I assume that it's for bit ops, and binary operations like (A & B) and so on.
> Should I try doing that part? Also, how do we know which patterns should
> be const or variable or supports both?

I was thinking about this for quite a while and didn't find a good solution on
how to implement this reliably other than basically doing the pattern
duplication
in genmatch.  Say, for

/* Fold (A & ~B) - (A & B) into (A ^ B) - B.  */
(simplify
 (minus (bit_and:s @0 (bit_not @1)) (bit_and:s @0 @1))
  (if (! FLOAT_TYPE_P (type))
   (minus (bit_xor @0 @1) @1)))

generate also

(simplify
 (minus (bit_and:s @0 INTEGER_CST@2) (bit_and:s @0 INTEGER_CST@1))
 (if (! FLOAT_TYPE_P (type)
  && wi::eq (const_unop (BIT_NOT_EXPR, @2), @1))
  (minus (bit_xor @0 @1) @1)))

where we'd only target matches and unary ops of depth 1.

The question is whether this is really worth the effort, writing the
above explicitely
isn't too difficult.  So for your case simply do that duplication manually
(not using const_unop of course but wi:: functionality).  Sorry that I misled
you into doing this with (match (xdivamulminusa ..., etc.).  We want to minimize
the number of lines in match.pd and this doesn't really achieve this compared
to duplicating the whole pattern.

Also please take Marcs review comments into account.

+/* Fold (C1/X)*C2 into (C1*C2)/X.  */
+(simplify
+ (mult (rdiv REAL_CST@0 @1) REAL_CST@2)
+  (if (flag_associative_math)
+  (with
+   { tree tem = const_binop (MULT_EXPR, type, @0, @2); }
+  (if (tem)
+   (rdiv { tem; } @1)

this one is ok with :s added on the rdiv

+/* Simplify ~X & X as zero.  */
+(simplify
+ (bit_and:c (convert? @0) (convert? (bit_not @0)))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+  { build_zero_cst (TREE_TYPE (@0)); }))

this one is ok as well.

+/* (-A) * (-B) -> A * B  */
+(simplify
+ (mult:c (convert? (negate @0)) (convert? negate_expr_p@1))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (mult (convert @0) (convert (negate @1)

this one is ok with using convert1? and convert2?

Please consider splitting those three patterns out (with the suggested
adjustments and the corresponding fold-const.c changes) and committing
them separately to make the rest of the patch smaller.

Thanks,
Richard.



> Thanks,
> Naveen


[PATCH PR67909 PR67947]

2015-10-13 Thread Yuri Rumyantsev
Hi All,

Here is a simple patch for unswitching outer loop through guard-edge
hoisting. The check that guard-edge is around the inner loop was
missed.

Bootstrapping and regression testing did not show new failures.

Is it OK for trunk?

ChangeLog:
2014-10-13  Yuri Rumyantsev  

PR tree-optimization/67909, 67947
* tree-ssa-loop-unswitch.c (find_loop_guard): Add check that GUARD_EDGE
really skip the inner loop.

gcc/testsuite/ChangeLog
* gcc.dg/torture/pr67947.c: New test.


patch
Description: Binary data


Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Marc Glisse

On Tue, 13 Oct 2015, Richard Biener wrote:


+/* Simplify ~X & X as zero.  */
+(simplify
+ (bit_and:c (convert? @0) (convert? (bit_not @0)))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))


The test seems unnecessary for this specific transformation.


+  { build_zero_cst (TREE_TYPE (@0)); }))


I'd rather build_zero_cst (type) directly.


+/* (-A) * (-B) -> A * B  */
+(simplify
+ (mult:c (convert? (negate @0)) (convert? negate_expr_p@1))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (mult (convert @0) (convert (negate @1)

this one is ok with using convert1? and convert2?


Is it? Maybe if it also checked tree_nop_conversion_p for @1...

--
Marc Glisse


[gomp4] More openacc loop indirection

2015-10-13 Thread Nathan Sidwell
I've committed this next patch  in my series to move loop partitioning decisions 
to the target compiler.


It introduces 2 more IFN_UNIQUE cases, marking the head and tail sequences of an 
openACC loop.  These are added around the reduction and fork/join regions.  In 
the oacc_device_lower pass we use these markers to reconstruct the openacc 
partitioning regions (their  unique property permits this,  in the same way the 
ptx backend uses the fork/join markers themselves).  Then we scan over the head 
and tail sequences setting the partitioning level.


This patch still doesn't actually defer the partitioning decision -- it's putting 
in place machinery to allow such deferral.  I expect the next patch to complete 
the transition.


nathan
Index: gcc/internal-fn.def
===
--- gcc/internal-fn.def	(revision 228713)
+++ gcc/internal-fn.def	(working copy)
@@ -78,6 +78,10 @@ DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW | E
indicating the axis of forking or joining and return nothing.  */
 #define IFN_UNIQUE_OACC_FORK 1
 #define IFN_UNIQUE_OACC_JOIN 2
+/* HEAD_MARK and TAIL_MARK are used to demark the sequence entering or
+   leaving partitioned execution.  */
+#define IFN_UNIQUE_OACC_HEAD_MARK 3
+#define IFN_UNIQUE_OACC_TAIL_MARK 4
 
 /* DIM_SIZE and DIM_POS return the size of a particular compute
dimension and the executing thread's position within that
Index: gcc/omp-low.c
===
--- gcc/omp-low.c	(revision 228713)
+++ gcc/omp-low.c	(working copy)
@@ -236,6 +236,26 @@ struct omp_for_data
   struct omp_for_data_loop *loops;
 };
 
+/* Describe the OpenACC looping structure of a function.  The entire
+   function is held in a 'NULL' loop.  */
+
+struct oacc_loop
+{
+  oacc_loop *parent; /* Containing loop.  */
+
+  oacc_loop *child; /* First inner loop.  */
+
+  oacc_loop *sibling; /* Next loop within same parent.  */
+
+  location_t loc; /* Location of the loop start.  */
+
+  /* Start of head and tail.  */
+  gcall *head;  /* Head marker function. */
+  gcall *tail;  /* Tail marker function.  */
+
+  /* Partitioning level.  */
+  unsigned level;
+};
 
 static splay_tree all_contexts;
 static int taskreg_nesting_level;
@@ -4737,11 +4757,12 @@ expand_oacc_get_thread_num (gimple_seq *
   return res;
 }
 
-/* Lower the OpenACC reductions of CLAUSES for compute axis DIM.  INNER
-   is true if this is an inner axis of a multi-axis loop.  FORK and
-   JOIN are (optional) fork and join markers.  Generate the
-   before-loop forking sequence in FORK_SEQ and the after-loop joining
-   sequence to JOIN_SEQ.  The general form of these sequences is
+/* Lower the OpenACC reductions of CLAUSES for compute axis LEVEL
+   (which might be a placeholder).  INNER is true if this is an inner
+   axis of a multi-axis loop.  FORK and JOIN are (optional) fork and
+   join markers.  Generate the before-loop forking sequence in
+   FORK_SEQ and the after-loop joining sequence to JOIN_SEQ.  The
+   general form of these sequences is
 
  GOACC_REDUCTION_SETUP
  GOACC_FORK
@@ -4752,7 +4773,7 @@ expand_oacc_get_thread_num (gimple_seq *
  GOACC_REDUCTION_TEARDOWN.  */
 
 static void
-lower_oacc_reductions (location_t loc, tree clauses, unsigned dim, bool inner,
+lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 		   gcall *fork, gcall *join, gimple_seq *fork_seq,
 		   gimple_seq *join_seq, omp_context *ctx)
 {
@@ -4764,7 +4785,6 @@ lower_oacc_reductions (location_t loc, t
   gimple_seq after_join = NULL;
   unsigned count = 0;
   tree lid = build_int_cst (unsigned_type_node, oacc_lid++);
-  tree level = build_int_cst (unsigned_type_node, dim);
 
   for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
@@ -4866,6 +4886,22 @@ lower_oacc_reductions (location_t loc, t
   gimple_seq_add_seq (join_seq, after_join);
 }
 
+/* Emit an OpenACC loop head or tail marker to SEQ.  LEVEL is the
+   partitioning level of the enclosed region.  */ 
+
+static void
+lower_oacc_loop_marker (location_t loc, bool head, tree level,
+			gimple_seq *seq)
+{
+  tree marker = build_int_cst
+(integer_type_node, (head ? IFN_UNIQUE_OACC_HEAD_MARK
+			 : IFN_UNIQUE_OACC_TAIL_MARK));
+  gcall *call = gimple_build_call_internal
+(IFN_UNIQUE, 1 + (level != NULL_TREE), marker, level);
+  gimple_set_location (call, loc);
+  gimple_seq_add_stmt (seq, call);
+}
+
 /* Generate the before and after OpenACC loop sequences.  CLAUSES are
the loop clauses, from which we extract reductions.  Initialize
HEAD and TAIL.  */
@@ -4884,19 +4920,25 @@ lower_oacc_head_tail (location_t loc, tr
   for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
 if (mask & GOMP_DIM_MASK (ix))
   {
-	tree level = build_int_cst (unsigned_type_node, ix);
+	tree place = build_int_cst (integer_type_node, -1);
+	tree level = build_int_cst (integer_type_node, ix);
 	gcall 

Re: [PATCH PR67909 PR67947]

2015-10-13 Thread Richard Biener
On Tue, Oct 13, 2015 at 2:49 PM, Yuri Rumyantsev  wrote:
> Here is updated patch with splitting long line.
> The patch is attached.

Ok with aligning the guard_edge == ... line properly

Thanks,
Richard.

> Yuri.
>
> 2015-10-13 15:38 GMT+03:00 H.J. Lu :
>> On Tue, Oct 13, 2015 at 4:57 AM, Yuri Rumyantsev  wrote:
>>> Hi All,
>>>
>>> Here is a simple patch for unswitching outer loop through guard-edge
>>> hoisting. The check that guard-edge is around the inner loop was
>>> missed.
>>>
>>> Bootstrapping and regression testing did not show new failures.
>>>
>>> Is it OK for trunk?
>>>
>>> ChangeLog:
>>> 2014-10-13  Yuri Rumyantsev  
>>>
>>> PR tree-optimization/67909, 67947
>>> * tree-ssa-loop-unswitch.c (find_loop_guard): Add check that GUARD_EDGE
>>> really skip the inner loop.
>>>
>>> gcc/testsuite/ChangeLog
>>> * gcc.dg/torture/pr67947.c: New test.
>>
>> +  /* Guard edge must skip inner loop.  */
>> +  if (!dominated_by_p (CDI_DOMINATORS, loop->inner->header,
>> +  guard_edge == fe ? te->dest : fe->dest))
>>   It should line up with "CDI_DOMINATORS".
>>
>> + fprintf (dump_file, "Guard edge %d --> %d is not around the
>> loop!\n",guard_edge->src->index,guard_edge->dest->index);
>>
>> Please break lone line.
>>
>> --
>> H.J.


Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Bernd Schmidt

On 10/13/2015 03:31 PM, Dominik Vogt wrote:

On Tue, Oct 13, 2015 at 02:28:37PM +0200, Bernd Schmidt wrote:

On 10/13/2015 02:02 PM, Dominik Vogt wrote:

When "#pragma GCC pop_options" is used on a platform without
support for "#pragma GCC target", Gcc emits a warning.  As
pop_options is useful on targets without the target pragma to
restore optimizations flags, the warning should be removed.

The attached patch does that rather inelegantly by checking if the
pragma_parse hook points to the default implementation.  I couldn't
think of a similarly terse but less clumsy way.  Suggestions for a
better test are very welcome.


Ok, I had to go look at the code to figure out what's going on. A 
suggestion for a possibly less clumsy way - recognize which pragma we're 
looking at from the arguments. Looks like ix86_pragma_target_parse has a 
"! args" test to determine if it has a pop, maybe the default function 
could do the same. If that's insufficient, pass another argument to 
identify clearly in what situation the hook is being parsed.



Bernd


Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data

2015-10-13 Thread Ilya Verbin
On Mon, Jun 15, 2015 at 22:48:50 +0300, Ilya Verbin wrote:
> @@ -950,50 +997,41 @@ GOMP_target (int device, void (*fn) (void *), const 
> void *unused,
> ...
> +  devicep->run_func (devicep->target_id, fn_addr, (void *) 
> tgt_vars->tgt_start);

If mapnum is 0, tgt_vars->tgt_start is uninitialized.  This is not a big bug,
because in this case the target function doesn't use this pointer, however
valgrind warns about sending uninitialized data to target.
OK for gomp-4_1-branch?


libgomp/
* target.c (gomp_map_vars): Zero tgt->tgt_start when mapnum is 0.


diff --git a/libgomp/target.c b/libgomp/target.c
index 95360d1..c4e3323 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -323,6 +323,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
   struct splay_tree_key_s cur_node;
   struct target_mem_desc *tgt
 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
+  tgt->tgt_start = 0;
   tgt->list_count = mapnum;
   tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
   tgt->device_descr = devicep;


  -- Ilya


Re: [[Boolean Vector, patch 5/5] Support boolean vectors in vector lowering

2015-10-13 Thread Ilya Enkovich
2015-10-12 13:37 GMT+03:00 Alan Lawrence :
> On 09/10/15 22:01, Jeff Law wrote:
>
>> So my question for the series as a whole is whether or not we need to do
>> something for the other languages, particularly Fortran.  I was a bit
>> surprised to see this stuff bleed into the C/C++ front-ends and
>> obviously wonder if it's bled into Fortran, Ada, Java, etc.
>
>
> Isn't that just because, we have GNU extensions to C/C++, for vectors? I
> admit I don't know enough Ada/Fortran to know whether we've added GNU
> extensions to those languages as well...
>
> A.

I also got an impression only GNU vector extensions should be
affected. And those are for C/C++ only.

Thanks,
Ilya


Re: [PATCH] gcc/ira.c: Check !HAVE_FP_INSTEAD_INSNS when frame pointer is needed and as global register

2015-10-13 Thread Bernd Schmidt

On 10/13/2015 04:50 PM, Chen Gang wrote:

OK, under the bugzilla, the maintainer treated it as expected behavior
(not a bug). For me, we need more explanation for it (why we treat it
as expected behavior).


A global register is under control of the user. If the compiler uses it 
as a frame pointer, it will get clobbered outside the user's control, 
which is unexpected behaviour. Therefore, the code Mike quoted detects 
that case and issues an error, indicating that you must use 
-fomit-frame-pointer if you expect to use the frame pointer register for 
other purposes.


If you want an address on the stack there's __builtin_frame_address 
which may or may not do what was intended. The code quoted in the 
bugzilla is just invalid.



to `fix it’, one would simple remove this chunk as misguided and fix up any 
code gen issues exposed.



If there were not only one issues related with it, for me, what you said
sounds reasonable to me.


That's totally the wrong thing to do as the issue is not compiler code 
generation, it's the danger of clobbering a user variable.



Bernd


Re: [PATCH] gcc/ira.c: Check !HAVE_FP_INSTEAD_INSNS when frame pointer is needed and as global register

2015-10-13 Thread Chen Gang

On 10/13/15 22:56, Bernd Schmidt wrote:
> On 10/13/2015 04:50 PM, Chen Gang wrote:
>> OK, under the bugzilla, the maintainer treated it as expected behavior
>> (not a bug). For me, we need more explanation for it (why we treat it
>> as expected behavior).
> 
> A global register is under control of the user. If the compiler uses it as a 
> frame pointer, it will get clobbered outside the user's control, which is 
> unexpected behaviour. Therefore, the code Mike quoted detects that case and 
> issues an error, indicating that you must use -fomit-frame-pointer if you 
> expect to use the frame pointer register for other purposes.
> 

OK, thanks.

> If you want an address on the stack there's __builtin_frame_address which may 
> or may not do what was intended. The code quoted in the bugzilla is just 
> invalid.
> 

OK, thank you very much, I shall send related kernel fix patch to kernel
mailing list.

Thanks.
-- 
Chen Gang (陈刚)

Open, share, and attitude like air, water, and life which God blessed


RE: [Patch] [x86_64]: Add bdver4 for multi versioning and fix AMD cpu model detection.

2015-10-13 Thread Kumar, Venkataramanan
Hi Uros,

I realized both GCC 4.9 and GCC 5 branches includes processor subtype 
AMDFAM15H_BDVER4.
So I need to back port not only model selection fix but also the detection of 
model for bdver4.

Is that fine?

Regards,
Venkat.   

> -Original Message-
> From: Kumar, Venkataramanan
> Sent: Friday, October 09, 2015 3:31 PM
> To: 'Uros Bizjak'
> Cc: gcc-patches@gcc.gnu.org
> Subject: RE: [Patch] [x86_64]: Add bdver4 for multi versioning and fix AMD
> cpu model detection.
> 
> Thank you Uros,
> 
> I will  test and commit model selection change in all release branches as 
> well.
> 
> Regards,
> Venkat.
> 
> > -Original Message-
> > From: Uros Bizjak [mailto:ubiz...@gmail.com]
> > Sent: Friday, October 09, 2015 3:25 PM
> > To: Kumar, Venkataramanan
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [Patch] [x86_64]: Add bdver4 for multi versioning and fix
> > AMD cpu model detection.
> >
> > On Fri, Oct 9, 2015 at 11:50 AM, Kumar, Venkataramanan
> >  wrote:
> > > Hi Uros,
> > >
> > > Please find below patch that adds bdver4 target for multi versioning.
> > > Also I while computing model, the extended_model is incorrectly left
> > shifted  by 4. I have removed it now.
> > >
> > > Is below patch Ok for trunk ?
> > > GCC bootstrap and regressions passed.
> >
> > OK for trunk and release branches, where applicable. IMO, model
> > selection fix should be applied to all release branches.
> >
> > Thanks,
> > Uros.
> >
> > > diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index
> > > bb3a722..8676747 100644
> > > --- a/libgcc/ChangeLog
> > > +++ b/libgcc/ChangeLog
> > > @@ -1,3 +1,8 @@
> > > +2015-10-09  Venkataramanan Kumar
> > 
> > > +
> > > +   * config/i386/cpuinfo.c (get_amd_cpu): Detect bdver4.
> > > +   (__cpu_indicator_init): Fix model selection for AMD CPUs.
> > > +
> > >  2015-10-05  Kirill Yukhin  
> > >
> > > * config/i386/cpuinfo.c (get_intel_cpu): Detect "skylake-avx512".
> > > diff --git a/libgcc/config/i386/cpuinfo.c
> > > b/libgcc/config/i386/cpuinfo.c index 0cbbc85..1313ca3 100644
> > > --- a/libgcc/config/i386/cpuinfo.c
> > > +++ b/libgcc/config/i386/cpuinfo.c
> > > @@ -169,6 +169,9 @@ get_amd_cpu (unsigned int family, unsigned int
> > model)
> > >/* Bulldozer version 3 "Steamroller"  */
> > >if (model >= 0x30 && model <= 0x4f)
> > > __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
> > > +  /* Bulldozer version 4 "Excavator"   */
> > > +  if (model >= 0x60 && model <= 0x7f)
> > > +   __cpu_model.__cpu_subtype = AMDFAM15H_BDVER4;
> > >break;
> > >  /* AMD Family 16h "btver2" */
> > >  case 0x16:
> > > @@ -455,7 +458,7 @@ __cpu_indicator_init (void)
> > >if (family == 0x0f)
> > > {
> > >   family += extended_family;
> > > - model += (extended_model << 4);
> > > + model += extended_model;
> > > }
> > >
> > >/* Get CPU type.  */
> > >
> > > Regards,
> > > Venkat.
> > >
> > >
> > >
> > >
> > >
> > >


[PATCH v2] PR rtl-optimization/66790: uninitialized registers handling in REE

2015-10-13 Thread Pierre-Marie de Rodat

Hello,

The first attached patch is the second attempt to fix PR 
rtl-optimization/66790 (see 
).


The second one is a fix for some inconsistency noticed while working on 
the original bug. This specific patch fixes no known bug, but anyway…


Both were bootstrapped and regtested on x86_64-linux. Ok to commit? 
Thank you in advance!


[PATCH 1/2] REE: fix uninitialized registers handling

gcc/ChangeLog:

PR rtl-optimization/66790
* df.h (DF_MIR): New macro.
(DF_LAST_PROBLEM_PLUS1): Update to be past DF_MIR
(DF_MIR_INFO_BB): New macro.
(DF_MIR_IN, DF_MIR_OUT): New macros.
(struct df_mir_bb_info): New.
(df_mir): New macro.
(df_mir_add_problem, df_mir_simulate_one_insn): New forward
declarations.
(df_mir_get_bb_info): New.
* df-problems.c (struct df_mir_problem_data): New.
(df_mir_free_bb_info, df_mir_alloc, df_mir_reset,
df_mir_bb_local_compute, df_mir_local_compute, df_mir_init,
df_mir_confluence_0, df_mir_confluence_n,
df_mir_transfer_function, df_mir_free, df_mir_top_dump,
df_mir_bottom_dump, df_mir_verify_solution_start,
df_mir_verify_solution_end): New.
(problem_MIR): New.
(df_mir_add_problem, df_mir_simulate_one_insn): New.
* timevar.def (TV_DF_MIR): New.
* ree.c: Include bitmap.h
(add_removable_extension): Add an INIT_REGS parameter.  Use it
to skip zero-extensions that may get an uninitialized register.
(find_removable_extensions): Compute must-initialized registers
using the MIR dataflow problem. Update the call to
add_removable_extension.
(find_and_remove_re): Call df_mir_add_problem.

gcc/testsuite/ChangeLog:

* gnat.dg/opt50.adb: New test.
* gnat.dg/opt50_pkg.adb: New helper.
* gnat.dg/opt50_pkg.ads: New helper.

[PATCH 2/2] DF_LIVE: make clobbers cancel effect of previous GENs in
 the same BBs

gcc/ChangeLog:

* df-problems.c (df_live_bb_local_compute): Clear GEN bits for
DF_REF_MUST_CLOBBER references.

--
Pierre-Marie de Rodat
>From d7bf6e8c194f66e6b7e1823ad3d118115e4406bc Mon Sep 17 00:00:00 2001
From: Pierre-Marie de Rodat 
Date: Sat, 18 Jul 2015 13:10:45 +0200
Subject: [PATCH 1/2] REE: fix uninitialized registers handling

gcc/ChangeLog:

	PR rtl-optimization/66790
	* df.h (DF_MIR): New macro.
	(DF_LAST_PROBLEM_PLUS1): Update to be past DF_MIR
	(DF_MIR_INFO_BB): New macro.
	(DF_MIR_IN, DF_MIR_OUT): New macros.
	(struct df_mir_bb_info): New.
	(df_mir): New macro.
	(df_mir_add_problem, df_mir_simulate_one_insn): New forward
	declarations.
	(df_mir_get_bb_info): New.
	* df-problems.c (struct df_mir_problem_data): New.
	(df_mir_free_bb_info, df_mir_alloc, df_mir_reset,
	df_mir_bb_local_compute, df_mir_local_compute, df_mir_init,
	df_mir_confluence_0, df_mir_confluence_n,
	df_mir_transfer_function, df_mir_free, df_mir_top_dump,
	df_mir_bottom_dump, df_mir_verify_solution_start,
	df_mir_verify_solution_end): New.
	(problem_MIR): New.
	(df_mir_add_problem, df_mir_simulate_one_insn): New.
	* timevar.def (TV_DF_MIR): New.
	* ree.c: Include bitmap.h
	(add_removable_extension): Add an INIT_REGS parameter.  Use it
	to skip zero-extensions that may get an uninitialized register.
	(find_removable_extensions): Compute must-initialized registers
	using the MIR dataflow problem. Update the call to
	add_removable_extension.
	(find_and_remove_re): Call df_mir_add_problem.

gcc/testsuite/ChangeLog:

	* gnat.dg/opt50.adb: New test.
	* gnat.dg/opt50_pkg.adb: New helper.
	* gnat.dg/opt50_pkg.ads: New helper.
---
 gcc/df-problems.c   | 406 
 gcc/df.h|  34 ++-
 gcc/ree.c   |  62 --
 gcc/testsuite/gnat.dg/opt50.adb |  23 ++
 gcc/testsuite/gnat.dg/opt50_pkg.adb |  48 +
 gcc/testsuite/gnat.dg/opt50_pkg.ads |  12 ++
 gcc/timevar.def |   1 +
 7 files changed, 572 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gnat.dg/opt50.adb
 create mode 100644 gcc/testsuite/gnat.dg/opt50_pkg.adb
 create mode 100644 gcc/testsuite/gnat.dg/opt50_pkg.ads

diff --git a/gcc/df-problems.c b/gcc/df-problems.c
index 153732a..c08ae36 100644
--- a/gcc/df-problems.c
+++ b/gcc/df-problems.c
@@ -1849,6 +1849,412 @@ df_live_verify_transfer_functions (void)
 }
 
 /*
+   MUST-INITIALIZED REGISTERS.
+*/
+
+/* Private data used to verify the solution for this problem.  */
+struct df_mir_problem_data
+{
+  bitmap_head *in;
+  bitmap_head *out;
+  /* An obstack for the bitmaps we need for this problem.  */
+  bitmap_obstack mir_bitmaps;
+};
+
+
+/* Free basic block info.  */
+
+static void
+df_mir_free_bb_info (basic_block bb 

Benchmarks of v2 (was Re: [PATCH 0/5] RFC: Overhaul of diagnostics (v2))

2015-10-13 Thread David Malcolm
On Thu, 2015-09-24 at 10:15 +0200, Richard Biener wrote:
> On Thu, Sep 24, 2015 at 2:25 AM, David Malcolm  wrote:
> > On Wed, 2015-09-23 at 15:36 +0200, Richard Biener wrote:
> >> On Wed, Sep 23, 2015 at 3:19 PM, Michael Matz  wrote:
> >> > Hi,
> >> >
> >> > On Tue, 22 Sep 2015, David Malcolm wrote:
> >> >
> >> >> The drawback is that it could bloat the ad-hoc table.  Can the ad-hoc
> >> >> table ever get smaller, or does it only ever get inserted into?
> >> >
> >> > It only ever grows.
> >> >
> >> >> An idea I had is that we could stash short ranges directly into the 32
> >> >> bits of location_t, by offsetting the per-column-bits somewhat.
> >> >
> >> > It's certainly worth an experiment: let's say you restrict yourself to
> >> > tokens less than 8 characters, you need an additional 3 bits (using one
> >> > value, e.g. zero, as the escape value).  That leaves 20 bits for the line
> >> > numbers (for the normal 8 bit columns), which might be enough for most
> >> > single-file compilations.  For LTO compilation this often won't be 
> >> > enough.
> >> >
> >> >> My plan is to investigate the impact these patches have on the time and
> >> >> memory consumption of the compiler,
> >> >
> >> > When you do so, make sure you're also measuring an LTO compilation with
> >> > debug info of something big (firefox).  I know that we already had issues
> >> > with the size of the linemap data in the past for these cases (probably
> >> > when we added columns).
> >>
> >> The issue we have with LTO is that the linemap gets populated in quite
> >> random order and thus we repeatedly switch files (we've mitigated this
> >> somewhat for GCC 5).  We also considered dropping column info
> >> (and would drop range info) as diagnostics are from optimizers only
> >> with LTO and we keep locations merely for debug info.
> >
> > Thanks.  Presumably the mitigation you're referring to is the
> > lto_location_cache class in lto-streamer-in.c?
> >
> > Am I right in thinking that, right now, the LTO code doesn't support
> > ad-hoc locations? (presumably the block pointers only need to exist
> > during optimization, which happens after the serialization)
> 
> LTO code does support ad-hoc locations but they are "restored" only
> when reading function bodies and stmts (by means of COMBINE_LOCATION_DATA).
> 
> > The obvious simplification would be, as you suggest, to not bother
> > storing range information with LTO, falling back to just the existing
> > representation.  Then there's no need to extend LTO to serialize ad-hoc
> > data; simply store the underlying locus into the bit stream.  I think
> > that this happens already: lto-streamer-out.c calls expand_location and
> > stores the result, so presumably any ad-hoc location_t values made by
> > the v2 patches would have dropped their range data there when I ran the
> > test suite.
> 
> Yep.  We only preserve BLOCKs, so if you don't add extra code to
> preserve ranges they'll be "dropped".
> 
> > If it's acceptable to not bother with ranges for LTO, one way to do the
> > "stashing short ranges into the location_t" idea might be for the
> > bits-per-range of location_t values to be a property of the line_table
> > (or possibly the line map), set up when the struct line_maps is created.
> > For non-LTO it could be some tuned value (maybe from a param?); for LTO
> > it could be zero, so that we have as many bits as before for line/column
> > data.
> 
> That could be a possibility (likewise for column info?)
> 
> Richard.
> 
> > Hope this sounds sane
> > Dave

I did some crude benchmarking of the patchkit, using these scripts:
  https://github.com/davidmalcolm/gcc-benchmarking
(specifically, bb0222b455df8cefb53bfc1246eb0a8038256f30),
using the "big-code.c" and "kdecore.cc" files Michael posted as:
  https://gcc.gnu.org/ml/gcc-patches/2013-09/msg00062.html
and "influence.i", a preprocessed version of SPEC2006's 445.gobmk
engine/influence.c (as an example of a moderate-sized pure C source
file).

This doesn't yet cover very large autogenerated C files, and the .cc
file is only being measured to see the effect on the ad-hoc table (and
tokenization).

"control" was r227977.
"experiment" was the same revision with the v2 patchkit applied.

Recall that this patchkit captures ranges for tokens as an extra field
within tokens within libcpp and the C FE, and adds ranges to the ad-hoc
location lookaside, storing them for all tree nodes within the C FE that
have a location_t, and passing them around within c_expr for all C
expressions (including those that don't have a location_t).

Both control and experiment were built with
  --enable-checking=release \
  --disable-bootstrap \
  --disable-multilib \
  --enable-languages=c,ada,c++,fortran,go,java,lto,objc,obj-c++

The script measures:

(a) wallclock time for "xgcc -S" so it's measuring the driver, parsing,
optimimation, etc, rather than attempting to directly measure parsing.
This is without -ftime-report, since 

Re: Fix prototype for print_insn in rtl.h

2015-10-13 Thread Jeff Law

On 10/13/2015 02:21 AM, Nikolai Bozhenov wrote:

2015-10-13  Nikolai Bozhenov

 * gcc/rtl.h (print_insn): fix prototype

Installed on the trunk after bootstrap & regression test.

jeff



Re: [Boolean Vector, patch 3/5] Use boolean vector in C/C++ FE

2015-10-13 Thread Ilya Enkovich
On 09 Oct 14:51, Jeff Law wrote:
> On 10/02/2015 08:04 AM, Ilya Enkovich wrote:
> >Hi,
> >
> >This patch makes C/C++ FE to use boolean vector as a resulting type for 
> >vector comparison.  As a result vector comparison in source code now parsed 
> >into VEC_COND_EXPR, it required a testcase fix-up.
> >
> >Thanks,
> >Ilya
> >--
> >gcc/c
> >
> >2015-10-02  Ilya Enkovich  
> >
> > * c-typeck.c (build_conditional_expr): Use boolean vector
> > type for vector comparison.
> > (build_vec_cmp): New.
> > (build_binary_op): Use build_vec_cmp for comparison.
> >
> >gcc/cp
> >
> >2015-10-02  Ilya Enkovich  
> >
> > * call.c (build_conditional_expr_1): Use boolean vector
> > type for vector comparison.
> > * typeck.c (build_vec_cmp): New.
> > (cp_build_binary_op): Use build_vec_cmp for comparison.
> >
> >gcc/testsuite/
> >
> >2015-10-02  Ilya Enkovich  
> >
> > * g++.dg/ext/vector22.C: Allow VEC_COND_EXPR.
> >
> >
> >diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
> >index 3b26231..3f64d76 100644
> >--- a/gcc/c/c-typeck.c
> >+++ b/gcc/c/c-typeck.c
> >@@ -10220,6 +10232,19 @@ push_cleanup (tree decl, tree cleanup, bool eh_only)
> >STATEMENT_LIST_STMT_EXPR (list) = stmt_expr;
> >  }
> >  
> >+/* Build a vector comparison using VEC_COND_EXPR.  */
> Please make sure your function comments include descriptions of all the
> arguments and return values.

Fixed.

> 
> 
> >+
> >+static tree
> >+build_vec_cmp (tree_code code, tree type,
> >+   tree arg0, tree arg1)
> >+{
> >+  tree zero_vec = build_zero_cst (type);
> >+  tree minus_one_vec = build_minus_one_cst (type);
> >+  tree cmp_type = build_same_sized_truth_vector_type (type);
> >+  tree cmp = build2 (code, cmp_type, arg0, arg1);
> >+  return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
> >+}
> Isn't this implementation the same for C & C++?  Does it make sense to put
> it in c-family/c-common.c?

C++ version calls fold_if_not_in_template for generated comparison.  It is 
required there to successfully recognize vector MIN, MAX and ABS templates for 
vector ?: conditional operator.  Vector form of ?: conditional operator is 
supported for C++ only.

> 
> 
> >+
> >  /* Build a binary-operation expression without default conversions.
> > CODE is the kind of expression to build.
> > LOCATION is the operator's location.
> >@@ -10786,7 +10811,8 @@ build_binary_op (location_t location, enum tree_code 
> >code,
> >result_type = build_opaque_vector_type (intt,
> >   TYPE_VECTOR_SUBPARTS (type0));
> >converted = 1;
> >-  break;
> >+  ret = build_vec_cmp (resultcode, result_type, op0, op1);
> >+  goto return_build_binary_op;
> I suspect there's some kind of whitespace/tab problem.  Those two lines
> should be indented the same, right?

Fixed.

> 
> 
> >  }
> >if (FLOAT_TYPE_P (type0) || FLOAT_TYPE_P (type1))
> > warning_at (location,
> >@@ -10938,7 +10964,8 @@ build_binary_op (location_t location, enum tree_code 
> >code,
> >result_type = build_opaque_vector_type (intt,
> >   TYPE_VECTOR_SUBPARTS (type0));
> >converted = 1;
> >-  break;
> >+  ret = build_vec_cmp (resultcode, result_type, op0, op1);
> >+  goto return_build_binary_op;
> Similarly here.
> 
> With the items above fixed, this is OK.
> 
> However, more generally, do we need to do anything for the other languages?

Looking into that I got an impression vector modes are used by C/C++ vector 
extensions only.  And I think regression testing would reveal some failures 
otherwise.

> 
> Jeff

Here is an updated version.

Thanks,
Ilya
--
gcc/c

2015-10-02  Ilya Enkovich  

* c-typeck.c (build_conditional_expr): Use boolean vector
type for vector comparison.
(build_vec_cmp): New.
(build_binary_op): Use build_vec_cmp for comparison.

gcc/cp

2015-10-02  Ilya Enkovich  

* call.c (build_conditional_expr_1): Use boolean vector
type for vector comparison.
* typeck.c (build_vec_cmp): New.
(cp_build_binary_op): Use build_vec_cmp for comparison.

gcc/testsuite/

2015-10-02  Ilya Enkovich  

* g++.dg/ext/vector22.C: Allow VEC_COND_EXPR.


diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index df3245a..8fe6a74 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -4771,6 +4771,18 @@ build_conditional_expr (location_t colon_loc, tree 
ifexp, bool ifexp_bcp,
   && TREE_CODE (orig_op2) == INTEGER_CST
   && !TREE_OVERFLOW (orig_op2)));
 }
+
+  /* Need to convert condition operand into a vector mask.  */
+  if (VECTOR_TYPE_P (TREE_TYPE (ifexp)))
+{
+  tree vectype = TREE_TYPE (ifexp);
+  tree elem_type = 

[HSA] Fix emission of hsa_num_threads

2015-10-13 Thread Martin Liška
Hello.

Following pair of patches changes behavior of omp_{get,set}_num_threads and
provides more clever way how these values are passed to a another kernel.

Martin
>From 1d2732a0e33259e73a2d8059fb5f68e359144ef6 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 8 Oct 2015 11:21:16 +0200
Subject: [PATCH 1/2] HSA: encapsulate type conversion constructs

gcc/ChangeLog:

2015-10-08  Martin Liska  

	* hsa-gen.c (hsa_op_with_type::get_in_type): New function.
	(gen_hsa_insns_for_switch_stmt): Use it.
	(gen_set_num_threads): Dtto.
	(gen_hsa_insns_for_known_library_call): Dtto.
	* hsa.h (hsa_op_with_type::get_in_type): Declarate the function.
---
 gcc/hsa-gen.c | 64 +--
 gcc/hsa.h |  4 
 2 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 8f707b5..ab4917b 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -795,6 +795,34 @@ hsa_op_with_type::hsa_op_with_type (BrigKind16_t k, BrigType16_t t)
   type = t;
 }
 
+hsa_op_with_type *
+hsa_op_with_type::get_in_type (BrigType16_t dtype, hsa_bb *hbb)
+{
+  if (type == dtype)
+return this;
+
+  hsa_op_reg *dest;
+
+  if (hsa_needs_cvt (dtype, type))
+{
+  dest = new hsa_op_reg (dtype);
+  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_CVT,
+	dest->type, dest, this));
+}
+  else
+{
+  dest = new hsa_op_reg (type);
+  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_MOV,
+	dest->type, dest, this));
+
+  /* We cannot simply for instance: 'mov_u32 $_3, 48 (s32)' because
+	 type of the operand must be same as type of the instruction.  */
+  dest->type = dtype;
+}
+
+  return dest;
+}
+
 /* Constructor of class representing HSA immediate values.  TREE_VAL is the
tree representation of the immediate value.  If min32int is true,
always expand integer types to one that has at least 32 bits.  */
@@ -3016,16 +3044,8 @@ gen_hsa_insns_for_switch_stmt (gswitch *s, hsa_bb *hbb,
 	sub_index, index,
 	new hsa_op_immed (lowest)));
 
-  if (hsa_needs_cvt (BRIG_TYPE_U64, sub_index->type))
-{
-  hsa_op_reg *sub_index_cvt = new hsa_op_reg (BRIG_TYPE_U64);
-  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_CVT,
-	sub_index_cvt->type,
-	sub_index_cvt, sub_index));
-
-  sub_index = sub_index_cvt;
-}
-
+  hsa_op_base *tmp = sub_index->get_in_type (BRIG_TYPE_U64, hbb);
+  sub_index = as_a  (tmp);
   unsigned labels = gimple_switch_num_labels (s);
   unsigned HOST_WIDE_INT size = tree_to_uhwi (get_switch_size (s));
 
@@ -3251,17 +3271,7 @@ gen_set_num_threads (tree value, hsa_bb *hbb, vec  *ssa_map)
   hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (value, hbb,
 			  ssa_map);
 
-  BrigType16_t dtype = hsa_num_threads->type;
-  if (hsa_needs_cvt (dtype, src->type))
-{
-  hsa_op_reg *tmp = new hsa_op_reg (dtype);
-  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_CVT, tmp->type,
-	tmp, src));
-  src = tmp;
-}
-  else
-src->type = dtype;
-
+  src = src->get_in_type (hsa_num_threads->type, hbb);
   hsa_op_address *addr = new hsa_op_address (hsa_num_threads);
 
   hsa_op_immed *limit = new hsa_op_immed (64, BRIG_TYPE_U32);
@@ -3394,17 +3404,7 @@ gen_hsa_insns_for_known_library_call (gimple *stmt, hsa_bb *hbb,
 	  hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb,
   ssa_map);
 
-	  BrigType16_t dtype = BRIG_TYPE_U64;
-	  if (hsa_needs_cvt (dtype, src->type))
-	{
-	  hsa_op_reg *tmp = new hsa_op_reg (dtype);
-	  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_CVT,
-		tmp->type, tmp, src));
-	  src = tmp;
-	}
-	  else
-	src->type = dtype;
-
+	  src = src->get_in_type (BRIG_TYPE_U64, hbb);
 	  set_debug_value (hbb, src);
 	  return true;
 	}
diff --git a/gcc/hsa.h b/gcc/hsa.h
index 86adaa5..89d339f 100644
--- a/gcc/hsa.h
+++ b/gcc/hsa.h
@@ -120,6 +120,10 @@ public:
   /* The type.  */
   BrigType16_t type;
 
+  /* Convert an operand to a destination type DTYPE and attach insns
+ to HBB if needed.  */
+  hsa_op_with_type *get_in_type (BrigType16_t dtype, hsa_bb *hbb);
+
 protected:
   hsa_op_with_type (BrigKind16_t k, BrigType16_t t);
 private:
-- 
2.6.0

>From 7f10daa1f37ee47091a3956a13bb610464e8e279 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Mon, 12 Oct 2015 15:49:50 +0200
Subject: [PATCH 2/2] HSA: handle properly number of threads in a kernel

gcc/ChangeLog:

2015-10-13  Martin Liska  

	* hsa-gen.c (hsa_insn_basic::set_output_in_type): New function.
	(query_hsa_grid): Likewise.
	(gen_set_num_threads): Save the value without any value range
	checking.
	(gen_num_threads_for_dispatch): New function.
	(gen_hsa_insns_for_known_library_call): Use the newly added
	function query_hsa_grid.
	(gen_hsa_insns_for_call): Likewise.
	(gen_hsa_insns_for_kernel_call): Use the newly added function
	

Re: [vec-cmp, patch 4/6] Support vector mask invariants

2015-10-13 Thread Ilya Enkovich
2015-10-13 16:54 GMT+03:00 Richard Biener :
> On Thu, Oct 8, 2015 at 5:11 PM, Ilya Enkovich  wrote:
>> Hi,
>>
>> This patch adds a special handling of boolean vector invariants.  We need 
>> additional code to determine type of generated invariant.  For VEC_COND_EXPR 
>> case we even provide this type directly because statement vectype doesn't 
>> allow us to compute it.  Separate code is used to generate and expand such 
>> vectors.
>>
>> Thanks,
>> Ilya
>> --
>> gcc/
>>
>> 2015-10-08  Ilya Enkovich  
>>
>> * expr.c (const_vector_mask_from_tree): New.
>> (const_vector_from_tree): Use const_vector_mask_from_tree
>> for boolean vectors.
>> * tree-vect-stmts.c (vect_init_vector): Support boolean vector
>> invariants.
>> (vect_get_vec_def_for_operand): Add VECTYPE arg.
>> (vectorizable_condition): Directly provide vectype for invariants
>> used in comparison.
>> * tree-vectorizer.h (vect_get_vec_def_for_operand): Add VECTYPE
>> arg.
>>
>>
>> diff --git a/gcc/expr.c b/gcc/expr.c
>> index 88da8cb..a624a34 100644
>> --- a/gcc/expr.c
>> +++ b/gcc/expr.c
>> @@ -11320,6 +11320,40 @@ try_tablejump (tree index_type, tree index_expr, 
>> tree minval, tree range,
>>return 1;
>>  }
>>
>> +/* Return a CONST_VECTOR rtx representing vector mask for
>> +   a VECTOR_CST of booleans.  */
>> +static rtx
>> +const_vector_mask_from_tree (tree exp)
>> +{
>> +  rtvec v;
>> +  unsigned i;
>> +  int units;
>> +  tree elt;
>> +  machine_mode inner, mode;
>> +
>> +  mode = TYPE_MODE (TREE_TYPE (exp));
>> +  units = GET_MODE_NUNITS (mode);
>> +  inner = GET_MODE_INNER (mode);
>> +
>> +  v = rtvec_alloc (units);
>> +
>> +  for (i = 0; i < VECTOR_CST_NELTS (exp); ++i)
>> +{
>> +  elt = VECTOR_CST_ELT (exp, i);
>> +
>> +  gcc_assert (TREE_CODE (elt) == INTEGER_CST);
>> +  if (integer_zerop (elt))
>> +   RTVEC_ELT (v, i) = CONST0_RTX (inner);
>> +  else if (integer_onep (elt)
>> +  || integer_minus_onep (elt))
>> +   RTVEC_ELT (v, i) = CONSTM1_RTX (inner);
>> +  else
>> +   gcc_unreachable ();
>> +}
>> +
>> +  return gen_rtx_CONST_VECTOR (mode, v);
>> +}
>> +
>>  /* Return a CONST_VECTOR rtx for a VECTOR_CST tree.  */
>>  static rtx
>>  const_vector_from_tree (tree exp)
>> @@ -11335,6 +11369,9 @@ const_vector_from_tree (tree exp)
>>if (initializer_zerop (exp))
>>  return CONST0_RTX (mode);
>>
>> +  if (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (exp)))
>> +  return const_vector_mask_from_tree (exp);
>> +
>>units = GET_MODE_NUNITS (mode);
>>inner = GET_MODE_INNER (mode);
>>
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 6949c71..337ea7b 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -1308,27 +1308,61 @@ vect_init_vector_1 (gimple *stmt, gimple *new_stmt, 
>> gimple_stmt_iterator *gsi)
>>  tree
>>  vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator 
>> *gsi)
>>  {
>> +  tree val_type = TREE_TYPE (val);
>> +  machine_mode mode = TYPE_MODE (type);
>> +  machine_mode val_mode = TYPE_MODE(val_type);
>>tree new_var;
>>gimple *init_stmt;
>>tree vec_oprnd;
>>tree new_temp;
>>
>>if (TREE_CODE (type) == VECTOR_TYPE
>> -  && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
>> -{
>> -  if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
>> +  && TREE_CODE (val_type) != VECTOR_TYPE)
>> +{
>> +  /* Handle vector of bool represented as a vector of
>> +integers here rather than on expand because it is
>> +a default mask type for targets.  Vector mask is
>> +built in a following way:
>> +
>> +tmp = (int)val
>> +vec_tmp = {tmp, ..., tmp}
>> +vec_cst = VIEW_CONVERT_EXPR(vec_tmp);  */
>> +  if (TREE_CODE (val_type) == BOOLEAN_TYPE
>> + && VECTOR_MODE_P (mode)
>> + && SCALAR_INT_MODE_P (GET_MODE_INNER (mode))
>> + && GET_MODE_INNER (mode) != val_mode)
>> {
>> - if (CONSTANT_CLASS_P (val))
>> -   val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
>> - else
>> + unsigned size = GET_MODE_BITSIZE (GET_MODE_INNER (mode));
>> + tree stype = build_nonstandard_integer_type (size, 1);
>> + tree vectype = get_vectype_for_scalar_type (stype);
>> +
>> + new_temp = make_ssa_name (stype);
>> + init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
>> + vect_init_vector_1 (stmt, init_stmt, gsi);
>> +
>> + val = make_ssa_name (vectype);
>> + new_temp = build_vector_from_val (vectype, new_temp);
>> + init_stmt = gimple_build_assign (val, new_temp);
>> + vect_init_vector_1 (stmt, init_stmt, gsi);
>> +
>> + val = build1 (VIEW_CONVERT_EXPR, type, val);
>l
> So I don't quite understand - why don't we want to build
>
>   

Re: [PATCH] gcc/ira.c: Check !HAVE_FP_INSTEAD_INSNS when frame pointer is needed and as global register

2015-10-13 Thread Chen Gang

On 10/13/15 07:02, Mike Stump wrote:
> On Oct 12, 2015, at 3:32 PM, Chen Gang  wrote:
>>
>> OK, thanks. If we really need to fix it, which target hook should I use?
>> (or do we need a new target hook?)
> 
> So, the first discussion would be if it is, or is not a bug.  If it isn’t, 
> then there is no fix.  No fix, no target hook.  So far, Bernd said not a bug.
> 

OK, under the bugzilla, the maintainer treated it as expected behavior
(not a bug). For me, we need more explanation for it (why we treat it
as expected behavior).


> So, I’ll note that one _can_ do this with the stack pointer, as a fixed 
> register.
> When the frame pointer is fixed, one cannot do this.
> 

Excuse me, I do not quite understand, could you please provide more
details?

> The code that does this is:
> 
>   /* Diagnose uses of the hard frame pointer when it is used as a global  
>   
>
>  register.  Often we can get away with letting the user appropriate   
>   
> 
>  the frame pointer, but we should let them know when code generation  
>   
> 
>  makes that impossible.  */
>   if (global_regs[HARD_FRAME_POINTER_REGNUM] && frame_pointer_needed)
> {
>   tree decl = global_regs_decl[HARD_FRAME_POINTER_REGNUM];
>   error_at (DECL_SOURCE_LOCATION (current_function_decl),
> "frame pointer required, but reserved");
>   inform (DECL_SOURCE_LOCATION (decl), "for %qD", decl);
> }
> 
> to `fix it’, one would simple remove this chunk as misguided and fix up any 
> code gen issues exposed.
> 

If there were not only one issues related with it, for me, what you said
sounds reasonable to me.


Thanks.
-- 
Chen Gang (陈刚)

Open, share, and attitude like air, water, and life which God blessed


Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Dominik Vogt
On Tue, Oct 13, 2015 at 04:33:42PM +0200, Bernd Schmidt wrote:
> Looks like
> ix86_pragma_target_parse has a "! args" test to determine if it has
> a pop, maybe the default function could do the same.

All right, this solution is way better.  New patch attached.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
gcc/ChangeLog

* targhooks.c (default_target_option_pragma_parse): Do not warn if
called on behalf of "#pragma GCC pop_options".

gcc/testsuite/ChangeLog

* gcc.dg/pragma-pop_options-1.c: New test.
>From 4bb0068875e005b2f0e33bec0bd5a70b798af6e3 Mon Sep 17 00:00:00 2001
From: Dominik Vogt 
Date: Tue, 13 Oct 2015 15:54:15 +0100
Subject: [PATCH] Remove "#pragma GCC pop_options" warning for "#pragma GCC
 pop_options".

---
 gcc/targhooks.c | 8 ++--
 gcc/testsuite/gcc.dg/pragma-pop_options-1.c | 7 +++
 2 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pragma-pop_options-1.c

diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 7238c8f..5077ec9 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1305,8 +1305,12 @@ bool
 default_target_option_pragma_parse (tree ARG_UNUSED (args),
 tree ARG_UNUSED (pop_target))
 {
-  warning (OPT_Wpragmas,
-	   "#pragma GCC target is not supported for this machine");
+  /* If args is NULL the caller is handle_pragma_pop_options ().  In that case,
+ emit no warning because "#pragma GCC pop_options" is valid on targets that
+ do not have the "target" pragma.  */
+  if (args)
+warning (OPT_Wpragmas,
+	 "#pragma GCC target is not supported for this machine");
 
   return false;
 }
diff --git a/gcc/testsuite/gcc.dg/pragma-pop_options-1.c b/gcc/testsuite/gcc.dg/pragma-pop_options-1.c
new file mode 100644
index 000..4e969de
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pragma-pop_options-1.c
@@ -0,0 +1,7 @@
+/* Check warnings produced by #pragma GCC push/pop/reset_options.  */
+/* { dg-do assemble } */
+
+#pragma push_options
+#pragma pop_options
+
+int foo;
-- 
2.3.0



Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Bernd Schmidt

On 10/13/2015 05:03 PM, Dominik Vogt wrote:

On Tue, Oct 13, 2015 at 04:33:42PM +0200, Bernd Schmidt wrote:

Looks like
ix86_pragma_target_parse has a "! args" test to determine if it has
a pop, maybe the default function could do the same.


All right, this solution is way better.  New patch attached.


This is ok, thanks!


Bernd



Re: [[Boolean Vector, patch 5/5] Support boolean vectors in vector lowering

2015-10-13 Thread Jeff Law

On 10/13/2015 08:56 AM, Ilya Enkovich wrote:

2015-10-12 13:37 GMT+03:00 Alan Lawrence :

On 09/10/15 22:01, Jeff Law wrote:


So my question for the series as a whole is whether or not we need to do
something for the other languages, particularly Fortran.  I was a bit
surprised to see this stuff bleed into the C/C++ front-ends and
obviously wonder if it's bled into Fortran, Ada, Java, etc.



Isn't that just because, we have GNU extensions to C/C++, for vectors? I
admit I don't know enough Ada/Fortran to know whether we've added GNU
extensions to those languages as well...

A.


I also got an impression only GNU vector extensions should be
affected. And those are for C/C++ only.
I'd be surprised if Fortran doesn't have vector capabilities.  I think 
some sanity checking in there would be wise.


jeff


Re: [Patch] [x86_64]: Add bdver4 for multi versioning and fix AMD cpu model detection.

2015-10-13 Thread Uros Bizjak
On Tue, Oct 13, 2015 at 5:16 PM, Kumar, Venkataramanan
 wrote:
> Hi Uros,
>
> I realized both GCC 4.9 and GCC 5 branches includes processor subtype 
> AMDFAM15H_BDVER4.
> So I need to back port not only model selection fix but also the detection of 
> model for bdver4.
>
> Is that fine?

OK, but to avoid ABI mismatches, please double check that enum values
passed between library and compiled code are always the same in all
gcc releases.

Uros.

> Regards,
> Venkat.
>
>> -Original Message-
>> From: Kumar, Venkataramanan
>> Sent: Friday, October 09, 2015 3:31 PM
>> To: 'Uros Bizjak'
>> Cc: gcc-patches@gcc.gnu.org
>> Subject: RE: [Patch] [x86_64]: Add bdver4 for multi versioning and fix AMD
>> cpu model detection.
>>
>> Thank you Uros,
>>
>> I will  test and commit model selection change in all release branches as 
>> well.
>>
>> Regards,
>> Venkat.
>>
>> > -Original Message-
>> > From: Uros Bizjak [mailto:ubiz...@gmail.com]
>> > Sent: Friday, October 09, 2015 3:25 PM
>> > To: Kumar, Venkataramanan
>> > Cc: gcc-patches@gcc.gnu.org
>> > Subject: Re: [Patch] [x86_64]: Add bdver4 for multi versioning and fix
>> > AMD cpu model detection.
>> >
>> > On Fri, Oct 9, 2015 at 11:50 AM, Kumar, Venkataramanan
>> >  wrote:
>> > > Hi Uros,
>> > >
>> > > Please find below patch that adds bdver4 target for multi versioning.
>> > > Also I while computing model, the extended_model is incorrectly left
>> > shifted  by 4. I have removed it now.
>> > >
>> > > Is below patch Ok for trunk ?
>> > > GCC bootstrap and regressions passed.
>> >
>> > OK for trunk and release branches, where applicable. IMO, model
>> > selection fix should be applied to all release branches.
>> >
>> > Thanks,
>> > Uros.
>> >
>> > > diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index
>> > > bb3a722..8676747 100644
>> > > --- a/libgcc/ChangeLog
>> > > +++ b/libgcc/ChangeLog
>> > > @@ -1,3 +1,8 @@
>> > > +2015-10-09  Venkataramanan Kumar
>> > 
>> > > +
>> > > +   * config/i386/cpuinfo.c (get_amd_cpu): Detect bdver4.
>> > > +   (__cpu_indicator_init): Fix model selection for AMD CPUs.
>> > > +
>> > >  2015-10-05  Kirill Yukhin  
>> > >
>> > > * config/i386/cpuinfo.c (get_intel_cpu): Detect "skylake-avx512".
>> > > diff --git a/libgcc/config/i386/cpuinfo.c
>> > > b/libgcc/config/i386/cpuinfo.c index 0cbbc85..1313ca3 100644
>> > > --- a/libgcc/config/i386/cpuinfo.c
>> > > +++ b/libgcc/config/i386/cpuinfo.c
>> > > @@ -169,6 +169,9 @@ get_amd_cpu (unsigned int family, unsigned int
>> > model)
>> > >/* Bulldozer version 3 "Steamroller"  */
>> > >if (model >= 0x30 && model <= 0x4f)
>> > > __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
>> > > +  /* Bulldozer version 4 "Excavator"   */
>> > > +  if (model >= 0x60 && model <= 0x7f)
>> > > +   __cpu_model.__cpu_subtype = AMDFAM15H_BDVER4;
>> > >break;
>> > >  /* AMD Family 16h "btver2" */
>> > >  case 0x16:
>> > > @@ -455,7 +458,7 @@ __cpu_indicator_init (void)
>> > >if (family == 0x0f)
>> > > {
>> > >   family += extended_family;
>> > > - model += (extended_model << 4);
>> > > + model += extended_model;
>> > > }
>> > >
>> > >/* Get CPU type.  */
>> > >
>> > > Regards,
>> > > Venkat.
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >


Re: [Boolean Vector, patch 3/5] Use boolean vector in C/C++ FE

2015-10-13 Thread Jeff Law

On 10/13/2015 08:14 AM, Ilya Enkovich wrote:

+
+static tree
+build_vec_cmp (tree_code code, tree type,
+  tree arg0, tree arg1)
+{
+  tree zero_vec = build_zero_cst (type);
+  tree minus_one_vec = build_minus_one_cst (type);
+  tree cmp_type = build_same_sized_truth_vector_type (type);
+  tree cmp = build2 (code, cmp_type, arg0, arg1);
+  return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
+}

Isn't this implementation the same for C & C++?  Does it make sense to put
it in c-family/c-common.c?


C++ version calls fold_if_not_in_template for generated comparison.  It is 
required there to successfully recognize vector MIN, MAX and ABS templates for 
vector ?: conditional operator.  Vector form of ?: conditional operator is 
supported for C++ only.

Ah, nevermind then.




However, more generally, do we need to do anything for the other languages?


Looking into that I got an impression vector modes are used by C/C++ vector 
extensions only.  And I think regression testing would reveal some failures 
otherwise.
Maybe this stuff hasn't bled into the Fortran front-end, but the 
gfortran front-end certainly has OpenMP support which presumably has 
vector extensions.


The fact that nothing's failing in the testsuite is encouraging, but 
it'd be worth spending a few minutes taking a look to see if there's 
something that might need updating.


Jeff



Re: [[Boolean Vector, patch 5/5] Support boolean vectors in vector lowering

2015-10-13 Thread Ilya Enkovich
2015-10-13 18:35 GMT+03:00 Jeff Law :
> On 10/13/2015 08:56 AM, Ilya Enkovich wrote:
>>
>> 2015-10-12 13:37 GMT+03:00 Alan Lawrence :
>>>
>>> On 09/10/15 22:01, Jeff Law wrote:
>>>
 So my question for the series as a whole is whether or not we need to do
 something for the other languages, particularly Fortran.  I was a bit
 surprised to see this stuff bleed into the C/C++ front-ends and
 obviously wonder if it's bled into Fortran, Ada, Java, etc.
>>>
>>>
>>>
>>> Isn't that just because, we have GNU extensions to C/C++, for vectors? I
>>> admit I don't know enough Ada/Fortran to know whether we've added GNU
>>> extensions to those languages as well...
>>>
>>> A.
>>
>>
>> I also got an impression only GNU vector extensions should be
>> affected. And those are for C/C++ only.
>
> I'd be surprised if Fortran doesn't have vector capabilities.  I think some
> sanity checking in there would be wise.

Vector type in language doesn't mean SIMD. AFAIK OpenMP is used in
Fortran for SIMD features. Also I would get a lot of Fortran
regressions in case such feature exists due to fixed IL checker.

Thanks,
Ilya

>
> jeff


Re: [PATCH 8/9] Add TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID

2015-10-13 Thread Jeff Law

On 10/13/2015 04:13 AM, Richard Biener wrote:

On Tue, Oct 13, 2015 at 1:27 AM, Richard Henderson  wrote:

On 10/12/2015 09:10 PM, Richard Biener wrote:


The check_loadstore change should instead have adjusted the
flag_delete_null_pointer_checks guard in
infer_nonnull_range_by_dereference.




Nope, that doesn't work.  You have to wait until you see the actual MEM
being dereferenced before you can look at it's address space.



Well, as we are explicitely looking for the pointer 'op' we know the
address-space
beforehand, no?  TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (op)))?



No.  We don't even know what type we're looking for; we're merely looking
for any use of NULL within any memory reference within STMT.

Specifically, when we're not looking for a specific SSA_NAME (which would be
properly typed), we always pass in a plain (void *)0:

   bool by_dereference
 = infer_nonnull_range_by_dereference (stmt, null_pointer_node);


Ick.
It's just looking to see if there's an explicit *0 in stmt.  That can 
occur due to cprop & friends obviously.  It was an easy way to avoid 
having to write a special walker.


The problem here is we don't know what address space the *0 is going to 
hit, right?   Isn't that also an issue for code generation as well?


Jeff


Re: [gomp4, committed] Add goacc/kernels-acc-on-device.c

2015-10-13 Thread Tom de Vries

On 12/10/15 14:52, Tom de Vries wrote:

On 12/10/15 12:49, Thomas Schwinge wrote:

Hi Tom!

On Sat, 10 Oct 2015 12:49:01 +0200, Tom de
Vries  wrote:

>--- /dev/null
>+++ b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c
>@@ -0,0 +1,39 @@
>+/* { dg-additional-options "-O2" } */
>+
>+#include 


Hi Thomas,


That doesn't work (at least in build-tree testing), as gcc/testsuite/ is
not set up to look for header files in [target]/libgomp/:

[...]/source-gcc/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c:3:21:
fatal error: openacc.h: No such file or directory
 compilation terminated.
 compiler exited with status 1



Ah, I see. I was doing 'make' followed by 'make install', and then
build-tree testing. The build-tree testing seems to pick up the header
file from the install directory. So for me test passed.


>+
>+#define N 32
>+
>+void
>+foo (float *a, float *b)
>+{
>+  float exp;
>+  int i;
>+  int n;
>+
>+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N])
>+  {
>+int ii;
>+
>+for (ii = 0; ii < N; ii++)
>+  {
>+if (acc_on_device (acc_device_host))

Your two options are: if that's applicable/sufficient for what you intend
to test here, use __builtin_acc_on_device with a hard-coded acc_device_*,
or duplicate part of  as done for example in
gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c.



Went with second option, committed as attached.


As a follow-up patch, I've factored the code into a mockup openacc.h, 
now shared by several test-cases.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Factor out goacc/openacc.h

2015-10-13  Tom de Vries  

	* c-c++-common/goacc/openacc.h: New header file, factored out of ...
	* c-c++-common/goacc/kernels-acc-on-device.c: ... here.
	* c-c++-common/goacc/acc_on_device-2-off.c: Use openacc.h.
	* c-c++-common/goacc/acc_on_device-2.c: Same.
---
 .../c-c++-common/goacc/acc_on_device-2-off.c  | 11 +--
 gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c| 13 +
 .../c-c++-common/goacc/kernels-acc-on-device.c| 19 +--
 gcc/testsuite/c-c++-common/goacc/openacc.h| 18 ++
 4 files changed, 21 insertions(+), 40 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/openacc.h

diff --git a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c
index 71abe11..cce58de 100644
--- a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c
+++ b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c
@@ -3,16 +3,7 @@
 
 /* Duplicate parts of libgomp/openacc.h, because we can't include it here.  */
 
-#if __cplusplus
-extern "C" {
-#endif
-
-typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
-extern int acc_on_device (int);
-
-#if __cplusplus
-}
-#endif
+#include "openacc.h"
 
 int
 f (void)
diff --git a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
index 243e562..19a5bd3 100644
--- a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
@@ -1,18 +1,7 @@
 /* Have to enable optimizations, as otherwise builtins won't be expanded.  */
 /* { dg-additional-options "-O -fdump-rtl-expand" } */
 
-/* Duplicate parts of libgomp/openacc.h, because we can't include it here.  */
-
-#if __cplusplus
-extern "C" {
-#endif
-
-typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
-extern int acc_on_device (int);
-
-#if __cplusplus
-}
-#endif
+#include "openacc.h"
 
 int
 f (void)
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c
index 784c66a..958b65b 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c
@@ -1,23 +1,6 @@
 /* { dg-additional-options "-O2" } */
 
-#if __cplusplus
-extern "C" {
-#endif
-
-#if __cplusplus >= 201103
-# define __GOACC_NOTHROW noexcept
-#elif __cplusplus
-# define __GOACC_NOTHROW throw ()
-#else /* Not C++ */
-# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
-#endif
-
-typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
-int acc_on_device (int) __GOACC_NOTHROW;
-
-#if __cplusplus
-}
-#endif
+#include "openacc.h"
 
 #define N 32
 
diff --git a/gcc/testsuite/c-c++-common/goacc/openacc.h b/gcc/testsuite/c-c++-common/goacc/openacc.h
new file mode 100644
index 000..a74a482
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/openacc.h
@@ -0,0 +1,18 @@
+#if __cplusplus
+extern "C" {
+#endif
+
+#if __cplusplus >= 201103
+# define __GOACC_NOTHROW noexcept
+#elif __cplusplus
+# define __GOACC_NOTHROW throw ()
+#else /* Not C++ */
+# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
+#endif
+
+typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
+int acc_on_device (int) __GOACC_NOTHROW;
+
+#if __cplusplus
+}
+#endif
-- 
1.9.1



Re: [Boolean Vector, patch 3/5] Use boolean vector in C/C++ FE

2015-10-13 Thread Ilya Enkovich
2015-10-13 18:42 GMT+03:00 Jeff Law :
> On 10/13/2015 08:14 AM, Ilya Enkovich wrote:

 +
 +static tree
 +build_vec_cmp (tree_code code, tree type,
 +  tree arg0, tree arg1)
 +{
 +  tree zero_vec = build_zero_cst (type);
 +  tree minus_one_vec = build_minus_one_cst (type);
 +  tree cmp_type = build_same_sized_truth_vector_type (type);
 +  tree cmp = build2 (code, cmp_type, arg0, arg1);
 +  return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
 +}
>>>
>>> Isn't this implementation the same for C & C++?  Does it make sense to
>>> put
>>> it in c-family/c-common.c?
>>
>>
>> C++ version calls fold_if_not_in_template for generated comparison.  It is
>> required there to successfully recognize vector MIN, MAX and ABS templates
>> for vector ?: conditional operator.  Vector form of ?: conditional operator
>> is supported for C++ only.
>
> Ah, nevermind then.
>
>
>>>
>>> However, more generally, do we need to do anything for the other
>>> languages?
>>
>>
>> Looking into that I got an impression vector modes are used by C/C++
>> vector extensions only.  And I think regression testing would reveal some
>> failures otherwise.
>
> Maybe this stuff hasn't bled into the Fortran front-end, but the gfortran
> front-end certainly has OpenMP support which presumably has vector
> extensions.

OpenMP extension doesn't produce any vector code in front-end. Code
will be produced by vectorizer anyway.

>
> The fact that nothing's failing in the testsuite is encouraging, but it'd be
> worth spending a few minutes taking a look to see if there's something that
> might need updating.

I also grepped for VEC_COND_EXPR and it never occurs in front-ends
other than C/C++.

Thanks,
Ilya

>
> Jeff
>


[gomp4, committed] Move kernels pass group before pass_fre

2015-10-13 Thread Tom de Vries

Hi,

this patch moves the kernels pass group to before pass_fre. Instead we 
use pass_dominator_oacc_kernels in the pass group.


This fixes an ICE while compiling the test-case included in the patch.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Move kernels pass group before pass_fre

2015-10-13  Tom de Vries  

	* tree-ssa-dom.c (pass_dominator_oacc_kernels::clone): New function.
	* passes.def: Move pass group pass_oacc_kernels to before pass_fre. Add
	pass_dominator_oacc_kernels twice in the pass_oacc_kernels pass group.

	* c-c++-common/goacc/kernels-acc-on-device-2.c: New test.
	* c-c++-common/goacc/kernels-counter-var-redundant-load.c: Update.
---
 gcc/passes.def |  4 ++-
 .../c-c++-common/goacc/kernels-acc-on-device-2.c   | 37 ++
 .../goacc/kernels-counter-var-redundant-load.c | 10 +++---
 gcc/tree-ssa-dom.c |  1 +
 4 files changed, 47 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device-2.c

diff --git a/gcc/passes.def b/gcc/passes.def
index bc454c0..4ed4ccd 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -86,12 +86,13 @@ along with GCC; see the file COPYING3.  If not see
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	 execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
-	  NEXT_PASS (pass_fre);
 	  /* Pass group that runs when there are oacc kernels in the
 	 function.  */
 	  NEXT_PASS (pass_oacc_kernels);
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+	  NEXT_PASS (pass_dominator_oacc_kernels);
 	  NEXT_PASS (pass_ch_oacc_kernels);
+	  NEXT_PASS (pass_dominator_oacc_kernels);
 	  NEXT_PASS (pass_tree_loop_init);
 	  NEXT_PASS (pass_lim);
 	  NEXT_PASS (pass_copy_prop);
@@ -105,6 +106,7 @@ along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_expand_omp_ssa);
 	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
+	  NEXT_PASS (pass_fre);
 	  NEXT_PASS (pass_merge_phi);
   NEXT_PASS (pass_dse);
 	  NEXT_PASS (pass_cd_dce);
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device-2.c
new file mode 100644
index 000..2c7297b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device-2.c
@@ -0,0 +1,37 @@
+/* { dg-additional-options "-O2" } */
+
+#include "openacc.h"
+
+#define N 32
+
+void
+foo (float *a, float *b)
+{
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N])
+  {
+int ii;
+int on_host = acc_on_device (acc_device_X);
+
+for (ii = 0; ii < N; ii++)
+  {
+	if (on_host)
+	  b[ii] = a[ii] + 1;
+	else
+	  b[ii] = a[ii];
+  }
+  }
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N])
+  {
+int ii;
+int on_host = acc_on_device (acc_device_X);
+
+for (ii = 0; ii < N; ii++)
+  {
+	if (on_host)
+	  b[ii] = a[ii] + 2;
+	else
+	  b[ii] = a[ii];
+  }
+  }
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
index 84dee69..c4ffc1d 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
@@ -1,5 +1,5 @@
 /* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-fdump-tree-dom_oacc_kernels" } */
+/* { dg-additional-options "-fdump-tree-dom_oacc_kernels3" } */
 
 #include 
 
@@ -28,7 +28,9 @@ foo (unsigned int *c)
_15 = .omp_data_i_10->c;
c.1_16 = *_15;
 
-   Check that there's only one load from anonymous ssa-name (which we assume to
-   be the one to read c), and that there's no such load for ii.  */
+   Check that there are two loads from anonymous ssa-names, which we assume to
+   be:
+   - the one to read c
+   - the one to read ii after the kernels region.  */
 
-/* { dg-final { scan-tree-dump-times "(?n)\\*_\[0-9\]\[0-9\]*;$" 1 "dom_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-times "(?n)\\*_\[0-9\]\[0-9\]*;$" 2 "dom_oacc_kernels3" } } */
diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index c7dc7b0..87f9daa 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -788,6 +788,7 @@ public:
   {}
 
   /* opt_pass methods: */
+  opt_pass * clone () { return new pass_dominator_oacc_kernels (m_ctxt); }
   virtual bool gate (function *) { return true; }
 
  private:
-- 
1.9.1



Re: [patch 5/6] scalar-storage-order merge: rest

2015-10-13 Thread Jeff Law

On 10/06/2015 05:05 AM, Eric Botcazou wrote:

This is the rest of the implementation.

* asan.c (instrument_derefs): Adjust call to get_inner_reference.
* builtins.c (get_object_alignment_2): Likewise.
* cfgexpand.c (expand_debug_expr): Adjust call to get_inner_reference
and get_ref_base_and_extent.
* dbxout.c (dbxout_expand_expr): Likewise.
* dwarf2out.c (add_var_loc_to_decl): Likewise.
(loc_list_for_address_of_addr_expr_of_indirect_ref): Likewise.
(loc_list_from_tree): Likewise.
(fortran_common): Likewise.
* gimple-fold.c (gimple_fold_builtin_memory_op): Adjust calls to
get_ref_base_and_extent.
(get_base_constructor): Likewise.
(fold_const_aggregate_ref_1): Likewise.
* gimple-laddress.c (pass_laddress::execute): Adjust call to
get_inner_reference.
* gimple-ssa-strength-reduction.c (slsr_process_ref): Adjust call to
get_inner_reference and bail out on reverse storage order.
* ifcvt.c (noce_emit_move_insn): Adjust calls to store_bit_field.
* ipa-cp.c (ipa_get_jf_ancestor_result): Adjust call to
build_ref_for_offset.
* ipa-polymorphic-call.c (set_by_invariant): Adjust call to
get_ref_base_and_extent.
(ipa_polymorphic_call_context): Likewise.
(extr_type_from_vtbl_ptr_store): Likewise.
(check_stmt_for_type_change): Likewise.
(get_dynamic_type): Likewise.
* ipa-prop.c (ipa_load_from_parm_agg_1): Adjust call to
get_ref_base_and_extent.
(compute_complex_assign_jump_func): Likewise.
(get_ancestor_addr_info): Likewise.
(compute_known_type_jump_func): Likewise.
(determine_known_aggregate_parts): Likewise.
(ipa_get_adjustment_candidate): Likewise.
(ipa_modify_call_arguments): Set REF_REVERSE_STORAGE_ORDER on
MEM_REF.
* ipa-prop.h (ipa_parm_adjustment): Add REVERSE field.
(build_ref_for_offset): Adjust prototype.
* simplify-rtx.c (delegitimize_mem_from_attrs): Adjust call to
get_inner_reference.
* tree-affine.c (tree_to_aff_combination): Adjust call to
get_inner_reference.
(get_inner_reference_aff): Likewise.
* tree-data-ref.c (split_constant_offset_1): Likewise.
(dr_analyze_innermost): Likewise.  Bail out if reverse storage order.
* tree-scalar-evolution.c (interpret_rhs_expr): Adjust call to
get_inner_reference.
* tree-sra.c (struct access): Add REVERSE and move WRITE around.
(dump_access): Print new fields.
(create_access): Adjust call to get_ref_base_and_extent and set the
REVERSE flag according to the result.
(completely_scalarize_record): Set the REVERSE flag.
(scalarize_elem): Add REVERSE parameter.
(build_access_from_expr_1): Preserve storage order barriers.
(build_accesses_from_assign): Likewise.
(build_ref_for_offset): Add REVERSE parameter and set the
REF_REVERSE_STORAGE_ORDER flag accordingly.
(build_ref_for_model): Adjust call to build_ref_for_offset and clear
the REF_REVERSE_STORAGE_ORDER flag if there are components.
(analyze_access_subtree): Likewise.
(create_artificial_child_access): Set the REVERSE flag.
(get_access_for_expr): Adjust call to get_ref_base_and_extent.
(turn_representatives_into_adjustments): Propagate REVERSE flag.
(ipa_sra_check_caller): Adjust call to get_inner_reference.
* tree-ssa-alias.c (ao_ref_base): Adjust call to
get_ref_base_and_extent.
(aliasing_component_refs_p): Likewise.
(stmt_kills_ref_p_1): Likewise.
* tree-ssa-dce.c (mark_aliased_reaching_defs_necessary_1): Likewise.
* tree-ssa-loop-ivopts.c (may_be_nonaddressable_p) : New.
Return true if reverse storage order.
: Likewise.
: Likewise.
: Likewise.
: Likewise.
(split_address_cost): Likewise.  Bail out if reverse storage order.
* tree-ssa-math-opts.c (find_bswap_or_nop_load): Adjust call to
get_inner_reference.  Bail out if reverse storage order.
(bswap_replace): Adjust call to get_inner_reference.
* tree-ssa-pre.c (create_component_ref_by_pieces_1) : Set
the REF_REVERSE_STORAGE_ORDER flag.
: Likewise.
* tree-ssa-sccvn.c (vn_reference_eq): Return false on storage order
barriers.
(copy_reference_ops_from_ref) : Set REVERSE field according
to the REF_REVERSE_STORAGE_ORDER flag.
: Likewise.
: Set it for storage order barriers.
(contains_storage_order_barrier_p): New predicate.
(vn_reference_lookup_3): Adjust calls to get_ref_base_and_extent.
Punt on storage order barriers if necessary.
* tree-ssa-sccvn.h (struct vn_reference_op_struct): Add REVERSE.
* tree-ssa-structalias.c 

[PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2015-10-13 Thread Andre Vieira
This patch ports the aeabi_idiv routine from Linaro Cortex-Strings 
(https://git.linaro.org/toolchain/cortex-strings.git), which was 
contributed by ARM under Free BSD license.


The new aeabi_idiv routine is used to replace the one in 
libgcc/config/arm/lib1funcs.S. This replacement happens within the 
Thumb1 wrapper. The new routine is under LGPLv3 license.


The main advantage of this version is that it can improve the 
performance of the aeabi_idiv function for Thumb1. This solution will 
also increase the code size. So it will only be used if 
__OPTIMIZE_SIZE__ is not defined.


Make check passed for armv6-m.

libgcc/ChangeLog:
2015-08-10  Hale Wang  
Andre Vieira  

  * config/arm/lib1funcs.S: Add new wrapper.
From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
From: Andre Simoes Dias Vieira 
Date: Fri, 21 Aug 2015 14:23:28 +0100
Subject: [PATCH] new wrapper idivmod

---
 libgcc/config/arm/lib1funcs.S | 250 --
 1 file changed, 217 insertions(+), 33 deletions(-)

diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -306,34 +306,12 @@ LSYM(Lend_fde):
 #ifdef __ARM_EABI__
 .macro THUMB_LDIV0 name signed
 #if defined(__ARM_ARCH_6M__)
-	.ifc \signed, unsigned
-	cmp	r0, #0
-	beq	1f
-	mov	r0, #0
-	mvn	r0, r0		@ 0x
-1:
-	.else
-	cmp	r0, #0
-	beq	2f
-	blt	3f
+
+	push	{r0, lr}
 	mov	r0, #0
-	mvn	r0, r0
-	lsr	r0, r0, #1	@ 0x7fff
-	b	2f
-3:	mov	r0, #0x80
-	lsl	r0, r0, #24	@ 0x8000
-2:
-	.endif
-	push	{r0, r1, r2}
-	ldr	r0, 4f
-	adr	r1, 4f
-	add	r0, r1
-	str	r0, [sp, #8]
-	@ We know we are not on armv4t, so pop pc is safe.
-	pop	{r0, r1, pc}
-	.align	2
-4:
-	.word	__aeabi_idiv0 - 4b
+	bl	SYM(__aeabi_idiv0)
+	pop	{r1, pc}
+
 #elif defined(__thumb2__)
 	.syntax unified
 	.ifc \signed, unsigned
@@ -945,7 +923,170 @@ LSYM(Lover7):
 	add	dividend, work
   .endif
 LSYM(Lgot_result):
-.endm	
+.endm
+
+#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
+/* If performance is preferred, the following functions are provided.  */
+
+/* Branch to div(n), and jump to label if curbit is lower than divisor.  */
+.macro BranchToDiv n, label
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	blo	\label
+.endm
+
+/* Body of div(n).  Shift the divisor in n bits and compare the divisor
+   and dividend.  Update the dividend as the subtraction result.  */
+.macro DoDiv n
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	bcc	1f
+	lsl	curbit, divisor, \n
+	sub	dividend, dividend, curbit
+
+1:	adc	result, result
+.endm
+
+/* The body of division with positive divisor.  Unless the divisor is very
+   big, shift it up in multiples of four bits, since this is the amount of
+   unwinding in the main division loop.  Continue shifting until the divisor
+   is larger than the dividend.  */
+.macro THUMB1_Div_Positive
+	mov	result, #0
+	BranchToDiv #1, LSYM(Lthumb1_div1)
+	BranchToDiv #4, LSYM(Lthumb1_div4)
+	BranchToDiv #8, LSYM(Lthumb1_div8)
+	BranchToDiv #12, LSYM(Lthumb1_div12)
+	BranchToDiv #16, LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_large_positive):
+	mov	result, #0xff
+	lsl	divisor, divisor, #8
+	rev	result, result
+	lsr	curbit, dividend, #16
+	cmp	curbit, divisor
+	blo	1f
+	asr	result, #8
+	lsl	divisor, divisor, #8
+	beq	LSYM(Ldivbyzero_waypoint)
+
+1:	lsr	curbit, dividend, #12
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div12)
+	b	LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_loop):
+	lsr	divisor, divisor, #8
+LSYM(Lthumb1_div16):
+	Dodiv	#15
+	Dodiv	#14
+	Dodiv	#13
+	Dodiv	#12
+LSYM(Lthumb1_div12):
+	Dodiv	#11
+	Dodiv	#10
+	Dodiv	#9
+	Dodiv	#8
+	bcs	LSYM(Lthumb1_div_loop)
+LSYM(Lthumb1_div8):
+	Dodiv	#7
+	Dodiv	#6
+	Dodiv	#5
+LSYM(Lthumb1_div5):
+	Dodiv	#4
+LSYM(Lthumb1_div4):
+	Dodiv	#3
+LSYM(Lthumb1_div3):
+	Dodiv	#2
+LSYM(Lthumb1_div2):
+	Dodiv	#1
+LSYM(Lthumb1_div1):
+	sub	divisor, dividend, divisor
+	bcs	1f
+	cpy	divisor, dividend
+
+1:	adc	result, result
+	cpy	dividend, result
+	RET
+
+LSYM(Ldivbyzero_waypoint):
+	b	LSYM(Ldiv0)
+.endm
+
+/* The body of division with negative divisor.  Similar with
+   THUMB1_Div_Positive except that the shift steps are in multiples
+   of six bits.  */
+.macro THUMB1_Div_Negative
+	lsr	result, divisor, #31
+	beq	1f
+	neg	divisor, divisor
+
+1:	asr	curbit, dividend, #32
+	bcc	2f
+	neg	dividend, dividend
+
+2:	eor	curbit, result
+	mov	result, #0
+	cpy	ip, curbit
+	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
+	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_large):
+	mov	result, #0xfc
+	lsl	divisor, divisor, #6
+	rev	result, result
+	lsr	curbit, dividend, #8
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	asr	result, result, #6
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, 

Re: [patch 4/6] scalar-storage-order merge: bulk

2015-10-13 Thread Jeff Law

On 10/06/2015 05:04 AM, Eric Botcazou wrote:

This is the bulk of the implementation.

* calls.c (store_unaligned_arguments_into_pseudos): Adjust calls to
extract_bit_field and store_bit_field.
(initialize_argument_information): Adjust call to store_expr.
(load_register_parameters): Adjust call to extract_bit_field.
* expmed.c (check_reverse_storage_order_support): New function.
(check_reverse_float_storage_order_support): Likewise.
(flip_storage_order): Likewise.
(store_bit_field_1): Add REVERSE parameter.  Flip the storage order
of the value if it is true.  Pass REVERSE to recursive call after
adjusting the target offset.
Do not use extraction or movstrict instruction if REVERSE is true.
Pass REVERSE to store_fixed_bit_field.
(store_bit_field): Add REVERSE parameter and pass to it to above.
(store_fixed_bit_field): Add REVERSE parameter and pass to it to
store_split_bit_field and store_fixed_bit_field_1.
(store_fixed_bit_field_1):  Add REVERSE parameter.  Flip the storage
order of the value if it is true and adjust the target offset.
(store_split_bit_field): Add REVERSE parameter and pass it to
store_fixed_bit_field.  Adjust the target offset if it is true.
(extract_bit_field_1): Add REVERSE parameter.  Flip the storage order
of the value if it is true.  Pass REVERSE to recursive call after
adjusting the target offset.
Do not use extraction or subreg instruction if REVERSE is true.
Pass REVERSE to extract_fixed_bit_field.
(extract_bit_field): Add REVERSE parameter and pass to it to above.
(extract_fixed_bit_field): Add REVERSE parameter and pass to it to
extract_split_bit_field and extract_fixed_bit_field_1.
(extract_fixed_bit_field_1): Add REVERSE parameter.  Flip the storage
order of the value if it is true and adjust the target offset.
(extract_split_bit_field): Add REVERSE parameter and pass it to
extract_fixed_bit_field.  Adjust the target offset if it is true.
* expmed.h (flip_storage_order): Declare.
(store_bit_field): Adjust prototype.
(extract_bit_field): Likewise.
* expr.c (emit_group_load_1): Adjust calls to extract_bit_field.
(emit_group_store): Adjust call to store_bit_field.
(copy_blkmode_from_reg): Likewise.
(copy_blkmode_to_reg): Likewise.
(write_complex_part): Likewise.
(read_complex_part): Likewise.
(optimize_bitfield_assignment_op): Add REVERSE parameter.  Assert
that it isn't true if the target is a register.
: If it is, do not optimize unless bitsize is equal to 1,
and flip the storage order of the value.
: Flip the storage order of the value.
(get_bit_range): Adjust call to get_inner_reference.
(expand_assignment): Adjust calls to get_inner_reference, store_expr,
optimize_bitfield_assignment_op and store_field.  Handle MEM_EXPRs
with reverse storage order.
(store_expr_with_bounds): Add REVERSE parameter and pass it to
recursive calls and call to store_bit_field.  Force the value into a
register if it is true and then flip the storage order of the value.
(store_expr): Add REVERSE parameter and pass it to above.
(categorize_ctor_elements_1): Adjust call to
initializer_constant_valid_p.
(store_constructor_field): Add REVERSE parameter and pass it to
recursive calls and call to store_field.
(store_constructor): Add REVERSE parameter and pass it to calls to
store_constructor_field and store_expr.  Set it to true for an
aggregate type with TYPE_REVERSE_STORAGE_ORDER.
(store_field): Add REVERSE parameter and pass it to recursive calls
and calls to store_expr and store_bit_field.  Temporarily flip the
storage order of the value with record type and integral mode and
adjust the shift if it is true.
(get_inner_reference): Add PREVERSEP parameter and set it to true
upon encoutering a reference with reverse storage order.
(expand_expr_addr_expr_1): Adjust call to get_inner_reference.
(expand_constructor): Adjust call to store_constructor.
(expand_expr_real_2) : Pass TYPE_REVERSE_STORAGE_ORDER
of the union type to store_expr in the MEM case and assert that it
isn't set in the REG case.  Adjust call to store_field.
(expand_expr_real_1) : Handle reverse storage order.
: Add REVERSEP variable and adjust calls to
get_inner_reference and extract_bit_field. Temporarily flip the
storage order of the value with record type and integral mode and
adjust the shift if it is true.  Flip the storage order of the value
at the end if it is true.
: Add REVERSEP variable and adjust call to

Re: Do not use TYPE_CANONICAL in useless_type_conversion

2015-10-13 Thread Alexandre Oliva
On Oct 13, 2015, Eric Botcazou  wrote:

> Note that this is PR middle-end/67912.

Thanks.  I added this piece of information to the ChangeLog entry, and
checked the patch in.

-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


[PATCH] c++/67942 - diagnose placement new buffer overflow

2015-10-13 Thread Martin Sebor

C++ placement new expression is susceptible to buffer overflow flaws
(see [1]).  In many such cases GCC has sufficient information to
detect and diagnose such defects. The attached patch is a starting
point for this feature.  It lets GCC diagnose basic cases of buffer
overflows when both the size of the buffer and the type being
constructed are constant expressions.  A more sophisticated
implementation would try to detect additional cases in a manner
similar to _FORTIFY_SOURCE.

Besides buffer overflow, placement new can also be misused to
construct objects in unaligned storage (also discussed in the paper
below).  I leave diagnosing such cases and improving the detection
of buffer overflows via a mechanism like Object Size Checking for
a future patch.

Tested on x86_64 with no regressions.

Martin

[1] A New Class of Buffer Overflow Attacks, Kundu, A., Bertino, E.,
31st International Conference on Distributed Computing Systems (ICDCS),
2011 http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=5961725

gcc ChangeLog
2015-10-12  Martin Sebor  

	PR c++/67942
* invoke.texi (-Wplacement-new): Document new option.
	* gcc/testsuite/g++.dg/warn/Wplacement-new-size.C: New test.

gcc/c-family ChangeLog
2015-10-12  Martin Sebor  

	PR c++/67942
* c.opt (-Wplacement-new): New option.

gcc/cp ChangeLog
2015-10-12  Martin Sebor  

	PR c++/67942
	* cp/init.c (warn_placement_new_too_small): New function.
	(build_new_1): Call it.

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 47ba070..5e9d7a3 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -760,6 +760,10 @@ Wprotocol
 ObjC ObjC++ Var(warn_protocol) Init(1) Warning
 Warn if inherited methods are unimplemented

+Wplacement-new
+C++ Var(warn_placement_new) Init(1) Warning
+Warn for placement new expressions with undefined behavior
+
 Wredundant-decls
 C ObjC C++ ObjC++ Var(warn_redundant_decls) Warning
 Warn about multiple declarations of the same object
diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index 1ed8f6c..9d23fea 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -2269,6 +2269,183 @@ throw_bad_array_new_length (void)
   return build_cxx_call (fn, 0, NULL, tf_warning_or_error);
 }

+/* Attempt to verify that the argument, OPER, of a placement new expression
+   refers to an object sufficiently large for an object of TYPE or an array
+   of NELTS of such objects when NELTS is non-null, and issue a warning when
+   it does not.  SIZE specifies the size needed to construct the object or
+   array and captures the result of NELTS * sizeof (TYPE). (SIZE could, in
+   theory, be greater when the array under construction requires a cookie
+   to store NELTS, but GCC's placement new does not store the cookie.  */
+static void
+warn_placement_new_too_small (tree type, tree nelts, tree size, tree oper)
+{
+  const_tree orig_oper = oper;
+
+  /* The number of bytes to add or subtract from the size of the provided
+ buffer based on an offset into an array or an array element reference.  */
+  HOST_WIDE_INT adjust = 0;
+  bool addr_expr = false;
+  bool use_obj_size = false;
+
+  while (TREE_CODE (oper) == NOP_EXPR)
+oper = TREE_OPERAND (oper, 0);
+
+  /* Using a function argument or a (non-array) variable as an argument
+ to placement new is not checked since it's unknownwhat it might
+ point to.  */
+  if (TREE_CODE (oper) == PARM_DECL
+  || TREE_CODE (oper) == VAR_DECL
+  || TREE_CODE (oper) == COMPONENT_REF)
+return;
+
+  /* Evaluate any constant expressions.  */
+  size = fold_non_dependent_expr (size);
+
+  /* Handle the common case of array + offset expression when the offset
+ is a constant.  */
+  if (TREE_CODE (oper) == POINTER_PLUS_EXPR)
+{
+  /* If the offset is compile-time constant, use it to compute a more
+	 accurate estimate of the size of the buffer.  Otherwise, use
+	 the size of the entire array as an optimistic estimate (this
+	 may lead to false negatives).  */
+  const_tree adj = TREE_OPERAND (oper, 1);
+  if (CONSTANT_CLASS_P (adj))
+	adjust += (HOST_WIDE_INT)tree_to_uhwi (adj);
+  else
+	use_obj_size = true;
+
+  oper = TREE_OPERAND (oper, 0);
+
+  while (TREE_CODE (oper) == NOP_EXPR)
+	oper = TREE_OPERAND (oper, 0);
+}
+
+  if (TREE_CODE (oper) == TARGET_EXPR)
+oper = TREE_OPERAND (oper, 1);
+  else if (TREE_CODE (oper) == ADDR_EXPR) {
+addr_expr = true;
+oper = TREE_OPERAND (oper, 0);
+  }
+
+  while (TREE_CODE (oper) == NOP_EXPR)
+oper = TREE_OPERAND (oper, 0);
+
+  if (TREE_CODE (oper) == ARRAY_REF)
+{
+  // fold_array_ref (oper);
+
+  /* Similar to the offset computed above, see if the array index
+	 is a compile-time constant.  If so, and unless the offset was
+	 not a compile-time constant, use the index to determine the
+	 size of the buffer.  Otherwise, use the entire array as
+	 an optimistic estimate of the size.  */
+  const_tree adj = TREE_OPERAND (oper, 1);
+  

[PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)

2015-10-13 Thread Marek Polacek
This patch implements the copysign optimization for reassoc I promised
I'd look into.  I.e.,

CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0
CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0

After getting familiar with reassoc a bit this wasn't that hard.  But
I'm hopeless when it comes to floating-point stuff, so I'd appreciate
if you could glance over the tests.  The reassoc-40.c should address
Joseph's comment in the audit trail (with -fno-rounding-math the
optimization would take place).

For 0.0 * copysign (cst, x), the result is folded into 0.0 way before
reassoc, so we probably don't have to pay attention to this case.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-10-13  Marek Polacek  

PR tree-optimization/67815
* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
(reassociate_bb): Call it.

* gcc.dg/tree-ssa/reassoc-39.c: New test.
* gcc.dg/tree-ssa/reassoc-40.c: New test.

diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c 
gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
index e69de29..589d06b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
@@ -0,0 +1,41 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
+
+float
+f0 (float x)
+{
+  return 7.5 * __builtin_copysignf (2.0, x);
+}
+
+float
+f1 (float x)
+{
+  return -7.5 * __builtin_copysignf (2.0, x);
+}
+
+double
+f2 (double x, double y)
+{
+  return x * ((1.0/12) * __builtin_copysign (1.0, y));
+}
+
+double
+f3 (double x, double y)
+{
+  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
+}
+
+double
+f4 (double x, double y, double z)
+{
+  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
+}
+
+double
+f5 (double x, double y, double z)
+{
+  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1"} }*/
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c 
gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
index e69de29..d65bcc1b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization doesn't fire for
+   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+   is inexact.  */
+
+double
+f1 (double y)
+{
+  return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+  return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
index 879722e..b8897b7 100644
--- gcc/tree-ssa-reassoc.c
+++ gcc/tree-ssa-reassoc.c
@@ -4622,6 +4622,95 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
   return result;
 }
 
 
+/* Attempt to optimize
+   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
+   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
+
+static void
+attempt_builtin_copysign (vec<operand_entry *> *ops)
+{
+  operand_entry *oe;
+  unsigned int i;
+  unsigned int length = ops->length ();
+  tree cst1 = ops->last ()->op;
+
+  if (length == 1 || TREE_CODE (cst1) != REAL_CST)
+return;
+
+  FOR_EACH_VEC_ELT (*ops, i, oe)
+{
+  if (TREE_CODE (oe->op) == SSA_NAME)
+   {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
+ if (is_gimple_call (def_stmt))
+   {
+ tree fndecl = gimple_call_fndecl (def_stmt);
+ tree cst2;
+ switch (DECL_FUNCTION_CODE (fndecl))
+   {
+   CASE_FLT_FN (BUILT_IN_COPYSIGN):
+ cst2 = gimple_call_arg (def_stmt, 0);
+ /* The first argument of copysign must be a constant,
+otherwise there's nothing to do.  */
+ if (TREE_CODE (cst2) == REAL_CST)
+   {
+ tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst1),
+ cst1, cst2);
+ /* If we couldn't fold to a single constant, skip it.  */
+ if (mul == NULL_TREE)
+   break;
+ /* We're going to replace the copysign argument with
+the multiplication product.  Remove the constant.  */
+ ops->pop ();
+ gimple_call_set_arg (def_stmt, 0, mul);
+ bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst1));
+ /* Handle the CST1 < 0 case -- negate the result.  */
+ if (cst1_neg)
+   {
+ tree lhs = gimple_call_lhs (def_stmt);
+ tree negrhs = make_ssa_name 

Re: [patch 0/6] scalar-storage-order merge (2)

2015-10-13 Thread Eric Botcazou
> My main question about this series is - how generally useful do you
> expect it to be? I know of some different projects that would like
> bi-endian capability, but it looks like this series implements something
> that is a little too limited to be of use in these cases.

AdaCore has customers who have been using it for a few years.  With the inline 
pragma and either the configuration pragma (Ada) or the switch (C/C++), you 
can use it without much code rewriting.

> It looks like it comes with a nontrivial maintenance cost.

Nontrivial but manageable IMO and the heavily modified parts (mostly the RTL 
expander) are "cold" these days.  I suspect that less "limited" versions would 
be far more intrusive and less manageable.

Of course I would do the maintenance (I have been doing it for a few years at 
AdaCore), except for the C++ front-end that I don't know at all; that's why 
I'm OK to drop the C++ support for now.

-- 
Eric Botcazou


  1   2   >