[PATCH] Don't lower VEC_PERM_EXPR if it can be expanded using vec_shr optab (PR target/68483)

2015-11-23 Thread Jakub Jelinek
Hi!

The patches that removed VEC_RSHIFT_EXPR regressed the first of these
testcases on i?86/-msse2, because can_vec_perm_p returns false for that,
and indeed as can_vec_perm_p is given only the mode and mask indices,
there is nothing it can do about it.  The former VEC_RSHIFT_EXPR
is a special VEC_PERM_EXPR with zero (bitwise, so not -0.0) as second
argument and we can use vec_shr in that case.  The expander knows that, but
veclower hasn't been taught about that, which is what this patch does.

The patch also fixes up the shift_amt_for_vec_perm_mask function: if the
first index is >= nelt, then it certainly is not a vector shift but an
all-zeros result (we should have folded it); and when first is < nelt,
it doesn't make sense to mask the result, because even for
first == nelt - 1, first + nelt - 1 is <= 2 * nelt - 1.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/5.3?

2015-11-23  Jakub Jelinek  

PR target/68483
* tree-vect-generic.c (lower_vec_perm): If VEC_PERM_EXPR
is valid vec_shr pattern, don't lower it even if can_vec_perm_p
returns false.
* optabs.c (shift_amt_for_vec_perm_mask): Return NULL_RTX
whenever first is nelt or above.  Don't mask expected with
2 * nelt - 1.

* gcc.target/i386/pr68483-1.c: New test.
* gcc.target/i386/pr68483-2.c: New test.

--- gcc/tree-vect-generic.c.jj  2015-11-23 13:29:41.959236201 +0100
+++ gcc/tree-vect-generic.c 2015-11-23 14:13:10.378094173 +0100
@@ -1272,6 +1272,30 @@ lower_vec_perm (gimple_stmt_iterator *gs
  update_stmt (stmt);
  return;
}
+  /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
+vector as VEC1 and a right element shift MASK.  */
+  if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
+ != CODE_FOR_nothing
+ && TREE_CODE (vec1) == VECTOR_CST
+ && initializer_zerop (vec1)
+ && sel_int[0]
+ && sel_int[0] < elements)
+   {
+ for (i = 1; i < elements; ++i)
+   {
+ unsigned int expected = i + sel_int[0];
+ /* Indices into the second vector are all equivalent.  */
+ if (MIN (elements, (unsigned) sel_int[i])
+ != MIN (elements, expected))
+   break;
+   }
+ if (i == elements)
+   {
+ gimple_assign_set_rhs3 (stmt, mask);
+ update_stmt (stmt);
+ return;
+   }
+   }
 }
   else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
 return;
--- gcc/optabs.c.jj 2015-11-23 13:29:41.706239800 +0100
+++ gcc/optabs.c2015-11-23 13:33:14.857205132 +0100
@@ -5232,12 +5232,12 @@ shift_amt_for_vec_perm_mask (rtx sel)
 return NULL_RTX;
 
   first = INTVAL (CONST_VECTOR_ELT (sel, 0));
-  if (first >= 2*nelt)
+  if (first >= nelt)
 return NULL_RTX;
   for (i = 1; i < nelt; i++)
 {
   int idx = INTVAL (CONST_VECTOR_ELT (sel, i));
-  unsigned int expected = (i + first) & (2 * nelt - 1);
+  unsigned int expected = i + first;
   /* Indices into the second vector are all equivalent.  */
   if (idx < 0 || (MIN (nelt, (unsigned) idx) != MIN (nelt, expected)))
return NULL_RTX;
--- gcc/testsuite/gcc.target/i386/pr68483-1.c.jj2015-11-23 
14:27:54.213534756 +0100
+++ gcc/testsuite/gcc.target/i386/pr68483-1.c   2015-11-23 14:33:57.810362424 
+0100
@@ -0,0 +1,22 @@
+/* PR target/68483 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2 -mno-sse3" } */
+
+void
+test (int *input, int *out, unsigned x1, unsigned x2)
+{
+  unsigned i, j;
+  unsigned end = x1;
+
+  for (i = j = 0; i < 1000; i++)
+{
+  int sum = 0;
+  end += x2;
+  for (; j < end; j++)
+   sum += input[j];
+  out[i] = sum;
+}
+}
+
+/* { dg-final { scan-assembler "psrldq\[^\n\r]*(8,|, 8)" { target ia32 } } } */
+/* { dg-final { scan-assembler "psrldq\[^\n\r]*(4,|, 4)" { target ia32 } } } */
--- gcc/testsuite/gcc.target/i386/pr68483-2.c.jj2015-11-23 
14:33:22.436865628 +0100
+++ gcc/testsuite/gcc.target/i386/pr68483-2.c   2015-11-23 14:34:33.716851638 
+0100
@@ -0,0 +1,15 @@
+/* PR target/68483 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3" } */
+
+typedef int V __attribute__((vector_size (16)));
+
+void
+foo (V *a, V *b)
+{
+  V c = { 0, 0, 0, 0 };
+  V d = { 1, 2, 3, 4 };
+  *a = __builtin_shuffle (*b, c, d);
+}
+
+/* { dg-final { scan-assembler "psrldq\[^\n\r]*(4,|, 4)" } } */

Jakub


Re: [PATCH/RFC] C++ FE: expression ranges (v2)

2015-11-23 Thread Jason Merrill

On 11/23/2015 12:07 PM, Marek Polacek wrote:

On Mon, Nov 23, 2015 at 05:57:54PM +0100, Jakub Jelinek wrote:

On Mon, Nov 23, 2015 at 11:53:40AM -0500, David Malcolm wrote:

Does the following look like the kind of thing you had in mind?  (just
the tree.def part for now).   Presumably usable for both lvalues and
rvalues, where the thing it wraps is what's important.  It merely exists
to add an EXPR_LOCATION, for a usage of the wrapped thing.


Yes, but please see with Jason, Richard and perhaps others if they are ok
with that too before spending too much time in that direction.
All occurrences of it would have to be folded away during the gimplification
at latest, this shouldn't be something we use in the middle-end.


I'd expect LOCATION_EXPR be defined in c-family/c-common.def, not tree.def.
And I'd think it shouldn't survive genericizing, thus never leak into the ME.


Makes sense.

Jason




Re: [PATCH] New version of libmpx with new memmove wrapper

2015-11-23 Thread Aleksandra Tsvetkova
gcc/testsuite/ChangeLog
+2015-10-27  Tsvetkova Alexandra  
+
+ * gcc.target/i386/mpx/memmove.c: New test for __mpx_wrapper_memmove.

libmpx/ChangeLog
+2015-10-28  Tsvetkova Alexandra  
+
+ * mpxrt/Makefile.am (libmpx_la_LDFLAGS): Add -version-info option.
+ * libmpxwrap/Makefile.am (libmpx_la_LDFLAGS): Likewise + includes fixed.
+ * libmpx/Makefile.in: Regenerate.
+ * mpxrt/Makefile.in: Regenerate.
+ * libmpxwrap/Makefile.in: Regenerate.
+ * mpxrt/libtool-version: New version.
+ * libmpxwrap/libtool-version: Likewise.
+ * mpxrt/libmpx.map: Add new version and a new symbol.
+ * mpxrt/mpxrt.h: New file.
+ * mpxrt/mpxrt.c (NUM_L1_BITS): Moved to mpxrt.h.
+(REG_IP_IDX): Moved to mpxrt.h.
+(REX_PREFIX): Moved to mpxrt.h.
+(XSAVE_OFFSET_IN_FPMEM): Moved to mpxrt.h.
+(MPX_L1_SIZE): Moved to mpxrt.h.
+ * libmpxwrap/mpx_wrappers.c: Rewrite __mpx_wrapper_memmove
+ to make it faster.
+ New types: mpx_pointer for extraction of indexes from pointer
+   mpx_bt_entry represents a cell in bounds table.
+ New functions: alloc_bt for allocating bounds table
+   get_bt to get address of bounds table
+   copy_if_possible and copy_if_possible_from_end move elements
+   of bounds table if we can
+   move_bounds moves bounds just like memmove


All fixed except for:

>>+static inline void
>>+alloc_bt (void *ptr)
>>+{
>>+  __asm__ __volatile__ ("bndstx %%bnd0, (%0,%0)"::"r" (ptr):"%bnd0");
>>+}
>
>This should be marked as bnd_legacy.

It will not work.

> +void *
> +__mpx_wrapper_memmove (void *dst, const void *src, size_t n)
> +{
> +  if (n == 0)
> +return dst;
> +
> +  __bnd_chk_ptr_bounds (dst, n);
> +  __bnd_chk_ptr_bounds (src, n);
> +
> +  memmove (dst, src, n);
> +  move_bounds (dst, src, n);
> +  return dst;
>  }
>
> You completely remove old algorithm which should be faster on small
> sizes. __mpx_wrapper_memmove should become a dispatcher between old
> and new implementations depending on target (32-bit or 64-bit) and N.
> Since old version performs both data and bounds copy, BD check should
> be moved into __mpx_wrapper_memmove to never call
> it when MPX is disabled.

Even though the old algorithm is faster on small sizes, it should not be used
with the new one because the new one supports unaligned pointers and the
old one does not. Different behavior may cause more problems.

Thanks,
Alexandra.
diff --git a/gcc/testsuite/gcc.target/i386/mpx/memmove.c 
b/gcc/testsuite/gcc.target/i386/mpx/memmove.c
new file mode 100755
index 000..57030a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mpx/memmove.c
@@ -0,0 +1,119 @@
+/* { dg-do run } */
+/* { dg-options "-fcheck-pointer-bounds -mmpx" } */
+
+
+#include 
+#include 
+#include 
+#include 
+#include "mpx-check.h"
+
+#ifdef __i386__
+/* i386 directory size is 4MB.  */
+#define MPX_NUM_L2_BITS 10
+#define MPX_NUM_IGN_BITS 2
+#else /* __i386__ */
+/* x86_64 directory size is 2GB.  */
+#define MPX_NUM_L2_BITS 17
+#define MPX_NUM_IGN_BITS 3
+#endif /* !__i386__ */
+
+
+/* bt_num_of_elems is the number of elements in bounds table.  */
+unsigned long bt_num_of_elems = (1UL << MPX_NUM_L2_BITS);
+/* Function to test MPX wrapper of memmove function.
+   src_bigger_dst determines which address is bigger, can be 0 or 1.
+   src_bt_index and dst_bt_index are bt_indexes
+   from the beginning of the page.
+   bd_index_end is the bd index of the last element of src if we define
+   bd index of the first element as 0.
+   src_bt_index_end is bt index of the last element of src.
+   pointers_inside determines if array being copied includes pointers.
+   src_align and dst_align are alignments of src and dst.
+   Arrays may contain unaligned pointers.  */
+int
+test (int src_bigger_dst, int src_bt_index, int dst_bt_index,
+  int bd_index_end, int src_bt_index_end, int pointers_inside,
+  int src_align, int dst_align)
+{
+  const int n =
+src_bt_index_end - src_bt_index + bd_index_end * bt_num_of_elems;
+  if (n < 0)
+{
+  return 0;
+}
+  const int num_of_pointers = (bd_index_end + 2) * bt_num_of_elems;
+  void **arr = 0;
+  posix_memalign ((void **) (),
+   1UL << (MPX_NUM_L2_BITS + MPX_NUM_IGN_BITS),
+   num_of_pointers * sizeof (void *));
+  void **src = arr, **dst = arr;
+  if ((src_bigger_dst) && (src_bt_index < dst_bt_index))
+src_bt_index += bt_num_of_elems;
+  if (!(src_bigger_dst) && (src_bt_index > dst_bt_index))
+dst_bt_index += bt_num_of_elems;
+  src += src_bt_index;
+  dst += dst_bt_index;
+  char *realign = (char *) src;
+  realign += src_align;
+  src = (void **) realign;
+  realign = (char *) dst;
+  realign += src_align;
+  dst = (void **) realign;
+  if (pointers_inside)
+{
+  for (int i = 0; i < n; i++)
+src[i] = __bnd_set_ptr_bounds (arr + i, i * sizeof (void *) + 1);
+}
+  memmove (dst, src, n * sizeof (void *));
+  if (pointers_inside)
+{
+

Re: [PATCH] Fix PR objc/68438 (uninitialized source ranges)

2015-11-23 Thread David Malcolm
On Mon, 2015-11-23 at 10:25 -0700, Jeff Law wrote:
> On 11/23/2015 04:13 AM, Joseph Myers wrote:
> > On Sun, 22 Nov 2015, David Malcolm wrote:
> >
> >> Is there (or could there be) a precanned dg- directive to ask if ObjC is
> >> available?
> >
> > I don't think so.  Normal practice is that each language's tests are in
> > appropriate directories for that language, with runtest never called with
> > a --tool option for that language if it wasn't built.
> Right.  Which argues that we really want to create a new test directory 
> for objc plugin tests.

Attached is a revised version of the patch which creates an
objc.dg/plugin subdirectory, and builds the plugin that way (directly
reusing the plugin src from the gcc.dg subdir).

Successfully bootstrapped on x86_64-pc-linux-gnu; adds 16
PASS results to objc.sum.

OK for trunk?

>From f09c48b2ac55b2f9b5c3688e76fb4b91c3325fbb Mon Sep 17 00:00:00 2001
From: David Malcolm 
Date: Fri, 20 Nov 2015 11:12:47 -0500
Subject: [PATCH] Fix PR objc/68438 (uninitialized source ranges)

gcc/c/ChangeLog:
	PR objc/68438
	* c-parser.c (c_parser_postfix_expression): Set up source ranges
	for various Objective-C constructs: Class.name syntax,
	@selector(), @protocol, @encode(), and [] message syntax.

gcc/testsuite/ChangeLog:
	PR objc/68438
	* objc.dg/plugin/diagnostic-test-expressions-1.m: New test file.
	* objc.dg/plugin/plugin.exp: New file, based on
	gcc.dg/plugin/plugin.exp.
---
 gcc/c/c-parser.c   | 17 +++-
 .../objc.dg/plugin/diagnostic-test-expressions-1.m | 94 ++
 gcc/testsuite/objc.dg/plugin/plugin.exp| 90 +
 3 files changed, 198 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/objc.dg/plugin/diagnostic-test-expressions-1.m
 create mode 100644 gcc/testsuite/objc.dg/plugin/plugin.exp

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 7b10764..18e9957 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -7338,10 +7338,13 @@ c_parser_postfix_expression (c_parser *parser)
 		expr.value = error_mark_node;
 		break;
 	  }
-	component = c_parser_peek_token (parser)->value;
+	c_token *component_tok = c_parser_peek_token (parser);
+	component = component_tok->value;
+	location_t end_loc = component_tok->get_finish ();
 	c_parser_consume_token (parser);
 	expr.value = objc_build_class_component_ref (class_name, 
 			 component);
+	set_c_expr_source_range (, loc, end_loc);
 	break;
 	  }
 	default:
@@ -7816,9 +7819,11 @@ c_parser_postfix_expression (c_parser *parser)
 	}
 	  {
 	tree sel = c_parser_objc_selector_arg (parser);
+	location_t close_loc = c_parser_peek_token (parser)->location;
 	c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
    "expected %<)%>");
 	expr.value = objc_build_selector_expr (loc, sel);
+	set_c_expr_source_range (, loc, close_loc);
 	  }
 	  break;
 	case RID_AT_PROTOCOL:
@@ -7839,9 +7844,11 @@ c_parser_postfix_expression (c_parser *parser)
 	  {
 	tree id = c_parser_peek_token (parser)->value;
 	c_parser_consume_token (parser);
+	location_t close_loc = c_parser_peek_token (parser)->location;
 	c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
    "expected %<)%>");
 	expr.value = objc_build_protocol_expr (id);
+	set_c_expr_source_range (, loc, close_loc);
 	  }
 	  break;
 	case RID_AT_ENCODE:
@@ -7860,11 +7867,13 @@ c_parser_postfix_expression (c_parser *parser)
 	  c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
 	  break;
 	}
-	  c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
- "expected %<)%>");
 	  {
+	location_t close_loc = c_parser_peek_token (parser)->location;
+	c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
+ "expected %<)%>");
 	tree type = groktypename (t1, NULL, NULL);
 	expr.value = objc_build_encode_expr (type);
+	set_c_expr_source_range (, loc, close_loc);
 	  }
 	  break;
 	case RID_GENERIC:
@@ -7907,9 +7916,11 @@ c_parser_postfix_expression (c_parser *parser)
 	  c_parser_consume_token (parser);
 	  receiver = c_parser_objc_receiver (parser);
 	  args = c_parser_objc_message_args (parser);
+	  location_t close_loc = c_parser_peek_token (parser)->location;
 	  c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE,
  "expected %<]%>");
 	  expr.value = objc_build_message_expr (receiver, args);
+	  set_c_expr_source_range (, loc, close_loc);
 	  break;
 	}
   /* Else fall through to report error.  */
diff --git a/gcc/testsuite/objc.dg/plugin/diagnostic-test-expressions-1.m b/gcc/testsuite/objc.dg/plugin/diagnostic-test-expressions-1.m
new file mode 100644
index 000..ed7aca3
--- /dev/null
+++ b/gcc/testsuite/objc.dg/plugin/diagnostic-test-expressions-1.m
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdiagnostics-show-caret" } */
+
+/* This file is similar to diagnostic-test-expressions-1.c
+   (see the 

Re: [PATCH 5/6] Fix parser memory leak in cilk_simd_fn_info

2015-11-23 Thread Jeff Law

On 11/23/2015 06:48 AM, marxin wrote:

gcc/cp/ChangeLog:

2015-11-23  Martin Liska  

* parser.c (cp_parser_late_parsing_cilk_simd_fn_info):
Release tokens.
There's a vec of objects in cilk_simd_fn_info, so unless that vec is 
copied elsewhere, we definitely want to release them before we blow away 
parser->cilk_simd_fn_info.  AFAICT the vec is never copied elsewhere.  So...


OK for the trunk.

jeff




Re: [PATCH] PR c/68473: sanitize source range-printing within certain macro expansions

2015-11-23 Thread David Malcolm
On Mon, 2015-11-23 at 18:59 +0100, Bernd Schmidt wrote:
> On 11/23/2015 06:52 PM, David Malcolm wrote:
> > This patch fixes PR c/68473 by bulletproofing the new
> > diagnostic_show_locus implementation against ranges that finish before
> > they start (which can happen when using the C preprocessor), falling
> > back to simply printing a caret.
> 
> Hmm, wouldn't it be better to avoid such a situation? Can you describe a 
> bit more how exactly the macro expansion caused such a situation?

The issue is here:

 1  /* { dg-options "-fdiagnostics-show-caret -mno-fp-ret-in-387" } */
 2  
 3  extern long double fminl (long double __x, long double __y);
 4  
 5  #define TEST_EQ(FUNC) do { \
 6if ((long)FUNC##l(xl,xl) != (long)xl) \
 7  return; \
 8} while (0)
 9  
10  void
11  foo (long double xl)
12  {
13TEST_EQ (fmin); /* { dg-error "x87 register return with x87 disabled" 
} */
14  }


16  /* { dg-begin-multiline-output "" }
17 TEST_EQ (fmin);
18  ^
19 { dg-end-multiline-output "" } */
20  
21  /* { dg-begin-multiline-output "" }
22 if ((long)FUNC##l(xl,xl) != (long)xl) \
23   ^~~~
24 { dg-end-multiline-output "" } */

An error is emitted whilst expanding the macro at line 13, at
input_location.

This is at the expansion of this function call:

   fminl (xl, xl)

Normally we'd emit a source range like this for a function call:

   fminl (xl, xl)
   ^~

However, once we fully resolve locations, the "fmin" part of "fminl"
appears at line 13 here:

13TEST_EQ (fmin);
   ^~~~

giving the location of the caret, and start of the range, whereas the
rest of the call is spelled here:

 6if ((long)FUNC##l(xl,xl) != (long)xl) \
   ~~~

where the close paren gives the end of the range.

It would be wrong to try to print the whole range (anything might be
between lines 6 and 13).

In theory we could attempt to handle this kind of thing by
looking at the macro expansions, and print something like:

13TEST_EQ (fmin);
   ^~~~
 6if ((long)FUNC##l(xl,xl) != (long)xl) \
  

or whatnot, but that strikes me as error-prone at this stage.


The patch instead detects such a situation, and tries to handle things
gracefully by falling back to simply printing a caret, without any
underlines:

pr68473-1.c: In function ‘foo’:
pr68473-1.c:13:12: error: x87 register return with x87 disabled
   TEST_EQ (fmin);
^

pr68473-1.c:6:13: note: in definition of macro ‘TEST_EQ’
   if ((long)FUNC##l(xl,xl) != (long)xl) \
 ^~~~


Dave



Re: update zlib to 1.2.8

2015-11-23 Thread Joel Brobecker
> In GCC zlib is only used for libjava; for binutils and gdb it is used when
> building without --with-system-zlib.  This just updates zlib from 1.2.7 to
> 1.2.8 (released in 2013).  Applies cleanly, libjava still builds and doesn't
> show any regressions in the testsuite.  Ok to apply (even if we already are
> in stage3)?

> +2015-11-23  Matthias Klose  
> +
> +   * Imported zlib 1.2.8; merged local changes.

Should not be a problem for GDB, since we're not near branching time.

Out of curiosity, what prompted this update? Just to be in sync with
the latest? Or was there an actual bug that you hit which 1.2.8 fixes?

-- 
Joel


Re: Fix lto-symtab ICE during Ada LTO bootstrap

2015-11-23 Thread Jan Hubicka
BTW for the LTO type merging issues one could probably just drop those types
and all derivations to alias set 0. But indeed rewriting them to pointers would
be better, especially for ABI compatibility.

The Ada ICE I get is:
Continuing.
+===GNAT BUG DETECTED==+
| 6.0.0 20151122 (experimental) (x86_64-pc-linux-gnu) Assert_Failure 
atree.adb:6776|
| Error detected at system.ads:107:4   |
| Please submit a bug report; see http://gcc.gnu.org/bugs.html.|
| Use a subject line meaningful to you and us to track the bug.|
| Include the entire contents of this bug box in the report.   |
| Include the exact command that you entered.  |
| Also include sources listed below.   |
+==+

Please include these source files with error report
Note that list may not be accurate in some cases,
so please double check that the problem can still
be reproduced with the set of files listed.
Consider also -gnatd.n switch (see debug.adb).

../../gcc/ada/system.ads
../../gcc/ada/a-except.adb
../../gcc/ada/a-except.ads
../../gcc/ada/ada.ads
../../gcc/ada/s-parame.ads
../../gcc/ada/s-stalib.ads
../../gcc/ada/a-unccon.ads
../../gcc/ada/s-traent.ads
../../gcc/ada/s-excdeb.ads
../../gcc/ada/s-soflin.ads
../../gcc/ada/s-stache.ads
../../gcc/ada/s-stoele.ads

compilation abandoned

(gdb) bt
#0  atree__unchecked_access__set_flag96.part.697.lto_priv.6676 () at 
../../gcc/ada/atree.adb:6776
#1  0x01711774 in atree__unchecked_access__set_flag96 (n=, val=) at ../../gcc/ada/atree.adb:6774
#2  0x0126a95c in einfo.set_warnings_off (v=, id=0) at 
../../gcc/ada/einfo.adb:6435
#3  sem_prag.analyze_pragma () at ../../gcc/ada/sem_prag.adb:22879
#4  0x00989893 in sem.analyze (n=12466) at ../../gcc/ada/sem.adb:456
#5  0x00cac089 in sem_ch3.analyze_declarations (l=-8775) at 
../../gcc/ada/sem_ch3.adb:2323
#6  0x0134e4d5 in sem_ch7.analyze_package_specification () at 
../../gcc/ada/sem_ch7.adb:1395
#7  0x009898ab in sem.analyze (n=12078) at ../../gcc/ada/sem.adb:450
#8  0x013517d8 in sem_ch7.analyze_package_declaration (n=12875) at 
../../gcc/ada/sem_ch7.adb:1006
#9  0x00989e89 in sem.analyze (n=n@entry=12875) at 
../../gcc/ada/sem.adb:441
#10 0x00998d6d in sem_ch10.analyze_compilation_unit (n=n@entry=12067) 
at ../../gcc/ada/sem_ch10.adb:892
#11 0x00989947 in sem.analyze (n=n@entry=12067) at 
../../gcc/ada/sem.adb:174
#12 0x0099760f in sem.semantics.do_analyze () at 
../../gcc/ada/sem.adb:1337
#13 sem.semantics () at ../../gcc/ada/sem.adb:1517
#14 0x00998039 in sem_ch10.analyze_with_clause (n=n@entry=2286) at 
../../gcc/ada/sem_ch10.adb:2540
#15 0x00989a7f in sem.analyze (n=n@entry=2286) at 
../../gcc/ada/sem.adb:601
#16 0x00991e67 in sem_ch10.analyze_context (n=n@entry=2284) at 
../../gcc/ada/sem_ch10.adb:1371
#17 0x00998cb0 in sem_ch10.analyze_compilation_unit (n=n@entry=2284) at 
../../gcc/ada/sem_ch10.adb:686
#18 0x00989947 in sem.analyze (n=n@entry=2284) at 
../../gcc/ada/sem.adb:174
#19 0x0099760f in sem.semantics.do_analyze () at 
../../gcc/ada/sem.adb:1337
#20 sem.semantics () at ../../gcc/ada/sem.adb:1517
#21 0x0090e5f9 in frontend () at ../../gcc/ada/frontend.adb:408
#22 0x0146de0a in _ada_gnat1drv () at ../../gcc/ada/gnat1drv.adb:1029
#23 0x006f579e in gnat_parse_file() [clone .lto_priv.5151] () at 
../../gcc/ada/gcc-interface/misc.c:121
#24 0x016f723c in compile_file () at ../../gcc/toplev.c:464
#25 0x0068996e in do_compile () at ../../gcc/toplev.c:1951
#26 toplev::main (this=this@entry=0x7fffe850, argc=argc@entry=39, 
argv=argv@entry=0x7fffe958) at ../../gcc/toplev.c:2058
#27 0x00688e29 in main (argc=39, argv=0x7fffe958) at 
../../gcc/main.c:39

If you have any clue how to debug it further, I would be happy to try.
That atree code is a real software engineering treat, BTW

Honza


[RFA] [PATCH] Fix invalid redundant extension elimination for rl78 port

2015-11-23 Thread Jeff Law


The core analysis was from Nick.  Essentially:


(insn  44 (set (reg:QI r11) (mem:QI (reg:HI r20)))
(insn  45 (set (reg:QI r10) (mem:QI (reg:HI r18)))
[...]
(insn  71 (set (reg:HI r14) (zero_extend:HI (reg:QI r11)))
[...]
(insn  88 (set (reg:HI r10) (zero_extend:HI (reg:QI r10)))

  (This is on the RL78 target where HImode values occupy two hard
  registers and QImode values only one.  The bug however is generic, not
  RL78 specific).

  The REE pass transforms this into:

(insn  44 (set (reg:QI r11) (mem:QI (reg:HI r20)))
(insn  45 (set (reg:HI r10) (zero_extend:HI (mem:QI (reg:HI r18
[...]
(insn  71 (set (reg:HI r14) (zero_extend:HI (reg:QI r11)))
[...]
(insn  88 deleted)

  Note how the new set at insn 45 clobbers the value loaded by insn 44
  into r11.  Thus when we use the value in insn 71, we're using the
  wrong value.


Nick had a more complex patch which tried to determine if the additional 
hard registers were used/set.  But the implementation was flawed in that 
it assumed the use succeeded the def in the linear insn chain, which is 
an invalid assumption in general.  For this to work what we'd really 
have to do is note all the blocks through which there's a path from the 
def to the use, then check for uses/sets within all those blocks.


Given this scenario is quite rare, it doesn't seem worth the effort. 
Even with an abort in the codepath, I can't get it to trigger during 
normal x86_64 or rl78 builds.  It only triggers on the rl78 with -O1 -free.


As I mentioned in a prior message on the subject, this is only a problem 
when the source/dest of the extension are the same.  When the 
source/dest of the extension are different, we only optimize when the 
original set and extension are in the same block and we verify that all 
affected registers are not set/used between the original set and the 
extension.
Bootstrapped and regression tested on x86_64-linux-gnu.  Also tested 
execute.exp on rl78 with no regressions.


I didn't include a distinct testcase as these are covered by pr42833 and 
strct-stdarg-1.c -- but only when those are run with -O1 -free.  I can 
certainly add a -free test for those tests if folks want.


I took this opportunity to also remove a block of #if 0'd code that I 
had in place for this situation, but had been unable to trigger.  I 
prefer Nick's location for the test.


Ok for the trunk?



Jeff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e560746..29ed4e4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2015-11-18  Nick Clifton  
+   Jeff Law  
+
+   * ree.c (add_removable_extension): Avoid mis-optimizing cases where
+   the source/dest of the target extension require a different number of
+   hard registers.
+   (combine_set_extension): Remove #if 0 code.
+
 2015-11-20  Jim Wilson  
 
* tree-vect-data-refs.c (compare_tree): Call STRIP_NOPS.
diff --git a/gcc/ree.c b/gcc/ree.c
index b8436f2..f3b79e0 100644
--- a/gcc/ree.c
+++ b/gcc/ree.c
@@ -332,16 +332,6 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   else
 new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
 
-#if 0
-  /* Rethinking test.  Temporarily disabled.  */
-  /* We're going to be widening the result of DEF_INSN, ensure that doing so
- doesn't change the number of hard registers needed for the result.  */
-  if (HARD_REGNO_NREGS (REGNO (new_reg), cand->mode)
-  != HARD_REGNO_NREGS (REGNO (SET_DEST (*orig_set)),
-  GET_MODE (SET_DEST (*orig_set
-   return false;
-#endif
-
   /* Merge constants by directly moving the constant into the register under
  some conditions.  Recall that RTL constants are sign-extended.  */
   if (GET_CODE (orig_src) == CONST_INT
@@ -1080,6 +1070,18 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
  }
  }
 
+  /* Fourth, if the extended version occupies more registers than the
+original and the source of the extension is the same hard register
+as the destination of the extension, then we can not eliminate
+the extension without deep analysis, so just punt.
+
+We allow this when the registers are different because the
+code in combine_reaching_defs will handle that case correctly.  */
+  if ((HARD_REGNO_NREGS (REGNO (dest), mode)
+  != HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)))
+ && REGNO (dest) == REGNO (reg))
+   return;
+
   /* Then add the candidate to the list and insert the reaching definitions
  into the definition map.  */
   ext_cand e = {expr, code, mode, insn};


[ptx] Fix sso tests

2015-11-23 Thread Nathan Sidwell
The gcc.dg/sso tests gratuitously fail on PTX because they use IO facilities 
that don't exist there.  This patch changes the dumping to use the putchar 
function call (and not a macro), and to avoid fputs.


With this they all pass.

I'm not quite sure where the maintainer boundaries lie for this kind of fix. 
Any objections?


nathan
2015-11-23  Nathan Sidwell  

	* gcc.dg/sso/dump.h: Force IO to be putchar function call on nvptx.

Index: gcc/testsuite/gcc.dg/sso/dump.h
===
--- gcc/testsuite/gcc.dg/sso/dump.h	(revision 230718)
+++ gcc/testsuite/gcc.dg/sso/dump.h	(working copy)
@@ -1,3 +1,9 @@
+#ifdef __nvptx__
+/* Force function call.  NVPTX's IO is extremely limited.  */
+#undef putchar
+#define putchar (putchar)
+#endif
+
 void dump (void *p, unsigned int len)
 {
   const char digits[17] = "0123456789abcdef";
@@ -14,7 +20,13 @@ void dump (void *p, unsigned int len)
 
 void put (const char s[])
 {
+#ifdef  __nvptx__
+  int i;
+  for (i = 0; s[i]; i++)
+putchar (s[i]);
+#else
   fputs (s, stdout);
+#endif
 }
 
 void new_line (void)


Re: [PATCH 1/6] Fix memory leak in cilk

2015-11-23 Thread Trevor Saunders
> diff --git a/gcc/c-family/cilk.c b/gcc/c-family/cilk.c
> index e75e20c..1167b2b 100644
> --- a/gcc/c-family/cilk.c
> +++ b/gcc/c-family/cilk.c
> @@ -844,6 +844,7 @@ gimplify_cilk_spawn (tree *spawn_p)
>   call2, build_empty_stmt (EXPR_LOCATION (call1)));
>append_to_statement_list (spawn_expr, spawn_p);
>  
> +  free (arg_array);

seems like arg_array could just be made an auto_vec, but I guess this is
fine for now and someone can hopefully remember to clean that up later.

Trev



Re: update zlib to 1.2.8

2015-11-23 Thread Matthias Klose

On 23.11.2015 19:13, Joel Brobecker wrote:

In GCC zlib is only used for libjava; for binutils and gdb it is used when
building without --with-system-zlib.  This just updates zlib from 1.2.7 to
1.2.8 (released in 2013).  Applies cleanly, libjava still builds and doesn't
show any regressions in the testsuite.  Ok to apply (even if we already are
in stage3)?



+2015-11-23  Matthias Klose  
+
+   * Imported zlib 1.2.8; merged local changes.


Should not be a problem for GDB, since we're not near branching time.

Out of curiosity, what prompted this update? Just to be in sync with
the latest? Or was there an actual bug that you hit which 1.2.8 fixes?


No, just a packaging issue with somebody mentioning a static binutils build. 
That's when I saw the outdated version.


Now updated in the GCC VCS.

Matthias



Re: [PATCH 1/2] Libsanitizer merge from upstream r253555.

2015-11-23 Thread Jakub Jelinek
On Mon, Nov 23, 2015 at 10:46:33AM +0300, Maxim Ostapenko wrote:
> Index: libsanitizer/configure.ac
> ===
> --- libsanitizer/configure.ac (revision 230597)
> +++ libsanitizer/configure.ac (working copy)
> @@ -136,6 +136,12 @@
>  esac
>  AM_CONDITIONAL(USING_MAC_INTERPOSE, $MAC_INTERPOSE)
>  
> +case "$target" in
> +  aarch64-*-linux*) tsan_aarch64=true ;;
> +  *) tsan_aarch64=false ;;
> +esac
> +AM_CONDITIONAL(TSAN_AARCH64, $tsan_aarch64)
> +

I don't understand the purpose of the above.

> Index: libsanitizer/configure.tgt
> ===
> --- libsanitizer/configure.tgt(revision 230597)
> +++ libsanitizer/configure.tgt(working copy)
> @@ -37,6 +37,8 @@
>aarch64*-*-linux*)
>   if test x$ac_cv_sizeof_void_p = x8; then
>   TSAN_SUPPORTED=yes
> + LSAN_SUPPORTED=yes
> + TSAN_TARGET_DEPENDENT_OBJECTS=tsan_rtl_aarch64.lo
>   fi
>   ;;
>x86_64-*-darwin[1]* | i?86-*-darwin[1]*)

You already have this.

> Index: libsanitizer/tsan/Makefile.am
> ===
> --- libsanitizer/tsan/Makefile.am (revision 230597)
> +++ libsanitizer/tsan/Makefile.am (working copy)
> @@ -21,6 +21,8 @@
>   tsan_interface_atomic.cc \
>   tsan_interface.cc \
>   tsan_interface_java.cc \
> + tsan_libdispatch_mac.cc \
> + tsan_malloc_mac.cc \
>   tsan_md5.cc \
>   tsan_mman.cc \
>   tsan_mutex.cc \
> @@ -28,6 +30,7 @@
>   tsan_new_delete.cc \
>   tsan_platform_linux.cc \
>   tsan_platform_mac.cc \
> + tsan_platform_posix.cc \
>   tsan_platform_windows.cc \
>   tsan_report.cc \
>   tsan_rtl.cc \
> @@ -41,7 +44,11 @@
>   tsan_sync.cc 
>  
>  libtsan_la_SOURCES = $(tsan_files)
> +if TSAN_AARCH64
> +EXTRA_libtsan_la_SOURCES = tsan_rtl_aarch64.S
> +else
>  EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S
> +endif

And if I understand automake manual, you can list in there both
EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S
unconditionally, and what object actually gets linked in is picked from the
$(TSAN_TARGET_DEPENDENT_OBJECTS) (and similarly dependencies).

Otherwise LGTM.

Jakub


Re: [Patch] S/390: Fix symbol ref alignment

2015-11-23 Thread Andreas Krebbel
On 10/23/2015 02:12 PM, Robin Dapp wrote:
> gcc/testsuite/ChangeLog:
> 
> 2015-10-23  Robin Dapp  
> 
> * gcc.target/s390/load-relative-check.c: New test to check
> generation of load relative instructions.
> 
> 
> gcc/ChangeLog:
> 
> 2015-10-23  Robin Dapp  
> 
> * config/s390/s390.h: Add new symref flags, _NOTALIGN2 etc.
> * config/s390/s390.c (s390_check_symref_alignment): Use new
> symref flags, early abort on wrong alignment
> (s390_secondary_reload): Use new symref flags.
> (s390_encode_section_info): Likewise.
> * config/s390/predicates.md: Likewise.

Applied. Thanks!

-Andreas-




Re: [PATCH, PR tree-optimization/68327] Compute vectype for live phi nodes when computing VF

2015-11-23 Thread Richard Biener
On Fri, Nov 20, 2015 at 4:10 PM, Ilya Enkovich  wrote:
> On 20 Nov 14:31, Ilya Enkovich wrote:
>> 2015-11-20 14:28 GMT+03:00 Richard Biener :
>> > On Wed, Nov 18, 2015 at 2:53 PM, Ilya Enkovich  
>> > wrote:
>> >> 2015-11-18 16:44 GMT+03:00 Richard Biener :
>> >>> On Wed, Nov 18, 2015 at 12:34 PM, Ilya Enkovich  
>> >>> wrote:
>>  Hi,
>> 
>>  When we compute vectypes we skip non-relevant phi nodes.  But we 
>>  process non-relevant alive statements and thus may need vectype of 
>>  non-relevant live phi node to compute mask vectype.  This patch enables 
>>  vectype computation for live phi nodes.  Bootstrapped and regtested on 
>>  x86_64-unknown-linux-gnu.  OK for trunk?
>> >>>
>> >>> Hmm.  What breaks if you instead skip all !relevant stmts and not
>> >>> compute vectype for live but not relevant ones?  We won't ever
>> >>> "vectorize" !relevant ones, that is, we don't need their vector type.
>> >>
>> >> I tried it and got regression in SLP.  It expected non-null vectype
>> >> for non-relevant but live statement. Regression was in
>> >> gcc/gcc/testsuite/gfortran.fortran-torture/execute/pr43390.f90
>> >
>> > Because somebody put a vector type check before
>> >
>> >   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>> > return false;
>> >
>> > @@ -7590,6 +7651,9 @@ vectorizable_comparison (gimple *stmt, g
>> >tree mask_type;
>> >tree mask;
>> >
>> > +  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>> > +return false;
>> > +
>> >if (!VECTOR_BOOLEAN_TYPE_P (vectype))
>> >  return false;
>> >
>> > @@ -7602,8 +7666,6 @@ vectorizable_comparison (gimple *stmt, g
>> >  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
>> >
>> >gcc_assert (ncopies >= 1);
>> > -  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
>> > -return false;
>> >
>> >if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
>> >&& !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
>> >
>> > fixes this particular fallout for me.
>>
>> I'll try it.
>
> With this fix it works fine, thanks!  Bootstrapped and regtested on 
> x86_64-unknown-linux-gnu.  OK for trunk?

Ok.

Thanks,
Richard.

> Ilya
> --
> gcc/
>
> 2015-11-20  Ilya Enkovich  
> Richard Biener  
>
> * tree-vect-loop.c (vect_determine_vectorization_factor): Don't
> compute vectype for non-relevant mask producers.
> * gcc/tree-vect-stmts.c (vectorizable_comparison): Check stmt
> relevance earlier.
>
> gcc/testsuite/
>
> 2015-11-20  Ilya Enkovich  
>
> * gcc.dg/pr68327.c: New test.
>
>
> diff --git a/gcc/testsuite/gcc.dg/pr68327.c b/gcc/testsuite/gcc.dg/pr68327.c
> new file mode 100644
> index 000..c3e6a94
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr68327.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +int a, d;
> +char b, c;
> +
> +void
> +fn1 ()
> +{
> +  int i = 0;
> +  for (; i < 1; i++)
> +d = 1;
> +  for (; b; b++)
> +a = 1 && (d & b);
> +}
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 80937ec..592372d 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -439,7 +439,8 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>  compute a factor.  */
>   if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> {
> - mask_producers.safe_push (stmt_info);
> + if (STMT_VINFO_RELEVANT_P (stmt_info))
> +   mask_producers.safe_push (stmt_info);
>   bool_result = true;
>
>   if (gimple_code (stmt) == GIMPLE_ASSIGN
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 0f64aaf..3723b26 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -7546,6 +7546,9 @@ vectorizable_comparison (gimple *stmt, 
> gimple_stmt_iterator *gsi,
>tree mask_type;
>tree mask;
>
> +  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> +return false;
> +
>if (!VECTOR_BOOLEAN_TYPE_P (vectype))
>  return false;
>
> @@ -7558,9 +7561,6 @@ vectorizable_comparison (gimple *stmt, 
> gimple_stmt_iterator *gsi,
>  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
>
>gcc_assert (ncopies >= 1);
> -  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> -return false;
> -
>if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
>&& !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
>&& reduc_def))


Re: update zlib to 1.2.8

2015-11-23 Thread Andrew Haley
On 23/11/15 04:37, Matthias Klose wrote:
> In GCC zlib is only used for libjava; for binutils and gdb it is used when 
> building without --with-system-zlib.  This just updates zlib from 1.2.7 to 
> 1.2.8 
> (released in 2013).  Applies cleanly, libjava still builds and doesn't show 
> any 
> regressions in the testsuite.  Ok to apply (even if we already are in stage3)?

Fine by me; GDB assent is more important.

Andrew.



Re: [PATCH] Mark by_ref mem_ref in build_receiver_ref as non-trapping

2015-11-23 Thread Richard Biener
On Mon, Nov 23, 2015 at 9:45 AM, Jakub Jelinek  wrote:
> On Sat, Nov 21, 2015 at 07:34:02PM +0100, Tom de Vries wrote:
>> Mark by_ref mem_ref in build_receiver_ref as non-trapping
>>
>> 2015-11-21  Tom de Vries  
>>
>>   * omp-low.c (build_receiver_ref): Mark by_ref mem_ref as non-trapping.
>
> This is ok.

Are you sure this is properly re-set by inlining via

  /* We cannot propagate the TREE_THIS_NOTRAP flag if we have
 remapped a parameter as the property might be valid only
 for the parameter itself.  */
  if (TREE_THIS_NOTRAP (old)
  && (!is_parm (TREE_OPERAND (old, 0))
  || (!id->transform_parameter && is_parm (ptr))))
TREE_THIS_NOTRAP (*tp) = 1;

?  Or is this never hoistable to a place where TREE_THIS_NOTRAP is not true
even after inlining?  (I presume this is not directly a load via the
static chain pointer?)

>>
>> ---
>>  gcc/omp-low.c | 5 -
>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/gcc/omp-low.c b/gcc/omp-low.c
>> index 830db75..78f2853 100644
>> --- a/gcc/omp-low.c
>> +++ b/gcc/omp-low.c
>> @@ -1249,7 +1249,10 @@ build_receiver_ref (tree var, bool by_ref, 
>> omp_context *ctx)
>>TREE_THIS_NOTRAP (x) = 1;
>>x = omp_build_component_ref (x, field);
>>if (by_ref)
>> -x = build_simple_mem_ref (x);
>> +{
>> +  x = build_simple_mem_ref (x);
>> +  TREE_THIS_NOTRAP (x) = 1;
>> +}
>>
>>return x;
>>  }
>
>
> Jakub


Re: [PATCH, gcc5 backport] Fix PR ipa/65908

2015-11-23 Thread Richard Biener
On Mon, Nov 23, 2015 at 10:21 AM, Martin Liška  wrote:
> Hi.
>
> At the end of last week, Richi asked me to back port aforementioned PR.
> The patch contains two parts: first one is the patch that was applied to trunk
> and the second one is a hunk that implements param_used_p (coming from 
> r222374).
>
> Patch can bootstrap and survives regression tests on x86_64-linux-gnu.
>
> Ready for 5 branch?

Ok.

Richard.

> Thanks,
> Martin


Re: [PATCH] Mark by_ref mem_ref in build_receiver_ref as non-trapping

2015-11-23 Thread Jakub Jelinek
On Mon, Nov 23, 2015 at 11:39:26AM +0100, Richard Biener wrote:
> On Mon, Nov 23, 2015 at 9:45 AM, Jakub Jelinek  wrote:
> > On Sat, Nov 21, 2015 at 07:34:02PM +0100, Tom de Vries wrote:
> >> Mark by_ref mem_ref in build_receiver_ref as non-trapping
> >>
> >> 2015-11-21  Tom de Vries  
> >>
> >>   * omp-low.c (build_receiver_ref): Mark by_ref mem_ref as 
> >> non-trapping.
> >
> > This is ok.
> 
> Are you sure this is properly re-set by inlining via
> 
>   /* We cannot propagate the TREE_THIS_NOTRAP flag if we have
>  remapped a parameter as the property might be valid only
>  for the parameter itself.  */
>   if (TREE_THIS_NOTRAP (old)
>   && (!is_parm (TREE_OPERAND (old, 0))
>   || (!id->transform_parameter && is_parm (ptr))))
> TREE_THIS_NOTRAP (*tp) = 1;
> 
> ?  Or is this never hoistable to a place where TREE_THIS_NOTRAP is not true
> even after inlining?  (I presume this is not directly a load via the
> static chain pointer?)

I don't think inlining is ever around here, this is inside of the outlined
bodies of the OpenMP constructs, those are the *.omp_fn* artificial
functions called from libgomp, and is used in cases where
  .omp_data_i->field
is not the field itself, but pointer to the original variable.  The caller
of the libgomp functions that in the end invoke the .omp_fn* functions
guarantees that the field in that case is initialized to an address of the
original variables, is not NULL or some invalid pointer.

Jakub


[Ada] Introduce a Frontend_Exceptions flag in system.ads

2015-11-23 Thread Olivier Hainque
Hello,

The Ada compiler supports different sorts of exception schemes today. The two
most commonly used are what we commonly refer to as "frontend-sjlj", and
"gcc-zcx". The former is entirely managed by the front-end (gigi included),
relying on builtin_setjmp / builtin_longjmp pairs. The latter exposes the
exception related constructs to the middle-end, most often configured for
table based unwinding. We refer to table based schemes as "zero cost" with
respect to what happens in absence of propagation, hence the "zcx" abbrev
to denote "zero cost exceptions".

We can configure compilers to use the sjlj eh model as well but the front-end
internals aren't really prepared for this and this leads to bugs in some
circumstances.

The frontend perception of the EH scheme in use is currently controlled by the
"ZCX_By_Default" flag in system.ads. Very roughly, True is taken to denote
"gcc-zcx" and False conveys "frontend-sjlj". To allow proper support of
"gcc-sjlj", we introduce a "Frontend_Exceptions" flag and adjust the compiler
and runtimes accordingly.

This patch contains the front-end + Makefile part and a couple of adjustments
to the "tools", compensating for changes that were done preventively before,
when we had a different scheme in mind.

The compiler part involves a few general steps:
 - Adjust the possible values of the Exception_Scheme variable (opt)
 - Reflect this update in fe.h for gigi's consumption
 - Handle the new flag throughout (targparm, lib-writ, ali, bcheck, gnat1drv)
 - Adjust the abort_defer/abort_undefer call expansions to
   trigger for ZCX instead of back-end eh (exp_ch9, exp_ch11, exp_sel).
 - Adjust gigi to use the new mechanism values and helpers.
 
Then all the system.ads files will be updated with a correct value of the
Frontend_Exceptions flags.

Bootstrapped and regression tested on x86_64-linux-gnu. Committing to trunk.

Olivier

* opt.ads (Exception_Mechanism): Now three values: Front_End_SJLJ,
Back_End_SJLJ and Back_End_ZCX.
(Back_End_Exceptions, Front_End_Exceptions, ZCX_Exceptions,
SJLJ_Exceptions): New functions, reflecting properties of the current
Exception_Mechanism.
* opt.adb: Implement the new functions.
* fe.h: Bind the new Exception_Mechanism and helper functions for gigi.

* exp_ch11.adb (Expand_At_End_Handler): Replace test on mechanism by
use of property helper and update comments.
(Expand_Exception_Handlers): Replace tests on mechanism by use of
helper. Restrict Abort_Defer to ZCX specifically.
* exp_ch9.adb (Expand_N_Asynchronous_Select): Replace tests on
mechanism by calls to helper functions. Abort_Undefer for ZCX only,
paired with Expand_Exception_Handlers.
* exp_sel.adb (Build_Abort_Block_Handler): Replace tests on mechanism
by calls to helper functions. Abort_Undefer for ZCX only, paired with
Expand_Exception_Handlers.

* lib-writ.ads (P line documentation): Add entry for "FX",
representative of unit compiled with Frontend_Exceptions True.
* lib-writ.adb (Output_Main_Program_Line): Add "FX" on P line if
compiled with Frontend_Exceptions True.

* ali.ads (ALIs_Record): Add a Frontend_Exceptions component, to reflect
whether the ALI file contained an "FX" indication on the P line.
(Frontend_Exceptions_Specified): New boolean, to keep track of whether
at least an FX ALI file is in the closure.
* ali.adb (Scan_ALI): Handle "FX" on the P line.
(Initialize_ALI): Initialize Frontend_Exceptions_Specified to False.

* targparm.ads: Update description of exception schemes.
(Frontend_Exceptions_On_Target): New flag, reflect Frontend_Exceptions
set to True in system.ads, or not set at all.
* targparm.adb (Targparm_Tags): Add FEX to convey Frontend_Exceptions.
Rename ZCD to ZCX for consistency.
(FEX_Str, Targparm_Str, Get_Target_Parameters): Adjust accordingly.

* gnat1drv.adb (Adjust_Global_Switches): Adjust Exception_Mechanism
setting, now from combination of Frontend_Exceptions and ZCX_By_Default.

* bcheck.adb (Check_Consistent_Zero_Cost_Exception_Handling): Rename
as ...
(Check_Consistent_Exception_Handling): Check consistency of both
ZCX_By_Default and Frontend_Exceptions.
(Check_Configuration_Consistency): Check_Consistent_Exception_Handling
if either flag was set at least once.

* make.adb (Check): Remove processing of a possible -fsjlj coming from
lang-specs.h.
* gnatlink.adb (Gnatlink): Likewise.

gcc-interface/

* decl.c (gnat_to_gnu_entity, case E_Variable): Use eh property helper
to test for back-end exceptions. Adjust mechanism name when testing for
front-end sjlj.
(case E_Procedure): Likewise.
* trans.c 

Re: [PATCH 6/n] OpenMP 4.0 offloading infrastructure: option handling

2015-11-23 Thread Richard Biener
On Fri, 20 Nov 2015, Ilya Verbin wrote:

> On Wed, Dec 10, 2014 at 01:48:21 +0300, Ilya Verbin wrote:
> > On 09 Dec 14:59, Richard Biener wrote:
> > > On Mon, 8 Dec 2014, Ilya Verbin wrote:
> > > > Unfortunately, this fix was not general enough.
> > > > There might be cases when mixed object files get into lto-wrapper, ie 
> > > > some of
> > > > them contain only LTO sections, some contain only offload sections, and 
> > > > some
> > > > contain both.  But when lto-wrapper will pass all these files to 
> > > > recompilation,
> > > > the compiler might crash (it depends on the order of input files), 
> > > > since in
> > > > read_cgraph_and_symbols it expects that *all* input files contain IR 
> > > > section of
> > > > given type.
> > > > This patch splits input objects from argv into lto_argv and 
> > > > offload_argv, so
> > > > that all files in arrays contain corresponding IR.
> > > > Similarly, in lto-plugin, it was a bad idea to add objects, which contain 
> > > > offload
> > > > IR without LTO, to claimed_files, since this may corrupt a resolution 
> > > > file.
> > > > 
> > > > Tested on various combinations of files with/without -flto and 
> > > > with/without
> > > > offload, using trunk ld and gold, also tested on ld without plugin 
> > > > support.
> > > > Bootstrap and make check passed on x86_64-linux and i686-linux.  Ok for 
> > > > trunk?
> > > 
> > > Did you check that bootstrap-lto still works?  Ok if so.
> > 
> > Yes, bootstrap-lto passed.
> > Committed revision 218543.
> 
> I don't know how I missed this a year ago, but mixing of LTO objects with
> offloading-without-LTO objects still doesn't work :(
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68463 filed about that.
> Any thoughts how to fix this?

Don't claim files you don't handle.

Richard.


[gomp4] Adjust Fortran OACC async lib test

2015-11-23 Thread Chung-Lin Tang
Hi Thomas,
this fix adds more acc_wait's to libgomp.oacc-fortran/lib-1[23].f90.

For lib-12.f90, it's sort of a fix before we can resolve the issue
of intended semantics for "wait+async".

As for lib-13.f90, I believe these added acc_wait calls seem
reasonable, since we can't immediately assume the async-launched parallels
already completed there.

Does this seem reasonable?

Thanks,
Chung-Lin

* testsuite/libgomp.oacc-fortran/lib-12.f90 (main): Add acc_wait()
after async parallel construct.
* testsuite/libgomp.oacc-fortran/lib-13.f90 (main): Add acc_wait()
calls after parallel construct launches.
Index: libgomp.oacc-fortran/lib-12.f90
===
--- libgomp.oacc-fortran/lib-12.f90 (revision 230719)
+++ libgomp.oacc-fortran/lib-12.f90 (working copy)
@@ -15,6 +15,8 @@ program main
 end do
   !$acc end parallel
 
+  call acc_wait (0)
+
   call acc_wait_async (0, 1)
 
   if (acc_async_test (0) .neqv. .TRUE.) call abort
Index: libgomp.oacc-fortran/lib-13.f90
===
--- libgomp.oacc-fortran/lib-13.f90 (revision 230719)
+++ libgomp.oacc-fortran/lib-13.f90 (working copy)
@@ -21,6 +21,9 @@ program main
 end do
   !$acc end data
 
+  call acc_wait (1)
+  call acc_wait (2)
+
   if (acc_async_test (1) .neqv. .TRUE.) call abort
   if (acc_async_test (2) .neqv. .TRUE.) call abort
 


Re: Fix lto-symtab ICE during Ada LTO bootstrap

2015-11-23 Thread Arnaud Charlet
> > So there is indeed no point in trying to fix one or two cases, and we should
> > instead instruct LTO somehow to treat System.Address as compatible with
> > void* otherwise we'll run into endless troubles on that since using
> > System.Address as void* is very common practice in Ada code.
> 
> Maybe we could apply this special treatment only to the void_ptr subtype of
> Interfaces.C.Extensions and require its use when interfacing with C.

No, Interfaces.C.Extensions is non portable, so almost no Ada code out there
is using it. As I said, existing Ada code is using System.Address all the time,
so requiring any code change in this area is just a non starter. We'd
rather require that people don't use LTO with Ada rather than tell them to
use Interfaces.C.Extensions, that would be more constructive :-)

Arno


Re: [PATCH] Don't reapply loops flags if unnecessary in loop_optimizer_init

2015-11-23 Thread Tom de Vries

On 23/11/15 11:29, Richard Biener wrote:

On Mon, 23 Nov 2015, Tom de Vries wrote:


[ was: Re: [PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def ]

On 20/11/15 11:37, Richard Biener wrote:

I'd rather make loop_optimizer_init do nothing
if requested flags are already set and no fixup is needed and
call the above unconditionally.  Thus sth like

Index: gcc/loop-init.c
===
--- gcc/loop-init.c (revision 230649)
+++ gcc/loop-init.c (working copy)
@@ -103,7 +103,11 @@ loop_optimizer_init (unsigned flags)
 calculate_dominance_info (CDI_DOMINATORS);

 if (!needs_fixup)
-   checking_verify_loop_structure ();
+   {
+ checking_verify_loop_structure ();
+ if (loops_state_satisfies_p (flags))
+   goto out;
+   }

 /* Clear all flags.  */
 if (recorded_exits)
@@ -122,11 +126,12 @@ loop_optimizer_init (unsigned flags)
 /* Apply flags to loops.  */
 apply_loop_flags (flags);

+  checking_verify_loop_structure ();
+
+out:
 /* Dump loops.  */
 flow_loops_dump (dump_file, NULL, 1);

-  checking_verify_loop_structure ();
-
 timevar_pop (TV_LOOP_INIT);
   }


This patch implements that approach, but the patch is slightly more
complicated because of the need to handle LOOPS_MAY_HAVE_MULTIPLE_LATCHES
differently than the rest of the flags.

Bootstrapped and reg-tested on x86_64.

OK for stage3 trunk?


Let's revisit this during stage1 if the scev_initialized () thing
SLP vectorization uses works, ok?



OK, I'll give that a try.

FTR, attached two patches are an attempt at a cleaner solution for 
pass_slp_vectorize::execute (in combination with patch "Don't reapply 
loops flags if unnecessary in loop_optimizer_init").


The first patch introduces a property PROP_scev, set for the duration of 
the loop pipeline. It allows us to call scev_initialize and 
scev_finalize unconditionally. Outside the loop pipeline calling the 
functions has the usual effect. Inside the loop pipeline, calling the 
functions has no effect.


The second patch introduces a property PROP_loops_normal_re_lcssa, set 
for the duration of the loop pipeline. It allows us (in combination with 
"Don't reapply loops flags if unnecessary in loop_optimizer_init") to 
call loop_optimizer_init and loop_optimizer_finalize unconditionally.
Outside the loop pipeline, calling the functions has the usual effect. 
Inside the loop pipeline, calling loop_optimizer_finalize has no effect, 
and calling loop_optimizer_init has no effect unless a fixup or a 
new loop property is needed.


Thanks,
- Tom

Add PROP_scev

---
 gcc/tree-pass.h |  1 +
 gcc/tree-scalar-evolution.c | 13 +
 gcc/tree-ssa-loop.c |  3 ++-
 gcc/tree-vectorizer.c   |  4 ++--
 4 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 004db77..4e66b2c 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -227,6 +227,7 @@ protected:
 		   of math functions; the
 		   current choices have
 		   been optimized.  */
+#define PROP_scev		(1 << 16)	/* preserve scev info.  */
 
 #define PROP_trees \
   (PROP_gimple_any | PROP_gimple_lcf | PROP_gimple_leh | PROP_gimple_lomp)
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 9b33693..5d5e354 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -280,6 +280,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "tree-ssa-propagate.h"
 #include "gimple-fold.h"
+#include "tree-pass.h"
 
 static tree analyze_scalar_evolution_1 (struct loop *, tree, tree);
 static tree analyze_scalar_evolution_for_address_of (struct loop *loop,
@@ -3168,6 +3169,12 @@ scev_initialize (void)
 {
   struct loop *loop;
 
+  if (cfun->curr_properties & PROP_scev)
+{
+  gcc_assert (scev_initialized_p ());
+  return;
+}
+
   scalar_evolution_info = hash_table<scev_info_hasher>::create_ggc (100);
 
   initialize_scalar_evolutions_analyzer ();
@@ -3367,6 +3374,12 @@ simple_iv (struct loop *wrto_loop, struct loop *use_loop, tree op,
 void
 scev_finalize (void)
 {
+  if (cfun->curr_properties & PROP_scev)
+{
+  gcc_assert (scev_initialized_p ());
+  return;
+}
+
   if (!scalar_evolution_info)
 return;
   scalar_evolution_info->empty ();
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index d30e3c8..739fda7 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -290,7 +290,7 @@ const pass_data pass_data_tree_loop_init =
   OPTGROUP_LOOP, /* optinfo_flags */
   TV_NONE, /* tv_id */
   PROP_cfg, /* properties_required */
-  0, /* properties_provided */
+  PROP_scev, /* properties_provided */
   0, /* properties_destroyed */
   0, /* todo_flags_start */
   0, /* todo_flags_finish */
@@ -524,6 +524,7 @@ make_pass_iv_optimize (gcc::context *ctxt)
 static unsigned int
 tree_ssa_loop_done (void)
 {
+  cfun->curr_properties &= 

Re: [PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def

2015-11-23 Thread Richard Biener
On Sat, 21 Nov 2015, Tom de Vries wrote:

> On 20/11/15 11:28, Richard Biener wrote:
> > On Thu, 19 Nov 2015, Tom de Vries wrote:
> > 
> > > >On 17/11/15 15:53, Tom de Vries wrote:
> > > > > > > >And the above LIM example
> > > > > > > >is none for why you need two LIM passes...
> > > > > >
> > > > > >Indeed. I'm planning a separate reply to explain in more detail the
> > > > need
> > > > > >for the two pass_lims.
> > > >
> > > >I.
> > > >
> > > >I managed to get rid of the two pass_lims for the motivating example that
> > > I
> > > >used until now (goacc/kernels-double-reduction.c). I found that by adding
> > > a
> > > >pass_dominator instance after pass_ch, I could get rid of the second
> > > pass_lim
> > > >(and pass_copyprop as well).
> > > >
> > > >But... then I wrote a counter example
> > > (goacc/kernels-double-reduction-n.c),
> > > >and I'm back at two pass_lims (and two pass_dominators).
> > > >Also I've split the pass group into a bit before and after pass_fre.
> > > >
> > > >So, the current pass group looks like:
> > > >...
> > > >NEXT_PASS (pass_build_ealias);
> > > >
> > > >/* Pass group that runs when the function is an offloaded function
> > > >containing oacc kernels loops.  Part 1.  */
> > > >NEXT_PASS (pass_oacc_kernels);
> > > >PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
> > > > /* We need pass_ch here, because pass_lim has no effect on
> > > >exit-first loops (PR65442).  Ideally we want to remove both
> > > >this pass instantiation, and the reverse transformation
> > > >transform_to_exit_first_loop_alt, which is done in
> > > >pass_parallelize_loops_oacc_kernels. */
> > > > NEXT_PASS (pass_ch);
> > > >POP_INSERT_PASSES ()
> > > >
> > > >NEXT_PASS (pass_fre);
> > > >
> > > >/* Pass group that runs when the function is an offloaded function
> > > >containing oacc kernels loops.  Part 2.  */
> > > >NEXT_PASS (pass_oacc_kernels2);
> > > >PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels2)
> > > > /* We use pass_lim to rewrite in-memory iteration and reduction
> > > >variable accesses in loops into local variables accesses.  */
> > > > NEXT_PASS (pass_lim);
> > > > NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */);
> > > > NEXT_PASS (pass_lim);
> > > > NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */);
> > > > NEXT_PASS (pass_dce);
> > > > NEXT_PASS (pass_parallelize_loops_oacc_kernels);
> > > > NEXT_PASS (pass_expand_omp_ssa);
> > > >POP_INSERT_PASSES ()
> > > >NEXT_PASS (pass_merge_phi);
> > > >...
> > > >
> > > >
> > > >II.
> > > >
> > > >The motivating test-case kernels-double-reduction-n.c:
> > > >...
> > > >#include 
> > > >
> > > >#define N 500
> > > >
> > > >unsigned int a[N][N];
> > > >
> > > >void  __attribute__((noinline,noclone))
> > > >foo (unsigned int n)
> > > >{
> > > >   int i, j;
> > > >   unsigned int sum = 1;
> > > >
> > > >#pragma acc kernels copyin (a[0:n]) copy (sum)
> > > >   {
> > > > for (i = 0; i < n; ++i)
> > > >   for (j = 0; j < n; ++j)
> > > > sum += a[i][j];
> > > >   }
> > > >
> > > >   if (sum != 5001)
> > > > abort ();
> > > >}
> > > >...
> > > >
> > > >
> > > >III.
> > > >
> > > >Before first pass_lim. Note no phis on inner or outer loop header for
> > > > >iteration variables or reduction variable:
> > > >...
> > > >   :
> > > >   _5 = *.omp_data_i_4(D).i;
> > > >   *_5 = 0;
> > > >   _44 = *.omp_data_i_4(D).n;
> > > >   _45 = *_44;
> > > >   if (_45 != 0)
> > > > goto ;
> > > >   else
> > > > goto ;
> > > >
> > > >   : outer loop header
> > > >   _12 = *.omp_data_i_4(D).j;
> > > >   *_12 = 0;
> > > >   if (_45 != 0)
> > > > goto ;
> > > >   else
> > > > goto ;
> > > >
> > > >   : inner loop header, latch
> > > >   _19 = *.omp_data_i_4(D).a;
> > > >   _21 = *_5;
> > > >   _23 = *_12;
> > > >   _24 = *_19[_21][_23];
> > > >   _25 = *.omp_data_i_4(D).sum;
> > > >   sum.0_26 = *_25;
> > > >   sum.1_27 = _24 + sum.0_26;
> > > >   *_25 = sum.1_27;
> > > >   _33 = _23 + 1;
> > > >   *_12 = _33;
> > > >   j.2_16 = (unsigned int) _33;
> > > >   if (j.2_16 < _45)
> > > > goto ;
> > > >   else
> > > > goto ;
> > > >
> > > >   : outer loop latch
> > > >   _36 = *_5;
> > > >   _38 = _36 + 1;
> > > >   *_5 = _38;
> > > >   i.3_9 = (unsigned int) _38;
> > > >   if (i.3_9 < _45)
> > > > goto ;
> > > >   else
> > > > goto ;
> > > >
> > > >   :
> > > >   return;
> > > >...
> > > >
> > > >
> > > >IV.
> > > >
> > > >After first pass_lim/pass_dom pair. Note there are phis on the inner loop
> > > >header for the reduction and the iteration variable, but not on the outer
> > > loop
> > > >header:
> > > >...
> > > >   :
> > > >   _5 = *.omp_data_i_4(D).i;
> > > >   *_5 = 0;
> > > >   _44 = *.omp_data_i_4(D).n;
> > > >   _45 = *_44;
> > > >   if (_45 != 0)
> > > > goto ;
> > > >   else
> > > > goto ;
> > > >
> > > >   :
> > > >   _12 = *.omp_data_i_4(D).j;
> > > >   _19 = *.omp_data_i_4(D).a;
> > > >   

Re: [gomp4.1] Handle new form of #pragma omp declare target

2015-11-23 Thread Thomas Schwinge
Hi Jakub!

On Fri, 17 Jul 2015 15:05:59 +0200, Jakub Jelinek  wrote:
> [...] "omp declare target link" [...]

> This patch only marks them with the new attribute, [...]

> --- gcc/c/c-parser.c.jj   2015-07-16 18:09:25.0 +0200
> +++ gcc/c/c-parser.c  2015-07-17 14:11:08.553694975 +0200

>  static void
>  c_parser_omp_declare_target (c_parser *parser)
>  {
> [...]
> +  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> +{
> +  tree t = OMP_CLAUSE_DECL (c), id;
> +  tree at1 = lookup_attribute ("omp declare target", DECL_ATTRIBUTES 
> (t));
> +  tree at2 = lookup_attribute ("omp declare target link",
> +DECL_ATTRIBUTES (t));
> +  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINK)
> + {
> +   id = get_identifier ("omp declare target link");
> +   std::swap (at1, at2);
> + }
> +  else
> + id = get_identifier ("omp declare target");

Is it intentional that you didn't add "omp declare target link" to
gcc/c-family/c-common.c:c_common_attribute_table, next to the existing
"omp declare target"?


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [PATCH, PR68337] Don't fold memcpy/memmove we want to instrument

2015-11-23 Thread Ilya Enkovich
On 23 Nov 11:44, Richard Biener wrote:
> On Mon, Nov 23, 2015 at 11:10 AM, Ilya Enkovich  
> wrote:
> > On 23 Nov 10:39, Richard Biener wrote:
> >> On Fri, Nov 20, 2015 at 3:30 PM, Ilya Enkovich  
> >> wrote:
> >> > On 20 Nov 14:54, Richard Biener wrote:
> >> >>
> >> >> I don't think you can in any way rely on the pointer type of the src 
> >> >> argument
> >> >> as all pointer conversions are useless and memcpy and friends take void 
> >> >> *
> >> >> anyway.
> >> >
> >> > This check is looking for cases when we have type information indicating
> >> > no pointers are copied.  In case of 'void *' we have to assume pointers
> >> > are copied and inlining is undesired.  Test pr68337-2.c checks pointer
> >> > type allows to enable inlining.  Looks like this check misses
> >> > || !COMPLETE_TYPE_P(TREE_TYPE (TREE_TYPE (src)))?
> >>
> >> As said there is no information in the pointer / pointed-to type in GIMPLE.
> >
> > What does it mean?  We do have TREE_TYPE for used pointer and nested 
> > TREE_TYPE
> > holding pointed-to type.  Is it some random invalid type?
> 
> Yes, it can be a "random" type.  Like for
> 
> void foo (float *f)
> {
>   memcpy ((void *)f, ...);
> }
> int main()
> {
>   int **a[10];
>   foo (a);
> }
> 
> which tries to copy to an array of int * but the GIMPLE IL for foo
> will call memcpy with a float * typed argument.

I see.  But it should still be OK to check type in case of strict aliasing, 
right?

Thanks,
Ilya

> 
> >>
> >> >>
> >> >> Note that you also disable memmove to memcpy simplification with this
> >> >> early check.
> >> >
> >> > Doesn't matter for MPX which uses the same implementation for both cases.
> >> >
> >> >>
> >> >> Where is pointer transfer handled for MPX?  I suppose it's not done
> >> >> transparently
> >> >> for all memory move instructions but explicitly by instrumented block 
> >> >> copy
> >> >> routines in libmpx?  In which case how does that identify pointers vs.
> >> >> non-pointers?
> >> >
> >> > It is handled by instrumentation pass.  Compiler checks type of stored 
> >> > data to
> >> > find pointer stores.  Each pointer store is instrumented with bndstx 
> >> > call.
> >>
> >> How does it identify "pointer store"?  With -fno-strict-aliasing you can 
> >> store
> >> pointers using an integer type.  You can also always store pointers using
> >> a character type like
> >>
> >> void foo (int *p, int **dest)
> >> {
> >>   ((char *)*dest)[0] = (((char *))[0];
> >>   ((char *)*dest)[1] = (((char *))[1];
> >>   ((char *)*dest)[2] = (((char *))[2];
> >>   ((char *)*dest)[3] = (((char *))[3];
> >> }
> >
> > Pointer store is identified using type information.  When pointer is cast 
> > to
> > a non-pointer type its bounds are lost.
> >
> > Ilya
> >
> >>
> >> > MPX versions of memcpy, memmove etc. don't make any assumptions about
> >> > type of copied data and just copy whole chunk of bounds metadata 
> >> > corresponding
> >> > to copied block.
> >>
> >> So it handles copying a pointer in two pieces with two memcpy calls
> >> correctly.  Good.
> >>
> >> Richard.
> >>
> >> > Thanks,
> >> > Ilya
> >> >
> >> >>
> >> >> Richard.
> >> >>


Re: [PATCH][AArch64][v2] Improve comparison with complex immediates followed by branch/cset

2015-11-23 Thread Kyrill Tkachov


On 12/11/15 12:05, James Greenhalgh wrote:

On Tue, Nov 03, 2015 at 03:43:24PM +, Kyrill Tkachov wrote:

Hi all,

Bootstrapped and tested on aarch64.

Ok for trunk?

Comments in-line.



Here's an updated patch according to your comments.
Sorry it took so long to respin it, had other things to deal with with
stage1 closing...

I've indented the sample code sequences and used valid mnemonics.
These patterns can only match during combine, so I'd expect them to always
split during combine or immediately after, but I don't think that's a documented
guarantee so I've gated them on !reload_completed.

I've used IN_RANGE in the predicate.md hunk and added scan-assembler checks
in the tests.

Is this ok?

Thanks,
Kyrill

2015-11-20  Kyrylo Tkachov  

* config/aarch64/aarch64.md (*condjump): Rename to...
(condjump): ... This.
(*compare_condjump): New define_insn_and_split.
(*compare_cstore_insn): Likewise.
(*cstore_insn): Rename to...
(cstore_insn): ... This.
* config/aarch64/iterators.md (CMP): Handle ne code.
* config/aarch64/predicates.md (aarch64_imm24): New predicate.

2015-11-20  Kyrylo Tkachov  

* gcc.target/aarch64/cmpimm_branch_1.c: New test.
* gcc.target/aarch64/cmpimm_cset_1.c: Likewise.


Thanks,
Kyrill


2015-11-03  Kyrylo Tkachov  

 * config/aarch64/aarch64.md (*condjump): Rename to...
 (condjump): ... This.
 (*compare_condjump): New define_insn_and_split.
 (*compare_cstore_insn): Likewise.
 (*cstore_insn): Rename to...
 (aarch64_cstore): ... This.
 * config/aarch64/iterators.md (CMP): Handle ne code.
 * config/aarch64/predicates.md (aarch64_imm24): New predicate.

2015-11-03  Kyrylo Tkachov  

 * gcc.target/aarch64/cmpimm_branch_1.c: New test.
 * gcc.target/aarch64/cmpimm_cset_1.c: Likewise.
commit 7df013a391532f39932b80c902e3b4bbd841710f
Author: Kyrylo Tkachov 
Date:   Mon Sep 21 10:56:47 2015 +0100

 [AArch64] Improve comparison with complex immediates

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 126c9c2..1bfc870 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -369,7 +369,7 @@ (define_expand "mod3"
}
  )
  
-(define_insn "*condjump"

+(define_insn "condjump"
[(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand 1 "cc_register" "") (const_int 0)])
   (label_ref (match_operand 2 "" ""))
@@ -394,6 +394,40 @@ (define_insn "*condjump"
  (const_int 1)))]
  )
  
+;; For a 24-bit immediate CST we can optimize the compare for equality

+;; and branch sequence from:
+;; mov x0, #imm1
+;; movkx0, #imm2, lsl 16 /* x0 contains CST.  */
+;; cmp x1, x0
+;; b .Label

This would be easier on the eyes if you were to indent the code sequence.

+;; and branch sequence from:
+;; mov x0, #imm1
+;; movkx0, #imm2, lsl 16 /* x0 contains CST.  */
+;; cmp x1, x0
+;; b .Label
+;; into the shorter:
+;; sub x0, #(CST & 0xfff000)


+;; into the shorter:
+;; sub x0, #(CST & 0xfff000)
+;; subsx0, #(CST & 0x000fff)

These instructions are not valid (2 operand sub/subs?) can you write them
out fully for this comment so I can see the data flow?


+;; b .Label
+(define_insn_and_split "*compare_condjump"
+  [(set (pc) (if_then_else (EQL
+ (match_operand:GPI 0 "register_operand" "r")
+ (match_operand:GPI 1 "aarch64_imm24" "n"))
+  (label_ref:P (match_operand 2 "" ""))
+  (pc)))]
+  "!aarch64_move_imm (INTVAL (operands[1]), mode)
+   && !aarch64_plus_operand (operands[1], mode)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff;
+HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000;
+rtx tmp = gen_reg_rtx (mode);

Can you guarantee we can always create this pseudo? What if we're a
post-register-allocation split?


+emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm)));
+emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
+rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+rtx cmp_rtx = gen_rtx_fmt_ee (, mode, cc_reg, const0_rtx);
+emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2]));
+DONE;
+  }
+)
+
  (define_expand "casesi"
[(match_operand:SI 0 "register_operand" "") ; Index
 (match_operand:SI 1 "const_int_operand" ""); Lower bound
@@ -2898,7 +2932,7 @@ (define_expand "cstore4"
"
  )
  
-(define_insn "*cstore_insn"

+(define_insn "aarch64_cstore"
[(set (match_operand:ALLI 0 "register_operand" "=r")
(match_operator:ALLI 1 "aarch64_comparison_operator"
 [(match_operand 2 "cc_register" "") (const_int 0)]))]
@@ -2907,6 +2941,39 

Re: Enable pointer TBAA for LTO

2015-11-23 Thread Richard Biener
On Mon, 23 Nov 2015, Jan Hubicka wrote:

> Hi,
> here is updated patch which I finally committed today.  It addresses all the 
> comments
> and also fixes one nasty bug that really cost me a lot of time to understand. 
> 
> +   /* LTO type merging does not make any difference between 
> +  component pointer types.  We may have
> +
> +  struct foo {int *a;};
> +
> +  as TYPE_CANONICAL of 
> +
> +  struct bar {float *a;};
> +
> +  Because accesses to int * and float * do not alias, we would get
> +  false negative when accessing the same memory location by
> +  float ** and bar *. We thus record the canonical type as:
> +
> +  struct {void *a;};
> +
> +  void * is special cased and works as a universal pointer type.
> +  Accesses to it conflicts with accesses to any other pointer
> +  type.  */
> 
> This problem manifested itself only as a lto-bootstrap miscompare on 32bit
> build and I spent a lot of time localizing the wrong code since it reproduces
> only in quite large programs where we get conflicts in canonical type merging
> like this.
> 
> The patch thus updates record_component_aliases to substitute 
> void_ptr_type for all pointer types. I re-did the stats.  Now the 
> improvement on dealII is 14% that is quite a bit lower than earlier, but 
> still substantial.  Since we have voidptr globbing counter, I know that 
> the number of disambiguations would go at least 19% up if we did not do 
> it.

Please in future leave patches for review again if you do such
big changes before committing...

I don't understand why we need this (testcase?) because get_alias_set ()
is supposed to do the alias-set of pointer globbing for us.

Thanks,
Richard.

> There is a lot of low hanging fruit in that area now, but the real 
> solution is to track types that need to be merged by Fortran rules and 
> don't do all this fancy globbing for C/C++ types.  I will open branch for 
> IPA work and try to prepare this for next stage 1.
> 
> bootstrapped/regtested x86_64-linux and ppc64-linux, earlier version tested 
> on i386-linux
> and also on some bigger apps, committed
> 
> Note that we still have bootstrap miscompare with LTO build and 
> --disable-checking,
> I am looking for that now.  Additionally after fixing the ICEs preventing us 
> to build
> the gnat1 binary, gnat1 aborts. Both these are independent of the patch.
> 
> Honza
>   * lto.c (iterative_hash_canonical_type): Always recurse for pointers.
>   (gimple_register_canonical_type_1): Check that pointers do not get
>   canonical types.
>   (gimple_register_canonical_type): Do not register pointers.
> 
>   * tree.c (build_pointer_type_for_mode,build_reference_type_for_mode):
>   In LTO we do not compute TYPE_CANONICAL of pointers.
>   (gimple_canonical_types_compatible_p): Improve comments; sanity check
>   that pointers do not have canonical type that would make us believe
>   they are different.
>   * alias.c (get_alias_set): Do structural type equality on pointers;
>   enable pointer path for LTO; also glob pointer to vector with pointer
>   to vector element; glob pointers and references for LTO; do more strict
>   sanity checking about build_pointer_type returning the canonical type
>   which is also the main variant.
>   (record_component_aliases): When component type is pointer and we
>   do LTO; record void_type_node alias set.
> Index: tree.c
> ===
> --- tree.c(revision 230714)
> +++ tree.c(working copy)
> @@ -7919,7 +7919,8 @@ build_pointer_type_for_mode (tree to_typ
>TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (to_type);
>TYPE_POINTER_TO (to_type) = t;
>  
> -  if (TYPE_STRUCTURAL_EQUALITY_P (to_type))
> +  /* During LTO we do not set TYPE_CANONICAL of pointers and references.  */
> +  if (TYPE_STRUCTURAL_EQUALITY_P (to_type) || in_lto_p)
>  SET_TYPE_STRUCTURAL_EQUALITY (t);
>else if (TYPE_CANONICAL (to_type) != to_type || could_alias)
>  TYPE_CANONICAL (t)
> @@ -7987,7 +7988,8 @@ build_reference_type_for_mode (tree to_t
>TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (to_type);
>TYPE_REFERENCE_TO (to_type) = t;
>  
> -  if (TYPE_STRUCTURAL_EQUALITY_P (to_type))
> +  /* During LTO we do not set TYPE_CANONICAL of pointers and references.  */
> +  if (TYPE_STRUCTURAL_EQUALITY_P (to_type) || in_lto_p)
>  SET_TYPE_STRUCTURAL_EQUALITY (t);
>else if (TYPE_CANONICAL (to_type) != to_type || could_alias)
>  TYPE_CANONICAL (t)
> @@ -13224,7 +13226,9 @@ type_with_interoperable_signedness (cons
> TBAA is concerned.  
> This function is used both by lto.c canonical type merging and by the
> verifier.  If TRUST_TYPE_CANONICAL we do not look into structure of types
> -   that have TYPE_CANONICAL defined and assume them equivalent.  */
> +   that have TYPE_CANONICAL defined and assume them equivalent.  This 

Re: [PATCH, PR68337] Don't fold memcpy/memmove we want to instrument

2015-11-23 Thread Richard Biener
On Mon, Nov 23, 2015 at 11:10 AM, Ilya Enkovich  wrote:
> On 23 Nov 10:39, Richard Biener wrote:
>> On Fri, Nov 20, 2015 at 3:30 PM, Ilya Enkovich  
>> wrote:
>> > On 20 Nov 14:54, Richard Biener wrote:
>> >> On Fri, Nov 20, 2015 at 2:08 PM, Ilya Enkovich  
>> >> wrote:
>> >> > On 19 Nov 18:19, Richard Biener wrote:
>> >> >> On November 19, 2015 6:12:30 PM GMT+01:00, Bernd Schmidt 
>> >> >>  wrote:
>> >> >> >On 11/19/2015 05:31 PM, Ilya Enkovich wrote:
>> >> >> >> Currently we fold all memcpy/memmove calls with a known data size.
>> >> >> >> It causes two problems when used with Pointer Bounds Checker.
>> >> >> >> The first problem is that we may copy pointers as integer data
>> >> >> >> and thus lose bounds.  The second problem is that if we inline
>> >> >> >> memcpy, we also have to inline bounds copy and this may result
>> >> >> >> in a huge amount of code and significant compilation time growth.
>> >> >> >> This patch disables folding for functions we want to instrument.
>> >> >> >>
>> >> >> >> Does it look reasonable for trunk and GCC5 branch?  Bootstrapped
>> >> >> >> and regtested on x86_64-unknown-linux-gnu.
>> >> >> >
>> >> >> >Can't see anything wrong with it. Ok.
>> >> >>
>> >> >> But for small sizes this can have a huge impact on optimization.  
>> >> >> Which is why we have the code in the first place.  I'd make the check 
>> >> >> less broad, for example inlining copies of size less than a pointer 
>> >> >> shouldn't be affected.
>> >> >
>> >> > Right.  We also may inline in case we know no pointers are copied.  
>> >> > Below is a version with extended condition and a couple more tests.  
>> >> > Bootstrapped and regtested on x86_64-unknown-linux-gnu.  Is it OK for 
>> >> > trunk and gcc-5-branch?
>> >> >
>> >> >>
>> >> >> Richard.
>> >> >>
>> >> >> >
>> >> >> >Bernd
>> >> >>
>> >> >>
>> >> >
>> >> > Thanks,
>> >> > Ilya
>> >> > --
>> >> > gcc/
>> >> >
>> >> > 2015-11-20  Ilya Enkovich  
>> >> >
>> >> > * gimple-fold.c (gimple_fold_builtin_memory_op): Don't
>> >> > fold call if we are going to instrument it and it may
>> >> > copy pointers.
>> >> >
>> >> > gcc/testsuite/
>> >> >
>> >> > 2015-11-20  Ilya Enkovich  
>> >> >
>> >> > * gcc.target/i386/mpx/pr68337-1.c: New test.
>> >> > * gcc.target/i386/mpx/pr68337-2.c: New test.
>> >> > * gcc.target/i386/mpx/pr68337-3.c: New test.
>> >> >
>> >> >
>> >> > diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
>> >> > index 1ab20d1..dd9f80b 100644
>> >> > --- a/gcc/gimple-fold.c
>> >> > +++ b/gcc/gimple-fold.c
>> >> > @@ -53,6 +53,8 @@ along with GCC; see the file COPYING3.  If not see
>> >> >  #include "gomp-constants.h"
>> >> >  #include "optabs-query.h"
>> >> >  #include "omp-low.h"
>> >> > +#include "tree-chkp.h"
>> >> > +#include "ipa-chkp.h"
>> >> >
>> >> >
>> >> >  /* Return true when DECL can be referenced from current unit.
>> >> > @@ -664,6 +666,23 @@ gimple_fold_builtin_memory_op 
>> >> > (gimple_stmt_iterator *gsi,
>> >> >unsigned int src_align, dest_align;
>> >> >tree off0;
>> >> >
>> >> > +  /* Inlining of memcpy/memmove may cause bounds lost (if we copy
>> >> > +pointers as wide integer) and also may result in huge function
>> >> > +size because of inlined bounds copy.  Thus don't inline for
>> >> > +functions we want to instrument in case pointers are copied.  
>> >> > */
>> >> > +  if (flag_check_pointer_bounds
>> >> > + && chkp_instrumentable_p (cfun->decl)
>> >> > + /* Even if data may contain pointers we can inline if copy
>> >> > +less than a pointer size.  */
>> >> > + && (!tree_fits_uhwi_p (len)
>> >> > + || compare_tree_int (len, POINTER_SIZE_UNITS) >= 0)
>> >>
>> >> || tree_to_uhwi (len) >= POINTER_SIZE_UNITS
>> >>
>> >> > + /* Check data type for pointers.  */
>> >> > + && (!TREE_TYPE (src)
>> >> > + || !TREE_TYPE (TREE_TYPE (src))
>> >> > + || VOID_TYPE_P (TREE_TYPE (TREE_TYPE (src)))
>> >> > + || chkp_type_has_pointer (TREE_TYPE (TREE_TYPE (src)
>> >>
>> >> I don't think you can in any way rely on the pointer type of the src 
>> >> argument
>> >> as all pointer conversions are useless and memcpy and friends take void *
>> >> anyway.
>> >
>> > This check is looking for cases when we have type information indicating
>> > no pointers are copied.  In case of 'void *' we have to assume pointers
>> > are copied and inlining is undesired.  Test pr68337-2.c checks pointer
>> > type allows to enable inlining.  Looks like this check misses
>> > || !COMPLETE_TYPE_P(TREE_TYPE (TREE_TYPE (src)))?
>>
>> As said there is no information in the pointer / pointed-to type in GIMPLE.
>
> What does it mean?  We do have TREE_TYPE for used pointer and nested TREE_TYPE
> holding pointed-to type.  Is it some 

[Patch] PR68137, drop constant overflow flag in adjust_range_with_scev when possible

2015-11-23 Thread Jiong Wang

As reported by pr68137 and pr68326, r230150 caused new issues.

Those ICEs are caused by adjust_range_with_scev getting range with
overflowed constants min or max. So given there are too many places to
generate OVF, we do a check in adjust_range_with_scev, to drop OVF flag
when it's unnecessary. This should fix the OVF side-effect caused by
r230150.

A simple regression testcase is included in this patch.

bootstrap OK on x86-64 and aarch64, regression ok on both.

For more background, please see discussion at

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68317

OK for trunk?

2015-11-23  Richard Biener  
Jiong Wang  

gcc/
  PR tree-optimization/68137
  PR tree-optimization/68326
  * tree-vrp.c (adjust_range_with_scev): Call drop_tree_overflow if the
  final min and max are not infinity.

gcc/testsuite/
  * gcc.dg/pr68137.c: New testcase.

--
Regards,
Jiong

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index e2393e4..8efeb76 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -4331,6 +4331,17 @@ adjust_range_with_scev (value_range *vr, struct loop *loop,
 	  && is_positive_overflow_infinity (max)))
 return;
 
+  /* Even for valid range info, sometimes overflow flag will leak in.
+ As GIMPLE IL should have no constants with TREE_OVERFLOW set, we
+ drop them except for +-overflow_infinity which still need special
+ handling in vrp pass.  */
+  if (TREE_OVERFLOW_P (min)
+  && ! is_negative_overflow_infinity (min))
+min = drop_tree_overflow (min);
+  if (TREE_OVERFLOW_P (max)
+  && ! is_positive_overflow_infinity (max))
+max = drop_tree_overflow (max);
+
   set_value_range (vr, VR_RANGE, min, max, vr->equiv);
 }
 
diff --git a/gcc/testsuite/gcc.dg/pr68137.c b/gcc/testsuite/gcc.dg/pr68137.c
new file mode 100644
index 000..a30e1ac
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr68137.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void bar (int);
+
+void
+foo ()
+{
+ int index = 0;
+ for (index; index <= 10; index--)
+   /* Result of the following multiply will overflow
+  when converted to signed int.  */
+   bar((0xcafe + index) * 0xdead);
+}


Re: Fix lto-symtab ICE during Ada LTO bootstrap

2015-11-23 Thread Eric Botcazou
> So there is indeed no point in trying to fix one or two cases, and we should
> instead instruct LTO somehow to treat System.Address as compatible with
> void* otherwise we'll run into endless troubles on that since using
> System.Address as void* is very common practice in Ada code.

Maybe we could apply this special treatment only to the void_ptr subtype of 
Interfaces.C.Extensions and require its use when interfacing with C.

-- 
Eric Botcazou


Re: [PATCH] Fix up reduction-1{1,2} testcases (PR middle-end/68221)

2015-11-23 Thread Richard Biener
On Fri, 20 Nov 2015, Jakub Jelinek wrote:

> Hi!
> 
> If C/C++ array section reductions have non-zero (positive) bias, it is
> implemented by declaring a smaller private array and subtracting the bias
> from the start of the private array (because valid code may only dereference
> elements from bias onwards).  But, this isn't something that is kosher in
> C/C++ pointer arithmetics and the alias oracle seems to get upset on that.
> So, the following patch fixes that by performing the subtraction on integral
> type instead of p+ -bias.

So this still does use the biased pointer because you do not
re-write accesses (where you could have applied the biasing to
the indexes / offsets), right?  Thus the patch is merely obfuscation
for GCC rather than making it kosher for C/C++ (you still have a
pointer pointing outside of the private array object)?

I still hope to have a look where the alias oracle gets things
wrong (well, if so by accident at least).

Richard.

> Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.
> 
> 2015-11-20  Jakub Jelinek  
> 
>   PR middle-end/68221
>   * omp-low.c (lower_rec_input_clauses): If C/C++ array reduction
>   has non-zero bias, subtract it in integer type instead of
>   pointer plus of negated bias.
> 
>   * testsuite/libgomp.c/reduction-11.c: Remove xfail.
>   * testsuite/libgomp.c/reduction-12.c: Likewise.
>   * testsuite/libgomp.c++/reduction-11.C: Likewise.
>   * testsuite/libgomp.c++/reduction-12.C: Likewise.
> 
> --- gcc/omp-low.c.jj  2015-11-20 12:56:17.0 +0100
> +++ gcc/omp-low.c 2015-11-20 13:44:29.080374051 +0100
> @@ -,11 +,13 @@ lower_rec_input_clauses (tree clauses, g
>  
> if (!integer_zerop (bias))
>   {
> -   bias = fold_convert_loc (clause_loc, sizetype, bias);
> -   bias = fold_build1_loc (clause_loc, NEGATE_EXPR,
> -   sizetype, bias);
> -   x = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
> -TREE_TYPE (x), x, bias);
> +   bias = fold_convert_loc (clause_loc, pointer_sized_int_node,
> +bias);
> +   yb = fold_convert_loc (clause_loc, pointer_sized_int_node,
> +  x);
> +   yb = fold_build2_loc (clause_loc, MINUS_EXPR,
> + pointer_sized_int_node, yb, bias);
> +   x = fold_convert_loc (clause_loc, TREE_TYPE (x), yb);
> yb = create_tmp_var (ptype, name);
> gimplify_assign (yb, x, ilist);
> x = yb;
> --- libgomp/testsuite/libgomp.c/reduction-11.c.jj 2015-11-05 
> 16:03:53.0 +0100
> +++ libgomp/testsuite/libgomp.c/reduction-11.c2015-11-20 
> 13:38:24.448520879 +0100
> @@ -1,4 +1,4 @@
> -/* { dg-do run { xfail *-*-* } } */
> +/* { dg-do run } */
>  
>  char z[10] = { 0 };
>  
> --- libgomp/testsuite/libgomp.c/reduction-12.c.jj 2015-11-05 
> 16:03:53.0 +0100
> +++ libgomp/testsuite/libgomp.c/reduction-12.c2015-11-20 
> 13:38:34.565378078 +0100
> @@ -1,4 +1,4 @@
> -/* { dg-do run { xfail *-*-* } } */
> +/* { dg-do run } */
>  
>  struct A { int t; };
>  struct B { char t; };
> --- libgomp/testsuite/libgomp.c++/reduction-11.C.jj   2015-11-05 
> 16:03:53.0 +0100
> +++ libgomp/testsuite/libgomp.c++/reduction-11.C  2015-11-20 
> 13:37:53.921951766 +0100
> @@ -1,4 +1,4 @@
> -// { dg-do run { xfail *-*-* } }
> +// { dg-do run }
>  
>  char z[10] = { 0 };
>  
> --- libgomp/testsuite/libgomp.c++/reduction-12.C.jj   2015-11-05 
> 16:03:53.0 +0100
> +++ libgomp/testsuite/libgomp.c++/reduction-12.C  2015-11-20 
> 13:38:03.983809741 +0100
> @@ -1,4 +1,4 @@
> -// { dg-do run { xfail *-*-* } }
> +// { dg-do run }
>  
>  template 
>  struct A
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH] Fix PR objc/68438 (uninitialized source ranges)

2015-11-23 Thread Joseph Myers
On Sun, 22 Nov 2015, David Malcolm wrote:

> Is there (or could there be) a precanned dg- directive to ask if ObjC is
> available?  

I don't think so.  Normal practice is that each language's tests are in 
appropriate directories for that language, with runtest never called with 
a --tool option for that language if it wasn't built.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [Ada] Introduce a Frontend_Exceptions flag in system.ads

2015-11-23 Thread Olivier Hainque

> On Nov 23, 2015, at 12:02 , Olivier Hainque  wrote:
> Then all the system.ads files will be updated with a correct value of the
> Frontend_Exceptions flags.

Here's the patch.



eh-flags-rts.diff
Description: Binary data


Re: Enable pointer TBAA for LTO

2015-11-23 Thread Eric Botcazou
> You are right, TYPE_NONALIASED_COMPONENT is the wrong way.  I will fix it
> and try to come up with a testcase (TYPE_NONALIASED_COMPONENT is quite
> rarely used beast)

It's only used in Ada as far as I know, but is quite sensitive and quickly 
leads to wrong code if not handled properly in my experience, so this could 
well be responsible for the gnat1 miscompilation.

-- 
Eric Botcazou


Re: [PATCH,RFC] Introduce RUN_UNDER_VALGRIND in test-suite

2015-11-23 Thread Martin Liška
On 11/21/2015 05:26 AM, Hans-Peter Nilsson wrote:
> On Thu, 19 Nov 2015, Martin Liška wrote:
>> Hello.
>>
>> In last two weeks I've removed couple of memory leaks, mainly tied to 
>> middle-end.
>> Currently, a user of the GCC compiler can pass '--enable-checking=valgrind' 
>> configure option
>> that will run all commands within valgrind environment, but as the valgrind 
>> runs just with '-q' option,
>> the result is not very helpful.
>>
>> I would like to start with another approach, where we can run all tests in 
>> test-suite
>> within the valgrind sandbox and return an exit code if there's an error seen 
>> by the tool.
>> That unfortunately leads to many latent (maybe false positives, FE issues, 
>> ...) that can
>> be efficiently ignored by valgrind suppressions file (the file is part of 
>> suggested patch).
>>
>> The first version of the valgrind.supp can survive running compilation of 
>> tramp3d with -O2
>> and majority of tests in test-suite can successfully finish. Most of memory 
>> leaks
>> mentioned in the file can be eventually fixed.
> 
> I didn't quite understand the need for the suppression files.
> Is it like Markus said, only because valgrind annotations are
> not on by default?  Then let's change it so that's the default
> during DEV-PHASE = experimental (the development phase) or
> prerelease.  I really thought that was the case by now.
> (The suppression files are IMHO a useful addition to contrib/
> either way.)

Hi.

Well, the original motivation was basically to fill up the file with all 
common
errors (known issues) and to fix all newly introduced issues. That can minimize
the number of errors reported by the tool.

However, as I run complete test-suite for all default languages, I've seen:

== Statistics ==
Total number of errors: 249615
Number of different errors: 5848

Where two errors are different if they produce either different message or 
backtrace.
For complete list of errors (sorted by # of occurrences), download:

https://docs.google.com/uc?authuser=0=0B0pisUJ80pO1MENrWXBzak5naFk=download

> 
>> As I noticed in results log files, most of remaining issues are connected to 
>> gcc.c and
>> lto-wrapper.c files. gcc.c heavily manipulates with strings and it would 
>> probably require
>> usage of a string pool, that can eventually be easily removed (just in case of 
>> --enable-valgrind-annotations).
>> The second source file tends to produce memory leaks because of fork/exec 
>> constructs. However both
>> can be improved during next stage1.
>>
>> Apart from aforementioned issues, the compiler does not contain so many 
>> issues and I think it's
>> doable to prune them and rely on reported valgrind errors.
>>
>> Patch touches many .exp files, but basically does just couple of 
>> modifications:
>>
>> 1) gcc-defs.exp introduces new global variable run_under_valgrind
>> 2) new procedure dg-run-valgrind distinguishes between just passing options 
>> to 'dg-test',
>>or runs 'dg-test' with additional flags that enable valgrind (using 
>> -wrapper)
>> 3) new procedure dg-runtest-valgrind does the similar
>> 4) many changes in corresponding *.exp files that utilize these procedures
>>
>> The patch should be definitely part of next stage1, but I would appreciate 
>> any thoughts
>> about the described approach?
> 
> IIRC you can replace the actual dg-runtest proc with your own
> (implementing a wrapper).  Grep around, I think we do that
> already.  That's certainly preferable instead of touching all
> callers.

You are right, the suggested patch was over-kill, wrapper should be fine for 
that.
Currently I've been playing with a bit different approach (suggested by Markus),
where I would like to enable valgrind in gcc.c using an environmental variable.

Question is if it should replace existing ENABLE_VALGRIND_CHECKING and how to
integrate it with a valgrind suppressions file?

Ideas are highly welcomed.

Thanks,
Martin

> 
>>
>> Thank you,
>> Martin
> 
> brgds, H-P
> 

>From f0b211e4194e11e5ad52fa3b295a62f67b4060b8 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Fri, 20 Nov 2015 09:46:09 +0100
Subject: [PATCH] Initial version of valgrind wrapper

---
 contrib/gcc.supp | 108 +++
 gcc/gcc.c|  51 +++---
 2 files changed, 139 insertions(+), 20 deletions(-)
 create mode 100644 contrib/gcc.supp

diff --git a/contrib/gcc.supp b/contrib/gcc.supp
new file mode 100644
index 000..deefb28
--- /dev/null
+++ b/contrib/gcc.supp
@@ -0,0 +1,108 @@
+{
+   cpp_get_buff
+   Memcheck:Leak
+   match-leak-kinds: possible
+   fun:malloc
+   fun:xmalloc
+   fun:new_buff
+   fun:_cpp_get_buff
+   ...
+}
+{
+   gnu-as
+   Memcheck:Leak
+   match-leak-kinds: definite,possible
+   fun:malloc
+   ...
+   obj:/usr/bin/as
+   ...
+}
+{
+   gnu-as
+   Memcheck:Leak
+   match-leak-kinds: definite,possible
+   fun:calloc
+   fun:xcalloc
+   ...
+   obj:/usr/bin/as
+   ...
+}
+{
+   todo-fix-mpfr
+  

Re: [PATCH, PR68460] Always call free_stmt_vec_info_vec in gather_scalar_reductions

2015-11-23 Thread Richard Biener
On Fri, Nov 20, 2015 at 4:57 PM, Tom de Vries  wrote:
> [ was: Re: [PATCH] Fix parloops gimple_uid usage ]
>
> On 09/10/15 23:09, Tom de Vries wrote:
>>
>> @@ -2392,6 +2397,9 @@ gather_scalar_reductions (loop_p loop,
>> reduction_info_table_type *reduction_list
>> loop_vec_info simple_inner_loop_info = NULL;
>> bool allow_double_reduc = true;
>>
>> +  if (!stmt_vec_info_vec.exists ())
>> +init_stmt_vec_info_vec ();
>> +
>> simple_loop_info = vect_analyze_loop_form (loop);
>> if (simple_loop_info == NULL)
>>   return;
>> @@ -2453,9 +2461,16 @@ gather_scalar_reductions (loop_p loop,
>> reduction_info_table_type *reduction_list
>> destroy_loop_vec_info (simple_loop_info, true);
>> destroy_loop_vec_info (simple_inner_loop_info, true);
>>
>> +  /* Release the claim on gimple_uid.  */
>> +  free_stmt_vec_info_vec ();
>> +
>
>
> With the src/libgomp/testsuite/libgomp.c/pr46886.c testcase, compiled in
> addition with -ftree-vectorize, I ran into an ICE:
> ...
> src/libgomp/testsuite/libgomp.c/pr46886.c:8:5: internal compiler error: in
> init_stmt_vec_info_vec, at tree-vect-stmts.c:8250
>  int foo (void)
>  ^~~
>
> 0x1196082 init_stmt_vec_info_vec()
> src/gcc/tree-vect-stmts.c:8250
> 0x11c3ed4 vectorize_loops()
> src/gcc/tree-vectorizer.c:510
> 0x10a7ea5 execute
> src/gcc/tree-ssa-loop.c:276
> ...
>
> The ICE is caused by the fact that init_stmt_vec_info_vec is called at the
> start of vectorize_loops, while stmt_vec_info_vec is not empty. I traced
> this back to gather_scalar_reduction, where we call init_stmt_vec_info_vec,
> but we skip free_stmt_vec_info_vec if we take the early-out for
> simple_loop_info == NULL.
>
> This patch fixes the ICE by making sure we always call
> free_stmt_vec_info_vec in gather_scalar_reduction.
>
> Passes cc1/f951 rebuild and autopar testing.
>
> OK for stage3 trunk if bootstrap and regtest succeeds?

Ok.

Richard.

> Thanks,
> - Tom


Re: [Patch, vrp] Allow VRP type conversion folding only for widenings upto word mode

2015-11-23 Thread Richard Biener
On Fri, 20 Nov 2015, Jeff Law wrote:

> On 11/20/2015 10:04 AM, Senthil Kumar Selvaraj wrote:
> > On Thu, Nov 19, 2015 at 10:31:41AM -0700, Jeff Law wrote:
> > > On 11/18/2015 11:20 PM, Senthil Kumar Selvaraj wrote:
> > > > On Wed, Nov 18, 2015 at 09:36:21AM +0100, Richard Biener wrote:
> > > > > 
> > > > > Otherwise ok.
> > > > 
> > > > See modified patch below. If you think vrp98.c is unnecessary, feel free
> > > > to dump it :).
> > > > 
> > > > If ok, could you commit it for me please? I don't have commit access.
> > > > 
> > > > Regards
> > > > Senthil
> > > > 
> > > > gcc/ChangeLog
> > > > 2015-11-19  Senthil Kumar Selvaraj  
> > > > 
> > > > * tree.h (desired_pro_or_demotion_p): New function.
> > > > * tree-vrp.c (simplify_cond_using_ranges): Call it.
> > > > 
> > > > gcc/testsuite/ChangeLog
> > > > 2015-11-19  Senthil Kumar Selvaraj  
> > > > 
> > > > * gcc.dg/tree-ssa/vrp98.c: New testcase.
> > > > * gcc.target/avr/uint8-single-reg.c: New testcase.
> > > I went ahead and committed this as-is.
> > > 
> > > I do think the vrp98 testcase is useful as it verifies that VRP is doing
> > > what we want in a target independent way.  It's a good complement to the
> > > AVR
> > > specific testcase.
> > 
> > I see the same problem on gcc-5-branch as well. Would it be ok to
> > backport the fix to that branch as well?
> That's a call for the release managers.  I typically don't backport anything
> except ICE or incorrect code generation fixes as I tend to be very
> conservative on what goes onto a release branch.
> 
> Jakub, Richi or Joseph would need to ack into a release branch.

As this fixes a regression it qualifies in principle.  But as
it is an optimization regression only I'd prefer to wait a bit to look
for fallout.

Richard.

> jeff
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH] Fix GC ICE during simd clone creation (PR middle-end/68339)

2015-11-23 Thread Richard Biener
On Fri, Nov 20, 2015 at 9:03 PM, Jakub Jelinek  wrote:
> Hi!
>
> node->get_body () can run various IPA passes and ggc_collect in them

Aww.  Looks like we never implemented that ggc_defer_collecting idea
(don't remember the context this popped up, maybe it was when we
introduced TODO_do_not_ggc_collect).  At least late IPA passes
might be affected by this issue as well.

Richard.

>, so
> it is undesirable to hold pointers to GC memory in automatic vars over it.
> While I could store those vars (clone_info, clone and id) into special GTY
> vars just to avoid collecting them, it seems easier to call node->get_body
> () earlier.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk
> and 5 branch.
>
> 2015-11-20  Jakub Jelinek  
>
> PR middle-end/68339
> * omp-low.c (expand_simd_clones): Call node->get_body () before
> allocating stuff in GC.
>
> * gcc.dg/vect/pr68339.c: New test.
>
> --- gcc/omp-low.c.jj2015-11-18 11:19:19.0 +0100
> +++ gcc/omp-low.c   2015-11-20 12:56:17.075193601 +0100
> @@ -18319,6 +18319,10 @@ expand_simd_clones (struct cgraph_node *
>&& TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE)
>  return;
>
> +  /* Call this before creating clone_info, as it might ggc_collect.  */
> +  if (node->definition && node->has_gimple_body_p ())
> +node->get_body ();
> +
>do
>  {
>/* Start with parsing the "omp declare simd" attribute(s).  */
> --- gcc/testsuite/gcc.dg/vect/pr68339.c.jj  2015-11-20 13:10:47.756905395 
> +0100
> +++ gcc/testsuite/gcc.dg/vect/pr68339.c 2015-11-20 13:08:13.0 +0100
> @@ -0,0 +1,17 @@
> +/* PR middle-end/68339 */
> +/* { dg-do compile } */
> +/* { dg-options "--param ggc-min-heapsize=0 --param ggc-min-expand=0 
> -fopenmp-simd" } */
> +
> +#pragma omp declare simd notinbranch
> +int
> +f1 (int x)
> +{
> +  return x;
> +}
> +
> +#pragma omp declare simd notinbranch
> +int
> +f2 (int x)
> +{
> +  return x;
> +}
>
> Jakub


Re: [PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def

2015-11-23 Thread Richard Biener
On Fri, 20 Nov 2015, Tom de Vries wrote:

> On 20/11/15 14:29, Richard Biener wrote:
> > I agree it's somewhat of an odd behavior but all passes should
> > either be placed in a sub-pipeline with an outer
> > loop_optimizer_init()/finalize () call or call both themselves.
> 
> Hmm, but adding loop_optimizer_finalize at the end of pass_lim breaks the loop
> pipeline.
> 
> We could use the style used in pass_slp_vectorize::execute:
> ...
> pass_slp_vectorize::execute (function *fun)
> {
>   basic_block bb;
> 
>   bool in_loop_pipeline = scev_initialized_p ();
>   if (!in_loop_pipeline)
> {
>   loop_optimizer_init (LOOPS_NORMAL);
>   scev_initialize ();
> }
> 
>   ...
> 
>   if (!in_loop_pipeline)
> {
>   scev_finalize ();
>   loop_optimizer_finalize ();
> }
> ...
> 
> Although that doesn't strike me as particularly clean.

At least it would be a consistent "unclean" style.  So yes, the
above would work for me.

Thanks,
Richard.


Re: [PATCH, PR68337] Don't fold memcpy/memmove we want to instrument

2015-11-23 Thread Ilya Enkovich
On 23 Nov 10:39, Richard Biener wrote:
> On Fri, Nov 20, 2015 at 3:30 PM, Ilya Enkovich  wrote:
> > On 20 Nov 14:54, Richard Biener wrote:
> >> On Fri, Nov 20, 2015 at 2:08 PM, Ilya Enkovich  
> >> wrote:
> >> > On 19 Nov 18:19, Richard Biener wrote:
> >> >> On November 19, 2015 6:12:30 PM GMT+01:00, Bernd Schmidt 
> >> >>  wrote:
> >> >> >On 11/19/2015 05:31 PM, Ilya Enkovich wrote:
> >> >> >> Currently we fold all memcpy/memmove calls with a known data size.
> >> >> >> It causes two problems when used with Pointer Bounds Checker.
> >> >> >> The first problem is that we may copy pointers as integer data
> >> >> >> and thus lose bounds.  The second problem is that if we inline
> >> >> >> memcpy, we also have to inline bounds copy and this may result
> >> >> >> in a huge amount of code and significant compilation time growth.
> >> >> >> This patch disables folding for functions we want to instrument.
> >> >> >>
> >> >> >> Does it look reasonable for trunk and GCC5 branch?  Bootstrapped
> >> >> >> and regtested on x86_64-unknown-linux-gnu.
> >> >> >
> >> >> >Can't see anything wrong with it. Ok.
> >> >>
> >> >> But for small sizes this can have a huge impact on optimization.  Which 
> >> >> is why we have the code in the first place.  I'd make the check less 
> >> >> broad, for example inlining copies of size less than a pointer 
> >> >> shouldn't be affected.
> >> >
> >> > Right.  We also may inline in case we know no pointers are copied.  
> >> > Below is a version with extended condition and a couple more tests.  
> >> > Bootstrapped and regtested on x86_64-unknown-linux-gnu.  Is it OK for 
> >> > trunk and gcc-5-branch?
> >> >
> >> >>
> >> >> Richard.
> >> >>
> >> >> >
> >> >> >Bernd
> >> >>
> >> >>
> >> >
> >> > Thanks,
> >> > Ilya
> >> > --
> >> > gcc/
> >> >
> >> > 2015-11-20  Ilya Enkovich  
> >> >
> >> > * gimple-fold.c (gimple_fold_builtin_memory_op): Don't
> >> > fold call if we are going to instrument it and it may
> >> > copy pointers.
> >> >
> >> > gcc/testsuite/
> >> >
> >> > 2015-11-20  Ilya Enkovich  
> >> >
> >> > * gcc.target/i386/mpx/pr68337-1.c: New test.
> >> > * gcc.target/i386/mpx/pr68337-2.c: New test.
> >> > * gcc.target/i386/mpx/pr68337-3.c: New test.
> >> >
> >> >
> >> > diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
> >> > index 1ab20d1..dd9f80b 100644
> >> > --- a/gcc/gimple-fold.c
> >> > +++ b/gcc/gimple-fold.c
> >> > @@ -53,6 +53,8 @@ along with GCC; see the file COPYING3.  If not see
> >> >  #include "gomp-constants.h"
> >> >  #include "optabs-query.h"
> >> >  #include "omp-low.h"
> >> > +#include "tree-chkp.h"
> >> > +#include "ipa-chkp.h"
> >> >
> >> >
> >> >  /* Return true when DECL can be referenced from current unit.
> >> > @@ -664,6 +666,23 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator 
> >> > *gsi,
> >> >unsigned int src_align, dest_align;
> >> >tree off0;
> >> >
> >> > +  /* Inlining of memcpy/memmove may cause bounds lost (if we copy
> >> > +pointers as wide integer) and also may result in huge function
> >> > +size because of inlined bounds copy.  Thus don't inline for
> >> > +functions we want to instrument in case pointers are copied.  */
> >> > +  if (flag_check_pointer_bounds
> >> > + && chkp_instrumentable_p (cfun->decl)
> >> > + /* Even if data may contain pointers we can inline if copy
> >> > +less than a pointer size.  */
> >> > + && (!tree_fits_uhwi_p (len)
> >> > + || compare_tree_int (len, POINTER_SIZE_UNITS) >= 0)
> >>
> >> || tree_to_uhwi (len) >= POINTER_SIZE_UNITS
> >>
> >> > + /* Check data type for pointers.  */
> >> > + && (!TREE_TYPE (src)
> >> > + || !TREE_TYPE (TREE_TYPE (src))
> >> > + || VOID_TYPE_P (TREE_TYPE (TREE_TYPE (src)))
> >> > + || chkp_type_has_pointer (TREE_TYPE (TREE_TYPE (src)
> >>
> >> I don't think you can in any way rely on the pointer type of the src 
> >> argument
> >> as all pointer conversions are useless and memcpy and friends take void *
> >> anyway.
> >
> > This check is looking for cases when we have type information indicating
> > no pointers are copied.  In case of 'void *' we have to assume pointers
> > are copied and inlining is undesired.  Test pr68337-2.c checks pointer
> > type allows to enable inlining.  Looks like this check misses
> > || !COMPLETE_TYPE_P(TREE_TYPE (TREE_TYPE (src)))?
> 
> As said there is no information in the pointer / pointed-to type in GIMPLE.

What does it mean?  We do have TREE_TYPE for used pointer and nested TREE_TYPE
holding pointed-to type.  Is it some random invalid type?

> 
> >>
> >> Note that you also disable memmove to memcpy simplification with this
> >> early check.
> >
> > Doesn't matter for MPX which uses the same implementation for both cases.
> 

Re: RFA: PATCH to match.pd for c++/68385

2015-11-23 Thread Richard Biener
On Sat, Nov 21, 2015 at 7:57 PM, Marc Glisse  wrote:
> On Sat, 21 Nov 2015, Richard Biener wrote:
>
>> On November 20, 2015 8:58:15 PM GMT+01:00, Jason Merrill
>>  wrote:
>>>
>>> In this bug, we hit the (A & sign-bit) != 0 -> A < 0 transformation.
>>> Because of delayed folding, the operands aren't fully folded yet, so we
>>>
>>> have NOP_EXPRs around INTEGER_CSTs, and so calling wi::only_sign_bit_p
>>> ICEs.  We've been seeing several similar bugs, where code calls
>>> integer_zerop and therefore assumes that they have an INTEGER_CST, but
>>> in fact integer_zerop does STRIP_NOPS.
>>>
>>> This patch changes the pattern to only match if the operand is actually
>>>
>>> an INTEGER_CST.  Alternatively we could call tree_strip_nop_conversions
>>>
>>> on the operand, but I would expect that to have issues when the
>>> conversion changes the signedness of the type.
>>>
>>> OK if testing passes?
>>
>>
>> What happens if we remove the nops stripping from integer_zerop?
>
>
> I had the same reaction.
>
>> Do other integer predicates strip nops?
>
>
> Yes, they do.
>
> I believe I added one or two of those, and the reason I added STRIP_NOPS is
> because they started as a copy-paste of integer_zerop...

Ok...

Jason, from looking at the PRs backtrace I see the C++ FE does things like

  if (complain & tf_warning)
warn_logical_operator (loc, code, boolean_type_node,
   code_orig_arg1, fold (arg1),
   code_orig_arg2, fold (arg2));

but that's in principle a no-no, if arg1's operands are not folded.
Delayed folding needs
to happen recursively, bottom-up.  Folders generally do not expect
unfolded operands
like (int) 1.

There is c-common.c:c_fully_fold () which does this properly but with

  /* This function is not relevant to C++ because C++ folds while
 parsing, and may need changes to be correct for C++ when C++
 stops folding while parsing.  */
  if (c_dialect_cxx ())
gcc_unreachable ();

not sure if the C++ FE can re-use this for the diagnostic cases.

Richard.



> --
> Marc Glisse


Re: [gomp4.1] Handle new form of #pragma omp declare target

2015-11-23 Thread Jakub Jelinek
On Mon, Nov 23, 2015 at 12:31:24PM +0100, Thomas Schwinge wrote:
> Hi Jakub!
> 
> On Fri, 17 Jul 2015 15:05:59 +0200, Jakub Jelinek  wrote:
> > [...] "omp declare target link" [...]
> 
> > This patch only marks them with the new attribute, [...]
> 
> > --- gcc/c/c-parser.c.jj 2015-07-16 18:09:25.0 +0200
> > +++ gcc/c/c-parser.c2015-07-17 14:11:08.553694975 +0200
> 
> >  static void
> >  c_parser_omp_declare_target (c_parser *parser)
> >  {
> > [...]
> > +  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> > +{
> > +  tree t = OMP_CLAUSE_DECL (c), id;
> > +  tree at1 = lookup_attribute ("omp declare target", DECL_ATTRIBUTES 
> > (t));
> > +  tree at2 = lookup_attribute ("omp declare target link",
> > +  DECL_ATTRIBUTES (t));
> > +  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINK)
> > +   {
> > + id = get_identifier ("omp declare target link");
> > + std::swap (at1, at2);
> > +   }
> > +  else
> > +   id = get_identifier ("omp declare target");
> 
> Is it intentional that you didn't add "omp declare target link" to
> gcc/c-family/c-common.c:c_common_attribute_table, next to the existing
> "omp declare target"?

No.  But the link attribute support is still unfinished, Ilya is working on
the support.

Jakub


Re: [PATCH, 4/16] Implement -foffload-alias

2015-11-23 Thread Richard Biener
On Sat, 21 Nov 2015, Tom de Vries wrote:

> On 13/11/15 12:39, Jakub Jelinek wrote:
> > On Fri, Nov 13, 2015 at 12:29:51PM +0100, Richard Biener wrote:
> > > > thanks for the explanation. Filed as PR68331 - '[meta-bug] fipa-pta
> > > > issues'.
> > > > 
> > > > Any feedback on the '#pragma GCC offload-alias=' bit
> > > > above?
> > > > Is that sort of what you had in mind?
> > > 
> > > Yes.  Whether that makes sense is another question of course.  You can
> > > annotate memory references with MR_DEPENDENCE_BASE/CLIQUE yourself
> > > as well if you know dependences without the users intervention.
> > 
> > I really don't like even the GCC offload-alias, I just don't see anything
> > special on the offload code.  Not to mention that the same issue is already
> > with other outlined functions, like OpenMP tasks or parallel regions, those
> > aren't offloaded, yet they can suffer from worse alias/points-to analysis
> > too.
> 
> AFAIU there is one aspect that is different for offloaded code: the setup of
> the data on the device.
> 
> Consider this example:
> ...
> unsigned int a[N];
> unsigned int b[N];
> unsigned int c[N];
> 
> int
> main (void)
> {
>   ...
> 
> #pragma acc kernels copyin (a) copyin (b) copyout (c)
>   {
> for (COUNTERTYPE ii = 0; ii < N; ii++)
>   c[ii] = a[ii] + b[ii];
>   }
> 
>   ...
> ...
> 
> At gimple level, we have:
> ...
> #pragma omp target oacc_kernels \
>   map(force_from:c [len: 2097152]) \
>   map(force_to:b [len: 2097152]) \
>   map(force_to:a [len: 2097152])
> ...
> 
> [ The meaning of the force_from/force_to mappings is given in
> include/gomp-constants.h:
> ...
> /* Allocate.  */
> GOMP_MAP_FORCE_ALLOC = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC),
> /* ..., and copy to device.  */
> GOMP_MAP_FORCE_TO = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TO),
> /* ..., and copy from device.  */
> GOMP_MAP_FORCE_FROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_FROM),
> /* ..., and copy to and from device.  */
> GOMP_MAP_FORCE_TOFROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TOFROM),
> ...  ]
> 
> So before calling the offloaded function, a separate alloc is done for a, b
> and c, and the base pointers of the newly allocated objects are passed to the
> offloaded function.
> 
> This means we can mark those base pointers as restrict in the offloaded
> function.
> 
> Attached proof-of-concept patch implements that.
> 
> > We simply have some compiler internal interface between the caller and
> > callee of the outlined regions, each interface in between those has
> > its own structure type used to communicate the info;
> > we can attach attributes on the fields, or some flags to indicate some
> > properties interesting from aliasing POV.
> > We don't really need to perform
> > full IPA-PTA, perhaps it would be enough to a) record somewhere in cgraph
> > the relationship in between such callers and callees (for offloading regions
> > we already have "omp target entrypoint" attribute on the callee and a
> > single caller), tell LTO if possible not to split those into different
> > partitions if easily possible, and then just for these pairs perform
> > aliasing/points-to analysis in the caller and the result record using
> > cliques/special attributes/whatever to the callee side, so that the callee
> > (outlined OpenMP/OpenACC/Cilk+ region) can then improve its alias analysis.
> 
> As a start, is the approach of this patch OK?

Works for me but leaving to Jakub to review for correctness.

Richard.

> It will allow us to commit the oacc kernels patch series with the ability to
> parallelize non-trivial testcases, and work on improving the alias bit after
> that.
> 
> Thanks,
> - Tom
> 
> 
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH v2] Add uaddv_optab, usubv4_optab

2015-11-23 Thread Richard Henderson

On 11/22/2015 05:57 PM, Segher Boessenkool wrote:

Hi Richard,

On Sun, Nov 22, 2015 at 11:38:31AM +0100, Richard Henderson wrote:

One of which I believe I've worked around in the i386 backend, but I
believe to be a latent problem within combine.

With the following patch, disable the add3_*_overflow_2 patterns.
Then compile c-c++-common/torture/builtin-arith-overflow-4.c with -O2 and
you'll see

  t151_2add:
testb   %dil, %dil
leal-1(%rdi), %eax
jne .L644



0xff + x < 0xff  (everything as unsigned char) is the same as  x != 0 .


You'd think yes.  But certainly something right there triggered the abort that 
fails the test case.  Perhaps I simply mis-identified the error, but the "fix" 
for this fixed the other as well.



r~



Re: [AArch64][dejagnu][PATCH 5/7] Dejagnu support for ARMv8.1 Adv.SIMD.

2015-11-23 Thread James Greenhalgh
On Tue, Oct 27, 2015 at 03:32:04PM +, Matthew Wahab wrote:
> On 24/10/15 08:16, Bernhard Reutner-Fischer wrote:
> >On October 23, 2015 2:24:26 PM GMT+02:00, Matthew Wahab 
> > wrote:
> >>The ARMv8.1 architecture extension adds two Adv.SIMD instructions.
> >>This
> >>patch adds support in Dejagnu for ARMv8.1 Adv.SIMD specifiers and
> >>checks.
> >>
> >>The new test options are
> >>- { dg-add-options arm_v8_1a_neon }: Add compiler options needed to
> >>   enable ARMv8.1 Adv.SIMD.
> >>- { dg-require-effective-target arm_v8_1a_neon_hw }: Require a target
> >>   capable of executing ARMv8.1 Adv.SIMD instructions.
> >>
> >
> >Please error with something more meaningful than FOO, !__ARM_FEATURE_QRDMX 
> >comes to mind.
> >
> >TIA,
> >
> 
> I've reworked the patch so that the error is "__ARM_FEATURE_QRDMX not
> defined" and also strengthened the check_effective_target tests.
> 
> Retested for aarch64-none-elf with cross-compiled check-gcc on an
> ARMv8.1 emulator. Also tested with a version of the compiler that
> doesn't define the ACLE feature macro.

Hi Matthew,

I have a couple of comments below. Neither need to block the patch, but
I'd appreciate a reply before I say OK.

> From b12969882298cb79737e882c48398c58a45161b9 Mon Sep 17 00:00:00 2001
> From: Matthew Wahab 
> Date: Mon, 26 Oct 2015 14:58:36 +
> Subject: [PATCH 5/7] [Testsuite] Add dejagnu options for armv8.1 neon
> 
> Change-Id: Ib58b8c4930ad3971af3ea682eda043e14cd2e8b3
> ---
>  gcc/testsuite/lib/target-supports.exp | 56 
> ++-
>  1 file changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index 4d5b0a3d..0fb679d 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -2700,6 +2700,16 @@ proc add_options_for_arm_v8_neon { flags } {
>  return "$flags $et_arm_v8_neon_flags -march=armv8-a"
>  }
>  
> +# Add the options needed for ARMv8.1 Adv.SIMD.
> +
> +proc add_options_for_arm_v8_1a_neon { flags } {
> +if { [istarget aarch64*-*-*] } {
> + return "$flags -march=armv8.1-a"

Should this be -march=armv8.1-a+simd or some other feature flag?

> +} else {
> + return "$flags"
> +}
> +}
> +
>  proc add_options_for_arm_crc { flags } {
>  if { ! [check_effective_target_arm_crc_ok] } {
>  return "$flags"
> @@ -2984,7 +2994,8 @@ foreach { armfunc armflag armdef } { v4 "-march=armv4 
> -marm" __ARM_ARCH_4__
>v7r "-march=armv7-r" __ARM_ARCH_7R__
>v7m "-march=armv7-m -mthumb" 
> __ARM_ARCH_7M__
>v7em "-march=armv7e-m -mthumb" 
> __ARM_ARCH_7EM__
> -  v8a "-march=armv8-a" __ARM_ARCH_8A__ } {
> +  v8a "-march=armv8-a" __ARM_ARCH_8A__
> +  v8_1a "-march=armv8.1a" __ARM_ARCH_8A__ } {
>  eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
>   proc check_effective_target_arm_arch_FUNC_ok { } {
>   if { [ string match "*-marm*" "FLAG" ] &&
> @@ -3141,6 +3152,25 @@ proc check_effective_target_arm_neonv2_hw { } {
>  } [add_options_for_arm_neonv2 ""]]
>  }
>  
> +# Return 1 if the target supports the ARMv8.1 Adv.SIMD extension, 0
> +# otherwise.  The test is valid for AArch64.
> +
> +proc check_effective_target_arm_v8_1a_neon_ok_nocache { } {
> +if { ![istarget aarch64*-*-*] } {
> + return 0
> +}
> +return [check_no_compiler_messages_nocache arm_v8_1a_neon_ok assembly {
> + #if !defined (__ARM_FEATURE_QRDMX)
> + #error "__ARM_FEATURE_QRDMX not defined"
> + #endif
> +} [add_options_for_arm_v8_1a_neon ""]]
> +}
> +
> +proc check_effective_target_arm_v8_1a_neon_ok { } {
> +return [check_cached_effective_target arm_v8_1a_neon_ok \
> + check_effective_target_arm_v8_1a_neon_ok_nocache]
> +}
> +
>  # Return 1 if the target supports executing ARMv8 NEON instructions, 0
>  # otherwise.
>  
> @@ -3159,6 +3189,30 @@ proc check_effective_target_arm_v8_neon_hw { } {
>  } [add_options_for_arm_v8_neon ""]]
>  }
>  
> +# Return 1 if the target supports executing the ARMv8.1 Adv.SIMD extension, 0
> +# otherwise.  The test is valid for AArch64.
> +
> +proc check_effective_target_arm_v8_1a_neon_hw { } {
> +if { ![check_effective_target_arm_v8_1a_neon_ok] } {
> + return 0;
> +}
> +return [check_runtime_nocache arm_v8_1a_neon_hw_available {
> + int
> + main (void)
> + {
> +   long long a = 0, b = 1;
> +   long long result = 0;
> +
> +   asm ("sqrdmlah %s0,%s1,%s2"
> +: "=w"(result)
> +: "w"(a), "w"(b)
> +: /* No clobbers.  */);

Hm, those types look wrong, I guess this works but it is an unusual way
to write it. I presume this is to avoid including arm_neon.h each time, but
you 

[gomp4] Merge trunk r230274 (2015-11-12) into gomp-4_0-branch

2015-11-23 Thread Thomas Schwinge
Hi!

Committed to gomp-4_0-branch in r230749:

commit 4002b8b54d3e1e9ac049446339fc02e3fd192f43
Merge: 018ba48 5902f28
Author: tschwinge 
Date:   Mon Nov 23 10:41:31 2015 +

svn merge -r 230255:230274 svn+ssh://gcc.gnu.org/svn/gcc/trunk


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@230749 
138bc75d-0d04-0410-961f-82ee72b054a4


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [PATCH] Avoid false vector mask conversion

2015-11-23 Thread Ilya Enkovich
Ping

2015-11-13 16:17 GMT+03:00 Ilya Enkovich :
> 2015-11-13 13:03 GMT+03:00 Richard Biener :
>> On Thu, Nov 12, 2015 at 5:08 PM, Ilya Enkovich  
>> wrote:
>>> Hi,
>>>
>>> When we use LTO for fortran we may have a mix of 32bit and 1bit scalar 
>>> booleans. It means we may have conversion of one scalar type to another 
>>> which confuses vectorizer because values with different scalar boolean type 
>>> may get the same vectype.
>>
>> Confuses aka fails to vectorize?
>
> Right.
>
>>
>>>  This patch transforms such conversions into comparison.
>>>
>>> I managed to make a small fortran test which gets vectorized with this 
>>> patch but I didn't find how I can run fortran test with LTO and then scan 
>>> tree dump to check it is vectorized.  BTW here is a loop from the test:
>>>
>>>   real*8 a(18)
>>>   logical b(18)
>>>   integer i
>>>
>>>   do i=1,18
>>>  if(a(i).gt.0.d0) then
>>> b(i)=.true.
>>>  else
>>> b(i)=.false.
>>>  endif
>>>   enddo
>>
>> This looks like the "error" comes from if-conversion - can't we do
>> better there then?
>
> No, this loop is transformed into a single BB before if-conversion by
> cselim + phiopt.
>
> Ilya
>
>>
>> Richard.
>>
>>> Bootstrapped and tested on x86_64-unknown-linux-gnu.  OK for trunk?
>>>
>>> Thanks,
>>> Ilya


Re: Fix lto-symtab ICE during Ada LTO bootstrap

2015-11-23 Thread Eric Botcazou
> No, Interfaces.C.Extensions is non portable, so almost no Ada code out there
> is using it. As I said, existing Ada code is using System.Address all the
> time, so requiring any code change in this area is just a non starter. We'd
> rather require that people don't use LTO with Ada rather than tell them to
> use Interfaces.C.Extensions, that would be more constructive :-)

I see, too bad that Interfaces.C doesn't define something along these lines.

In practice I'm not sure it's a big issue, because presumably it's essentially 
used to interface the C library and AFAIK we don't LTO the C library (yet).
And I presume that, if people start interfacing in Ada with C code doing heavy 
pointer manipulation, we can also point them to Interfaces.C.Pointers. :-)

-- 
Eric Botcazou


Re: [PATCH 1/2] Libsanitizer merge from upstream r253555.

2015-11-23 Thread Maxim Ostapenko

+ Adhemerval

Christophe, it looks like your kernel headers (asm/ptrace.h) don't 
contain ARM_VFPREGS_SIZE. Do you use an old kernel version?


-Maxim

On 23/11/15 15:16, Christophe Lyon wrote:

On 23 November 2015 at 09:07, Jakub Jelinek  wrote:

On Mon, Nov 23, 2015 at 10:46:33AM +0300, Maxim Ostapenko wrote:

Index: libsanitizer/configure.ac
===
--- libsanitizer/configure.ac (revision 230597)
+++ libsanitizer/configure.ac (working copy)
@@ -136,6 +136,12 @@
  esac
  AM_CONDITIONAL(USING_MAC_INTERPOSE, $MAC_INTERPOSE)

+case "$target" in
+  aarch64-*-linux*) tsan_aarch64=true ;;
+  *) tsan_aarch64=false ;;
+esac
+AM_CONDITIONAL(TSAN_AARCH64, $tsan_aarch64)
+

I don't understand the purpose of the above.


Index: libsanitizer/configure.tgt
===
--- libsanitizer/configure.tgt(revision 230597)
+++ libsanitizer/configure.tgt(working copy)
@@ -37,6 +37,8 @@
aarch64*-*-linux*)
   if test x$ac_cv_sizeof_void_p = x8; then
   TSAN_SUPPORTED=yes
+ LSAN_SUPPORTED=yes
+ TSAN_TARGET_DEPENDENT_OBJECTS=tsan_rtl_aarch64.lo
   fi
   ;;
x86_64-*-darwin[1]* | i?86-*-darwin[1]*)

You already have this.


Index: libsanitizer/tsan/Makefile.am
===
--- libsanitizer/tsan/Makefile.am (revision 230597)
+++ libsanitizer/tsan/Makefile.am (working copy)
@@ -21,6 +21,8 @@
   tsan_interface_atomic.cc \
   tsan_interface.cc \
   tsan_interface_java.cc \
+ tsan_libdispatch_mac.cc \
+ tsan_malloc_mac.cc \
   tsan_md5.cc \
   tsan_mman.cc \
   tsan_mutex.cc \
@@ -28,6 +30,7 @@
   tsan_new_delete.cc \
   tsan_platform_linux.cc \
   tsan_platform_mac.cc \
+ tsan_platform_posix.cc \
   tsan_platform_windows.cc \
   tsan_report.cc \
   tsan_rtl.cc \
@@ -41,7 +44,11 @@
   tsan_sync.cc

  libtsan_la_SOURCES = $(tsan_files)
+if TSAN_AARCH64
+EXTRA_libtsan_la_SOURCES = tsan_rtl_aarch64.S
+else
  EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S
+endif

And if I understand automake manual, you can list in there both
EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S
unconditionally, and what object actually gets linked in is picked from the
$(TSAN_TARGET_DEPENDENT_OBJECTS) (and similarly dependencies).

Otherwise LGTM.

Since this commit (r230739), I've noticed that the arm*linux* builds fail:
libtool: compile:
/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/./gcc/xgcc
-shared-libgcc -B/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/g
cc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/./gcc -nostdinc++
-L/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/arm-none-linux-gnueabi/
libstdc++-v3/src
-L/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/arm-none-linux-gnueabi/libstdc++-v3/src/.libs
-L/tmp/1800227_1.tmpdir/
aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/arm-none-linux-gnueabi/libstdc++-v3/libsupc++/.libs
-B/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/t
ools/arm-none-linux-gnueabi/bin/
-B/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/tools/arm-none-linux-gnueabi/lib/
-isystem /tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gc
c-fsf-gccsrc/tools/arm-none-linux-gnueabi/include -isystem
/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/tools/arm-none-linux-gnueabi/sys-include
-D_GNU_SOURCE -D_DEBU
G -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-D__STDC_LIMIT_MACROS -DHAVE_RPC_XDR_H=1 -DHAVE_TIRPC_RPC_XDR_H=0 -I.
-I/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsr
c/libsanitizer/sanitizer_common -I.. -I
/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libsanitizer/include
-isystem /tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/
gccsrc/libsanitizer/include/system -Wall -W -Wno-unused-parameter
-Wwrite-strings -pedantic -Wno-long-long -fPIC -fno-builtin
-fno-exceptions -fno-rtti -fomit-frame-pointer -fun
wind-tables -fvisibility=hidden -Wno-variadic-macros
-I../../libstdc++-v3/include
-I../../libstdc++-v3/include/arm-none-linux-gnueabi
-I/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources
/gcc-fsf/gccsrc/libsanitizer/../libstdc++-v3/libsupc++ -std=gnu++11
-DSANITIZER_LIBBACKTRACE -DSANITIZER_CP_DEMANGLE -I
/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/
libsanitizer/../libbacktrace -I ../libbacktrace -I
/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libsanitizer/../include
-include /tmp/1800227_1.tmpdir/aci-gcc-fsf/so
urces/gcc-fsf/gccsrc/libsanitizer/libbacktrace/backtrace-rename.h -g
-O2 -D_GNU_SOURCE -MT sanitizer_platform_limits_posix.lo -MD -MP -MF
.deps/sanitizer_platform_limits_posix.T
po -c 
/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
  -fPIC 

Re: [OpenACC] declare directive

2015-11-23 Thread Thomas Schwinge
Hi Jim!

A few things I noticed when working on merging your trunk r230275 into
gomp-4_0-branch.  Please fix these (on trunk).

| --- gcc/c-family/c-pragma.h
| +++ gcc/c-family/c-pragma.h

| @@ -176,7 +178,8 @@ enum pragma_omp_clause {
|PRAGMA_OACC_CLAUSE_FIRSTPRIVATE = PRAGMA_OMP_CLAUSE_FIRSTPRIVATE,
|PRAGMA_OACC_CLAUSE_IF = PRAGMA_OMP_CLAUSE_IF,
|PRAGMA_OACC_CLAUSE_PRIVATE = PRAGMA_OMP_CLAUSE_PRIVATE,
| -  PRAGMA_OACC_CLAUSE_REDUCTION = PRAGMA_OMP_CLAUSE_REDUCTION
| +  PRAGMA_OACC_CLAUSE_REDUCTION = PRAGMA_OMP_CLAUSE_REDUCTION,
| +  PRAGMA_OACC_CLAUSE_LINK = PRAGMA_OMP_CLAUSE_LINK
|  };

Maintain alphabetic sorting (as it had been present on gomp-4_0-branch, I
think?).

| --- gcc/c/c-parser.c
| +++ gcc/c/c-parser.c

| @@ -10018,6 +10023,8 @@ c_parser_omp_clause_name (c_parser *parser)
|   result = PRAGMA_OMP_CLAUSE_DEVICE;
| else if (!strcmp ("deviceptr", p))
|   result = PRAGMA_OACC_CLAUSE_DEVICEPTR;
| +   else if (!strcmp ("device_resident", p))
| + result = PRAGMA_OACC_CLAUSE_DEVICE_RESIDENT;
| else if (!strcmp ("dist_schedule", p))
|   result = PRAGMA_OMP_CLAUSE_DIST_SCHEDULE;

Lower-case "device_resident" sorts before "deviceptr".

| @@ -10454,10 +10461,16 @@ c_parser_oacc_data_clause (c_parser *parser, 
pragma_omp_clause c_kind,
|  case PRAGMA_OACC_CLAUSE_DEVICE:
|kind = GOMP_MAP_FORCE_TO;
|break;
| +case PRAGMA_OACC_CLAUSE_DEVICE_RESIDENT:
| +  kind = GOMP_MAP_DEVICE_RESIDENT;
| +  break;
|  case PRAGMA_OACC_CLAUSE_HOST:
|  case PRAGMA_OACC_CLAUSE_SELF:
|kind = GOMP_MAP_FORCE_FROM;
|break;
| +case PRAGMA_OACC_CLAUSE_LINK:
| +  kind = GOMP_MAP_LINK;
| +  break;
|  case PRAGMA_OACC_CLAUSE_PRESENT:
|kind = GOMP_MAP_FORCE_PRESENT;
|break;

Update accepted syntax comment for c_parser_oacc_data_clause function (as
present on gomp-4_0-branch).

| --- gcc/cp/parser.c
| +++ gcc/cp/parser.c
| @@ -29128,6 +29128,8 @@ cp_parser_omp_clause_name (cp_parser *parser)
|   result = PRAGMA_OMP_CLAUSE_DEVICE;
| else if (!strcmp ("deviceptr", p))
|   result = PRAGMA_OACC_CLAUSE_DEVICEPTR;
| +   else if (!strcmp ("device_resident", p))
| + result = PRAGMA_OACC_CLAUSE_DEVICE_RESIDENT;
| else if (!strcmp ("dist_schedule", p))
|   result = PRAGMA_OMP_CLAUSE_DIST_SCHEDULE;

As in gcc/c/c-parser.c.

| @@ -29541,10 +29543,16 @@ cp_parser_oacc_data_clause (cp_parser *parser, 
pragma_omp_clause c_kind,
|  case PRAGMA_OACC_CLAUSE_DEVICE:
|kind = GOMP_MAP_FORCE_TO;
|break;
| +case PRAGMA_OACC_CLAUSE_DEVICE_RESIDENT:
| +  kind = GOMP_MAP_DEVICE_RESIDENT;
| +  break;
|  case PRAGMA_OACC_CLAUSE_HOST:
|  case PRAGMA_OACC_CLAUSE_SELF:
|kind = GOMP_MAP_FORCE_FROM;
|break;
| +case PRAGMA_OACC_CLAUSE_LINK:
| +  kind = GOMP_MAP_LINK;
| +  break;
|  case PRAGMA_OACC_CLAUSE_PRESENT:
|kind = GOMP_MAP_FORCE_PRESENT;
|break;

Likewise.

| +static tree
| +cp_parser_oacc_declare (cp_parser *parser, cp_token *pragma_tok)
| +{
| +  tree clauses, stmt, t;
| +  bool error = false;
| +
| +  clauses = cp_parser_oacc_all_clauses (parser, OACC_DECLARE_CLAUSE_MASK,
| + "#pragma acc declare", pragma_tok, 
true);
| +
| +
| +  if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE)
| +{
| +  error_at (pragma_tok->location,
| + "no valid clauses specified in %<#pragma acc declare%>");

The last parameter to cp_parser_oacc_all_clauses, true, is the default
anyway, and is not specified at other call sites, so remove that.  Also
remove the following double blank lines.

In the C front end, instead of find_omp_clause OMP_CLAUSE_MAP, you just
check for "!clauses" -- unless there is a reason to be different here,
for uniformity settle on one variant.

That said, even if it doesn't make sense, is it actually a hard error to
not specify any clauses with the declare directive?

| --- gcc/gimplify.c
| +++ gcc/gimplify.c

| +/* Return true if global var DECL is device resident.  */
| +
| +static bool
| +device_resident_p (tree decl)

I suggest to improve that function's very generic name, and its
descriptive comment.  Without more context, the casual reader will not
understand what "device resident" means, for example.  At least note that
this relates to the OpenACC declare directive, or something like that.

| +{
| +  tree attr = lookup_attribute ("oacc declare target", DECL_ATTRIBUTES 
(decl));
| +
| +  if (!attr)
| +return false;

As discussed in

already, for "oacc declare" used in an earlier version of this patch:
there is no "oacc declare target" attribute defined/ever set, so I
suspect device_resident_p will always return false, and thus isn't doing
what it's intended to be doing?

| +
| +  for (tree t = TREE_VALUE 

Re: [Patch] PR68137, drop constant overflow flag in adjust_range_with_scev when possible

2015-11-23 Thread Richard Biener
On Mon, 23 Nov 2015, Jiong Wang wrote:

> As reported by pr68137 and pr68326, r230150 caused new issues.
> 
> Those ICEs are caused by adjust_range_with_scev getting range with
> overflowed constants min or max. So given there are too many places to
> generate OVF, we do a check in adjust_range_with_scev, to drop OVF flag
> when it's unnecessary. This should fix the OVF side-effect caused by
> r230150.
> 
> A simple regression testcase is included in this patch.
> 
> bootstrap OK on x86-64 and aarch64, regression ok on both.
> 
> For more background, please see discussion at
> 
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68317
> 
> OK for trunk?

Ok.

Thanks,
Richard.

> 2015-11-23  Richard Biener  
> Jiong Wang  
> 
> gcc/
>   PR tree-optimization/68137
>   PR tree-optimization/68326
>   * tree-vrp.c (adjust_range_with_scev): Call drop_tree_overflow if the
>   final min and max are not infinity.
> 
> gcc/testsuite/
>   * gcc.dg/pr68139.c: New testcase.


Re: [PATCH 1/2] Libsanitizer merge from upstream r253555.

2015-11-23 Thread Christophe Lyon
On 23 November 2015 at 09:07, Jakub Jelinek  wrote:
> On Mon, Nov 23, 2015 at 10:46:33AM +0300, Maxim Ostapenko wrote:
>> Index: libsanitizer/configure.ac
>> ===
>> --- libsanitizer/configure.ac (revision 230597)
>> +++ libsanitizer/configure.ac (working copy)
>> @@ -136,6 +136,12 @@
>>  esac
>>  AM_CONDITIONAL(USING_MAC_INTERPOSE, $MAC_INTERPOSE)
>>
>> +case "$target" in
>> +  aarch64-*-linux*) tsan_aarch64=true ;;
>> +  *) tsan_aarch64=false ;;
>> +esac
>> +AM_CONDITIONAL(TSAN_AARCH64, $tsan_aarch64)
>> +
>
> I don't understand the purpose of the above.
>
>> Index: libsanitizer/configure.tgt
>> ===
>> --- libsanitizer/configure.tgt(revision 230597)
>> +++ libsanitizer/configure.tgt(working copy)
>> @@ -37,6 +37,8 @@
>>aarch64*-*-linux*)
>>   if test x$ac_cv_sizeof_void_p = x8; then
>>   TSAN_SUPPORTED=yes
>> + LSAN_SUPPORTED=yes
>> + TSAN_TARGET_DEPENDENT_OBJECTS=tsan_rtl_aarch64.lo
>>   fi
>>   ;;
>>x86_64-*-darwin[1]* | i?86-*-darwin[1]*)
>
> You already have this.
>
>> Index: libsanitizer/tsan/Makefile.am
>> ===
>> --- libsanitizer/tsan/Makefile.am (revision 230597)
>> +++ libsanitizer/tsan/Makefile.am (working copy)
>> @@ -21,6 +21,8 @@
>>   tsan_interface_atomic.cc \
>>   tsan_interface.cc \
>>   tsan_interface_java.cc \
>> + tsan_libdispatch_mac.cc \
>> + tsan_malloc_mac.cc \
>>   tsan_md5.cc \
>>   tsan_mman.cc \
>>   tsan_mutex.cc \
>> @@ -28,6 +30,7 @@
>>   tsan_new_delete.cc \
>>   tsan_platform_linux.cc \
>>   tsan_platform_mac.cc \
>> + tsan_platform_posix.cc \
>>   tsan_platform_windows.cc \
>>   tsan_report.cc \
>>   tsan_rtl.cc \
>> @@ -41,7 +44,11 @@
>>   tsan_sync.cc
>>
>>  libtsan_la_SOURCES = $(tsan_files)
>> +if TSAN_AARCH64
>> +EXTRA_libtsan_la_SOURCES = tsan_rtl_aarch64.S
>> +else
>>  EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S
>> +endif
>
> And if I understand automake manual, you can list in there both
> EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S
> unconditionally, and what object actually gets linked in is picked from the
> $(TSAN_TARGET_DEPENDENT_OBJECTS) (and similarly dependencies).
>
> Otherwise LGTM.

Since this commit (r230739), I've noticed that the arm*linux* builds fail:
libtool: compile:
/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/./gcc/xgcc
-shared-libgcc -B/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/g
cc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/./gcc -nostdinc++
-L/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/arm-none-linux-gnueabi/
libstdc++-v3/src
-L/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/arm-none-linux-gnueabi/libstdc++-v3/src/.libs
-L/tmp/1800227_1.tmpdir/
aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabi/gcc3/arm-none-linux-gnueabi/libstdc++-v3/libsupc++/.libs
-B/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/t
ools/arm-none-linux-gnueabi/bin/
-B/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/tools/arm-none-linux-gnueabi/lib/
-isystem /tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gc
c-fsf-gccsrc/tools/arm-none-linux-gnueabi/include -isystem
/tmp/1800227_1.tmpdir/aci-gcc-fsf/builds/gcc-fsf-gccsrc/tools/arm-none-linux-gnueabi/sys-include
-D_GNU_SOURCE -D_DEBU
G -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-D__STDC_LIMIT_MACROS -DHAVE_RPC_XDR_H=1 -DHAVE_TIRPC_RPC_XDR_H=0 -I.
-I/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsr
c/libsanitizer/sanitizer_common -I.. -I
/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libsanitizer/include
-isystem /tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/
gccsrc/libsanitizer/include/system -Wall -W -Wno-unused-parameter
-Wwrite-strings -pedantic -Wno-long-long -fPIC -fno-builtin
-fno-exceptions -fno-rtti -fomit-frame-pointer -fun
wind-tables -fvisibility=hidden -Wno-variadic-macros
-I../../libstdc++-v3/include
-I../../libstdc++-v3/include/arm-none-linux-gnueabi
-I/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources
/gcc-fsf/gccsrc/libsanitizer/../libstdc++-v3/libsupc++ -std=gnu++11
-DSANITIZER_LIBBACKTRACE -DSANITIZER_CP_DEMANGLE -I
/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/
libsanitizer/../libbacktrace -I ../libbacktrace -I
/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libsanitizer/../include
-include /tmp/1800227_1.tmpdir/aci-gcc-fsf/so
urces/gcc-fsf/gccsrc/libsanitizer/libbacktrace/backtrace-rename.h -g
-O2 -D_GNU_SOURCE -MT sanitizer_platform_limits_posix.lo -MD -MP -MF
.deps/sanitizer_platform_limits_posix.T
po -c 
/tmp/1800227_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
 -fPIC -DPIC -o 

Re: [PATCH 1/2] Libsanitizer merge from upstream r253555.

2015-11-23 Thread Jakub Jelinek
On Mon, Nov 23, 2015 at 03:33:57PM +0300, Maxim Ostapenko wrote:
> + Adhemerval
> 
> Christophe, it looks like your kernel headers (asm/ptrace.h) don't contain
> ARM_VFPREGS_SIZE. Do you use old kernel version?

Unlike LLVM, we do care to support older kernel headers.
So, if it is say a define, you could add
libsanitizer/include/system/linux/ptrace.h
or
libsanitizer/include/system/asm/ptrace.h
that would #include_next the original header and ifdef __arm__ and
that define is not defined (or some other condition, kernel version etc.),
define it.

Jakub


Re: [PATCH 1/2] Libsanitizer merge from upstream r253555.

2015-11-23 Thread Christophe Lyon
On 23 November 2015 at 13:41, Jakub Jelinek  wrote:
> On Mon, Nov 23, 2015 at 03:33:57PM +0300, Maxim Ostapenko wrote:
>> + Adhemerval
>>
>> Christophe, it looks like your kernel headers (asm/ptrace.h) don't contain
>> ARM_VFPREGS_SIZE. Do you use old kernel version?
>

Yes, I do use old kernel headers.
I could upgrade them, but I tend to avoid changing versions (binutils,
glibc, newlib, kernel headers) unless really necessary.

> Unlike LLVM, we do care to support older kernel headers.
> So, if it is say a define, you could add
> libsanitizer/include/system/linux/ptrace.h
> or
> libsanitizer/include/system/asm/ptrace.h
> that would #include_next the original header and ifdef __arm__ and
> that define is not defined (or some other condition, kernel version etc.),
> define it.
>
> Jakub

So, given Jakub's answer I'll not upgrade them yet on my side :-)


[PATCH, C++] Wrap OpenACC wait in EXPR_STMT

2015-11-23 Thread Chung-Lin Tang
The OpenACC wait directive is represented as a call to the runtime
function "GOACC_wait" instead of a tree code.  I am seeing when
'#pragma acc wait' is used inside a template function, the CALL_EXPR
to GOACC_wait is being silently ignored/removed during tsubst_expr().

I think the correct way to organize this is that the call should be inside
an EXPR_STMT, so here's a patch to do that; basically remove the
add_stmt() call from the shared c_finish_oacc_wait() code, and add
add_stmt()/finish_expr_stmt() in the corresponding C/C++ parts.

Tested with no regressions on trunk, okay to commit?

Thanks,
Chung-Lin

* c-family/c-omp.c (c_finish_oacc_wait): Remove add_stmt() call.
* c/c-parser.c (c_parser_oacc_wait): Add add_stmt() call.
* cp/parser.c (cp_parser_oacc_wait): Add finish_expr_stmt() call.
Index: c-family/c-omp.c
===
--- c-family/c-omp.c(revision 230703)
+++ c-family/c-omp.c(working copy)
@@ -63,7 +63,6 @@ c_finish_oacc_wait (location_t loc, tree parms, tr
 }
 
   stmt = build_call_expr_loc_vec (loc, stmt, args);
-  add_stmt (stmt);
 
   vec_free (args);
 
Index: c/c-parser.c
===
--- c/c-parser.c(revision 230703)
+++ c/c-parser.c(working copy)
@@ -13886,6 +13886,7 @@ c_parser_oacc_wait (location_t loc, c_parser *pars
   strcpy (p_name, " wait");
   clauses = c_parser_oacc_all_clauses (parser, OACC_WAIT_CLAUSE_MASK, p_name);
   stmt = c_finish_oacc_wait (loc, list, clauses);
+  add_stmt (stmt);
 
   return stmt;
 }
Index: cp/parser.c
===
--- cp/parser.c (revision 230703)
+++ cp/parser.c (working copy)
@@ -34930,6 +34930,7 @@ cp_parser_oacc_wait (cp_parser *parser, cp_token *
"#pragma acc wait", pragma_tok);
 
   stmt = c_finish_oacc_wait (loc, list, clauses);
+  stmt = finish_expr_stmt (stmt);
 
   return stmt;
 }


[PATCH] Add testcase for PR68482

2015-11-23 Thread Richard Biener

Committed.

Richard.

2015-11-23  Richard Biener  

PR tree-optimization/68482
* gcc.dg/vect/pr68482.c: New testcase.

Index: gcc/testsuite/gcc.dg/vect/pr68482.c
===
--- gcc/testsuite/gcc.dg/vect/pr68482.c (revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr68482.c (working copy)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+void test(int* input, int* out, unsigned x1, unsigned x2)
+{
+  unsigned i, j;
+  unsigned end = x1;
+
+  for(i = j = 0; i < 1000; i++) {
+  int sum = 0;
+  end += x2;
+  for( ; j < end; j++)
+   sum += input[j];
+  out[i] = sum;
+  }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */


Re: [Aarch64] Use vector wide add for mixed-mode adds

2015-11-23 Thread James Greenhalgh
On Sun, Nov 22, 2015 at 06:24:19PM -0700, Michael Collison wrote:
> 
> 
> On 11/22/2015 8:48 AM, James Greenhalgh wrote:
> >On Sun, Nov 08, 2015 at 11:51:47PM -0700, Michael Collison wrote:
> >>2015-11-06  Michael Collison 
> >> * config/aarch64/aarch64-simd.md (widen_ssum, widen_usum)
> >>(aarch64_w_internal): New patterns
> >> * config/aarch64/iterators.md (Vhalf, VDBLW): New mode attributes.
> >> * gcc.target/aarch64/saddw-1.c: New test.
> >> * gcc.target/aarch64/saddw-2.c: New test.
> >> * gcc.target/aarch64/uaddw-1.c: New test.
> >> * gcc.target/aarch64/uaddw-2.c: New test.
> >> * gcc.target/aarch64/uaddw-3.c: New test.
> >> * lib/target-support.exp
> >> (check_effective_target_vect_widen_sum_hi_to_si_pattern):
> >> Add aarch64 to list of support targets.
> >
> >These hunks are all OK (with the minor style comments below applied).
> 
> Okay I will update with your comments.
> >
> >As we understand what's happening here, let's take the regressions below
> >for now and add AArch64 to the targets affected by pr68333.
> >
> >> * gcc.dg/vect/slp-multitypes-4.c: Disable test for
> >> targets with widening adds from V8HI=>V4SI.
> >> * gcc.dg/vect/slp-multitypes-5.c: Ditto.
> >> * gcc.dg/vect/vect-125.c: Ditto.
> >Let's leave these for now, while we wait for pr68333.
> 
> To clarify you would like me to exclude these bits from the patch?

Yes, given the direction that pr68333 is going (a bug that should be
fixed, rather than an expected failure) that seems best to me.

Thanks,
James
 


Re: [RFC] Cse reducing performance of register allocation with -O2

2015-11-23 Thread Dominik Vogt
On Tue, Oct 13, 2015 at 11:06:48AM -0600, Jeff Law wrote:
> On 10/13/2015 07:12 AM, Dominik Vogt wrote:
> >In some cases, the work of the cse1 pass is counterproductive, as
> >we noticed on s390x.  The effect described below is present since
> >at least 4.8.0.  Note that this may not become manifest in a
> >performance issue problem on all platforms.  Also note that -O1
> >does not show this behaviour because the responsible code is only
> >executed with -O2 or higher.
> >
> >The core of the problem is the way cse1 sometimes handles function
> >parameters.  Roughly, the observed situation is
> >
> >Before cse1
> >
> >   start of function
> >   set pseudoreg Rp to the first argument from hardreg R2
> >   (some code that uses Rp)
> >   set R2 to Rp
> >
> >After cse1:
> >
> >   start of function
> >   set pseudoreg Rp to the first argument from hardreg R2
> >   (some code that uses Rp)  <--- The use of Rp is still present
> >   set R2 to R2  <--- cse1 has replaced Rp with R2
> >
> >After that, the set pattern is removed completely, and now we have
> >both, Rp and R2 live in the drafted code snippet.  Because R2 is
> >still supposed to be live later on, the ira pass chooses a
> >different hard register (R1) for Rp, and code to copy R1 back to
> >R2 is added later.  (See further down for Rtl and assembly code.)
...
> >So, I've made an experimental hack (see attachment) and tried
> >that.  In a larger test suite, register copies could be saved in
> >quite some places (including the test program below), but in other
> >places new register copies were introduced, resulting in about
> >twice as many "issues" as without the patch.
> >
> >Maybe the patch is just too coarse.  In general I'd assume that
> >the register allocator does a better job of assigning hard
> >registers to pseudo registers.  Is it possible to better describe
> >when cse1 should keep its hands off pseudo registers?
> We don't really have a way to describe this.
> 
> I know Vlad looked at problems in this space -- essentially knowing
> when two registers had the same value in the allocators/reload and
> exploiting that information.
> 
> My recollection was it didn't help in any measurable way -- I think
> he discussed it during one of the old GCC summit conferences.  That
> was also in the reload era.
> 
> Ultimately this feels like all the issues around coalescing and
> copy-propagation. With that in mind, if we had lifetime & conflict
> information, then we'd be able to query that and perhaps be able to
> make different choices.

I've spent some more time to try out the naive approach of
detecting this situation in cse_insn().

1. In cse_insn()

  IF current "set" is "set Hardreg H := Pseudoreg P"
  AND  P is generated as a copy of C further up in the extended BB
  AND  P and H still contain the same value
  AND  Cse considers to replace the set with "set H := H"
  AND  P is still live at the end of the EBB
   (In the test program this prevents that *all uses of P are
   replaced by H.)
  THEN do not replace

  => Testing this with the Spec 2006 suite on S390 results in a
  small gain in some cases, a small loss in lots of cases, and a
  substantial win in two cases and a substantial loss in one.  On
  average there is a small win.  I've not tested that on x86, but
  assuming that x86 does not suffer from the original problem I
  expect to see mostly losses.

  This patch requires that a per-register bitmap is created for
  each EBB to record which pseudo registers have been generated
  inside the EBB.

2. 

  IF current "set" is "set Hardreg H := Pseudoreg P"
  AND  P is generated as a copy of C further up in the extended BB
  AND  P and H still contain the same value
  AND  Cse considers to replace the set with "set H := H"
  AND  P is still live at the end of the EBB
  AND  P is used between generation and the current instruction.
  THEN do not replace

  => Has fewer win and fewer loss situations and is only slightly
 better on average than (1).  No real improvement.

  This patch requires scanning every insn in cse_insn() for all
  uses of all pseudo registers.  At the moment there is no
  function in rtlanal.c to do this in one call, so I've just
  scanned for each one individually, causing a dramatic increase
  of compile time (* 2 or even more).

So, my conclusion is that the attempt to fix this by patching
cse_insn() is more or less futile.  Replacing the pseudo register
with the hard register early is actually often a *good* thing,
and to determine whether it's good or bad the code in cse_insn()
would have to correctly guess what later passes do.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany



Re: [PATCH/RFC] C++ FE: expression ranges (v2)

2015-11-23 Thread Richard Biener
On Sat, Nov 21, 2015 at 9:21 AM, Jakub Jelinek  wrote:
> On Sat, Nov 21, 2015 at 02:16:49AM -0500, Jason Merrill wrote:
>> On 11/19/2015 03:46 PM, Jason Merrill wrote:
>> >On 11/15/2015 12:01 AM, David Malcolm wrote:
>> >>As with the C frontend, there's an issue with tree nodes that
>> >>don't have locations: VAR_DECL, INTEGER_CST, etc:
>> >>
>> >>   int test (int foo)
>> >>   {
>> >> return foo * 100;
>> >>^^^   ^^^
>> >>   }
>> >>
>> >>where we'd like to access the source spelling ranges of the expressions
>> >>during parsing, so that we can use them when reporting parser errors.
>> >
>> >Hmm, I had been thinking to address this in the C++ front end by
>> >wrapping uses in another tree: NOP_EXPR for rvalues, VIEW_CONVERT_EXPR
>> >for lvalues.
>>
>> On the other hand, my direction seems likely to cause more issues,
>> especially with code that doesn't yet know how to handle VIEW_CONVERT_EXPR,
>> and could create ambiguity with explicit conversions.  So I guess your
>> approach seems reasonable.
>
> But your approach would allow better diagnostics even in places where you
> don't have the structures with tree, location_t pairs around.  With that
> it will be limited solely to the parser and nothing else, so even template
> instantiation if it is something that can be only detected when
> instantiating would be too late.
>
> I think using a new tree (rather than using NOP_EXPR/VIEW_CONVERT_EXPR)
> that would be just some expression with location and teaching the FE and
> folder about it might be even better.

Agreed.  Note that we already have NON_LVALUE_EXPR and fold-const.c uses
that to stick locations on things that cannot have them.

OTOH I would like to get rid of NON_LVALUE_EXPR in the middle-end (and thus
fold-const.c).

Richard.

> Jakub


Re: Fix lto-symtab ICE during Ada LTO bootstrap

2015-11-23 Thread Arnaud Charlet
> I updated the warning to actually check if the TBAA information is in
> conflict
> and silence warnings on allowed type transitions that are not
> useless_type_conversion_p
> (which is needed for Fortran, too). This is list of warnings I get which I
> suppose will need to be addressed.

Most if not all these warnings are cases of System.Address used as void*

So there is indeed no point in trying to fix one or two cases, and we should
instead instruct LTO somehow to treat System.Address as compatible with void*
otherwise we'll run into endless troubles on that since using System.Address
as void* is very common practice in Ada code.

Arno


Re: [PATCH] Check NULL loop->latch in verify_loop_structure

2015-11-23 Thread Richard Biener
On Mon, 23 Nov 2015, Tom de Vries wrote:

> Hi,
> 
> In verify_loop_structure, we stop checking the latch once we find that it's
> NULL.
> 
> This patch tries a bit harder:
> - if !LOOPS_MAY_HAVE_MULTIPLE_LATCHES, we don't allow a NULL latch
> - if LOOPS_MAY_HAVE_MULTIPLE_LATCHES, we check that indeed there's no
>   single loop latch.
> 
> As a consequence of adding this check, I needed to fix expand_omp_for_generic,
> which missed an initialization of a loop latch.
> 
> Bootstrapped and reg-tested on x86_64.
> 
> OK for stage3 trunk?

You fail to catch the case where loop->latch is non-NULL but there
are multiple latches, so I think the patch can be improved.
That case is more important for correctness (passes
seeing ->latch non-NULL assume a single latch).

Thanks,
Richard.

> Thanks,
> - Tom
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH 2/2] Libsanitizer merge from upstream r253555.

2015-11-23 Thread Jakub Jelinek
On Mon, Nov 23, 2015 at 10:48:30AM +0300, Maxim Ostapenko wrote:
> Index: gcc/testsuite/c-c++-common/asan/halt_on_error-1.c
> ===
> --- gcc/testsuite/c-c++-common/asan/halt_on_error-1.c (revision 0)
> +++ gcc/testsuite/c-c++-common/asan/halt_on_error-1.c (working copy)
> @@ -0,0 +1,23 @@
> +/* Test recovery mode.  */
> +/* { dg-do run } */
> +/* { dg-options "-fsanitize-recover=address" } */
> +/* { dg-set-target-env-var ASAN_OPTIONS "halt_on_error=false" } */
> +
> +#include 
> +
> +volatile int ten = 10;
> +
> +int main() {
> +  char x[10];
> +  memset(x, 0, 11);

Please use ten + 1 instead of 11 here.
With -fsanitize=address, there will be padding immediately after the
variable, therefore otherwise (if the compiler does not see we are
running into undefined behavior) the testcase might be ok.

> --- gcc/testsuite/c-c++-common/asan/halt_on_error-2.c (revision 0)
> +++ gcc/testsuite/c-c++-common/asan/halt_on_error-2.c (working copy)
> @@ -0,0 +1,24 @@
> +/* Test recovery mode.  */
> +/* { dg-do run } */
> +/* { dg-options "-fsanitize-recover=address" } */
> +/* { dg-set-target-env-var ASAN_OPTIONS "halt_on_error=true" } */
> +/* { dg-shouldfail "asan" } */
> +
> +#include 
> +
> +volatile int ten = 10;
> +
> +int main() {
> +  char x[10];
> +  memset(x, 0, 11);

Likewise.

Otherwise OK for trunk.

Jakub


RE: [PING][Patch] Add support for IEEE-conformant versions of scalar fmin* and fmax*

2015-11-23 Thread David Sherwood
Hi,

This is part 1 of a reworked version of a patch I originally submitted in
August, rebased after Richard Sandiford's recent work on the internal
functions. This first patch adds the internal function definitions and optabs
that provide support for IEEE fmax()/fmin() functions.

Later patches will add the appropriate aarch64/aarch32 vector instructions.

Tested:

x86_64-linux: no regressions
aarch64-none-elf: no regressions
arm-none-eabi: no regressions

Regards,
David Sherwood.

ChangeLog:

2015-11-19  David Sherwood  

gcc/
* optabs.def: Add new optabs fmax_optab/fmin_optab.
* internal-fn.def: Add new fmax/fmin internal functions.
* config/aarch64/aarch64.md: New pattern.
* config/aarch64/aarch64-simd.md: Likewise.
* config/aarch64/iterators.md: New unspecs, iterators.
* config/arm/iterators.md: New iterators.
* config/arm/unspecs.md: New unspecs.
* config/arm/neon.md: New pattern.
* config/arm/vfp.md: Likewise.
* doc/md.texi: Add fmin and fmax patterns.
gcc/testsuite
* gcc.target/aarch64/fmaxmin.c: New test.
* gcc.target/arm/fmaxmin.c: New test.


> -Original Message-
> From: Richard Biener [mailto:richard.guent...@gmail.com]
> Sent: 19 August 2015 13:35
> To: Richard Biener; David Sherwood; GCC Patches; Richard Sandiford
> Subject: Re: [PING][Patch] Add support for IEEE-conformant versions of scalar 
> fmin* and fmax*
> 
> On Wed, Aug 19, 2015 at 2:11 PM, Richard Sandiford
>  wrote:
> > Richard Biener  writes:
> >> On Wed, Aug 19, 2015 at 11:54 AM, Richard Sandiford
> >>  wrote:
> >>> Richard Biener  writes:
>  On Tue, Aug 18, 2015 at 4:15 PM, Richard Sandiford
>   wrote:
> > Richard Biener  writes:
> >> On Tue, Aug 18, 2015 at 1:07 PM, David Sherwood
> >>  wrote:
>  On Mon, Aug 17, 2015 at 11:29 AM, David Sherwood
>   wrote:
>  > Hi Richard,
>  >
>  > Thanks for the reply. I'd chosen to add new expressions as this
>  > seemed more
>  > consistent with the existing MAX_EXPR and MIN_EXPR tree codes. In
>  > addition it
>  > would seem to provide more opportunities for optimisation than a
>  > target-specific
>  > builtin implementation would. I accept that optimisation
>  > opportunities will
>  > be more limited for strict math compilation, but that it was still
>  > worth having
>  > them. Also, if we did map it to builtins then the scalar
>  > version would go
>  > through the optabs and the vector version would go through the
>  > target's builtin
>  > expansion, which doesn't seem very consistent.
> 
>  On another note ISTR you can't associate STRICT_MIN/MAX_EXPR and thus
>  you can't vectorize anyway?  (strict IEEE behavior is about NaNs,
>  correct?)
> >>> I thought for this particular case associativity wasn't an issue?
> >>> We're not doing any
> >>> reductions here, just simply performing max/min operations on each
> >>> pair of elements
> >>> in the vectors. I thought for IEEE-compliant behaviour we just need to
> >>> ensure that for
> >>> each pair of elements if one element is a NaN we return the other one.
> >>
> >> Hmm, true.  Ok, my comment still stands - I don't see that using a
> >> tree code is the best thing to do here.  You can add fmin/max optabs
> >> and special expansion of BUILT_IN_FMIN/MAX and you can use a target
> >> builtin for the vectorized variant.
> >>
> >> The reason I am pushing against a new tree code is that we'd have an
> >> awful lot of similar codes when pushing other flag related IL
> >> specialities to actual IL constructs.  And we still need to find a
> >> consistent way to do that.
> >
> > In this case though the new code is really the "native" min/max 
> > operation
> > for fp, rather than some weird flag-dependent behaviour.  Maybe it's
> > a bit unfortunate that the non-strict min/max fp operation got mapped
> > to the generic MIN_EXPR and MAX_EXPR when the non-strict version is 
> > really
> > the flag-related modification.  The STRICT_* prefix is forced by that 
> > and
> > might make it seem like more of a special case than it really is.
> 
>  In some sense.  But the "strict" version already has a builtin (just no
>  special expander in builtins.c).  We usually don't add 1:1 tree codes
>  for existing builtins (why have builtins at all then?).
> >>>
> >>> We still need the builtin to match the C function (and to allow direct
> >>> calls to __builtin_fmin, etc., which are occasionally 

Re: [PATCH, PR68337] Don't fold memcpy/memmove we want to instrument

2015-11-23 Thread Richard Biener
On Fri, Nov 20, 2015 at 3:30 PM, Ilya Enkovich  wrote:
> On 20 Nov 14:54, Richard Biener wrote:
>> On Fri, Nov 20, 2015 at 2:08 PM, Ilya Enkovich  
>> wrote:
>> > On 19 Nov 18:19, Richard Biener wrote:
>> >> On November 19, 2015 6:12:30 PM GMT+01:00, Bernd Schmidt 
>> >>  wrote:
>> >> >On 11/19/2015 05:31 PM, Ilya Enkovich wrote:
>> >> >> Currently we fold all memcpy/memmove calls with a known data size.
>> >> >> It causes two problems when used with Pointer Bounds Checker.
>> >> >> The first problem is that we may copy pointers as integer data
>> >> >> and thus lose bounds.  The second problem is that if we inline
>> >> >> memcpy, we also have to inline bounds copy and this may result
>> >> >> in a huge amount of code and significant compilation time growth.
>> >> >> This patch disables folding for functions we want to instrument.
>> >> >>
>> >> >> Does it look reasonable for trunk and GCC5 branch?  Bootstrapped
>> >> >> and regtested on x86_64-unknown-linux-gnu.
>> >> >
>> >> >Can't see anything wrong with it. Ok.
>> >>
>> >> But for small sizes this can have a huge impact on optimization.  Which 
>> >> is why we have the code in the first place.  I'd make the check less 
>> >> broad, for example inlining copies of size less than a pointer shouldn't 
>> >> be affected.
>> >
>> > Right.  We also may inline in case we know no pointers are copied.  Below 
>> > is a version with extended condition and a couple more tests.  
>> > Bootstrapped and regtested on x86_64-unknown-linux-gnu.  Is it OK for 
>> > trunk and gcc-5-branch?
>> >
>> >>
>> >> Richard.
>> >>
>> >> >
>> >> >Bernd
>> >>
>> >>
>> >
>> > Thanks,
>> > Ilya
>> > --
>> > gcc/
>> >
>> > 2015-11-20  Ilya Enkovich  
>> >
>> > * gimple-fold.c (gimple_fold_builtin_memory_op): Don't
>> > fold call if we are going to instrument it and it may
>> > copy pointers.
>> >
>> > gcc/testsuite/
>> >
>> > 2015-11-20  Ilya Enkovich  
>> >
>> > * gcc.target/i386/mpx/pr68337-1.c: New test.
>> > * gcc.target/i386/mpx/pr68337-2.c: New test.
>> > * gcc.target/i386/mpx/pr68337-3.c: New test.
>> >
>> >
>> > diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
>> > index 1ab20d1..dd9f80b 100644
>> > --- a/gcc/gimple-fold.c
>> > +++ b/gcc/gimple-fold.c
>> > @@ -53,6 +53,8 @@ along with GCC; see the file COPYING3.  If not see
>> >  #include "gomp-constants.h"
>> >  #include "optabs-query.h"
>> >  #include "omp-low.h"
>> > +#include "tree-chkp.h"
>> > +#include "ipa-chkp.h"
>> >
>> >
>> >  /* Return true when DECL can be referenced from current unit.
>> > @@ -664,6 +666,23 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator 
>> > *gsi,
>> >unsigned int src_align, dest_align;
>> >tree off0;
>> >
>> > +  /* Inlining of memcpy/memmove may cause bounds lost (if we copy
>> > +pointers as wide integer) and also may result in huge function
>> > +size because of inlined bounds copy.  Thus don't inline for
>> > +functions we want to instrument in case pointers are copied.  */
>> > +  if (flag_check_pointer_bounds
>> > + && chkp_instrumentable_p (cfun->decl)
>> > + /* Even if data may contain pointers we can inline if copy
>> > +less than a pointer size.  */
>> > + && (!tree_fits_uhwi_p (len)
>> > + || compare_tree_int (len, POINTER_SIZE_UNITS) >= 0)
>>
>> || tree_to_uhwi (len) >= POINTER_SIZE_UNITS
>>
>> > + /* Check data type for pointers.  */
>> > + && (!TREE_TYPE (src)
>> > + || !TREE_TYPE (TREE_TYPE (src))
>> > + || VOID_TYPE_P (TREE_TYPE (TREE_TYPE (src)))
>> > + || chkp_type_has_pointer (TREE_TYPE (TREE_TYPE (src)))))
>>
>> I don't think you can in any way rely on the pointer type of the src argument
>> as all pointer conversions are useless and memcpy and friends take void *
>> anyway.
>
> This check is looking for cases when we have type information indicating
> no pointers are copied.  In case of 'void *' we have to assume pointers
> are copied and inlining is undesired.  Test pr68337-2.c checks pointer
> type allows to enable inlining.  Looks like this check misses
> || !COMPLETE_TYPE_P(TREE_TYPE (TREE_TYPE (src)))?

As said there is no information in the pointer / pointed-to type in GIMPLE.

>>
>> Note that you also disable memmove to memcpy simplification with this
>> early check.
>
> Doesn't matter for MPX which uses the same implementation for both cases.
>
>>
>> Where is pointer transfer handled for MPX?  I suppose it's not done
>> transparently
>> for all memory move instructions but explicitly by instrumented block copy
>> routines in libmpx?  In which case how does that identify pointers vs.
>> non-pointers?
>
> It is handled by instrumentation pass.  Compiler checks type of stored data to
> find pointer stores.  Each pointer store is instrumented with 

Re: [PATCH] Fix PR68067

2015-11-23 Thread Richard Biener
On Fri, 20 Nov 2015, Alan Lawrence wrote:

> On 6 November 2015 at 10:39, Richard Biener  wrote:
> >> ../spec2000/benchspec/CINT2000/254.gap/src/polynom.c:358:11: error: 
> >> location
> >> references block not in block tree
> >> l1_279 = PHI <1(28), l1_299(33)>
> >
> > ^^^
> >
> > this is the error to look at!  It means that the GC heap will be corrupted
> > quite easily.
> >
> 
> This looked very similar to PR68117 - the invalid phi arg, and block
> not in  block-tree, even if not the invalid tree code - and as the
> posters there were having success with valgrind, whereas I wasn't, I
> watched and waited. First observation is that it triggers the asserts
> you suggested in comment 27
> (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68117#c27). Indeed, it
> fails those asserts, even after the patch in comment 25 (committed as
> r230594) to tree-ssa.c (delete_tree_ssa), and the patch in comment#35
> to function.c (set_cfun), and the patch in comment#30 (committed as
> r230424) to cfgexpand.c (pass_expand::execute).
> 
> The patch in comment#29 (which replaces the asserts in comment#27 with
> empties), however, fixes the problem - although I can't rule out, that
> that's just by changing the memory allocation pattern.
> 
> Moreover, if I take those patches and rebase onto a recent trunk (onto
> which the delete_tree_ssa and pass_expand::execute patches have
> already been committed), i.e. just adding the assertions from
> comment#27 and the call in function.c (set_cfun) - the assertions are
> still failing on my testcase, whereas the original (assertionless)
> failure was very erratic, and had since disappeared/been hidden on
> trunk. Indeed those same assertions break in a few other places (even
> in a --disable-bootstrap build after gcc/xgcc is built), so I feel I
> have a good chance of producing a reasonable assertion-breaking
> testcase.
> 
> So I have to ask, how sure are you that those assertions are(/should
> be!) "correct"? :)

Ideally they should be correct but they happen to be not (and I think
the intent was that this should be harmless).  Basically I tried
to assert that nobody creates stale edge redirect data that is not
later consumed or cleared.  Happens to be too optimistic :/

Richard.


Re: [PATCH] Add LANG_HOOKS_EMPTY_RECORD_P for C++ empty class

2015-11-23 Thread Richard Biener
On Sat, Nov 21, 2015 at 12:46 AM, H.J. Lu  wrote:
> On Fri, Nov 20, 2015 at 2:17 PM, Jason Merrill  wrote:
>> On 11/20/2015 01:52 PM, H.J. Lu wrote:
>>>
>>> On Tue, Nov 17, 2015 at 4:22 AM, Richard Biener
>>>  wrote:

 On Tue, Nov 17, 2015 at 12:01 PM, H.J. Lu  wrote:
>
> Empty record should be returned and passed the same way in C and C++.
> This patch adds LANG_HOOKS_EMPTY_RECORD_P for C++ empty class, which
> defaults to return false.  For C++, LANG_HOOKS_EMPTY_RECORD_P is defined
> to is_really_empty_class, which returns true for C++ empty classes.  For
> LTO, we stream out a bit to indicate if a record is empty and we store
> it in TYPE_LANG_FLAG_0 when streaming in.  get_ref_base_and_extent is
> changed to set bitsize to 0 for empty records.  Middle-end and x86
> backend are updated to ignore empty records for parameter passing and
> function value return.  Other targets may need similar changes.


 Please avoid a new langhook for this and instead claim a bit in
 tree_type_common
 like for example restrict_flag (double-check it is unused for
 non-pointers).
>>>
>>>
>>> There is no bit in tree_type_common I can overload.  restrict_flag is
>>> checked for non-pointers to issue an error when it is used on
>>> non-pointers:
>>>
>>>
>>> /export/gnu/import/git/sources/gcc/gcc/testsuite/g++.dg/template/qualttp20.C:19:38:
>>> error: ‘__restrict__’ qualifiers cannot be applied to ‘AS::L’
>>> typedef typename T::L __restrict__ r;// { dg-error "'__restrict__'
>>> qualifiers cannot" "" }
>>
>>
>> The C++ front end only needs to check TYPE_RESTRICT for this purpose on
>> front-end-specific type codes like TEMPLATE_TYPE_PARM; cp_type_quals could
>> handle that specifically if you change TYPE_RESTRICT to only apply to
>> pointers.
>>
>
> restrict_flag is also checked in this case:
>
> [hjl@gnu-6 gcc]$ cat x.i
> struct dummy { };
>
> struct dummy
> foo (struct dummy __restrict__ i)
> {
>   return i;
> }
> [hjl@gnu-6 gcc]$ gcc -S x.i -Wall
> x.i:4:13: error: invalid use of ‘restrict’
>  foo (struct dummy __restrict__ i)
>  ^
> x.i:4:13: error: invalid use of ‘restrict’
> [hjl@gnu-6 gcc]$
>
> restrict_flag can't also be used to indicate `i' is an empty record.

I'm sure this error can be done during parsing w/o relying on TYPE_RESTRICT.

But well, use any other free bit (but do not enlarge
tree_type_common).  Eventually
you can free up a bit by putting sth into type_lang_specific currently
using bits
in tree_type_common.

Richard.

>
> H.J.


Re: [PATCH] Don't reapply loops flags if unnecessary in loop_optimizer_init

2015-11-23 Thread Richard Biener
On Mon, 23 Nov 2015, Tom de Vries wrote:

> [ was: Re: [PATCH, 10/16] Add pass_oacc_kernels pass group in passes.def ]
> 
> On 20/11/15 11:37, Richard Biener wrote:
> > I'd rather make loop_optimizer_init do nothing
> > if requested flags are already set and no fixup is needed and
> > call the above unconditionally.  Thus sth like
> > 
> > Index: gcc/loop-init.c
> > ===
> > --- gcc/loop-init.c (revision 230649)
> > +++ gcc/loop-init.c (working copy)
> > @@ -103,7 +103,11 @@ loop_optimizer_init (unsigned flags)
> > calculate_dominance_info (CDI_DOMINATORS);
> > 
> > if (!needs_fixup)
> > -   checking_verify_loop_structure ();
> > +   {
> > + checking_verify_loop_structure ();
> > + if (loops_state_satisfies_p (flags))
> > +   goto out;
> > +   }
> > 
> > /* Clear all flags.  */
> > if (recorded_exits)
> > @@ -122,11 +126,12 @@ loop_optimizer_init (unsigned flags)
> > /* Apply flags to loops.  */
> > apply_loop_flags (flags);
> > 
> > +  checking_verify_loop_structure ();
> > +
> > +out:
> > /* Dump loops.  */
> > flow_loops_dump (dump_file, NULL, 1);
> > 
> > -  checking_verify_loop_structure ();
> > -
> > timevar_pop (TV_LOOP_INIT);
> >   }
> 
> This patch implements that approach, but the patch is slightly more
> complicated because of the need to handle LOOPS_MAY_HAVE_MULTIPLE_LATCHES
> differently than the rest of the flags.
> 
> Bootstrapped and reg-tested on x86_64.
> 
> OK for stage3 trunk?

Let's revisit this during stage1 if the scev_initialized () thing
SLP vectorization uses works, ok?

Thanks,
Richard.

> Thanks,
> - Tom
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


[PATCH] Fix PR68445

2015-11-23 Thread Richard Biener

The following fixes PR68445, SLP permutation being confused by
a permutation only using the "first" vector.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-11-23  Richard Biener  

PR tree-optimization/68445
* tree-vect-slp.c (vect_create_mask_and_perm): Properly use
two different strides.

* gcc.dg/vect/pr68445.c: New testcase.

Index: gcc/tree-vect-slp.c
===
*** gcc/tree-vect-slp.c (revision 230653)
--- gcc/tree-vect-slp.c (working copy)
*** vect_create_mask_and_perm (gimple *stmt,
*** 3177,3186 
  {
tree perm_dest;
gimple *perm_stmt = NULL;
!   int i, stride;
tree first_vec, second_vec, data_ref;
  
!   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
  
/* Initialize the vect stmts of NODE to properly insert the generated
   stmts later.  */
--- 3206,3216 
  {
tree perm_dest;
gimple *perm_stmt = NULL;
!   int i, stride_in, stride_out;
tree first_vec, second_vec, data_ref;
  
!   stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
!   stride_in = dr_chain.length () / ncopies;
  
/* Initialize the vect stmts of NODE to properly insert the generated
   stmts later.  */
*** vect_create_mask_and_perm (gimple *stmt,
*** 3202,3211 
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
  
/* Store the vector statement in NODE.  */
!   SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
  
!   first_vec_indx += stride;
!   second_vec_indx += stride;
  }
  }
  
--- 3232,3242 
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
  
/* Store the vector statement in NODE.  */
!   SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
!   = perm_stmt;
  
!   first_vec_indx += stride_in;
!   second_vec_indx += stride_in;
  }
  }
  
Index: gcc/testsuite/gcc.dg/vect/pr68445.c
===
*** gcc/testsuite/gcc.dg/vect/pr68445.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr68445.c (working copy)
***
*** 0 
--- 1,19 
+ /* { dg-do compile } */
+ /* { dg-require-effective-target vect_int } */
+ 
+ void IMB_double_fast_x (int *destf, int *dest, int y, int *p1f)
+ {
+   int i;
+   for (i = y; i > 0; i--)
+ {
+   *dest++ = 0;
+   destf[0] = destf[4] = p1f[0];
+   destf[1] = destf[5] = p1f[1];
+   destf[2] = destf[6] = p1f[2];
+   destf[3] = destf[7] = p1f[3];
+   destf += 8;
+   p1f += 4;
+ }
+ }
+ 
+ /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */


Re: [ptx] partitioning optimization

2015-11-23 Thread Jakub Jelinek
On Mon, Nov 23, 2015 at 08:46:30AM +0100, Thomas Schwinge wrote:
> Here, -fopenacc induces -lgomp.  So, we'll either need a (dummy?) libgomp
> available to link against in gcc/testsuite/, or come up with a way to do
> LTO/offloading compilation without actually linking (libgomp into) the
> final executable, or move such tests into libgomp/testsuite/.  (Jakub?)

Link/run tests that link against libgomp belong to libgomp/testsuite/.

Jakub


Re: [PATCH] Mark by_ref mem_ref in build_receiver_ref as non-trapping

2015-11-23 Thread Jakub Jelinek
On Sat, Nov 21, 2015 at 07:34:02PM +0100, Tom de Vries wrote:
> Mark by_ref mem_ref in build_receiver_ref as non-trapping
> 
> 2015-11-21  Tom de Vries  
> 
>   * omp-low.c (build_receiver_ref): Mark by_ref mem_ref as non-trapping.

This is ok.
> 
> ---
>  gcc/omp-low.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/omp-low.c b/gcc/omp-low.c
> index 830db75..78f2853 100644
> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -1249,7 +1249,10 @@ build_receiver_ref (tree var, bool by_ref, omp_context 
> *ctx)
>TREE_THIS_NOTRAP (x) = 1;
>x = omp_build_component_ref (x, field);
>if (by_ref)
> -x = build_simple_mem_ref (x);
> +{
> +  x = build_simple_mem_ref (x);
> +  TREE_THIS_NOTRAP (x) = 1;
> +}
>  
>return x;
>  }


Jakub


[PATCH, gcc5 backport] Fix PR ipa/65908

2015-11-23 Thread Martin Liška
Hi.

At the end of last week, Richi asked me to back port aforementioned PR.
The patch contains two parts: first one is the patch that was applied to trunk
and the second one is a hunk that implements param_used_p (coming from r222374).

Patch can bootstrap and survives regression tests on x86_64-linux-gnu.

Ready for 5 branch?
Thanks,
Martin
From df7a30adc412324dfd181a39d4ac6b550428e49f Mon Sep 17 00:00:00 2001
From: marxin 
Date: Fri, 20 Nov 2015 16:30:42 +0100
Subject: [PATCH] PR ipa/65908

Backport from mainline

gcc/ChangeLog:

	PR ipa/65908
	* ipa-icf.c (sem_item::target_supports_symbol_aliases): Remove
	construction of arg_types.
	(sem_function::sem_function): Likewise.
	(sem_function::~sem_function): Remove destruction of arg_types.
	(sem_function::compatible_parm_types_p): New function.
	(sem_function::equals_wpa): Reorg matching of return values
	and parameter types.
	(sem_function::equals_private): Reorg matching of argument types.
	(sem_function::parse_tree_args): Remove.
	(sem_function::param_used_p): New function.
	* ipa-icf.h (init_wpa): Do not call it.
	(parse_tree_args): Remove.
	(compatible_parm_types_p): Declare.
	(result_type): Remove.
	(arg_types): Remove.
	(param_used_p): Declare.

gcc/testsuite/ChangeLog:

	PR ipa/65908
	* g++.dg/ipa/pr65908.C: New testcase.
---
 gcc/ipa-icf.c  | 138 ++---
 gcc/ipa-icf.h  |  17 ++---
 gcc/testsuite/g++.dg/ipa/pr65908.C |  27 
 3 files changed, 117 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ipa/pr65908.C

diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index b902373..3f29011 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -273,7 +273,6 @@ sem_item::target_supports_symbol_aliases_p (void)
 sem_function::sem_function (bitmap_obstack *stack): sem_item (FUNC, stack),
   m_checker (NULL), m_compared_func (NULL)
 {
-  arg_types.create (0);
   bb_sizes.create (0);
   bb_sorted.create (0);
 }
@@ -285,7 +284,6 @@ sem_function::sem_function (cgraph_node *node, hashval_t hash,
   sem_item (FUNC, node, hash, stack),
   m_checker (NULL), m_compared_func (NULL)
 {
-  arg_types.create (0);
   bb_sizes.create (0);
   bb_sorted.create (0);
 }
@@ -295,7 +293,6 @@ sem_function::~sem_function ()
   for (unsigned i = 0; i < bb_sorted.length (); i++)
 delete (bb_sorted[i]);
 
-  arg_types.release ();
   bb_sizes.release ();
   bb_sorted.release ();
 }
@@ -417,6 +414,47 @@ bool sem_function::compare_edge_flags (cgraph_edge *e1, cgraph_edge *e2)
   return true;
 }
 
+/* Perform additional check needed to match types function parameters that are
+   used.  Unlike for normal decls it matters if type is TYPE_RESTRICT and we
+   make an assumption that REFERENCE_TYPE parameters are always non-NULL.  */
+
+bool
+sem_function::compatible_parm_types_p (tree parm1, tree parm2)
+{
+  /* Be sure that parameters are TBAA compatible.  */
+  if (!func_checker::compatible_types_p (parm1, parm2))
+return return_false_with_msg ("parameter type is not compatible");
+
+  if (POINTER_TYPE_P (parm1)
+  && (TYPE_RESTRICT (parm1) != TYPE_RESTRICT (parm2)))
+return return_false_with_msg ("argument restrict flag mismatch");
+
+  /* nonnull_arg_p implies non-zero range to REFERENCE types.  */
+  if (POINTER_TYPE_P (parm1)
+  && TREE_CODE (parm1) != TREE_CODE (parm2)
+  && opt_for_fn (decl, flag_delete_null_pointer_checks))
+return return_false_with_msg ("pointer wrt reference mismatch");
+
+  return true;
+}
+
+/* Return true if parameter I may be used.  */
+
+bool
+sem_function::param_used_p (unsigned int i)
+{
+  if (ipa_node_params_sum == NULL)
+return false;
+
+  struct ipa_node_params *parms_info = IPA_NODE_REF (get_node ());
+
+  if (parms_info->descriptors.is_empty ()
+  || parms_info->descriptors.length () <= i)
+ return true;
+
+  return ipa_is_param_used (IPA_NODE_REF (get_node ()), i);
+}
+
 /* Fast equality function based on knowledge known in WPA.  */
 
 bool
@@ -427,9 +465,6 @@ sem_function::equals_wpa (sem_item *item,
 
   m_compared_func = static_cast<sem_function *> (item);
 
-  if (arg_types.length () != m_compared_func->arg_types.length ())
-return return_false_with_msg ("different number of arguments");
-
   /* Compare special function DECL attributes.  */
   if (DECL_FUNCTION_PERSONALITY (decl)
   != DECL_FUNCTION_PERSONALITY (item->decl))
@@ -506,26 +541,40 @@ sem_function::equals_wpa (sem_item *item,
 }
 
   /* Result type checking.  */
-  if (!func_checker::compatible_types_p (result_type,
-	 m_compared_func->result_type))
+  if (!func_checker::compatible_types_p
+	 (TREE_TYPE (TREE_TYPE (decl)),
+	  TREE_TYPE (TREE_TYPE (m_compared_func->decl))))
 return return_false_with_msg ("result types are different");
 
   /* Checking types of arguments.  */
-  for (unsigned i = 0; i < arg_types.length (); i++)
+  tree list1 = TYPE_ARG_TYPES (TREE_TYPE (decl)),
+   list2 = TYPE_ARG_TYPES (TREE_TYPE 

Re: [PATCH] GCC system.h and Graphite header order

2015-11-23 Thread Richard Biener
On Sun, Nov 22, 2015 at 4:13 AM, Sebastian Pop  wrote:
> On Sat, Nov 21, 2015 at 4:03 PM, David Edelsohn  wrote:
>> Graphite relies on the ISL library and includes multiple ISL headers.
>> The ISL headers refer to identifiers that are poisoned for use in GCC.
>> The source files for Graphite were organized to include the ISL
>> headers first, to avoid the identifier poisoning, which breaks some
>> platforms because GCC header features are disabled.
>>
>> This patch reorganizes the graphite*.c header file inclusion order to
>> list ISL header files near the end, just before the graphite header
>> files on which they rely.  A new macro, USES_ISL, is defined, which
>> skips the relevant identifier poisoning, similar to logic for Flex and
>> Bison.
>>
>> This patch also removes early inclusion of stddef.h for ISL because it
>> now should be provided by GCC system.h
>>
>> This has been bootstrapped on powerpc-ibm-aix7.1.0.0
>>
>> Okay for trunk?
>>
>> Thanks, David
>>
>> * system.h: Don't poison calloc and strdup if USES_ISL is defined.
>> * graphite-dependences.c: Define USES_ISL.  Include ISL header files
>> after GCC header files and before graphite header files.
>> * graphite-dependences.c: Same.
>> * graphite-isl-ast-to-gimple.c: Same.
>> * graphite-optimize-isl.c: Same.
>> * graphite-poly.c: Same.
>> * graphite-scop-detection.c: Same.
>> * graphite-sese-to-poly.c: Same.
>> * graphite.c: Same.
>
> The patch looks good to me.  Thanks David for fixing this.

Note that if we run into more problems like this instead of guarding
the poisoning we may as well move the include of the ISL headers
to system.h guarded by USES_ISL.

Richard.

> Sebastian


Re: [PATCH, PR68337] Don't fold memcpy/memmove we want to instrument

2015-11-23 Thread Richard Biener
On Mon, Nov 23, 2015 at 12:33 PM, Ilya Enkovich  wrote:
> On 23 Nov 11:44, Richard Biener wrote:
>> On Mon, Nov 23, 2015 at 11:10 AM, Ilya Enkovich  
>> wrote:
>> > On 23 Nov 10:39, Richard Biener wrote:
>> >> On Fri, Nov 20, 2015 at 3:30 PM, Ilya Enkovich  
>> >> wrote:
>> >> > On 20 Nov 14:54, Richard Biener wrote:
>> >> >>
>> >> >> I don't think you can in any way rely on the pointer type of the src 
>> >> >> argument
>> >> >> as all pointer conversions are useless and memcpy and friends take 
>> >> >> void *
>> >> >> anyway.
>> >> >
>> >> > This check is looking for cases when we have type information indicating
>> >> > no pointers are copied.  In case of 'void *' we have to assume pointers
>> >> > are copied and inlining is undesired.  Test pr68337-2.c checks pointer
>> >> > type allows to enable inlining.  Looks like this check misses
>> >> > || !COMPLETE_TYPE_P(TREE_TYPE (TREE_TYPE (src)))?
>> >>
>> >> As said there is no information in the pointer / pointed-to type in 
>> >> GIMPLE.
>> >
>> > What does it mean?  We do have TREE_TYPE for used pointer and nested 
>> > TREE_TYPE
>> > holding pointed-to type.  Is it some random invalid type?
>>
>> Yes, it can be a "random" type.  Like for
>>
>> void foo (float *f)
>> {
>>   memcpy ((void *)f, ...);
>> }
>> int main()
>> {
>>   int **a[10];
>>   foo (a);
>> }
>>
>> which tries to copy to an array of int * but the GIMPLE IL for foo
>> will call memcpy with a float * typed argument.
>
> I see.  But it should still be OK to check type in case of strict aliasing, 
> right?

No, memcpy is always "no-strict-aliasing"

> Thanks,
> Ilya
>
>>
>> >>
>> >> >>
>> >> >> Note that you also disable memmove to memcpy simplification with this
>> >> >> early check.
>> >> >
>> >> > Doesn't matter for MPX which uses the same implementation for both 
>> >> > cases.
>> >> >
>> >> >>
>> >> >> Where is pointer transfer handled for MPX?  I suppose it's not done
>> >> >> transparently
>> >> >> for all memory move instructions but explicitly by instrumented block 
>> >> >> copy
>> >> >> routines in libmpx?  In which case how does that identify pointers vs.
>> >> >> non-pointers?
>> >> >
>> >> > It is handled by instrumentation pass.  Compiler checks type of stored 
>> >> > data to
>> >> > find pointer stores.  Each pointer store is instrumented with bndstx 
>> >> > call.
>> >>
>> >> How does it identify "pointer store"?  With -fno-strict-aliasing you can 
>> >> store
>> >> pointers using an integer type.  You can also always store pointers using
>> >> a character type like
>> >>
>> >> void foo (int *p, int **dest)
>> >> {
>> >>   ((char *)*dest)[0] = (((char *)&p))[0];
>> >>   ((char *)*dest)[1] = (((char *)&p))[1];
>> >>   ((char *)*dest)[2] = (((char *)&p))[2];
>> >>   ((char *)*dest)[3] = (((char *)&p))[3];
>> >> }
>> >
>> > Pointer store is identified using type information.  When pointer is 
>> > cast to
>> > a non-pointer type its bounds are lost.
>> >
>> > Ilya
>> >
>> >>
>> >> > MPX versions of memcpy, memmove etc. don't make any assumptions about
>> >> > type of copied data and just copy whole chunk of bounds metadata 
>> >> > corresponding
>> >> > to copied block.
>> >>
>> >> So it handles copying a pointer in two pieces with two memcpy calls
>> >> correctly.  Good.
>> >>
>> >> Richard.
>> >>
>> >> > Thanks,
>> >> > Ilya
>> >> >
>> >> >>
>> >> >> Richard.
>> >> >>


Re: [AArch64][PATCH 7/7] Add NEON intrinsics vqrdmlah_lane and vqrdmlsh_lane.

2015-11-23 Thread James Greenhalgh
On Fri, Oct 23, 2015 at 01:30:46PM +0100, Matthew Wahab wrote:
> The ARMv8.1 architecture extension adds two Adv.SIMD instructions,
> sqrdmlah and sqrdmlsh. This patch adds the NEON intrinsics vqrdmlah_lane
> and vqrdmlsh_lane for these instructions. The new intrinsics are of the
> form vqrdml{as}h[q]_lane_<type>.
> 
> Tested the series for aarch64-none-linux-gnu with native bootstrap and
> make check on an ARMv8 architecture. Also tested aarch64-none-elf with
> cross-compiled check-gcc on an ARMv8.1 emulator.
> 
> Ok for trunk?
> Matthew
> 
> gcc/
> 2015-10-23  Matthew Wahab  
> 
>   * gcc/config/aarch64/arm_neon.h
>   (vqrdmlah_laneq_s16, vqrdmlah_laneq_s32): New.
>   (vqrdmlahq_laneq_s16, vqrdmlahq_laneq_s32): New.
>   (vqrdmlsh_laneq_s16, vqrdmlsh_laneq_s32): New.
>   (vqrdmlshq_laneq_s16, vqrdmlshq_laneq_s32): New.
>   (vqrdmlah_lane_s16, vqrdmlah_lane_s32): New.
>   (vqrdmlahq_lane_s16, vqrdmlahq_lane_s32): New.
>   (vqrdmlahh_s16, vqrdmlahh_lane_s16, vqrdmlahh_laneq_s16): New.
>   (vqrdmlahs_s32, vqrdmlahs_lane_s32, vqrdmlahs_laneq_s32): New.
>   (vqrdmlsh_lane_s16, vqrdmlsh_lane_s32): New.
>   (vqrdmlshq_lane_s16, vqrdmlshq_lane_s32): New.
>   (vqrdmlshh_s16, vqrdmlshh_lane_s16, vqrdmlshh_laneq_s16): New.
>   (vqrdmlshs_s32, vqrdmlshs_lane_s32, vqrdmlshs_laneq_s32): New.
> 
> gcc/testsuite
> 2015-10-23  Matthew Wahab  
> 
>   * gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc: New file,
>   support code for vqrdml{as}h_lane tests.
>   * gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c: New.
> 

> From a2399818dba85ff2801a28bad77ef51697990da7 Mon Sep 17 00:00:00 2001
> From: Matthew Wahab 
> Date: Thu, 27 Aug 2015 14:17:26 +0100
> Subject: [PATCH 7/7] Add neon intrinsics: vqrdmlah_lane, vqrdmlsh_lane.
> 
> Change-Id: I6d7a372e0a5b83ef0846ab62abbe9b24ada69fc4
> ---
>  gcc/config/aarch64/arm_neon.h  | 182 
> +
>  .../aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc   | 154 +
>  .../aarch64/advsimd-intrinsics/vqrdmlah_lane.c |  57 +++
>  .../aarch64/advsimd-intrinsics/vqrdmlsh_lane.c |  61 +++
>  4 files changed, 454 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c
> 
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 9e73809..9b68e4a 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -10675,6 +10675,59 @@ vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, 
> const int __c)
>return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
>  }
>  
> +#pragma GCC push_options
> +#pragma GCC target ("arch=armv8.1-a")

Rather than strict alphabetical order, can we group everything which is
under one set of extensions together, to save on the push_options/pop_options
pairs.

This patch is OK with that change.

Thanks,
James



[5 PATCH] Fix ICE with mangling aliases (PR c++/67354)

2015-11-23 Thread Jakub Jelinek
Hi!

On Thu, Nov 19, 2015 at 03:04:35PM -0500, Jason Merrill wrote:
> On 11/19/2015 07:40 AM, Jakub Jelinek wrote:
> >@@ -4502,6 +4509,7 @@ c_parse_final_cleanups (void)
> >
> >locus_at_end_of_parsing = input_location;
> >at_eof = 1;
> >+  defer_mangling_aliases = false;
> 
> Let's clear this in generate_mangling_aliases rather than here.  OK with
> that change.

Unfortunately, the GCC 5.3 backport of this is larger, because it relies
on the deferring of the mangling aliases that has been added during the
early debug efforts.  Still, it looks to me small enough.
Bootstrapped/regtested on GCC 5 branch on x86_64-linux and i686-linux, ok
for branch?

2015-11-23  Jakub Jelinek  

Backported from mainline
2015-11-20  Jakub Jelinek  

PR c++/67354
* cp-tree.h (defer_mangling_aliases): Declare.
(generate_mangling_aliases): New prototype.
* decl2.c (defer_mangling_aliases): New variable.
(note_mangling_alias): Use !defer_mangling_aliases
instead of at_eof.
(generate_mangling_aliases): No longer static. Clear
defer_mangling_aliases.
* optimize.c (maybe_thunk_body): Defer emitting mangling aliases
if !defer_mangling_aliases until the fns are put into the same
comdat group.

* g++.dg/abi/mangle67.C: New test.

2015-05-09  Aldy Hernandez  

PR bootstrap/66085
* decl2.c (note_mangling_alias): Declare arguments as unused.

2015-05-08  Jason Merrill  

* decl2.c (mangling_aliases): New variable.
(note_mangling_alias, generate_mangling_aliases): New.
(cp_write_global_declarations): Call generate_mangling_aliases.
(generate_mangling_alias): Split out from...
* mangle.c (mangle_decl): ...here.
* cp-tree.h: Declare note_mangling_alias.

--- gcc/cp/cp-tree.h.jj 2015-11-20 10:12:02.917358200 +0100
+++ gcc/cp/cp-tree.h2015-11-23 10:42:12.707937270 +0100
@@ -4606,6 +4606,11 @@ extern GTY(()) vec *local_c
 
 extern int at_eof;
 
+/* True if note_mangling_alias should enqueue mangling aliases for
+   later generation, rather than emitting them right away.  */
+
+extern bool defer_mangling_aliases;
+
 /* A list of namespace-scope objects which have constructors or
destructors which reside in the global scope.  The decl is stored
in the TREE_VALUE slot and the initializer is stored in the
@@ -5453,6 +5458,8 @@ extern tree finish_case_label (locatio
 extern tree cxx_maybe_build_cleanup(tree, tsubst_flags_t);
 
 /* in decl2.c */
+extern void note_mangling_alias(tree, tree);
+extern void generate_mangling_aliases  (void);
 extern bool check_java_method  (tree);
 extern tree build_memfn_type   (tree, tree, cp_cv_quals, 
cp_ref_qualifier);
 extern tree build_pointer_ptrmemfn_type(tree);
--- gcc/cp/mangle.c.jj  2015-09-04 20:33:01.456531377 +0200
+++ gcc/cp/mangle.c 2015-11-23 10:37:30.668926382 +0100
@@ -3584,30 +3584,7 @@ mangle_decl (const tree decl)
 flag_abi_compat_version, id2);
}
 
-#ifdef ASM_OUTPUT_DEF
-  /* If there's a declaration already using this mangled name,
-don't create a compatibility alias that conflicts.  */
-  if (IDENTIFIER_GLOBAL_VALUE (id2))
-   return;
-
-  struct cgraph_node *n = NULL;
-  if (TREE_CODE (decl) == FUNCTION_DECL
- && !(n = cgraph_node::get (decl)))
-   /* Don't create an alias to an unreferenced function.  */
-   return;
-
-  tree alias = make_alias_for (decl, id2);
-  SET_IDENTIFIER_GLOBAL_VALUE (id2, alias);
-  DECL_IGNORED_P (alias) = 1;
-  TREE_PUBLIC (alias) = TREE_PUBLIC (decl);
-  DECL_VISIBILITY (alias) = DECL_VISIBILITY (decl);
-  if (vague_linkage_p (decl))
-   DECL_WEAK (alias) = 1;
-  if (TREE_CODE (decl) == FUNCTION_DECL)
-   n->create_same_body_alias (alias, decl);
-  else
-   varpool_node::create_extra_name_alias (alias, decl);
-#endif
+  note_mangling_alias (decl, id2);
 }
 }
 
--- gcc/cp/optimize.c.jj2015-11-20 10:12:02.941357861 +0100
+++ gcc/cp/optimize.c   2015-11-23 10:42:12.709937242 +0100
@@ -294,7 +294,11 @@ maybe_thunk_body (tree fn, bool force)
 }
   else if (HAVE_COMDAT_GROUP)
 {
+  /* At eof, defer creation of mangling aliases temporarily.  */
+  bool save_defer_mangling_aliases = defer_mangling_aliases;
+  defer_mangling_aliases = true;
   tree comdat_group = cdtor_comdat_group (fns[1], fns[0]);
+  defer_mangling_aliases = save_defer_mangling_aliases;
   cgraph_node::get_create (fns[0])->set_comdat_group (comdat_group);
   cgraph_node::get_create (fns[1])->add_to_same_comdat_group
(cgraph_node::get_create (fns[0]));
@@ -305,6 +309,9 @@ maybe_thunk_body (tree fn, bool force)
   virtual, it 

Re: [PATCH, PR68337] Don't fold memcpy/memmove we want to instrument

2015-11-23 Thread Ilya Enkovich
On 23 Nov 14:29, Richard Biener wrote:
> On Mon, Nov 23, 2015 at 12:33 PM, Ilya Enkovich  
> wrote:
> >
> > I see.  But it should still be OK to check type in case of strict aliasing, 
> > right?
> 
> No, memcpy is always "no-strict-aliasing"
> 

Thanks a lot for help!  Here is a variant with a size check only as
you originally suggested.  Is it OK for trunk and gcc-5-branch if
no regressions?

Thanks,
Ilya
--
gcc/

2015-11-23  Ilya Enkovich  

* gimple-fold.c: Include ipa-chkp.h.
(gimple_fold_builtin_memory_op): Don't fold call if we
are going to instrument it and it may copy pointers.

gcc/testsuite/

2015-11-23  Ilya Enkovich  

* gcc.target/i386/mpx/pr68337-1.c: New test.
* gcc.target/i386/mpx/pr68337-2.c: New test.


diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index 1ab20d1..6ff5e26 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gomp-constants.h"
 #include "optabs-query.h"
 #include "omp-low.h"
+#include "ipa-chkp.h"
 
 
 /* Return true when DECL can be referenced from current unit.
@@ -664,6 +665,18 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
   unsigned int src_align, dest_align;
   tree off0;
 
+  /* Inlining of memcpy/memmove may cause bounds lost (if we copy
+pointers as wide integer) and also may result in huge function
+size because of inlined bounds copy.  Thus don't inline for
+functions we want to instrument.  */
+  if (flag_check_pointer_bounds
+ && chkp_instrumentable_p (cfun->decl)
+ /* Even if data may contain pointers we can inline if copy
+less than a pointer size.  */
+ && (!tree_fits_uhwi_p (len)
+ || compare_tree_int (len, POINTER_SIZE_UNITS) >= 0))
+   return false;
+
   /* Build accesses at offset zero with a ref-all character type.  */
   off0 = build_int_cst (build_pointer_type_for_mode (char_type_node,
 ptr_mode, true), 0);
diff --git a/gcc/testsuite/gcc.target/i386/mpx/pr68337-1.c 
b/gcc/testsuite/gcc.target/i386/mpx/pr68337-1.c
new file mode 100644
index 000..3f8d79d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mpx/pr68337-1.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-fcheck-pointer-bounds -mmpx" } */
+
+#include "mpx-check.h"
+
+#define N 2
+
+extern void abort ();
+
+static int
+mpx_test (int argc, const char **argv)
+{
+  char ** src = (char **)malloc (sizeof (char *) * N);
+  char ** dst = (char **)malloc (sizeof (char *) * N);
+  int i;
+
+  for (i = 0; i < N; i++)
+src[i] = __bnd_set_ptr_bounds (argv[0] + i, i + 1);
+
+  __builtin_memcpy(dst, src, sizeof (char *) * N);
+
+  for (i = 0; i < N; i++)
+{
+  char *p = dst[i];
+  if (p != argv[0] + i
+ || __bnd_get_ptr_lbound (p) != p
+ || __bnd_get_ptr_ubound (p) != p + i)
+   abort ();
+}
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/mpx/pr68337-2.c 
b/gcc/testsuite/gcc.target/i386/mpx/pr68337-2.c
new file mode 100644
index 000..8845cca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mpx/pr68337-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-fcheck-pointer-bounds -mmpx" } */
+/* { dg-final { scan-assembler-not "memcpy" } } */
+
+void
+test (void *dst, void *src)
+{
+  __builtin_memcpy (dst, src, sizeof (char *) / 2);
+}


[PATCH 2/6] Fix memory leak in tree-ssa

2015-11-23 Thread marxin
gcc/ChangeLog:

2015-11-20  Martin Liska  

* tree-ssa.c (redirect_edge_var_map_destroy): Release
vectors that are used as a second argument of a hash_map.
---
 gcc/tree-ssa.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c
index 02fca4c..db7d065 100644
--- a/gcc/tree-ssa.c
+++ b/gcc/tree-ssa.c
@@ -121,6 +121,11 @@ redirect_edge_var_map_vector (edge e)
 void
 redirect_edge_var_map_destroy (void)
 {
+  if (edge_var_maps)
+for (hash_map::iterator it =
+edge_var_maps->begin (); it != edge_var_maps->end (); ++it)
+  (*it).second.release ();
+
   delete edge_var_maps;
   edge_var_maps = NULL;
 }
-- 
2.6.3




Re: Enable pointer TBAA for LTO

2015-11-23 Thread Martin Jambor
Hi,

On Mon, Nov 23, 2015 at 12:00:25AM +0100, Jan Hubicka wrote:
> Hi,
> here is updated patch which I finally committed today.  It addresses all the 
> comments
> and also fixes one nasty bug that really cost me a lot of time to understand. 
> 
> +   /* LTO type merging does not make any difference between 
> +  component pointer types.  We may have
> +
> +  struct foo {int *a;};
> +
> +  as TYPE_CANONICAL of 
> +
> +  struct bar {float *a;};
> +
> +  Because accesses to int * and float * do not alias, we would get
> +  false negative when accessing the same memory location by
> +  float ** and bar *. We thus record the canonical type as:
> +
> +  struct {void *a;};
> +
> +  void * is special cased and works as a universal pointer type.
> +  Accesses to it conflicts with accesses to any other pointer
> +  type.  */
> 
> This problem manifested itself only as a lto-bootstrap miscompare on 32bit
> build and I spent a lot of time localizing the wrong code since it reproduces
> only in quite large programs where we get conflicts in canonical type merging
> like this.
> 
> The patch thus updates record_component_aliases to substitute void_ptr_type 
> for
> all pointer types. I re-did the stats.  Now the improvement on dealII is 14%
> that is quite a bit lower than earlier, but still substantial.  Since we have
> voidptr globing counter, I know that the number of disambiguations would go at
> least 19% up if we did not do it.
> 
> There is a lot of low hanging fruit in that area now, but the real solution 
> is to
> track types that need to be merged by fortran rules and don't do all this 
> fancy
> globing for C/C++ types.  I will open branch for IPA work and try to prepare 
> this
> for next stage 1.
> 
> bootstrapped/regtested x86_64-linux and ppc64-linux, earlier version tested 
> on i386-linux
> and also on some bigger apps, committed
> 
> Note that we still have bootstrap miscompare with LTO build and 
> --disable-checking,
> I am looking for that now.  Additionally after fixing the ICEs preventing us 
> to build
> the gnat1 binary, gnat1 aborts. Both these are independent of the patch.
> 
> Honza
>   * lto.c (iterative_hash_canonical_type): Always recurse for pointers.
>   (gimple_register_canonical_type_1): Check that pointers do not get
>   canonical types.
>   (gimple_register_canonical_type): Do not register pointers.
> 
>   * tree.c (build_pointer_type_for_mode,build_reference_type_for_mode):
>   In LTO we do not compute TYPE_CANONICAL of pointers.
>   (gimple_canonical_types_compatible_p): Improve coments; sanity check
>   that pointers do not have canonical type that would make us believe
>   they are different.
>   * alias.c (get_alias_set): Do structural type equality on pointers;
>   enable pointer path for LTO; also glob pointer to vector with pointer
>   to vector element; glob pointers and references for LTO; do more strict
>   sanity checking about build_pointer_type returning the canonical type
>   which is also the main variant.
>   (record_component_aliases): When component type is pointer and we
>   do LTO; record void_type_node alias set.

...

> Index: alias.c
> ===
> --- alias.c   (revision 230714)
> +++ alias.c   (working copy)
> @@ -869,13 +869,23 @@ get_alias_set (tree t)
>set = lang_hooks.get_alias_set (t);
>if (set != -1)
>   return set;
> -  return 0;
> +  /* Handle structure type equality for pointer types.  This is easy
> +  to do, because the code bellow ignore canonical types on these anyway.
> +  This is important for LTO, where TYPE_CANONICAL for pointers can not
> +  be meaningfuly computed by the frotnend.  */
> +  if (!POINTER_TYPE_P (t))
> + {
> +   /* In LTO we set canonical types for all types where it makes
> +  sense to do so.  Double check we did not miss some type.  */
> +   gcc_checking_assert (!in_lto_p || !type_with_alias_set_p (t));
> +  return 0;

I have hit this assert on our LTO tests when doing a merge from trunk
to the HSA branch.  On the branch, we generate very simple static
constructors/destructors which just call libgomp (un)registration
routines to which we pass data in static variables of artificial
types.  The assert happens inside varpool_node::finalize_decl calls on
those variables, e.g.:

lto1: internal compiler error: in get_alias_set, at alias.c:880
0x613650 get_alias_set(tree_node*)
/home/mjambor/gcc/branch/src/gcc/alias.c:880
0x71d2c7 set_mem_attributes_minus_bitpos(rtx_def*, tree_node*, int, long)
/home/mjambor/gcc/branch/src/gcc/emit-rtl.c:1772
0xd2d2f0 make_decl_rtl(tree_node*)
/home/mjambor/gcc/branch/src/gcc/varasm.c:1473
0xd310c7 assemble_variable(tree_node*, int, int, int)

[PATCH 0/6] Another fixes of various memory leaks

2015-11-23 Thread marxin
Hi.

Following series has been just bootregtested on x86_64-linux-gnu
(all patches together).

Ready for trunk?
Thanks,
Martin

marxin (6):
  Fix memory leak in cilk
  Fix memory leak in tree-ssa
  Fix memory leaks in IPA devirt
  Fix memory leak in loop_vec_info
  Fix parser memory leak in cilk_simd_fn_info
  Fix memory leak in tree-chkp.c

 gcc/c-family/array-notation-common.c |  2 ++
 gcc/c-family/cilk.c  |  1 +
 gcc/c/c-array-notation.c | 38 ++
 gcc/cp/cp-array-notation.c   | 52 ++--
 gcc/cp/parser.c  |  2 ++
 gcc/ipa-devirt.c |  5 ++--
 gcc/tree-chkp.c  |  2 ++
 gcc/tree-ssa.c   |  5 
 gcc/tree-vect-loop-manip.c   |  2 --
 gcc/tree-vect-loop.c |  1 +
 gcc/vec.h| 12 +
 11 files changed, 68 insertions(+), 54 deletions(-)

-- 
2.6.3



[hsa] Fix operand and instruction types for bit-ops

2015-11-23 Thread Martin Jambor
Hi,

I have applied the following patch to the HSA branch to fix some
rather unexpected type requirements of the HSA finalizer we discovered
when running the target-3[34].c libgomp tests.

Thanks,

Martin


2015-11-23  Martin Jambor  

* hsa-gen.c (gen_hsa_binary_operation): Make immediate operands of
bit AND, OR and XOR always unsigned integer.
(gen_hsa_ternary_atomic_for_builtin): Make bit-operations have
bit-type.

---
 gcc/hsa-gen.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 0cbf3ec..0ef27de 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -2777,6 +2777,14 @@ gen_hsa_binary_operation (int opcode, hsa_op_reg *dest,
   hsa_op_immed *i = dyn_cast  (op2);
   i->set_type (BRIG_TYPE_U32);
 }
+  if ((opcode == BRIG_OPCODE_OR
+   || opcode == BRIG_OPCODE_XOR
+   || opcode == BRIG_OPCODE_AND)
+  && is_a  (op2))
+{
+  hsa_op_immed *i = dyn_cast  (op2);
+  i->set_type (hsa_uint_for_bitsize (hsa_type_bit_size (i->m_type)));
+}
 
   hsa_insn_basic *insn = new hsa_insn_basic (3, opcode, dest->m_type, dest,
 op1, op2);
@@ -4223,6 +4231,10 @@ gen_hsa_ternary_atomic_for_builtin (bool ret_orig,
 {
 case BRIG_ATOMIC_LD:
 case BRIG_ATOMIC_ST:
+case BRIG_ATOMIC_AND:
+case BRIG_ATOMIC_OR:
+case BRIG_ATOMIC_XOR:
+case BRIG_ATOMIC_EXCH:
   mtype = hsa_bittype_for_type (mtype);
   break;
 default:
-- 
2.6.0



Re: [PATCH 1/2] Libsanitizer merge from upstream r253555.

2015-11-23 Thread Maxim Ostapenko

On 23/11/15 16:00, Christophe Lyon wrote:

On 23 November 2015 at 13:41, Jakub Jelinek  wrote:

On Mon, Nov 23, 2015 at 03:33:57PM +0300, Maxim Ostapenko wrote:

+ Adhemerval

Christophe, it looks like your kernel headers (asm/ptrace.h) don't contain
ARM_VFPREGS_SIZE. Do you use old kernel version?

Yes, I do use old kernel headers.
I could upgrade them, but I tend to avoid changing versions (binutils,
glibc, newlib, kernel headers) unless really necessary.


Unlike LLVM, we do care to support older kernel headers.
So, if it is say a define, you could add
libsanitizer/include/system/linux/ptrace.h
or
libsanitizer/include/system/asm/ptrace.h
that would #include_next the original header and ifdef __arm__ and
that define is not defined (or some other condition, kernel version etc.),
define it.

 Jakub

So, given Jakub's answer I'll not upgrade them yet on my side :-)




Yeah, right. I've asked about kernel headers just to make sure I 
correctly understand the issue.


Actually, I see such code in 
lib/sanitizer_common/sanitizer_platform_limits_posix.cc:


#if defined(PTRACE_GETVFPREGS) && defined(PTRACE_SETVFPREGS)
  int ptrace_getvfpregs = PTRACE_GETVFPREGS;
  int ptrace_setvfpregs = PTRACE_SETVFPREGS;
#else
  int ptrace_getvfpregs = -1;
  int ptrace_setvfpregs = -1;
#endif

and in ptrace interceptor:

 else if (request == ptrace_setvfpregs)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, 
struct_user_vfpregs_struct_sz);

 else if (request == ptrace_getvfpregs)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, 
struct_user_vfpregs_struct_sz)


So, perhaps we can do the same thing with ARM_VFPREGS_SIZE, something 
like this?


diff --git 
a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc 
b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc

index 9866cc9..20ff224 100644
--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
+++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -323,10 +323,14 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
   unsigned struct_user_fpxregs_struct_sz = sizeof(struct 
user_fpxregs_struct);

 #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__
 #ifdef __arm__
+#if defined(ARM_VFPREGS_SIZE)
   unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE;
 #else
   unsigned struct_user_vfpregs_struct_sz = 0;
 #endif
+#else
+  unsigned struct_user_vfpregs_struct_sz = 0;
+#endif





Re: [PATCH 1/2] Libsanitizer merge from upstream r253555.

2015-11-23 Thread Jakub Jelinek
On Mon, Nov 23, 2015 at 04:21:34PM +0300, Maxim Ostapenko wrote:
> Yeah, right. I've asked about kernel headers just to make sure I correctly
> understand the issue.
> 
> Actually, I see such code in
> lib/sanitizer_common/sanitizer_platform_limits_posix.cc:
> 
> #if defined(PTRACE_GETVFPREGS) && defined(PTRACE_SETVFPREGS)
>   int ptrace_getvfpregs = PTRACE_GETVFPREGS;
>   int ptrace_setvfpregs = PTRACE_SETVFPREGS;
> #else
>   int ptrace_getvfpregs = -1;
>   int ptrace_setvfpregs = -1;
> #endif
> 
> and in ptrace interceptor:
> 
>  else if (request == ptrace_setvfpregs)
> COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_vfpregs_struct_sz);
>  else if (request == ptrace_getvfpregs)
> COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_vfpregs_struct_sz)
> 
> So, perhaps we can do the same thing with ARM_VFPREGS_SIZE, something like
> this?
> 
> diff --git
> a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
> b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
> index 9866cc9..20ff224 100644
> --- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
> +++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
> @@ -323,10 +323,14 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
>unsigned struct_user_fpxregs_struct_sz = sizeof(struct
> user_fpxregs_struct);
>  #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__
>  #ifdef __arm__
> +#if defined(ARM_VFPREGS_SIZE)
>unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE;
>  #else
>unsigned struct_user_vfpregs_struct_sz = 0;
>  #endif
> +#else
> +  unsigned struct_user_vfpregs_struct_sz = 0;
> +#endif

Maybe, but then it would need to be approved upstream.
If you just define ARM_VFPREGS_SIZE to 0 or whatever else in
the GCC owned wrapper headers, you can avoid that.
I guess talk to upstream.

Jakub


Re: Fix lto-symtab ICE during Ada LTO bootstrap

2015-11-23 Thread Richard Biener
On Mon, Nov 23, 2015 at 12:45 PM, Eric Botcazou  wrote:
>> No, Interfaces.C.Extensions is non portable, so almost no Ada code out there
>> is using it. As I said, existing Ada code is using System.Address all the
>> time, so requiring any code change in this area is just a non starter. We'd
>> rather require that people don't use LTO with Ada rather than tell them to
>> use Interfaces.C.Extensions, that would be more constructive :-)
>
> I see, too bad that Interfaces.C doesn't define something along these lines.
>
> In practice I'm not sure it's a big issue, because presumably it's essentially
> used to interface the C library and AFAIK we don't LTO the C library (yet).
> And I presume that, if people start interfacing in Ada with C code doing heavy
> pointer manipulation, we can also point them to Interfaces.C.Pointers. :-)

But can't you on the GENERIC side drop System.Address to void_ptr_node
again and just not make use of the "heavy lifting" you were talking about?
That is, why is that speciality of System.Address not an Ada FE thing only?

Richard.

> --
> Eric Botcazou


Re: [AArch64][PATCH 6/7] Add NEON intrinsics vqrdmlah and vqrdmlsh.

2015-11-23 Thread James Greenhalgh
On Fri, Oct 23, 2015 at 01:26:11PM +0100, Matthew Wahab wrote:
> The ARMv8.1 architecture extension adds two Adv.SIMD instructions,
> sqrdmlah and sqrdmlsh. This patch adds the NEON intrinsics vqrdmlah and
> vqrdmlsh for these instructions. The new intrinsics are of the form
> vqrdml{as}h[q]_.
> 
> Tested the series for aarch64-none-linux-gnu with native bootstrap and
> make check on an ARMv8 architecture. Also tested aarch64-none-elf with
> cross-compiled check-gcc on an ARMv8.1 emulator.
> 
> Ok for trunk?
> Matthew
> 
> gcc/
> 2015-10-23  Matthew Wahab  
> 
>   * gcc/config/aarch64/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New.
>   (vqrdmlahq_s16, vqrdmlahq_s32): New.
>   (vqrdmlsh_s16, vqrdmlsh_s32): New.
>   (vqrdmlshq_s16, vqrdmlshq_s32): New.
> 
> gcc/testsuite
> 2015-10-23  Matthew Wahab  
> 
>   * gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc: New file,
>   support code for vqrdml{as}h tests.
>   * gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c: New.
> 

> From 611e1232a59dfe42f2cd980407d67abcfea5 Mon Sep 17 00:00:00 2001
> From: Matthew Wahab 
> Date: Thu, 27 Aug 2015 13:22:41 +0100
> Subject: [PATCH 6/7] Add neon intrinsics: vqrdmlah, vqrdmlsh.
> 
> Change-Id: I5c7f8d36ee980d280c1d50f6f212b286084c5acf
> ---
>  gcc/config/aarch64/arm_neon.h  |  53 
>  .../aarch64/advsimd-intrinsics/vqrdmlXh.inc| 138 
> +
>  .../aarch64/advsimd-intrinsics/vqrdmlah.c  |  57 +
>  .../aarch64/advsimd-intrinsics/vqrdmlsh.c  |  61 +
>  4 files changed, 309 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c
> 
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index e186348..9e73809 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -2649,6 +2649,59 @@ vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
>return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
>  }
>  
> +#pragma GCC push_options
> +#pragma GCC target ("arch=armv8.1-a")

Can we please patch the documentation to make it clear that -march=armv8.1-a
always implies -march=armv8.1-a+rdma ? The documentation around which
feature modifiers are implied when leaves much to be desired.

> +
> +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
> +vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
> +{
> +  return (int16x4_t) __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c);

We don't need this cast (likewise the other instances)?

Thanks,
James




[PATCH 4/6] Fix memory leak in loop_vec_info

2015-11-23 Thread marxin
gcc/ChangeLog:

2015-11-23  Martin Liska  

* tree-vect-loop-manip.c (vect_create_cond_for_alias_checks):
Do not release memory for comp_alias_ddrs.
* tree-vect-loop.c (destroy_loop_vec_info): Release
the memory for all loop_vec_info.
---
 gcc/tree-vect-loop-manip.c | 2 --
 gcc/tree-vect-loop.c   | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index c96e196..226b88f 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -2284,8 +2284,6 @@ vect_create_cond_for_alias_checks (loop_vec_info 
loop_vinfo, tree * cond_expr)
 dump_printf_loc (MSG_NOTE, vect_location,
 "created %u versioning for alias checks.\n",
 comp_alias_ddrs.length ());
-
-  comp_alias_ddrs.release ();
 }
 
 
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 41e5031..8f39578 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1179,6 +1179,7 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool 
clean_stmts)
   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
+  LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
   FOR_EACH_VEC_ELT (slp_instances, j, instance)
-- 
2.6.3




[PATCH 5/6] Fix parser memory leak in cilk_simd_fn_info

2015-11-23 Thread marxin
gcc/cp/ChangeLog:

2015-11-23  Martin Liska  

* parser.c (cp_parser_late_parsing_cilk_simd_fn_info):
Release tokens.
---
 gcc/cp/parser.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 24ed404..fd5c7ec 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -35014,6 +35014,7 @@ cp_parser_late_parsing_cilk_simd_fn_info (cp_parser 
*parser, tree attrs)
   error ("%<#pragma omp declare simd%> of % attribute cannot be "
 "used in the same function marked as a Cilk Plus SIMD-enabled "
 " function");
+  parser->cilk_simd_fn_info->tokens.release ();
   XDELETE (parser->cilk_simd_fn_info);
   parser->cilk_simd_fn_info = NULL;
   return attrs;
@@ -35051,6 +35052,7 @@ cp_parser_late_parsing_cilk_simd_fn_info (cp_parser 
*parser, tree attrs)
   attrs = c;
 }
   info->fndecl_seen = true;
+  parser->cilk_simd_fn_info->tokens.release ();
   XDELETE (parser->cilk_simd_fn_info);
   parser->cilk_simd_fn_info = NULL;
   return attrs;
-- 
2.6.3




[PATCH 6/6] Fix memory leak in tree-chkp.c

2015-11-23 Thread marxin
gcc/ChangeLog:

2015-11-23  Martin Liska  

* tree-chkp.c (chkp_make_static_bounds): Release buffer
used for string.
---
 gcc/tree-chkp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c
index 34d9dfc..8b6381f 100644
--- a/gcc/tree-chkp.c
+++ b/gcc/tree-chkp.c
@@ -2910,6 +2910,8 @@ chkp_make_static_bounds (tree obj)
pointer_bounds_type_node);
 }
 
+  free (bnd_var_name);
+
   TREE_PUBLIC (bnd_var) = 0;
   TREE_USED (bnd_var) = 1;
   TREE_READONLY (bnd_var) = 0;
-- 
2.6.3



[PATCH 1/6] Fix memory leak in cilk

2015-11-23 Thread marxin
gcc/c/ChangeLog:

2015-11-20  Martin Liska  

PR c++/68312
* c-array-notation.c (fix_builtin_array_notation_fn):
Use release_vec_vec instead of vec::release.
(build_array_notation_expr): Likewise.
(fix_conditional_array_notations_1): Likewise.
(fix_array_notation_expr): Likewise.
(fix_array_notation_call_expr): Likewise.

gcc/cp/ChangeLog:

2015-11-20  Martin Liska  

PR c++/68312
* cp-array-notation.c (expand_sec_reduce_builtin):
Likewise.
(create_array_refs): Replace argument with const reference.
(expand_an_in_modify_expr): Likewise.
(cp_expand_cond_array_notations): Likewise.
(expand_unary_array_notation_exprs): Likewise.

gcc/c-family/ChangeLog:

2015-11-20  Martin Liska  

PR c++/68312
* array-notation-common.c (cilkplus_extract_an_triplets):
Release vector of vectors.
* cilk.c (gimplify_cilk_spawn): Free allocated memory.

gcc/ChangeLog:

2015-11-20  Martin Liska  

PR c++/68312
* vec.h (release_vec_vec): New function.
---
 gcc/c-family/array-notation-common.c |  2 ++
 gcc/c-family/cilk.c  |  1 +
 gcc/c/c-array-notation.c | 38 ++
 gcc/cp/cp-array-notation.c   | 52 ++--
 gcc/vec.h| 12 +
 5 files changed, 55 insertions(+), 50 deletions(-)

diff --git a/gcc/c-family/array-notation-common.c 
b/gcc/c-family/array-notation-common.c
index 4f7072b..5f2209d 100644
--- a/gcc/c-family/array-notation-common.c
+++ b/gcc/c-family/array-notation-common.c
@@ -636,6 +636,8 @@ cilkplus_extract_an_triplets (vec *list, 
size_t size, size_t rank,
  fold_build1 (CONVERT_EXPR, integer_type_node,
   ARRAY_NOTATION_STRIDE (ii_tree));
  }
+
+  release_vec_vec (array_exprs);
 }
 
 /* Replaces all the __sec_implicit_arg functions in LIST with the induction
diff --git a/gcc/c-family/cilk.c b/gcc/c-family/cilk.c
index e75e20c..1167b2b 100644
--- a/gcc/c-family/cilk.c
+++ b/gcc/c-family/cilk.c
@@ -844,6 +844,7 @@ gimplify_cilk_spawn (tree *spawn_p)
call2, build_empty_stmt (EXPR_LOCATION (call1)));
   append_to_statement_list (spawn_expr, spawn_p);
 
+  free (arg_array);
   return GS_OK;
 }
 
diff --git a/gcc/c/c-array-notation.c b/gcc/c/c-array-notation.c
index 21f8684..49f5f7b 100644
--- a/gcc/c/c-array-notation.c
+++ b/gcc/c/c-array-notation.c
@@ -98,7 +98,7 @@ make_triplet_val_inv (location_t loc, tree *value)
 
 static void
 create_cmp_incr (location_t loc, vec *node, size_t rank,
-vec an_info)
+const vec _info)
 {
   for (size_t ii = 0; ii < rank; ii++)
 {
@@ -122,7 +122,7 @@ create_cmp_incr (location_t loc, vec *node, 
size_t rank,
 */
 
 static vec *
-create_array_refs (location_t loc, vec an_info,
+create_array_refs (location_t loc, const vec _info,
   vec an_loop_info, size_t size, size_t rank)
 {
   tree ind_mult, ind_incr;
@@ -205,7 +205,7 @@ fix_builtin_array_notation_fn (tree an_builtin_fn, tree 
*new_var)
   location_t location = UNKNOWN_LOCATION;
   tree loop_with_init = alloc_stmt_list ();
   vec an_info = vNULL;
-  vec an_loop_info = vNULL;
+  auto_vec an_loop_info;
   enum built_in_function an_type =
 is_cilkplus_reduce_builtin (CALL_EXPR_FN (an_builtin_fn));
   if (an_type == BUILT_IN_NONE)
@@ -593,8 +593,7 @@ fix_builtin_array_notation_fn (tree an_builtin_fn, tree 
*new_var)
 }
   append_to_statement_list_force (body, _with_init);
 
-  an_info.release ();
-  an_loop_info.release ();
+  release_vec_vec (an_info);
   
   return loop_with_init;
 }
@@ -614,7 +613,7 @@ build_array_notation_expr (location_t location, tree lhs, 
tree lhs_origtype,
   tree array_expr_lhs = NULL_TREE, array_expr_rhs = NULL_TREE;
   tree array_expr = NULL_TREE;
   tree an_init = NULL_TREE;
-  vec cond_expr = vNULL;
+  auto_vec cond_expr;
   tree body, loop_with_init = alloc_stmt_list();
   tree scalar_mods = NULL_TREE;
   vec *rhs_array_operand = NULL, *lhs_array_operand = NULL;
@@ -624,7 +623,7 @@ build_array_notation_expr (location_t location, tree lhs, 
tree lhs_origtype,
   tree new_modify_expr, new_var = NULL_TREE, builtin_loop = NULL_TREE;
   size_t rhs_list_size = 0, lhs_list_size = 0; 
   vec lhs_an_info = vNULL, rhs_an_info = vNULL;
-  vec lhs_an_loop_info = vNULL, rhs_an_loop_info = vNULL;
+  auto_vec lhs_an_loop_info, rhs_an_loop_info;
   
   /* If either of this is true, an error message must have been send out
  already.  Not necessary to send out multiple error messages.  */
@@ -881,14 +880,9 @@ build_array_notation_expr (location_t location, tree lhs, 
tree lhs_origtype,
 }
   append_to_statement_list_force (body, _with_init);
 
-  lhs_an_info.release ();
-  

[PATCH 3/6] Fix memory leaks in IPA devirt

2015-11-23 Thread marxin
gcc/ChangeLog:

2015-11-20  Martin Liska  

* ipa-devirt.c (ipa_devirt): Use auto_vec instead
of a local-scope vec. Release final_warning_records.
---
 gcc/ipa-devirt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
index e74f853..6003c92 100644
--- a/gcc/ipa-devirt.c
+++ b/gcc/ipa-devirt.c
@@ -3837,7 +3837,7 @@ ipa_devirt (void)
 
   if (warn_suggest_final_methods)
{
- vec decl_warnings_vec = vNULL;
+ auto_vec decl_warnings_vec;
 
  final_warning_records->decl_warnings.traverse
 
(_warnings_vec);
@@ -3887,7 +3887,8 @@ ipa_devirt (void)
  decl, count, dyn_count);
}
}
-   
+
+  final_warning_records->type_warnings.release ();
   delete (final_warning_records);
   final_warning_records = 0;
 }
-- 
2.6.3




[hsa] depend nowait support for target

2015-11-23 Thread Martin Jambor
On Fri, Nov 13, 2015 at 04:11:50PM +0100, Jakub Jelinek wrote:
> On Fri, Nov 13, 2015 at 11:18:41AM +0100, Jakub Jelinek wrote:
> > For the offloading case, I actually see a problematic spot, namely that
> > GOMP_PLUGIN_target_task_completion could finish too early, and get the
> > task_lock before the thread that run the gomp_target_task_fn doing map_vars
> > + async_run for it.  Bet I need to add further ttask state kinds and deal
> > with that case (so GOMP_PLUGIN_target_task_completion would just take the
> > task lock and tweak ttask state if it has not been added to the queues
> > yet).
> > Plus I think I want to improve the case where we are not waiting, in
> > gomp_create_target_task if not waiting for dependencies actually schedule
> > manually the gomp_target_task_fn.
> 
> These two have been resolved, plus target-34.c issue resolved too (the bug
> was that I've been too lazy and just put target-33.c test into #pragma omp
> parallel #pragma omp single, but that is invalid OpenMP, as single is a
> worksharing region and #pragma omp barrier may not be encountered in such a
> region.  Fixed by rewriting the testcase.
> 
> So here is a full patch that passes for me both non-offloading and
> offloading, OMP_NUM_THREADS=16 (implicit on my box) as well as
> OMP_NUM_THREADS=1 (explicit).  I've incorporated your incremental patch.
> 

I have committed the following patch to the hsa branch to implement
GOMP_OFFLOAD_async_run.  Tests target-33.c and target-34.c pass right
away.  I also do not have any usleep on HSA, so I only ran target-32.c
manually after replacing the usleeps with some pointless busy looping.

During the testing, I have come across quite a few places where
libgomp has to treat shared memory devices like it treats host, and so
I added that to the patch too.

The hunk in gomp_create_target_task should have been in the previous
merge from trunk but I forgot to add it then.

Any feedback welcome,

Martin


2015-11-23  Martin Jambor  

libgomp/
* plugin/plugin-hsa.c (async_run_info): New structure.
(run_kernel_asynchronously): New function.
(GOMP_OFFLOAD_async_run): New implementation.
* target.c (GOMP_target_data_ext): Handle shared memory devices like
the host.
(GOMP_target_update): Likewise.
(GOMP_target_update_ext): Likewise.
(GOMP_target_enter_exit_data): Likewise.
(omp_target_alloc): Likewise.
(omp_target_free): Likewise.
(omp_target_memcpy): Likewise.
(omp_target_memcpy_rect): Likewise.
* task.c (gomp_create_target_task): Fill in args field of ttask.
---
 libgomp/plugin/plugin-hsa.c | 61 -
 libgomp/target.c| 30 ++
 libgomp/task.c  |  1 +
 3 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index 40dbcde..72f5bdd 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -1127,9 +1127,9 @@ failure:
   return false;
 }
 
-/* Part of the libgomp plugin interface.  Run a kernel on a device N and pass
-   the it an array of pointers in VARS as a parameter.  The kernel is
-   identified by FN_PTR which must point to a kernel_info structure.  */
+/* Part of the libgomp plugin interface.  Run a kernel on device N and pass it
+   an array of pointers in VARS as a parameter.  The kernel is identified by
+   FN_PTR which must point to a kernel_info structure.  */
 
 void
 GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void** args)
@@ -1237,13 +1237,62 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, 
void** args)
 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
 }
 
+/* Information to be passed to a thread running a kernel asycnronously.  */
+
+struct async_run_info
+{
+  int device;
+  void *tgt_fn;
+  void *tgt_vars;
+  void **args;
+  void *async_data;
+};
+
+/* Thread routine to run a kernel asynchronously.  */
+
+static void *
+run_kernel_asynchronously (void *thread_arg)
+{
+  struct async_run_info *info = (struct async_run_info *) thread_arg;
+  int device = info->device;
+  void *tgt_fn = info->tgt_fn;
+  void *tgt_vars = info->tgt_vars;
+  void **args = info->args;
+  void *async_data = info->async_data;
+
+  free (info);
+  GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
+  GOMP_PLUGIN_target_task_completion (async_data);
+  return NULL;
+}
+
+/* Part of the libgomp plugin interface.  Run a kernel like GOMP_OFFLOAD_run
+   does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
+   has finished.  */
+
 void
 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
void **args, void *async_data)
 {
-  /* FIXME: Implement.  */
-  GOMP_PLUGIN_fatal ("Support for HSA does not yet implement asynchronous "
-"execution. ");
+  pthread_t pt;
+  struct async_run_info *info;
+  HSA_DEBUG 

Re: [hsa] depend nowait support for target

2015-11-23 Thread Jakub Jelinek
On Mon, Nov 23, 2015 at 03:12:05PM +0100, Martin Jambor wrote:
> +/* Thread routine to run a kernel asynchronously.  */
> +
> +static void *
> +run_kernel_asynchronously (void *thread_arg)
> +{
> +  struct async_run_info *info = (struct async_run_info *) thread_arg;
> +  int device = info->device;
> +  void *tgt_fn = info->tgt_fn;
> +  void *tgt_vars = info->tgt_vars;
> +  void **args = info->args;
> +  void *async_data = info->async_data;
> +
> +  free (info);
> +  GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
> +  GOMP_PLUGIN_target_task_completion (async_data);
> +  return NULL;

Is this just a temporary hack to work around the missing task.c/target.c
support for plugins that need polling (calling some hook) to determine
completion of the tasks, or is there no way to tell HSA to spawn something
asynchronously?
Short term it is ok this way.

> +  int err = pthread_create (, NULL, _kernel_asynchronously, info);
> +  if (err != 0)
> +GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
> +strerror (err));
> +  err = pthread_detach (pt);
> +  if (err != 0)
> +GOMP_PLUGIN_fatal ("Failed to detach a thread to run HRA kernel "
> +"asynchronously: %s", strerror (err));

HSA instead of HRA?

Jakub


Re: [AARCH64] Adding constant folding for __builtin_fmulx* with scalar 32 and 64 bit arguments

2015-11-23 Thread James Greenhalgh
On Mon, Nov 09, 2015 at 11:40:11AM +, Bilyan Borisov wrote:
> This patch adds an extension to aarch64_gimple_fold_builtin () that does
> constant folding on __builtin_fmulx* calls for 32 and 64 bit floating point
> scalar modes. We fold when both arguments are constant, as well as when only 
> one
> is. The special cases of 0*inf, -0*inf, 0*-inf, and -0*-inf are also
> handled. The case for vector constant arguments will be dealt with in a future
> patch since the tests for that would be obscure and would unnecessarily
> complicate this patch.
> 
> Added tests to check for proper handling of constant folding. Tested on 
> targets
> aarch64-none-elf and aarch64_be-none-elf.
> 
> ---
> 
> gcc/
> 
> 2015-XX-XX  Bilyan Borisov  
> 
>   * config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin): Added
>   constant folding.
> 
> gcc/testsuite/
> 
> 2015-XX-XX  Bilyan Borisov  
> 
>   * gcc.target/aarch64/simd/vmulx.x: New.
>   * gcc.target/aarch64/simd/vmulx_f64_2.c: Likewise.
>   * gcc.target/aarch64/simd/vmulxd_f64_2.c: Likewise.
>   * gcc.target/aarch64/simd/vmulxs_f32_2.c: Likewise.
> 

OK, thanks.

I've committed this on your behalf as revision 230758 with a slight tweak to
the changelog to read:

* config/aarch64/aarch64-builtins.c
(aarch64_gimple_fold_builtin): Fold FMULX.

Thanks,
James



[hsa] Use new format of device-specific target arguments

2015-11-23 Thread Martin Jambor
Hi,

I have committed the following patch to the hsa branch; it changes the format of
device-specific arguments of GOMP_target_ext to something closer to
what Jakub wants.  Specifically, lowest 7 bits determine the
accelerator type (0 means all of them), next bit determines whether
the next element in args array is also part of this one, then there
are 8 bits for an ID of the argument and then up to 16 bits for a
value.

Thanks,

Martin


2015-11-20  Martin Jambor  

gcc/
* omp-low.c (get_target_argument_identifier_1): New function.
(get_target_argument_identifier): Likewise.
(get_target_argument_value): Likewise.
(get_target_arguments): Use them.

include/
* gomp-constants.h (GOMP_TARGET_ARG_FIRST_DEVICE_SPECIFIC): Removed.
(GOMP_TARGET_ARG_DEVICE_MASK): New.
(GOMP_TARGET_ARG_DEVICE_ALL): Likewise.
(GOMP_TARGET_ARG_SUBSEQUENT_PARAM): Likewise.
(GOMP_TARGET_ARG_ID_MASK): Likewise.
(GOMP_TARGET_ARG_VALUE_SHIFT): Likewise.
(GOMP_TARGET_ARG_NUM_TEAMS): Shifted by 8.
(GOMP_TARGET_ARG_THREAD_LIMIT): Likewise.
(GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES): Likewise.

libgomp/
* plugin/plugin-hsa.c (parse_launch_attributes): Renamed to
parse_target_attributes.  Process the new format of arguments.
---
 gcc/omp-low.c   | 69 -
 include/gomp-constants.h| 32 +++--
 libgomp/plugin/plugin-hsa.c | 16 +++
 3 files changed, 89 insertions(+), 28 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index f2436d7..a9188dc 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12638,7 +12638,50 @@ get_kernel_launch_attributes (gimple_stmt_iterator 
*gsi, gomp_target *tgt_stmt)
   return build_fold_addr_expr (lattrs);
 }
 
-/* Create an array of arguments that is then passed to GOMP_target.  */
+/* Build target argument identifier from the DEVICE identifier, value
+   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
+
+static tree
+get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
+{
+  tree t = build_int_cst (integer_type_node, device);
+  if (subseqent_param)
+t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+build_int_cst (integer_type_node,
+   GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
+  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+  build_int_cst (integer_type_node, id));
+  return t;
+}
+
+/* Like above but return it in type that can be directly stored as an element
+   of the argument array.  */
+
+static tree
+get_target_argument_identifier (int device, bool subseqent_param, int id)
+{
+  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
+  return fold_convert (ptr_type_node, t);
+}
+
+/* Return a target argument consisiting of DEVICE identifier, value identifier
+   ID, and the actual VALUE.  */
+
+static tree
+get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
+  tree value)
+{
+  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
+   fold_convert (integer_type_node, value),
+   build_int_cst (unsigned_type_node,
+  GOMP_TARGET_ARG_VALUE_SHIFT));
+  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+  get_target_argument_identifier_1 (device, false, id));
+  t = fold_convert (ptr_type_node, t);
+  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
+}
+
+/* Create an array of arguments that is then passed to GOMP_target.   */
 
 static tree
 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
@@ -12647,28 +12690,28 @@ get_target_arguments (gimple_stmt_iterator *gsi, 
gomp_target *tgt_stmt)
   tree clauses = gimple_omp_target_clauses (tgt_stmt);
   tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
   if (c)
-{
-  t = fold_convert (ptr_type_node, OMP_CLAUSE_NUM_TEAMS_EXPR (c));
-  t = force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
-}
+t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
   else
-t = fold_convert (ptr_type_node, integer_minus_one_node);
+t = integer_minus_one_node;
+  t = get_target_argument_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
+GOMP_TARGET_ARG_NUM_TEAMS, t);
   args.quick_push (t);
+
   c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
   if (c)
-{
-  t = fold_convert (ptr_type_node, OMP_CLAUSE_THREAD_LIMIT_EXPR (c));
-  t = force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
-}
+t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
   else
-t = fold_convert (ptr_type_node, integer_minus_one_node);
+t = integer_minus_one_node;
+  t = get_target_argument_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
+GOMP_TARGET_ARG_THREAD_LIMIT, t);
   

Re: [PATCH 0/6] Another fixes of various memory leaks

2015-11-23 Thread Bernd Schmidt

On 11/23/2015 02:49 PM, marxin wrote:

Following series has been just bootregtested on x86_64-linux-gnu
(all patches together).


All ok except 5/6 which I'm not finding obvious. Better to have a 
cilk/c++ person have a look.


In the future, a few more explanations would help with reviewing. Let's 
say for 4/6, how does the leak occur?


Some changes appear beneficial but unnecessary (converting explicitly 
released vecs to auto_vecs), and:



 static vec *
-create_array_refs (location_t loc, vec an_info,
+create_array_refs (location_t loc, const vec _info,
   vec an_loop_info, size_t size,  size_t rank)


How does this help prevent leaks? In general we don't want non-bugfixes 
at this stage.



Bernd


[hsa] Perform version checks in HSA plugin

2015-11-23 Thread Martin Jambor
Hi,

the following patch against the HSA branch makes it call
GOMP_offload_register_ver and GOMP_offload_unregister_ver as opposed
to the routines without version information and adds a version check
to the libgomp plugin along the lines other plugins do it.

Committed to the branch, any feedback welcome,

Martin


2015-11-23  Martin Jambor  

gcc/
* builtin-types.def (BT_FN_VOID_PTR_INT_PTR): Removed.
(BT_FN_VOID_UINT_PTR_INT_PTR): New.
* fortran/types.def (BT_FN_VOID_PTR_INT_PTR): Removed.
(BT_FN_VOID_UINT_PTR_INT_PTR): New.
* hsa-brig.c: Include gomp-constants.
(hsa_output_libgomp_mapping): Add version arguments to registration
and unregistration calls.
* omp-builtins.def (BUILT_IN_GOMP_OFFLOAD_REGISTER): Change to refer
to functions with versions.
(BUILT_IN_GOMP_OFFLOAD_UNREGISTER): Likewise.

include/
* gomp-constants.h (GOMP_VERSION_HSA): New.

libgomp/
* plugin/plugin-hsa.c (GOMP_OFFLOAD_load_image): Check version.
(GOMP_OFFLOAD_unload_image): Likewise.

---
 gcc/builtin-types.def   |  3 ++-
 gcc/fortran/types.def   |  3 ++-
 gcc/hsa-brig.c  | 19 ---
 gcc/omp-builtins.def|  9 +
 include/gomp-constants.h|  1 +
 libgomp/plugin/plugin-hsa.c | 21 +
 6 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index 251c980..8dcf3a6 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -450,7 +450,6 @@ DEF_FUNCTION_TYPE_3 (BT_FN_BOOL_ULONG_ULONG_ULONGPTR, 
BT_BOOL, BT_ULONG,
 BT_ULONG, BT_PTR_ULONG)
 DEF_FUNCTION_TYPE_3 (BT_FN_BOOL_ULONGLONG_ULONGLONG_ULONGLONGPTR, BT_BOOL,
 BT_ULONGLONG, BT_ULONGLONG, BT_PTR_ULONGLONG)
-DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_INT_PTR, BT_VOID, BT_PTR, BT_INT, BT_PTR)
 
 DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR,
 BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR)
@@ -479,6 +478,8 @@ DEF_FUNCTION_TYPE_4 
(BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR,
 DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
 BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG,
 BT_PTR_ULONGLONG)
+DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR,
+BT_INT, BT_PTR)
 
 DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
 BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING,
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def
index d5f44ab..283eaf4 100644
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -145,7 +145,6 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, 
BT_VOLATILE_PTR, BT_I2, BT
 DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, 
BT_INT)
 DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, 
BT_INT)
 DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, 
BT_I16, BT_INT)
-DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_INT_PTR, BT_VOID, BT_PTR, BT_INT, BT_PTR)
 
 DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
  BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
@@ -160,6 +159,8 @@ DEF_FUNCTION_TYPE_4 
(BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR,
 DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
 BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG,
 BT_PTR_ULONGLONG)
+DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR,
+BT_INT, BT_PTR)
 
 DEF_FUNCTION_TYPE_5 (BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT,
 BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT,
diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index f47e9c3..b687cc5 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "print-tree.h"
 #include "symbol-summary.h"
 #include "hsa.h"
+#include "gomp-constants.h"
 
 #define BRIG_ELF_SECTION_NAME ".brig"
 #define BRIG_LABEL_STRING "hsa_brig"
@@ -2216,10 +2217,12 @@ hsa_output_libgomp_mapping (tree brig_decl)
   gcc_checking_assert (offload_register);
 
   append_to_statement_list
-(build_call_expr (offload_register, 3,
+(build_call_expr (offload_register, 4,
+ build_int_cstu (unsigned_type_node,
+ GOMP_VERSION_PACK (GOMP_VERSION,
+GOMP_VERSION_HSA)),
  build_fold_addr_expr (hsa_libgomp_host_table),
- /* 7 stands for HSA.  */
- build_int_cst (integer_type_node, 7),
+ build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
  build_fold_addr_expr (hsa_img_descriptor)),
  _ctor_statements);
 
@@ -2230,10 +2233,12 @@ 

Re: [hsa] Use new format of device-specific target arguments

2015-11-23 Thread Jakub Jelinek
On Mon, Nov 23, 2015 at 03:35:48PM +0100, Martin Jambor wrote:
> +/* If the value is directly embeded in target argument, it should be a 16-bit
> +   at most and shifted by tis many bits.  */

this

Jakub


Re: [PATCH] lround for PowerPC

2015-11-23 Thread Michael Meissner
David ping'ed me on internal IRC, and I had a thinko in terms of the use of the
<Fv> mode attribute.  In some of the uses (such as abs, smax, etc.) we want to
use ISA 2.06 instructions on SFmode, while in other uses (add, mul, etc.) we
want to use it only if we have the ISA 2.07 instructions.

I have split these mode attributes into Fv and Fv2 and gone through all of the
uses in the compiler to use the appropriate attribute.  I have built a cross
compiler on x86, but it blew up on a big endian power7 with a segmentation
violation that I need to look into.  I'm also building on a little endian
power8 right now, and it has gotten further.

2015-11-23  David Edelsohn  
Michael Meissner  

* config/rs6000/rs6000.md (UNSPEC_XSRDPI): New unspec.
(Fv2): New mode attribute to be used when ISA 2.06 instructions
are used on SF/DF values.
(abs2_fpr): Use <Fv2> instead of <Fv>.
(nabs2_fpr): Likewise.
(neg2_fpr): Likewise.
(copysign3_fcpsgn): Likewise.
(smax3_vsx): Likewise.
(smin3_vsx): Likewise.
(floatsi2_lfiwax): Likewise.
(floatunssi2_lfiwz): Likewise.
(fctiwz_): Likewise.
(fctiwuz_): Likewise.
(btrunc2): Likewise.
(ceil2): Likewise.
(floor2): Likewise.
(xsrdpi): Add support for the lround function.
(lround2): Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 230768)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -77,6 +77,7 @@ (define_c_enum "unspec"
UNSPEC_FRIN
UNSPEC_FRIP
UNSPEC_FRIZ
+   UNSPEC_XSRDPI
UNSPEC_LD_MPIC  ; load_macho_picbase
UNSPEC_RELD_MPIC; re-load_macho_picbase
UNSPEC_MPIC_CORRECT ; macho_correct_pic
@@ -491,9 +492,17 @@ (define_mode_attr Fvsx [(SF "sp") (DF  "
 ; SF/DF constraint for arithmetic on traditional floating point registers
 (define_mode_attr Ff   [(SF "f") (DF "d") (DI "d")])
 
-; SF/DF constraint for arithmetic on VSX registers
+; SF/DF constraint for arithmetic on VSX registers.  This is intended to be
+; used for DFmode instructions added in ISA 2.06 (power7) and SFmode
+; instructions added in ISA 2.07 (power8)
 (define_mode_attr Fv   [(SF "wy") (DF "ws") (DI "wi")])
 
+; SF/DF constraint for arithmetic on VSX registers using instructions added in
+; ISA 2.06 (power7).  This includes instructions that normally target DF mode,
+; but are used on SFmode, since internally SFmode values are kept in the DFmode
+; format.
+(define_mode_attr Fv2  [(SF "ww") (DF "ws") (DI "wi")])
+
 ; SF/DF constraint for arithmetic on altivec registers
 (define_mode_attr Fa   [(SF "wu") (DF "wv")])
 
@@ -4299,8 +4308,8 @@ (define_expand "abs2"
   "")
 
 (define_insn "*abs2_fpr"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",")))]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
+   (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",")))]
   "TARGET__FPR"
   "@
fabs %0,%1
@@ -4309,10 +4318,10 @@ (define_insn "*abs2_fpr"
(set_attr "fp_type" "fp_addsub_")])
 
 (define_insn "*nabs2_fpr"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
(neg:SFDF
 (abs:SFDF
- (match_operand:SFDF 1 "gpc_reg_operand" ","]
+ (match_operand:SFDF 1 "gpc_reg_operand" ","]
   "TARGET__FPR"
   "@
fnabs %0,%1
@@ -4327,8 +4336,8 @@ (define_expand "neg2"
   "")
 
 (define_insn "*neg2_fpr"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",")))]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
+   (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ",")))]
   "TARGET__FPR"
   "@
fneg %0,%1
@@ -4557,9 +4566,9 @@ (define_expand "copysign3"
 ;; Use an unspec rather providing an if-then-else in RTL, to prevent the
 ;; compiler from optimizing -0.0
 (define_insn "copysign3_fcpsgn"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" ",")
- (match_operand:SFDF 2 "gpc_reg_operand" ",")]
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
+   (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" ",")
+ (match_operand:SFDF 2 "gpc_reg_operand" ",")]
 UNSPEC_COPYSIGN))]
   "TARGET__FPR && TARGET_CMPB"
   "@
@@ -4593,9 +4602,9 @@ (define_expand "smax3"
 })
 
 (define_insn "*smax3_vsx"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=,")
-   (smax:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%,")
-

[PATCH] Fix declaration of pthread-structs in s-osinte-rtems.ads (ada/68169)

2015-11-23 Thread Jan Sommer
Just noticed that I forgot to crosspost this mail to the rtems-devel list.

If someone with commit rights could check and push the patches we might get it 
into the next release.

Cheers,

   Jan
--- Begin Message ---
Hello,

The paperwork seems to have gone through.
Here is the patch again for the 4.9.x, 5.x and trunk respectively.
I just pulled the head of the corresponding branches and created a new diff, so 
it should apply properly.

Best regards,

   Jan
Index: gcc/ada/ChangeLog
===
--- gcc/ada/ChangeLog	(Revision 230563)
+++ gcc/ada/ChangeLog	(Arbeitskopie)
@@ -1,3 +1,9 @@
+2015-11-18  Jan Sommer 
+
+	* s-oscons-tmplt.c: Generate pthread constants for RTEMS
+	* s-osinte-rtems.ads: Declare pthread structs as opaque types in Ada
+	Fixes PR ada/68169
+
 2015-10-09  Eric Botcazou  
 
 	* gcc-interface/Make-lang.in: Make sure that GNAT1_OBJS and not just
Index: gcc/ada/s-oscons-tmplt.c
===
--- gcc/ada/s-oscons-tmplt.c	(Revision 230563)
+++ gcc/ada/s-oscons-tmplt.c	(Arbeitskopie)
@@ -154,7 +154,7 @@ pragma Style_Checks ("M32766");
 # include <_types.h>
 #endif
 
-#ifdef __linux__
+#if defined (__linux__) || defined (__rtems__)
 # include 
 # include 
 #endif
@@ -1441,7 +1441,8 @@ CND(CLOCK_THREAD_CPUTIME_ID, "Thread CPU clock")
 CNS(CLOCK_RT_Ada, "")
 #endif
 
-#if defined (__APPLE__) || defined (__linux__) || defined (DUMMY)
+#if defined (__APPLE__) || defined (__linux__) || defined (__rtems__) || \
+  defined (DUMMY)
 /*
 
--  Sizes of pthread data types
@@ -1484,7 +1485,7 @@ CND(PTHREAD_RWLOCKATTR_SIZE, "pthread_rwlockattr_t
 CND(PTHREAD_RWLOCK_SIZE, "pthread_rwlock_t")
 CND(PTHREAD_ONCE_SIZE,   "pthread_once_t")
 
-#endif /* __APPLE__ || __linux__ */
+#endif /* __APPLE__ || __linux__ || __rtems__*/
 
 /*
 
Index: gcc/ada/s-osinte-rtems.ads
===
--- gcc/ada/s-osinte-rtems.ads	(Revision 230563)
+++ gcc/ada/s-osinte-rtems.ads	(Arbeitskopie)
@@ -51,6 +51,8 @@
 --  It is designed to be a bottom-level (leaf) package.
 
 with Interfaces.C;
+with System.OS_Constants;
+
 package System.OS_Interface is
pragma Preelaborate;
 
@@ -60,6 +62,7 @@ package System.OS_Interface is
subtype rtems_id   is Interfaces.C.unsigned;
 
subtype intis Interfaces.C.int;
+   subtype char   is Interfaces.C.char;
subtype short  is Interfaces.C.short;
subtype long   is Interfaces.C.long;
subtype unsigned   is Interfaces.C.unsigned;
@@ -68,7 +71,6 @@ package System.OS_Interface is
subtype unsigned_char  is Interfaces.C.unsigned_char;
subtype plain_char is Interfaces.C.plain_char;
subtype size_t is Interfaces.C.size_t;
-
---
-- Errno --
---
@@ -76,11 +78,11 @@ package System.OS_Interface is
function errno return int;
pragma Import (C, errno, "__get_errno");
 
-   EAGAIN: constant := 11;
-   EINTR : constant := 4;
-   EINVAL: constant := 22;
-   ENOMEM: constant := 12;
-   ETIMEDOUT : constant := 116;
+   EAGAIN: constant := System.OS_Constants.EAGAIN;
+   EINTR : constant := System.OS_Constants.EINTR;
+   EINVAL: constant := System.OS_Constants.EINVAL;
+   ENOMEM: constant := System.OS_Constants.ENOMEM;
+   ETIMEDOUT : constant := System.OS_Constants.ETIMEDOUT;
 
-
-- Signals --
@@ -448,6 +450,7 @@ package System.OS_Interface is
   ss_low_priority : int;
   ss_replenish_period : timespec;
   ss_initial_budget   : timespec;
+  sched_ss_max_repl   : int;
end record;
pragma Convention (C, struct_sched_param);
 
@@ -621,43 +624,34 @@ private
end record;
pragma Convention (C, timespec);
 
-   CLOCK_REALTIME :  constant clockid_t := 1;
-   CLOCK_MONOTONIC : constant clockid_t := 4;
+   CLOCK_REALTIME :  constant clockid_t := System.OS_Constants.CLOCK_REALTIME;
+   CLOCK_MONOTONIC : constant clockid_t := System.OS_Constants.CLOCK_MONOTONIC;
 
+   subtype char_array is Interfaces.C.char_array;
+
type pthread_attr_t is record
-  is_initialized  : int;
-  stackaddr   : System.Address;
-  stacksize   : int;
-  contentionscope : int;
-  inheritsched: int;
-  schedpolicy : int;
-  schedparam  : struct_sched_param;
-  cputime_clocked_allowed : int;
-  detatchstate: int;
+  Data : char_array (1 .. OS_Constants.PTHREAD_ATTR_SIZE);
end record;
pragma Convention (C, pthread_attr_t);
+   for pthread_attr_t'Alignment use Interfaces.C.double'Alignment;
 
type pthread_condattr_t is record
-  flags   : int;
-  process_shared  : int;
+  Data : char_array (1 .. OS_Constants.PTHREAD_CONDATTR_SIZE);
end record;
pragma Convention (C, pthread_condattr_t);
+   for pthread_condattr_t'Alignment use 

  1   2   >