[PATCH] spellcheck: support transpositions aka Damerau-Levenshtein (PR other/69968)

2018-04-30 Thread David Malcolm
This patch updates the edit-distance algorithm in spellcheck.c to
support transpositions as well as additions/deletions/substitutions,
so that a transposition error counts as a distance of 1 rather than 2.

This leads to saner suggestions for such cases.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?

gcc/fortran/ChangeLog:
PR other/69968
* misc.c (gfc_closest_fuzzy_match): Update for renaming of
levenshtein_distance to get_edit_distance.

gcc/ChangeLog:
PR other/69968
* spellcheck-tree.c (levenshtein_distance): Rename to...
(get_edit_distance): ...this, and update for underlying renaming.
* spellcheck-tree.h (levenshtein_distance): Rename to...
(get_edit_distance): ...this.
* spellcheck.c (levenshtein_distance): Rename to...
(get_edit_distance): ...this.  Convert from Levenshtein distance
to Damerau-Levenshtein distance by supporting transpositions of
adjacent characters.  Rename "v1" to "v_next" and "v0" to
"v_one_ago".
(selftest::levenshtein_distance_unit_test_oneway): Rename to...
(selftest::test_edit_distance_unit_test_oneway): ...this, and
update for underlying renaming.
(selftest::levenshtein_distance_unit_test): Rename to...
(selftest::test_get_edit_distance_unit): ...this, and update for
underlying renaming.
(selftest::test_find_closest_string): Add example from PR 69968
where transposition helps
(selftest::test_metric_conditions): Update for renaming.
(selftest::test_metric_conditions): Likewise.
(selftest::spellcheck_c_tests): Likewise.
* spellcheck.h (levenshtein_distance): Rename both overloads to...
(get_edit_distance): ...this.
(best_match::consider): Update for renaming.

gcc/testsuite/ChangeLog:
PR other/69968
* gcc.dg/spellcheck-transposition.c: New test.
---
 gcc/fortran/misc.c  |   4 +-
 gcc/spellcheck-tree.c   |  12 +-
 gcc/spellcheck-tree.h   |   2 +-
 gcc/spellcheck.c| 143 +++-
 gcc/spellcheck.h|  14 +--
 gcc/testsuite/gcc.dg/spellcheck-transposition.c |  20 
 6 files changed, 124 insertions(+), 71 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/spellcheck-transposition.c

diff --git a/gcc/fortran/misc.c b/gcc/fortran/misc.c
index ec1f548..fb18c5c 100644
--- a/gcc/fortran/misc.c
+++ b/gcc/fortran/misc.c
@@ -286,7 +286,7 @@ get_c_kind(const char *c_kind_name, CInteropKind_t 
kinds_table[])
 
 
 /* For a given name TYPO, determine the best candidate from CANDIDATES
-   perusing Levenshtein distance.  Frees CANDIDATES before returning.  */
+   using get_edit_distance.  Frees CANDIDATES before returning.  */
 
 const char *
 gfc_closest_fuzzy_match (const char *typo, char **candidates)
@@ -299,7 +299,7 @@ gfc_closest_fuzzy_match (const char *typo, char 
**candidates)
 
   while (cand && *cand)
 {
-  edit_distance_t dist = levenshtein_distance (typo, tl, *cand,
+  edit_distance_t dist = get_edit_distance (typo, tl, *cand,
  strlen (*cand));
   if (dist < best_distance)
{
diff --git a/gcc/spellcheck-tree.c b/gcc/spellcheck-tree.c
index 2a66649..596293e 100644
--- a/gcc/spellcheck-tree.c
+++ b/gcc/spellcheck-tree.c
@@ -27,18 +27,18 @@ along with GCC; see the file COPYING3.  If not see
 #include "selftest.h"
 #include "stringpool.h"
 
-/* Calculate Levenshtein distance between two identifiers.  */
+/* Calculate edit distance between two identifiers.  */
 
 edit_distance_t
-levenshtein_distance (tree ident_s, tree ident_t)
+get_edit_distance (tree ident_s, tree ident_t)
 {
   gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE);
   gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE);
 
-  return levenshtein_distance (IDENTIFIER_POINTER (ident_s),
-  IDENTIFIER_LENGTH (ident_s),
-  IDENTIFIER_POINTER (ident_t),
-  IDENTIFIER_LENGTH (ident_t));
+  return get_edit_distance (IDENTIFIER_POINTER (ident_s),
+   IDENTIFIER_LENGTH (ident_s),
+   IDENTIFIER_POINTER (ident_t),
+   IDENTIFIER_LENGTH (ident_t));
 }
 
 /* Given TARGET, an identifier, and CANDIDATES, a vec of identifiers,
diff --git a/gcc/spellcheck-tree.h b/gcc/spellcheck-tree.h
index 1debef5..4324bd2 100644
--- a/gcc/spellcheck-tree.h
+++ b/gcc/spellcheck-tree.h
@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3.  If not see
 /* spellcheck-tree.c  */
 
 extern edit_distance_t
-levenshtein_distance (tree ident_s, tree ident_t);
+get_edit_distance (tree ident_s, tree ident_t);
 
 extern tree
 find_closest_identifier (tree target, const auto_vec<tree> *candidates);
diff --git a/gcc/spellcheck.c b/gcc/spellcheck.c
index 

Re: [PATCH] Add fix-it hint for missing return statement in assignment operators (PR c++/85523)

2018-04-30 Thread David Malcolm
On Mon, 2018-04-30 at 20:29 -0400, David Malcolm wrote:
[...]
>   In doing so I guarded it with a call to:
>  global_dc->option_enabled (OPT_Wreturn_type

Gah; typo, sorry:

>   as per the insides of diagnostic.s, since adding a fix-it hint to
  ~~~^
  diagnostic.c


>   a rich_location is non-trivial (e.g. it requires an allocation).
[...]


[PATCH] Add fix-it hint for missing return statement in assignment operators (PR c++/85523)

2018-04-30 Thread David Malcolm
Following on from the thread on the "gcc" list here:

  https://gcc.gnu.org/ml/gcc/2018-04/msg00172.html

here's an updated version of Jonathan's patch, which:

* eliminates the separate "note" in favor of simply having
  the warning itself emit the "return *this;" fix-it hint

   pr85523.C: In member function 's1& s1::operator=(const s1&)':
   pr85523.C:7:30: warning: no return statement in function returning
   non-void [-Wreturn-type]
   s1& operator=(const s1&) { }
  ^
  return *this;

  In doing so I guarded it with a call to:
 global_dc->option_enabled (OPT_Wreturn_type
  as per the insides of diagnostic.c, since adding a fix-it hint to
  a rich_location is non-trivial (e.g. it requires an allocation).

* uses the new gcc_rich_location::add_fixit_insert_formatted to
  make the fix-it hint respect typical C++ code formatting
  conventions

* adds testcases

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?


Note for ChangeLog: jwakely is co-author.

gcc/cp/ChangeLog:
PR c++/85523
* decl.c: Include "gcc-rich-location.h".
(add_return_star_this_fixit): New function.
(finish_function): When warning about missing return statements in
functions returning non-void, add a "return *this;" fix-it hint for
assignment operators.

gcc/testsuite/ChangeLog:
PR c++/85523
* g++.dg/pr85523.C: New test.
---
 gcc/cp/decl.c  | 34 +++-
 gcc/testsuite/g++.dg/pr85523.C | 88 ++
 2 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/pr85523.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 07f3a61..7952106 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "gimplify.h"
 #include "asan.h"
+#include "gcc-rich-location.h"
 
 /* Possible cases of bad specifiers type used by bad_specifiers. */
 enum bad_spec_place {
@@ -15677,6 +15678,22 @@ maybe_save_function_definition (tree fun)
 register_constexpr_fundef (fun, DECL_SAVED_TREE (fun));
 }
 
+/* Attempt to add a fix-it hint to RICHLOC suggesting the insertion
+   of "return *this;" immediately before its location, using FNDECL's
+   first statement (if any) to give the indentation, if appropriate.  */
+
+static void
+add_return_star_this_fixit (gcc_rich_location *richloc, tree fndecl)
+{
+  location_t indent = UNKNOWN_LOCATION;
+  tree stmts = expr_first (DECL_SAVED_TREE (fndecl));
+  if (stmts)
+indent = EXPR_LOCATION (stmts);
+  richloc->add_fixit_insert_formatted ("return *this;",
+  richloc->get_loc (),
+  indent);
+}
+
 /* Finish up a function declaration and compile that function
all the way to assembler language output.  The free the storage
for the function definition. INLINE_P is TRUE if we just
@@ -15870,8 +15887,21 @@ finish_function (bool inline_p)
   && !DECL_DESTRUCTOR_P (fndecl)
   && targetm.warn_func_return (fndecl))
 {
-  warning (OPT_Wreturn_type,
-  "no return statement in function returning non-void");
+  gcc_rich_location richloc (input_location);
+  /* Potentially add a "return *this;" fix-it hint for
+assignment operators.  */
+  if (IDENTIFIER_ASSIGN_OP_P (DECL_NAME (fndecl)))
+   {
+ tree valtype = TREE_TYPE (DECL_RESULT (fndecl));
+ if (TREE_CODE (valtype) == REFERENCE_TYPE
+ && same_type_ignoring_top_level_qualifiers_p
+ (TREE_TYPE (valtype), TREE_TYPE (current_class_ref)))
+   if (global_dc->option_enabled (OPT_Wreturn_type,
+  global_dc->option_state))
+ add_return_star_this_fixit (&richloc, fndecl);
+   }
+  warning_at (&richloc, OPT_Wreturn_type,
+ "no return statement in function returning non-void");
   TREE_NO_WARNING (fndecl) = 1;
 }
 
diff --git a/gcc/testsuite/g++.dg/pr85523.C b/gcc/testsuite/g++.dg/pr85523.C
new file mode 100644
index 000..9cd939b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr85523.C
@@ -0,0 +1,88 @@
+/* { dg-options "-fdiagnostics-show-caret" } */
+
+/* Verify that we emit a "return *this;" fix-it hint for
+   a missing return in an assignment operator.  */
+
+struct s1 {
+  s1& operator=(const s1&) { } // { dg-warning "no return statement in 
function returning non-void" }
+  /* { dg-begin-multiline-output "" }
+   s1& operator=(const s1&) { }
+  ^
+  return *this;
+ { dg-end-multiline-output "" } */
+};
+
+/* Likewise for +=.  */
+
+struct s2 {
+  s2& operator+=(const s2&) {} // { dg-warning "no return statement in 
function returning non-void" }
+  /* { dg-begin-multiline-output "" }
+   s2& operator+=(const s2&) {}
+  ^
+  

[committed] Add gcc_rich_location::add_fixit_insert_formatted

2018-04-30 Thread David Malcolm
This patch adds a support function to class gcc_rich_location
to make it easier for fix-it hints to use idiomatic C/C++
indentation, for use by the patch for PR c++/85523.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

Committed to trunk as r259783.

gcc/ChangeLog:
PR c++/85523
* gcc-rich-location.c (blank_line_before_p): New function.
(use_new_line): New function.
(gcc_rich_location::add_fixit_insert_formatted): New function.
* gcc-rich-location.h
(gcc_rich_location::add_fixit_insert_formatted): New function.

gcc/testsuite/ChangeLog:
PR c++/85523
* gcc.dg/plugin/diagnostic-test-show-locus-generate-patch.c
(test_add_fixit_insert_formatted_single_line): New function.
(test_add_fixit_insert_formatted_multiline): New function.
Extend expected output of generated patch to include fix-it hints
for these.
* gcc.dg/plugin/diagnostic_plugin_test_show_locus.c: Include
"gcc-rich-location.h".  Add test coverage for
gcc_rich_location::add_fixit_insert_formatted.
---
 gcc/gcc-rich-location.c| 111 +
 gcc/gcc-rich-location.h|  36 +++
 .../diagnostic-test-show-locus-generate-patch.c|  32 ++
 .../plugin/diagnostic_plugin_test_show_locus.c |  24 +
 4 files changed, 203 insertions(+)

diff --git a/gcc/gcc-rich-location.c b/gcc/gcc-rich-location.c
index 3481425..0a0adf9 100644
--- a/gcc/gcc-rich-location.c
+++ b/gcc/gcc-rich-location.c
@@ -69,3 +69,114 @@ gcc_rich_location::add_fixit_misspelled_id (location_t 
misspelled_token_loc,
 
   add_fixit_replace (misspelled_token_loc, IDENTIFIER_POINTER (hint_id));
 }
+
+/* Return true if there is nothing on LOC's line before LOC.  */
+
+static bool
+blank_line_before_p (location_t loc)
+{
+  expanded_location exploc = expand_location (loc);
+  char_span line = location_get_source_line (exploc.file, exploc.line);
+  if (!line)
+return false;
+  if (line.length () < (size_t)exploc.column)
+return false;
+  /* Columns are 1-based.  */
+  for (int column = 1; column < exploc.column; ++column)
+if (!ISSPACE (line[column - 1]))
+  return false;
+  return true;
+}
+
+/* Subroutine of gcc_rich_location::add_fixit_insert_formatted.
+   Return true if we should add the content on its own line,
+   false otherwise.
+   If true is returned then *OUT_START_OF_LINE is written to.  */
+
+static bool
+use_new_line (location_t insertion_point, location_t indent,
+ location_t *out_start_of_line)
+{
+  if (indent == UNKNOWN_LOCATION)
+return false;
+  const line_map *indent_map = linemap_lookup (line_table, indent);
+  if (linemap_macro_expansion_map_p (indent_map))
+return false;
+
+  if (!blank_line_before_p (insertion_point))
+return false;
+
+  /* Locate the start of the line containing INSERTION_POINT.  */
+  const line_map *insertion_point_map
+= linemap_lookup (line_table, insertion_point);
+  if (linemap_macro_expansion_map_p (insertion_point_map))
+return false;
+  const line_map_ordinary *ordmap
+= linemap_check_ordinary (insertion_point_map);
+  expanded_location exploc_insertion_point = expand_location (insertion_point);
+  location_t start_of_line
+= linemap_position_for_line_and_column (line_table, ordmap,
+   exploc_insertion_point.line, 1);
+  *out_start_of_line = start_of_line;
+  return true;
+}
+
+/* Add a fix-it hint suggesting the insertion of CONTENT before
+   INSERTION_POINT.
+
+   Attempt to handle formatting: if INSERTION_POINT is the first thing on
+   its line, and INDENT is sufficiently sane, then add CONTENT on its own
+   line, using the indentation of INDENT.
+   Otherwise, add CONTENT directly before INSERTION_POINT.
+
+   For example, adding "CONTENT;" with the closing brace as the insertion
+   point and "INDENT;" as the indentation point:
+
+   if ()
+ {
+   INDENT;
+ }
+
+  would lead to:
+
+   if ()
+ {
+   INDENT;
+   CONTENT;
+ }
+
+  but adding it to:
+
+if () {INDENT;}
+
+  would lead to:
+
+if () {INDENT;CONTENT;}
+*/
+
+void
+gcc_rich_location::add_fixit_insert_formatted (const char *content,
+  location_t insertion_point,
+  location_t indent)
+{
+  location_t start_of_line;
+  if (use_new_line (insertion_point, indent, &start_of_line))
+{
+  /* Add CONTENT on its own line, using the indentation of INDENT.  */
+
+  /* Generate an insertion string, indenting by the amount INDENT
+was indented.  */
+  int indent_column = LOCATION_COLUMN (get_start (indent));
+  pretty_printer tmp_pp;
+  pretty_printer *pp = &tmp_pp;
+  /* Columns are 1-based.  */
+  for (int column = 1; column < indent_column; ++column)
+   pp_space (pp);
+  pp_string (pp, content);
+  pp_newline (pp);
+

Re: [AARCH64] Neon vld1_*_x3, vst1_*_x2 and vst1_*_x3 intrinsics

2018-04-30 Thread Sameera Deshpande
On 13 April 2018 at 20:21, James Greenhalgh  wrote:
> On Fri, Apr 13, 2018 at 03:39:32PM +0100, Sameera Deshpande wrote:
>> On Fri 13 Apr, 2018, 8:04 PM James Greenhalgh, 
>> > wrote:
>> On Fri, Apr 06, 2018 at 08:55:47PM +0100, Christophe Lyon wrote:
>> > Hi,
>> >
>> > 2018-04-06 12:15 GMT+02:00 Sameera Deshpande 
>> > >:
>> > > Hi Christophe,
>> > >
>> > > Please find attached the updated patch with testcases.
>> > >
>> > > Ok for trunk?
>> >
>> > Thanks for the update.
>> >
>> > Since the new intrinsics are only available on aarch64, you want to
>> > prevent the tests from running on arm.
>> > Indeed gcc.target/aarch64/advsimd-intrinsics/ is shared between the two 
>> > targets.
>> > There are several examples on how to do that in that directory.
>> >
>> > I have also noticed that the tests fail at execution on aarch64_be.
>>
>> I think this is important to fix. We don't want the big-endian target to have
>> failing implementations of the Neon intrinsics. What is the nature of the
>> failure?
>>
>> From what I can see, nothing in the patch prevents using these intrinsics
>> on big-endian, so either the intrinsics behaviour is wrong (we have a wrong
>> code bug), or the testcase expected behaviour is wrong.
>>
>> I don't think disabling the test for big-endian is the right fix. We should
>> either fix the intrinsics, or fix the testcase.
>>
>> Thanks,
>> James
>>
>> Hi James,
>>
>> As the tests assume the little endian order of elements while checking the
>> results, the tests are failing for big endian targets. So, the failures are
>> not because of intrinsic implementations, but because of the testcase.
>
> The testcase is a little hard to follow through the macros, but why would
> this be the case?
>
> ld1 is deterministic on big and little endian for which elements will be
> loaded from memory, as is st1.
>
> My expectation would be that:
>
>   int __attribute__ ((noinline))
>   test_vld_u16_x3 ()
>   {
> uint16_t data[3 * 3];
> uint16_t temp[3 * 3];
> uint16x4x3_t vectors;
> int i,j;
> for (i = 0; i < 3 * 3; i++)
>   data [i] = (uint16_t) 3*i;
> asm volatile ("" : : : "memory");
> vectors = vld1_u16_x3 (data);
> vst1_u16 (temp, vectors.val[0]);
> vst1_u16 (&temp[3], vectors.val[1]);
> vst1_u16 (&temp[3 * 2], vectors.val[2]);
> asm volatile ("" : : : "memory");
> for (j = 0; j < 3 * 3; j++)
>   if (temp[j] != data[j])
> return 1;
> return 0;
>   }
>
> would work equally well for big- or little-endian.
>
> I think this is more likely to be an intrinsics implementation bug.
>
> Thanks,
> James
>

Hi James,

Please find attached the updated patch, which now passes for little as
well as big endian.
Ok for trunk?

-- 
- Thanks and regards,
  Sameera D.

gcc/Changelog:

2018-05-01  Sameera Deshpande  


* config/aarch64/aarch64-simd-builtins.def (ld1x3): New.
(st1x2): Likewise.
(st1x3): Likewise.
* config/aarch64/aarch64-simd.md
(aarch64_ld1x3): New pattern.
(aarch64_ld1_x3_): Likewise
(aarch64_st1x2): Likewise
(aarch64_st1_x2_): Likewise
(aarch64_st1x3): Likewise
(aarch64_st1_x3_): Likewise
* config/aarch64/arm_neon.h (vld1_u8_x3): New function.
(vld1_s8_x3): Likewise.
(vld1_u16_x3): Likewise.
(vld1_s16_x3): Likewise.
(vld1_u32_x3): Likewise.
(vld1_s32_x3): Likewise.
(vld1_u64_x3): Likewise.
(vld1_s64_x3): Likewise.
(vld1_f16_x3): Likewise.
(vld1_f32_x3): Likewise.
(vld1_f64_x3): Likewise.
(vld1_p8_x3): Likewise.
(vld1_p16_x3): Likewise.
(vld1_p64_x3): Likewise.
(vld1q_u8_x3): Likewise.
(vld1q_s8_x3): Likewise.
(vld1q_u16_x3): Likewise.
(vld1q_s16_x3): Likewise.
(vld1q_u32_x3): Likewise.
(vld1q_s32_x3): Likewise.
(vld1q_u64_x3): Likewise.
(vld1q_s64_x3): Likewise.
(vld1q_f16_x3): Likewise.
(vld1q_f32_x3): Likewise.
(vld1q_f64_x3): Likewise.
(vld1q_p8_x3): Likewise.
(vld1q_p16_x3): Likewise.
(vld1q_p64_x3): Likewise.
(vst1_s64_x2): Likewise.
(vst1_u64_x2): Likewise.
(vst1_f64_x2): Likewise.
(vst1_s8_x2): Likewise.
(vst1_p8_x2): Likewise.
(vst1_s16_x2): Likewise.
(vst1_p16_x2): Likewise.
(vst1_s32_x2): Likewise.
(vst1_u8_x2): Likewise.
(vst1_u16_x2): Likewise.
(vst1_u32_x2): Likewise.
(vst1_f16_x2): Likewise.
(vst1_f32_x2): Likewise.
(vst1_p64_x2): Likewise.
(vst1q_s8_x2): Likewise.
(vst1q_p8_x2): Likewise.
(vst1q_s16_x2): Likewise.
(vst1q_p16_x2): Likewise.
(vst1q_s32_x2): Likewise.
(vst1q_s64_x2): Likewise.

[PATCH] Add support for gcc as git submodule of another repository.

2018-04-30 Thread Jim Wilson
We have some github repos that use git submodule to include other repos,
including gcc.  When git submodule is used, .git is actually a file not a dir,
and contains the path to the modules file in the parent repo's .git dir.  This
patch allows contrib/gcc_update to work in this situation.

OK?

Jim

contrib/
* gcc_update: Check for .git as a file.
---
 contrib/gcc_update | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/gcc_update b/contrib/gcc_update
index a33bb543902..c9c71548f53 100755
--- a/contrib/gcc_update
+++ b/contrib/gcc_update
@@ -286,7 +286,7 @@ p
 esac
 
 # Check for known version control systems.
-if [ -d .git ]; then
+if [ -d .git ] || [ -f .git ]; then
 GCC_GIT=${GCC_GIT-${GIT-git}}
 vcs_type="git"
 elif [ -d .hg ]; then
-- 
2.14.1



Re: [PATCH] Add -C when using -Wimplicit-fallthrough and --save-temps (PR preprocessor/78497).

2018-04-30 Thread Jeff Law
On 04/03/2018 06:29 AM, Martin Liška wrote:
> Hi.
> 
> This helps the warning with --save-temps. Doing that one needs to preserve 
> comments
> in preprocessed source file.
> 
> Ready for trunk?
> Martin
> 
> gcc/ChangeLog:
> 
> 2018-04-03  Martin Liska  
> 
>   PR preprocessor/78497
>   * gcc.c: Add -C when using -Wimplicit-fallthrough and --save-temps.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-04-03  Martin Liska  
> 
>   PR preprocessor/78497
>   * c-c++-common/Wimplicit-fallthrough-37.c: New test.
OK.
jeff


Re: [PATCH] POPCOUNT folding optimizations

2018-04-30 Thread Jeff Law
On 02/09/2018 05:42 AM, Roger Sayle wrote:
> The following patch implements a number of __builtin_popcount related
> optimizations.
> (i) popcount(x) == 0 can be simplified to x==0, and popcount(x) != 0 to
> x!=0.
> (ii) popcount(x&1) can be simplified to x&1, and for unsigned x,
> popcount(x>>31) to x>>31.
> (iii) popcount (x&6) + popcount(y&16) can be simplified to
> popcount((x&6)|(y&16))
> 
> These may seem obscure transformations, but performing these types of
> POPCOUNT
> operations are often the performance critical steps in some cheminformatics
> applications.
> 
> To implement the above transformations I've introduced the tree_nonzero_bits
> function,
> which is a tree-level version of rtlanal's nonzero_bits used by the RTL
> optimizers.
> 
> The following patch has been tested on x86_64-pc-linux-gnu with a "make
> bootstrap"
> and "make check" with no regressions, and passes for the four new gcc.dg
> test cases.
> 
> Many thanks in advance.  Best regards,
> 
> Roger
> --
> Roger Sayle, PhD.
> NextMove Software Limited
> Innovation Centre (Unit 23), Cambridge Science Park, Cambridge, CB4 0EY
> 
> 2018-02-09  Roger Sayle  
> 
> * fold-const.c (tree_nonzero_bits): New function.
> * fold-const.h (tree_nonzero_bits): Likewise.
> * match.pd (POPCOUNT): New patterns to fold BUILTIN_POPCOUNT and
> friends.  POPCOUNT(x&1) => x&1, POPCOUNT(x)==0 => x==0, etc.
> 
> 2018-02-09  Roger Sayle  
> 
> * gcc.dg/fold-popcount-1.c: New testcase.
> * gcc.dg/fold-popcount-2.c: New testcase.
> * gcc.dg/fold-popcount-3.c: New testcase.
> * gcc.dg/fold-popcount-4.c: New testcase.
> 
> 
> 
> 
> Index: gcc/fold-const.c
> ===
> --- gcc/fold-const.c  (revision 257227)
> +++ gcc/fold-const.c  (working copy)
> @@ -14580,6 +14580,75 @@
>return string + offset;
>  }
>  
> +/* Given a tree T, compute which bits in T may be nonzero.  */
> +
> +wide_int
> +tree_nonzero_bits (const_tree t)
> +{
> +  switch (TREE_CODE (t))
> +{
> +case BIT_IOR_EXPR:
> +case BIT_XOR_EXPR:
> +  return wi::bit_or (tree_nonzero_bits (TREE_OPERAND (t, 0)),
> +  tree_nonzero_bits (TREE_OPERAND (t, 1)));
Hmm.   I think this will potentially have too many bits set in the
BIT_XOR case.  Is there some reason you didn't use wi::bit_xor for that
case?


We can probably go ahead and ACK this once that question is resolved.

Thanks,
jeff


Re: [PATCH] restore -Warray-bounds for string literals (PR 83776)

2018-04-30 Thread Jeff Law
On 01/25/2018 07:16 PM, Martin Sebor wrote:
> PR tree-optimization/83776 - [6/7/8 Regression] missing
> -Warray-bounds indexing past the end of a string literal,
> identified a not-so-recent improvement to constant propagation
> as the reason for GCC no longer being able to detect out-of-
> bounds accesses to string literals.  The root cause is that
> the change caused accesses to strings to be transformed into
> MEM_REFs that the -Warray-bounds checker isn't prepared to
> handle.  A simple example is:
> 
>   int h (void)
>   {
> const char *p = "1234";
> return p[16];   // missing -Warray-bounds
>   }
> 
> To fix the regression the attached patch extends the array bounds
> checker to handle the small subset of MEM_REF expressions that
> refer to string literals but stops short of doing more than
> that.  There are outstanding gaps in the detection that the patch
> intentionally doesn't handle.  They are either caused by other
> regressions (PR 84047) or by other latent bugs/limitations, or
> by limitations in the approach I took to try to keep the patch
> simple.  I hope to address some of those in a follow-up patch
> for GCC 9.
> 
> Martin
> 
> gcc-83776.diff
> 
> 
> PR tree-optimization/83776 - [6/7/8 Regression] missing -Warray-bounds 
> indexing past the end of a string literal
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/83776
>   * tree-vrp.c (vrp_prop::check_mem_ref): New function.
>   (check_array_bounds): Call it.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/83776
>   * gcc.dg/Warray-bounds-27.c: New test.
>   * gcc.dg/Warray-bounds-28.c: New test.
>   * gcc.dg/Warray-bounds-29.c: New test.
> 
> diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
> index 3294bde..b2e45c9 100644
> --- a/gcc/tree-vrp.c
> +++ b/gcc/tree-vrp.c
> @@ -4763,6 +4763,7 @@ class vrp_prop : public ssa_propagation_engine
>void vrp_finalize (bool);
>void check_all_array_refs (void);
>void check_array_ref (location_t, tree, bool);
> +  void check_mem_ref (location_t, tree);
>void search_for_addr_array (tree, location_t);
>  
>class vr_values vr_values;
> @@ -4781,6 +4782,7 @@ class vrp_prop : public ssa_propagation_engine
>void extract_range_from_phi_node (gphi *phi, value_range *vr)
>  { vr_values.extract_range_from_phi_node (phi, vr); }
>  };
> +
>  /* Checks one ARRAY_REF in REF, located at LOCUS. Ignores flexible arrays
> and "struct" hacks. If VRP can determine that the
> array subscript is a constant, check if it is outside valid
> @@ -4915,6 +4917,179 @@ vrp_prop::check_array_ref (location_t location, tree 
> ref,
>  }
>  }
>  
> +/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
> +   references to string constants.  If VRP can determine that the array
> +   subscript is a constant, check if it is outside valid range.
> +   If the array subscript is a RANGE, warn if it is non-overlapping
> +   with valid range.
> +   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR.  */
This function doesn't have IGNORE_OFF_BY_ONE as a parameter.  Drop it
from the comment.


> +
> +void
> +vrp_prop::check_mem_ref (location_t location, tree ref)
> +{
> +  if (TREE_NO_WARNING (ref))
> +return;
> +
> +  tree arg = TREE_OPERAND (ref, 0);
> +  tree cstoff = TREE_OPERAND (ref, 1);
> +  tree varoff = NULL_TREE;
> +
> +  const offset_int maxobjsize = tree_to_shwi (max_object_size ());
> +
> +  /* The string constant bounds in bytes.  Initially set to [0, MAXOBJSIZE]
> + until a tighter bound is determined.  */
> +  offset_int strbounds[2];
> +  strbounds[1] = maxobjsize;
> +  strbounds[0] = -strbounds[1] - 1;
> +
> +  /* The minimum and maximum intermediate offset.  For a reference
> + to be valid, not only does the final offset/subscript must be
> + in bounds but all intermediate offsets must be as well. */
> +  offset_int ioff = wi::to_offset (fold_convert (ptrdiff_type_node, cstoff));
> +  offset_int extrema[2] = { 0, wi::abs (ioff) };
> +
> +  /* The range of the byte offset into the reference.  */
> +  offset_int offrange[2] = { 0, 0 };
> +
> +  value_range *vr = NULL;
> +
> +  /* Determine the offsets and increment OFFRANGE for the bounds of each.  */
> +  while (TREE_CODE (arg) == SSA_NAME)
> +{
> +  gimple *def = SSA_NAME_DEF_STMT (arg);
> +  if (!is_gimple_assign (def))
> + {
> +   if (tree var = SSA_NAME_VAR (arg))
> + arg = var;
> +   break;
> + }
What's the point of looking at the underlying SSA_NAME_VAR here? I can't
see how that's ever helpful.  You'll always exit the loop at this point
which does something like

if (TREE_CODE (arg) == ADDR_EXPR)
  {
 do something interesting
  }
else
  return;

ISTM that any time you dig into SSA_NAME_VAR (arg) what you're going to
get back is some kind of _DECL node -- I'm not aware of a case where
you're going to get back an ADDR_EXPR.




> +
> +  tree_code code = gimple_assign_rhs_code (def);
> +  if (code 

Re: [PATCH] selftest: remove "Yoda ordering" in assertions

2018-04-30 Thread Jeff Law
On 04/30/2018 09:48 AM, David Malcolm wrote:
> Our selftest assertions were of the form:
> 
>   ASSERT_EQ (expected, actual)
> 
> and both Richard Sandiford and I find this "Yoda ordering" confusing.
> 
> Our existing tests aren't entirely consistent about this, and it doesn't make
> sense for ASSERT_NE and its variants.
> 
> The ordering comes from googletest's API, which is what
> the earliest version of the selftest code used (before Bernd persuaded
> me to stop over-engineering it :) ).
> 
> googletest's API now uses just "val1" and "val2" for binary assertion
> macros, and their docs now say:
> 
> "Historical note: Before February 2016 *_EQ had a convention of calling
> it as ASSERT_EQ(expected, actual), so lots of existing code uses this
> order. Now *_EQ treats both parameters in the same way."
> 
> This seems to have been:
> https://github.com/google/googletest/commit/f364e188372e489230ef4e44e1aec6bcb08f3acf
> https://github.com/google/googletest/pull/713
> 
> This patch renames the params in our selftest API from "expected" and
> "actual" to "val1" and "val2".
> 
> ASSERT_STREQ (and ASSERT_STREQ_AT) had an asymmetry in error-reporting, where
> they did a better job of reporting if the second of the params was NULL; this
> patch now handles params equivalently (and both must be non-NULL for a pass).
> We aren't able to selftest selftest failures, so I tested the five cases
> by hand while developing the patch (4 NULL vs non-NULL cases, with the both
> non-NULL case having a pass and fail sub-cases).
> 
> Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> 
> OK for trunk?
> 
> gcc/ChangeLog:
>   * selftest.c (assert_streq): Rename "expected" and "actual" to
>   "val1" and "val2".  Extend NULL-handling to cover both inputs
>   symmetrically, while still requiring both to be non-NULL for a pass.
>   * selftest.h (assert_streq): Rename "expected" and "actual" to
>   "val1" and "val2".
>   (ASSERT_EQ): Likewise.
>   (ASSERT_EQ_AT): Likewise.
>   (ASSERT_KNOWN_EQ): Likewise.
>   (ASSERT_KNOWN_EQ_AT): Likewise.
>   (ASSERT_NE): Likewise.
>   (ASSERT_MAYBE_NE): Likewise.
>   (ASSERT_MAYBE_NE_AT): Likewise.
>   (ASSERT_STREQ): Likewise.  Clarify that both must be non-NULL for
>   the assertion to pass.
>   (ASSERT_STREQ_AT): Likewise.
OK.
jeff


Re: Generalize a<b&a<c into a<min(b,c)

2018-04-30 Thread Jeff Law
On 04/30/2018 11:24 AM, Marc Glisse wrote:
> Hello,
> 
> this transformation was lacking symmetry, only handling & and not |.
> 
> It probably still fails to handle a < b & a <= 123, while it would
> handle < 124, but that's for another day.
> 
> Bootstrap+testsuite on powerpc64le-unknown-linux-gnu.
> 
> 2018-05-01  Marc Glisse  
> 
> PR tree-optimization/85143
> gcc/
> * match.pd (A<B&A<C): Extend to BIT_IOR_EXPR.
> gcc/testsuite/
> * gcc.dg/tree-ssa/minmax-loopend.c: Extend and split...
> * gcc.dg/tree-ssa/minmax-loopend-2.c: ... here.
> 
OK.
jeff


C++ PATCH for c++/84701, unsigned typeof(x)

2018-04-30 Thread Jason Merrill
We were neither rejecting nor properly handling "unsigned typeof".
Since nobody is likely to be relying on a broken extension, this patch
makes it an error rather than the pedwarn it was previously.

While there I overhauled that section of diagnostics to be more
specific and give locations for the keywords we're complaining about.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 8caf1e41a9625dcc108cef55e976b8704ca402e8
Author: Jason Merrill 
Date:   Mon Apr 30 14:27:14 2018 -0400

PR c++/84701 - unsigned typeof.

* decl.c (grokdeclarator): Overhaul diagnostics for invalid use
of long/short/signed/unsigned.

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 07f3a61fed6..6f2fe01d83d 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -10608,45 +10608,61 @@ grokdeclarator (const cp_declarator *declarator,
 
   if (unsigned_p || signed_p || long_p || short_p)
 {
+  location_t loc;
+  const char *key;
+  if (unsigned_p)
+	{
+	  key = "unsigned";
+	  loc = declspecs->locations[ds_unsigned];
+	}
+  else if (signed_p)
+	{
+	  key = "signed";
+	  loc = declspecs->locations[ds_signed];
+	}
+  else if (longlong)
+	{
+	  key = "long long";
+	  loc = declspecs->locations[ds_long_long];
+	}
+  else if (long_p)
+	{
+	  key = "long";
+	  loc = declspecs->locations[ds_long];
+	}
+  else /* if (short_p) */
+	{
+	  key = "short";
+	  loc = declspecs->locations[ds_short];
+	}
+
   int ok = 0;
 
-  if ((signed_p || unsigned_p) && TREE_CODE (type) != INTEGER_TYPE)
-	error ("% or % invalid for %qs", name);
-  else if (signed_p && unsigned_p)
-	error ("% and % specified together for %qs", name);
-  else if (longlong && TREE_CODE (type) != INTEGER_TYPE)
-	error ("% invalid for %qs", name);
-  else if (long_p && TREE_CODE (type) == REAL_TYPE)
-	error ("% invalid for %qs", name);
-  else if (short_p && TREE_CODE (type) == REAL_TYPE)
-	error ("% invalid for %qs", name);
-  else if ((long_p || short_p) && TREE_CODE (type) != INTEGER_TYPE)
-	error ("% or % invalid for %qs", name);
-  else if ((long_p || short_p || explicit_char || explicit_int) && explicit_intN)
-	error ("%, %, %, or % invalid for %qs", name);
-  else if ((long_p || short_p) && explicit_char)
-	error ("% or % specified with char for %qs", name);
+  if (signed_p && unsigned_p)
+	error_at (loc, "% and % specified together");
   else if (long_p && short_p)
-	error ("% and % specified together for %qs", name);
-  else if (type == char16_type_node || type == char32_type_node)
+	error_at (loc, "% and % specified together");
+  else if (TREE_CODE (type) != INTEGER_TYPE
+	   || type == char16_type_node || type == char32_type_node
+	   || ((long_p || short_p)
+		   && (explicit_char || explicit_intN)))
+	error_at (loc, "%qs specified with %qT", key, type);
+  else if (!explicit_int && !defaulted_int
+	   && !explicit_char && !explicit_intN)
 	{
-	  if (signed_p || unsigned_p)
-	error ("% or % invalid for %qs", name);
-	  else if (short_p || long_p)
-	error ("% or % invalid for %qs", name);
-	}
-  else
-	{
-	  ok = 1;
-	  if (!explicit_int && !defaulted_int && !explicit_char && !explicit_intN && pedantic)
+	  if (typedef_decl)
 	{
-	  pedwarn (input_location, OPT_Wpedantic, 
-		   "long, short, signed or unsigned used invalidly for %qs",
-		   name);
-	  if (flag_pedantic_errors)
-		ok = 0;
+	  pedwarn (loc, OPT_Wpedantic, "%qs specified with %qT",
+		   key, type);
+	  ok = !flag_pedantic_errors;
 	}
+	  else if (declspecs->decltype_p)
+	error_at (loc, "%qs specified with %", key);
+	  else
+	error_at (loc, "%qs specified with %", key);
 	}
+  else
+	ok = 1;
 
   /* Discard the type modifiers if they are invalid.  */
   if (! ok)
diff --git a/gcc/testsuite/g++.dg/cpp1z/decomp3.C b/gcc/testsuite/g++.dg/cpp1z/decomp3.C
index 1886cdbe90d..4d75e938098 100644
--- a/gcc/testsuite/g++.dg/cpp1z/decomp3.C
+++ b/gcc/testsuite/g++.dg/cpp1z/decomp3.C
@@ -29,7 +29,7 @@ test (A , B c)
 	// { dg-warning "structured bindings only available with -std=c..17 or -std=gnu..17" "" { target c++14_down } .-1 }
   __restrict auto [ t ] = c;		// { dg-error "invalid use of 'restrict'" }
 	// { dg-warning "structured bindings only available with -std=c..17 or -std=gnu..17" "" { target c++14_down } .-1 }
-  long long auto [ u ] = c;		// { dg-error "'long long' invalid for 'structured binding'" }
+  long long auto [ u ] = c;		// { dg-error "'long long' specified with 'auto'" }
 	// { dg-warning "structured bindings only available with -std=c..17 or -std=gnu..17" "" { target c++14_down } .-1 }
   virtual auto [ v ] = c;		// { dg-error "'virtual' outside 

Re: [i386] Mask generation in avx2intrin.h

2018-04-30 Thread Marc Glisse

Ping https://gcc.gnu.org/ml/gcc-patches/2017-11/msg02233.html

On Sat, 25 Nov 2017, Marc Glisse wrote:


Hello,

the way full masks are generated currently in avx2intrin.h is questionable: 
opaque for the inline functions, weird/wrong for the macros.


It is possible we may want to add code so the constant mask with all ones may 
be generated with vxorpd+vcmpeqpd instead of loading it from memory, but that 
looks like something that should be decided globally, not in each instruction 
that uses it.


Bootstrap+regtest on x86_64-pc-linux-gnu (skylake).

2017-11-27  Marc Glisse  

PR target/80885
* config/i386/avx2intrin.h (_mm_i32gather_pd): Rewrite mask generation.
(_mm256_i32gather_pd): Likewise.
(_mm_i64gather_pd): Likewise.
(_mm256_i64gather_pd): Likewise.
(_mm_i32gather_ps): Likewise.
(_mm256_i32gather_ps): Likewise.
(_mm_i64gather_ps): Likewise.
(_mm256_i64gather_ps): Likewise.


--
Marc Glisse


C++ PATCH for c++/85305, pack in lambda init-capture

2018-04-30 Thread Jason Merrill
We parse the initializer for an init-capture as an initializer, which
checks for any unexpanded parameter packs.  Normally we avoid that
error in a lambda by checking whether we're in a lambda, but at this
point we haven't entered the lambda scope yet.

Jakub, if we end up needing an RC3 it would be nice to get this into
8.1, but if not it's fine to wait for 8.2.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 2009e6d79c38c4867600156ab6b457c0e664927f
Author: Jason Merrill 
Date:   Mon Apr 30 13:33:12 2018 -0400

PR c++/85305 - pack in lambda init-capture.

* parser.c (cp_parser_initializer): Add subexpression_p parm; don't
check_for_bare_parameter_packs in a subexpression.
(cp_parser_lambda_introducer): Use it.

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index d8ce28a6d61..b839232bcbe 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -2243,7 +2243,7 @@ static tree cp_parser_default_argument
 static void cp_parser_function_body
   (cp_parser *, bool);
 static tree cp_parser_initializer
-  (cp_parser *, bool *, bool *);
+  (cp_parser *, bool *, bool *, bool = false);
 static cp_expr cp_parser_initializer_clause
   (cp_parser *, bool *);
 static cp_expr cp_parser_braced_list
@@ -10358,7 +10358,7 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr)
 		 "lambda capture initializers "
 		 "only available with -std=c++14 or -std=gnu++14");
 	  capture_init_expr = cp_parser_initializer (parser, ,
-		 _constant);
+		 _constant, true);
 	  explicit_init_p = true;
 	  if (capture_init_expr == NULL_TREE)
 	{
@@ -21860,7 +21860,7 @@ cp_parser_ctor_initializer_opt_and_function_body (cp_parser *parser,
 
 static tree
 cp_parser_initializer (cp_parser* parser, bool* is_direct_init,
-		   bool* non_constant_p)
+		   bool* non_constant_p, bool subexpression_p)
 {
   cp_token *token;
   tree init;
@@ -21907,7 +21907,7 @@ cp_parser_initializer (cp_parser* parser, bool* is_direct_init,
   init = error_mark_node;
 }
 
-  if (check_for_bare_parameter_packs (init))
+  if (!subexpression_p && check_for_bare_parameter_packs (init))
 init = error_mark_node;
 
   return init;
diff --git a/gcc/testsuite/g++.dg/cpp1z/fold-lambda2.C b/gcc/testsuite/g++.dg/cpp1z/fold-lambda2.C
new file mode 100644
index 000..e93f55f7fd8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/fold-lambda2.C
@@ -0,0 +1,8 @@
+// PR c++/85305
+// { dg-additional-options -std=c++17 }
+
+template 
+void foo()
+{
+  ([i = Is]{}(), ...); 
+}


Re: [PATCH] Warn for ignored ASM labels on typdef declarations PR 85444 (v.2)

2018-04-30 Thread Joseph Myers
On Mon, 30 Apr 2018, Will Hawkins wrote:

> I agree! It was, however, the closest of all the categories that I
> could find that seemed to match the warning that I am trying to emit.
> I will go back and review the categories and see if there is something
> that I missed.

If there isn't a suitable warning option for a new warning, that means you 
need to add (with documentation) a new warning option (which might then be 
enabled by -Wall or -Wextra if appropriate; and, once in GCC, should have 
release notes added to gcc-9/changes.html on the website).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] [configure] Added "nfp" to the build for binutils.

2018-04-30 Thread Joel Brobecker
> +2018-04-30  Francois H. Theron 
> +
> + * config.sub: Added "nfp" to basic_machine list.
> + * configure.ac: Added "nfp" target.
> + * configure: Regenerate.

I am not a maintainer, but I noticed that config.sub is not being
modified by this commit -- a small discrepancy between the patch
and the proposed ChangeLog entry.

>  2018-02-13  Maciej W. Rozycki  
>  
>   * configure.ac  (noconfigdirs): Add `ld'.
> diff --git a/configure b/configure
> index 0601395512..69c99e99cc 100755
> --- a/configure
> +++ b/configure
> @@ -3777,6 +3777,10 @@ case "${target}" in
>mt-*-*)
>  noconfigdirs="$noconfigdirs sim"
>  ;;
> +  nfp-*-*)
> +noconfigdirs="$noconfigdirs ld gas gdb gprof sim"
> +noconfigdirs="$noconfigdirs $target_libraries"
> +;;
>powerpc-*-aix*)
>  # copied from rs6000-*-* entry
>  noconfigdirs="$noconfigdirs gprof"
> diff --git a/configure.ac b/configure.ac
> index c34652..a1edc369a2 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -1108,6 +1108,10 @@ case "${target}" in
>mt-*-*)
>  noconfigdirs="$noconfigdirs sim"
>  ;;
> +  nfp-*-*)
> +noconfigdirs="$noconfigdirs ld gas gdb gprof sim"
> +noconfigdirs="$noconfigdirs $target_libraries"
> +;;
>powerpc-*-aix*)
>  # copied from rs6000-*-* entry
>  noconfigdirs="$noconfigdirs gprof"


-- 
Joel


[PATCH] Implement absv2di2 and absv4di2 expanders for pre-avx512vl (PR target/85572)

2018-04-30 Thread Jakub Jelinek
Hi!

Before avx512vl we don't have a single instruction to do V2DImode and
V4DImode abs, but that isn't much different from say V4SImode before SSE3
where we also just emit a short sequence that is better than elementwise
expansion.  Bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2018-04-30  Jakub Jelinek  

PR target/85572
* config/i386/i386.c (ix86_expand_sse2_abs): Handle E_V2DImode and
E_V4DImode.
* config/i386/sse.md (abs2): Use VI_AVX2 iterator instead of
VI1248_AVX512VL_AVX512BW.  Handle V2DImode and V4DImode if not
TARGET_AVX512VL using ix86_expand_sse2_abs.  Formatting fixes.

* g++.dg/other/sse2-pr85572-1.C: New test.
* g++.dg/other/sse2-pr85572-2.C: New test.
* g++.dg/other/sse4-pr85572-1.C: New test.
* g++.dg/other/avx2-pr85572-1.C: New test.

--- gcc/config/i386/i386.c.jj   2018-04-25 15:09:29.895453703 +0200
+++ gcc/config/i386/i386.c  2018-04-30 18:31:56.027101932 +0200
@@ -49806,39 +49806,74 @@ ix86_expand_sse2_abs (rtx target, rtx in
 
   switch (mode)
 {
+case E_V2DImode:
+case E_V4DImode:
+  /* For 64-bit signed integer X, with SSE4.2 use
+pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X.
+Otherwise handle it similarly to V4SImode, except use 64 as W instead 
of
+32 and use logical instead of arithmetic right shift (which is
+unimplemented) and subtract.  */
+  if (TARGET_SSE4_2)
+   {
+ tmp0 = gen_reg_rtx (mode);
+ tmp1 = gen_reg_rtx (mode);
+ emit_move_insn (tmp1, CONST0_RTX (mode));
+ if (mode == E_V2DImode)
+   emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input));
+ else
+   emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input));
+
+ tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
+ NULL, 0, OPTAB_DIRECT);
+ x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
+  target, 0, OPTAB_DIRECT);
+ break;
+   }
+
+  tmp0 = expand_simple_binop (mode, LSHIFTRT, input,
+ GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
+ NULL, 0, OPTAB_DIRECT);
+  tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false);
+
+  tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
+ NULL, 0, OPTAB_DIRECT);
+  x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
+  target, 0, OPTAB_DIRECT);
+  break;
+
+case E_V4SImode:
   /* For 32-bit signed integer X, the best way to calculate the absolute
 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
-  case E_V4SImode:
-   tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
-   GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
-   NULL, 0, OPTAB_DIRECT);
-   tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
-   NULL, 0, OPTAB_DIRECT);
-   x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
-target, 0, OPTAB_DIRECT);
-   break;
+  tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
+ GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
+ NULL, 0, OPTAB_DIRECT);
+  tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
+ NULL, 0, OPTAB_DIRECT);
+  x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
+  target, 0, OPTAB_DIRECT);
+  break;
 
+case E_V8HImode:
   /* For 16-bit signed integer X, the best way to calculate the absolute
 value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
-  case E_V8HImode:
-   tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
+  tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
 
-   x = expand_simple_binop (mode, SMAX, tmp0, input,
-target, 0, OPTAB_DIRECT);
-   break;
+  x = expand_simple_binop (mode, SMAX, tmp0, input,
+  target, 0, OPTAB_DIRECT);
+  break;
 
+case E_V16QImode:
   /* For 8-bit signed integer X, the best way to calculate the absolute
 value of X is min ((unsigned char) X, (unsigned char) (-X)),
 as SSE2 provides the PMINUB insn.  */
-  case E_V16QImode:
-   tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
+  tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
 
-   x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
-target, 0, OPTAB_DIRECT);
-   break;
+  x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
+  target, 0, OPTAB_DIRECT);
+  break;
 
-  default:
-   gcc_unreachable ();
+default:
+  

Re: [PATCH] Warn for ignored ASM labels on typdef declarations PR 85444 (v.2)

2018-04-30 Thread Will Hawkins
On Mon, Apr 30, 2018 at 7:51 AM, Joseph Myers  wrote:
> On Sat, 28 Apr 2018, Will Hawkins wrote:
>
>> +{
>> +  warning (OPT_Wignored_qualifiers, "asm-specifier is ignored in "
>> +   "typedef declaration");
>
> This does not match the documented semantics of -Wignored-qualifiers.  I
> don't think it's appropriate to expand those semantics to include this
> warning either.
>


I agree! It was, however, the closest of all the categories that I
could find that seemed to match the warning that I am trying to emit.
I will go back and review the categories and see if there is something
that I missed.

I am certainly not asking you to "do my homework" for me, but does
anyone have suggestions for a category that might house this warning?

Thanks!
Will


> --
> Joseph S. Myers
> jos...@codesourcery.com


Re: [PATCH] Simplify floating point comparisons

2018-04-30 Thread Marc Glisse

On Fri, 12 Jan 2018, Wilco Dijkstra wrote:


Hi,

Here is the updated version:

This patch implements some of the optimizations discussed in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026.

Simplify (C / x >= 0.0) into x >= 0.0 with -fno-signed-zeros
and -ffinite-math-only.  If C is negative the comparison is reversed.
Only handle >= and <= for now since C / x can underflow if C is small.


Simplify (x * C1) > C2 into x > (C2 / C1) with -funsafe-math-optimizations.
If C1 is negative the comparison is reversed.

OK for commit?

ChangeLog
2018-01-10  Wilco Dijkstra  
Jackson Woodruff  

   gcc/
PR 71026/tree-optimization
* match.pd: Simplify floating point comparisons.

   gcc/testsuite/
PR 71026/tree-optimization
* gcc.dg/div-cmp-1.c: New test.
* gcc.dg/div-cmp-2.c: New test.
--

diff --git a/gcc/match.pd b/gcc/match.pd
index 
435125a317275527661fba011a9d26e507d293a6..8a6fee906de6a750201362119862f8326868f26b
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -376,6 +376,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (rdiv @0 (negate @1))
 (rdiv (negate @0) @1))

+/* Simplify (C / x op 0.0) to x op 0.0 for C != 0, C != Inf/Nan.
+   Only handle >= and <= since C / x may underflow to zero.  */
+(for op (le ge)
+ res_op (lt ge)
+ neg_op (ge lt)
+ (simplify
+  (op (rdiv REAL_CST@0 @1) real_zerop@2)
+  (if (!HONOR_SIGNED_ZEROS (@1) && !HONOR_INFINITIES (@1))
+   (switch
+(if (real_less (, TREE_REAL_CST_PTR (@0)))
+ (res_op @1 @2))
+/* For C < 0, use the inverted operator.  */
+(if (real_less (TREE_REAL_CST_PTR (@0), ))
+ (neg_op @1 @2))


Let's try with C = DBL_MIN and x = ±DBL_MAX. I don't believe it involves 
signed zeros or infinities, just an underflow. First, the result depends 
on the rounding mode. And in the default round-to-nearest, both divisions 
give 0, and thus compare the same with 0, but we replace that with a sign 
test on x, where they clearly give opposite answers.


What would be the proper flag to test to check if we care about underflow?

--
Marc Glisse


[PATCH] [configure] Added "nfp" to the build for binutils.

2018-04-30 Thread Francois H. Theron

* Excludes: ld gas gdb gprof sim
* For 
https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=commit;h=fe944acf8f858cfe6bcfd00670a88847a464717c

Signed-off-by: Francois H. Theron 
---
 ChangeLog| 6 ++
 configure| 4 
 configure.ac | 4 
 3 files changed, 14 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 03e1852114..edb69ed8b5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2018-04-30  Francois H. Theron 
+
+	* config.sub: Added "nfp" to basic_machine list.
+	* configure.ac: Added "nfp" target.
+	* configure: Regenerate.
+
 2018-02-13  Maciej W. Rozycki  
 
 	* configure.ac  (noconfigdirs): Add `ld'.
diff --git a/configure b/configure
index 0601395512..69c99e99cc 100755
--- a/configure
+++ b/configure
@@ -3777,6 +3777,10 @@ case "${target}" in
   mt-*-*)
 noconfigdirs="$noconfigdirs sim"
 ;;
+  nfp-*-*)
+noconfigdirs="$noconfigdirs ld gas gdb gprof sim"
+noconfigdirs="$noconfigdirs $target_libraries"
+;;
   powerpc-*-aix*)
 # copied from rs6000-*-* entry
 noconfigdirs="$noconfigdirs gprof"
diff --git a/configure.ac b/configure.ac
index c34652..a1edc369a2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1108,6 +1108,10 @@ case "${target}" in
   mt-*-*)
 noconfigdirs="$noconfigdirs sim"
 ;;
+  nfp-*-*)
+noconfigdirs="$noconfigdirs ld gas gdb gprof sim"
+noconfigdirs="$noconfigdirs $target_libraries"
+;;
   powerpc-*-aix*)
 # copied from rs6000-*-* entry
 noconfigdirs="$noconfigdirs gprof"


Re: ATTRIBUTE_NONSTRING

2018-04-30 Thread Pedro Alves
On 04/27/2018 02:41 AM, Alan Modra wrote:
> This patch adds ATTRIBUTE_NONSTRING, which will be used to curb
> -Wstringop-truncation warnings in binutils.  OK to apply?
> 
>   * ansidecl.h (ATTRIBUTE_NONSTRING): Define.

+1, FWIW.

Thanks,
Pedro Alves


Re: [PATCH] handle local aggregate initialization in strlen (PR 83821)

2018-04-30 Thread Jeff Law
On 01/12/2018 02:30 PM, Martin Sebor wrote:
> A failure in a test for the recently enhanced -Warray-bounds
> warning exposed an unnecessarily broad restriction in the strlen
> pass that prevents it from tracking the length of a member string
> of a locally defined and initialized struct:
> 
>   void f (void)
>   {
> struct { char s[8]; int i } a = { "1234", 5 };
> 
> if (strlen (a.s) != 4)   // not folded
>   abort ();
>    }
> 
> IIUC, the restriction was in place to account for writes into
> an array changing or invalidating the length of a string stored
> in its initial elements.  This would happen if the write either
> changed the string's terminating nul byte, or if it reset one
> of the prior non-nul bytes.
> 
> To reflect just this intent the restriction can be tightened
> up to improve the pass' ability to track even the lengths of
> string members of locally initialized aggregates.  Besides
> leading to better code this change also clears up the test
> failure.
> 
> Tested on x86_64-linux.
> 
> Martin
> 
> 
> gcc-83821.diff
> 
> 
> PR tree-optimization/83821 - local aggregate initialization defeats strlen 
> optimization
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/83821
>   * tree-ssa-strlen.c (maybe_invalidate): Consider the length of
>   a string when available.
>   (handle_char_store): Reset calloc statement on a non-nul store.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/83821
>   * c-c++-common/Warray-bounds-4.c: Remove XFAIL.
>   * gcc.dg/strlenopt-43.c: New test.
>   * gcc.dg/strlenopt-44.c: Same.
>   * gcc.dg/tree-ssa/calloc-4.c: Same.
I see what you're trying to do.  But I'm really struggling to understand
Marc G's comment "Do not use si->nonzero_chars" since that's precisely
what your patch does.

Your patch seems reasonable on the surface, but I fear there's something
I'm missing.  Can you reach out to Marc G. to see if he recalls the
rationale behind the comment?

The comment in its original form was introduced here:

commit 9f15ed6e5c148ded6e7942e75595d91151792c9b
Author: glisse 
Date:   Tue Jun 24 18:50:00 2014 +

2014-06-24  Marc Glisse  

PR tree-optimization/57742
gcc/
* tree-ssa-strlen.c (get_string_length): Ignore malloc.
(handle_builtin_malloc, handle_builtin_memset): New functions.
(strlen_optimize_stmt): Call them.
* passes.def: Move strlen after loop+dom but before vrp.
gcc/testsuite/
* g++.dg/tree-ssa/calloc.C: New testcase.
* gcc.dg/tree-ssa/calloc-1.c: Likewise.
* gcc.dg/tree-ssa/calloc-2.c: Likewise.
* gcc.dg/strlenopt-9.c: Adapt.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@211956
138bc75d-0d04-0410-961f-82ee72b054a4


Jeff


[PATCH][tree-complex.c] PR tree-optimization/70291: Inline floating-point complex multiplication more aggressively

2018-04-30 Thread Kyrill Tkachov

Hi all,

We can improve the performance of complex floating-point multiplications by 
inlining the expansion a bit more aggressively.
We can inline complex x = a * b as:
x = (ar*br - ai*bi) + i(ar*bi + br*ai);
if (isunordered (__real__ x, __imag__ x))
  x = __muldc3 (a, b); //Or __mulsc3 for single-precision

That way, in the common case where no NaNs are produced, we can avoid the
libgcc call, and fall back to the NaN handling stuff in libgcc only if
either component of the expansion is NaN.

The implementation is done in expand_complex_multiplication in tree-complex.c 
and the above expansion
will be done when optimising for -O1 and greater and when not optimising for 
size.
At -O0 and -Os the single call to libgcc will be emitted.

For the code:
__complex double
foo (__complex double a, __complex double b)
{
  return a * b;
}

We will now emit at -O2 for aarch64:
foo:
fmul    d16, d1, d3
fmul    d6, d1, d2
fnmsub  d5, d0, d2, d16
fmadd   d4, d0, d3, d6
fcmp    d5, d4
bvs .L8
fmov    d1, d4
fmov    d0, d5
ret
.L8:
stp x29, x30, [sp, -16]!
mov x29, sp
bl  __muldc3
ldp x29, x30, [sp], 16
ret

Instead of just a branch to __muldc3.

Bootstrapped and tested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf, 
x86_64-unknown-linux-gnu.

Ok for trunk? (GCC 9)

Thanks,
Kyrill

2018-04-30  Kyrylo Tkachov  

PR tree-optimization/70291
* tree-complex.c (insert_complex_mult_libcall): New function.
(expand_complex_multiplication_limited_range): Likewise.
(expand_complex_multiplication): Expand floating-point complex
multiplication using the above.

2018-04-30  Kyrylo Tkachov  

PR tree-optimization/70291
* gcc.dg/complex-6.c: New test.
* gcc.dg/complex-7.c: Likewise.
diff --git a/gcc/testsuite/gcc.dg/complex-6.c b/gcc/testsuite/gcc.dg/complex-6.c
new file mode 100644
index ..123b2a8206f098e7140792375830ff5f01f30cf6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/complex-6.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/70291.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cplxlower" } */
+
+__complex float
+foo (__complex float a, __complex float b)
+{
+  return a * b;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_isunordered" 1 "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "__mulsc3" 1 "cplxlower1" } } */
diff --git a/gcc/testsuite/gcc.dg/complex-7.c b/gcc/testsuite/gcc.dg/complex-7.c
new file mode 100644
index ..7d5ba3aefb3e007824b778d716ef7f21a48c58f8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/complex-7.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/70291.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cplxlower" } */
+
+__complex double
+foo (__complex double a, __complex double b)
+{
+  return a * b;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_isunordered" 1 "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "__muldc3" 1 "cplxlower1" } } */
diff --git a/gcc/tree-complex.c b/gcc/tree-complex.c
index 622b8696399b9e9d8bddcc6340d2f8d8ca852637..319c302526483ca80ecafe7e55289c1850ad6a11 100644
--- a/gcc/tree-complex.c
+++ b/gcc/tree-complex.c
@@ -978,6 +978,43 @@ expand_complex_addition (gimple_stmt_iterator *gsi, tree inner_type,
 }
 
 /* Expand a complex multiplication or division to a libcall to the c99
+   compliant routines.  Unlike expand_complex_libcall create and insert
+   the call, assign it to an output variable and return that rather than
+   modifying existing statements in place.  */
+
+static tree
+insert_complex_mult_libcall (gimple_stmt_iterator *gsi, tree type, tree ar,
+			  tree ai, tree br, tree bi)
+{
+  machine_mode mode;
+  built_in_function bcode;
+  tree fn, lhs;
+  gcall *stmt;
+
+
+  mode = TYPE_MODE (type);
+  gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
+
+  bcode = ((built_in_function)
+	 (BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT));
+
+  fn = builtin_decl_explicit (bcode);
+
+  stmt = gimple_build_call (fn, 4, ar, ai, br, bi);
+  lhs = create_tmp_var (type);
+  gimple_call_set_lhs (stmt, lhs);
+  if (gimple_in_ssa_p (cfun))
+{
+  lhs = make_ssa_name (lhs, stmt);
+  gimple_call_set_lhs (stmt, lhs);
+}
+  update_stmt (stmt);
+  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+
+  return lhs;
+}
+
+/* Expand a complex multiplication or division to a libcall to the c99
compliant routines.  */
 
 static void
@@ -1025,6 +1062,35 @@ expand_complex_libcall (gimple_stmt_iterator *gsi, tree ar, tree ai,
 }
 }
 
+/* Perform a complex multiplication assuming limited range on two
+   complex constants A, B represented by AR, AI, BR, BI of type TYPE.
+   The operation we want is: a * b = (ar*br - ai*bi) + i(ar*bi + br*ai).
+   Insert the GIMPLE statements into GSI.  Store the real and imaginary
+   

Re: [PATCH] Simplify floating point comparisons

2018-04-30 Thread Jeff Law
On 01/12/2018 06:21 AM, Wilco Dijkstra wrote:
> Hi,
> 
> Here is the updated version:
> 
> This patch implements some of the optimizations discussed in
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026.
> 
> Simplify (C / x >= 0.0) into x >= 0.0 with -fno-signed-zeros
> and -ffinite-math-only.  If C is negative the comparison is reversed.
> Only handle >= and <= for now since C / x can underflow if C is small.
> 
> 
> Simplify (x * C1) > C2 into x > (C2 / C1) with -funsafe-math-optimizations.
> If C1 is negative the comparison is reversed.
> 
> OK for commit?
> 
> ChangeLog
> 2018-01-10  Wilco Dijkstra    
>   Jackson Woodruff  
> 
> gcc/
>   PR 71026/tree-optimization
>   * match.pd: Simplify floating point comparisons.
> 
> gcc/testsuite/
>   PR 71026/tree-optimization
>   * gcc.dg/div-cmp-1.c: New test.
>   * gcc.dg/div-cmp-2.c: New test.
OK for the trunk.  I don't think there is a need to backport to gcc-8.

jeff


Generalize a<b a<min(b,c)

2018-04-30 Thread Marc Glisse

Hello,

this transformation was lacking symmetry, only handling & and not |.

It probably still fails to handle a < b & a <= 123, while it would handle 
< 124, but that's for another day.


Bootstrap+testsuite on powerpc64le-unknown-linux-gnu.

2018-05-01  Marc Glisse  

PR tree-optimization/85143
gcc/
* match.pd (A @1 and @0 > @2) to use max */
-(for op (lt le gt ge)
- ext (min min max max)
+(for logic (bit_and bit_and bit_and bit_and bit_ior bit_ior bit_ior bit_ior)
+ op(lt  le  gt  ge  lt  le  gt  ge )
+ ext   (min min max max max max min min)
  (simplify
-  (bit_and (op:cs @0 @1) (op:cs @0 @2))
+  (logic (op:cs @0 @1) (op:cs @0 @2))
   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
&& TREE_CODE (@0) != INTEGER_CST)
(op @0 (ext @1 @2)
 
 (simplify
  /* signbit(x) -> 0 if x is nonnegative.  */
  (SIGNBIT tree_expr_nonnegative_p@0)
  { integer_zero_node; })
 
 (simplify
Index: gcc/testsuite/gcc.dg/tree-ssa/minmax-loopend-2.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/minmax-loopend-2.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/minmax-loopend-2.c	(working copy)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int and_test(long a, long b, long c) {
+  int cmp1 = a > b;
+  int cmp2 = a > c;
+  return cmp1 & cmp2;
+}
+
+int ior_test (long a, long b, long c) {
+  int cmp1 = a < b;
+  int cmp2 = a < c;
+  return cmp1 | cmp2;
+}
+
+/* { dg-final { scan-tree-dump-times "MAX_EXPR" 2 "optimized" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/minmax-loopend.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/minmax-loopend.c	(revision 259767)
+++ gcc/testsuite/gcc.dg/tree-ssa/minmax-loopend.c	(working copy)
@@ -1,17 +1,16 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-optimized" } */
 
-int min_test(long a, long b, long c) {
+int and_test(long a, long b, long c) {
   int cmp1 = a < b;
   int cmp2 = a < c;
   return cmp1 & cmp2;
 }
 
-int max_test (long a, long b, long c) {
+int ior_test (long a, long b, long c) {
   int cmp1 = a > b;
   int cmp2 = a > c;
-  return cmp1 & cmp2;
+  return cmp1 | cmp2;
 }
 
-/* { dg-final { scan-tree-dump-times "MIN_EXPR" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "MAX_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "optimized" } } */


Re: [PATCH] fold strlen of constant aggregates (PR 83693)

2018-04-30 Thread Jeff Law
On 01/09/2018 02:41 PM, Martin Sebor wrote:
> I found a few problems in the previous revision:
> 
> 1) It didn't handle the simple case of member arrays of const
>    struct objects (only member arrays of  const arrays of structs
>    were handled).
> 2) The array_ctor_elt() function returned a narrow empty string
>    for an uninitialized CONSTRUCTOR element of any character type
>    when it should return the same string in the expected character
>    type (char, wchar_t, etc.)
> 3) The string_constant() function would in some cases use a byte
>    offset to get the initializer from a CONSTRUCTOR instead of
>    an array index.
> 
> The attached version 3 of the patch corrects these issues.
> Retested on x86_64 and with the Glibc ToT.
> 
>> After sleeping on it I realized that although enhancing
>> gimple_fold_builtin_strlen is an improvement, it only benefits
>> straight calls to strlen and nothing else.  Calls to strcmp,
>> sprintf, or strcpy (and along with it the rest of the strlen
>> pass) are still expanded as if the argument were unknown.  To
>> improve even those callers, the folding needs to be done at
>> a lower level (otherwise they'd all have to duplicate the same
>> code as gimple_fold_builtin_strlen).  With that in mind I've
>> moved the logic to string_constant() so all of those clients
>> benefit.
>>
>> Retested on x86_64-linux.  Out of paranoia I also built and
>> tested the top of Glibc trunk with no unusual failures.
>>
>> Martin
> 
> 
> gcc-83693.diff
> 
> 
> PR tree-optimization/83693 - missing strlen optimization for array of arrays
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/83693
>   * expr.c (array_ctor_elt): New function.
>   (string_constant): Call it.  Handle initializers of arrays of arrays.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/83693
>   * gcc.dg/strcmp-2.c: New test.
>   * gcc.dg/strlenopt-42.c: New test.
>   * gcc.dg/strlenopt-43.c: New test.
> 
> diff --git a/gcc/expr.c b/gcc/expr.c
> index cd1e57d..75110e5 100644
> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -62,7 +62,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "rtl-chkp.h"
>  #include "ccmp.h"
>  #include "rtx-vector-builder.h"
> -
> +#include "gimple-fold.h"
>  
>  /* If this is nonzero, we do not bother generating VOLATILE
> around volatile memory references, and we are willing to
> @@ -11343,6 +11343,50 @@ is_aligning_offset (const_tree offset, const_tree 
> exp)
>return TREE_CODE (offset) == ADDR_EXPR && TREE_OPERAND (offset, 0) == exp;
>  }
>  
> +/* Return initializer element IDX for the array CONSTRUCTOR initializer
> +   INIT or an empty string constant with type CHARTYPE if no such element
> +   exists.  If IDX is null, simply return an empty string.  If IDX is not
> +   constant, return NULL_TREE.  A helper of string_constant.  */
> +
> +static tree
> +array_ctor_elt (tree chartype, tree init, tree idx)
> +{
> +  if (idx)
> +{
> +  if (!tree_fits_uhwi_p (idx))
> + return NULL_TREE;
> +
> +  HOST_WIDE_INT i = tree_to_uhwi (idx);
> +
> +  if (i < CONSTRUCTOR_NELTS (init)
> +   && tree_int_cst_equal (CONSTRUCTOR_ELT (init, i)->index, idx))
> + return CONSTRUCTOR_ELT (init, i)->value;
> +
> +  tree index, value;
> +  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), i, index, value)
> + {
> +   if (tree_int_cst_equal (index, idx))
> + return value;
> + }
So you first look at IDX and if it matches you return the appropriate value.

Else you iterate from the start of the constructor list through the
elements until you find a match.

Would a binary search between 0..IDX be better here?  Do we have any
imposed ordering on the elements?

> +}
> +
> +  /* Build and return a STRING_CST representing the empty string with
> + CHARTYPE.  Make sure the string representation has enough zero
> + bytes for CHARTYPE.  */
> +  const char nuls[16] = "";
> +  unsigned elemsize = tree_to_uhwi (TYPE_SIZE_UNIT(chartype));
> +  tree str = build_string (elemsize, nuls);
> +  tree elemtype = build_qualified_type (chartype, TYPE_QUAL_CONST);
> +  tree indextype = build_index_type (size_zero_node);
> +  tree arraytype = build_array_type (elemtype, indextype);
> +  TREE_TYPE (str) = arraytype;
> +  TREE_CONSTANT (str) = true;
> +  TREE_READONLY (str) = true;
> +  TREE_STATIC (str) = true;
I'm a bit surprised we don't have a suitable string constant lying
around, but I don't see one.


So really the only concern is the compile-time cost of array_ctor_elt.
If we know anything about the ordering of elements within the
constructor, then we could do a lot better WRT the compile-time cost.

jeff


[PATCH] rs6000: Remove paired single

2018-04-30 Thread Segher Boessenkool
This removes paired single (used on the 750CL and friends).  It was
deprecated in GCC 8.  Removing it means we only have one vector model
to deal with (VMX+VSX, 16-byte vectors).

Tested on powerpc64-linux {-m32,-m64} and on powerpc64le-linux.
I plan to commit this later this week.


Segher


2018-04-30  Segher Boessenkool  

* config.gcc (powerpc*-*-*): Remove paired.h.  Unsupport the
powerpc*-*-linux*paired* target.
* config/rs6000/750cl.h: Delete.
* config/rs6000/paired.h: Delete.
* config/rs6000/paired.md: Delete.
* config/rs6000/predicates.md (easy_vector_constant): Remove paired
float support.
* config/rs6000/rs6000-builtin.def: Remove paired float support.
* config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Update
comment.  Remove paired float support.
* config/rs6000/rs6000-modes.def: Remove V2SF and V2SI.
* config/rs6000/rs6000-opts.h (enum rs6000_vector): Delete
VECTOR_PAIRED.
* config/rs6000/rs6000-protos.h (paired_expand_vector_init,
paired_emit_vector_cond_expr, paired_expand_vector_move): Delete
declarations.
* config/rs6000/rs6000.c: Remove paired float support.
(paired_expand_vector_init, paired_expand_vector_move,
paired_emit_vector_compare, paired_emit_vector_cond_expr,
paired_expand_lv_builtin, paired_expand_stv_builtin,
paired_expand_builtin, paired_expand_predicate_builtin,
paired_init_builtins): Delete.
* config/rs6000/rs6000.h: Remove paired float support.
* config/rs6000/rs6000.md: Remove paired float support.
(move_from_CR_ov_bit): Delete.
* config/rs6000/rs6000.opt (mpaired): Delete.
* config/rs6000/t-rs6000: Remove paired.md from MD_INCLUDES.
* doc/invoke.texi (RS/6000 and PowerPC Options): Delete -mpaired.

---
 gcc/config.gcc   |   7 +-
 gcc/config/rs6000/750cl.h|  30 --
 gcc/config/rs6000/paired.h   |  75 
 gcc/config/rs6000/paired.md  | 492 
 gcc/config/rs6000/predicates.md  |   5 -
 gcc/config/rs6000/rs6000-builtin.def |  84 -
 gcc/config/rs6000/rs6000-c.c |  13 +-
 gcc/config/rs6000/rs6000-modes.def   |   4 -
 gcc/config/rs6000/rs6000-opts.h  |   1 -
 gcc/config/rs6000/rs6000-protos.h|   4 -
 gcc/config/rs6000/rs6000.c   | 698 ++-
 gcc/config/rs6000/rs6000.h   |  50 +--
 gcc/config/rs6000/rs6000.md  |  12 -
 gcc/config/rs6000/rs6000.opt |   4 -
 gcc/config/rs6000/t-rs6000   |   3 +-
 gcc/doc/invoke.texi  |   8 -
 16 files changed, 47 insertions(+), 1443 deletions(-)
 delete mode 100644 gcc/config/rs6000/750cl.h
 delete mode 100644 gcc/config/rs6000/paired.h
 delete mode 100644 gcc/config/rs6000/paired.md

diff --git a/gcc/config.gcc b/gcc/config.gcc
index a5defb0..3658c42 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -473,7 +473,6 @@ powerpc*-*-*)
extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h"
extra_headers="${extra_headers} mmintrin.h x86intrin.h"
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
-   extra_headers="${extra_headers} paired.h"
extra_headers="${extra_headers} amo.h"
case x$with_cpu in

xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
@@ -2502,11 +2501,11 @@ powerpc*-*-linux*)
all) maybe_biarch=yes ;;
esac
case ${target} in
-   powerpc64*-*-linux*spe* | powerpc64*-*-linux*paired*)
+   powerpc64*-*-linux*spe* | powerpc*-*-linux*paired*)
echo "*** Configuration ${target} not supported" 1>&2
exit 1
;;
-   powerpc*-*-linux*spe* | powerpc*-*-linux*paired*)
+   powerpc*-*-linux*spe*)
maybe_biarch=
;;
esac
@@ -2552,8 +2551,6 @@ powerpc*-*-linux*)
tm_file="${tm_file} rs6000/linuxaltivec.h" ;;
powerpc*-*-linux*spe*)
tm_file="${tm_file} ${cpu_type}/linuxspe.h ${cpu_type}/e500.h" 
;;
-   powerpc*-*-linux*paired*)
-   tm_file="${tm_file} rs6000/750cl.h" ;;
esac
case ${target} in
*-linux*-musl*)
diff --git a/gcc/config/rs6000/750cl.h b/gcc/config/rs6000/750cl.h
deleted file mode 100644
index 5008002..000
--- a/gcc/config/rs6000/750cl.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Enable 750cl paired single support.
-   Copyright (C) 2007-2018 Free Software Foundation, Inc.
-   Contributed by Revital Eres (e...@il.ibm.com)
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published
-   by the Free Software Foundation; either 

Re: [PATCH, rs6000] Add missing vec_max tests

2018-04-30 Thread Carl Love
GCC Maintainers:

I have re-worked the patch per Peter's comments.  I retested the patch
on P8 BE, P8 LE and P9 LE.

 Please let me know if the patch looks OK for GCC mainline.

 Carl Love



gcc/testsuite/ChangeLog:

2018-04-30  Carl Love  
* gcc.target/powerpc/vsx-vector-6.h (foo): Add test for vec_max,
vec_trunc.
* gcc.target/powerpc/vsx-vector-6-le.c (dg-final): Update xvcmpeqdp,
xvcmpgtdp, xvcmpgedp counts.
---
 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c |  6 +++---
 gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h| 16 
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c 
b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c
index fe7eeb1..001dc41 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c
@@ -14,9 +14,9 @@
their usage counts being stable.  Therefore, we just ensure at least one
xxlor instruction was generated.  */
 /* { dg-final { scan-assembler "xxlor" } } */
-/* { dg-final { scan-assembler-times "xvcmpeqdp" 5 } } */
-/* { dg-final { scan-assembler-times "xvcmpgtdp" 8 } } */
-/* { dg-final { scan-assembler-times "xvcmpgedp" 6 } } */
+/* { dg-final { scan-assembler-times "xvcmpeqdp" 4 } } */
+/* { dg-final { scan-assembler-times "xvcmpgtdp" 7 } } */
+/* { dg-final { scan-assembler-times "xvcmpgedp" 7 } } */
 /* { dg-final { scan-assembler-times "xvrdpim" 1 } } */
 /* { dg-final { scan-assembler-times "xvmaddadp" 1 } } */
 /* { dg-final { scan-assembler-times "xvmsubadp" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h 
b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h
index 422f8a1..4819433 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h
@@ -7,7 +7,9 @@
 void foo (vector double *out, vector double *in, vector long *p_l, vector bool 
long *p_b,
  vector unsigned char *p_uc, int *i, vector float *p_f,
  vector bool char *outbc, vector bool int *outbi,
- vector bool short *outbsi, vector int *outsi, vector unsigned int 
*outui)
+ vector bool short *outbsi, vector int *outsi,
+ vector unsigned int *outui, vector signed char *outsc,
+ vector unsigned char *outuc)
 {
   vector double in0 = in[0];
   vector double in1 = in[1];
@@ -20,6 +22,8 @@ void foo (vector double *out, vector double *in, vector long 
*p_l, vector bool l
   vector float inf0;
   vector float inf1;
   vector float inf2;
+  vector char inc0;
+  vector char inc1;
   vector bool char inbc0;
   vector bool char inbc1;
   vector bool short inbs0;
@@ -30,6 +34,7 @@ void foo (vector double *out, vector double *in, vector long 
*p_l, vector bool l
   vector unsigned short int inusi0, inusi1;
   vector signed int insi0, insi1;
   vector unsigned int inui0, inui1;
+  vector unsigned char inuc0, inuc1;
   
   *out++ = vec_abs (in0);
   *out++ = vec_add (in0, in1);
@@ -65,8 +70,6 @@ void foo (vector double *out, vector double *in, vector long 
*p_l, vector bool l
   *out++ = vec_or (inb, in0);
   *out++ = vec_perm (in0, in1, uc);
   *out++ = vec_rint (in0);
-  *out++ = vec_sel (in0, in1, inl);
-  *out++ = vec_sel (in0, in1, inb);
   *out++ = vec_sub (in0, in1);
   *out++ = vec_sqrt (in0);
   *out++ = vec_trunc (in0);
@@ -103,6 +106,7 @@ void foo (vector double *out, vector double *in, vector 
long *p_l, vector bool l
   *p_f++ = vec_nmsub (inf0, inf1, inf2);
   *p_f++ = vec_nmadd (inf0, inf1, inf2);
   *p_f++ = vec_or (inf0, inf1);
+  *p_f++ = vec_trunc (inf0);
   
   *out++ = vec_or (inbl0, in0);
   *out++ = vec_or (in0, inbl0);
@@ -113,6 +117,8 @@ void foo (vector double *out, vector double *in, vector 
long *p_l, vector bool l
   *outbc++ = vec_andc (inbc0, inbc1);
   *outbc++ = vec_or (inbc0, inbc1);
 
+  *outuc++ = vec_max (inuc0, inuc1);
+
   *outbi++ = vec_andc (inbi0, inbi1);
   *outbsi++ = vec_andc (inbs0, inbs1);
 
@@ -151,7 +157,9 @@ int main()
   vector bool short *outbsi;
   vector int *outsi;
   vector unsigned int *outui;
+  vector signed char *outsc;
+  vector unsigned char *outuc;
 
   foo (out, in, p_l, p_b, p_uc, i, p_f, outbc,
-   outbi, outbsi, outsi, outui);
+   outbi, outbsi, outsi, outui, outsc, outuc);
 }
-- 
2.7.4



Re: Use poly_int tree accessors

2018-04-30 Thread Jeff Law
On 01/09/2018 11:39 AM, Richard Sandiford wrote:
> This patch generalises various places that used hwi tree accessors
> so that they can handle poly_ints instead.  Earlier patches did
> this while updating interfaces; this patch just mops up some
> left-over pieces that weren't necessary to make things compile,
> but that still make sense.
> 
> In many cases these changes are by inspection rather than because
> something had shown them to be necessary.
> 
> I think the alias.c part is a minor bug fix: previously we used
> fits_uhwi_p for a signed HOST_WIDE_INT (which the caller does
> treat as signed rather than unsigned).  We also checked whether
> each individual offset overflowed but didn't check whether the
> sum did.
> 
> Sorry for not posting this earlier.  I kept holding it back in case
> more examples showed up.
> 
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
> Also tested by comparing the before-and-after assembly output for at
> least one target per CPU directory.  OK to install?
> 
> Richard
> 
> 
> 2018-01-09  Richard Sandiford  
> 
> gcc/
>   * alias.c (adjust_offset_for_component_ref): Use poly_int_tree_p
>   and wi::to_poly_offset.  Add the current offset and then check
>   whether the sum fits, rather than using an unchecked addition of
>   a checked term.  Check for a shwi rather than a uhwi.
>   * expr.c (get_bit_range): Use tree_to_poly_uint64.
>   (store_constructor): Use poly_int_tree_p.
>   (expand_expr_real_1): Likewise.
>   * function.c (assign_temp): Likewise.
>   * fold-const.c (const_binop): Use poly_int_tree_p and
>   wi::to_poly_offset.
>   (fold_indirect_ref_1): Likewise.  Use known_in_range_p to test
>   for an in-range vector access and multiple_p to attempt an exact
>   division.
>   * gimplify.c (gimple_add_tmp_var_fn): Use tree_fits_poly_uint64_p.
>   (gimple_add_tmp_var): Likewise.
>   * ipa-icf-gimple.c (func_checker::compare_operand): Use
>   to_poly_offset for MEM offsets.
>   * ipa-icf.c (sem_variable::equals): Likewise.
>   * stor-layout.c (compute_record_mode): Use poly_int_tree_p.
>   * tree-vectorizer.c (get_vec_alignment_for_array_type): Likewise.
>   * tree-predcom.c (aff_combination_dr_offset): Use wi::to_poly_widest
>   rather than wi::to_widest for DR_INITs.
>   * tree-ssa-sccvn.c (ao_ref_init_from_vn_reference): Use
>   wi::to_poly_offset for BIT_FIELD_REF offsets.
>   (vn_reference_maybe_forwprop_address): Use poly_int_tree_p and
>   wi::to_poly_offset.
>   * tree-vect-data-refs.c (vect_find_same_alignment_drs): Use
>   wi::to_poly_offset for DR_INIT.
>   (vect_analyze_data_ref_accesses): Require both DR_INITs to be
>   INTEGER_CSTs.
>   (vect_analyze_group_access_1): Note that here.
>   * var-tracking.c (emit_note_insn_var_location): Use
>   tree_to_poly_uint64.
OK.  If minor edits are necessary to deal with changes since this was
originally posted, consider those pre-approved.

Jeff


Re: [PATCH, rs6000] Add missing vec_max tests

2018-04-30 Thread Peter Bergner
On 4/30/18 10:36 AM, Carl Love wrote:
> -/* We generate xxlor instructions for many reasons other than or'ing vector
> -   operands or calling __builtin_vec_or(), which  means we cannot rely on
> -   their usage counts being stable.  Therefore, we just ensure at least one
> -   xxlor instruction was generated.  */
> -/* { dg-final { scan-assembler "xxlor" } } */
[snip]
> +/* { dg-final { scan-assembler-times "xxlor" 25 } } */

Please do not change back to counting xxlor insns.  We used to do that before
my change in March where I added the comment you are removing that explains
the folly of trying to count them.


Peter



[PATCH, rs6000] Add vec_first_match_index, vec_first_mismatch_index, vec_first_match_or_eos_index, vec_first_mismatch_or_eos_index

2018-04-30 Thread Carl Love

GCC Maintainers:

The following patch adds a new test file for the vec_first_match_index,
vec_first_mismatch_index, vec_first_match_or_eos_index,
vec_first_mismatch_or_eos_index builtins.

The patch for the test case was tested on

    powerpc64le-unknown-linux-gnu (Power 8 LE)
    powerpc64-unknown-linux-gnu (Power 8 BE)
    powerpc64le-unknown-linux-gnu (Power 9 LE).

 Please let me know if the patch looks OK for GCC mainline.

 Carl Love

-

gcc/testsuite/ChangeLog:

2018-04-27  Carl Love  
* gcc.target/powerpc/builtins-8-p9-runnable.c: Add new test file.
---
 .../gcc.target/powerpc/builtins-8-p9-runnable.c| 1044 
 1 file changed, 1044 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-8-p9-runnable.c

diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-8-p9-runnable.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-8-p9-runnable.c
new file mode 100644
index 000..4379d41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-8-p9-runnable.c
@@ -0,0 +1,1044 @@
+/* { dg-do run { target { powerpc*-*-* &&  p9vector_hw } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include 
+#include 
+#include 
+#include  // vector
+
+#ifdef DEBUG
+#include 
+#endif
+
+void abort (void);
+
+
+int main() {
+
+  vector signed char char_src1, char_src2;
+  vector unsigned char uchar_src1, uchar_src2;
+  vector signed short int short_src1, short_src2;
+  vector unsigned short int ushort_src1, ushort_src2;
+  vector signed int int_src1, int_src2;
+  vector unsigned int uint_src1, uint_src2;
+  unsigned int result, expected_result;
+
+
+  /* Tests for: vec_first_match_index() */
+  /* char */
+  char_src1 = (vector signed char) {-1, 2, 3, 4, -5, 6, 7, 8,
+   9, 10, 11, 12, 13, 14, 15, 16};
+  char_src2 = (vector signed char) {-1, 2, 3, 20, -5, 6, 7, 8,
+   9, 10, 11, 12, 13, 14, 15, 16};
+  expected_result = 0;
+
+  result = vec_first_match_index (char_src1, char_src2);
+
+  if (result != expected_result)
+#ifdef DEBUG
+printf("Error: char first match result (%d) does not match expected result 
(%d)\n",
+  result, expected_result);
+#else
+abort();
+#endif
+
+  char_src1 = (vector signed char) {1, 2, 3, 4, 5, 6, 7, 8,
+   9, 10, 11, 12, 13, 14, 15, 16};
+  char_src2 = (vector signed char) {-1, -2, -3, -4, -5, -6, -7, -8,
+   -9, -10, -11, -12, -13, -14, -15, -16};
+  expected_result = 16;
+
+  result = vec_first_match_index (char_src1, char_src2);
+
+  if (result != expected_result)
+#ifdef DEBUG
+printf("Error: char first match result (%d) does not match expected result 
(%d)\n",
+  result, expected_result);
+#else
+abort();
+#endif
+
+  uchar_src1 = (vector unsigned char) {0, 2, 3, 4, 5, 6, 7, 8,
+  9, 10, 11, 12, 13, 14, 15, 16};
+  uchar_src2 = (vector unsigned char) {1, 0, 3, 4, 5, 6, 7, 8,
+  9, 10, 11, 12, 13, 14, 15, 16};
+  expected_result = 2;
+
+  result = vec_first_match_index (uchar_src1, uchar_src2);
+
+  if (result != expected_result)
+#ifdef DEBUG
+printf("Error: uchar first match result (%d) does not match expected 
result (%d)\n",
+  result, expected_result);
+#else
+abort();
+#endif
+
+  uchar_src1 = (vector unsigned char) {2, 3, 4, 5, 6, 7, 8, 9,
+  10, 11, 12, 13, 14, 15, 16, 17};
+  uchar_src2 = (vector unsigned char) {3, 4, 5, 6, 7, 8, 9, 10,
+  11, 12, 13, 14, 15, 16, 17, 18};
+  expected_result = 16;
+
+  result = vec_first_match_index (uchar_src1, uchar_src2);
+
+  if (result != expected_result)
+#ifdef DEBUG
+printf("Error: uchar first match result (%d) does not match expected 
result (%d)\n",
+  result, expected_result);
+#else
+abort();
+#endif
+
+  /* short int */
+  short_src1 = (vector short int) {10, -20, -30, 40, 50, 60, 70, 80};
+  short_src2 = (vector short int) {-10, 20, 30, 40, 50, 60, 70, 80};
+
+  expected_result = 3;
+
+  result = vec_first_match_index (short_src1, short_src2);
+
+  if (result != expected_result)
+#ifdef DEBUG
+printf("Error: short int first match result (%d) does not match expected 
result (%d)\n",
+  result, expected_result);
+#else
+abort();
+#endif
+
+  short_src1 = (vector short int) {10, 20, 30, 40, 50, 60, 70, 80};
+  short_src2 = (vector short int) {0, 0, 0, 0, 0, 0, 0, 0};
+
+  expected_result = 8;
+
+  result = vec_first_match_index (short_src1, short_src2);
+
+  if (result != expected_result)
+#ifdef DEBUG
+printf("Error: short int first match result (%d) does not match expected 
result (%d)\n",
+  result, expected_result);
+#else
+

Re: [PATCH] Clarify documentation for -fpie and -fPIE

2018-04-30 Thread Jeff Law
On 04/30/2018 06:51 AM, Jonathan Wakely wrote:
> I noticed a couple of missing definite articles in the description of
> -fpie and -fPIE, and found the last sentence unclear. This clarifies
> that it means you should use these options during compilation if you
> plan to use -pie during linking (rather than implying you use these
> during linking as well).
> 
> I also added a hyphen to "position independent" but maybe that part
> should wait for a decision on
> https://gcc.gnu.org/ml/gcc/2018-04/msg00196.html
> 
> 
> * doc/invoke.texi (-fpie, -fPIE): Fix grammar and clarify
> interaction with -pie.
> 
> OK for trunk?
> 
OK
jeff


Re: [PATCH] Print function attributes in rtl dumps

2018-04-30 Thread Jeff Law
On 03/29/2018 04:23 AM, Tom de Vries wrote:
> [ Fix ENOPATCH ]
> 
> On 03/29/2018 12:17 PM, Tom de Vries wrote:
>> Hi,
>>
>> when we compile a function with attributes:
>> ...
>> int __attribute__((noinline, noclone))
>> foo (void)
>> {
>>    return 2;
>> }
>> ...
>>
>> like this:
>> ...
>> gcc main.c -fdump-tree-all -fdump-rtl-all
>> ...
>>
>> we find the function attributes starting from foo.c.004t.gimple:
>> ...
>> __attribute__((noclone, noinline))
>> foo ()
>> {
>>    int D.1961;
>>
>>    D.1961 = 2;
>>    return D.1961;
>> }
>> ...
>> to foo.c.232t.optimized.
>>
>>
>> But we don't find the attributes in the rtl dumps:
>> ...
>> $ grep __attribute__ foo.c.*r.*
>> $
>> ...
>>
>> This patch adds printing of the function attributes in the rtl dump,
>> f.i. foo.c.235r.vregs looks like this :
>> ...
>> ;; Function foo (foo, funcdef_no=0, decl_uid=1958, cgraph_uid=0,
>> symbol_order=0)
>>
>> function foo attributes: __attribute__((noclone, noinline))
>> (note 1 0 3 NOTE_INSN_DELETED)
>> (note 3 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
>> (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
>> (insn 5 2 8 2 (set (reg:SI 87 [ _1 ])
>>  (const_int 2 [0x2])) "foo.c":4 86 {*movsi_internal}
>>   (nil))
>> (insn 8 5 12 2 (set (reg:SI 88 [  ])
>>  (reg:SI 87 [ _1 ])) "foo.c":4 86 {*movsi_internal}
>>   (nil))
>> (insn 12 8 13 2 (set (reg/i:SI 0 ax)
>>  (reg:SI 88 [  ])) "foo.c":5 86 {*movsi_internal}
>>   (nil))
>> (insn 13 12 0 2 (use (reg/i:SI 0 ax)) "foo.c":5 -1
>>   (nil))
>> ...
>>
>> I've added the "function foo attributes" prefix because in other rtl
>> dumps there may be quite a number of lines between the ";; Function
>> foo" header and the first insn.
>>
>> OK for stage1 if bootstrap and reg-test on x86 succeeds?
>>
>> Thanks,
>> - Tom
> 
> 
> 0001-Print-function-attributes-in-rtl-dump.patch
> 
> 
> Print function attributes in rtl dump
> 
> 2018-03-29  Tom de Vries  
> 
>   * passes.c (execute_function_dump): Call dump_function_attributes before
>   print_rtl_with_bb.
>   * tree-cfg.c (dump_function_attributes): New function, factored out of
>   ...
>   (dump_function_to_file): ... here.
>   * tree-cfg.h (dump_function_attributes): Declare.
So I think the RTL front-end needs updating to parse the attributes.

With that support added this will be OK.

Jeff


Re: [PATCH] Fix loop-header copying do-while loop detection (PR85116)

2018-04-30 Thread David Edelsohn
On Mon, Apr 30, 2018 at 3:50 AM Richard Biener  wrote

> On Sun, 29 Apr 2018, Richard Biener wrote:

> > On April 29, 2018 1:06:47 AM GMT+02:00, David Edelsohn <
dje@gmail.com> wrote:
> > >Hi, Richi
> > >
> > >I had been using two source trees to speed the bisection and didn't
> > >realize
> > >that one defaulted to DWARF debugging and the other defaulted to XCOFF
> > >debugging, which confused the bisection result.  The -f[no-]checking
> > >patch
> > >is the culprit.
> >
> > My theory is that all non-bootstrap-debug  bootstrap configs are
currently broken.
> >
> > I'll deal with this tomorrow.

> So it looks like it is a very simple mistake fixed by properly ignoring
> -fchecking[=] in gen_producer_string ().  I'm including the use of
> -fchecking=1 instead of -fchecking given that -fchecking=2 we may
> default to is documented to eventually affect code-generation.

> Bootstrap / bootstrap-O3 running on x86_64-unknown-linux-gnu.

> Richard.

> 2018-04-30  Richard Biener  

>  * Makefile.tpl (STAGE3_CFLAGS): Use -fchecking=1.
>  (STAGE3_TFLAGS): Likewise.
>  (STAGEtrain_CFLAGS): Filter out -fchecking=1.
>  (STAGEtrain_TFLAGS): Likewise.
>  * Makefile.in: Regenerate.

>  * dwarf2out.c (gen_producer_string): Ignore -fchecking[=].

> Index: Makefile.tpl
> ===
> --- Makefile.tpl(revision 259754)
> +++ Makefile.tpl(working copy)
> @@ -459,14 +459,14 @@ STAGE1_CONFIGURE_FLAGS = --disable-inter
>   STAGE1_TFLAGS += -fno-checking
>   STAGE2_CFLAGS += -fno-checking
>   STAGE2_TFLAGS += -fno-checking
> -STAGE3_CFLAGS += -fchecking
> -STAGE3_TFLAGS += -fchecking
> +STAGE3_CFLAGS += -fchecking=1
> +STAGE3_TFLAGS += -fchecking=1

>   STAGEprofile_CFLAGS = $(STAGE2_CFLAGS) -fprofile-generate
>   STAGEprofile_TFLAGS = $(STAGE2_TFLAGS)

> -STAGEtrain_CFLAGS = $(filter-out -fchecking,$(STAGE3_CFLAGS))
> -STAGEtrain_TFLAGS = $(filter-out -fchecking,$(STAGE3_TFLAGS))
> +STAGEtrain_CFLAGS = $(filter-out -fchecking=1,$(STAGE3_CFLAGS))
> +STAGEtrain_TFLAGS = $(filter-out -fchecking=1,$(STAGE3_TFLAGS))

>   STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use
>   STAGEfeedback_TFLAGS = $(STAGE4_TFLAGS)
> Index: Makefile.in
> ===
> --- Makefile.in (revision 259754)
> +++ Makefile.in (working copy)
> @@ -536,14 +536,14 @@ STAGE1_CONFIGURE_FLAGS = --disable-inter
>   STAGE1_TFLAGS += -fno-checking
>   STAGE2_CFLAGS += -fno-checking
>   STAGE2_TFLAGS += -fno-checking
> -STAGE3_CFLAGS += -fchecking
> -STAGE3_TFLAGS += -fchecking
> +STAGE3_CFLAGS += -fchecking=1
> +STAGE3_TFLAGS += -fchecking=1

>   STAGEprofile_CFLAGS = $(STAGE2_CFLAGS) -fprofile-generate
>   STAGEprofile_TFLAGS = $(STAGE2_TFLAGS)

> -STAGEtrain_CFLAGS = $(filter-out -fchecking,$(STAGE3_CFLAGS))
> -STAGEtrain_TFLAGS = $(filter-out -fchecking,$(STAGE3_TFLAGS))
> +STAGEtrain_CFLAGS = $(filter-out -fchecking=1,$(STAGE3_CFLAGS))
> +STAGEtrain_TFLAGS = $(filter-out -fchecking=1,$(STAGE3_TFLAGS))

>   STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use
>   STAGEfeedback_TFLAGS = $(STAGE4_TFLAGS)
> Index: gcc/dwarf2out.c
> ===
> --- gcc/dwarf2out.c (revision 259754)
> +++ gcc/dwarf2out.c (working copy)
> @@ -24234,6 +24234,8 @@ gen_producer_string (void)
> case OPT_fmacro_prefix_map_:
> case OPT_ffile_prefix_map_:
> case OPT_fcompare_debug:
> +  case OPT_fchecking:
> +  case OPT_fchecking_:
>  /* Ignore these.  */
>  continue;
> default:

Bootstrap on AIX succeeds with this patch.

Thanks, David


[PATCH] selftest: remove "Yoda ordering" in assertions

2018-04-30 Thread David Malcolm
Our selftest assertions were of the form:

  ASSERT_EQ (expected, actual)

and both Richard Sandiford and I find this "Yoda ordering" confusing.

Our existing tests aren't entirely consistent about this, and it doesn't make
sense for ASSERT_NE and its variants.

The ordering comes from googletest's API, which is what
the earliest version of the selftest code used (before Bernd persuaded
me to stop over-engineering it :) ).

googletest's API now uses just "val1" and "val2" for binary assertion
macros, and their docs now say:

"Historical note: Before February 2016 *_EQ had a convention of calling
it as ASSERT_EQ(expected, actual), so lots of existing code uses this
order. Now *_EQ treats both parameters in the same way."

This seems to have been:
https://github.com/google/googletest/commit/f364e188372e489230ef4e44e1aec6bcb08f3acf
https://github.com/google/googletest/pull/713

This patch renames the params in our selftest API from "expected" and
"actual" to "val1" and "val2".

ASSERT_STREQ (and ASSERT_STREQ_AT) had an asymmetry in error-reporting, where
they did a better job of reporting if the second of the params was NULL; this
patch now handles params equivalently (and both must be non-NULL for a pass).
We aren't able to selftest selftest failures, so I tested the five cases
by hand while developing the patch (4 NULL vs non-NULL cases, with the
both-non-NULL case having pass and fail sub-cases).

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?

gcc/ChangeLog:
* selftest.c (assert_streq): Rename "expected" and "actual" to
"val1" and "val2".  Extend NULL-handling to cover both inputs
symmetrically, while still requiring both to be non-NULL for a pass.
* selftest.h (assert_streq): Rename "expected" and "actual" to
"val1" and "val2".
(ASSERT_EQ): Likewise.
(ASSERT_EQ_AT): Likewise.
(ASSERT_KNOWN_EQ): Likewise.
(ASSERT_KNOWN_EQ_AT): Likewise.
(ASSERT_NE): Likewise.
(ASSERT_MAYBE_NE): Likewise.
(ASSERT_MAYBE_NE_AT): Likewise.
(ASSERT_STREQ): Likewise.  Clarify that both must be non-NULL for
the assertion to pass.
(ASSERT_STREQ_AT): Likewise.
---
 gcc/selftest.c | 39 --
 gcc/selftest.h | 66 +-
 2 files changed, 56 insertions(+), 49 deletions(-)

diff --git a/gcc/selftest.c b/gcc/selftest.c
index 5709110..74adc63 100644
--- a/gcc/selftest.c
+++ b/gcc/selftest.c
@@ -63,27 +63,34 @@ fail_formatted (const location , const char *fmt, ...)
 }
 
 /* Implementation detail of ASSERT_STREQ.
-   Compare val_expected and val_actual with strcmp.  They ought
-   to be non-NULL; fail gracefully if either are NULL.  */
+   Compare val1 and val2 with strcmp.  They ought
+   to be non-NULL; fail gracefully if either or both are NULL.  */
 
 void
 assert_streq (const location ,
- const char *desc_expected, const char *desc_actual,
- const char *val_expected, const char *val_actual)
+ const char *desc_val1, const char *desc_val2,
+ const char *val1, const char *val2)
 {
-  /* If val_expected is NULL, the test is buggy.  Fail gracefully.  */
-  if (val_expected == NULL)
-fail_formatted (loc, "ASSERT_STREQ (%s, %s) expected=NULL",
-   desc_expected, desc_actual);
-  /* If val_actual is NULL, fail with a custom error message.  */
-  if (val_actual == NULL)
-fail_formatted (loc, "ASSERT_STREQ (%s, %s) expected=\"%s\" actual=NULL",
-   desc_expected, desc_actual, val_expected);
-  if (strcmp (val_expected, val_actual) == 0)
-pass (loc, "ASSERT_STREQ");
+  /* If val1 or val2 are NULL, fail with a custom error message.  */
+  if (val1 == NULL)
+if (val2 == NULL)
+  fail_formatted (loc, "ASSERT_STREQ (%s, %s) val1=NULL val2=NULL",
+ desc_val1, desc_val2);
+else
+  fail_formatted (loc, "ASSERT_STREQ (%s, %s) val1=NULL val2=\"%s\"",
+ desc_val1, desc_val2, val2);
   else
-fail_formatted (loc, "ASSERT_STREQ (%s, %s) expected=\"%s\" actual=\"%s\"",
-   desc_expected, desc_actual, val_expected, val_actual);
+if (val2 == NULL)
+  fail_formatted (loc, "ASSERT_STREQ (%s, %s) val1=\"%s\" val2=NULL",
+ desc_val1, desc_val2, val1);
+else
+  {
+   if (strcmp (val1, val2) == 0)
+ pass (loc, "ASSERT_STREQ");
+   else
+ fail_formatted (loc, "ASSERT_STREQ (%s, %s) val1=\"%s\" val2=\"%s\"",
+ desc_val1, desc_val2, val1, val2);
+  }
 }
 
 /* Implementation detail of ASSERT_STR_CONTAINS.
diff --git a/gcc/selftest.h b/gcc/selftest.h
index fbc2bfe..fc47b2c 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -67,8 +67,8 @@ extern void fail_formatted (const location , const char 
*fmt, ...)
 /* Implementation detail of ASSERT_STREQ.  */
 
 extern void assert_streq (const 

[PATCH, rs6000] Add missing vec_max tests

2018-04-30 Thread Carl Love

GCC Maintainers:

The following patch adds tests for the vec_max builtin.

The patch for the test case was tested on

    powerpc64le-unknown-linux-gnu (Power 8 LE)
powerpc64-unknown-linux-gnu (Power 8 BE)
powerpc64le-unknown-linux-gnu (Power 9 LE).

 Please let me know if the patch looks OK for GCC mainline.

 Carl Love

-

gcc/testsuite/ChangeLog:

2018-04-27  Carl Love  
* gcc.target/powerpc/vsx-vector-6.h (foo): Add test for vec_max,
vec_trunc.
* gcc.target/powerpc/vsx-vector-6-le.c (dg-final): Update xxlor,
xxlnor, xvcmpgedp counts.
---
 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c | 12 
 gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h| 16 
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c 
b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c
index fe7eeb1..e02eadf 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c
@@ -9,14 +9,10 @@
 /* { dg-final { scan-assembler-times "xvabsdp" 1 } } */
 /* { dg-final { scan-assembler-times "xvadddp" 1 } } */
 /* { dg-final { scan-assembler-times "xxlnor" 8 } } */
-/* We generate xxlor instructions for many reasons other than or'ing vector
-   operands or calling __builtin_vec_or(), which  means we cannot rely on
-   their usage counts being stable.  Therefore, we just ensure at least one
-   xxlor instruction was generated.  */
-/* { dg-final { scan-assembler "xxlor" } } */
-/* { dg-final { scan-assembler-times "xvcmpeqdp" 5 } } */
-/* { dg-final { scan-assembler-times "xvcmpgtdp" 8 } } */
-/* { dg-final { scan-assembler-times "xvcmpgedp" 6 } } */
+/* { dg-final { scan-assembler-times "xxlor" 25 } } */
+/* { dg-final { scan-assembler-times "xvcmpeqdp" 4 } } */
+/* { dg-final { scan-assembler-times "xvcmpgtdp" 7 } } */
+/* { dg-final { scan-assembler-times "xvcmpgedp" 7 } } */
 /* { dg-final { scan-assembler-times "xvrdpim" 1 } } */
 /* { dg-final { scan-assembler-times "xvmaddadp" 1 } } */
 /* { dg-final { scan-assembler-times "xvmsubadp" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h 
b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h
index 422f8a1..4819433 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h
@@ -7,7 +7,9 @@
 void foo (vector double *out, vector double *in, vector long *p_l, vector bool 
long *p_b,
  vector unsigned char *p_uc, int *i, vector float *p_f,
  vector bool char *outbc, vector bool int *outbi,
- vector bool short *outbsi, vector int *outsi, vector unsigned int 
*outui)
+ vector bool short *outbsi, vector int *outsi,
+ vector unsigned int *outui, vector signed char *outsc,
+ vector unsigned char *outuc)
 {
   vector double in0 = in[0];
   vector double in1 = in[1];
@@ -20,6 +22,8 @@ void foo (vector double *out, vector double *in, vector long 
*p_l, vector bool l
   vector float inf0;
   vector float inf1;
   vector float inf2;
+  vector char inc0;
+  vector char inc1;
   vector bool char inbc0;
   vector bool char inbc1;
   vector bool short inbs0;
@@ -30,6 +34,7 @@ void foo (vector double *out, vector double *in, vector long 
*p_l, vector bool l
   vector unsigned short int inusi0, inusi1;
   vector signed int insi0, insi1;
   vector unsigned int inui0, inui1;
+  vector unsigned char inuc0, inuc1;
   
   *out++ = vec_abs (in0);
   *out++ = vec_add (in0, in1);
@@ -65,8 +70,6 @@ void foo (vector double *out, vector double *in, vector long 
*p_l, vector bool l
   *out++ = vec_or (inb, in0);
   *out++ = vec_perm (in0, in1, uc);
   *out++ = vec_rint (in0);
-  *out++ = vec_sel (in0, in1, inl);
-  *out++ = vec_sel (in0, in1, inb);
   *out++ = vec_sub (in0, in1);
   *out++ = vec_sqrt (in0);
   *out++ = vec_trunc (in0);
@@ -103,6 +106,7 @@ void foo (vector double *out, vector double *in, vector 
long *p_l, vector bool l
   *p_f++ = vec_nmsub (inf0, inf1, inf2);
   *p_f++ = vec_nmadd (inf0, inf1, inf2);
   *p_f++ = vec_or (inf0, inf1);
+  *p_f++ = vec_trunc (inf0);
   
   *out++ = vec_or (inbl0, in0);
   *out++ = vec_or (in0, inbl0);
@@ -113,6 +117,8 @@ void foo (vector double *out, vector double *in, vector 
long *p_l, vector bool l
   *outbc++ = vec_andc (inbc0, inbc1);
   *outbc++ = vec_or (inbc0, inbc1);
 
+  *outuc++ = vec_max (inuc0, inuc1);
+
   *outbi++ = vec_andc (inbi0, inbi1);
   *outbsi++ = vec_andc (inbs0, inbs1);
 
@@ -151,7 +157,9 @@ int main()
   vector bool short *outbsi;
   vector int *outsi;
   vector unsigned int *outui;
+  vector signed char *outsc;
+  vector unsigned char *outuc;
 
   foo (out, in, p_l, p_b, p_uc, i, p_f, outbc,
-   outbi, outbsi, outsi, outui);
+   outbi, outbsi, outsi, outui, outsc, outuc);
 }
-- 
2.7.4



[committed] selftest.h: fix alphabetization of per-source-file selftest declarations

2018-04-30 Thread David Malcolm
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

Committed to trunk as r259773.

gcc/ChangeLog:
* selftest.h: Fix alphabetization of per-source-file selftest
declarations.
---
 gcc/selftest.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/selftest.h b/gcc/selftest.h
index e3117c6..fbc2bfe 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -186,36 +186,36 @@ class test_runner
alphabetical order.  */
 extern void attribute_c_tests ();
 extern void bitmap_c_tests ();
-extern void sbitmap_c_tests ();
 extern void diagnostic_c_tests ();
 extern void diagnostic_show_locus_c_tests ();
 extern void edit_context_c_tests ();
 extern void et_forest_c_tests ();
-extern void fold_const_c_tests ();
 extern void fibonacci_heap_c_tests ();
+extern void fold_const_c_tests ();
 extern void function_tests_c_tests ();
-extern void gimple_c_tests ();
 extern void ggc_tests_c_tests ();
+extern void gimple_c_tests ();
 extern void hash_map_tests_c_tests ();
 extern void hash_set_tests_c_tests ();
 extern void input_c_tests ();
+extern void predict_c_tests ();
 extern void pretty_print_c_tests ();
 extern void read_rtl_function_c_tests ();
 extern void rtl_tests_c_tests ();
+extern void sbitmap_c_tests ();
 extern void selftest_c_tests ();
+extern void simplify_rtx_c_tests ();
 extern void spellcheck_c_tests ();
 extern void spellcheck_tree_c_tests ();
 extern void sreal_c_tests ();
 extern void store_merging_c_tests ();
-extern void typed_splay_tree_c_tests ();
 extern void tree_c_tests ();
 extern void tree_cfg_c_tests ();
+extern void typed_splay_tree_c_tests ();
 extern void unique_ptr_tests_cc_tests ();
 extern void vec_c_tests ();
-extern void wide_int_cc_tests ();
-extern void predict_c_tests ();
-extern void simplify_rtx_c_tests ();
 extern void vec_perm_indices_c_tests ();
+extern void wide_int_cc_tests ();
 
 extern int num_passes;
 
-- 
1.8.5.3



[committed] Use char_span for return type of location_get_source_line

2018-04-30 Thread David Malcolm
location_get_source_line returns a const char * that isn't 0-terminated,
writing back a length through an int * param.

This is error-prone, as all call-sites have to take into account the
lack of 0-termination, and respect the length of the buffer.

It's cleaner to bundle together this pointer+length state into a class,
so this patch does so, reusing the "char_span" class that I introduced
in r250187 (as part of the fix for PR c/81405).

The patch also adds assertions to all accesses to the char_span.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

Committed to trunk as r259768.

gcc/c-family/ChangeLog:
* c-format.c (get_corrected_substring): Update for
location_get_source_line returning a char_span.  Use a char_span
when handling the prefix of the correction.
* c-indentation.c (get_visual_column): Update for
location_get_source_line returning a char_span.
(get_first_nws_vis_column): Likewise.

gcc/ChangeLog:
* diagnostic-show-locus.c (layout::layout): Update for
location_get_source_line returning a char_span.
(struct char_span): Move to input.h.
(struct correction): Update for fields in char_span becoming
private.
(struct source_line): Update for location_get_source_line
returning a char_span.
(layout::print_line): Likewise.
* edit-context.c (edited_file::print_content): Likewise.
(edited_file::print_diff_hunk): Likewise.
(edited_file::print_run_of_changed_lines): Likewise.
(edited_file::get_num_lines): Likewise.
(edited_line::edited_line): Likewise.
* final.c (asm_show_source): Likewise.
* input.c (location_get_source_line): Convert return type
from const char * to char_span, losing the final "line_len"
param.
(dump_location_info): Update for the above.
(get_substring_ranges_for_loc): Likewise.  Use a char_span
when handling the literal within the line.
(test_reading_source_line): Update for location_get_source_line
returning a char_span.
* input.h (class char_span): Move here from
diagnostic-show-locus.c, converting from a struct to a class.
Make data members private.
(char_span::operator bool): New.
(char_span::length): New.
(char_span::get_buffer): New.
(char_span::operator[]): New.
(char_span::subspan): Make const.
(char_span::xstrdup): New.
(location_get_source_line): Convert return type from const char *
to char_span, losing the final "line_size" param.

gcc/testsuite/ChangeLog:
* gcc.dg/plugin/diagnostic_plugin_test_show_locus.c
(test_show_locus): Update for location_get_source_line returning a
char_span.  Use char_span for handling words in the
"test_many_nested_locations" fix-it example.
---
 gcc/c-family/c-format.c| 10 ++-
 gcc/c-family/c-indentation.c   |  9 +--
 gcc/diagnostic-show-locus.c| 52 +--
 gcc/edit-context.c | 31 -
 gcc/final.c|  7 +-
 gcc/input.c| 76 ++
 gcc/input.h| 48 +-
 .../plugin/diagnostic_plugin_test_show_locus.c | 14 ++--
 8 files changed, 124 insertions(+), 123 deletions(-)

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index 3f4f83a..ee7c33d 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -3499,10 +3499,8 @@ get_corrected_substring (const substring_loc _loc,
   if (caret.column > finish.column)
 return NULL;
 
-  int line_width;
-  const char *line = location_get_source_line (start.file, start.line,
-  &line_width);
-  if (line == NULL)
+  char_span line = location_get_source_line (start.file, start.line);
+  if (!line)
 return NULL;
 
   /* If we got this far, then we have the line containing the
@@ -3511,9 +3509,9 @@ get_corrected_substring (const substring_loc _loc,
  Generate a trimmed copy, containing the prefix part of the conversion
  specification, up to the (but not including) the length modifier.
  In the above example, this would be "%-+*.*".  */
-  const char *current_content = line + start.column - 1;
   int length_up_to_type = caret.column - start.column;
-  char *prefix = xstrndup (current_content, length_up_to_type);
+  char_span prefix_span = line.subspan (start.column - 1, length_up_to_type);
+  char *prefix = prefix_span.xstrdup ();
 
   /* Now attempt to generate a suggestion for the rest of the specification
  (length modifier and conversion char), based on ARG_TYPE and
diff --git a/gcc/c-family/c-indentation.c b/gcc/c-family/c-indentation.c
index acca444..44b1e1e 100644
--- a/gcc/c-family/c-indentation.c
+++ 

[committed] input.h: use STATIC_ASSERT

2018-04-30 Thread David Malcolm
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

Committed to trunk as r259766.

gcc/ChangeLog:
* input.h (builtins_location_check): Convert to a STATIC_ASSERT.
---
 gcc/input.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/input.h b/gcc/input.h
index 67e4e7c..2569f85 100644
--- a/gcc/input.h
+++ b/gcc/input.h
@@ -34,8 +34,7 @@ const source_location BUILTINS_LOCATION = ((source_location) 
1);
 
 /* line-map.c reserves RESERVED_LOCATION_COUNT to the user.  Ensure
both UNKNOWN_LOCATION and BUILTINS_LOCATION fit into that.  */
-extern char builtins_location_check[(BUILTINS_LOCATION
-< RESERVED_LOCATION_COUNT) ? 1 : -1];
+STATIC_ASSERT (BUILTINS_LOCATION < RESERVED_LOCATION_COUNT);
 
 extern bool is_location_from_builtin_token (source_location);
 extern expanded_location expand_location (source_location);
-- 
1.8.5.3



Re: [PATCH] Prevent excessive loop-header copying with multiple exits

2018-04-30 Thread Kyrill Tkachov

Hi Richard,


On 27/04/18 09:24, Richard Biener wrote:

On Thu, 26 Apr 2018, Richard Biener wrote:

>
> The following makes loop-header copying stop after the first exit test
> it copied.  The reports rightfully complain about too much peeling.
> If some cases pop up which show we should peel up to a specific test
> we need to improve this heuristic which simply errs on the easy side.
>
> Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Bootstrap went ok but it showed a few required testsuite adjustments.

gcc.dg/tree-ssa/cunroll-13.c looks fragile (it was a test for a
profile mismatch), so I rewrote it as a GIMPLE testcase.

With the ivopt_mult_[12].c testcases IVOPTs no longer eliminates
an IV due to the change in IL -- what loop header copying does now
looks more sensible than before though, so I added GIMPLE testcase
variants that verify IVOPTs can still pull off the trick but I
had to XFAIL the C testcases (less IVs still look good and I fail
to see why the trick shouldn't work with the new IL - sth to
investigate).

Re-bootstrapping and testing on x86_64-unknown-linux-gnu now.



After this patch I'm seeing gfortran.dg/pr51434.f90 FAIL on aarch64 at -O1.
It is an execution failure.

The current trunk removes almost all of main (final optimised GIMPLE dump):

__attribute__((externally_visible))
main (integer(kind=4) argc, character(kind=1) * * argv)
{
  static integer(kind=4) options.3[7] = {68, 8191, 0, 1, 1, 0, 31};

   [local count: 1073741826]:
  _gfortran_set_args (argc_2(D), argv_3(D));
  _gfortran_set_options (7, [0]);
  return 0;

}

whereas trunk on the 28th had much more control flow in it:

__attribute__((externally_visible))
main (integer(kind=4) argc, character(kind=1) * * argv)
{
  unsigned long ivtmp.5;
  static struct a c = {.m=5, .t={"a", "b", "c", "d", "e", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", 
" ", " ", " "}};
  static integer(kind=4) options.3[7] = {68, 8191, 0, 1, 1, 0, 31};
  character(kind=1) _8;
  character(kind=1)[1:1] * _9;
  character(kind=1) _10;

   [local count: 1073741826]:
  _gfortran_set_args (argc_2(D), argv_3(D));
  _gfortran_set_options (7, [0]);

   [local count: 4208275768]:
  # ivtmp.5_12 = PHI <0(2), ivtmp.5_15(4)>
  _8 = MEM[symbol: c, index: ivtmp.5_12, offset: 4B];
  _9 = MEM[symbol: A.1, index: ivtmp.5_12, step: 8, offset: 0B];
  _10 = *_9[1]{lb: 1 sz: 1};
  if (_8 != _10)
goto ; [5.50%]
  else
goto ; [94.50%]

   [local count: 3976820602]:
  ivtmp.5_15 = ivtmp.5_12 + 1;
  if (ivtmp.5_15 == 5)
goto ; [16.67%]
  else
goto ; [83.33%]

   [local count: 1072883003]:
  return 0;

   [local count: 429327]:
  _gfortran_stop_numeric (2, 0);

}

Cheers,
Kyrill


Richard.

2018-04-26  Richard Biener  

PR tree-optimization/28364
PR tree-optimization/85275
* tree-ssa-loop-ch.c (ch_base::copy_headers): Stop after
copying first exit test.

* gcc.dg/tree-ssa/copy-headers-5.c: New testcase.
* gcc.dg/tree-ssa/predcom-8.c: Likewise.
* gcc.dg/tree-ssa/cunroll-13.c: Rewrite to gimple testcase.
* gcc.dg/tree-ssa/ivopt_mult_1.c: XFAIL.
* gcc.dg/tree-ssa/ivopt_mult_1g.c: Add gimple variant that
still passes.
* gcc.dg/tree-ssa/ivopt_mult_2.c: XFAIL.
* gcc.dg/tree-ssa/ivopt_mult_2g.c: Add gimple variant that
still passes.
* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Adjust.
* gcc.dg/tree-ssa/20030710-1.c: Likewise.
* gcc.dg/tree-ssa/20030711-1.c: Likewise.

Index: gcc/tree-ssa-loop-ch.c
===
--- gcc/tree-ssa-loop-ch.c  (revision 259695)
+++ gcc/tree-ssa-loop-ch.c  (working copy)
@@ -340,6 +340,11 @@ ch_base::copy_headers (function *fun)
   bbs[n_bbs++] = header;
   gcc_assert (bbs_size > n_bbs);
   header = exit->dest;
+ /* Make sure to stop copying after we copied the first exit test.
+Without further heuristics we do not want to rotate the loop
+any further.  */
+ if (loop_exits_from_bb_p (loop, exit->src))
+   break;
 }

   if (!exit)
Index: gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c (nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c (working copy)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ch2-details" } */
+
+int is_sorted(int *a, int n)
+{
+  for (int i = 0; i < n - 1; i++)
+if (a[i] > a[i + 1])
+  return 0;
+  return 1;
+}
+
+/* Verify we apply loop header copying but only copy the IV test and
+   not the alternate exit test.  */
+
+/* { dg-final { scan-tree-dump "is now do-while loop" "ch2" } } */
+/* { dg-final { scan-tree-dump-times "  if " 3 "ch2" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/predcom-8.c
===

[PATCH][GCC][AArch64] Correct 3 way XOR instructions adding missing patterns.

2018-04-30 Thread Tamar Christina
Hi All,

This patch adds the missing neon intrinsics for all 128-bit vector integer
modes for the three-way XOR and negate and xor instructions for Armv8.2-a to
Armv8.4-a.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master? And for backport to the GCC-8 branch?

gcc/
2018-04-30  Tamar Christina  

* config/aarch64/aarch64-simd.md (aarch64_eor3qv8hi): Change to
eor3q4.
(aarch64_bcaxqv8hi): Change to bcaxq4.
* config/aarch64/aarch64-simd-builtins.def (veor3q_u8, veor3q_u32,
veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8,
vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32,
vbcaxq_s64): New.
* config/aarch64/arm_neon.h: Likewise.
* config/aarch64/iterators.md (VQ_I): New.

gcc/testsuite/
2018-04-30  Tamar Christina  

* gcc.target/gcc.target/aarch64/sha3.h (veor3q_u8, veor3q_u32,
veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8,
vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32,
vbcaxq_s64): New.
* gcc.target/gcc.target/aarch64/sha3_1.c: Likewise.
* gcc.target/gcc.target/aarch64/sha3_1.c: Likewise.
* gcc.target/gcc.target/aarch64/sha3_1.c: Likewise.

Thanks,
Tamar

-- 
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index b383f2485e5a287c6d833122d6be0c9ff2ef72a2..439d4837fe724b33d4c1bd834570fb464f47eb5b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -599,14 +599,16 @@
   VAR1 (BINOPU, crypto_sha512su0q, 0, v2di)
   /* Implemented by aarch64_crypto_sha512su1qv2di.  */
   VAR1 (TERNOPU, crypto_sha512su1q, 0, v2di)
-  /* Implemented by aarch64_eor3qv8hi.  */
-  VAR1 (TERNOPU, eor3q, 0, v8hi)
+  /* Implemented by eor3q4.  */
+  BUILTIN_VQ_I (TERNOPU, eor3q, 4)
+  BUILTIN_VQ_I (TERNOP, eor3q, 4)
   /* Implemented by aarch64_rax1qv2di.  */
   VAR1 (BINOPU, rax1q, 0, v2di)
   /* Implemented by aarch64_xarqv2di.  */
   VAR1 (TERNOPUI, xarq, 0, v2di)
-  /* Implemented by aarch64_bcaxqv8hi.  */
-  VAR1 (TERNOPU, bcaxq, 0, v8hi)
+  /* Implemented by bcaxq4.  */
+  BUILTIN_VQ_I (TERNOPU, bcaxq, 4)
+  BUILTIN_VQ_I (TERNOP, bcaxq, 4)
 
   /* Implemented by aarch64_fmll_low.  */
   VAR1 (TERNOP, fmlal_low, 0, v2sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1154fc3d58deaa33413ea3050ff7feec37f092a6..12fea393fa74f04a61c0c81342898dfc0e7228b5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5955,13 +5955,13 @@
 
 ;; sha3
 
-(define_insn "aarch64_eor3qv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=w")
-	(xor:V8HI
-	 (xor:V8HI
-	  (match_operand:V8HI 2 "register_operand" "%w")
-	  (match_operand:V8HI 3 "register_operand" "w"))
-	 (match_operand:V8HI 1 "register_operand" "w")))]
+(define_insn "eor3q4"
+  [(set (match_operand:VQ_I 0 "register_operand" "=w")
+	(xor:VQ_I
+	 (xor:VQ_I
+	  (match_operand:VQ_I 2 "register_operand" "%w")
+	  (match_operand:VQ_I 3 "register_operand" "w"))
+	 (match_operand:VQ_I 1 "register_operand" "w")))]
   "TARGET_SIMD && TARGET_SHA3"
   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
   [(set_attr "type" "crypto_sha3")]
@@ -5991,13 +5991,13 @@
   [(set_attr "type" "crypto_sha3")]
 )
 
-(define_insn "aarch64_bcaxqv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=w")
-	(xor:V8HI
-	 (and:V8HI
-	  (not:V8HI (match_operand:V8HI 3 "register_operand" "w"))
-	  (match_operand:V8HI 2 "register_operand" "w"))
-	 (match_operand:V8HI 1 "register_operand" "w")))]
+(define_insn "bcaxq4"
+  [(set (match_operand:VQ_I 0 "register_operand" "=w")
+	(xor:VQ_I
+	 (and:VQ_I
+	  (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
+	  (match_operand:VQ_I 2 "register_operand" "w"))
+	 (match_operand:VQ_I 1 "register_operand" "w")))]
   "TARGET_SIMD && TARGET_SHA3"
   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
   [(set_attr "type" "crypto_sha3")]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index c45c29ae815c9ff373eb2f57a77ebeda910a30cf..4ff76b4133959ae598468dff2554db37a0d07a62 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -32068,6 +32068,13 @@ vsha512su1q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
   return __builtin_aarch64_crypto_sha512su1qv2di_ (__a, __b, __c);
 }
 
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+  return __builtin_aarch64_eor3qv16qi_ (__a, __b, __c);
+}
+
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
@@ -32075,6 +32082,49 @@ veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
   

[og7, openacc, testsuite, committed] Reduce resource usage for Titan V in parallel-dims.c

2018-04-30 Thread Tom de Vries

Hi,

atm parallel-dims.c fails on og7 with Titan V due to too few resources.

This patch reduces the amount of resources used for one offloading 
region, and moves another to a link-only test-case.


This allows the test-case to pass.

Committed to og7.

Thanks,
- Tom
[openacc, testsuite] Reduce resource usage for Titan V in parallel-dims.c

2018-04-30  Tom de Vries  

	* testsuite/libgomp.oacc-c-c++-common/parallel-dims-compile.c: New test,
	factored out of ...
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c (main): ... here.
	Limit num_workers to avoid insufficient-resources-to-launch fatal error.

---
 .../parallel-dims-compile.c| 100 +
 .../libgomp.oacc-c-c++-common/parallel-dims.c  |  44 ++---
 2 files changed, 107 insertions(+), 37 deletions(-)

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims-compile.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims-compile.c
new file mode 100644
index 000..2d7fdbd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims-compile.c
@@ -0,0 +1,100 @@
+/* { dg-do "link" } */
+/* { dg-additional-options "-foffload-force" } */
+
+#include <limits.h>
+#include <openacc.h>
+
+/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
+   not behaving as expected for -O0.  */
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
+
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
+
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
+
+
+int main ()
+{
+  acc_init (acc_device_default);
+
+  /* GR, WP, VS.  */
+  {
+/* We try with an outrageously large value. */
+#define WORKERS 2 << 20
+int workers_actual = WORKERS;
+int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
+gangs_min = workers_min = vectors_min = INT_MAX;
+gangs_max = workers_max = vectors_max = INT_MIN;
+#pragma acc parallel copy (workers_actual) /* { dg-warning "using num_workers \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_selected } } */ \
+  num_workers (WORKERS)
+{
+  if (acc_on_device (acc_device_host))
+	{
+	  /* We're actually executing with num_workers (1).  */
+	  workers_actual = 1;
+	}
+  else if (acc_on_device (acc_device_nvidia))
+	{
+	  /* The GCC nvptx back end enforces num_workers (32).  */
+	  workers_actual = 32;
+	}
+  else
+	__builtin_abort ();
+#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+  for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
+	{
+	  gangs_min = gangs_max = acc_gang ();
+	  workers_min = workers_max = acc_worker ();
+	  vectors_min = vectors_max = acc_vector ();
+	}
+}
+if (workers_actual < 1)
+  __builtin_abort ();
+if (gangs_min != 0 || gangs_max != 0
+	|| workers_min != 0 || workers_max != workers_actual - 1
+	|| vectors_min != 0 || vectors_max != 0)
+  __builtin_abort ();
+#undef WORKERS
+  }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 1dd6353..1498fb4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -273,42 +273,11 @@ int main ()
 
   /* GR, WP, VS.  */
   {
-/* We try with an outrageously large value. */
-#define WORKERS 2 << 20
-int workers_actual = WORKERS;
-int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
-gangs_min = workers_min = vectors_min = INT_MAX;
-gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (workers_actual) /* { dg-warning "using num_workers \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_selected } } */ \
-  num_workers (WORKERS)
-{
-  if (acc_on_device (acc_device_host))
-	{
-	  /* We're actually executing with num_workers (1).  */
-	  workers_actual = 1;
-	}
-  else if (acc_on_device (acc_device_nvidia))
-	{
-	  

[og7, libgomp, nvptx, committed] Fix too-many-resources fatal error condition and message

2018-04-30 Thread Tom de Vries

Hi,

atm parallel-dims.c fails on Titan-V, with a cuda launch failure:
...
libgomp: cuLaunchKernel error: too many resources requested for launch
...

We've got a check in the libgomp nvptx plugin to prevent the cuda launch 
failure and give a more informative error message:

...
 /* Check if the accelerator has sufficient hardware resources to 


 launch the offloaded kernel.  */
  if (dims[GOMP_DIM_WORKER] > 1)
{
  if (reg_granularity > 0
  && dims[GOMP_DIM_WORKER] > threads_per_block)
GOMP_PLUGIN_fatal
  ("The Nvidia accelerator has insufficient resources "
   "to launch '%s'; recompile the program with "
   "'num_workers = %d' on that offloaded region or "
   "'-fopenacc-dim=-:%d'.\n",
   targ_fn->launch->fn, threads_per_block,
   threads_per_block);
}
...

The message doesn't trigger, because reg_granularity == -1.
This value comes from dev->register_allocation_granularity which 
defaults to -1 because libgomp does not have a hardcoded constant for 
sm_70. The hardcoded constants that are present match 'Warp Allocation 
Granularity' in the GPU Data table in CUDA_Occupancy_calculator.xls, but 
AFAICT there's no column published yet for sm_70.


Furthermore, the comparison to threads_per_block is not correct. What we 
want here is the maximum amount of threads per block, while the 
threads_per_block variable contains an approximation of that, and the 
exact amount required is already available from the CUDA runtime and 
stored at targ_fn->max_threads_per_block.


Then, the comparison to dims[GOMP_DIM_WORKER] is incorrect. It used to 
be correct before "[nvptx] Handle large vectors in libgomp" when we used 
to do "threads_per_block /= warp_size", but now we need to compare 
against dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR].


Finally, the message has not been updated to reflect that vector length 
can be larger than 32.


The patch addresses these issues.

Committed to og7.

Thanks,
- Tom
[libgomp, nvptx] Fix too-many-resources fatal error condition and message

2018-04-30  Tom de Vries  

	* plugin/plugin-nvptx.c (nvptx_exec): Fix
	insufficient-resources-to-launch fatal error condition and message.

---
 libgomp/plugin/plugin-nvptx.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 9b4768f..3c00555 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -834,16 +834,15 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
 
   /* Check if the accelerator has sufficient hardware resources to
  launch the offloaded kernel.  */
-  if (dims[GOMP_DIM_WORKER] > 1)
-{
-  if (reg_granularity > 0 && dims[GOMP_DIM_WORKER] > threads_per_block)
-	GOMP_PLUGIN_fatal ("The Nvidia accelerator has insufficient resources "
-			   "to launch '%s'; recompile the program with "
-			   "'num_workers = %d' on that offloaded region or "
-			   "'-fopenacc-dim=-:%d'.\n",
-			   targ_fn->launch->fn, threads_per_block,
-			   threads_per_block);
-}
+  if (dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR]
+  > targ_fn->max_threads_per_block)
+GOMP_PLUGIN_fatal ("The Nvidia accelerator has insufficient resources to"
+		   " launch '%s' with num_workers = %d and vector_length ="
+		   " %d; recompile the program with 'num_workers = x and"
+		   " vector_length = y' on that offloaded region or "
+		   "'-fopenacc-dim=-:x:y' where x * y <= %d.\n",
+		   targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
+		   dims[GOMP_DIM_VECTOR], targ_fn->max_threads_per_block);
 
   GOMP_PLUGIN_debug (0, "  %s: kernel %s: launch"
 		 " gangs=%u, workers=%u, vectors=%u\n",


RE: [PATCH 5/5] [ARC] Clear the instruction cache using syscalls.

2018-04-30 Thread Claudiu Zissulescu
committed. Thank you for your review,
Claudiu

From: Andrew Burgess [andrew.burg...@embecosm.com]
Sent: Friday, April 27, 2018 11:40 PM
To: Claudiu Zissulescu
Cc: gcc-patches@gcc.gnu.org; francois.bed...@synopsys.com
Subject: Re: [PATCH 5/5] [ARC] Clear the instruction cache using syscalls.

* Claudiu Zissulescu  [2018-04-06 11:00:14 
+0200]:

> Clear the instruction cache from `beg' to `end'.  This makes an inline
> system call to SYS_cacheflush.
>
> gcc/
> 2017-03-28  Claudiu Zissulescu  
>
>   * config/arc/linux.h (CLEAR_INSN_CACHE): Define.


Looks good.

Thanks,
Andrew

> ---
>  gcc/config/arc/linux.h | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/gcc/config/arc/linux.h b/gcc/config/arc/linux.h
> index 4e87dfe..96d548e 100644
> --- a/gcc/config/arc/linux.h
> +++ b/gcc/config/arc/linux.h
> @@ -109,3 +109,17 @@ along with GCC; see the file COPYING3.  If not see
>  /* Build attribute: procedure call standard.  */
>  #undef ATTRIBUTE_PCS
>  #define ATTRIBUTE_PCS 3
> +
> +/* Clear the instruction cache from `beg' to `end'.  This makes an
> +   inline system call to SYS_cacheflush.  */
> +#undef CLEAR_INSN_CACHE
> +#define CLEAR_INSN_CACHE(beg, end)   \
> +{\
> +  register unsigned long _beg __asm ("r0") = (unsigned long) (beg);  \
> +  register unsigned long _end __asm ("r1") = (unsigned long) (end);  \
> +  register unsigned long _xtr __asm ("r2") = 0;  
> \
> +  register unsigned long _scno __asm ("r8") = 244;   \
> +  __asm __volatile ("trap_s 0; sys_cache_sync"   
> \
> + : "=r" (_beg)   \
> + : "0" (_beg), "r" (_end), "r" (_xtr), "r" (_scno)); \
> +}
> --
> 1.9.1
>


RE: [PATCH 4/5] [ARC] Cleanup sdata handling.

2018-04-30 Thread Claudiu Zissulescu
committed. Thank you for your review,
Claudiu

From: Andrew Burgess [andrew.burg...@embecosm.com]
Sent: Friday, April 27, 2018 11:39 PM
To: Claudiu Zissulescu
Cc: gcc-patches@gcc.gnu.org; francois.bed...@synopsys.com; Claudiu Zissulescu
Subject: Re: [PATCH 4/5] [ARC] Cleanup sdata handling.

* Claudiu Zissulescu  [2018-04-06 11:00:13 
+0200]:

> From: Claudiu Zissulescu 
>
> Clean up how we handle small data load/store operations. This patch clears 
> -flto-fat-lto-object LTO related errors.
>
> gcc/
> 2018-01-18  Claudiu Zissulescu  
>
>   * config/arc/arc-protos.h (prepare_extend_operands): Remove.
>   (small_data_pattern): Likewise.
>   (arc_rewrite_small_data): Likewise.
>   * config/arc/arc.c (LEGITIMATE_SMALL_DATA_OFFSET_P): Remove.
>   (LEGITIMATE_SMALL_DATA_ADDRESS_P): Likewise.
>   (get_symbol_alignment): New function.
>   (legitimate_small_data_address_p): Likewise.
>   (legitimate_scaled_address): Update, call
>   legitimate_small_data_address_p.
>   (output_sdata): New static variable.
>   (arc_print_operand): Update how we handle small data operands.
>   (arc_print_operand_address): Likewise.
>   (arc_legitimate_address_p): Update, use
>   legitimate_small_data_address_p.
>   (arc_rewrite_small_data_p): Remove.
>   (arc_rewrite_small_data_1): Likewise.
>   (arc_rewrite_small_data): Likewise.
>   (small_data_pattern): Likewise.
>   (compact_sda_memory_operand): Update to use
>   legitimate_small_data_address_p and get_symbol_alignment.
>   (prepare_move_operands): Don't rewrite sdata pattern.
>   (prepare_extend_operands): Remove.
>   * config/arc/arc.md (zero_extendqihi2): Don't rewrite sdata
>   pattern.
>   (zero_extendqisi2): Likewise.
>   (zero_extendhisi2): Likewise.
>   (extendqihi2): Likewise.
>   (extendqisi2): Likewise.
>   (extendhisi2): Likewise.
>   (addsi3): Likewise.
>   (subsi3): Likewise.
>   (andsi3): Likewise.
>   * config/arc/constraints.md (Usd): Change it to memory constraint.
>
> gcc/testsuite
> 2018-01-18  Claudiu Zissulescu  
>
>   * gcc.target/arc/interrupt-8.c: Update test.
>   * gcc.target/arc/loop-4.c: Likewise.
>   * gcc.target/arc/loop-hazard-1.c: Likewise.
>   * gcc.target/arc/sdata-3.c: Likewise.

Looks like a good clean up.

Thanks,
Andrew

> ---
>  gcc/config/arc/arc-protos.h  |   4 -
>  gcc/config/arc/arc.c | 309 
> ---
>  gcc/config/arc/arc.md|  22 +-
>  gcc/config/arc/constraints.md|   6 +-
>  gcc/testsuite/gcc.target/arc/interrupt-8.c   |   5 +-
>  gcc/testsuite/gcc.target/arc/loop-4.c|   2 +-
>  gcc/testsuite/gcc.target/arc/loop-hazard-1.c |   2 +-
>  gcc/testsuite/gcc.target/arc/sdata-3.c   |   8 +-
>  8 files changed, 110 insertions(+), 248 deletions(-)
>
> diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
> index 0ba6871..67f3b4e 100644
> --- a/gcc/config/arc/arc-protos.h
> +++ b/gcc/config/arc/arc-protos.h
> @@ -33,8 +33,6 @@ extern void arc_print_operand (FILE *, rtx, int);
>  extern void arc_print_operand_address (FILE *, rtx);
>  extern void arc_final_prescan_insn (rtx_insn *, rtx *, int);
>  extern const char *arc_output_libcall (const char *);
> -extern bool prepare_extend_operands (rtx *operands, enum rtx_code code,
> -  machine_mode omode);
>  extern int arc_output_addsi (rtx *operands, bool, bool);
>  extern int arc_output_commutative_cond_exec (rtx *operands, bool);
>  extern bool arc_expand_movmem (rtx *operands);
> @@ -65,8 +63,6 @@ extern bool arc_raw_symbolic_reference_mentioned_p (rtx, 
> bool);
>  extern bool arc_is_longcall_p (rtx);
>  extern bool arc_is_shortcall_p (rtx);
>  extern bool valid_brcc_with_delay_p (rtx *);
> -extern bool small_data_pattern (rtx , machine_mode);
> -extern rtx arc_rewrite_small_data (rtx);
>  extern bool arc_ccfsm_cond_exec_p (void);
>  struct secondary_reload_info;
>  extern int arc_register_move_cost (machine_mode, enum reg_class,
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index 2ccdce8..2ce1744 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -96,22 +96,6 @@ HARD_REG_SET overrideregs;
> ? 0 \
> : -(-GET_MODE_SIZE (MODE) | -4) >> 1)))
>
> -#define LEGITIMATE_SMALL_DATA_OFFSET_P(X)\
> -  (GET_CODE (X) == CONST \
> -   && GET_CODE (XEXP ((X), 0)) == PLUS   
> \
> -   && GET_CODE (XEXP (XEXP ((X), 0), 0)) == SYMBOL_REF   
> \
> -   && SYMBOL_REF_SMALL_P (XEXP (XEXP ((X), 0), 0))   \
> -   && GET_CODE (XEXP(XEXP ((X), 0), 1)) == CONST_INT 

RE: [PATCH 3/5] [ARC] Update movhi and movdi patterns.

2018-04-30 Thread Claudiu Zissulescu
committed. Thank you for your review,
Claudiu

From: Andrew Burgess [andrew.burg...@embecosm.com]
Sent: Friday, April 27, 2018 11:27 PM
To: Claudiu Zissulescu
Cc: gcc-patches@gcc.gnu.org; francois.bed...@synopsys.com; Claudiu Zissulescu
Subject: Re: [PATCH 3/5] [ARC] Update movhi and movdi patterns.

* Claudiu Zissulescu  [2018-04-06 11:00:12 
+0200]:

> From: Claudiu Zissulescu 
>
> Allow signed 6-bit short immediates into st[d] instructions.
>
> 2017-10-19  Claudiu Zissulescu  
>
>   * config/arc/arc.c (arc_split_move): Allow signed 6-bit constants
>   as source of std instructions.
>   * config/arc/arc.md (movsi_insn): Update pattern predicate to
>   allow 6-bit constants as source for store instructions.
>   (movdi_insn): Update instruction pattern to allow 6-bit constants
>   as source for store instructions.
>
> testsuite/
> 2017-10-19  Claudiu Zissulescu  
>
>   * gcc.target/arc/store-merge-1.c: New test.
>   * gcc.target/arc/add_n-combine.c: Update test.

Looks good thanks,

Andrew


> ---
>  gcc/config/arc/arc.c |  3 ++-
>  gcc/config/arc/arc.md| 25 +
>  gcc/testsuite/gcc.target/arc/add_n-combine.c |  2 +-
>  gcc/testsuite/gcc.target/arc/store-merge-1.c | 17 +
>  4 files changed, 33 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arc/store-merge-1.c
>
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index 47d3ba4..2ccdce8 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -9669,7 +9669,8 @@ arc_split_move (rtx *operands)
>
>if (TARGET_LL64
>&& ((memory_operand (operands[0], mode)
> -&& even_register_operand (operands[1], mode))
> +&& (even_register_operand (operands[1], mode)
> +|| satisfies_constraint_Cm3 (operands[1])))
> || (memory_operand (operands[1], mode)
> && even_register_operand (operands[0], mode
>  {
> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
> index ffd9d5b..0fc7aba 100644
> --- a/gcc/config/arc/arc.md
> +++ b/gcc/config/arc/arc.md
> @@ -740,7 +740,9 @@ archs4x, archs4xd, archs4xd_slow"
> /* Don't use a LIMM that we could load with a single insn - we loose
> delay-slot filling opportunities.  */
> && !satisfies_constraint_I (operands[1])
> -   && satisfies_constraint_Usc (operands[0]))"
> +   && satisfies_constraint_Usc (operands[0]))
> +   || (satisfies_constraint_Cm3 (operands[1])
> +  && memory_operand (operands[0], SImode))"
>"@
> mov%? %0,%1%& ;0
> mov%? %0,%1%& ;1
> @@ -1237,10 +1239,12 @@ archs4x, archs4xd, archs4xd_slow"
>")
>
>  (define_insn_and_split "*movdi_insn"
> -  [(set (match_operand:DI 0 "move_dest_operand"  "=w, w,r,m")
> - (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))]
> +  [(set (match_operand:DI 0 "move_dest_operand"  "=w, w,r,   m")
> + (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,cCm3"))]
>"register_operand (operands[0], DImode)
> -   || register_operand (operands[1], DImode)"
> +   || register_operand (operands[1], DImode)
> +   || (satisfies_constraint_Cm3 (operands[1])
> +  && memory_operand (operands[0], DImode))"
>"*
>  {
>switch (which_alternative)
> @@ -1250,19 +1254,16 @@ archs4x, archs4xd, archs4xd_slow"
>
>  case 2:
>  if (TARGET_LL64
> - && ((even_register_operand (operands[0], DImode)
> -  && memory_operand (operands[1], DImode))
> - || (memory_operand (operands[0], DImode)
> - && even_register_operand (operands[1], DImode
> +&& memory_operand (operands[1], DImode)
> + && even_register_operand (operands[0], DImode))
>return \"ldd%U1%V1 %0,%1%&\";
>  return \"#\";
>
>  case 3:
>  if (TARGET_LL64
> - && ((even_register_operand (operands[0], DImode)
> -  && memory_operand (operands[1], DImode))
> - || (memory_operand (operands[0], DImode)
> - && even_register_operand (operands[1], DImode
> + && memory_operand (operands[0], DImode)
> + && (even_register_operand (operands[1], DImode)
> + || satisfies_constraint_Cm3 (operands[1])))
>   return \"std%U0%V0 %1,%0\";
>  return \"#\";
>  }
> diff --git a/gcc/testsuite/gcc.target/arc/add_n-combine.c 
> b/gcc/testsuite/gcc.target/arc/add_n-combine.c
> index db6454f..cd32ed3 100644
> --- a/gcc/testsuite/gcc.target/arc/add_n-combine.c
> +++ b/gcc/testsuite/gcc.target/arc/add_n-combine.c
> @@ -45,4 +45,4 @@ void f() {
>a(at3.bn[bu]);
>  }
>
> -/* { dg-final { scan-rtl-dump-times "\\*add_n" 3 "combine" } } */
> +/* { dg-final { scan-rtl-dump-times "\\*add_n" 2 "combine" } } */
> diff --git a/gcc/testsuite/gcc.target/arc/store-merge-1.c 
> 

[PATCH] Clarify documentation for -fpie and -fPIE

2018-04-30 Thread Jonathan Wakely

I noticed a couple of missing definite articles in the description of
-fpie and -fPIE, and found the last sentence unclear. This clarifies
that it means you should use these options during compilation if you
plan to use -pie during linking (rather than implying you use these
during linking as well).

I also added a hyphen to "position independent" but maybe that part
should wait for a decision on
https://gcc.gnu.org/ml/gcc/2018-04/msg00196.html


* doc/invoke.texi (-fpie, -fPIE): Fix grammar and clarify
interaction with -pie.

OK for trunk?

commit 169e8c617d502eae8ef37ea60cd199286d318863
Author: Jonathan Wakely 
Date:   Mon Apr 30 13:26:10 2018 +0100

Clarify documentation for -fpie and -fPIE

* doc/invoke.texi (-fpie, -fPIE): Fix grammar and clarify
interaction with -pie.

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 606e9152cfc..efe66e37901 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -13094,10 +13094,10 @@ are defined to 2.
 @itemx -fPIE
 @opindex fpie
 @opindex fPIE
-These options are similar to @option{-fpic} and @option{-fPIC}, but
-generated position independent code can be only linked into executables.
-Usually these options are used when @option{-pie} GCC option is
-used during linking.
+These options are similar to @option{-fpic} and @option{-fPIC}, but the
+generated position-independent code can be only linked into executables.
+Usually these options are used to compile code that will be linked using
+the @option{-pie} GCC option.
 
 @option{-fpie} and @option{-fPIE} both define the macros
 @code{__pie__} and @code{__PIE__}.  The macros have the value 1


[PATCH] Kill verify_expr

2018-04-30 Thread Richard Biener

This finally removes verify_expr and moves all its bells and whistles
(where they exist) elsewhere.  There's one piece that didn't fit somewhere
and I put it where it fit most closely (but it still doesn't work because
of weird debug stmts so it's #if 0ed out).

This ends the series of verifier TLC for me at this moment.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2018-04-30  Richard Biener  

* tree-cfg.c (verify_address): Remove base argument, add
flag whether to check TREE_ADDRESSABLE and do that.
(verify_expr): Remove.
(verify_types_in_gimple_reference): Add pieces from verify_expr.
(verify_gimple_assign_single): Likewise.
(verify_gimple_switch): Likewise.
(verify_expr_location_1): Dereference tp once.  Add (disabled)
piece from verify_expr.
(verify_gimple_in_cfg): Do not call verify_expr on all ops.

Index: gcc/tree-cfg.c
===
*** gcc/tree-cfg.c  (revision 259754)
--- gcc/tree-cfg.c  (working copy)
*** gimple_split_edge (edge edge_in)
*** 2982,2991 
  }
  
  
! /* Verify properties of the address expression T with base object BASE.  */
  
! static tree
! verify_address (tree t, tree base)
  {
bool old_constant;
bool old_side_effects;
--- 2982,2992 
  }
  
  
! /* Verify properties of the address expression T whose base should be
!TREE_ADDRESSABLE if VERIFY_ADDRESSABLE is true.  */
  
! static bool 
! verify_address (tree t, bool verify_addressable)
  {
bool old_constant;
bool old_side_effects;
*** verify_address (tree t, tree base)
*** 3002,3320 
if (old_constant != new_constant)
  {
error ("constant not recomputed when ADDR_EXPR changed");
!   return t;
  }
if (old_side_effects != new_side_effects)
  {
error ("side effects not recomputed when ADDR_EXPR changed");
!   return t;
  }
  
if (!(VAR_P (base)
|| TREE_CODE (base) == PARM_DECL
|| TREE_CODE (base) == RESULT_DECL))
! return NULL_TREE;
  
if (DECL_GIMPLE_REG_P (base))
  {
error ("DECL_GIMPLE_REG_P set on a variable with address taken");
!   return base;
  }
  
!   return NULL_TREE;
! }
! 
! /* Callback for walk_tree, check that all elements with address taken are
!properly noticed as such.  The DATA is an int* that is 1 if TP was seen
!inside a PHI node.  */
! 
! static tree
! verify_expr (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
! {
!   tree t = *tp, x;
! 
!   if (TYPE_P (t))
! *walk_subtrees = 0;
! 
!   /* Check operand N for being valid GIMPLE and give error MSG if not.  */
! #define CHECK_OP(N, MSG) \
!   do { if (!is_gimple_val (TREE_OPERAND (t, N)))  \
!{ error (MSG); return TREE_OPERAND (t, N); }} while (0)
! 
!   switch (TREE_CODE (t))
  {
! case SSA_NAME:
!   if (SSA_NAME_IN_FREE_LIST (t))
!   {
! error ("SSA name in freelist but still referenced");
! return *tp;
!   }
!   break;
! 
! case PARM_DECL:
! case VAR_DECL:
! case RESULT_DECL:
!   {
!   tree context = decl_function_context (t);
!   if (context != cfun->decl
!   && !SCOPE_FILE_SCOPE_P (context)
!   && !TREE_STATIC (t)
!   && !DECL_EXTERNAL (t))
! {
!   error ("Local declaration from a different function");
!   return t;
! }
!   }
!   break;
! 
! case INDIRECT_REF:
!   error ("INDIRECT_REF in gimple IL");
!   return t;
! 
! case MEM_REF:
!   x = TREE_OPERAND (t, 0);
!   if (!POINTER_TYPE_P (TREE_TYPE (x))
! || !is_gimple_mem_ref_addr (x))
!   {
! error ("invalid first operand of MEM_REF");
! return x;
!   }
!   if (!poly_int_tree_p (TREE_OPERAND (t, 1))
! || !POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (t, 1
!   {
! error ("invalid offset operand of MEM_REF");
! return TREE_OPERAND (t, 1);
!   }
!   if (TREE_CODE (x) == ADDR_EXPR)
!   {
! tree va = verify_address (x, TREE_OPERAND (x, 0));
! if (va)
!   return va;
! x = TREE_OPERAND (x, 0);
!   }
!   walk_tree (&x, verify_expr, data, NULL);
!   *walk_subtrees = 0;
!   break;
! 
! case ASSERT_EXPR:
!   x = fold (ASSERT_EXPR_COND (t));
!   if (x == boolean_false_node)
!   {
! error ("ASSERT_EXPR with an always-false condition");
! return *tp;
!   }
!   break;
! 
! case MODIFY_EXPR:
!   error ("MODIFY_EXPR not expected while having tuples");
!   return *tp;
! 
! case ADDR_EXPR:
!   {
!   tree tem;
! 
!   gcc_assert (is_gimple_address (t));
! 
!   /* Skip any references (they will be checked when we recurse down the
!  tree) and ensure that any variable used as a prefix is marked
!  

Re: RFA (clobbers, gimplification): PATCH for c++/61982, dead stores to destroyed objects

2018-04-30 Thread Richard Biener
On Fri, Apr 27, 2018 at 11:43 PM, Jason Merrill  wrote:
> 61982 notes that an explicit destructor call or delete expression ends
> the lifetime of an object, but we weren't clobbering affected objects
> if their destructors are trivial.  This patch fixes that.
>
> The first commit just adds a helper function, build_clobber.
>
> The second commit changes explicit destruction to clobber the affected
> object.  As a result, I needed to change the gimplifier to handle the
> more general forms of lvalue we might be clobbering, by introducing a
> temporary.  I'm not sure why clobbers are so picky about the form of
> lvalue they can use, but this makes it work.
>
> Tested x86_64-pc-linux-gnu.  OK for trunk (9)?

Ok for the middle-end parts.

Richard.


Re: gcc 8 trunk broken O3 on x86_64

2018-04-30 Thread Richard Biener
On Fri, Apr 27, 2018 at 9:21 AM, graham stott via gcc-patches
 wrote:
> All
> Just a heads-up: the trunk has been broken since about Wednesday; most files fail
> compare during bootstrap at O3 but pass at O2.
> My last successful bootstrap at O3 was Tuesday.
> I have no idea which commit caused it.

Should be fixed now.

Richard.

> Graham


[PATCH] DWARF: Emit DWARF5 forms for indirect addresses and string offsets.

2018-04-30 Thread Mark Wielaard
We already emit DWARF5 attributes and tables for indirect addresses
and string offsets, but still use GNU forms. Add a new helper function
dwarf_FORM () for emitting the right form.

Currently we only use the uleb128 forms. But DWARF5 also allows
1, 2, 3 and 4 byte forms (DW_FORM_strx[1234] and DW_FORM_addrx[1234])
which might be more space efficient.

gcc/ChangeLog:

* dwarf2out.c (dwarf_FORM): New function.
(set_indirect_string): Use dwarf_FORM.
(reset_indirect_string): Likewise.
(size_of_die): Likewise.
(value_format): Likewise.
(output_die): Likewise.
(add_skeleton_AT_string): Likewise.
(output_macinfo_op): Likewise.
(index_string): Likewise.
(output_index_string_offset): Likewise.
(output_index_string): Likewise.
(count_index_strings): Likewise.
 
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 340de5b..85a1a8b 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -246,7 +246,7 @@ static GTY (()) hash_table 
*debug_line_str_hash;
That is, the comp_dir and dwo_name will appear in both places.
 
2) Strings can use four forms: DW_FORM_string, DW_FORM_strp,
-   DW_FORM_line_strp or DW_FORM_GNU_str_index.
+   DW_FORM_line_strp or DW_FORM_strx/GNU_str_index.
 
3) GCC chooses the form to use late, depending on the size and
reference count.
@@ -1757,6 +1757,28 @@ dwarf_TAG (enum dwarf_tag tag)
   return tag;
 }
 
+/* And similarly for forms.  */
+static inline enum dwarf_form
+dwarf_FORM (enum dwarf_form form)
+{
+  switch (form)
+{
+case DW_FORM_addrx:
+  if (dwarf_version < 5)
+   return DW_FORM_GNU_addr_index;
+  break;
+
+case DW_FORM_strx:
+  if (dwarf_version < 5)
+   return DW_FORM_GNU_str_index;
+  break;
+
+default:
+  break;
+}
+  return form;
+}
+
 static unsigned long int get_base_type_offset (dw_die_ref);
 
 /* Return the size of a location descriptor.  */
@@ -4387,8 +4409,8 @@ AT_class (dw_attr_node *a)
 }
 
 /* Return the index for any attribute that will be referenced with a
-   DW_FORM_GNU_addr_index or DW_FORM_GNU_str_index.  String indices
-   are stored in dw_attr_val.v.val_str for reference counting
+   DW_FORM_addrx/GNU_addr_index or DW_FORM_strx/GNU_str_index.  String
+   indices are stored in dw_attr_val.v.val_str for reference counting
pruning.  */
 
 static inline unsigned int
@@ -4652,7 +4674,7 @@ set_indirect_string (struct indirect_string_node *node)
   /* Already indirect is a no op.  */
   if (node->form == DW_FORM_strp
   || node->form == DW_FORM_line_strp
-  || node->form == DW_FORM_GNU_str_index)
+  || node->form == dwarf_FORM (DW_FORM_strx))
 {
   gcc_assert (node->label);
   return;
@@ -4668,7 +4690,7 @@ set_indirect_string (struct indirect_string_node *node)
 }
   else
 {
-  node->form = DW_FORM_GNU_str_index;
+  node->form = dwarf_FORM (DW_FORM_strx);
   node->index = NO_INDEX_ASSIGNED;
 }
 }
@@ -4681,7 +4703,7 @@ int
 reset_indirect_string (indirect_string_node **h, void *)
 {
   struct indirect_string_node *node = *h;
-  if (node->form == DW_FORM_strp || node->form == DW_FORM_GNU_str_index)
+  if (node->form == DW_FORM_strp || node->form == dwarf_FORM (DW_FORM_strx))
 {
   free (node->label);
   node->label = NULL;
@@ -9419,7 +9441,7 @@ size_of_die (dw_die_ref die)
   form = AT_string_form (a);
  if (form == DW_FORM_strp || form == DW_FORM_line_strp)
size += DWARF_OFFSET_SIZE;
- else if (form == DW_FORM_GNU_str_index)
+ else if (form == dwarf_FORM (DW_FORM_strx))
size += size_of_uleb128 (AT_index (a));
  else
size += strlen (a->dw_attr_val.v.val_str->str) + 1;
@@ -9666,7 +9688,7 @@ value_format (dw_attr_node *a)
case DW_AT_entry_pc:
case DW_AT_trampoline:
   return (AT_index (a) == NOT_INDEXED
-  ? DW_FORM_addr : DW_FORM_GNU_addr_index);
+  ? DW_FORM_addr : dwarf_FORM (DW_FORM_addrx));
default:
  break;
}
@@ -9839,7 +9861,7 @@ value_format (dw_attr_node *a)
   return DW_FORM_data;
 case dw_val_class_lbl_id:
   return (AT_index (a) == NOT_INDEXED
-  ? DW_FORM_addr : DW_FORM_GNU_addr_index);
+  ? DW_FORM_addr : dwarf_FORM (DW_FORM_addrx));
 case dw_val_class_lineptr:
 case dw_val_class_macptr:
 case dw_val_class_loclistsptr:
@@ -10807,7 +10829,7 @@ output_die (dw_die_ref die)
   a->dw_attr_val.v.val_str->label,
   debug_line_str_section,
   "%s: \"%s\"", name, AT_string (a));
-  else if (a->dw_attr_val.v.val_str->form == DW_FORM_GNU_str_index)
+  else if (a->dw_attr_val.v.val_str->form == dwarf_FORM (DW_FORM_strx))
 dw2_asm_output_data_uleb128 (AT_index (a),
  "%s: \"%s\"", name, 

[PATCH] DWARF: Add header for .debug_str_offsets table for dwarf_version 5.

2018-04-30 Thread Mark Wielaard
DWARF5 defines a small header for .debug_str_offsets.  Since we only use
it for split dwarf .dwo files we don't need to keep track of the actual
index offset in an attribute.

gcc/ChangeLog:

* dwarf2out.c (count_index_strings): New function.
(output_indirect_strings): Call count_index_strings and generate
header for dwarf_version >= 5.

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index d2d4ec0..340de5b 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -28732,6 +28732,19 @@ output_index_string (indirect_string_node **h, 
unsigned int *cur_idx)
   return 1;
 }
 
+/* A helper function for output_indirect_strings.  Counts the number
+   of index strings offsets.  Must match the logic of the functions
+   output_index_string[_offsets] above.  */
+int
+count_index_strings (indirect_string_node **h, unsigned int *last_idx)
+{
+  struct indirect_string_node *node = *h;
+
+  if (node->form == DW_FORM_GNU_str_index && node->refcount > 0)
+*last_idx += 1;
+  return 1;
+}
+
 /* A helper function for dwarf2out_finish called through
htab_traverse.  Emit one queued .debug_str string.  */
 
@@ -28769,6 +28782,33 @@ output_indirect_strings (void)
  output_indirect_string> 
(DW_FORM_strp);
 
   switch_to_section (debug_str_offsets_section);
+  /* For DWARF5 the .debug_str_offsets[.dwo] section needs a unit
+header.  Note that we don't need to generate a label to the
+actual index table following the header here, because this is
+for the split dwarf case only.  In an .dwo file there is only
+one string offsets table (and one debug info section).  But
+if we would start using string offset tables for the main (or
+skeleton) unit, then we have to add a DW_AT_str_offsets_base
+pointing to the actual index after the header.  Split dwarf
+units will never have a string offsets base attribute.  When
+a split unit is moved into a .dwp file the string offsets can
+be found through the .debug_cu_index section table.  */
+  if (dwarf_version >= 5)
+   {
+ unsigned int last_idx = 0;
+ unsigned long str_offsets_length;
+
+ debug_str_hash->traverse_noresize
+(_idx);
+ str_offsets_length = last_idx * DWARF_OFFSET_SIZE + 4;
+ if (DWARF_INITIAL_LENGTH_SIZE - DWARF_OFFSET_SIZE == 4)
+   dw2_asm_output_data (4, 0xffffffff,
+"Escape value for 64-bit DWARF extension");
+ dw2_asm_output_data (DWARF_OFFSET_SIZE, str_offsets_length,
+  "Length of string offsets unit");
+ dw2_asm_output_data (2, 5, "DWARF string offsets version");
+ dw2_asm_output_data (2, 0, "Header zero padding");
+   }
   debug_str_hash->traverse_noresize
 ();
   switch_to_section (debug_str_dwo_section);



[PATCH] doc/invoke.texi (-fdebug-types-section): Fix grammar.

2018-04-30 Thread Jonathan Wakely

Fixes a singular vs plural mixup.

Committed to trunk.

commit e72e2d5538d11734cd4c9ac2b4edcf179062cf6e
Author: Jonathan Wakely 
Date:   Mon Apr 30 13:10:35 2018 +0100

* doc/invoke.texi (-fdebug-types-section): Fix grammar.

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e45f467155a..94442849228 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7253,7 +7253,7 @@ with a linker that can produce GDB@ index version 7.
 When using DWARF Version 4 or higher, type DIEs can be put into
 their own @code{.debug_types} section instead of making them part of the
 @code{.debug_info} section.  It is more efficient to put them in a separate
-comdat sections since the linker can then remove duplicates.
+comdat section since the linker can then remove duplicates.
 But not all DWARF consumers support @code{.debug_types} sections yet
 and on some objects @code{.debug_types} produces larger instead of smaller
 debugging information.


Re: [PATCH] Warn for ignored ASM labels on typdef declarations PR 85444 (v.2)

2018-04-30 Thread Joseph Myers
On Sat, 28 Apr 2018, Will Hawkins wrote:

> +{
> +  warning (OPT_Wignored_qualifiers, "asm-specifier is ignored in "
> +   "typedef declaration");

This does not match the documented semantics of -Wignored-qualifiers.  I 
don't think it's appropriate to expand those semantics to include this 
warning either.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [patch] allow '-' for stdout dump

2018-04-30 Thread Nathan Sidwell

On 04/28/2018 11:33 AM, Sandra Loosemore wrote:

On 04/27/2018 07:12 AM, Nathan Sidwell wrote:


Hmmm, I'd like to wordsmith this a bit and clean up the markup, etc. How 
about this?


looks good, this is what I'm committing, thanks!

nathan

--
Nathan Sidwell
2018-04-30  Nathan Sidwell  
	Sandra Loosemore 

	* dumpfile.c (dump_open): Allow '-' for stdout.
	* doc/invoke.texi (Developer Options): Document dump filename
	determination early.  Document stdin/stdout selection.

Index: doc/invoke.texi
===
--- doc/invoke.texi	(revision 259758)
+++ doc/invoke.texi	(working copy)
@@ -13358,6 +13358,26 @@ configuration, such as where it searches
 rarely need to use any of these options for ordinary compilation and
 linking tasks.
 
+Many developer options that cause GCC to dump output to a file take an
+optional @samp{=@var{filename}} suffix. You can specify @samp{stdout}
+or @samp{-} to dump to standard output, and @samp{stderr} for standard
+error.
+
+If @samp{=@var{filename}} is omitted, a default dump file name is
+constructed by concatenating the base dump file name, a pass number,
+phase letter, and pass name.  The base dump file name is the name of
+output file produced by the compiler if explicitly specified and not
+an executable; otherwise it is the source file name.
+The pass number is determined by the order passes are registered with
+the compiler's pass manager. 
+This is generally the same as the order of execution, but passes
+registered by plugins, target-specific passes, or passes that are
+otherwise registered late are numbered higher than the pass named
+@samp{final}, even if they are executed earlier.  The phase letter is
+one of @samp{i} (inter-procedural analysis), @samp{l}
+(language-specific), @samp{r} (RTL), or @samp{t} (tree). 
+The files are created in the directory of the output file. 
+
 @table @gcctabopt
 
 @item -d@var{letters}
@@ -13367,20 +13387,7 @@ linking tasks.
 @opindex fdump-rtl-@var{pass}
 Says to make debugging dumps during compilation at times specified by
 @var{letters}.  This is used for debugging the RTL-based passes of the
-compiler.  The file names for most of the dumps are made by appending
-a pass number and a word to the @var{dumpname}, and the files are
-created in the directory of the output file.  In case of
-@option{=@var{filename}} option, the dump is output on the given file
-instead of the pass numbered dump files.  Note that the pass number is
-assigned as passes are registered into the pass manager.  Most passes
-are registered in the order that they will execute and for these passes
-the number corresponds to the pass execution order.  However, passes
-registered by plugins, passes specific to compilation targets, or
-passes that are otherwise registered after all the other passes are
-numbered higher than a pass named "final", even if they are executed
-earlier.  @var{dumpname} is generated from the name of the output
-file if explicitly specified and not an executable, otherwise it is
-the basename of the source file.  
+compiler.
 
 Some @option{-d@var{letters}} switches have different meaning when
 @option{-E} is used for preprocessing.  @xref{Preprocessor Options},
@@ -13768,11 +13775,7 @@ counters for each function compiled.
 @opindex fdump-tree-all
 @opindex fdump-tree
 Control the dumping at various stages of processing the intermediate
-language tree to a file.  The file name is generated by appending a
-switch-specific suffix to the source file name, and the file is
-created in the same directory as the output file. In case of
-@option{=@var{filename}} option, the dump is output on the given file
-instead of the auto named dump files.  If the @samp{-@var{options}}
+language tree to a file.  If the @samp{-@var{options}}
 form is used, @var{options} is a list of @samp{-} separated options
 which control the details of the dump.  Not all options are applicable
 to all dumps; those that are not meaningful are ignored.  The
@@ -13839,26 +13842,9 @@ passes).
 @item note
 Enable other detailed optimization information (only available in
 certain passes).
-@item =@var{filename}
-Instead of an auto named dump file, output into the given file
-name. The file names @file{stdout} and @file{stderr} are treated
-specially and are considered already open standard streams. For
-example,
-
-@smallexample
-gcc -O2 -ftree-vectorize -fdump-tree-vect-blocks=foo.dump
- -fdump-tree-pre=/dev/stderr file.c
-@end smallexample
-
-outputs vectorizer dump into @file{foo.dump}, while the PRE dump is
-output on to @file{stderr}. If two conflicting dump filenames are
-given for the same pass, then the latter option overrides the earlier
-one.
-
 @item all
 Turn on all options, except @option{raw}, @option{slim}, @option{verbose}
 and @option{lineno}.
-
 @item optall
 Turn on all optimization options, i.e., @option{optimized},
 @option{missed}, and @option{note}.

Re: [PATCH] Do not set nothrow flag on recursive function with stack checking

2018-04-30 Thread Jan Hubicka
> 
> Honza, do you have any suggestions here for the issue of detecting possible
> recursion as opposed to just direct recursion?

Well, you can look for strongly connected components but you must assume that
every call out of the current unit that is not leaf may call any function that
is externally visible or has address taken. So basically every function that may
lead to external call is possibly recursive because you can assume that every
function is reachable (with exception of main).

This will rule out many nothrow candidates but I do not see how that can be
strengthened.

Honza
> 
> Richard.
> 
> > --
> > Eric Botcazou


Re: [PATCH] Do not set nothrow flag on recursive function with stack checking

2018-04-30 Thread Richard Biener
On Sat, Apr 28, 2018 at 11:20 AM, Eric Botcazou  wrote:
>> This looks not a generic enough fix to me - consider
>>
>> void foo(void) { int a[10]; a[0] = 1; a[9] = 1; }
>> int main() { try { foo (); } catch (...) {} }
>>
>> with -fnon-call-exceptions.  If we'd like to catch the SEGV from stack
>> overflows then your fix doesn't handle the non-recursive case nor the case
>> where -fstack-check is not supplied.
>
> But -fstack-check is required to detect stack overflows...  Moreover the above
> testcase will be entirely optimized at -O so there is no stack overflow at -O.
>
>> So to me your attempt in fixing this isn't complete but a complete fix would
>> be quite pessimizing :/ (for -fnon-call-exceptions)
>
> Then let's at least fix the recursive case since it's simple and cheap.  But I
> can indeed key this on -fnon-call-exceptions explicitly.
>
>> At least all this should be documented somewhere, that is, what to expect
>> when trying to catch stack faults in general with -fnon-call-exceptions
>> [-fstack-check].
>
> It's already documented that you need -fstack-check to detect stack overflows.
> But I can add a blurb to the -fnon-call-exceptions entry about it.
>
> Revised patch attached.

Is the propagate_nothrow hunk really necessary since you set ->can_throw = true
in local analysis?  Again this not only handles only recursion but also only
direct recursion.  I wonder if there's a way to prove (or at least estimate with
good confidence) that a function does _not_ need local stack space?  Thus
for -fstack-check -fnon-call-exceptions consider all functions not
marked explicitly as can_throw unless we "prove" the opposite?
Or consider all non-leaf functions
as possibly throwing that way?

I'm not against your patch but I think that this kind of limitation
needs to be documented.

Index: doc/invoke.texi
===
--- doc/invoke.texi (revision 259642)
+++ doc/invoke.texi (working copy)
@@ -12812,6 +12812,9 @@ not exist everywhere.  Moreover, it only
 instructions to throw exceptions, i.e.@: memory references or floating-point
 instructions.  It does not allow exceptions to be thrown from
 arbitrary signal handlers such as @code{SIGALRM}.
+This option must be specified if you enable @option{-fstack-check} and
+want stack overflows to throw exceptions.  Note that this again requires
+platform-specific runtime support.


I'd phrase it as "If you want to handle stack overflows as exceptions
you need to
enable @option{-fstack-check} in addition to this option.  Support for this is
experimental, some stack overflows might not be catchable."

Honza, do you have any suggestions here for the issue of detecting possible
recursion as opposed to just direct recursion?

Richard.

> --
> Eric Botcazou


Re: Improve partitioning decisions

2018-04-30 Thread Jan Hubicka
> 
> FAIL: gcc.dg/lto/20081204-2 c_lto_20081204-2_0.o-c_lto_20081204-2_0.o
> link, -w -flto -fPIC -r -nostdlib (internal compiler error)
> FAIL: gcc.dg/lto/20090914-2 c_lto_20090914-2_0.o-c_lto_20090914-2_0.o
> link, -O2 -flto -fuse-linker-plugin (internal compiler error)
> FAIL: gcc.dg/lto/20091014-1 c_lto_20091014-1_0.o-c_lto_20091014-1_0.o
> link, -fPIC -r -nostdlib -flto (internal compiler error)
> FAIL: gcc.dg/lto/20100603-1 c_lto_20100603-1_0.o-c_lto_20100603-1_0.o
> link, -O0 -flto -fuse-linker-plugin -fno-fat-lto-objects  (internal
> compiler error)
> FAIL: gcc.dg/lto/20100603-1 c_lto_20100603-1_0.o-c_lto_20100603-1_0.o
> link, -O2 -flto -fuse-linker-plugin (internal compiler error)
> FAIL: g++.dg/lto/20081120-1
> cp_lto_20081120-1_0.o-cp_lto_20081120-1_1.o link, -flto -r -nostdlib
> (internal compiler error)
> FAIL: g++.dg/lto/20081120-2
> cp_lto_20081120-2_0.o-cp_lto_20081120-2_1.o link, -flto -r -nostdlib
> (internal compiler error)

Sorry, that was a last-minute fix to the sanity check.  Curiously enough
the check incorrectly triggers for an empty file.  It is interesting that we
test it so many times.

I am testing.

Index: lto-partition.c
===
--- lto-partition.c (revision 259755)
+++ lto-partition.c (working copy)
@@ -809,7 +809,7 @@
 next_nodes.safe_push (noreorder[noreorder_pos++]);
   /* For one partition the cost of boundary should be 0 unless we added final
  symbols here (these are not accounted) or we have accounting bug.  */
-  gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost);
+  gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost || 
best_cost == -1);
   add_sorted_nodes (next_nodes, partition);
 
   free (order);


[PATCH] More checking TLC

2018-04-30 Thread Richard Biener

Bootstrapped on x86_64-unknown-linux-gnu, applied.

Richard.

2018-04-30  Richard Biener  

* tree-chrec.h (evolution_function_is_constant_p): Remove
redundant check.
* tree-cfg.c (tree_node_can_be_shared): Re-order checks.

Index: gcc/tree-chrec.h
===
--- gcc/tree-chrec.h(revision 259755)
+++ gcc/tree-chrec.h(working copy)
@@ -170,8 +170,6 @@ evolution_function_is_constant_p (const_
   if (chrec == NULL_TREE)
 return false;
 
-  if (CONSTANT_CLASS_P (chrec))
-return true;
   return is_gimple_min_invariant (chrec);
 }
 
Index: gcc/tree-cfg.c
===
--- gcc/tree-cfg.c  (revision 259755)
+++ gcc/tree-cfg.c  (working copy)
@@ -5212,16 +5028,13 @@ static bool
 tree_node_can_be_shared (tree t)
 {
   if (IS_TYPE_OR_DECL_P (t)
-  || is_gimple_min_invariant (t)
   || TREE_CODE (t) == SSA_NAME
-  || t == error_mark_node
-  || TREE_CODE (t) == IDENTIFIER_NODE)
-return true;
-
-  if (TREE_CODE (t) == CASE_LABEL_EXPR)
+  || TREE_CODE (t) == IDENTIFIER_NODE
+  || TREE_CODE (t) == CASE_LABEL_EXPR
+  || is_gimple_min_invariant (t))
 return true;
 
-  if (DECL_P (t))
+  if (t == error_mark_node)
 return true;
 
   return false;



[Patch, Fortran, pr85507, v1] Model variable dependencies in coarray expression correctly.

2018-04-30 Thread Andre Vehreschild
Hi all,

The attached patch partially reverts r259385, preventing an ICE. At the same
time the check_dependency routine is parameterized correctly so that variable
dependencies in coarray expressions are determined more precisely.

Bootstrapped and regtested ok on x86_64-linux-gnu/f27. Ok for trunk,
gcc-8/-7/-6?

Regards,
Andre
-- 
Andre Vehreschild * Email: vehre ad gmx dot de 
gcc/fortran/ChangeLog:

2018-04-30  Andre Vehreschild  

PR fortran/85507
* dependency.c (gfc_dep_resolver): Revert looking at coarray dimension
introduced by r259385.
* trans-intrinsic.c (conv_caf_send): Always report a dependency for
same variables in coarray assignments.

gcc/testsuite/ChangeLog:

2018-04-30  Andre Vehreschild  

PR fortran/85507
* gfortran.dg/coarray_dependency_1.f90: New test.
* gfortran.dg/coarray_lib_comm_1.f90: Fix counting caf-expressions.

diff --git a/gcc/fortran/dependency.c b/gcc/fortran/dependency.c
index 3e14ddc25d8..a0bbd584947 100644
--- a/gcc/fortran/dependency.c
+++ b/gcc/fortran/dependency.c
@@ -2238,9 +2238,8 @@ gfc_dep_resolver (gfc_ref *lref, gfc_ref *rref, gfc_reverse *reverse)
 	break;
 
 	  /* Exactly matching and forward overlapping ranges don't cause a
-	 dependency, when they are not part of a coarray ref.  */
-	  if (fin_dep < GFC_DEP_BACKWARD
-	  && lref->u.ar.codimen == 0 && rref->u.ar.codimen == 0)
+	 dependency.  */
+	  if (fin_dep < GFC_DEP_BACKWARD)
 	return 0;
 
 	  /* Keep checking.  We only have a dependency if
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 00edd447bb2..87b3ca72c05 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -1860,7 +1860,7 @@ conv_caf_send (gfc_code *code) {
 
   lhs_expr = code->ext.actual->expr;
   rhs_expr = code->ext.actual->next->expr;
-  may_require_tmp = gfc_check_dependency (lhs_expr, rhs_expr, false) == 0
+  may_require_tmp = gfc_check_dependency (lhs_expr, rhs_expr, true) == 0
 		? boolean_false_node : boolean_true_node;
   gfc_init_block ();
 
diff --git a/gcc/testsuite/gfortran.dg/coarray_dependency_1.f90 b/gcc/testsuite/gfortran.dg/coarray_dependency_1.f90
new file mode 100644
index 000..dc4cbacba1e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray_dependency_1.f90
@@ -0,0 +1,11 @@
+! { dg-do compile }
+! { dg-options "-fcoarray=lib -lcaf_single" }
+!
+! Check that reffing x on both sides of a coarray send does not ICE. 
+! PR 85507
+
+program check_dependency
+  integer :: x[*]
+  x[42] = x
+end program check_dependency
+
diff --git a/gcc/testsuite/gfortran.dg/coarray_lib_comm_1.f90 b/gcc/testsuite/gfortran.dg/coarray_lib_comm_1.f90
index a7567af7b8f..171a27bd4c3 100644
--- a/gcc/testsuite/gfortran.dg/coarray_lib_comm_1.f90
+++ b/gcc/testsuite/gfortran.dg/coarray_lib_comm_1.f90
@@ -38,9 +38,8 @@ B(1:5) = B(3:7)
 if (any (A-B /= 0)) STOP 4
 end
 
-! { dg-final { scan-tree-dump-times "_gfortran_caf_get \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, \[0-9\]+, 0B, \[0-9\]+, 4, 4, 0, 0B\\\);" 1 "original" } }
-! { dg-final { scan-tree-dump-times "_gfortran_caf_get \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, \[0-9\]+, 0B, \[0-9\]+, 4, 4, 1, 0B\\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "_gfortran_caf_get \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, \[0-9\]+, 0B, \[0-9\]+, 4, 4, 1, 0B\\\);" 2 "original" } }
 ! { dg-final { scan-tree-dump-times "_gfortran_caf_get \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, \[0-9\]+, 0B, , 4, 4, 1, 0B\\\);" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "_gfortran_caf_get \\\(caf_token.1, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) b, 1, \[0-9\]+, 0B, , 4, 4, 0, 0B\\\);" 1 "original" } }
-! { dg-final { scan-tree-dump-times "_gfortran_caf_sendget \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, \[0-9\]+, 0B, caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, \[0-9\]+, 0B, 4, 4, 0, 0B\\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "_gfortran_caf_sendget \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, \[0-9\]+, 0B, caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, \[0-9\]+, 0B, 4, 4, 1, 0B\\\);" 1 "original" } }
 


Re: [PATCH] Fix PR c++/85400

2018-04-30 Thread Eric Botcazou
> It seems like we're likely to need the same thing when we get to
> make_decl_one_only by other paths; perhaps we should put this
> recalculation in a cxx_make_decl_one_only.

There are 4 calls to make_decl_one_only in the cp/ directory: the one at stake 
here in comdat_linkage, 1 in maybe_make_one_only, 1 in get_guard and 1 in 
get_tls_init_fn.  The last 2 don't need the recalculation, especially the 3rd 
one which makes a copy of the TLS model.

There are 3 calls to maybe_make_one_only in the cp/ directory: 1 from 
start_preparsed_function, 1 from mark_decl_instantiated and 1 from 
import_export_decl but guarded by DECL_FUNCTION_MEMBER_P.  The 1st and 3rd 
don't need the recalculation.

So it isn't clear to me if a cxx_make_decl_one_only is the way to go.  Maybe 
doing the recalculation in comdat_linkage and maybe_make_one_only only would 
be sufficient.

-- 
Eric Botcazou


Patch ping

2018-04-30 Thread Jakub Jelinek
Hi!

I'd like to ping the following patches for stage1:

  - PR78420 __builtin_early_constant_p 
http://gcc.gnu.org/ml/gcc-patches/2018-03/msg00355.html

  - use --push-state --as-needed and --pop-state around -lgcc_s
http://gcc.gnu.org/ml/gcc-patches/2018-04/msg00567.html

  - PR85466 next{after,toward}{,f,l} constant folding
http://gcc.gnu.org/ml/gcc-patches/2018-04/msg01027.html

  - PR85480 improve AVX512 128-bit insertion into 512-bit zero vector
http://gcc.gnu.org/ml/gcc-patches/2018-04/msg01058.html

Thanks

Jakub


Re: [PATCH] Fix loop-header copying do-while loop detection (PR85116)

2018-04-30 Thread Richard Biener
On Sun, 29 Apr 2018, Richard Biener wrote:

> On April 29, 2018 1:06:47 AM GMT+02:00, David Edelsohn  
> wrote:
> >Hi, Richi
> >
> >I had been using two source trees to speed the bisection and didn't
> >realize
> >that one defaulted to DWARF debugging and the other defaulted to XCOFF
> >debugging, which confused the bisection result.  The -f[no-]checking
> >patch
> >is the culprit.
> 
> My theory is that all non-bootstrap-debug  bootstrap configs are currently 
> broken. 
> 
> I'll deal with this tomorrow. 

So it looks like it is a very simple mistake, fixed by properly ignoring
-fchecking[=] in gen_producer_string ().  I'm including the use of
-fchecking=1 instead of -fchecking, given that -fchecking=2, which we may
default to, is documented to eventually affect code-generation.

Bootstrap / bootstrap-O3 running on x86_64-unknown-linux-gnu.

Richard.

2018-04-30  Richard Biener  

* Makefile.tpl (STAGE3_CFLAGS): Use -fchecking=1.
(STAGE3_TFLAGS): Likewise.
(STAGEtrain_CFLAGS): Filter out -fchecking=1.
(STAGEtrain_TFLAGS): Likewise.
* Makefile.in: Regenerate.

* dwarf2out.c (gen_producer_string): Ignore -fchecking[=].

Index: Makefile.tpl
===
--- Makefile.tpl(revision 259754)
+++ Makefile.tpl(working copy)
@@ -459,14 +459,14 @@ STAGE1_CONFIGURE_FLAGS = --disable-inter
 STAGE1_TFLAGS += -fno-checking
 STAGE2_CFLAGS += -fno-checking
 STAGE2_TFLAGS += -fno-checking
-STAGE3_CFLAGS += -fchecking
-STAGE3_TFLAGS += -fchecking
+STAGE3_CFLAGS += -fchecking=1
+STAGE3_TFLAGS += -fchecking=1
 
 STAGEprofile_CFLAGS = $(STAGE2_CFLAGS) -fprofile-generate
 STAGEprofile_TFLAGS = $(STAGE2_TFLAGS)
 
-STAGEtrain_CFLAGS = $(filter-out -fchecking,$(STAGE3_CFLAGS))
-STAGEtrain_TFLAGS = $(filter-out -fchecking,$(STAGE3_TFLAGS))
+STAGEtrain_CFLAGS = $(filter-out -fchecking=1,$(STAGE3_CFLAGS))
+STAGEtrain_TFLAGS = $(filter-out -fchecking=1,$(STAGE3_TFLAGS))
 
 STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use
 STAGEfeedback_TFLAGS = $(STAGE4_TFLAGS)
Index: Makefile.in
===
--- Makefile.in (revision 259754)
+++ Makefile.in (working copy)
@@ -536,14 +536,14 @@ STAGE1_CONFIGURE_FLAGS = --disable-inter
 STAGE1_TFLAGS += -fno-checking
 STAGE2_CFLAGS += -fno-checking
 STAGE2_TFLAGS += -fno-checking
-STAGE3_CFLAGS += -fchecking
-STAGE3_TFLAGS += -fchecking
+STAGE3_CFLAGS += -fchecking=1
+STAGE3_TFLAGS += -fchecking=1
 
 STAGEprofile_CFLAGS = $(STAGE2_CFLAGS) -fprofile-generate
 STAGEprofile_TFLAGS = $(STAGE2_TFLAGS)
 
-STAGEtrain_CFLAGS = $(filter-out -fchecking,$(STAGE3_CFLAGS))
-STAGEtrain_TFLAGS = $(filter-out -fchecking,$(STAGE3_TFLAGS))
+STAGEtrain_CFLAGS = $(filter-out -fchecking=1,$(STAGE3_CFLAGS))
+STAGEtrain_TFLAGS = $(filter-out -fchecking=1,$(STAGE3_TFLAGS))
 
 STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use
 STAGEfeedback_TFLAGS = $(STAGE4_TFLAGS)
Index: gcc/dwarf2out.c
===
--- gcc/dwarf2out.c (revision 259754)
+++ gcc/dwarf2out.c (working copy)
@@ -24234,6 +24234,8 @@ gen_producer_string (void)
   case OPT_fmacro_prefix_map_:
   case OPT_ffile_prefix_map_:
   case OPT_fcompare_debug:
+  case OPT_fchecking:
+  case OPT_fchecking_:
/* Ignore these.  */
continue;
   default: