[PATCH] Improve code generation of v += (c == 0) etc. on x86 (PR target/92140)

2019-10-18 Thread Jakub Jelinek
Hi!

As mentioned in the PR, x == 0 can be equivalently tested as x < 1U
and the latter form has the advantage that it sets the carry flag and if it
is consumed by an instruction that can directly use the carry flag, it is a
win.
The following patch adds a couple of (pre-reload only) define_insn_and_split
to handle the most common cases.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-10-18  Jakub Jelinek  
Uroš Bizjak  

PR target/92140
* config/i386/predicates.md (int_nonimmediate_operand): New special
predicate.
* config/i386/i386.md (*add3_eq, *add3_ne,
*add3_eq_0, *add3_ne_0, *sub3_eq, *sub3_ne,
*sub3_eq_1, *sub3_eq_0, *sub3_ne_0): New
define_insn_and_split patterns.

* gcc.target/i386/pr92140.c: New test.
* gcc.c-torture/execute/pr92140.c: New test.

--- gcc/config/i386/predicates.md.jj2019-10-07 13:09:06.486261815 +0200
+++ gcc/config/i386/predicates.md   2019-10-18 15:47:50.781855838 +0200
@@ -100,6 +100,15 @@ (define_special_predicate "ext_register_
(match_test "GET_MODE (op) == SImode")
(match_test "GET_MODE (op) == HImode"
 
+;; Match a DI, SI, HI or QImode nonimmediate_operand.
+(define_special_predicate "int_nonimmediate_operand"
+  (and (match_operand 0 "nonimmediate_operand")
+   (ior (and (match_test "TARGET_64BIT")
+(match_test "GET_MODE (op) == DImode"))
+   (match_test "GET_MODE (op) == SImode")
+   (match_test "GET_MODE (op) == HImode")
+   (match_test "GET_MODE (op) == QImode"
+
 ;; Match register operands, but include memory operands for TARGET_SSE_MATH.
 (define_predicate "register_ssemem_operand"
   (if_then_else
--- gcc/config/i386/i386.md.jj  2019-09-20 12:25:48.0 +0200
+++ gcc/config/i386/i386.md 2019-10-18 15:52:22.697717013 +0200
@@ -6843,6 +6843,228 @@ (define_insn "*addsi3_zext_cc_overflow_2
   [(set_attr "type" "alu")
(set_attr "mode" "SI")])
 
+;; x == 0 with zero flag test can be done also as x < 1U with carry flag
+;; test, where the latter is preferrable if we have some carry consuming
+;; instruction.
+;; For x != 0, we need to use x < 1U with negation of carry, i.e.
+;; + (1 - CF).
+(define_insn_and_split "*add3_eq"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+   (plus:SWI
+ (plus:SWI
+   (eq:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
+   (match_operand:SWI 1 "nonimmediate_operand"))
+ (match_operand:SWI 2 "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (PLUS, mode, operands)
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (reg:CC FLAGS_REG)
+   (compare:CC (match_dup 3) (const_int 1)))
+   (parallel [(set (match_dup 0)
+  (plus:SWI
+(plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
+  (match_dup 1))
+(match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_insn_and_split "*add3_ne"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+   (plus:SWI
+ (plus:SWI
+   (ne:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
+   (match_operand:SWI 1 "nonimmediate_operand"))
+ (match_operand:SWI 2 "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "CONST_INT_P (operands[2])
+   && (mode != DImode
+   || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x8000))
+   && ix86_binary_operator_ok (PLUS, mode, operands)
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (reg:CC FLAGS_REG)
+   (compare:CC (match_dup 3) (const_int 1)))
+   (parallel [(set (match_dup 0)
+  (minus:SWI
+(minus:SWI (match_dup 1)
+   (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
+(match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[2] = gen_int_mode (~INTVAL (operands[2]),
+ mode == DImode ? SImode : mode);
+})
+
+(define_insn_and_split "*add3_eq_0"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+   (plus:SWI
+ (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
+ (match_operand:SWI 1 "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_unary_operator_ok (PLUS, mode, operands)
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (reg:CC FLAGS_REG)
+   (compare:CC (match_dup 2) (const_int 1)))
+   (parallel [(set (match_dup 0)
+  (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
+(match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+  if (!nonimmediate_operand (operands[1], mode))
+operands[1] = force_reg (mode, operands[1]);
+})
+
+(define_insn_and_split "*add3_ne_0"
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+   (plus:SWI
+ (ne:SWI (match_operand 2 "int_nonimmediate_operand") 

Re: [ C++ ] [ PATCH ] [ RFC ] p1301 - [[nodiscard("should have a reason")]]

2019-10-18 Thread Jason Merrill

On 10/18/19 1:54 AM, JeanHeyd Meneide wrote:

... And I am very tired and forgot to attach the patch. Again. Sorry...!

On Fri, Oct 18, 2019 at 1:54 AM JeanHeyd Meneide
 wrote:


Dear Jason,

On Thu, Oct 17, 2019 at 3:51 PM Jason Merrill  wrote:

  > FAIL: g++.dg/cpp0x/gen-attrs-67.C  -std=c++11  (test for errors, line 8)
  > FAIL: g++.dg/cpp1z/feat-cxx1z.C  -std=gnu++17 (test for excess errors)
  > FAIL: g++.dg/cpp1z/nodiscard4.C  -std=c++11 (test for excess errors)
  > FAIL: g++.dg/cpp1z/nodiscard4.C  -std=c++11  (test for warnings, line 12)
  > FAIL: g++.dg/cpp1z/nodiscard4.C  -std=c++11  (test for warnings, line 13)
  > FAIL: g++.dg/cpp2a/feat-cxx2a.C   (test for excess errors)


  Sorry about that! I implemented a bit of a better warning to
cover gen-attrs-67, and bumped the feature test macro value checks in
the feat tests. The rest should be fine now too.

  Let me know if anything else seems off!



+ "%qE attribute%<'%>s argument list is empty",


Using %<'%> results in "attribute'''s" in the output, not what we want 
here; that's only for when you are trying to give a diagnostic about an 
apostrophe in the source.  You can use %' for an apostrophe, but I think 
we might as well drop the 's entirely.  Here's what I'm committing.


Thanks a lot for the patch!
commit 073503af93e3409553fc32107cbcd316ddabc7c8
Author: JeanHeyd Meneide 
Date:   Fri Oct 18 01:54:47 2019 -0400

Implement C++20 P1301 [[nodiscard("should have a reason")]].

2019-10-17  JeanHeyd Meneide  

gcc/
* escaped_string.h (escaped_string): New header.
* tree.c (escaped_string): Remove escaped_string class.

gcc/c-family
* c-lex.c (c_common_has_attribute): Update nodiscard value.

gcc/cp/
* tree.c (handle_nodiscard_attribute) Added C++2a nodiscard
string message.
(std_attribute_table) Increase nodiscard argument handling
max_length from 0 to 1.
* parser.c (cp_parser_check_std_attribute): Add requirement
that nodiscard only be seen once in attribute-list.
(cp_parser_std_attribute): Check that empty parenthesis lists are
not specified for attributes that have max_length > 0 (e.g.
[[attr()]]).
* cvt.c (maybe_warn_nodiscard): Add nodiscard message to
output, if applicable.
(convert_to_void): Allow constructors to be nodiscard-able (P1771).

gcc/testsuite/g++.dg/cpp0x
* gen-attrs-67.C: Test new error message for empty-parenthesis-list.

gcc/testsuite/g++.dg/cpp2a
* nodiscard-construct.C: New test.
* nodiscard-once.C: New test.
* nodiscard-reason-nonstring.C: New test.
* nodiscard-reason-only-one.C: New test.
* nodiscard-reason.C: New test.

Reviewed-by: Jason Merrill 

diff --git a/gcc/escaped_string.h b/gcc/escaped_string.h
new file mode 100644
index 000..b83e1281f27
--- /dev/null
+++ b/gcc/escaped_string.h
@@ -0,0 +1,43 @@
+/* Shared escaped string class.
+   Copyright (C) 1999-2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#ifndef GCC_ESCAPED_STRING_H
+#define GCC_ESCAPED_STRING_H
+
+#include 
+
+/* A class to handle converting a string that might contain
+   control characters, (eg newline, form-feed, etc), into one
+   in which contains escape sequences instead.  */
+
+class escaped_string
+{
+ public:
+  escaped_string () { m_owned = false; m_str = NULL; };
+  ~escaped_string () { if (m_owned) free (m_str); }
+  operator const char *() const { return m_str; }
+  void escape (const char *);
+ private:
+  escaped_string(const escaped_string&) {}
+  escaped_string& operator=(const escaped_string&) { return *this; }
+  char *m_str;
+  bool  m_owned;
+};
+
+#endif /* ! GCC_ESCAPED_STRING_H */
diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c
index e3c602fbb8d..fb05b5f8af0 100644
--- a/gcc/c-family/c-lex.c
+++ b/gcc/c-family/c-lex.c
@@ -353,13 +353,14 @@ c_common_has_attribute (cpp_reader *pfile)
 	  else if (is_attribute_p ("deprecated", attr_name))
 		result = 201309;
 	  else if (is_attribute_p ("maybe_unused", attr_name)
-		   || is_attribute_p ("nodiscard", attr_name)
 		   || is_attribute_p ("fallthrough", attr_name))
 		result = 201603;
 	  

Re: RFA [PATCH] * lock-and-run.sh: Check for process existence rather than timeout.

2019-10-18 Thread Alexandre Oliva
Hello, Jason,

On Oct 14, 2019, Jason Merrill  wrote:

> Alex, you had a lot of helpful comments when I first wrote this, any thoughts
> on this revision?

I think the check of the pid file could be made slightly simpler and
cheaper if we created it using:

   echo $$ > $lockdir/pidT && mv $lockdir/pidT $lockdir/pid

instead of

> +touch $lockdir/$$



> + pid="`(cd $lockdir; echo *)`"

The ""s are implicit in a shell assignment, though there really
shouldn't be more than one PID-named file in the dir.  With the change
suggested above, this would become

pid=`cat $lockdir/pid 2>/dev/null`

There's a slight possibility of hitting this right between the creation
of the dir and the creation of the pid file, thus the 2>/dev/null.

> + if ps "$pid" >/dev/null; then

could be tested with much lower overhead:

if test -z "$pid" || kill -0 $pid ; then

though it might make sense to have a different test and error message
for the case of the absent pid file.

We might also wish to use different lock-breaking logic for that case,
too, e.g. checking that the timestamp of the dir didn't change by
comparing `ls -ld $lockdir` with what we got 30 seconds before.  If it
changed or the output is now empty, we just lost the race again.

It's unlikely that the dir would remain unchanged for several seconds
without the pid file, so if we get the same timestamp after 30 seconds,
it's likely that something went wrong with the lock holder, though it's
not impossible to imagine a scenario in which the lock program that just
succeeded in creating the dir got stopped (^Z) or killed-9 just before
creating the PID file.


Even then, maybe breaking the lock is not such a great idea in
general...

Though mkdir is an operation that forces a synchronization, reading a
file without a filesystem lock isn't.  The rename alleviates that a bit,
but it's not entirely unreasonable for an attempt to read the file to
cache the absence of the file and not notice a creation shortly
afterward.  This would be a lot more of an issue in case of contention
for the lock between different clients of a shared networked filesystem,
though we might imagine highly parallel systems to eventually hit such
issues as well.

But just the possibility of contention across a shared network
filesystem would give me pause, out of realizing that checking for a
local process with the same PID would do no good.  And then, kill -0
would not succeed if the lock holder was started by a different user,
unlike ps.

What if we printed an error message suggesting the command to clean up,
and then errored out, instead of retrying forever or breaking the lock
and proceeding?  Several programs that rely on lock files (git and svn
come to mind) seem to be taking such an approach these days, presumably
because of all the difficulties in automating the double-checking in all
potential scenarios.

-- 
Alexandre Oliva, freedom fighter  he/him   https://FSFLA.org/blogs/lxo
Be the change, be Free!FSF VP & FSF Latin America board member
GNU Toolchain EngineerFree Software Evangelist
Hay que enGNUrecerse, pero sin perder la terGNUra jamás - Che GNUevara


Re: [committed] correct strcmp() == 0 result for unknown strings (PR 92157)

2019-10-18 Thread Jeff Law
On 10/18/19 4:27 PM, Martin Sebor wrote:
> The optimization to fold (strcmp() == 0) results involving
> arrays/strings of unequal size/length has a bug where it is
> unprepared for the compute_string_length() function to return
> an invalid length as an indication that the length is unknown.
> This leads to some strings that are unequal being considered
> equal.
> 
> The attached patch corrects this handling.  I have committed
> it in r277194 with Jeff's okay.
And just for the record, this was caught by the Fedora tester I spoke
about at Cauldron.  I'm spinning the entire distro against our weekly
snapshots again.

Most of the issues have been package level problems (missing #includes,
narrowing conversions, fortran argument passing, assumptions about the
inliner, etc).   Those are largely under control, leading to...

My focus now is on chasing down the codegen issues and I've contacted a
few folks already with problems in their code.  There'll certainly be
more over time.  I'm starting with the easier to understand problems in
the hopes the tough ones (python & perl interpreters) get fixed along
the way.

The nastiest problem so far is the improved tail call opts from Jakub in
May.  AFAICT they're doing the right thing in the caller which is a good
indication that something is going wrong in the indirect sibling callee
(ugh).

jeff


[committed] correct strcmp() == 0 result for unknown strings (PR 92157)

2019-10-18 Thread Martin Sebor

The optimization to fold (strcmp() == 0) results involving
arrays/strings of unequal size/length has a bug where it is
unprepared for the compute_string_length() function to return
an invalid length as an indication that the length is unknown.
This leads to some strings that are unequal being considered
equal.

The attached patch corrects this handling.  I have committed
it in r277194 with Jeff's okay.

Martin

PR tree-optimization/92157 - incorrect strcmp() == 0 result for unknown strings

gcc/testsuite/ChangeLog:

	PR tree-optimization/92157
	* gcc.dg/strlenopt-69.c: Disable test failing due to PR 92155.
	* gcc.dg/strlenopt-87.c: New test.

gcc/ChangeLog:

	PR tree-optimization/92157
	* tree-ssa-strlen.c (handle_builtin_string_cmp): Be prepared for
	compute_string_length to return a negative result.


Index: gcc/testsuite/gcc.dg/strlenopt-69.c
===
--- gcc/testsuite/gcc.dg/strlenopt-69.c	(revision 277156)
+++ gcc/testsuite/gcc.dg/strlenopt-69.c	(working copy)
@@ -66,11 +66,14 @@ void test_empty_string (void)
   b4[2] = '\0';
   A (0 == strcmp ([2], [2]));
 
+#if 0
+  /* The following isn't handled yet due to PR 92155.  */
   clobber (a4, b4);
 
   memset (a4, 0, sizeof a4);
   memset (b4, 0, sizeof b4);
   A (0 == strcmp (a4, b4));
+#endif
 }
 
 /* Verify that comparison of dynamically created strings with unknown
Index: gcc/testsuite/gcc.dg/strlenopt-87.c
===
--- gcc/testsuite/gcc.dg/strlenopt-87.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/strlenopt-87.c	(working copy)
@@ -0,0 +1,105 @@
+/* PR tree-optimization/92157 - incorrect strcmp() == 0 result for unknown
+   strings
+   { dg-do run }
+   { dg-options "-O2 -Wall" } */
+
+#include "strlenopt.h"
+
+
+char a2[2], a3[3];
+
+
+static inline __attribute__ ((always_inline)) int
+verify_not_equal (const char *s, const char *t, int x)
+{
+  int n = x < 0 ? strlen (s) : 0 < x ? strlen (t) : strlen (s) + strlen (t);
+
+  if (strcmp (t, s) == 0)
+abort ();
+
+  return n;
+}
+
+__attribute__ ((noipa)) int test_a2_s (const char *s)
+{
+  return verify_not_equal (a2, s, 0);
+}
+
+__attribute__ ((noipa)) int test_a2_a3 (void)
+{
+  return verify_not_equal (a2, a3, 0);
+}
+
+__attribute__ ((noipa)) int test_a3_a2 (void)
+{
+  return verify_not_equal (a3, a2, 0);
+}
+
+__attribute__ ((noipa)) int test_s_a2 (const char *s)
+{
+  return verify_not_equal (s, a2, 0);
+}
+
+
+__attribute__ ((noipa)) int test_a2_s_1 (const char *s)
+{
+  return verify_not_equal (a2, s, -1);
+}
+
+__attribute__ ((noipa)) int test_a2_a3_1 (void)
+{
+  return verify_not_equal (a2, a3, -1);
+}
+
+__attribute__ ((noipa)) int test_a3_a2_1 (void)
+{
+  return verify_not_equal (a3, a2, -1);
+}
+
+__attribute__ ((noipa)) int test_s_a2_1 (const char *s)
+{
+  return verify_not_equal (s, a2, -1);
+}
+
+
+__attribute__ ((noipa)) int test_a2_s_2 (const char *s)
+{
+  return verify_not_equal (a2, s, +1);
+}
+
+__attribute__ ((noipa)) int test_a2_a3_2 (void)
+{
+  return verify_not_equal (a2, a3, +1);
+}
+
+__attribute__ ((noipa)) int test_a3_a2_2 (void)
+{
+  return verify_not_equal (a3, a2, +1);
+}
+
+__attribute__ ((noipa)) int test_s_a2_2 (const char *s)
+{
+  return verify_not_equal (s, a2, +1);
+}
+
+int main (void)
+{
+  a2[0] = '1';
+  a3[0] = '1';
+  a3[0] = '2';
+
+  test_a2_s ("");
+  test_a2_a3 ();
+  test_a3_a2 ();
+  test_s_a2 ("");
+
+  test_a2_s_1 ("");
+  test_a2_a3_1 ();
+  test_a3_a2_1 ();
+  test_s_a2_1 ("");
+
+  test_a2_s_2 ("");
+  test_a2_a3_2 ();
+  test_a3_a2_2 ();
+  test_s_a2_2 ("");
+}
Index: gcc/tree-ssa-strlen.c
===
--- gcc/tree-ssa-strlen.c	(revision 277156)
+++ gcc/tree-ssa-strlen.c	(working copy)
@@ -3842,7 +3842,7 @@ handle_builtin_string_cmp (gimple_stmt_iterator *g
   HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
 
   if (idx1)
-cstlen1 = compute_string_length (idx1) + 1;
+cstlen1 = compute_string_length (idx1);
   else
 arysiz1 = determine_min_objsize (arg1);
 
@@ -3853,7 +3853,7 @@ handle_builtin_string_cmp (gimple_stmt_iterator *g
 
   /* Repeat for the second argument.  */
   if (idx2)
-cstlen2 = compute_string_length (idx2) + 1;
+cstlen2 = compute_string_length (idx2);
   else
 arysiz2 = determine_min_objsize (arg2);
 
@@ -3860,6 +3860,14 @@ handle_builtin_string_cmp (gimple_stmt_iterator *g
   if (cstlen2 < 0 && arysiz2 < 0)
 return false;
 
+  if (cstlen1 < 0 && cstlen2 < 0)
+return false;
+
+  if (cstlen1 >= 0)
+++cstlen1;
+  if (cstlen2 >= 0)
+++cstlen2;
+
   /* The exact number of characters to compare.  */
   HOST_WIDE_INT cmpsiz = bound < 0 ? cstlen1 < 0 ? cstlen2 : cstlen1 : bound;
   /* The size of the array in which the unknown string is stored.  */


[testsuite] Add test for PR91532

2019-10-18 Thread Prathamesh Kulkarni
Hi Richard,
Sorry for not adding the test in PR91532 fix.
Is the attached patch OK to commit ?

Thanks,
Prathamesh
2019-10-18  Prathamesh Kulkarni  

PR tree-optimization/91532
testsuite/
* gcc.target/aarch64/sve/fmla_2.c: Add dg-scan check for deleted store.

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c
index 5c04bcdb3f5..bebb073d1f8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O3" } */
+/* { dg-options "-O3 -fdump-tree-ifcvt-details" } */
 
 #include 
 
@@ -15,5 +15,6 @@ f (double *restrict a, double *restrict b, double *restrict c,
 }
 }
 
+/* { dg-final { scan-tree-dump-times "Deleted dead store" 1 "ifcvt" } } */
 /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m, 
z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
 /* { dg-final { scan-assembler-not {\tfmad\t} } } */


[PATCH 14/29] [arm] Early split simple DImode equality comparisons

2019-10-18 Thread Richard Earnshaw

This is the first step of early splitting all the DImode comparison
operations.  We start by factoring the DImode handling out of
arm_gen_compare_reg into its own function.

Simple DImode equality comparisons (such as equality with zero, or
equality with a constant that is zero in one of the two word values
that it comprises) can be done using a single subtract followed by an
ORRS instruction.  This avoids the need for conditional execution.

For example, (r0 != 5) can be written as

SUB Rt, R0, #5
ORRSRt, Rt, R1

The ORRS is now expanded using an SImode pattern that already exists
in the MD file and this gives the register allocator more freedom to
select registers (consecutive pairs are no-longer required).
Furthermore, we can then delete the arm_cmpdi_zero pattern as it is
no-longer required.  We use SUB for the value adjustment as this has a
generally more flexible range of immediates than XOR and what's more
has the opportunity to be relaxed in thumb2 to a 16-bit SUBS
instruction.

* config/arm/arm.c (arm_select_cc_mode): For DImode equality tests
return CC_Zmode if comparing against a constant where one word is
zero.
(arm_gen_compare_reg): Split DImode handling to ...
(arm_gen_dicompare_reg): ... here.  Handle equality comparisons
against simple constants.
* config/arm/arm.md (arm_cmpdi_zero): Delete pattern.
---
 gcc/config/arm/arm.c  | 87 +--
 gcc/config/arm/arm.md | 11 --
 2 files changed, 68 insertions(+), 30 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e33b6b14d28..64367b42332 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15350,8 +15350,14 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
 	case EQ:
 	case NE:
 	  /* A DImode comparison against zero can be implemented by
-	 or'ing the two halves together.  */
-	  if (y == const0_rtx)
+	 or'ing the two halves together.  We can also handle
+	 immediates where one word of that value is zero by
+	 subtracting the non-zero word from the corresponding word
+	 in the other register and then ORRing it with the other
+	 word.  */
+	  if (CONST_INT_P (y)
+	  && ((UINTVAL (y) & 0x) == 0
+		  || (UINTVAL (y) >> 32) == 0))
 	return CC_Zmode;
 
 	  /* We can do an equality test in three Thumb instructions.  */
@@ -15393,37 +15399,64 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
   return CCmode;
 }
 
-/* X and Y are two things to compare using CODE.  Emit the compare insn and
-   return the rtx for register 0 in the proper mode.  FP means this is a
-   floating point compare: I don't think that it is needed on the arm.  */
-rtx
-arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
+/* X and Y are two (DImode) things to compare for the condition CODE.  Emit
+   the sequence of instructions needed to generate a suitable condition
+   code register.  Return the CC register result.  */
+static rtx
+arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
 {
-  machine_mode mode;
-  rtx cc_reg;
-  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
+  /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
+  gcc_assert (TARGET_32BIT);
 
   /* We might have X as a constant, Y as a register because of the predicates
  used for cmpdi.  If so, force X to a register here.  */
-  if (dimode_comparison && !REG_P (x))
+  if (!REG_P (x))
 x = force_reg (DImode, x);
 
-  mode = SELECT_CC_MODE (code, x, y);
-  cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+  machine_mode mode = SELECT_CC_MODE (code, x, y);
+  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 
-  if (dimode_comparison
-  && mode != CC_CZmode)
+  if (mode != CC_CZmode)
 {
   rtx clobber, set;
 
   /* To compare two non-zero values for equality, XOR them and
 	 then compare against zero.  Not used for ARM mode; there
 	 CC_CZmode is cheaper.  */
-  if (mode == CC_Zmode && y != const0_rtx)
+  if (mode == CC_Zmode)
 	{
-	  gcc_assert (!reload_completed);
-	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
-	  y = const0_rtx;
+	  mode = CC_NOOVmode;
+	  PUT_MODE (cc_reg, mode);
+	  if (y != const0_rtx)
+	{
+	  gcc_assert (CONST_INT_P (y));
+	  rtx xlo, xhi, ylo, yhi;
+	  arm_decompose_di_binop (x, y, , , , );
+	  if (!scratch)
+		scratch = gen_reg_rtx (SImode);
+	  if (ylo == const0_rtx)
+		{
+		  yhi = GEN_INT (-INTVAL(yhi));
+		  if (!arm_add_operand (yhi, SImode))
+		yhi = force_reg (SImode, yhi);
+		  emit_insn (gen_addsi3 (scratch, xhi, yhi));
+		  y = xlo;
+		}
+	  else
+		{
+		  gcc_assert (yhi == const0_rtx);
+		  ylo = GEN_INT (-INTVAL(ylo));
+		  if (!arm_add_operand (ylo, SImode))
+		ylo = force_reg (SImode, ylo);
+		  emit_insn (gen_addsi3 (scratch, xlo, ylo));
+		  y = xhi;
+		}
+	  x = gen_rtx_IOR (SImode, scratch, y);
+	  y = 

[PATCH 29/29] [arm] Fix testsuite nit when compiling for thumb2

2019-10-18 Thread Richard Earnshaw

In thumb2 we now generate a NEGS instruction rather than RSBS, so this
test needs updating.

* gcc.target/arm/negdi-3.c: Update expected output to allow NEGS.
---
 gcc/testsuite/gcc.target/arm/negdi-3.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/negdi-3.c b/gcc/testsuite/gcc.target/arm/negdi-3.c
index 76ddf49fc0d..1520e9c65df 100644
--- a/gcc/testsuite/gcc.target/arm/negdi-3.c
+++ b/gcc/testsuite/gcc.target/arm/negdi-3.c
@@ -8,10 +8,10 @@ signed long long negdi_zero_extendsidi (unsigned int x)
 }
 /*
 Expected output:
-rsbsr0, r0, #0
+rsbsr0, r0, #0 (arm) | negs	r0, r0 (thumb2)
 sbc r1, r1, r1
 */
-/* { dg-final { scan-assembler-times "rsb" 1 } } */
+/* { dg-final { scan-assembler-times "rsbs|negs" 1 } } */
 /* { dg-final { scan-assembler-times "sbc" 1 } } */
 /* { dg-final { scan-assembler-times "mov" 0 } } */
 /* { dg-final { scan-assembler-times "rsc" 0 } } */


[PATCH 07/29] [arm] Remove redundant DImode subtract patterns

2019-10-18 Thread Richard Earnshaw

Now that we early split DImode subtracts, the patterns to emit the
original and to match zero-extend with subtraction or negation are
no-longer useful.

* config/arm/arm.md (arm_subdi3): Delete insn.
(zextendsidi_negsi, negdi_extendsidi): Delete insn_and_split.
---
 gcc/config/arm/arm.md | 102 --
 1 file changed, 102 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 99d931525f8..f597a277c17 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1161,18 +1161,6 @@ (define_expand "subdi3"
   "
 )
 
-(define_insn "*arm_subdi3"
-  [(set (match_operand:DI 0 "arm_general_register_operand" "=,,")
-	(minus:DI (match_operand:DI 1 "arm_general_register_operand" "0,r,0")
-		  (match_operand:DI 2 "arm_general_register_operand" "r,0,0")))
-   (clobber (reg:CC CC_REGNUM))]
-  "TARGET_32BIT"
-  "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
-  [(set_attr "conds" "clob")
-   (set_attr "length" "8")
-   (set_attr "type" "multiple")]
-)
-
 (define_expand "subsi3"
   [(set (match_operand:SI   0 "s_register_operand")
 	(minus:SI (match_operand:SI 1 "reg_or_int_operand")
@@ -3866,96 +3854,6 @@ (define_expand "negdf2"
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
   "")
 
-(define_insn_and_split "*zextendsidi_negsi"
-  [(set (match_operand:DI 0 "s_register_operand" "=r")
-(zero_extend:DI (neg:SI (match_operand:SI 1 "s_register_operand" "r"]
-   "TARGET_32BIT"
-   "#"
-   ""
-   [(set (match_dup 2)
- (neg:SI (match_dup 1)))
-(set (match_dup 3)
- (const_int 0))]
-   {
-  operands[2] = gen_lowpart (SImode, operands[0]);
-  operands[3] = gen_highpart (SImode, operands[0]);
-   }
- [(set_attr "length" "8")
-  (set_attr "type" "multiple")]
-)
-
-;; Negate an extended 32-bit value.
-(define_insn_and_split "*negdi_extendsidi"
-  [(set (match_operand:DI 0 "s_register_operand" "=l,r")
-	(neg:DI (sign_extend:DI
-		 (match_operand:SI 1 "s_register_operand" "l,r"
-   (clobber (reg:CC CC_REGNUM))]
-  "TARGET_32BIT"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-  {
-rtx low = gen_lowpart (SImode, operands[0]);
-rtx high = gen_highpart (SImode, operands[0]);
-
-if (reg_overlap_mentioned_p (low, operands[1]))
-  {
-	/* Input overlaps the low word of the output.  Use:
-		asr	Rhi, Rin, #31
-		rsbs	Rlo, Rin, #0
-		rsc	Rhi, Rhi, #0 (thumb2: sbc Rhi, Rhi, Rhi, lsl #1).  */
-	rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
-
-	emit_insn (gen_rtx_SET (high,
-gen_rtx_ASHIFTRT (SImode, operands[1],
-		  GEN_INT (31;
-
-	emit_insn (gen_subsi3_compare (low, const0_rtx, operands[1]));
-	if (TARGET_ARM)
-	  emit_insn (gen_rtx_SET (high,
-  gen_rtx_MINUS (SImode,
-		 gen_rtx_MINUS (SImode,
-const0_rtx,
-high),
-		 gen_rtx_LTU (SImode,
-			  cc_reg,
-			  const0_rtx;
-	else
-	  {
-	rtx two_x = gen_rtx_ASHIFT (SImode, high, GEN_INT (1));
-	emit_insn (gen_rtx_SET (high,
-gen_rtx_MINUS (SImode,
-		   gen_rtx_MINUS (SImode,
-  high,
-  two_x),
-		   gen_rtx_LTU (SImode,
-cc_reg,
-const0_rtx;
-	  }
-  }
-else
-  {
-	/* No overlap, or overlap on high word.  Use:
-		rsb	Rlo, Rin, #0
-		bic	Rhi, Rlo, Rin
-		asr	Rhi, Rhi, #31
-	   Flags not needed for this sequence.  */
-	emit_insn (gen_rtx_SET (low, gen_rtx_NEG (SImode, operands[1])));
-	emit_insn (gen_rtx_SET (high,
-gen_rtx_AND (SImode,
-	 gen_rtx_NOT (SImode, operands[1]),
-	 low)));
-	emit_insn (gen_rtx_SET (high,
-gen_rtx_ASHIFTRT (SImode, high,
-		  GEN_INT (31;
-  }
-DONE;
-  }
-  [(set_attr "length" "12")
-   (set_attr "arch" "t2,*")
-   (set_attr "type" "multiple")]
-)
-
 ;; abssi2 doesn't really clobber the condition codes if a different register
 ;; is being set.  To keep things simple, assume during rtl manipulations that
 ;; it does, but tell the final scan operator the truth.  Similarly for


[PATCH 21/29] [arm] Improve code generation for addvsi4.

2019-10-18 Thread Richard Earnshaw

Similar to the improvements for uaddvsi4, this patch improves the code
generation for addvsi4 to handle immediates and to add alternatives
that better target thumb2.  To do this we separate out the expansion
of uaddvsi4 from that of uaddvdi4 and then add an additional pattern
to handle constants.  Also, while doing this I've fixed the incorrect
usage of NE instead of COMPARE in the generated RTL.

* config/arm/arm.md (addv4): Delete.
(addvsi4): New pattern.  Handle immediate values that the architecture
supports.
(addvdi4): New pattern.
(addsi3_compareV): Rename to ...
(addsi3_compareV_reg): ... this.  Add constraints for thumb2 variants
and use COMPARE rather than NE.
(addsi3_compareV_imm): New pattern.
* config/arm/arm.c (arm_select_cc_mode): Return CC_Vmode for
a signed-overflow check.
---
 gcc/config/arm/arm.c  |  8 ++
 gcc/config/arm/arm.md | 63 ---
 2 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index eebbdc3d9c2..638c82df25f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15411,6 +15411,14 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
 	  || arm_borrow_operation (y, DImode)))
 return CC_Bmode;
 
+  if (GET_MODE (x) == DImode
+  && (op == EQ || op == NE)
+  && GET_CODE (x) == PLUS
+  && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+  && GET_CODE (y) == SIGN_EXTEND
+  && GET_CODE (XEXP (y, 0)) == PLUS)
+return CC_Vmode;
+
   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
 return GET_MODE (x);
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 9f0e43571fd..b5214c79c35 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -488,14 +488,30 @@ (define_expand "adddi3"
   "
 )
 
-(define_expand "addv4"
-  [(match_operand:SIDI 0 "register_operand")
-   (match_operand:SIDI 1 "register_operand")
-   (match_operand:SIDI 2 "register_operand")
+(define_expand "addvsi4"
+  [(match_operand:SI 0 "s_register_operand")
+   (match_operand:SI 1 "s_register_operand")
+   (match_operand:SI 2 "arm_add_operand")
(match_operand 3 "")]
   "TARGET_32BIT"
 {
-  emit_insn (gen_add3_compareV (operands[0], operands[1], operands[2]));
+  if (CONST_INT_P (operands[2]))
+emit_insn (gen_addsi3_compareV_imm (operands[0], operands[1], operands[2]));
+  else
+emit_insn (gen_addsi3_compareV_reg (operands[0], operands[1], operands[2]));
+  arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+
+  DONE;
+})
+
+(define_expand "addvdi4"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand:DI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_32BIT"
+{
+  emit_insn (gen_adddi3_compareV (operands[0], operands[1], operands[2]));
   arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
 
   DONE;
@@ -770,21 +786,48 @@ (define_insn "adddi3_compareV"
(set_attr "type" "multiple")]
 )
 
-(define_insn "addsi3_compareV"
+(define_insn "addsi3_compareV_reg"
   [(set (reg:CC_V CC_REGNUM)
-	(ne:CC_V
+	(compare:CC_V
 	  (plus:DI
-	(sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
-	(sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "%l,0,r"))
+	(sign_extend:DI (match_operand:SI 2 "register_operand" "l,r,r")))
 	  (sign_extend:DI (plus:SI (match_dup 1) (match_dup 2)
-   (set (match_operand:SI 0 "register_operand" "=r")
+   (set (match_operand:SI 0 "register_operand" "=l,r,r")
 	(plus:SI (match_dup 1) (match_dup 2)))]
   "TARGET_32BIT"
   "adds%?\\t%0, %1, %2"
   [(set_attr "conds" "set")
+   (set_attr "arch" "t2,t2,*")
+   (set_attr "length" "2,2,4")
(set_attr "type" "alus_sreg")]
 )
 
+(define_insn "addsi3_compareV_imm"
+  [(set (reg:CC_V CC_REGNUM)
+	(compare:CC_V
+	  (plus:DI
+	(sign_extend:DI
+	 (match_operand:SI 1 "register_operand" "l,0,l,0,r,r"))
+	(match_operand 2 "arm_addimm_operand" "Pd,Py,Px,Pw,I,L"))
+	  (sign_extend:DI (plus:SI (match_dup 1) (match_dup 2)
+   (set (match_operand:SI 0 "register_operand" "=l,l,l,l,r,r")
+	(plus:SI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT
+   && INTVAL (operands[2]) == ARM_SIGN_EXTEND (INTVAL (operands[2]))"
+  "@
+   adds%?\\t%0, %1, %2
+   adds%?\\t%0, %0, %2
+   subs%?\\t%0, %1, #%n2
+   subs%?\\t%0, %0, #%n2
+   adds%?\\t%0, %1, %2
+   subs%?\\t%0, %1, #%n2"
+  [(set_attr "conds" "set")
+   (set_attr "arch" "t2,t2,t2,t2,*,*")
+   (set_attr "length" "2,2,2,2,4,4")
+   (set_attr "type" "alus_imm")]
+)
+
 (define_insn "addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV


[PATCH 06/29] [arm] Early split subdi3

2019-10-18 Thread Richard Earnshaw

This patch adds early splitting of subdi3 so that the individual
operations can be seen by the optimizers, particularly combine.  This
should allow us to do at least as good a job as previously, but with
far fewer patterns in the machine description.

This is just the initial patch to add the early splitting.  The
cleanups will follow later.

A special trick is used to handle the 'reverse subtract and compare'
where a register is subtracted from a constant.  The natural
comparison

(COMPARE (const) (reg))

is not canonical in this case and combine will never correctly
generate it (it instead tries to swap the order of the operands).  To handle this
we write the comparison as

(COMPARE (NOT (reg)) (~const)),

which has the same result for EQ, NE, LTU, LEU, GTU and GEU, which are
all the cases we are really interested in here.

Finally, we delete the negdi2 pattern.  The generic expanders will use
our new subdi3 expander if this pattern is missing and that can handle
the negate case just fine.

* config/arm/arm-modes.def (CC_RSB): New CC mode.
* config/arm/predicates.md (arm_borrow_operation): Handle CC_RSBmode.
* config/arm/arm.c (arm_select_cc_mode): Detect when we should
return CC_RSBmode.
(maybe_get_arm_condition_code): Handle CC_RSBmode.
* config/arm/arm.md (subsi3_carryin): Make this pattern available to
expand.
(subdi3): Rewrite to early-expand the sub-operations.
(rsb_im_compare): New pattern.
(negdi2): Delete.
(negdi2_insn): Delete.
(arm_negsi2): Correct type attribute to alu_imm.
(negsi2_0compare): New insn pattern.
(negsi2_carryin): New insn pattern.
---
 gcc/config/arm/arm-modes.def |   4 +
 gcc/config/arm/arm.c |  23 ++
 gcc/config/arm/arm.md| 141 ---
 gcc/config/arm/predicates.md |   2 +-
 4 files changed, 141 insertions(+), 29 deletions(-)

diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index 8f131c369b5..4fa7f1b43e5 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -36,6 +36,9 @@ ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
CC_Nmode should be used if only the N (sign) flag is set correctly
CC_CZmode should be used if only the C and Z flags are correct
(used for DImode unsigned comparisons).
+   CC_RSBmode should be used where the comparison is set by an RSB immediate,
+ or NEG instruction.  The form of the comparison for (const - reg) will
+ be (COMPARE (not (reg)) (~const)).
CC_NCVmode should be used if only the N, C, and V flags are correct
(used for DImode signed comparisons).
CCmode should be used otherwise.  */
@@ -45,6 +48,7 @@ CC_MODE (CC_Z);
 CC_MODE (CC_CZ);
 CC_MODE (CC_NCV);
 CC_MODE (CC_SWP);
+CC_MODE (CC_RSB);
 CC_MODE (CCFP);
 CC_MODE (CCFPE);
 CC_MODE (CC_DNE);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index db18651346f..9a779e24cac 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15214,6 +15214,17 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
 	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
 return CC_NOOVmode;
 
+  /* An unsigned comparison of ~reg with a const is really a special
+ canonicalization of compare (~const, reg), which is a reverse
+ subtract operation.  We may not get here if CONST is 0, but that
+ doesn't matter because ~0 isn't a valid immediate for RSB.  */
+  if (GET_MODE (x) == SImode
+  && GET_CODE (x) == NOT
+  && CONST_INT_P (y)
+  && (op == EQ || op == NE
+	  || op == LTU || op == LEU || op == GEU || op == GTU))
+return CC_RSBmode;
+
   if (GET_MODE (x) == QImode && (op == EQ || op == NE))
 return CC_Zmode;
 
@@ -23629,6 +23640,18 @@ maybe_get_arm_condition_code (rtx comparison)
 	default: return ARM_NV;
 	}
 
+case E_CC_RSBmode:
+  switch (comp_code)
+	{
+	case NE: return ARM_NE;
+	case EQ: return ARM_EQ;
+	case GEU: return ARM_CS;
+	case GTU: return ARM_HI;
+	case LEU: return ARM_LS;
+	case LTU: return ARM_CC;
+	default: return ARM_NV;
+	}
+
 case E_CCmode:
   switch (comp_code)
 	{
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index fbe154a9873..99d931525f8 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -989,7 +989,7 @@ (define_insn "subsi3_compare1"
(set_attr "type" "alus_sreg")]
 )
 
-(define_insn "*subsi3_carryin"
+(define_insn "subsi3_carryin"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
 	(minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I,Pz")
 			(match_operand:SI 2 "s_register_operand" "r,r,r"))
@@ -1094,12 +1094,72 @@ (define_expand "adddf3"
 (define_expand "subdi3"
  [(parallel
[(set (match_operand:DI0 "s_register_operand")
-	  (minus:DI (match_operand:DI 1 "s_register_operand")
+	  (minus:DI (match_operand:DI 1 "reg_or_int_operand")
 		(match_operand:DI 2 "s_register_operand")))
   

[PATCH 19/29] [arm] Handle immediate values in uaddvsi4

2019-10-18 Thread Richard Earnshaw

The uaddv patterns in the arm back-end do not currently handle immediates
during expansion.  This patch adds this support for uaddvsi4.  It's really
a stepping-stone towards early expansion of uaddvdi4, but it is complete and
a useful change in its own right.

Whilst making this change I also observed that we really had two patterns
that did exactly the same thing, but with slightly different properties;
consequently I've cleaned up all of the add-and-compare patterns to bring
some consistency.

* config/arm/arm.md (adddi3): Call gen_addsi3_compare_op1.
* (uaddv4): Delete expansion pattern.
(uaddvsi4): New pattern.
(uaddvdi4): Likewise.
(addsi3_compareC): Delete pattern, change callers to use
addsi3_compare_op1.
(addsi3_compare_op1): No-longer anonymous.  Clean up constraints to
reduce the number of alternatives and re-work type attribute handling.
(addsi3_compare_op2): Clean up constraints to reduce the number of
alternatives and re-work type attribute handling.
(compare_addsi2_op0): Likewise.
(compare_addsi2_op1): Likewise.
---
 gcc/config/arm/arm.md | 118 ++
 1 file changed, 62 insertions(+), 56 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index eaadfd64128..4ea6f4b226c 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -470,7 +470,7 @@ (define_expand "adddi3"
 	  if (!arm_not_operand (hi_op2, SImode))
 	hi_op2 = force_reg (SImode, hi_op2);
 
-	  emit_insn (gen_addsi3_compareC (lo_dest, lo_op1, lo_op2));
+	  emit_insn (gen_addsi3_compare_op1 (lo_dest, lo_op1, lo_op2));
 	  rtx carry = gen_rtx_LTU (SImode, gen_rtx_REG (CC_Cmode, CC_REGNUM),
    const0_rtx);
 	  if (hi_op2 == const0_rtx)
@@ -501,14 +501,27 @@ (define_expand "addv4"
   DONE;
 })
 
-(define_expand "uaddv4"
-  [(match_operand:SIDI 0 "register_operand")
-   (match_operand:SIDI 1 "register_operand")
-   (match_operand:SIDI 2 "register_operand")
+(define_expand "uaddvsi4"
+  [(match_operand:SI 0 "s_register_operand")
+   (match_operand:SI 1 "s_register_operand")
+   (match_operand:SI 2 "arm_add_operand")
(match_operand 3 "")]
   "TARGET_32BIT"
 {
-  emit_insn (gen_add3_compareC (operands[0], operands[1], operands[2]));
+  emit_insn (gen_addsi3_compare_op1 (operands[0], operands[1], operands[2]));
+  arm_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
+
+  DONE;
+})
+
+(define_expand "uaddvdi4"
+  [(match_operand:DI 0 "s_register_operand")
+   (match_operand:DI 1 "s_register_operand")
+   (match_operand:DI 2 "s_register_operand")
+   (match_operand 3 "")]
+  "TARGET_32BIT"
+{
+  emit_insn (gen_adddi3_compareC (operands[0], operands[1], operands[2]));
   arm_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
 
   DONE;
@@ -639,19 +652,6 @@ (define_insn "adddi3_compareC"
(set_attr "type" "multiple")]
 )
 
-(define_insn "addsi3_compareC"
-   [(set (reg:CC_C CC_REGNUM)
-	 (compare:CC_C (plus:SI (match_operand:SI 1 "register_operand" "r")
-(match_operand:SI 2 "register_operand" "r"))
-		   (match_dup 1)))
-(set (match_operand:SI 0 "register_operand" "=r")
-	 (plus:SI (match_dup 1) (match_dup 2)))]
-  "TARGET_32BIT"
-  "adds%?\\t%0, %1, %2"
-  [(set_attr "conds" "set")
-   (set_attr "type" "alus_sreg")]
-)
-
 (define_insn "addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
@@ -770,13 +770,13 @@ (define_peephole2
 ;; the operands, and we know that the use of the condition code is
 ;; either GEU or LTU, so we can use the carry flag from the addition
 ;; instead of doing the compare a second time.
-(define_insn "*addsi3_compare_op1"
+(define_insn "addsi3_compare_op1"
   [(set (reg:CC_C CC_REGNUM)
 	(compare:CC_C
-	 (plus:SI (match_operand:SI 1 "s_register_operand" "l,0,l,0,r,r,r")
-		  (match_operand:SI 2 "arm_add_operand" "lPd,Py,lPx,Pw,I,L,r"))
+	 (plus:SI (match_operand:SI 1 "s_register_operand" "l,0,l,0,rk,rk")
+		  (match_operand:SI 2 "arm_add_operand" "lPd,Py,lPx,Pw,rkI,L"))
 	 (match_dup 1)))
-   (set (match_operand:SI 0 "s_register_operand" "=l,l,l,l,r,r,r")
+   (set (match_operand:SI 0 "s_register_operand" "=l,l,l,l,rk,rk")
 	(plus:SI (match_dup 1) (match_dup 2)))]
   "TARGET_32BIT"
   "@
@@ -785,22 +785,23 @@ (define_insn "*addsi3_compare_op1"
subs%?\\t%0, %1, #%n2
subs%?\\t%0, %0, #%n2
adds%?\\t%0, %1, %2
-   subs%?\\t%0, %1, #%n2
-   adds%?\\t%0, %1, %2"
+   subs%?\\t%0, %1, #%n2"
   [(set_attr "conds" "set")
-   (set_attr "arch" "t2,t2,t2,t2,*,*,*")
-   (set_attr "length" "2,2,2,2,4,4,4")
-   (set_attr "type"
-"alus_sreg,alus_imm,alus_sreg,alus_imm,alus_imm,alus_imm,alus_sreg")]
+   (set_attr "arch" "t2,t2,t2,t2,*,*")
+   (set_attr "length" "2,2,2,2,4,4")
+   (set (attr "type")
+	(if_then_else (match_operand 2 "const_int_operand")
+		  (const_string "alu_imm")
+		  (const_string "alu_sreg")))]
 )
 
 (define_insn "*addsi3_compare_op2"
   [(set (reg:CC_C CC_REGNUM)
 	(compare:CC_C
-	 (plus:SI 

[PATCH 18/29] [arm] Cleanup dead code - old support for DImode comparisons

2019-10-18 Thread Richard Earnshaw

Now that all the major patterns for DImode have been converted to
early expansion, we can safely clean up some dead code for the old way
of handling DImode.

* config/arm/arm-modes.def (CC_NCV, CC_CZ): Delete CC modes.
* config/arm/arm.c (arm_select_cc_mode): Remove old selection code
for DImode operands.
(arm_gen_dicompare_reg): Remove unreachable expansion code.
(maybe_get_arm_condition_code): Remove support for CC_CZmode and
CC_NCVmode.
* config/arm/arm.md (arm_cmpdi_insn): Delete.
(arm_cmpdi_unsigned): Delete.
---
 gcc/config/arm/arm-modes.def |   5 --
 gcc/config/arm/arm.c | 147 +--
 gcc/config/arm/arm.md|  45 ---
 3 files changed, 1 insertion(+), 196 deletions(-)

diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index 65cddf68cdb..f0eb8415b93 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -36,12 +36,9 @@ ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
CC_Nmode should be used if only the N (sign) flag is set correctly
CC_NVmode should be used if only the N and V bits are set correctly,
  (used for signed comparisons when the carry is propagated in).
-   CC_CZmode should be used if only the C and Z flags are correct
-   (used for DImode unsigned comparisons).
CC_RSBmode should be used where the comparison is set by an RSB immediate,
  or NEG instruction.  The form of the comparison for (const - reg) will
  be (COMPARE (not (reg)) (~const)).
-   CC_NCVmode should be used if only the N, C, and V flags are correct
CC_Bmode should be used if only the C flag is correct after a subtract
  (eg after an unsigned borrow with carry-in propagation).
(used for DImode signed comparisons).
@@ -49,8 +46,6 @@ ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
 
 CC_MODE (CC_NOOV);
 CC_MODE (CC_Z);
-CC_MODE (CC_CZ);
-CC_MODE (CC_NCV);
 CC_MODE (CC_NV);
 CC_MODE (CC_SWP);
 CC_MODE (CC_RSB);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 299dce638c2..6da2a368d9f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15403,56 +15403,6 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
 	  || arm_borrow_operation (y, DImode)))
 return CC_Bmode;
 
-  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
-{
-  switch (op)
-	{
-	case EQ:
-	case NE:
-	  /* A DImode comparison against zero can be implemented by
-	 or'ing the two halves together.  We can also handle
-	 immediates where one word of that value is zero by
-	 subtracting the non-zero word from the corresponding word
-	 in the other register and then ORRing it with the other
-	 word.  */
-	  if (CONST_INT_P (y)
-	  && ((UINTVAL (y) & 0x) == 0
-		  || (UINTVAL (y) >> 32) == 0))
-	return CC_Zmode;
-
-	  /* We can do an equality test in three Thumb instructions.  */
-	  if (!TARGET_32BIT)
-	return CC_Zmode;
-
-	  /* FALLTHROUGH */
-
-	case LTU:
-	case LEU:
-	case GTU:
-	case GEU:
-	  /* DImode unsigned comparisons can be implemented by cmp +
-	 cmpeq without a scratch register.  Not worth doing in
-	 Thumb-2.  */
-	  if (TARGET_32BIT)
-	return CC_CZmode;
-
-	  /* FALLTHROUGH */
-
-	case LT:
-	case LE:
-	case GT:
-	case GE:
-	  /* DImode signed and unsigned comparisons can be implemented
-	 by cmp + sbcs with a scratch register, but that does not
-	 set the Z flag - we must reverse GT/LE/GTU/LEU.  */
-	  gcc_assert (op != EQ && op != NE);
-	  return CC_NCVmode;
-
-	default:
-	  gcc_unreachable ();
-	}
-}
-
   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
 return GET_MODE (x);
 
@@ -15673,81 +15623,8 @@ arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
   }
 
 default:
-  break;
-}
-
-  /* We might have X as a constant, Y as a register because of the predicates
- used for cmpdi.  If so, force X to a register here.  */
-  if (!REG_P (x))
-x = force_reg (DImode, x);
-
-  mode = SELECT_CC_MODE (code, x, y);
-  cc_reg = gen_rtx_REG (mode, CC_REGNUM);
-
-  if (mode != CC_CZmode)
-{
-  rtx clobber, set;
-
-  /* To compare two non-zero values for equality, XOR them and
-	 then compare against zero.  Not used for ARM mode; there
-	 CC_CZmode is cheaper.  */
-  if (mode == CC_Zmode)
-	{
-	  mode = CC_NOOVmode;
-	  PUT_MODE (cc_reg, mode);
-	  if (y != const0_rtx)
-	{
-	  gcc_assert (CONST_INT_P (y));
-	  rtx xlo, xhi, ylo, yhi;
-	  arm_decompose_di_binop (x, y, , , , );
-	  if (!scratch)
-		scratch = gen_reg_rtx (SImode);
-	  if (ylo == const0_rtx)
-		{
-		  yhi = gen_int_mode (-INTVAL (yhi), SImode);
-		  if (!arm_add_operand (yhi, SImode))
-		yhi = force_reg (SImode, yhi);
-		  emit_insn (gen_addsi3 (scratch, xhi, yhi));
-		  y = xlo;
-		}
-	  else
-		{
-		  gcc_assert (yhi == const0_rtx);
-		  ylo = gen_int_mode (-INTVAL 

[PATCH 22/29] [arm] Allow the summation result of signed add-with-overflow to be discarded.

2019-10-18 Thread Richard Earnshaw

This patch matches the signed add-with-overflow patterns when the
summation itself is dropped.  In this case we can use CMN (or CMP with
some immediates).  There are a small number of constants in thumb2
where this can result in less dense code (as we lack 16-bit CMN with
immediate patterns).  To handle this we use peepholes to try these
alternatives when either a scratch is available (0 <= i <= 7) or the
original register is dead (0 <= i <= 255).  We don't use a scratch in
the pattern as if those conditions are not satisfied then the 32-bit
form is preferable to forcing a reload.

* config/arm/arm.md (addsi3_compareV_reg_nosum): New insn.
(addsi3_compareV_imm_nosum): New insn.  Also add peephole2 patterns
to transform this back into the summation version when that leads
to smaller code.
---
 gcc/config/arm/arm.md | 78 +++
 1 file changed, 78 insertions(+)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index b5214c79c35..be002f77382 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -803,6 +803,21 @@ (define_insn "addsi3_compareV_reg"
(set_attr "type" "alus_sreg")]
 )
 
+(define_insn "*addsi3_compareV_reg_nosum"
+  [(set (reg:CC_V CC_REGNUM)
+	(compare:CC_V
+	  (plus:DI
+	(sign_extend:DI (match_operand:SI 0 "register_operand" "%l,r"))
+	(sign_extend:DI (match_operand:SI 1 "register_operand" "l,r")))
+	  (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)]
+  "TARGET_32BIT"
+  "cmn%?\\t%0, %1"
+  [(set_attr "conds" "set")
+   (set_attr "arch" "t2,*")
+   (set_attr "length" "2,4")
+   (set_attr "type" "alus_sreg")]
+)
+
 (define_insn "addsi3_compareV_imm"
   [(set (reg:CC_V CC_REGNUM)
 	(compare:CC_V
@@ -828,6 +843,69 @@ (define_insn "addsi3_compareV_imm"
(set_attr "type" "alus_imm")]
 )
 
+(define_insn "addsi3_compareV_imm_nosum"
+  [(set (reg:CC_V CC_REGNUM)
+	(compare:CC_V
+	  (plus:DI
+	(sign_extend:DI
+	 (match_operand:SI 0 "register_operand" "l,r,r"))
+	(match_operand 1 "arm_addimm_operand" "Pw,I,L"))
+	  (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)]
+  "TARGET_32BIT
+   && INTVAL (operands[1]) == ARM_SIGN_EXTEND (INTVAL (operands[1]))"
+  "@
+   cmp%?\\t%0, #%n1
+   cmn%?\\t%0, %1
+   cmp%?\\t%0, #%n1"
+  [(set_attr "conds" "set")
+   (set_attr "arch" "t2,*,*")
+   (set_attr "length" "2,4,4")
+   (set_attr "type" "alus_imm")]
+)
+
+;; We can handle more constants efficently if we can clobber either a scratch
+;; or the other source operand.  We deliberately leave this late as in
+;; high register pressure situations it's not worth forcing any reloads.
+(define_peephole2
+  [(match_scratch:SI 2 "l")
+   (set (reg:CC_V CC_REGNUM)
+	(compare:CC_V
+	  (plus:DI
+	(sign_extend:DI
+	 (match_operand:SI 0 "low_register_operand"))
+	(match_operand 1 "const_int_operand"))
+	  (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)]
+  "TARGET_THUMB2
+   && satisfies_constraint_Pd (operands[1])"
+  [(parallel[
+(set (reg:CC_V CC_REGNUM)
+	 (compare:CC_V
+	  (plus:DI (sign_extend:DI (match_dup 0))
+		   (sign_extend:DI (match_dup 1)))
+	  (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)
+(set (match_dup 2) (plus:SI (match_dup 0) (match_dup 1)))])]
+)
+
+(define_peephole2
+  [(set (reg:CC_V CC_REGNUM)
+	(compare:CC_V
+	  (plus:DI
+	(sign_extend:DI
+	 (match_operand:SI 0 "low_register_operand"))
+	(match_operand 1 "const_int_operand"))
+	  (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)]
+  "TARGET_THUMB2
+   && dead_or_set_p (peep2_next_insn (0), operands[0])
+   && satisfies_constraint_Py (operands[1])"
+  [(parallel[
+(set (reg:CC_V CC_REGNUM)
+	 (compare:CC_V
+	  (plus:DI (sign_extend:DI (match_dup 0))
+		   (sign_extend:DI (match_dup 1)))
+	  (sign_extend:DI (plus:SI (match_dup 0) (match_dup 1)
+(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))])]
+)
+
 (define_insn "addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV


[PATCH 20/29] [arm] Early expansion of uaddvdi4.

2019-10-18 Thread Richard Earnshaw

This code borrows heavily from the uaddvti4 expansion for aarch64 since
the principles are similar.  Firstly, if the one of the low words of
the expansion is 0, we can simply copy the other low word to the
destination and use uaddvsi4 for the upper word.  If that doesn't work
we have to handle three possible cases for the upper word (the lower
word is simply an add-with-carry operation as for adddi3): zero in the
upper word, some other constant and a register (each has a different
canonicalization).  We use CC_ADCmode (a new CC mode variant) to
describe the cases as the introduction of the carry means we can
no-longer use the normal overflow trick of comparing the sum against
one of the operands.

* config/arm/arm-modes.def (CC_ADC): New CC mode.
* config/arm/arm.c (arm_select_cc_mode): Detect selection of
CC_ADCmode.
(maybe_get_arm_condition_code): Handle CC_ADCmode.
* config/arm/arm.md (uaddvdi4): Early expansion of unsigned addition
with overflow.
(addsi3_cin_cout_reg, addsi3_cin_cout_imm, addsi3_cin_cout_0): New
expand patterns.
(addsi3_cin_cout_reg_insn, addsi3_cin_cout_0_insn): New insn patterns
(addsi3_cin_cout_imm_insn): Likewise.
(adddi3_compareC): Delete insn.
* config/arm/predicates.md (arm_carry_operation): Handle CC_ADCmode.
---
 gcc/config/arm/arm-modes.def |   4 +
 gcc/config/arm/arm.c |  16 
 gcc/config/arm/arm.md| 171 +++
 gcc/config/arm/predicates.md |   2 +-
 4 files changed, 173 insertions(+), 20 deletions(-)

diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index f0eb8415b93..a6b520df32d 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -42,6 +42,9 @@ ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
CC_Bmode should be used if only the C flag is correct after a subtract
  (eg after an unsigned borrow with carry-in propagation).
(used for DImode signed comparisons).
+   CC_ADCmode is used when the carry is formed from the output of ADC for an
+ addition.  In this case we cannot use the trick of comparing the sum
+ against one of the other operands.
CCmode should be used otherwise.  */
 
 CC_MODE (CC_NOOV);
@@ -65,6 +68,7 @@ CC_MODE (CC_C);
 CC_MODE (CC_B);
 CC_MODE (CC_N);
 CC_MODE (CC_V);
+CC_MODE (CC_ADC);
 
 /* Vector modes.  */
 VECTOR_MODES (INT, 4);/*V4QI V2HI */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 6da2a368d9f..eebbdc3d9c2 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15387,6 +15387,14 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
   && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
 return CC_Cmode;
 
+  if (GET_MODE (x) == DImode
+  && GET_CODE (x) == PLUS
+  && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+  && CONST_INT_P (y)
+  && UINTVAL (y) == 0x8
+  && (op == GEU || op == LTU))
+return CC_ADCmode;
+
   if (GET_MODE (x) == DImode
   && (op == GE || op == LT)
   && GET_CODE (x) == SIGN_EXTEND
@@ -23952,6 +23960,14 @@ maybe_get_arm_condition_code (rtx comparison)
 	default: return ARM_NV;
 	}
 
+case E_CC_ADCmode:
+  switch (comp_code)
+	{
+	case GEU: return ARM_CS;
+	case LTU: return ARM_CC;
+	default: return ARM_NV;
+	}
+
 case E_CCmode:
 case E_CC_RSBmode:
   switch (comp_code)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 4ea6f4b226c..9f0e43571fd 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -517,16 +517,165 @@ (define_expand "uaddvsi4"
 (define_expand "uaddvdi4"
   [(match_operand:DI 0 "s_register_operand")
(match_operand:DI 1 "s_register_operand")
-   (match_operand:DI 2 "s_register_operand")
+   (match_operand:DI 2 "reg_or_int_operand")
(match_operand 3 "")]
   "TARGET_32BIT"
 {
-  emit_insn (gen_adddi3_compareC (operands[0], operands[1], operands[2]));
-  arm_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
+  rtx lo_result, hi_result;
+  rtx lo_op1, hi_op1, lo_op2, hi_op2;
+  arm_decompose_di_binop (operands[1], operands[2], _op1, _op1,
+			  _op2, _op2);
+  lo_result = gen_lowpart (SImode, operands[0]);
+  hi_result = gen_highpart (SImode, operands[0]);
+
+  if (lo_op2 == const0_rtx)
+{
+  emit_move_insn (lo_result, lo_op1);
+  if (!arm_add_operand (hi_op2, SImode))
+	hi_op2 = force_reg (SImode, hi_op2);
+
+  gen_uaddvsi4 (hi_result, hi_op1, hi_op2, operands[3]);
+}
+  else
+{
+  if (!arm_add_operand (lo_op2, SImode))
+	lo_op2 = force_reg (SImode, lo_op2);
+  if (!arm_not_operand (hi_op2, SImode))
+	hi_op2 = force_reg (SImode, hi_op2);
+
+  emit_insn (gen_addsi3_compare_op1 (lo_result, lo_op1, lo_op2));
+
+  if (hi_op2 == const0_rtx)
+emit_insn (gen_addsi3_cin_cout_0 (hi_result, hi_op1));
+  else if (CONST_INT_P (hi_op2))
+emit_insn (gen_addsi3_cin_cout_imm 

[PATCH 28/29] [arm] Improvements to negvsi4 and negvdi4.

2019-10-18 Thread Richard Earnshaw

The generic expansion code for negv does not try the subv patterns,
but instead emits a sub and a compare separately.  Fortunately, the
patterns can make use of the new subv operations, so just call those.
We can also rewrite this using an iterator to simplify things further.
Finally, we can now make negvdi4 work on Thumb2 as well as Arm.

* config/arm/arm.md (negv3): New expansion rule.
(negvsi3, negvdi3): Delete.
(negdi2_compare): Delete.
---
 gcc/config/arm/arm.md | 41 +
 1 file changed, 5 insertions(+), 36 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 5a8175ff8b0..7ef0c16580d 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4581,48 +4581,17 @@ (define_insn "udivsi3"
 
 ;; Unary arithmetic insns
 
-(define_expand "negvsi3"
-  [(match_operand:SI 0 "register_operand")
-   (match_operand:SI 1 "register_operand")
+(define_expand "negv3"
+  [(match_operand:SIDI 0 "s_register_operand")
+   (match_operand:SIDI 1 "s_register_operand")
(match_operand 2 "")]
   "TARGET_32BIT"
 {
-  emit_insn (gen_subsi3_compare (operands[0], const0_rtx, operands[1]));
-  arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]);
-
-  DONE;
-})
-
-(define_expand "negvdi3"
-  [(match_operand:DI 0 "s_register_operand")
-   (match_operand:DI 1 "s_register_operand")
-   (match_operand 2 "")]
-  "TARGET_ARM"
-{
-  emit_insn (gen_negdi2_compare (operands[0], operands[1]));
-  arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]);
-
+  emit_insn (gen_subv4 (operands[0], const0_rtx, operands[1],
+			  operands[2]));
   DONE;
 })
 
-
-(define_insn "negdi2_compare"
-  [(set (reg:CC CC_REGNUM)
-	(compare:CC
-	  (const_int 0)
-	  (match_operand:DI 1 "register_operand" "r,r")))
-   (set (match_operand:DI 0 "register_operand" "=,")
-	(minus:DI (const_int 0) (match_dup 1)))]
-  "TARGET_ARM"
-  "@
-   rsbs\\t%Q0, %Q1, #0;rscs\\t%R0, %R1, #0
-   rsbs\\t%Q0, %Q1, #0;sbcs\\t%R0, %R1, %R1, lsl #1"
-  [(set_attr "conds" "set")
-   (set_attr "arch" "a,t2")
-   (set_attr "length" "8")
-   (set_attr "type" "multiple")]
-)
-
 (define_expand "negsi2"
   [(set (match_operand:SI 0 "s_register_operand")
 	(neg:SI (match_operand:SI 1 "s_register_operand")))]


[PATCH 04/29] [arm] Rewrite addsi3_carryin_shift_ in canonical form

2019-10-18 Thread Richard Earnshaw

The add-with-carry operation which involves a shift doesn't match at present
because it isn't matching the canonical form generated by combine.  Fixing
this is simply a matter of re-ordering the operands.

* config/arm/arm.md (addsi3_carryin_shift_): Reorder operands
to match canonical form.
---
 gcc/config/arm/arm.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 4a7a64e6613..9754a761faf 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -913,8 +913,8 @@ (define_insn "*addsi3_carryin_shift_"
 		  (match_operator:SI 2 "shift_operator"
 		[(match_operand:SI 3 "s_register_operand" "r")
 		 (match_operand:SI 4 "reg_or_int_operand" "rM")])
-		  (match_operand:SI 1 "s_register_operand" "r"))
-		 (LTUGEU:SI (reg: CC_REGNUM) (const_int 0]
+		  (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)))
+		 (match_operand:SI 1 "s_register_operand" "r")))]
   "TARGET_32BIT"
   "adc%?\\t%0, %1, %3%S2"
   [(set_attr "conds" "use")


[PATCH 15/29] [arm] Improve handling of DImode comparisions against constants.

2019-10-18 Thread Richard Earnshaw

In almost all cases it is better to handle inequality comparisons against
constants by transforming comparisons of the form (reg <inequality> const) into
(reg <adjusted-inequality> (const+1)).  However, there are many cases that we could
handle but currently failed to do so because we forced the constant into a
register too early in the pattern expansion.  To permit this to be done we need
to defer forcing the constant into a register until after we've had the chance
to do the transform - in some cases that may even mean that we no-longer need
to force the constant into a register at all.  For example, on Arm, the case:

_Bool f8 (unsigned long long a) { return a > 0x; }

previously compiled to

mov r3, #0
cmp r1, r3
mvn r2, #0
cmpeq   r0, r2
movhi   r0, #1
movls   r0, #0
bx  lr

But now compiles to

cmp r1, #1
cmpeq   r0, #0
movcs   r0, #1
movcc   r0, #0
bx  lr

Which although not yet completely optimal, is certainly better than
previously.

* config/arm/arm.md (cbranchdi4): Accept reg_or_int_operand for
operand 2.
(cstoredi4): Similarly, but for operand 3.
* config/arm/arm.c (arm_canoncialize_comparison): Allow canonicalization
of unsigned compares with a constant on Arm.  Prefer using const+1 and
adjusting the comparison over swapping the operands whenever the
original constant was not valid.
(arm_gen_dicompare_reg): If Y is not a valid operand, force it to a
register here.
(arm_validize_comparison): Do not force invalid DImode operands to
registers here.
---
 gcc/config/arm/arm.c  | 37 +++--
 gcc/config/arm/arm.md |  4 ++--
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 64367b42332..ddfe4335169 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -5372,15 +5372,16 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
 
   maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
 
-  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
- we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
- reversed or (for constant OP1) adjusted to GE/LT.  Similarly
- for GTU/LEU in Thumb mode.  */
+  /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
+ ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
+ either reversed or (for constant OP1) adjusted to GE/LT.
+ Similarly for GTU/LEU in Thumb mode.  */
   if (mode == DImode)
 {
 
   if (*code == GT || *code == LE
-	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
+	  || ((!TARGET_ARM || CONST_INT_P (*op1))
+	  && (*code == GTU || *code == LEU)))
 	{
 	  /* Missing comparison.  First try to use an available
 	 comparison.  */
@@ -5392,23 +5393,27 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
 		case GT:
 		case LE:
 		  if (i != maxval
-		  && arm_const_double_by_immediates (GEN_INT (i + 1)))
+		  && (!arm_const_double_by_immediates (*op1)
+			  || arm_const_double_by_immediates (GEN_INT (i + 1
 		{
 		  *op1 = GEN_INT (i + 1);
 		  *code = *code == GT ? GE : LT;
 		  return;
 		}
 		  break;
+
 		case GTU:
 		case LEU:
 		  if (i != ~((unsigned HOST_WIDE_INT) 0)
-		  && arm_const_double_by_immediates (GEN_INT (i + 1)))
+		  && (!arm_const_double_by_immediates (*op1)
+			  || arm_const_double_by_immediates (GEN_INT (i + 1
 		{
 		  *op1 = GEN_INT (i + 1);
 		  *code = *code == GTU ? GEU : LTU;
 		  return;
 		}
 		  break;
+
 		default:
 		  gcc_unreachable ();
 		}
@@ -15436,7 +15441,7 @@ arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
 		scratch = gen_reg_rtx (SImode);
 	  if (ylo == const0_rtx)
 		{
-		  yhi = GEN_INT (-INTVAL(yhi));
+		  yhi = gen_int_mode (-INTVAL (yhi), SImode);
 		  if (!arm_add_operand (yhi, SImode))
 		yhi = force_reg (SImode, yhi);
 		  emit_insn (gen_addsi3 (scratch, xhi, yhi));
@@ -15445,7 +15450,7 @@ arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
 	  else
 		{
 		  gcc_assert (yhi == const0_rtx);
-		  ylo = GEN_INT (-INTVAL(ylo));
+		  ylo = gen_int_mode (-INTVAL (ylo), SImode);
 		  if (!arm_add_operand (ylo, SImode))
 		ylo = force_reg (SImode, ylo);
 		  emit_insn (gen_addsi3 (scratch, xlo, ylo));
@@ -15458,6 +15463,8 @@ arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
 	x = gen_rtx_IOR (SImode, gen_lowpart (SImode, x),
 			 gen_highpart (SImode, x));
 	}
+  else if (!cmpdi_operand (y, mode))
+	y = force_reg (DImode, y);
 
   /* A scratch register is required.  */
   if (reload_completed)
@@ -15470,7 +15477,12 @@ arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
 }
   else
-

[PATCH 26/29] [arm] Improve constant handling for subvsi4.

2019-10-18 Thread Richard Earnshaw

This patch addresses constant handling in subvsi4.  Either operand may
be a constant.  If the second input (operand[2]) is a constant, then
we can canonicalize this into an addition form, providing we take care
of the INT_MIN case.  In that case the negation has to handle the fact
that -INT_MIN is still INT_MIN and we need to ensure that a subtract
operation is performed rather than an addition.  The remaining cases
are largely duals of the usubvsi4 expansion.

This patch also fixes a technical correctness bug in the old
expansion, where we did not really describe the test for overflow in
the RTL.  We seem to have got away with that, however...

* config/arm/arm.md (subv4): Delete.
(subvdi4): New expander pattern.
(subvsi4): Likewise.  Handle some immediate values.
(subvsi3_intmin): New insn pattern.
(subvsi3): Likewise.
(subvsi3_imm1): Likewise.
* config/arm/arm.c (select_cc_mode): Also allow minus for CC_V
idioms.
---
 gcc/config/arm/arm.c  |  5 ++-
 gcc/config/arm/arm.md | 96 ---
 2 files changed, 94 insertions(+), 7 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index c9abbb0f91d..d5ffd2133a9 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15413,11 +15413,12 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
 
   if (GET_MODE (x) == DImode
   && (op == EQ || op == NE)
-  && GET_CODE (x) == PLUS
+  && (GET_CODE (x) == PLUS
+	  || GET_CODE (x) == MINUS)
   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
 	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
   && GET_CODE (y) == SIGN_EXTEND
-  && GET_CODE (XEXP (y, 0)) == PLUS)
+  && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
 return CC_Vmode;
 
   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 92f1823cdfa..05b735cfccd 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -957,6 +957,22 @@ (define_insn "*addsi3_compareV_reg_nosum"
(set_attr "type" "alus_sreg")]
 )
 
+(define_insn "subvsi3_intmin"
+  [(set (reg:CC_V CC_REGNUM)
+	(compare:CC_V
+	  (plus:DI
+	(sign_extend:DI
+	 (match_operand:SI 1 "register_operand" "r"))
+	(const_int 2147483648))
+	  (sign_extend:DI (plus:SI (match_dup 1) (const_int -2147483648)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI (match_dup 1) (const_int -2147483648)))]
+  "TARGET_32BIT"
+  "subs%?\\t%0, %1, #-2147483648"
+  [(set_attr "conds" "set")
+   (set_attr "type" "alus_imm")]
+)
+
 (define_insn "addsi3_compareV_imm"
   [(set (reg:CC_V CC_REGNUM)
 	(compare:CC_V
@@ -1339,14 +1355,52 @@ (define_insn "*addsi3_carryin_clobercc"
 (set_attr "type" "adcs_reg")]
 )
 
-(define_expand "subv4"
-  [(match_operand:SIDI 0 "register_operand")
-   (match_operand:SIDI 1 "register_operand")
-   (match_operand:SIDI 2 "register_operand")
+(define_expand "subvsi4"
+  [(match_operand:SI 0 "s_register_operand")
+   (match_operand:SI 1 "arm_rhs_operand")
+   (match_operand:SI 2 "arm_add_operand")
+   (match_operand 3 "")]
+  "TARGET_32BIT"
+{
+  if (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2]))
+{
+  /* If both operands are constants we can decide the result statically.  */
+  wi::overflow_type overflow;
+  wide_int val = wi::sub (rtx_mode_t (operands[1], SImode),
+			  rtx_mode_t (operands[2], SImode),
+			  SIGNED, &overflow);
+  emit_move_insn (operands[0], GEN_INT (val.to_shwi ()));
+  if (overflow != wi::OVF_NONE)
+	emit_jump_insn (gen_jump (operands[3]));
+  DONE;
+}
+  else if (CONST_INT_P (operands[2]))
+{
+  operands[2] = GEN_INT (-INTVAL (operands[2]));
+  /* Special case for INT_MIN.  */
+  if (INTVAL (operands[2]) == 0x80000000)
+	emit_insn (gen_subvsi3_intmin (operands[0], operands[1]));
+  else
+	emit_insn (gen_addsi3_compareV_imm (operands[0], operands[1],
+	  operands[2]));
+}
+  else if (CONST_INT_P (operands[1]))
+emit_insn (gen_subvsi3_imm1 (operands[0], operands[1], operands[2]));
+  else
+emit_insn (gen_subvsi3 (operands[0], operands[1], operands[2]));
+
+  arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+  DONE;
+})
+
+(define_expand "subvdi4"
+  [(match_operand:DI 0 "s_register_operand")
+   (match_operand:DI 1 "s_register_operand")
+   (match_operand:DI 2 "s_register_operand")
(match_operand 3 "")]
   "TARGET_32BIT"
 {
-  emit_insn (gen_sub3_compare1 (operands[0], operands[1], operands[2]));
+  emit_insn (gen_subdi3_compare1 (operands[0], operands[1], operands[2]));
   arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
 
   DONE;
@@ -1496,6 +1550,38 @@ (define_insn "subsi3_compare1"
(set_attr "type" "alus_sreg")]
 )
 
+(define_insn "subvsi3"
+  [(set (reg:CC_V CC_REGNUM)
+	(compare:CC_V
+	 (minus:DI
+	  (sign_extend:DI (match_operand:SI 1 "s_register_operand" "l,r"))
+	  (sign_extend:DI (match_operand:SI 2 "s_register_operand" "l,r")))
+	 

[PATCH 05/29] [arm] fix constraints on addsi3_carryin_alt2

2019-10-18 Thread Richard Earnshaw

addsi3_carryin_alt2 has a more strict constraint than the predicate
when adding a constant.  This leads to sub-optimal code in some
circumstances.

* config/arm/arm.md (addsi3_carryin_alt2): Use arm_not_operand for
operand 2.
---
 gcc/config/arm/arm.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 9754a761faf..fbe154a9873 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -893,7 +893,7 @@ (define_insn "*addsi3_carryin_alt2_"
   [(set (match_operand:SI 0 "s_register_operand" "=l,r,r")
 (plus:SI (plus:SI (LTUGEU:SI (reg: CC_REGNUM) (const_int 0))
   (match_operand:SI 1 "s_register_operand" "%l,r,r"))
- (match_operand:SI 2 "arm_rhs_operand" "l,rI,K")))]
+ (match_operand:SI 2 "arm_not_operand" "l,rI,K")))]
   "TARGET_32BIT"
   "@
adc%?\\t%0, %1, %2


[PATCH 17/29] [arm] Handle some constant comparisons using rsbs+rscs

2019-10-18 Thread Richard Earnshaw

In a small number of cases it is preferable to handle comparisons with
constants using the sequence

RSBS	tmp, Xlo, constlo
RSCS	tmp, Xhi, consthi

which allows us to handle a small number of LE/GT/LEU/GEU cases when
changing the code to use LT/GE/LTU/GEU would make the constant more
expensive.  Sadly, we cannot do this on Thumb, since we need RSC, so we
now always use the incremented constant in that case since normally that
still works out cheaper than forcing the entire constant into a register.

Further investigation has also shown that the canonicalization of a
reverse subtract and compare is valid for signed as well as unsigned value,
so we relax the restriction on selecting CC_RSBmode to allow all types
of compare.

* config/arm/arm.c (arm_const_double_prefer_rsbs_rsc): New function.
(arm_canonicalize_comparison): For GT/LE/GTU/GEU, use the constant
unchanged only if that will be cheaper.
(arm_select_cc_mode): Recognize a swapped comparison that will
be regenerated using RSBS or RSCS.  Relax restriction on selecting
CC_RSBmode.
(arm_gen_dicompare_reg): Handle LE/GT/LEU/GEU comparisons against
a constant.
(arm_gen_compare_reg): Handle compare (CONST, X) when the mode
is CC_RSBmode.
(maybe_get_arm_condition_code): CC_RSBmode now returns the same codes
as CCmode.
* config/arm/arm.md (rsb_imm_compare_scratch): New pattern.
(rscsi3_out_scratch): New pattern.
---
 gcc/config/arm/arm.c  | 153 +-
 gcc/config/arm/arm.md |  27 
 2 files changed, 134 insertions(+), 46 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 99c8bd79d30..299dce638c2 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -5355,6 +5355,21 @@ arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
   return insns;
 }
 
+/* Return TRUE if op is a constant where both the low and top words are
+   suitable for RSB/RSC instructions.  This is never true for Thumb, since
+   we do not have RSC in that case.  */
+static bool
+arm_const_double_prefer_rsbs_rsc (rtx op)
+{
+  /* Thumb lacks RSC, so we never prefer that sequence.  */
+  if (TARGET_THUMB || !CONST_INT_P (op))
+return false;
+  HOST_WIDE_INT hi, lo;
+  lo = UINTVAL (op) & 0xffffffffULL;
+  hi = UINTVAL (op) >> 32;
+  return const_ok_for_arm (lo) && const_ok_for_arm (hi);
+}
+
 /* Canonicalize a comparison so that we are more likely to recognize it.
This can be done for a few constant compares, where we can make the
immediate value easier to load.  */
@@ -5380,8 +5395,7 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
 {
 
   if (*code == GT || *code == LE
-	  || ((!TARGET_ARM || CONST_INT_P (*op1))
-	  && (*code == GTU || *code == LEU)))
+	  || *code == GTU || *code == LEU)
 	{
 	  /* Missing comparison.  First try to use an available
 	 comparison.  */
@@ -5392,10 +5406,13 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
 		{
 		case GT:
 		case LE:
-		  if (i != maxval
-		  && (!arm_const_double_by_immediates (*op1)
-			  || arm_const_double_by_immediates (GEN_INT (i + 1))))
+		  if (i != maxval)
 		{
+		  /* Try to convert to GE/LT, unless that would be more
+			 expensive.  */
+		  if (!arm_const_double_by_immediates (GEN_INT (i + 1))
+			  && arm_const_double_prefer_rsbs_rsc (*op1))
+			return;
 		  *op1 = GEN_INT (i + 1);
 		  *code = *code == GT ? GE : LT;
 		  return;
@@ -5404,10 +5421,13 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
 
 		case GTU:
 		case LEU:
-		  if (i != ~((unsigned HOST_WIDE_INT) 0)
-		  && (!arm_const_double_by_immediates (*op1)
-			  || arm_const_double_by_immediates (GEN_INT (i + 1))))
+		  if (i != ~((unsigned HOST_WIDE_INT) 0))
 		{
+		  /* Try to convert to GEU/LTU, unless that would
+			 be more expensive.  */
+		  if (!arm_const_double_by_immediates (GEN_INT (i + 1))
+			  && arm_const_double_prefer_rsbs_rsc (*op1))
+			return;
 		  *op1 = GEN_INT (i + 1);
 		  *code = *code == GTU ? GEU : LTU;
 		  return;
@@ -5419,7 +5439,6 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
 		}
 	}
 
-	  /* If that did not work, reverse the condition.  */
 	  if (!op0_preserve_value)
 	{
 	  std::swap (*op0, *op1);
@@ -15251,6 +15270,28 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
 	  || GET_CODE (x) == ROTATERT))
 return CC_SWPmode;
 
+  /* A widened compare of the sum of a value plus a carry against a
+ constant.  This is a representation of RSC.  We want to swap the
+ result of the comparison at output.  Not valid if the Z bit is
+ needed.  */
+  if (GET_MODE (x) == DImode
+  && GET_CODE (x) == PLUS
+  && arm_borrow_operation (XEXP (x, 1), DImode)
+  && CONST_INT_P (y)
+  && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+	   && (op == LE || op 

[PATCH 08/29] [arm] Introduce arm_carry_operation

2019-10-18 Thread Richard Earnshaw

An earlier patch introduced arm_borrow_operation, this one introduces
the carry variant, which is the same except that the logic of the
carry-setting is inverted.  Having done this we can now match more
cases where the carry flag is propagated from comparisons with
different modes without having to define even more patterns.  A few
small changes to the expand patterns are required to directly create
the carry representation.

The iterator LTUGEU is no-longer needed and removed, as is the code
attribute 'cnb'.

Finally, we fix a long-standing bug which was probably inert before:
in Thumb2 a shift with ADC can only be by an immediate amount;
register-specified shifts are not permitted.

* config/arm/predicates.md (arm_carry_operation): New special
predicate.
* config/arm/iterators.md (LTUGEU): Delete iterator.
(cnb): Delete code attribute.
(optab): Delete ltu and geu elements.
* config/arm/arm.md (addsi3_carryin): Renamed from
addsi3_carryin_<optab>.  Remove iterator and use arm_carry_operand.
(add0si3_carryin): Similarly, but from add0si3_carryin_<optab>.
(addsi3_carryin_alt2): Similarly, but from addsi3_carryin_alt2_<optab>.
(addsi3_carryin_clobercc): Similarly.
(addsi3_carryin_shift): Similarly.  Do not allow register shifts in
Thumb2 state.
---
 gcc/config/arm/arm.md| 36 
 gcc/config/arm/iterators.md  | 11 +--
 gcc/config/arm/predicates.md | 21 +
 3 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index f597a277c17..f53dbc27207 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -471,10 +471,12 @@ (define_expand "adddi3"
 	hi_op2 = force_reg (SImode, hi_op2);
 
 	  emit_insn (gen_addsi3_compareC (lo_dest, lo_op1, lo_op2));
+	  rtx carry = gen_rtx_LTU (SImode, gen_rtx_REG (CC_Cmode, CC_REGNUM),
+   const0_rtx);
 	  if (hi_op2 == const0_rtx)
-	emit_insn (gen_add0si3_carryin_ltu (hi_dest, hi_op1));
+	emit_insn (gen_add0si3_carryin (hi_dest, hi_op1, carry));
 	  else
-	emit_insn (gen_addsi3_carryin_ltu (hi_dest, hi_op1, hi_op2));
+	emit_insn (gen_addsi3_carryin (hi_dest, hi_op1, hi_op2, carry));
 	}
 
   if (lo_result != lo_dest)
@@ -858,11 +860,11 @@ (define_insn "*compare_addsi2_op1"
(set_attr "type" "alus_imm,alus_sreg,alus_imm,alus_imm,alus_sreg")]
  )
 
-(define_insn "addsi3_carryin_<optab>"
+(define_insn "addsi3_carryin"
   [(set (match_operand:SI 0 "s_register_operand" "=l,r,r")
 (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%l,r,r")
   (match_operand:SI 2 "arm_not_operand" "0,rI,K"))
- (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
+ (match_operand:SI 3 "arm_carry_operation" "")))]
   "TARGET_32BIT"
   "@
adc%?\\t%0, %1, %2
@@ -877,9 +879,9 @@ (define_insn "addsi3_carryin_"
 )
 
 ;; Canonicalization of the above when the immediate is zero.
-(define_insn "add0si3_carryin_<optab>"
+(define_insn "add0si3_carryin"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(plus:SI (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))
+	(plus:SI (match_operand:SI 2 "arm_carry_operation" "")
 		 (match_operand:SI 1 "arm_not_operand" "r")))]
   "TARGET_32BIT"
   "adc%?\\t%0, %1, #0"
@@ -889,9 +891,9 @@ (define_insn "add0si3_carryin_"
(set_attr "type" "adc_imm")]
 )
 
-(define_insn "*addsi3_carryin_alt2_<optab>"
+(define_insn "*addsi3_carryin_alt2"
   [(set (match_operand:SI 0 "s_register_operand" "=l,r,r")
-(plus:SI (plus:SI (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))
+(plus:SI (plus:SI (match_operand:SI 3 "arm_carry_operation" "")
   (match_operand:SI 1 "s_register_operand" "%l,r,r"))
  (match_operand:SI 2 "arm_not_operand" "l,rI,K")))]
   "TARGET_32BIT"
@@ -907,28 +909,30 @@ (define_insn "*addsi3_carryin_alt2_"
(set_attr "type" "adc_reg,adc_reg,adc_imm")]
 )
 
-(define_insn "*addsi3_carryin_shift_<optab>"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
+(define_insn "*addsi3_carryin_shift"
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
 	(plus:SI (plus:SI
 		  (match_operator:SI 2 "shift_operator"
-		[(match_operand:SI 3 "s_register_operand" "r")
-		 (match_operand:SI 4 "reg_or_int_operand" "rM")])
-		  (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0)))
-		 (match_operand:SI 1 "s_register_operand" "r")))]
+		[(match_operand:SI 3 "s_register_operand" "r,r")
+		 (match_operand:SI 4 "shift_amount_operand" "M,r")])
+		  (match_operand:SI 5 "arm_carry_operation" ""))
+		 (match_operand:SI 1 "s_register_operand" "r,r")))]
   "TARGET_32BIT"
   "adc%?\\t%0, %1, %3%S2"
   [(set_attr "conds" "use")
+   (set_attr "arch" "32,a")
+   (set_attr "shift" "3")
(set_attr "predicable" "yes")
(set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
 		  (const_string "alu_shift_imm")
 		  (const_string 

[PATCH 09/29] [arm] Correctly cost addition with a carry-in

2019-10-18 Thread Richard Earnshaw

The cost routine for Arm and Thumb2 was not recognising the idioms that
describe the addition with carry, this results in the instructions
appearing more expensive than they really are, which occasionally can lead
to poor choices by combine.  Recognising all the possible variants is
a little trickier than normal because the expressions can become complex
enough that there is no single canonical form.

* config/arm/arm.c (strip_carry_operation): New function.
(arm_rtx_costs_internal, case PLUS): Handle addition with carry-in
for SImode.
---
 gcc/config/arm/arm.c | 76 +---
 1 file changed, 65 insertions(+), 11 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 9a779e24cac..dfbd5cde5eb 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -9504,6 +9504,20 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
 }
 }
 
+/* Helper function for arm_rtx_costs.  If one operand of the OP, a
+   PLUS, adds the carry flag, then return the other operand.  If
+   neither is a carry, return OP unchanged.  */
+static rtx
+strip_carry_operation (rtx op)
+{
+  gcc_assert (GET_CODE (op) == PLUS);
+  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
+return XEXP (op, 1);
+  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
+return XEXP (op, 0);
+  return op;
+}
+
 /* Helper function for arm_rtx_costs.  If the operand is a valid shift
operand, then return the operand that is being shifted.  If the shift
is not by a constant, then set SHIFT_REG to point to the operand.
@@ -10253,8 +10267,41 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
 	  return true;
 	}
 
+	  rtx op0 = XEXP (x, 0);
+	  rtx op1 = XEXP (x, 1);
+
+	  /* Handle a side effect of adding in the carry to an addition.  */
+	  if (GET_CODE (op0) == PLUS
+	  && arm_carry_operation (op1, mode))
+	{
+	  op1 = XEXP (op0, 1);
+	  op0 = XEXP (op0, 0);
+	}
+	  else if (GET_CODE (op1) == PLUS
+		   && arm_carry_operation (op0, mode))
+	{
+	  op0 = XEXP (op1, 0);
+	  op1 = XEXP (op1, 1);
+	}
+	  else if (GET_CODE (op0) == PLUS)
+	{
+	  op0 = strip_carry_operation (op0);
+	  if (swap_commutative_operands_p (op0, op1))
+		std::swap (op0, op1);
+	}
+
+	  if (arm_carry_operation (op0, mode))
+	{
+	  /* Adding the carry to a register is a canonicalization of
+		 adding 0 to the register plus the carry.  */
+	  if (speed_p)
+		*cost += extra_cost->alu.arith;
+	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
+	  return true;
+	}
+
 	  shift_reg = NULL;
-	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
+	  shift_op = shifter_op_p (op0, &shift_reg);
 	  if (shift_op != NULL)
 	{
 	  if (shift_reg)
@@ -10267,12 +10314,13 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
 		*cost += extra_cost->alu.arith_shift;
 
 	  *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
-			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
+			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
 	  return true;
 	}
-	  if (GET_CODE (XEXP (x, 0)) == MULT)
+
+	  if (GET_CODE (op0) == MULT)
 	{
-	  rtx mul_op = XEXP (x, 0);
+	  rtx mul_op = op0;
 
 	  if (TARGET_DSP_MULTIPLY
 		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
@@ -10296,7 +10344,7 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
   SIGN_EXTEND, 0, speed_p)
 			+ rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
 	SIGN_EXTEND, 0, speed_p)
-			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
+			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
 		  return true;
 		}
 
@@ -10304,24 +10352,30 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
 		*cost += extra_cost->mult[0].add;
 	  *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
 			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
-			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
+			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
 	  return true;
 	}
-	  if (CONST_INT_P (XEXP (x, 1)))
+
+	  if (CONST_INT_P (op1))
 	{
 	  int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
-	INTVAL (XEXP (x, 1)), NULL_RTX,
+	INTVAL (op1), NULL_RTX,
 	NULL_RTX, 1, 0);
 	  *cost = COSTS_N_INSNS (insns);
 	  if (speed_p)
 		*cost += insns * extra_cost->alu.arith;
-	  *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
+	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
 	  return true;
 	}
-	  else if (speed_p)
+
+	  if (speed_p)
 	*cost += extra_cost->alu.arith;
 
-	  return false;
+	  /* Don't recurse here because we want to test the operands
+	 without any carry operation.  */
+	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
+	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
+	  return true;
 	}
 
   if 

[PATCH 11/29] [arm] Reduce cost of insns that are simple reg-reg moves.

2019-10-18 Thread Richard Earnshaw

Consider this sequence during combine:

Trying 18, 7 -> 22:
   18: r118:SI=r122:SI
  REG_DEAD r122:SI
7: r114:SI=0x1-r118:SI-ltu(cc:CC_RSB,0)
  REG_DEAD r118:SI
  REG_DEAD cc:CC_RSB
   22: r1:SI=r114:SI
  REG_DEAD r114:SI
Failed to match this instruction:
(set (reg:SI 1 r1 [+4 ])
(minus:SI (geu:SI (reg:CC_RSB 100 cc)
(const_int 0 [0]))
(reg:SI 122)))
Successfully matched this instruction:
(set (reg:SI 114)
(geu:SI (reg:CC_RSB 100 cc)
(const_int 0 [0])))
Successfully matched this instruction:
(set (reg:SI 1 r1 [+4 ])
(minus:SI (reg:SI 114)
(reg:SI 122)))
allowing combination of insns 18, 7 and 22
original costs 4 + 4 + 4 = 12
replacement costs 8 + 4 = 12

The costs are all correct, but we really don't want this combination
to take place.  The original costs contain an insn that is a simple
move of one pseudo register to another and it is extremely likely that
register allocation will eliminate this insn entirely.  On the other
hand, the resulting sequence really does expand into a sequence that
costs 12 (ie 3 insns).

We don't want to prevent combine from eliminating such moves, as this
can expose more combine opportunities, but we shouldn't rate them as
profitable in themselves.  We can do this be adjusting the costs
slightly so that the benefit of eliminating such a simple insn is
reduced.

We only do this before register allocation; after allocation we give
such insns their full cost.

* config/arm/arm.c (arm_insn_cost): New function.
(TARGET_INSN_COST): Override default definition.
---
 gcc/config/arm/arm.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index b91b52f6d51..e33b6b14d28 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -181,6 +181,7 @@ static bool arm_have_conditional_execution (void);
 static bool arm_cannot_force_const_mem (machine_mode, rtx);
 static bool arm_legitimate_constant_p (machine_mode, rtx);
 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
+static int arm_insn_cost (rtx_insn *, bool);
 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
@@ -510,6 +511,8 @@ static const struct attribute_spec arm_attribute_table[] =
 #define TARGET_RTX_COSTS arm_rtx_costs
 #undef  TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST arm_address_cost
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST arm_insn_cost
 
 #undef TARGET_SHIFT_TRUNCATION_MASK
 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
@@ -11486,6 +11489,24 @@ arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
   return result;
 }
 
+static int
+arm_insn_cost (rtx_insn *insn, bool speed)
+{
+  int cost;
+
+  /* Don't cost a simple reg-reg move at a full insn cost: such moves
+ will likely disappear during register allocation.  */
+  if (!reload_completed
+  && GET_CODE (PATTERN (insn)) == SET
+  && REG_P (SET_DEST (PATTERN (insn)))
+  && REG_P (SET_SRC (PATTERN (insn
+return 2;
+  cost = pattern_cost (PATTERN (insn), speed);
+  /* If the cost is zero, then it's likely a complex insn.  We don't want the
+ cost of these to be less than something we know about.  */
+  return cost ? cost : COSTS_N_INSNS (2);
+}
+
 /* All address computations that can be done are free, but rtx cost returns
the same for practically all of them.  So we weight the different types
of address here in the order (most pref first):


[PATCH 13/29] [arm] Add alternative canonicalizations for subtract-with-carry + shift

2019-10-18 Thread Richard Earnshaw

This patch adds a couple of alternative canonicalizations to allow
combine to match a subtract-with-carry operation when one of the operands
is shifted first.  The most common case of this is when combining a
sign-extend of one operand with a long-long value during subtraction.
The RSC variant is only enabled for Arm, the SBC variant for any 32-bit
compilation.

* config/arm/arm.md (subsi3_carryin_shift_alt): New pattern.
(rsbsi3_carryin_shift_alt): Likewise.
---
 gcc/config/arm/arm.md | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 74f417fbe4b..613f50ae5f0 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1048,6 +1048,23 @@ (define_insn "*subsi3_carryin_shift"
 (const_string "alu_shift_reg")))]
 )
 
+(define_insn "*subsi3_carryin_shift_alt"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(minus:SI (minus:SI
+		   (match_operand:SI 1 "s_register_operand" "r")
+		   (match_operand:SI 5 "arm_borrow_operation" ""))
+		  (match_operator:SI 2 "shift_operator"
+		   [(match_operand:SI 3 "s_register_operand" "r")
+		(match_operand:SI 4 "reg_or_int_operand" "rM")])))]
+  "TARGET_32BIT"
+  "sbc%?\\t%0, %1, %3%S2"
+  [(set_attr "conds" "use")
+   (set_attr "predicable" "yes")
+   (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
+(const_string "alu_shift_imm")
+(const_string "alu_shift_reg")))]
+)
+
 (define_insn "*rsbsi3_carryin_shift"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(minus:SI (minus:SI
@@ -1065,6 +1082,23 @@ (define_insn "*rsbsi3_carryin_shift"
 		  (const_string "alu_shift_reg")))]
 )
 
+(define_insn "*rsbsi3_carryin_shift_alt"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(minus:SI (minus:SI
+		   (match_operator:SI 2 "shift_operator"
+		[(match_operand:SI 3 "s_register_operand" "r")
+		 (match_operand:SI 4 "reg_or_int_operand" "rM")])
+		(match_operand:SI 5 "arm_borrow_operation" ""))
+		  (match_operand:SI 1 "s_register_operand" "r")))]
+  "TARGET_ARM"
+  "rsc%?\\t%0, %1, %3%S2"
+  [(set_attr "conds" "use")
+   (set_attr "predicable" "yes")
+   (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
+		  (const_string "alu_shift_imm")
+		  (const_string "alu_shift_reg")))]
+)
+
 ; transform ((x << y) - 1) to ~(~(x-1) << y)  Where X is a constant.
 (define_split
   [(set (match_operand:SI 0 "s_register_operand" "")


[PATCH 27/29] [arm] Early expansion of subvdi4

2019-10-18 Thread Richard Earnshaw

This patch adds early expansion of subvdi4.  The expansion sequence
is broadly based on the expansion of usubvdi4.

* config/arm/arm.md (subvdi4): Decompose calculation into 32-bit
operations.
(subdi3_compare1): Delete pattern.
(subvsi3_borrow): New insn pattern.
(subvsi3_borrow_imm): Likewise.
---
 gcc/config/arm/arm.md | 131 --
 1 file changed, 114 insertions(+), 17 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 05b735cfccd..5a8175ff8b0 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1395,12 +1395,79 @@ (define_expand "subvsi4"
 
 (define_expand "subvdi4"
   [(match_operand:DI 0 "s_register_operand")
-   (match_operand:DI 1 "s_register_operand")
-   (match_operand:DI 2 "s_register_operand")
+   (match_operand:DI 1 "reg_or_int_operand")
+   (match_operand:DI 2 "reg_or_int_operand")
(match_operand 3 "")]
   "TARGET_32BIT"
 {
-  emit_insn (gen_subdi3_compare1 (operands[0], operands[1], operands[2]));
+  rtx lo_result, hi_result;
+  rtx lo_op1, hi_op1, lo_op2, hi_op2;
+  lo_result = gen_lowpart (SImode, operands[0]);
+  hi_result = gen_highpart (SImode, operands[0]);
+  machine_mode mode = CCmode;
+
+  if (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2]))
+{
+  /* If both operands are constants we can decide the result statically.  */
+  wi::overflow_type overflow;
+  wide_int val = wi::sub (rtx_mode_t (operands[1], DImode),
+			  rtx_mode_t (operands[2], DImode),
+			  SIGNED, &overflow);
+  emit_move_insn (operands[0], GEN_INT (val.to_shwi ()));
+  if (overflow != wi::OVF_NONE)
+	emit_jump_insn (gen_jump (operands[3]));
+  DONE;
+}
+  else if (CONST_INT_P (operands[1]))
+{
+  arm_decompose_di_binop (operands[2], operands[1], &lo_op2, &hi_op2,
+			  &lo_op1, &hi_op1);
+  if (const_ok_for_arm (INTVAL (lo_op1)))
+	{
+	  emit_insn (gen_rsb_imm_compare (lo_result, lo_op1, lo_op2,
+	  GEN_INT (~UINTVAL (lo_op1))));
+	  /* We could potentially use RSC here in Arm state, but not
+	 in Thumb, so it's probably not worth the effort of handling
+	 this.  */
+	  hi_op1 = force_reg (SImode, hi_op1);
+	  mode = CC_RSBmode;
+	  goto highpart;
+	}
+  operands[1] = force_reg (DImode, operands[1]);
+}
+
+  arm_decompose_di_binop (operands[1], operands[2], &lo_op1, &hi_op1,
+			  &lo_op2, &hi_op2);
+  if (lo_op2 == const0_rtx)
+{
+  emit_move_insn (lo_result, lo_op1);
+  if (!arm_add_operand (hi_op2, SImode))
+hi_op2 = force_reg (SImode, hi_op2);
+  emit_insn (gen_subvsi4 (hi_result, hi_op1, hi_op2, operands[3]));
+  DONE;
+}
+
+  if (CONST_INT_P (lo_op2) && !arm_addimm_operand (lo_op2, SImode))
+lo_op2 = force_reg (SImode, lo_op2);
+  if (CONST_INT_P (lo_op2))
+emit_insn (gen_cmpsi2_addneg (lo_result, lo_op1, lo_op2,
+  GEN_INT (-INTVAL (lo_op2))));
+  else
+emit_insn (gen_subsi3_compare1 (lo_result, lo_op1, lo_op2));
+
+ highpart:
+  if (!arm_not_operand (hi_op2, SImode))
+hi_op2 = force_reg (SImode, hi_op2);
+  rtx ccreg = gen_rtx_REG (mode, CC_REGNUM);
+  if (CONST_INT_P (hi_op2))
+emit_insn (gen_subvsi3_borrow_imm (hi_result, hi_op1, hi_op2,
+   gen_rtx_LTU (SImode, ccreg, const0_rtx),
+   gen_rtx_LTU (DImode, ccreg,
+		const0_rtx)));
+  else
+emit_insn (gen_subvsi3_borrow (hi_result, hi_op1, hi_op2,
+   gen_rtx_LTU (SImode, ccreg, const0_rtx),
+   gen_rtx_LTU (DImode, ccreg, const0_rtx)));
   arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
 
   DONE;
@@ -1523,20 +1590,6 @@ (define_expand "usubvdi4"
   DONE;
 })
 
-(define_insn "subdi3_compare1"
-  [(set (reg:CC CC_REGNUM)
-	(compare:CC
-	  (match_operand:DI 1 "s_register_operand" "r")
-	  (match_operand:DI 2 "s_register_operand" "r")))
-   (set (match_operand:DI 0 "s_register_operand" "=&r")
-	(minus:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_32BIT"
-  "subs\\t%Q0, %Q1, %Q2;sbcs\\t%R0, %R1, %R2"
-  [(set_attr "conds" "set")
-   (set_attr "length" "8")
-   (set_attr "type" "multiple")]
-)
-
 (define_insn "subsi3_compare1"
   [(set (reg:CC CC_REGNUM)
 	(compare:CC
@@ -2016,6 +2069,50 @@ (define_insn "usubvsi3_borrow_imm"
(set_attr "type" "alus_imm")]
 )
 
+(define_insn "subvsi3_borrow"
+  [(set (reg:CC_V CC_REGNUM)
+	(compare:CC_V
+	 (minus:DI
+	  (minus:DI
+	   (sign_extend:DI (match_operand:SI 1 "s_register_operand" "0,r"))
+	   (sign_extend:DI (match_operand:SI 2 "s_register_operand" "l,r")))
+	  (match_operand:DI 4 "arm_borrow_operation" ""))
+	 (sign_extend:DI
+	  (minus:SI (minus:SI (match_dup 1) (match_dup 2))
+		(match_operand:SI 3 "arm_borrow_operation" "")
+   (set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(minus:SI (minus:SI (match_dup 1) (match_dup 2))
+		  (match_dup 3)))]
+  "TARGET_32BIT"
+  "sbcs%?\\t%0, %1, %2"
+  [(set_attr "conds" "set")
+   (set_attr "arch" "t2,*")
+   (set_attr "length" "2,4")]
+)
+
+(define_insn "subvsi3_borrow_imm"
+  [(set (reg:CC_V 

[PATCH 23/29] [arm] Early split addvdi4

2019-10-18 Thread Richard Earnshaw

This patch adds early splitting for addvdi4; it's very similar to the
uaddvdi4 splitter, but the details are just different enough in
places, especially for the patterns that match the splitting, where we
have to compare against the non-widened version to detect if overflow
occurred.

I've also added a testcase to the testsuite for a couple of constants
that caught me out during the development of this patch.  They're
probably arm-specific values, but the test is generic enough that I've
included it for all targets.

[gcc]
* config/arm/arm.c (arm_select_cc_mode): Allow either the first
or second operand of the PLUS inside a DImode equality test to be
sign-extend when selecting CC_Vmode.
* config/arm/arm.md (addvdi4): Early-split the operation into SImode
instructions.
(addsi3_cin_vout_reg, addsi3_cin_vout_imm, addsi3_cin_vout_0): New
expand patterns.
(addsi3_cin_vout_reg_insn, addsi3_cin_vout_imm_insn): New patterns.
(addsi3_cin_vout_0): Likewise.
(adddi3_compareV): Delete.

[gcc/testsuite]
* gcc.dg/builtin-arith-overflow-3.c: New test.
---
 gcc/config/arm/arm.c  |   3 +-
 gcc/config/arm/arm.md | 181 --
 .../gcc.dg/builtin-arith-overflow-3.c |  41 
 3 files changed, 203 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/builtin-arith-overflow-3.c

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 638c82df25f..c9abbb0f91d 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15414,7 +15414,8 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
   if (GET_MODE (x) == DImode
   && (op == EQ || op == NE)
   && GET_CODE (x) == PLUS
-  && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+  && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
   && GET_CODE (y) == SIGN_EXTEND
   && GET_CODE (XEXP (y, 0)) == PLUS)
 return CC_Vmode;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index be002f77382..e9e0ca925d2 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -505,18 +505,173 @@ (define_expand "addvsi4"
 })
 
 (define_expand "addvdi4"
-  [(match_operand:DI 0 "register_operand")
-   (match_operand:DI 1 "register_operand")
-   (match_operand:DI 2 "register_operand")
+  [(match_operand:DI 0 "s_register_operand")
+   (match_operand:DI 1 "s_register_operand")
+   (match_operand:DI 2 "reg_or_int_operand")
(match_operand 3 "")]
   "TARGET_32BIT"
 {
-  emit_insn (gen_adddi3_compareV (operands[0], operands[1], operands[2]));
-  arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+  rtx lo_result, hi_result;
+  rtx lo_op1, hi_op1, lo_op2, hi_op2;
+  arm_decompose_di_binop (operands[1], operands[2], &lo_op1, &hi_op1,
+			  &lo_op2, &hi_op2);
+  lo_result = gen_lowpart (SImode, operands[0]);
+  hi_result = gen_highpart (SImode, operands[0]);
+
+  if (lo_op2 == const0_rtx)
+{
+  emit_move_insn (lo_result, lo_op1);
+  if (!arm_add_operand (hi_op2, SImode))
+	hi_op2 = force_reg (SImode, hi_op2);
+
+  emit_insn (gen_addvsi4 (hi_result, hi_op1, hi_op2, operands[3]));
+}
+  else
+{
+  if (!arm_add_operand (lo_op2, SImode))
+	lo_op2 = force_reg (SImode, lo_op2);
+  if (!arm_not_operand (hi_op2, SImode))
+	hi_op2 = force_reg (SImode, hi_op2);
+
+  emit_insn (gen_addsi3_compare_op1 (lo_result, lo_op1, lo_op2));
+
+  if (hi_op2 == const0_rtx)
+emit_insn (gen_addsi3_cin_vout_0 (hi_result, hi_op1));
+  else if (CONST_INT_P (hi_op2))
+emit_insn (gen_addsi3_cin_vout_imm (hi_result, hi_op1, hi_op2));
+  else
+emit_insn (gen_addsi3_cin_vout_reg (hi_result, hi_op1, hi_op2));
+
+  arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+}
 
   DONE;
 })
 
+(define_expand "addsi3_cin_vout_reg"
+  [(parallel
+[(set (match_dup 3)
+	  (compare:CC_V
+	   (plus:DI
+	(plus:DI (match_dup 4)
+		 (sign_extend:DI (match_operand:SI 1 "s_register_operand")))
+	(sign_extend:DI (match_operand:SI 2 "s_register_operand")))
+	   (sign_extend:DI (plus:SI (plus:SI (match_dup 5) (match_dup 1))
+(match_dup 2)
+ (set (match_operand:SI 0 "s_register_operand")
+	  (plus:SI (plus:SI (match_dup 5) (match_dup 1))
+		   (match_dup 2)))])]
+  "TARGET_32BIT"
+  {
+operands[3] = gen_rtx_REG (CC_Vmode, CC_REGNUM);
+rtx ccin = gen_rtx_REG (CC_Cmode, CC_REGNUM);
+operands[4] = gen_rtx_LTU (DImode, ccin, const0_rtx);
+operands[5] = gen_rtx_LTU (SImode, ccin, const0_rtx);
+  }
+)
+
+(define_insn "*addsi3_cin_vout_reg_insn"
+  [(set (reg:CC_V CC_REGNUM)
+	(compare:CC_V
+	 (plus:DI
+	  (plus:DI
+	   (match_operand:DI 3 "arm_carry_operation" "")
+	   (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%0,r")))
+	  (sign_extend:DI (match_operand:SI 2 "s_register_operand" "l,r")))
+	 (sign_extend:DI
+	  (plus:SI (plus:SI (match_operand:SI 4 "arm_carry_operation" "")
+			

[PATCH 12/29] [arm] Implement negscc using SBC when appropriate.

2019-10-18 Thread Richard Earnshaw

When the carry flag is appropriately set by a comprison, negscc
patterns can expand into a simple SBC of a register with itself.  This
means we can convert two conditional instructions into a single
non-conditional instruction.  Furthermore, in Thumb2 we can avoid the
need for an IT instruction as well.  This patch also fixes the remaining
testcase that we initially XFAILed in the first patch of this series.

gcc:
* config/arm/arm.md (negscc_borrow): New pattern.
(mov_negscc): Don't split if the insn would match negscc_borrow.
* config/arm/thumb2.md (thumb2_mov_negscc): Likewise.
(thumb2_mov_negscc_strict_it): Likewise.

testsuite:
* gcc.target/arm/negdi-3.c: Remove XFAIL markers.
---
 gcc/config/arm/arm.md  | 14 --
 gcc/config/arm/thumb2.md   |  8 ++--
 gcc/testsuite/gcc.target/arm/negdi-3.c |  8 
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index f53dbc27207..74f417fbe4b 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -6612,13 +6612,23 @@ (define_insn_and_split "*mov_scc"
(set_attr "type" "multiple")]
 )
 
+(define_insn "*negscc_borrow"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(neg:SI (match_operand:SI 1 "arm_borrow_operation" "")))]
+  "TARGET_32BIT"
+  "sbc\\t%0, %0, %0"
+  [(set_attr "conds" "use")
+   (set_attr "length" "4")
+   (set_attr "type" "adc_reg")]
+)
+
 (define_insn_and_split "*mov_negscc"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(neg:SI (match_operator:SI 1 "arm_comparison_operator_mode"
 		 [(match_operand 2 "cc_register" "") (const_int 0)])))]
-  "TARGET_ARM"
+  "TARGET_ARM && !arm_borrow_operation (operands[1], SImode)"
   "#"   ; "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
-  "TARGET_ARM"
+  "&& true"
   [(set (match_dup 0)
 (if_then_else:SI (match_dup 1)
  (match_dup 3)
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 6ccc875e2b4..8d0b6be9205 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -368,7 +368,9 @@ (define_insn_and_split "*thumb2_mov_negscc"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(neg:SI (match_operator:SI 1 "arm_comparison_operator_mode"
 		 [(match_operand 2 "cc_register" "") (const_int 0)])))]
-  "TARGET_THUMB2 && !arm_restrict_it"
+  "TARGET_THUMB2
+   && !arm_restrict_it
+   && !arm_borrow_operation (operands[1], SImode)"
   "#"   ; "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
   "&& true"
   [(set (match_dup 0)
@@ -387,7 +389,9 @@ (define_insn_and_split "*thumb2_mov_negscc_strict_it"
   [(set (match_operand:SI 0 "low_register_operand" "=l")
 	(neg:SI (match_operator:SI 1 "arm_comparison_operator_mode"
 		 [(match_operand 2 "cc_register" "") (const_int 0)])))]
-  "TARGET_THUMB2 && arm_restrict_it"
+  "TARGET_THUMB2
+   && arm_restrict_it
+   && !arm_borrow_operation (operands[1], SImode)"
   "#"   ; ";mvn\\t%0, #0 ;it\\t%D1\;mov%D1\\t%0, #0\"
   "&& reload_completed"
   [(set (match_dup 0)
diff --git a/gcc/testsuite/gcc.target/arm/negdi-3.c b/gcc/testsuite/gcc.target/arm/negdi-3.c
index 3f6f2d1c2bb..76ddf49fc0d 100644
--- a/gcc/testsuite/gcc.target/arm/negdi-3.c
+++ b/gcc/testsuite/gcc.target/arm/negdi-3.c
@@ -11,7 +11,7 @@ Expected output:
rsbs	r0, r0, #0
sbc	r1, r1, r1
 */
-/* { dg-final { scan-assembler-times "rsb" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "sbc" 1 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "mov" 0 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times "rsc" 0 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "rsb" 1 } } */
+/* { dg-final { scan-assembler-times "sbc" 1 } } */
+/* { dg-final { scan-assembler-times "mov" 0 } } */
+/* { dg-final { scan-assembler-times "rsc" 0 } } */


[PATCH 10/29] [arm] Correct cost calculations involving borrow for subtracts.

2019-10-18 Thread Richard Earnshaw

The rtx_cost calculations when a borrow operation was being performed were
not being calculated correctly.  The borrow is free as part of the
subtract-with-carry instructions.  This patch recognizes the various
idioms that can describe this and returns the correct costs.

* config/arm/arm.c (arm_rtx_costs_internal, case MINUS): Handle
borrow operations.
---
 gcc/config/arm/arm.c | 49 +---
 1 file changed, 42 insertions(+), 7 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index dfbd5cde5eb..b91b52f6d51 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -10049,15 +10049,46 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
 	  rtx shift_by_reg = NULL;
 	  rtx shift_op;
 	  rtx non_shift_op;
+	  rtx op0 = XEXP (x, 0);
+	  rtx op1 = XEXP (x, 1);
 
-	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
+	  /* Factor out any borrow operation.  There's more than one way
+	 of expressing this; try to recognize them all.  */
+	  if (GET_CODE (op0) == MINUS)
+	{
+	  if (arm_borrow_operation (op1, SImode))
+		{
+		  op1 = XEXP (op0, 1);
+		  op0 = XEXP (op0, 0);
+		}
+	  else if (arm_borrow_operation (XEXP (op0, 1), SImode))
+		op0 = XEXP (op0, 0);
+	}
+	  else if (GET_CODE (op1) == PLUS
+		   && arm_borrow_operation (XEXP (op1, 0), SImode))
+	op1 = XEXP (op1, 0);
+	  else if (GET_CODE (op0) == NEG
+		   && arm_borrow_operation (op1, SImode))
+	{
+	  /* Negate with carry-in.  For Thumb2 this is done with
+		 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
+		 RSC instruction that exists in Arm mode.  */
+	  if (speed_p)
+		*cost += (TARGET_THUMB2
+			  ? extra_cost->alu.arith_shift
+			  : extra_cost->alu.arith);
+	  *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
+	  return true;
+	}
+
+	  shift_op = shifter_op_p (op0, &shift_by_reg);
 	  if (shift_op == NULL)
 	{
-	  shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
-	  non_shift_op = XEXP (x, 0);
+	  shift_op = shifter_op_p (op1, _by_reg);
+	  non_shift_op = op0;
 	}
 	  else
-	non_shift_op = XEXP (x, 1);
+	non_shift_op = op1;
 
 	  if (shift_op != NULL)
 	{
@@ -10087,10 +10118,10 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
 	  return true;
 	}
 
-	  if (CONST_INT_P (XEXP (x, 0)))
+	  if (CONST_INT_P (op0))
 	{
 	  int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
-	INTVAL (XEXP (x, 0)), NULL_RTX,
+	INTVAL (op0), NULL_RTX,
 	NULL_RTX, 1, 0);
 	  *cost = COSTS_N_INSNS (insns);
 	  if (speed_p)
@@ -10101,7 +10132,11 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
 	  else if (speed_p)
 	*cost += extra_cost->alu.arith;
 
-	  return false;
+	  /* Don't recurse as we don't want to cost any borrow that
+	 we've stripped.  */
+	  *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
+	  *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
+	  return true;
 	}
 
   if (GET_MODE_CLASS (mode) == MODE_INT


[PATCH 25/29] [arm] Early expansion of usubvdi4.

2019-10-18 Thread Richard Earnshaw

This patch adds early expansion of usubvdi4, allowing us to handle some
constants in place, which previously we were unable to do.

* config/arm/arm.md (usubvdi4): Allow registers or integers for
incoming operands.  Early split the calculation into SImode
operations.
(usubvsi3_borrow): New insn pattern.
(usubvsi3_borrow_imm): Likewise.
---
 gcc/config/arm/arm.md | 113 --
 1 file changed, 109 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index a465bf8e7a3..92f1823cdfa 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1390,13 +1390,81 @@ (define_expand "usubvsi4"
 
 (define_expand "usubvdi4"
   [(match_operand:DI 0 "s_register_operand")
-   (match_operand:DI 1 "s_register_operand")
-   (match_operand:DI 2 "s_register_operand")
+   (match_operand:DI 1 "reg_or_int_operand")
+   (match_operand:DI 2 "reg_or_int_operand")
(match_operand 3 "")]
   "TARGET_32BIT"
 {
-  emit_insn (gen_subdi3_compare1 (operands[0], operands[1], operands[2]));
-  arm_gen_unlikely_cbranch (LTU, CCmode, operands[3]);
+  rtx lo_result, hi_result;
+  rtx lo_op1, hi_op1, lo_op2, hi_op2;
+  lo_result = gen_lowpart (SImode, operands[0]);
+  hi_result = gen_highpart (SImode, operands[0]);
+  machine_mode mode = CCmode;
+
+  if (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2]))
+{
+  /* If both operands are constants we can decide the result statically.  */
+  wi::overflow_type overflow;
+  wide_int val = wi::sub (rtx_mode_t (operands[1], DImode),
+			  rtx_mode_t (operands[2], DImode),
+			  UNSIGNED, &overflow);
+  emit_move_insn (operands[0], GEN_INT (val.to_shwi ()));
+  if (overflow != wi::OVF_NONE)
+	emit_jump_insn (gen_jump (operands[3]));
+  DONE;
+}
+  else if (CONST_INT_P (operands[1]))
+{
+  arm_decompose_di_binop (operands[2], operands[1], &lo_op2, &hi_op2,
+			  &lo_op1, &hi_op1);
+  if (const_ok_for_arm (INTVAL (lo_op1)))
+	{
+	  emit_insn (gen_rsb_imm_compare (lo_result, lo_op1, lo_op2,
+					  GEN_INT (~UINTVAL (lo_op1))));
+	  /* We could potentially use RSC here in Arm state, but not
+	 in Thumb, so it's probably not worth the effort of handling
+	 this.  */
+	  hi_op1 = force_reg (SImode, hi_op1);
+	  mode = CC_RSBmode;
+	  goto highpart;
+	}
+  operands[1] = force_reg (DImode, operands[1]);
+}
+
+  arm_decompose_di_binop (operands[1], operands[2], &lo_op1, &hi_op1,
+			  &lo_op2, &hi_op2);
+  if (lo_op2 == const0_rtx)
+{
+  emit_move_insn (lo_result, lo_op1);
+  if (!arm_add_operand (hi_op2, SImode))
+hi_op2 = force_reg (SImode, hi_op2);
+  emit_insn (gen_usubvsi4 (hi_result, hi_op1, hi_op2, operands[3]));
+  DONE;
+}
+
+  if (CONST_INT_P (lo_op2) && !arm_addimm_operand (lo_op2, SImode))
+lo_op2 = force_reg (SImode, lo_op2);
+  if (CONST_INT_P (lo_op2))
+    emit_insn (gen_cmpsi2_addneg (lo_result, lo_op1, lo_op2,
+				  GEN_INT (-INTVAL (lo_op2))));
+  else
+emit_insn (gen_subsi3_compare1 (lo_result, lo_op1, lo_op2));
+
+ highpart:
+  if (!arm_not_operand (hi_op2, SImode))
+hi_op2 = force_reg (SImode, hi_op2);
+  rtx ccreg = gen_rtx_REG (mode, CC_REGNUM);
+  if (CONST_INT_P (hi_op2))
+emit_insn (gen_usubvsi3_borrow_imm (hi_result, hi_op1, hi_op2,
+	GEN_INT (UINTVAL (hi_op2) & 0xffffffff),
+	gen_rtx_LTU (SImode, ccreg, const0_rtx),
+	gen_rtx_LTU (DImode, ccreg,
+		 const0_rtx)));
+  else
+emit_insn (gen_usubvsi3_borrow (hi_result, hi_op1, hi_op2,
+gen_rtx_LTU (SImode, ccreg, const0_rtx),
+gen_rtx_LTU (DImode, ccreg, const0_rtx)));
+  arm_gen_unlikely_cbranch (LTU, CC_Bmode, operands[3]);
 
   DONE;
 })
@@ -1825,6 +1893,43 @@ (define_insn "rscsi3_out_scratch"
(set_attr "type" "alus_imm")]
 )
 
+(define_insn "usubvsi3_borrow"
+  [(set (reg:CC_B CC_REGNUM)
+	(compare:CC_B
+	 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "0,r"))
+	 (plus:DI (match_operand:DI 4 "arm_borrow_operation" "")
+	  (zero_extend:DI
+		   (match_operand:SI 2 "s_register_operand" "l,r")
+   (set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(minus:SI (match_dup 1)
+		  (plus:SI (match_operand:SI 3 "arm_borrow_operation" "")
+			   (match_dup 2]
+  "TARGET_32BIT"
+  "sbcs%?\\t%0, %1, %2"
+  [(set_attr "conds" "set")
+   (set_attr "arch" "t2,*")
+   (set_attr "length" "2,4")]
+)
+
+(define_insn "usubvsi3_borrow_imm"
+  [(set (reg:CC_B CC_REGNUM)
+	(compare:CC_B
+	 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r,r"))
+	 (plus:DI (match_operand:DI 5 "arm_borrow_operation" "")
+		  (match_operand:DI 3 "const_int_operand" "n,n"
+   (set (match_operand:SI 0 "s_register_operand" "=r,r")
+	(minus:SI (match_dup 1)
+		  (plus:SI (match_operand:SI 4 "arm_borrow_operation" "")
+			   (match_operand:SI 2 "arm_adcimm_operand" "I,K"]
+  "TARGET_32BIT
+   && (UINTVAL (operands[2]) & 0xffffffff) == UINTVAL (operands[3])"
+  "@
+  sbcs%?\\t%0, 

[PATCH 24/29] [arm] Improve constant handling for usubvsi4.

2019-10-18 Thread Richard Earnshaw

This patch improves the expansion of usubvsi4 by allowing suitable
constants to be passed directly.  Unlike normal subtraction, either
operand may be a constant (and indeed I have seen cases where both can
be with LTO enabled).  One interesting testcase that improves as a
result of this is:

unsigned f6 (unsigned a)
{
  unsigned x;
  return __builtin_sub_overflow (5U, a, ) ? 0 : x;
}

Which previously compiled to:

rsbs	r3, r0, #5
cmp	r0, #5
movls	r0, r3
movhi	r0, #0

but now generates the optimal sequence:

rsbs	r0, r0, #5
movcc	r0, #0

* config/arm/arm.md (usubv4): Delete expansion.
(usubvsi4): New pattern.  Allow some immediate values for inputs.
(usubvdi4): New pattern.
---
 gcc/config/arm/arm.md | 46 ++-
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index e9e0ca925d2..a465bf8e7a3 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1352,14 +1352,50 @@ (define_expand "subv4"
   DONE;
 })
 
-(define_expand "usubv4"
-  [(match_operand:SIDI 0 "register_operand")
-   (match_operand:SIDI 1 "register_operand")
-   (match_operand:SIDI 2 "register_operand")
+(define_expand "usubvsi4"
+  [(match_operand:SI 0 "s_register_operand")
+   (match_operand:SI 1 "arm_rhs_operand")
+   (match_operand:SI 2 "arm_add_operand")
(match_operand 3 "")]
   "TARGET_32BIT"
 {
-  emit_insn (gen_sub3_compare1 (operands[0], operands[1], operands[2]));
+  machine_mode mode = CCmode;
+  if (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2]))
+{
+  /* If both operands are constants we can decide the result statically.  */
+  wi::overflow_type overflow;
+  wide_int val = wi::sub (rtx_mode_t (operands[1], SImode),
+			  rtx_mode_t (operands[2], SImode),
+			  UNSIGNED, &overflow);
+  emit_move_insn (operands[0], GEN_INT (val.to_shwi ()));
+  if (overflow != wi::OVF_NONE)
+	emit_jump_insn (gen_jump (operands[3]));
+  DONE;
+}
+  else if (CONST_INT_P (operands[2]))
+emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2],
+				  GEN_INT (-INTVAL (operands[2]))));
+  else if (CONST_INT_P (operands[1]))
+{
+  mode = CC_RSBmode;
+  emit_insn (gen_rsb_imm_compare (operands[0], operands[1], operands[2],
+				      GEN_INT (~UINTVAL (operands[1]))));
+}
+  else
+emit_insn (gen_subsi3_compare1 (operands[0], operands[1], operands[2]));
+  arm_gen_unlikely_cbranch (LTU, mode, operands[3]);
+
+  DONE;
+})
+
+(define_expand "usubvdi4"
+  [(match_operand:DI 0 "s_register_operand")
+   (match_operand:DI 1 "s_register_operand")
+   (match_operand:DI 2 "s_register_operand")
+   (match_operand 3 "")]
+  "TARGET_32BIT"
+{
+  emit_insn (gen_subdi3_compare1 (operands[0], operands[1], operands[2]));
   arm_gen_unlikely_cbranch (LTU, CCmode, operands[3]);
 
   DONE;


[PATCH 16/29] [arm] early split most DImode comparison operations.

2019-10-18 Thread Richard Earnshaw

This patch does most of the work for early splitting the DImode
comparisons.  We now handle EQ, NE, LT, GE, LTU and GEU during early
expansion, in addition to EQ and NE, for which the expansion has now
been reworked to use a standard conditional-compare pattern already in
the back-end.

To handle this we introduce two new condition flag modes that are used
when comparing the upper words of decomposed DImode values: one for
signed, and one for unsigned comparisons.  CC_Bmode (B for Borrow) is
essentially the inverse of CC_Cmode and is used when the carry flag is
set by a subtraction of unsigned values.

* config/arm/arm-modes.def (CC_NV, CC_B): New CC modes.
* config/arm/arm.c (arm_select_cc_mode): Recognize constructs that
need these modes.
(arm_gen_dicompare_reg): New code to early expand the sub-operations
of EQ, NE, LT, GE, LTU and GEU.
* config/arm/iterators.md (CC_EXTEND): New code attribute.
* config/arm/predicates.md (arm_adcimm_operand): New predicate..
* config/arm/arm.md (cmpsi3_carryin_out): New pattern.
(cmpsi3_imm_carryin_out): Likewise.
(cmpsi3_0_carryin_out): Likewise.
---
 gcc/config/arm/arm-modes.def |   6 +
 gcc/config/arm/arm.c | 220 ++-
 gcc/config/arm/arm.md|  45 +++
 gcc/config/arm/iterators.md  |   4 +
 gcc/config/arm/predicates.md |   6 +
 5 files changed, 278 insertions(+), 3 deletions(-)

diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index 4fa7f1b43e5..65cddf68cdb 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -34,12 +34,16 @@ ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
CC_Cmode should be used if only the C flag is set correctly, after an
  addition.
CC_Nmode should be used if only the N (sign) flag is set correctly
+   CC_NVmode should be used if only the N and V bits are set correctly,
+ (used for signed comparisons when the carry is propagated in).
CC_CZmode should be used if only the C and Z flags are correct
(used for DImode unsigned comparisons).
CC_RSBmode should be used where the comparison is set by an RSB immediate,
  or NEG instruction.  The form of the comparison for (const - reg) will
  be (COMPARE (not (reg)) (~const)).
CC_NCVmode should be used if only the N, C, and V flags are correct
+   CC_Bmode should be used if only the C flag is correct after a subtract
+ (eg after an unsigned borrow with carry-in propagation).
(used for DImode signed comparisons).
CCmode should be used otherwise.  */
 
@@ -47,6 +51,7 @@ CC_MODE (CC_NOOV);
 CC_MODE (CC_Z);
 CC_MODE (CC_CZ);
 CC_MODE (CC_NCV);
+CC_MODE (CC_NV);
 CC_MODE (CC_SWP);
 CC_MODE (CC_RSB);
 CC_MODE (CCFP);
@@ -62,6 +67,7 @@ CC_MODE (CC_DLTU);
 CC_MODE (CC_DGEU);
 CC_MODE (CC_DGTU);
 CC_MODE (CC_C);
+CC_MODE (CC_B);
 CC_MODE (CC_N);
 CC_MODE (CC_V);
 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index ddfe4335169..99c8bd79d30 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15348,6 +15348,22 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
   && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
 return CC_Cmode;
 
+  if (GET_MODE (x) == DImode
+  && (op == GE || op == LT)
+  && GET_CODE (x) == SIGN_EXTEND
+  && ((GET_CODE (y) == PLUS
+	   && arm_borrow_operation (XEXP (y, 0), DImode))
+	  || arm_borrow_operation (y, DImode)))
+return CC_NVmode;
+
+  if (GET_MODE (x) == DImode
+  && (op == GEU || op == LTU)
+  && GET_CODE (x) == ZERO_EXTEND
+  && ((GET_CODE (y) == PLUS
+	   && arm_borrow_operation (XEXP (y, 0), DImode))
+	  || arm_borrow_operation (y, DImode)))
+return CC_Bmode;
+
   if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
 {
   switch (op)
@@ -15410,16 +15426,198 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
 static rtx
 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
 {
-  /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
+  machine_mode mode;
+  rtx cc_reg;
+
+/* We don't currently handle DImode in thumb1, but rely on libgcc.  */
   gcc_assert (TARGET_32BIT);
 
+  rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
+  subreg_lowpart_offset (SImode, DImode));
+  rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
+  subreg_highpart_offset (SImode, DImode));
+  rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
+  subreg_lowpart_offset (SImode, DImode));
+  rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
+  subreg_highpart_offset (SImode, DImode));
+  switch (code)
+{
+case EQ:
+case NE:
+  {
+	/* We should never have X as a const_int in this case.  */
+	gcc_assert (!CONST_INT_P (x));
+
+	if (y_lo == const0_rtx || y_hi == const0_rtx)
+	  {
+	if (y_lo != const0_rtx)
+	  {
+		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
+
+		gcc_assert 

[PATCH 03/29] [arm] Early split zero- and sign-extension

2019-10-18 Thread Richard Earnshaw

This patch changes the insn patterns for zero- and sign-extend into
define_expands that generate the appropriate word operations
immediately.

* config/arm/arm.md (zero_extend<mode>di2): Convert to define_expand.
(extend<mode>di2): Likewise.
---
 gcc/config/arm/arm.md | 75 +++
 1 file changed, 54 insertions(+), 21 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 5ba42a13430..4a7a64e6613 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4196,31 +4196,64 @@ (define_expand "truncdfhf2"
 
 ;; Zero and sign extension instructions.
 
-(define_insn "zero_extenddi2"
-  [(set (match_operand:DI 0 "s_register_operand" "=r,?r")
-(zero_extend:DI (match_operand:QHSI 1 ""
-	"")))]
+(define_expand "zero_extend<mode>di2"
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>" "")))]
  "TARGET_32BIT <qhs_zextenddi_cond>"
-  "#"
-  [(set_attr "length" "4,8")
-   (set_attr "arch" "*,*")
-   (set_attr "ce_count" "2")
-   (set_attr "predicable" "yes")
-   (set_attr "type" "mov_reg,multiple")]
+  {
+rtx res_lo, res_hi, op0_lo, op0_hi;
+res_lo = gen_lowpart (SImode, operands[0]);
+res_hi = gen_highpart (SImode, operands[0]);
+if (can_create_pseudo_p ())
+  {
+	op0_lo = <MODE>mode == SImode ? operands[1] : gen_reg_rtx (SImode);
+	op0_hi = gen_reg_rtx (SImode);
+  }
+else
+  {
+	op0_lo = <MODE>mode == SImode ? operands[1] : res_lo;
+	op0_hi = res_hi;
+  }
+    if (<MODE>mode != SImode)
+  emit_insn (gen_rtx_SET (op0_lo,
+			  gen_rtx_ZERO_EXTEND (SImode, operands[1])));
+emit_insn (gen_movsi (op0_hi, const0_rtx));
+if (res_lo != op0_lo)
+  emit_move_insn (res_lo, op0_lo);
+if (res_hi != op0_hi)
+  emit_move_insn (res_hi, op0_hi);
+DONE;
+  }
 )
 
-(define_insn "extenddi2"
-  [(set (match_operand:DI 0 "s_register_operand" "=r,?r,?r")
-(sign_extend:DI (match_operand:QHSI 1 ""
-	"")))]
+(define_expand "extend<mode>di2"
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>" "")))]
  "TARGET_32BIT <qhs_sextenddi_cond>"
-  "#"
-  [(set_attr "length" "4,8,8")
-   (set_attr "ce_count" "2")
-   (set_attr "shift" "1")
-   (set_attr "predicable" "yes")
-   (set_attr "arch" "*,a,t")
-   (set_attr "type" "mov_reg,multiple,multiple")]
+  {
+rtx res_lo, res_hi, op0_lo, op0_hi;
+res_lo = gen_lowpart (SImode, operands[0]);
+res_hi = gen_highpart (SImode, operands[0]);
+if (can_create_pseudo_p ())
+  {
+	op0_lo = <MODE>mode == SImode ? operands[1] : gen_reg_rtx (SImode);
+	op0_hi = gen_reg_rtx (SImode);
+  }
+else
+  {
+	op0_lo = <MODE>mode == SImode ? operands[1] : res_lo;
+	op0_hi = res_hi;
+  }
+    if (<MODE>mode != SImode)
+  emit_insn (gen_rtx_SET (op0_lo,
+			  gen_rtx_SIGN_EXTEND (SImode, operands[1])));
+emit_insn (gen_ashrsi3 (op0_hi, op0_lo, GEN_INT (31)));
+if (res_lo != op0_lo)
+  emit_move_insn (res_lo, op0_lo);
+if (res_hi != op0_hi)
+  emit_move_insn (res_hi, op0_hi);
+DONE;
+  }
 )
 
 ;; Splits for all extensions to DImode


[PATCH 01/29] [arm] Rip out DImode addition and subtraction splits.

2019-10-18 Thread Richard Earnshaw

The first step towards early splitting of addition and subtraction at
DImode is to rip out the old patterns that are designed to propagate
DImode through the RTL optimization passes and the do late splitting.

This patch does cause some code size regressions, but it should still
execute correctly.  We will progressively add back the optimizations
we had here in later patches.

A small number of tests in the Arm-specific testsuite do fail as a
result of this patch, but that's to be expected, since the
optimizations they are looking for have just been removed.  I've kept
the tests, but XFAILed them for now.

One small technical change is also done in this patch as part of the
cleanup: the uaddv4 expander is changed to use LTU as the branch
comparison.  This eliminates the need for CC_Cmode to recognize
somewhat bogus equality constraints.

gcc:
* arm.md (adddi3): Only accept register operands.
(arm_adddi3): Convert to simple insn with no split.  Do not accept
constants.
(adddi_sesidi_di): Delete patern.
(adddi_zesidi_di): Likewise.
(uaddv4): Use LTU as condition for branch.
(adddi3_compareV): Convert to simple insn with no split.
(addsi3_compareV_upper): Delete pattern.
(adddi3_compareC): Convert to simple insn with no split.  Correct
flags setting expression.
(addsi3_compareC_upper): Delete pattern.
(addsi3_compareC): Correct flags setting expression.
(subdi3_compare1): Convert to simple insn with no split.
(subsi3_carryin_compare): Delete pattern.
(arm_subdi3): Convert to simple insn with no split.
(subdi_zesidi): Delete pattern.
(subdi_di_sesidi): Delete pattern.
(subdi_zesidi_di): Delete pattern.
(subdi_sesidi_di): Delete pattern.
(subdi_zesidi_zesidi): Delete pattern.
(negvdi3): Use s_register_operand.
(negdi2_compare): Convert to simple insn with no split.
(negdi2_insn): Likewise.
(negsi2_carryin_compare): Delete pattern.
(negdi_zero_extendsidi): Delete pattern.
(arm_cmpdi_insn): Convert to simple insn with no split.
(negdi2): Don't call gen_negdi2_neon.
* config/arm/neon.md (adddi3_neon): Delete pattern.
(subdi3_neon): Delete pattern.
(negdi2_neon): Delete pattern.
(splits for negdi2_neon): Delete splits.

testsuite:
* gcc.target/arm/negdi-3.c: Add XFAILS.
* gcc.target/arm/pr3447-1.c: Likewise.
* gcc.target/arm/pr3447-3.c: Likewise.
* gcc.target/arm/pr3447-4.c: Likewise.
---
 gcc/config/arm/arm.c |   2 -
 gcc/config/arm/arm.md| 569 ++-
 gcc/testsuite/gcc.target/arm/negdi-3.c   |   8 +-
 gcc/testsuite/gcc.target/arm/pr53447-1.c |   2 +-
 gcc/testsuite/gcc.target/arm/pr53447-3.c |   2 +-
 gcc/testsuite/gcc.target/arm/pr53447-4.c |   2 +-
 6 files changed, 56 insertions(+), 529 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index ba330470141..41567af1869 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -23581,8 +23581,6 @@ maybe_get_arm_condition_code (rtx comparison)
 	{
 	case LTU: return ARM_CS;
 	case GEU: return ARM_CC;
-	case NE: return ARM_CS;
-	case EQ: return ARM_CC;
 	default: return ARM_NV;
 	}
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index f861c72ccfc..241ba97c4ba 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -437,7 +437,7 @@ (define_expand "adddi3"
  [(parallel
[(set (match_operand:DI   0 "s_register_operand")
 	  (plus:DI (match_operand:DI 1 "s_register_operand")
-	   (match_operand:DI 2 "arm_adddi_operand")))
+	   (match_operand:DI 2 "s_register_operand")))
 (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
@@ -446,87 +446,13 @@ (define_expand "adddi3"
   "
 )
 
-(define_insn_and_split "*arm_adddi3"
-  [(set (match_operand:DI  0 "arm_general_register_operand" "=")
-	(plus:DI (match_operand:DI 1 "arm_general_register_operand" "%0, 0, r, 0, r")
-		 (match_operand:DI 2 "arm_general_adddi_operand""r,  0, r, Dd, Dd")))
+(define_insn "*arm_adddi3"
+  [(set (match_operand:DI 0 "s_register_operand"  "=,,")
+	(plus:DI (match_operand:DI 1 "s_register_operand" " %0,0,r")
+		 (match_operand:DI 2 "s_register_operand" " r,0,r")))
(clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT"
-  "#"
-  "TARGET_32BIT"
-  [(parallel [(set (reg:CC_C CC_REGNUM)
-		   (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
- (match_dup 1)))
-	  (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
-   (set (match_dup 3) (plus:SI (plus:SI (match_dup 4) (match_dup 5))
-			   (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0]
-  "
-  {
-operands[3] = gen_highpart (SImode, operands[0]);
-operands[0] = gen_lowpart (SImode, operands[0]);
-operands[4] = gen_highpart (SImode, operands[1]);
-operands[1] = gen_lowpart (SImode, 

[PATCH 02/29] [arm] Perform early splitting of adddi3.

2019-10-18 Thread Richard Earnshaw

This patch causes the expansion of adddi3 to split the operation
immediately for Arm and Thumb-2.  This is desirable as it frees up the
register allocator to pick what ever combination of registers suits
best and reduces the number of auxiliary patterns that we need in the
back-end.  Three of the testcases that we disabled earlier are already
fixed by this patch.  Finally, we add a new pattern to match the
canonicalization of add-with-carry when using an immediate of zero.

gcc:
* config/arm/arm-protos.h (arm_decompose_di_binop): New prototype.
* config/arm/arm.c (arm_decompose_di_binop): New function.
* config/arm/arm.md (adddi3): Also accept any const_int for op2.
If not generating Thumb-1 code, decompose the operation into 32-bit
pieces.
* add0si_carryin_: New pattern.

testsuite:
* gcc.target/arm/pr53447-1.c: Remove XFAIL.
* gcc.target/arm/pr53447-3.c: Remove XFAIL.
* gcc.target/arm/pr53447-4.c: Remove XFAIL.
---
 gcc/config/arm/arm-protos.h  |  1 +
 gcc/config/arm/arm.c | 15 +
 gcc/config/arm/arm.md| 73 ++--
 gcc/testsuite/gcc.target/arm/pr53447-1.c |  2 +-
 gcc/testsuite/gcc.target/arm/pr53447-3.c |  2 +-
 gcc/testsuite/gcc.target/arm/pr53447-4.c |  2 +-
 6 files changed, 76 insertions(+), 19 deletions(-)

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index f995974f9bb..c685bcbf99c 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -57,6 +57,7 @@ extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high);
 extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
 		 bool high);
 extern void arm_emit_speculation_barrier_function (void);
+extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *);
 
 #ifdef RTX_CODE
 extern void arm_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 41567af1869..db18651346f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -14933,6 +14933,21 @@ gen_cpymem_ldrd_strd (rtx *operands)
   return true;
 }
 
+/* Decompose operands for a 64-bit binary operation in OP1 and OP2
+   into its component 32-bit subregs.  OP2 may be an immediate
+   constant and we want to simplify it in that case.  */
+void
+arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
+			rtx *lo_op2, rtx *hi_op2)
+{
+  *lo_op1 = gen_lowpart (SImode, op1);
+  *hi_op1 = gen_highpart (SImode, op1);
+  *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
+ subreg_lowpart_offset (SImode, DImode));
+  *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
+ subreg_highpart_offset (SImode, DImode));
+}
+
 /* Select a dominance comparison mode if possible for a test of the general
form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
COND_OR == DOM_CC_X_AND_Y => (X && Y)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 241ba97c4ba..5ba42a13430 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -437,25 +437,53 @@ (define_expand "adddi3"
  [(parallel
[(set (match_operand:DI   0 "s_register_operand")
 	  (plus:DI (match_operand:DI 1 "s_register_operand")
-	   (match_operand:DI 2 "s_register_operand")))
+		   (match_operand:DI 2 "reg_or_int_operand")))
 (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
-  if (TARGET_THUMB1 && !REG_P (operands[2]))
-operands[2] = force_reg (DImode, operands[2]);
-  "
-)
+  if (TARGET_THUMB1)
+{
+  if (!REG_P (operands[2]))
+	operands[2] = force_reg (DImode, operands[2]);
+}
+  else
+{
+  rtx lo_result, hi_result, lo_dest, hi_dest;
+  rtx lo_op1, hi_op1, lo_op2, hi_op2;
+  arm_decompose_di_binop (operands[1], operands[2], _op1, _op1,
+			  _op2, _op2);
+  lo_result = lo_dest = gen_lowpart (SImode, operands[0]);
+  hi_result = hi_dest = gen_highpart (SImode, operands[0]);
+
+  if (lo_op2 == const0_rtx)
+	{
+	  lo_dest = lo_op1;
+	  if (!arm_add_operand (hi_op2, SImode))
+	hi_op2 = force_reg (SImode, hi_op2);
+	  /* Assume hi_op2 won't also be zero.  */
+	  emit_insn (gen_addsi3 (hi_dest, hi_op1, hi_op2));
+	}
+  else
+	{
+	  if (!arm_add_operand (lo_op2, SImode))
+	lo_op2 = force_reg (SImode, lo_op2);
+	  if (!arm_not_operand (hi_op2, SImode))
+	hi_op2 = force_reg (SImode, hi_op2);
+
+	  emit_insn (gen_addsi3_compareC (lo_dest, lo_op1, lo_op2));
+	  if (hi_op2 == const0_rtx)
+	emit_insn (gen_add0si3_carryin_ltu (hi_dest, hi_op1));
+	  else
+	emit_insn (gen_addsi3_carryin_ltu (hi_dest, hi_op1, hi_op2));
+	}
 
-(define_insn "*arm_adddi3"
-  [(set (match_operand:DI 0 "s_register_operand"  "=,,")
-	(plus:DI (match_operand:DI 1 "s_register_operand" " %0,0,r")
-		 (match_operand:DI 2 "s_register_operand" " r,0,r")))
-   (clobber (reg:CC CC_REGNUM))]
-  "TARGET_32BIT"
-  "adds\\t%Q0, %Q1, 

[PATCH 00/29] [arm] Rewrite DImode arithmetic support

2019-10-18 Thread Richard Earnshaw

This series of patches rewrites all the DImode arithmetic patterns for
the Arm backend when compiling for Arm or Thumb2 to split the
operations during expand (the thumb1 code is unchanged and cannot
benefit from early splitting as we are unable to expose the carry
flag).

This has a number of benefits:
 - register allocation has more freedom to use independent
   registers for the upper and lower halves of the register
 - we can make better use of combine for spotting insn merge
   opportunities without needing many additional patterns that are
   only used for DImode
 - we eliminate a number of bugs in the machine description where
   the carry calculations were not correctly propagated by the
   split patterns (we mostly got away with this because the
   splitting previously happened only after most of the important
   optimization passes had been run).

The patch series starts by paring back all the DImode arithmetic
support to a very simple form without any splitting at all and then
progressively re-implementing the patterns with early split
operations.  This proved to be the only sane way of untangling the
existing code due to a number of latent bugs which would have been
exposed if a different approach had been taken.

Each patch should produce a working compiler (it did when it was
originally written), though since the patch set has been re-ordered
slightly there is a possibility that some of the intermediate steps
may have missing test updates that are only cleaned up later.
However, only the end of the series should be considered complete.
I've kept the patch as a series to permit easier regression hunting
should that prove necessary.

R.

Richard Earnshaw (29):
  [arm] Rip out DImode addition and subtraction splits.
  [arm] Perform early splitting of adddi3.
  [arm] Early split zero- and sign-extension
  [arm] Rewrite addsi3_carryin_shift_ in canonical form
  [arm] fix constraints on addsi3_carryin_alt2
  [arm] Early split subdi3
  [arm] Remove redundant DImode subtract patterns
  [arm] Introduce arm_carry_operation
  [arm] Correctly cost addition with a carry-in
  [arm] Correct cost calculations involving borrow for subtracts.
  [arm] Reduce cost of insns that are simple reg-reg moves.
  [arm] Implement negscc using SBC when appropriate.
  [arm] Add alternative canonicalizations for subtract-with-carry +
shift
  [arm] Early split simple DImode equality comparisons
  [arm] Improve handling of DImode comparisons against constants.
  [arm] early split most DImode comparison operations.
  [arm] Handle some constant comparisons using rsbs+rscs
  [arm] Cleanup dead code - old support for DImode comparisons
  [arm] Handle immediate values in uaddvsi4
  [arm] Early expansion of uaddvdi4.
  [arm] Improve code generation for addvsi4.
  [arm] Allow the summation result of signed add-with-overflow to be
discarded.
  [arm] Early split addvdi4
  [arm] Improve constant handling for usubvsi4.
  [arm] Early expansion of usubvdi4.
  [arm] Improve constant handling for subvsi4.
  [arm] Early expansion of subvdi4
  [arm] Improvements to negvsi4 and negvdi4.
  [arm] Fix testsuite nit when compiling for thumb2

 gcc/config/arm/arm-modes.def  |   19 +-
 gcc/config/arm/arm-protos.h   |1 +
 gcc/config/arm/arm.c  |  598 -
 gcc/config/arm/arm.md | 2020 ++---
 gcc/config/arm/iterators.md   |   15 +-
 gcc/config/arm/predicates.md  |   29 +-
 gcc/config/arm/thumb2.md  |8 +-
 .../gcc.dg/builtin-arith-overflow-3.c |   41 +
 gcc/testsuite/gcc.target/arm/negdi-3.c|4 +-
 9 files changed, 1757 insertions(+), 978 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/builtin-arith-overflow-3.c



Re: PR69455

2019-10-18 Thread Steve Kargl
On Fri, Oct 18, 2019 at 05:17:38PM +0100, Iain Sandoe wrote:
> 
> something like this, perhaps (I regret my Fortran skills are in the f77 era):
> 

If you know/knew F77 and have some working knowledge of C/C++ and
you want to see where modern Fortran sits, I recommend Modern Fortran
Explained iby Metcalf et al.  You can probably read it in a day.

For the record, here is the patch (see attached) committed to all
open branches.

With this commit, I'll be taking a long break from looking at
any gfortran bugs.

2019-10-18  Steven G. Kargl  

PR fortran/69455
* trans-decl.c (generate_local_decl): Avoid misconstructed
intrinsic modules in a BLOCK construct.

2019-10-18  Steven G. Kargl  

PR fortran/69455
* gfortran.dg/pr69455_1.f90: New test.
* gfortran.dg/pr69455_2.f90: Ditto.

-- 
Steve
25.16%
Index: gcc/fortran/trans-decl.c
===
--- gcc/fortran/trans-decl.c	(revision 277157)
+++ gcc/fortran/trans-decl.c	(working copy)
@@ -5962,7 +5962,14 @@ generate_local_decl (gfc_symbol * sym)
 
   if (sym->ns && sym->ns->construct_entities)
 	{
-	  if (sym->attr.referenced)
+	  /* Construction of the intrinsic modules within a BLOCK
+	 construct, where ONLY and RENAMED entities are included,
+	 seems to be bogus.  This is a workaround that can be removed
+	 if someone ever takes on the task to creating full-fledge
+	 modules.  See PR 69455.  */
+	  if (sym->attr.referenced
+	  && sym->from_intmod != INTMOD_ISO_C_BINDING
+	  && sym->from_intmod != INTMOD_ISO_FORTRAN_ENV)
 	gfc_get_symbol_decl (sym);
 	  sym->mark = 1;
 	}
Index: gcc/testsuite/gfortran.dg/pr69455_1.f90
===
--- gcc/testsuite/gfortran.dg/pr69455_1.f90	(nonexistent)
+++ gcc/testsuite/gfortran.dg/pr69455_1.f90	(working copy)
@@ -0,0 +1,14 @@
+! { dg-do run }
+program foo
+   block
+  use, intrinsic :: iso_c_binding, only: wp => c_float, ik => c_int
+  if (ik /= 4) stop 1
+  if (wp /= 4) stop 2
+   end block
+   block
+  use, intrinsic :: iso_c_binding, only: wp => c_double, ik => c_int64_t
+  if (ik /= 8) stop 3
+  if (wp /= 8) stop 4
+   end block
+end program foo
+
Index: gcc/testsuite/gfortran.dg/pr69455_2.f90
===
--- gcc/testsuite/gfortran.dg/pr69455_2.f90	(nonexistent)
+++ gcc/testsuite/gfortran.dg/pr69455_2.f90	(working copy)
@@ -0,0 +1,13 @@
+! { dg-do run }
+program foo
+   block
+  use, intrinsic :: ISO_FORTRAN_ENV, only: wp => REAL32, ik => INT32
+  if (ik /= 4) stop 1
+  if (wp /= 4) stop 2
+   end block
+   block
+  use, intrinsic :: ISO_FORTRAN_ENV, only: wp => REAL64, ik => INT64
+  if (ik /= 8) stop 3
+  if (wp /= 8) stop 4
+   end block
+end program foo


Re: [Patch][Demangler] Fix for complex values

2019-10-18 Thread Miguel Saldivar
The only reason  I wanted `float complex` was for interoperability
between the two other demanglers. Although the go demangler
does use `_Complex` and `_Imaginary`, so I guess it's sort of split.
But I agree, `_Complex` and `_Imaginary` is probably the
better option.

Thanks,
Miguel Saldivar

On Fri, Oct 18, 2019 at 9:04 AM Ian Lance Taylor  wrote:

> On Thu, Oct 17, 2019 at 10:20 PM Miguel Saldivar 
> wrote:
> >
> > This is a small fix for Bug 67299, where symbol: `Z1fCf` which would
> become
> > `f(float complex)` instead of `f(floatcomplex )`.
> > I thought this would be the preferred way of printing, because both
> > `llvm-cxxfilt` and `cpp_filt` both print the mangled name in this
> > fashion.
>
> Thanks.  Personally I think it would be better to change the strings
> to " _Complex" and " _Imaginary".  I'm open to discussion on this.
>
> Ian
>
> > From 4ca98c0749bae1389594b31ee7f6ef575aafcd8f Mon Sep 17 00:00:00 2001
> > From: Miguel Saldivar 
> > Date: Thu, 17 Oct 2019 16:36:19 -0700
> > Subject: [PATCH][Demangler] Small fix for complex values
> >
> > gcc/libiberty/
> > * cp-demangle.c (d_print_mod): Add a space before printing `complex`
> > and `imaginary`, as opposed to after.
> >
> > gcc/libiberty/
> > * testsuite/demangle-expected: Adjust test.
> > ---
> >  libiberty/ChangeLog   | 5 +
> >  libiberty/cp-demangle.c   | 4 ++--
> >  libiberty/testsuite/demangle-expected | 2 +-
> >  3 files changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/libiberty/ChangeLog b/libiberty/ChangeLog
> > index 97d9767c2ea..62d5527b95b 100644
> > --- a/libiberty/ChangeLog
> > +++ b/libiberty/ChangeLog
> > @@ -1,3 +1,8 @@
> > +2019-10-17  Miguel Saldivar  
> > + * cp-demangle.c (d_print_mod): Add a space before printing `complex`
> > + and `imaginary`, as opposed to after.
> > + * testsuite/demangle-expected: Adjust test.
> > +
> >  2019-10-03  Eduard-Mihai Burtescu  
> >
> >   * rust-demangle.c (looks_like_rust): Remove.
> > diff --git a/libiberty/cp-demangle.c b/libiberty/cp-demangle.c
> > index aa78c86dd44..bd4dfb785a9 100644
> > --- a/libiberty/cp-demangle.c
> > +++ b/libiberty/cp-demangle.c
> > @@ -5977,10 +5977,10 @@ d_print_mod (struct d_print_info *dpi, int
> options,
> >d_append_string (dpi, "&&");
> >return;
> >  case DEMANGLE_COMPONENT_COMPLEX:
> > -  d_append_string (dpi, "complex ");
> > +  d_append_string (dpi, " complex");
> >return;
> >  case DEMANGLE_COMPONENT_IMAGINARY:
> > -  d_append_string (dpi, "imaginary ");
> > +  d_append_string (dpi, " imaginary");
> >return;
> >  case DEMANGLE_COMPONENT_PTRMEM_TYPE:
> >if (d_last_char (dpi) != '(')
> > diff --git a/libiberty/testsuite/demangle-expected
> > b/libiberty/testsuite/demangle-expected
> > index f21ed00e559..43f003655b2 100644
> > --- a/libiberty/testsuite/demangle-expected
> > +++ b/libiberty/testsuite/demangle-expected
> > @@ -1278,7 +1278,7 @@ int& int_if_addable(A > ((*((Y*)(0)))+(*((Y*)(0>*)
> >  #
> >  --format=gnu-v3
> >  _Z3bazIiEvP1AIXszcl3foocvT__ELCf_
> > -void baz(A )_))>*)
> > +void baz(A complex)_))>*)
> >  #
> >  --format=gnu-v3
> >  _Z3fooI1FEN1XIXszdtcl1PclcvT__EEE5arrayEE4TypeEv
> > --
> > 2.23.0
>


Re: [Patch, fortran] PR fortran/92142 - CFI_setpointer corrupts descriptor

2019-10-18 Thread Paul Richard Thomas
I will deal with this and various other issues associated with
ISO_Fortran_binding tomorrow.

Thanks for your help

Paul

On Thu, 17 Oct 2019 at 18:30, Tobias Burnus  wrote:
>
> Hi,
>
> +  fprintf (stderr, "CFI_setpointer: Result is NULL.\n");
> …
> > + return CFI_INVALID_DESCRIPTOR;
> > +! { dg-do run }
> > +! { dg-additional-options "-fbounds-check" }
> > +! { dg-additional-sources ISO_Fortran_binding_15.c }
>
>
> If you generate to stdout/stderr like in this case, I think it makes
> sense to also check for this output using "{dg-output …}".
>
> Otherwise, it looks okay at a glance – but I defer the proper review to
> either someone else or to later.
>
> Another question would be: Is it always guaranteed that
> result->attribute  is set? I am asking because it resembles to the
> untrained eye the code at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92027
>
> And there, the result attribute is unset – that might be a bug in the C
> code of the test itself – or in libgomp. But it doesn't harm to quickly
> think about whether that can be an issue here as well or not.
>
> Cheers,
>
> Tobias
>


-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: Implement ggc_trim

2019-10-18 Thread Jakub Jelinek
On Fri, Oct 11, 2019 at 09:03:53AM +0200, Jan Hubicka wrote:
> Bootstrapped/regtested x86_64-linux, OK?
> 
>   * ggc-page.c (release_pages): Output statistics when !quiet_flag.
>   (ggc_collect): Dump later to not interfere with release_page dump.
>   (ggc_trim): New function.
>   * ggc-none.c (ggc_trim): New.
> 
>   * lto.c (lto_wpa_write_files): Call ggc_trim.

> @@ -1152,10 +1156,20 @@ release_pages (void)
>   *gp = g->next;
>   G.bytes_mapped -= g->alloc_size;
>   free (g->allocation);
> + n1 += g->alloc_size;
>}
>  else
>gp = >next;
>  #endif

This broke !defined(USING_MMAP) support, the second g->alloc_size read
is after the memory containing *g is freed.

Fixed thusly, tested with #undef USING_MMAP in the file (without the patch
self-test ICEs, with it succeeds), committed to trunk as obvious.

2019-10-18  Jakub Jelinek  

PR middle-end/92153
* ggc-page.c (release_pages): Read g->alloc_size before free rather
than after it.

--- gcc/ggc-page.c.jj   2019-10-11 14:10:44.987386981 +0200
+++ gcc/ggc-page.c  2019-10-18 19:13:59.458085610 +0200
@@ -1155,8 +1155,8 @@ release_pages (void)
   {
*gp = g->next;
G.bytes_mapped -= g->alloc_size;
-   free (g->allocation);
n1 += g->alloc_size;
+   free (g->allocation);
   }
 else
   gp = >next;


Jakub


Re: [PATCH] OpenACC 2.6 manual deep copy support (attach/detach)

2019-10-18 Thread Thomas Schwinge
Hi!

While reviewing
<20191003163505.49997-2-julian@codesourcery.com">http://mid.mail-archive.com/20191003163505.49997-2-julian@codesourcery.com>
"OpenACC reference count overhaul", I've just now stumbled over one thing
that originally was designed here:

On 2018-12-10T19:41:37+, Julian Brown  wrote:
> On Fri, 7 Dec 2018 14:50:19 +0100
> Jakub Jelinek  wrote:
>
>> On Fri, Nov 30, 2018 at 03:41:09AM -0800, Julian Brown wrote:
>> > @@ -918,8 +920,13 @@ struct splay_tree_key_s {
>> >uintptr_t tgt_offset;
>> >/* Reference count.  */
>> >uintptr_t refcount;
>> > -  /* Dynamic reference count.  */
>> > -  uintptr_t dynamic_refcount;
>> > +  /* Reference counts beyond those that represent genuine references in 
>> > the
>> > + linked splay tree key/target memory structures, e.g. for multiple 
>> > OpenACC
>> > + "present increment" operations (via "acc enter data") refering to 
>> > the same
>> > + host-memory block.  */
>> > +  uintptr_t virtual_refcount;
>> > +  /* For a block with attached pointers, the attachment counters for 
>> > each.  */
>> > +  unsigned short *attach_count;
>> >/* Pointer to the original mapping of "omp declare target link" object. 
>> >  */
>> >splay_tree_key link_key;
>> >  };  
>> 
>> This is something I'm worried about a lot, the nodes keep growing way
>> too much.

Is that just a would-be-nice-to-avoid, or is it an actual problem?

If the latter, can we maybe move some data into on-the-side data
structures, say an associative array keyed by [something suitable]?  I
would assume that compared to actual host to/from device data movement
(or even lookup etc.), lookup of values from such an associative array
should be relatively cheap?

I'm bringing this up, because:

>> Is there a way to reuse some other field if it is of
>> certain kind?
>
> How about this -- it seems that the link_key is only used for OpenMP,

So, is that actually correct?  Per my understanding, for the OpenACC
'link' clause we uses 'GOMP_MAP_LINK', which sets "omp declare target
link", and thus:

> and the attach count is only needed for OpenACC. So the obvious thing
> to do is probably to put those two together into a tagged union. The
> question is where to put the tag?
>
> Options are, I guess:
>
> 1. The high or low bits of the address.  Potentially non-portable, ugly.
>
> 2. Or, the virtual refcount is also only needed for OpenACC, so we can
>reserve a magic value for that field to act as a tag.
>
> I've tried implementing the latter in the attached patch, and it seems
> to work OK.

... this is not actually feasible?

It's certainly possible that we're totally lacking sufficient testsuite
coverage, and that there are issues in the 'link' implementation
( "libgomp.c/target-link-1.c fails for
nvptx: #pragma omp target link not implemented" comes to mind
immediatelly [sic: immediately], and certainly for OpenACC I used to be aware of additional
issues; I think I intended to use that mechanism for Fortran
'allocatable' with OpenACC 'declare'), but the libgomp handling to me
seems reasonable upon quick review -- just that we need to keep it alive
for OpenACC, too, unless I'm confused?


Simplifying the libgomp code to avoid the 'VREFCOUNT_LINK_KEY' toggle
flag, and not putting 'link_key' into an union together with
'attach_count', that should -- I hope -- resolve/obsolete some of the
questions raised in my late-night pre-Christmas 2018 review,
, where I'm now
not sure yet whether all my questions have been addressed (or disputed,
but I didn't hear anything) in the recent -- split-out, thanks! --
version of this patch,
<20191003163505.49997-2-julian@codesourcery.com">http://mid.mail-archive.com/20191003163505.49997-2-julian@codesourcery.com>
"OpenACC reference count overhaul".


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [Patch][Demangler] Fix for complex values

2019-10-18 Thread Ian Lance Taylor via gcc-patches
On Thu, Oct 17, 2019 at 10:20 PM Miguel Saldivar  wrote:
>
> This is a small fix for Bug 67299, where symbol: `Z1fCf` which would become
> `f(float complex)` instead of `f(floatcomplex )`.
> I thought this would be the preferred way of printing, because both
> `llvm-cxxfilt` and `cpp_filt` both print the mangled name in this
> fashion.

Thanks.  Personally I think it would be better to change the strings
to " _Complex" and " _Imaginary".  I'm open to discussion on this.

Ian

> From 4ca98c0749bae1389594b31ee7f6ef575aafcd8f Mon Sep 17 00:00:00 2001
> From: Miguel Saldivar 
> Date: Thu, 17 Oct 2019 16:36:19 -0700
> Subject: [PATCH][Demangler] Small fix for complex values
>
> gcc/libiberty/
> * cp-demangle.c (d_print_mod): Add a space before printing `complex`
> and `imaginary`, as opposed to after.
>
> gcc/libiberty/
> * testsuite/demangle-expected: Adjust test.
> ---
>  libiberty/ChangeLog   | 5 +
>  libiberty/cp-demangle.c   | 4 ++--
>  libiberty/testsuite/demangle-expected | 2 +-
>  3 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/libiberty/ChangeLog b/libiberty/ChangeLog
> index 97d9767c2ea..62d5527b95b 100644
> --- a/libiberty/ChangeLog
> +++ b/libiberty/ChangeLog
> @@ -1,3 +1,8 @@
> +2019-10-17  Miguel Saldivar  
> + * cp-demangle.c (d_print_mod): Add a space before printing `complex`
> + and `imaginary`, as opposed to after.
> + * testsuite/demangle-expected: Adjust test.
> +
>  2019-10-03  Eduard-Mihai Burtescu  
>
>   * rust-demangle.c (looks_like_rust): Remove.
> diff --git a/libiberty/cp-demangle.c b/libiberty/cp-demangle.c
> index aa78c86dd44..bd4dfb785a9 100644
> --- a/libiberty/cp-demangle.c
> +++ b/libiberty/cp-demangle.c
> @@ -5977,10 +5977,10 @@ d_print_mod (struct d_print_info *dpi, int options,
>d_append_string (dpi, "&&");
>return;
>  case DEMANGLE_COMPONENT_COMPLEX:
> -  d_append_string (dpi, "complex ");
> +  d_append_string (dpi, " complex");
>return;
>  case DEMANGLE_COMPONENT_IMAGINARY:
> -  d_append_string (dpi, "imaginary ");
> +  d_append_string (dpi, " imaginary");
>return;
>  case DEMANGLE_COMPONENT_PTRMEM_TYPE:
>if (d_last_char (dpi) != '(')
> diff --git a/libiberty/testsuite/demangle-expected
> b/libiberty/testsuite/demangle-expected
> index f21ed00e559..43f003655b2 100644
> --- a/libiberty/testsuite/demangle-expected
> +++ b/libiberty/testsuite/demangle-expected
> @@ -1278,7 +1278,7 @@ int& int_if_addable(A ((*((Y*)(0)))+(*((Y*)(0>*)
>  #
>  --format=gnu-v3
>  _Z3bazIiEvP1AIXszcl3foocvT__ELCf_
> -void baz(A*)
> +void baz(A*)
>  #
>  --format=gnu-v3
>  _Z3fooI1FEN1XIXszdtcl1PclcvT__EEE5arrayEE4TypeEv
> --
> 2.23.0


Re: Type representation in CTF and DWARF

2019-10-18 Thread Nick Alcock
On 18 Oct 2019, Pedro Alves stated:

> On 10/18/19 2:21 PM, Richard Biener wrote:
>
 In most cases local types etc are a fairly small contributor to the
 total volume -- but macros can contribute a lot in some codebases.
>>> (The
 Linux kernel's READ_ONCE macro is one I've personally been bitten by
>>> in
 the past, with a new local struct in every use. GCC doesn't
>>> deduplicate
 any of those so the resulting bloat from tens of thousands of
>>> instances
 of this identical structure is quite incredible...)

>>>
>>> Sounds like something that would be beneficial to do with DWARF too.
>> 
>> Otoh those are distinct types according to the C standard and since dwarf is 
>> a source level representation we should preserve this (source locations also 
>> differ). 
>
> Right.  Maybe some partial deduplication would be possible, preserving
> type distinction.  But since CTF doesn't include these, this is moot
> for now.

Yeah, the libctf API and existing CTF users only care if they're
assignment-compatible, which they are. We could preserve more
type-identity information if there was a need to do so, but none has yet
emerged.

-- 
NULL && (void)


[C++ Patch] Improve cp_parser_class_head error recovery

2019-10-18 Thread Paolo Carlini

Hi,

a few days ago I noticed that for, say, g++.dg/parse/qualified2.C we 
were issuing two additional misleading errors after the first one, 
mentioning in particular a certain "unnamed class" (I'm reproducing only 
the error messages proper):


namespace Glib {
  template  class Value {};
  template <> class Glib::Value {}; // { dg-error "" }
}

qualified2.C:3:29: error: extra qualification not allowed [\-fpermissive\]
qualified2.C:3:46: error: explicit specialization of non-template 
‘Glib::’
qualified2.C:3:47: error: abstract declarator ‘Glib::’ 
used as declaration


Let's see if I can explain clearly enough what I think is going on.

In cp_parser_class_head, upon the permerror about the extra 
qualification, we try to do error recovery, which is particularly 
tricky, because we are dealing with a permerror thus we have to make 
sure that in case of -fpermissive everything remains internally 
consistent anyway. In this context, clearing 'nested_name_specifier' and 
'num_templates' doesn't seem a good idea because it does *not* give us 
an internal state similar to the one normally obtained when the nested 
name specifier is not there, the reason being that, earlier in the 
function, when a nested name specifier really isn't there we try 
cp_parser_template_id or in case cp_parser_identifier, which set the 
local 'id' and possibly 'template_id' and 'num_templates', whereas 
during error recovery we remain so to speak completely empty handed. 
Thus, what about not clearing anything? That seems to work at least for 
the two testcases below and doesn't cause regressions.


Thanks, Paolo.

/



/cp
2019-10-18  Paolo Carlini  

* parser.c (cp_parser_class_head): Improve error recovery upon
extra qualification error.

/testsuite
2019-10-18  Paolo Carlini  

* g++.dg/parse/qualified2.C: Tighten dg-error directive.
* g++.old-deja/g++.other/decl5.C: Don't expect redundant error.
Index: cp/parser.c
===
--- cp/parser.c (revision 277149)
+++ cp/parser.c (working copy)
@@ -24178,12 +24178,8 @@ cp_parser_class_head (cp_parser* parser,
 ... [or] the definition or explicit instantiation of a
 class member of a namespace outside of its namespace.  */
   if (scope == nested_name_specifier)
-   {
- permerror (nested_name_specifier_token_start->location,
-"extra qualification not allowed");
- nested_name_specifier = NULL_TREE;
- num_templates = 0;
-   }
+   permerror (nested_name_specifier_token_start->location,
+  "extra qualification not allowed");
 }
   /* An explicit-specialization must be preceded by "template <>".  If
  it is not, try to recover gracefully.  */
Index: testsuite/g++.dg/parse/qualified2.C
===
--- testsuite/g++.dg/parse/qualified2.C (revision 277144)
+++ testsuite/g++.dg/parse/qualified2.C (working copy)
@@ -1,4 +1,4 @@
 namespace Glib {
   template  class Value {};
-  template <> class Glib::Value {}; // { dg-error "" }
+  template <> class Glib::Value {}; // { dg-error "29:extra 
qualification" }
 }
Index: testsuite/g++.old-deja/g++.other/decl5.C
===
--- testsuite/g++.old-deja/g++.other/decl5.C(revision 277144)
+++ testsuite/g++.old-deja/g++.other/decl5.C(working copy)
@@ -12,7 +12,6 @@ struct A {
   int A::m;   // { dg-error "extra qualification" } 
   struct e;
   struct A::e {int i;}; // { dg-error "extra qualification" "qual" } 
-  // { dg-error "anonymous struct" "anon" { target *-*-* } .-1 }
   struct A::expand {  // { dg-error "qualified name" } 
   int m;
   };


[patch,testsuite] More fixes for small targets.

2019-10-18 Thread Georg-Johann Lay

Here is some more cases fixed for small targets for noise reduction.

Ok to apply?

Johann

gcc/testsuite/
Fix some fallout for small targets.

PR testsuite/52641
* gcc.dg/torture/pr86034.c: Use 32-bit base type for a bitfield of
width > 16 bits.
* gcc.dg/torture/pr90972.c [avr]: Add option "-w".
* gcc.dg/torture/pr87693.c: Same.
* gcc.dg/torture/pr91178.c: Add dg-require-effective-target size32plus.
* gcc.dg/torture/pr91178-2.c: Same.
* gcc.dg/torture/20181024-1.c
* gcc.dg/torture/pr86554-1.c: Use 32-bit integers.
* gcc.dg/tree-ssa/pr91091-1.c: Same.
Index: gcc.dg/torture/20181024-1.c
===
--- gcc.dg/torture/20181024-1.c	(revision 277097)
+++ gcc.dg/torture/20181024-1.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target size32plus } */
 /* { dg-additional-options "-march=core-avx2" { target { x86_64-*-* i?86-*-* } } } */
 
 typedef enum {
Index: gcc.dg/torture/pr86034.c
===
--- gcc.dg/torture/pr86034.c	(revision 277097)
+++ gcc.dg/torture/pr86034.c	(working copy)
@@ -6,7 +6,7 @@
 struct A
 {
   int b;
-  int c:24;
+  __INT32_TYPE__ c:24;
   int d:10;
   int e;
 } f;
Index: gcc.dg/torture/pr86554-1.c
===
--- gcc.dg/torture/pr86554-1.c	(revision 277097)
+++ gcc.dg/torture/pr86554-1.c	(working copy)
@@ -2,25 +2,25 @@
 
 struct foo
 {
-  unsigned x;
+  __UINT32_TYPE__ x;
 };
 typedef struct foo foo;
 
-static inline int zot(foo *f)
+static inline __INT32_TYPE__ zot(foo *f)
 {
   int ret;
 
   if (f->x > 0x7FFF)
-ret = (int)(f->x - 0x7FFF);
+ret = (__INT32_TYPE__)(f->x - 0x7FFF);
   else
-ret = (int)f->x - 0x7FFF;
+ret = (__INT32_TYPE__)f->x - 0x7FFF;
   return ret;
 }
 
 void __attribute__((noinline,noclone)) bar(foo *f)
 {
-  int ret = zot(f);
-  volatile int x = ret;
+  __INT32_TYPE__ ret = zot(f);
+  volatile __INT32_TYPE__ x = ret;
   if (ret < 1)
 __builtin_abort ();
 }
Index: gcc.dg/torture/pr87693.c
===
--- gcc.dg/torture/pr87693.c	(revision 277097)
+++ gcc.dg/torture/pr87693.c	(working copy)
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target indirect_jumps } */
-
+/* { dg-additional-options "-w" { target avr-*-* } } */
+   
 void f (void);
 void g (void);
 void h (int a)
Index: gcc.dg/torture/pr90972.c
===
--- gcc.dg/torture/pr90972.c	(revision 277097)
+++ gcc.dg/torture/pr90972.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-additional-options "-mcpu=power8" { target powerpc*-*-* } } */
+/* { dg-additional-options "-w" { target avr-*-* } } */
 
 long f;
 void a();
Index: gcc.dg/torture/pr91178-2.c
===
--- gcc.dg/torture/pr91178-2.c	(revision 277097)
+++ gcc.dg/torture/pr91178-2.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target size32plus } */
 
 int a[100][70304];
 int b[100];
Index: gcc.dg/torture/pr91178.c
===
--- gcc.dg/torture/pr91178.c	(revision 277097)
+++ gcc.dg/torture/pr91178.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target size32plus } */
 
 int a;
 extern int f[10][91125];
Index: gcc.dg/tree-ssa/pr91091-1.c
===
--- gcc.dg/tree-ssa/pr91091-1.c	(revision 277097)
+++ gcc.dg/tree-ssa/pr91091-1.c	(working copy)
@@ -1,8 +1,8 @@
 /* { dg-do run } */
 /* { dg-options "-O3 -fno-strict-aliasing" } */
 
-struct s { int x; } __attribute__((packed));
-struct t { int x; };
+struct s { __INT32_TYPE__ x; } __attribute__((packed));
+struct t { __INT32_TYPE__ x; };
 
 void __attribute__((noinline,noipa))
 swap(struct s* p, struct t* q)


[PATCH] Use narrow mode of constant when expanding widening multiplication

2019-10-18 Thread Jozef Lawrynowicz
I experienced the following ICE when working on a downstream patch for msp430:

void
foo (unsigned int r, unsigned int y)
{
  __builtin_umul_overflow ((unsigned int) (-1), y, &r);
}

> msp430-elf-gcc -S tester.c -O0

tester.c: In function 'foo':
tester.c:4:1: error: unrecognizable insn:
4 | }
  | ^
(insn 16 15 17 2 (set (reg:HI 32)
(const_int 65535 [0xffff])) "tester.c":3:3 -1
 (nil))
during RTL pass: vregs
dump file: tester.c.234r.vregs
tester.c:4:1: internal compiler error: in extract_insn, at recog.c:2311

Following discussions on ml/gcc
(https://gcc.gnu.org/ml/gcc/2019-10/msg00083.html), I narrowed this down to a
call to expand_mult_highpart_adjust in expand_expr_real_2.

If one of the operands is a constant, its mode had been converted to the wide
mode of our multiplication to generate some RTL, but not converted back to the
narrow mode before expanding what will be the high part of the result of the
multiplication.

If we look at the other two uses of expand_mult_highpart_adjust in the sources,
(both in expmed.c (expmed_mult_highpart_optab)) we can see that the narrow
version of the constant is always used:
  if (tem)
/* We used the wrong signedness.  Adjust the result.  */
return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
tem, unsignedp);

So the attached patch updates the use in expand_expr_real_2 to also use the
narrow version of the constant operand.
This fixes the aforementioned ICE.

Successfully bootstrapped and regtested for x86_64-pc-linux-gnu.
Successfully regtested for msp430-elf.

Ok for trunk?
>From b430cddbd257353f162fe3968a447b63cbcaa964 Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz 
Date: Thu, 17 Oct 2019 18:22:01 +0100
Subject: [PATCH] Use narrow mode of constant when expanding widening
 multiplication

gcc/ChangeLog:

2019-10-18  Jozef Lawrynowicz  

	* expr.c (expand_expr_real_2): Use op1 in its original narrow mode when
	calling expand_mult_highpart_adjust. 

---
 gcc/expr.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/expr.c b/gcc/expr.c
index b54bf1d3dc5..0a571d3f7e3 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8947,9 +8947,12 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 		  != CODE_FOR_nothing
 		  && innermode == word_mode)
 		{
-		  rtx htem, hipart;
+		  rtx htem, hipart, narrow_op1;
 		  op0 = expand_normal (treeop0);
 		  op1 = expand_normal (treeop1);
+		  /* Save op1 in the narrower mode WORD_MODE for when we expand
+		 the high part.  */
+		  narrow_op1 = op1;
 		  /* op0 and op1 might be constants, despite the above
 		 != INTEGER_CST check.  Handle it.  */
 		  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
@@ -8961,7 +8964,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
    unsignedp, OPTAB_LIB_WIDEN);
 		  hipart = gen_highpart (word_mode, temp);
 		  htem = expand_mult_highpart_adjust (word_mode, hipart,
-		  op0, op1, hipart,
+		  op0, narrow_op1, hipart,
 		  zextend_p);
 		  if (htem != hipart)
 		emit_move_insn (hipart, htem);
-- 
2.17.1



Re: Type representation in CTF and DWARF

2019-10-18 Thread Pedro Alves
On 10/18/19 2:21 PM, Richard Biener wrote:

>>> In most cases local types etc are a fairly small contributor to the
>>> total volume -- but macros can contribute a lot in some codebases.
>> (The
>>> Linux kernel's READ_ONCE macro is one I've personally been bitten by
>> in
>>> the past, with a new local struct in every use. GCC doesn't
>> deduplicate
>>> any of those so the resulting bloat from tens of thousands of
>> instances
>>> of this identical structure is quite incredible...)
>>>
>>
>> Sounds like something that would be beneficial to do with DWARF too.
> 
> Otoh those are distinct types according to the C standard and since dwarf is 
> a source level representation we should preserve this (source locations also 
> differ). 

Right.  Maybe some partial deduplication would be possible, preserving
type distinction.  But since CTF doesn't include these, this is moot
for now.

Thanks,
Pedro Alves


[match.pd] Mid-end fix for r277110

2019-10-18 Thread Yuliang Wang
Hi,

SVE2 vectorization for BSL and NBSL fails when the element type is unsigned 
8/16-bit.

The operands are being converted implicitly to corresponding signed types, 
which the mid-end fold pattern does not take into account; this patch augments 
the pattern with type conversion checks in order to rectify the above problem.

#define TYPE uint{8,16}_t

void
foo (TYPE *a, TYPE *b, TYPE *c, TYPE *d, int n)
{
  for (int i = 0; i < n; i++)
a[i] = OP (b[i], c[i], d[i]);
}

BSL:

 // #define OP(x,y,z) (((x) & (z)) | ((y) & ~(z)))

  beforeand z1.d, z2.d, z1.d
bic z0.d, z0.d, z2.d
orr z0.d, z0.d, z1.d
  ...
  after bsl z0.d, z0.d, z1.d, z2.d

NBSL:

  // #define OP(x,y,z) ~(((x) & (z)) | ((y) & ~(z)))

  beforeand z1.d, z2.d, z1.d
bic z0.d, z0.d, z2.d
orr z0.d, z0.d, z1.d
not z0.{b,h}, p1/m, z0.{b,h}
  ...
  after nbsl z0.d, z0.d, z1.d, z2.d

The GIMPLE output for BSL shows where conversions could be inserted:

_1 = b[i];
_2 = d[i];
_3 = _1 & _2;
_4 = (signed short) _3;
_5 = c[i];
_6 = (signed short) _5;
_7 = d[i];
_8 = (signed short) _7;
_9 = ~_8;
_10 = _6 & _9;
_11 = _4 | _10;
_12 = (short unsigned int) _11;
a[i] = _12;

In contrast, for 32/64-bit types (regardless of signedness):

_1 = b[i];
_2 = d[i];
_3 = _1 & _2;
_4 = c[i];
_5 = d[i];
_6 = ~_5;
_7 = _4 & _6;
_8 = _3 | _7;
_9 = ~_8;
a[i] = _9;

Built and tested on aarch64-none-elf.

Regards,
Yuliang Wang


gcc/ChangeLog:

2019-10-17  Yuliang Wang  

* match.pd (/* (x & ~m) | (y & m) -> ... */): Modified fold pattern.
* genmatch.c (convert3): New convert operation to support the above.

gcc/testsuite/ChangeLog:

2019-10-17  Yuliang Wang  

* gcc.target/aarch64/sve2/bitsel_1.c: Add testing for unsigned types.
* gcc.target/aarch64/sve2/bitsel_2.c: As above.
* gcc.target/aarch64/sve2/bitsel_3.c: As above.
* gcc.target/aarch64/sve2/bitsel_4.c: As above.
* gcc.target/aarch64/sve2/eor3_1.c: As above.


diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 
7db1f135840e09e794e2921859fa8e9b7fa8..ce87ae33e0b3c06f4d1fde8d8e74bf2210ee7a5a
 100644
--- a/gcc/genmatch.c
+++ b/gcc/genmatch.c
@@ -227,6 +227,7 @@ enum tree_code {
 CONVERT0,
 CONVERT1,
 CONVERT2,
+CONVERT3,
 VIEW_CONVERT0,
 VIEW_CONVERT1,
 VIEW_CONVERT2,
@@ -1176,6 +1177,7 @@ lower_opt_convert (operand *o)
 = { CONVERT0, CONVERT_EXPR,
CONVERT1, CONVERT_EXPR,
CONVERT2, CONVERT_EXPR,
+   CONVERT3, CONVERT_EXPR,
VIEW_CONVERT0, VIEW_CONVERT_EXPR,
VIEW_CONVERT1, VIEW_CONVERT_EXPR,
VIEW_CONVERT2, VIEW_CONVERT_EXPR };
@@ -4145,8 +4147,8 @@ parser::record_operlist (location_t loc, user_id *p)
 }
 }
 
-/* Parse the operator ID, special-casing convert?, convert1? and
-   convert2?  */
+/* Parse the operator ID, special-casing convert?, convert1?, convert2? and
+   convert3?  */
 
 id_base *
 parser::parse_operation ()
@@ -4167,6 +4169,8 @@ parser::parse_operation ()
;
   else if (strcmp (id, "convert2") == 0)
;
+  else if (strcmp (id, "convert3") == 0)
+   ;
   else if (strcmp (id, "view_convert") == 0)
id = "view_convert0";
   else if (strcmp (id, "view_convert1") == 0)
@@ -4183,6 +4187,7 @@ parser::parse_operation ()
 }
   else if (strcmp (id, "convert1") == 0
   || strcmp (id, "convert2") == 0
+  || strcmp (id, "convert3") == 0
   || strcmp (id, "view_convert1") == 0
   || strcmp (id, "view_convert2") == 0)
 fatal_at (id_tok, "expected '?' after conditional operator");
@@ -4723,9 +4728,9 @@ parser::parse_for (location_t)
  id_base *idb = get_operator (oper, true);
  if (idb == NULL)
fatal_at (token, "no such operator '%s'", oper);
- if (*idb == CONVERT0 || *idb == CONVERT1 || *idb == CONVERT2
- || *idb == VIEW_CONVERT0 || *idb == VIEW_CONVERT1
- || *idb == VIEW_CONVERT2)
+ if (*idb == CONVERT0 || *idb == VIEW_CONVERT0
+ || *idb == CONVERT1 || *idb == CONVERT2|| *idb == CONVERT3
+ || *idb == VIEW_CONVERT1 || *idb == VIEW_CONVERT2)
fatal_at (token, "conditional operators cannot be used inside for");
 
  if (arity == -1)
@@ -5136,6 +5141,7 @@ main (int argc, char **argv)
 add_operator (CONVERT0, "convert0", "tcc_unary", 1);
 add_operator (CONVERT1, "convert1", "tcc_unary", 1);
 add_operator (CONVERT2, "convert2", "tcc_unary", 1);
+add_operator (CONVERT3, "convert3", "tcc_unary", 1);
 add_operator (VIEW_CONVERT0, "view_convert0", "tcc_unary", 1);
 add_operator (VIEW_CONVERT1, "view_convert1", "tcc_unary", 1);
 add_operator (VIEW_CONVERT2, "view_convert2", "tcc_unary", 1);
diff --git a/gcc/match.pd b/gcc/match.pd
index 
e3ac06c8ef5b893bd344734095b11047a43f98b8..0aa065c2941dd79477434fd3b6691c9a9b68d20c
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1461,8 +1461,13 @@ 

[PATCH, OpenACC] Fortran deviceptr

2019-10-18 Thread Chung-Lin Tang

Hi Thomas,
this is the updated Fortran deviceptr patch, originated from Cesar, and one of
the tests was from James Norris:
https://gcc.gnu.org/ml/gcc-patches/2018-05/msg00286.html
https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00532.html

There were a few style cleanups, but the goal of modification is the same:
to use only one clause to represent Fortran deviceptr, and to preserve it
during gimplification.

Because of this modification, and as we discussed earlier, the 
handle_ftn_pointers()
code in libgomp/oacc-parallel.c appeared to be no longer needed.
I have removed them in this patch, and tested libgomp without regressions.

Also, I've added a new libgomp.oacc-fortran/deviceptr-2.f90 testcase that
actually copies out and verifies the deviceptr computation.

Is this okay for trunk now?

Thanks,
Chung-Lin

2019-10-18  Cesar Philippidis  
Chung-Lin Tang  

gcc/fortran/
* trans-openmp.c (gfc_omp_finish_clause): Don't create pointer data
mappings for deviceptr clauses.
(gfc_trans_omp_clauses): Likewise.

gcc/
* gimplify.c (enum gimplify_omp_var_data): Add GOVD_DEVICETPR.
(omp_notice_variable): Add GOVD_DEVICEPTR attribute when appropriate.
(gimplify_scan_omp_clauses): Likewise.
(gimplify_adjust_omp_clauses_1): Set GOMP_MAP_FORCE_DEVICEPTR for
implicit deviceptr mappings.
gcc/testsuite/
* c-c++-common/goacc/deviceptr-4.c: Update expected data mapping.

2019-10-18  Chung-Lin Tang  
James Norris  

libgomp/
* oacc-parallel.c (handle_ftn_pointers): Delete function.
(GOACC_parallel_keyed): Remove call to handle_ftn_pointers.
* testsuite/libgomp.oacc-fortran/deviceptr-1.f90: New test.
* testsuite/libgomp.oacc-fortran/deviceptr-2.f90: New test.
Index: gcc/fortran/trans-openmp.c
===
--- gcc/fortran/trans-openmp.c  (revision 277155)
+++ gcc/fortran/trans-openmp.c  (working copy)
@@ -1099,7 +1099,8 @@ gfc_omp_clause_dtor (tree clause, tree decl)
 void
 gfc_omp_finish_clause (tree c, gimple_seq *pre_p)
 {
-  if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
+  if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP
+  || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR)
 return;
 
   tree decl = OMP_CLAUSE_DECL (c);
@@ -2173,6 +2174,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp
  if (n->expr == NULL || n->expr->ref->u.ar.type == AR_FULL)
{
  if (POINTER_TYPE_P (TREE_TYPE (decl))
+ && n->u.map_op == OMP_MAP_FORCE_DEVICEPTR)
+   {
+ OMP_CLAUSE_DECL (node) = decl;
+ goto finalize_map_clause;
+   }
+ else if (POINTER_TYPE_P (TREE_TYPE (decl))
  && (gfc_omp_privatize_by_reference (decl)
  || GFC_DECL_GET_SCALAR_POINTER (decl)
  || GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)
@@ -2346,6 +2353,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp
  OMP_CLAUSE_SIZE (node3)
= fold_build2 (MINUS_EXPR, sizetype, ptr, ptr2);
}
+   finalize_map_clause:
  switch (n->u.map_op)
{
case OMP_MAP_ALLOC:
Index: gcc/gimplify.c
===
--- gcc/gimplify.c  (revision 277155)
+++ gcc/gimplify.c  (working copy)
@@ -123,6 +123,9 @@ enum gimplify_omp_var_data
   /* Flag for GOVD_REDUCTION: inscan seen in {in,ex}clusive clause.  */
   GOVD_REDUCTION_INSCAN = 0x200,
 
+  /* Flag for OpenACC deviceptrs.  */
+  GOVD_DEVICEPTR = 0x400,
+
   GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE
   | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR
   | GOVD_LOCAL)
@@ -7426,6 +7429,7 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx,
error ("variable %qE declared in enclosing "
   "% region", DECL_NAME (decl));
  nflags |= GOVD_MAP;
+ nflags |= (n2->value & GOVD_DEVICEPTR);
  if (octx->region_type == ORT_ACC_DATA
  && (n2->value & GOVD_MAP_0LEN_ARRAY))
nflags |= GOVD_MAP_0LEN_ARRAY;
@@ -8943,6 +8947,8 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_se
  if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_TO
  || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_TOFROM)
flags |= GOVD_MAP_ALWAYS_TO;
+ else if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR)
+   flags |= GOVD_DEVICEPTR;
  goto do_add;
 
case OMP_CLAUSE_DEPEND:
@@ -9727,7 +9733,8 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n,
   | GOVD_MAP_FORCE
   | 

Re: [PATCH] Fix objsz ICE (PR tree-optimization/92056)

2019-10-18 Thread Martin Sebor

On 10/18/19 12:52 AM, Jakub Jelinek wrote:

On Thu, Oct 17, 2019 at 06:07:37PM -0600, Martin Sebor wrote:

On 10/17/19 1:00 AM, Jakub Jelinek wrote:

Hi!

The following bug has been introduced when cond_expr_object_size has been
added in 2007.  We want to treat a COND_EXPR like a PHI with 2 arguments,
and PHI is handled in a loop that breaks if the lhs value is unknown, and
then does the if (TREE_CODE (arg) == SSA_NAME) merge_object_sizes else
expr_object_size which is used even in places that handle just a single
operand (with the lhs value initialized to the opposite value of unknown
first).  At least expr_object_size asserts that the lhs value is not
unknown at the start.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?


I'm not sure the other change (r277134) it the right way to fix
the problem with the missing initialization.  It was introduced
with the merger of the sprintf pass.  The latter still calls
init_object_sizes in get_destination_size.  I think the call
should be moved from there into the new combined sprintf/strlen
printf_strlen_execute function that also calls fini_object_sizes,
and the one from determine_min_objsize should be removed.  I can
take care of it unless you think it needs to stay the way it is
now for some reason.


Why?  As I said, init_object_sizes is designed to be called multiple times
and is cheap (load + comparison + early return) if it has been already
called, so is meant to be called only when needed, rather than at the
beginning of a pass just in case something appears.  The objsz pass does the
same.  No need to allocate bitmaps/vectors if nothing will need them.


Why?  Because as the bug you just fixed illustrates it's obviously
error prone to initialize the pass on demand in a utility function.
There are two now, and the next time someone adds a new one they
could easily reintroduce the same problem.

Martin


[committed][Arm] Fix multilibs for Armv7-R (was Re: [PATCH][Arm] Fix multilibs for Armv7-R)

2019-10-18 Thread Andre Vieira (lists)

Hi,

Updated the patch with respect to Richard Earnshaw's comments and 
committed in r277156.


gcc/ChangeLog:
2019-10-18  Andre Vieira  

* config/arm/t-multilib: Add rule to regenerate mutlilib header
file with any change to t-multilib, t-aprofile and t-rmprofile.
Also add new multilib variants and new mappings.

gcc/testsuite/ChangeLog:
2019-10-18  Andre Vieira  

* gcc.target/arm/multilib.exp: Add extra tests.


Cheers,
Andre

On 18/10/2019 12:04, Richard Earnshaw (lists) wrote:

On 18/10/2019 11:53, Andre Vieira (lists) wrote:

Hi

This patch maps multilibs using -march=armv7-r+vfpv3-d16-fp16 and
-march=armv7-r+vfpv3-d16-fp16+idiv to v7+fp.  This patch also adds a new
multilib for armv7-r+fp.sp and maps -march=armv7-r+fp.sp+idiv,
-march=armv7-r+vfpv3xd-fp16 and -march=armv7-r+vfpv3xd-fp16+idiv to it.

This solves issues encountered when trying to link for
-mcpu=cortex-r8 -mfloat-abi=hard.

Built arm-none-eabi and compared testsuite run of 
-march=armv7e-m+fp/-mfloat-abi=hard to 
-march=armv7-r+fp.sp/-mfloat-abi=hard which looked alright.


Is this OK for trunk?

gcc/ChangeLog:
2019-10-18  Andre Vieira  

 * config/arm/t-multilib: Add new multilib variants and new
 mappings.

gcc/testsuite/ChangeLog:
2019-10-18  Andre Vieira  

 * gcc.target/arm/multilib.exp: Add extra tests.


Cheers,
Andre


+s-mlib: $(srcdir)/config/arm/t-multilib $(srcdir)/config/arm/t-aprofile 
$(srcdir)/config/arm/t-rmprofile


Needs to be mentioned in the ChangeLog

+MULTILIB_MATCHES    += $(foreach ARCH,+fp +fp+idiv +vfpv3-d16-fp16 
+vfpv3-d16-fp16+idiv, \

+ march?armv7+fp=march?armv7-r$(ARCH))

The list here is the DP dual of v7_r_sp_variants, so it would make more 
sense to define v7_r_dp_variants and use it here.


+    {-march=armv7-r+fp -mfpu=auto -mfloat-abi=softfp} "thumb/v7+fp/softfp"
+    {-march=armv7-r+fp -mfpu=auto -mfloat-abi=hard} "thumb/v7+fp/hard"
+    {-march=armv7-r+fp+idiv -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7+fp/softfp"
+    {-march=armv7-r+fp+idiv -mfpu=auto -mfloat-abi=hard} 
"thumb/v7+fp/hard"
+    {-march=armv7-r+vfpv3-d16-fp16 -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7+fp/softfp"
+    {-march=armv7-r+vfpv3-d16-fp16 -mfpu=auto -mfloat-abi=hard} 
"thumb/v7+fp/hard"
+    {-march=armv7-r+vfpv3-d16-fp16+idiv -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7+fp/softfp"
+    {-march=armv7-r+vfpv3-d16-fp16+idiv -mfpu=auto -mfloat-abi=hard} 
"thumb/v7+fp/hard"
+    {-march=armv7-r+fp.sp -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7-r+fp.sp/softfp"
+    {-march=armv7-r+fp.sp -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"
+    {-march=armv7-r+fp.sp+idiv -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7-r+fp.sp/softfp"
+    {-march=armv7-r+fp.sp+idiv -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"
+    {-march=armv7-r+vfpv3xd -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7-r+fp.sp/softfp"
+    {-march=armv7-r+vfpv3xd -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"
+    {-march=armv7-r+vfpv3xd+idiv -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7-r+fp.sp/softfp"
+    {-march=armv7-r+vfpv3xd+idiv -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"
+    {-march=armv7-r+vfpv3xd-fp16+idiv -mfpu=auto 
-mfloat-abi=softfp} "thumb/v7-r+fp.sp/softfp"
+    {-march=armv7-r+vfpv3xd-fp16+idiv -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"


8 spaces -> 1 tab.



OK with those changes.

R.
diff --git a/gcc/config/arm/t-multilib b/gcc/config/arm/t-multilib
index 08526302283eea03e4a8f22a2a049e85bd7bb6af..dc97c8f09fb0b7f53520432e1a174adfce1bf6af 100644
--- a/gcc/config/arm/t-multilib
+++ b/gcc/config/arm/t-multilib
@@ -24,6 +24,8 @@
 # values during the configure step.  We enforce this during the
 # top-level configury.
 
+s-mlib: $(srcdir)/config/arm/t-multilib $(srcdir)/config/arm/t-aprofile $(srcdir)/config/arm/t-rmprofile
+
 MULTILIB_OPTIONS =
 MULTILIB_DIRNAMES=
 MULTILIB_EXCEPTIONS  =
@@ -63,6 +65,8 @@ all_early_arch		:= armv5tej armv6 armv6j armv6k armv6z armv6kz \
 v7_a_arch_variants	:= $(call all_feat_combs, mp sec)
 v7_a_nosimd_variants	:= +fp +vfpv3 +vfpv3-d16-fp16 +vfpv3-fp16 +vfpv4-d16 +vfpv4
 v7_a_simd_variants	:= +simd +neon-fp16 +neon-vfpv4
+v7_r_sp_variants	:= +fp.sp +fp.sp+idiv +vfpv3xd-fp16 +vfpv3xd-fp16+idiv
+v7_r_dp_variants	:= +fp +fp+idiv +vfpv3-d16-fp16 +vfpv3-d16-fp16+idiv
 v7ve_nosimd_variants	:= +vfpv3-d16 +vfpv3 +vfpv3-d16-fp16 +vfpv3-fp16 +fp +vfpv4
 v7ve_vfpv3_simd_variants := +neon +neon-fp16
 v7ve_vfpv4_simd_variants := +simd
@@ -86,8 +90,8 @@ SEP := $(and $(HAS_APROFILE),$(HAS_RMPROFILE),/)
 MULTILIB_OPTIONS	+= marm/mthumb
 MULTILIB_DIRNAMES	+= arm thumb
 
-MULTILIB_OPTIONS	+= march=armv5te+fp/march=armv7/march=armv7+fp/$(MULTI_ARCH_OPTS_A)$(SEP)$(MULTI_ARCH_OPTS_RM)
-MULTILIB_DIRNAMES	+= v5te v7 v7+fp $(MULTI_ARCH_DIRS_A) $(MULTI_ARCH_DIRS_RM)
+MULTILIB_OPTIONS	+= march=armv5te+fp/march=armv7/march=armv7+fp/march=armv7-r+fp.sp/$(MULTI_ARCH_OPTS_A)$(SEP)$(MULTI_ARCH_OPTS_RM)

Re: C++ PATCH for c++/92062 - ODR-use ignored for static member of class template

2019-10-18 Thread Marek Polacek
Ping.

On Fri, Oct 11, 2019 at 04:23:34PM -0400, Marek Polacek wrote:
> has_value_dependent_address wasn't stripping location wrappers so it
> gave the wrong answer for "" in the static_assert.  That led us to
> thinking that the expression isn't instantiation-dependent, and we
> skipped static initialization of A<0>::x.
> 
> This patch adds stripping so that has_value_dependent_address gives the
> same answer as it used to before the location wrappers addition.
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk and 9?
> 
> 2019-10-11  Marek Polacek  
> 
>   PR c++/92062 - ODR-use ignored for static member of class template.
>   * pt.c (has_value_dependent_address): Strip location wrappers.
> 
>   * g++.dg/cpp0x/constexpr-odr1.C: New test.
>   * g++.dg/cpp0x/constexpr-odr2.C: New test.
> 
> diff --git gcc/cp/pt.c gcc/cp/pt.c
> index 84464436991..521d0c56002 100644
> --- gcc/cp/pt.c
> +++ gcc/cp/pt.c
> @@ -6542,6 +6542,8 @@ check_valid_ptrmem_cst_expr (tree type, tree expr,
>  static bool
>  has_value_dependent_address (tree op)
>  {
> +  STRIP_ANY_LOCATION_WRAPPER (op);
> +
>/* We could use get_inner_reference here, but there's no need;
>   this is only relevant for template non-type arguments, which
>   can only be expressed as &id-expression.  */
> diff --git gcc/testsuite/g++.dg/cpp0x/constexpr-odr1.C 
> gcc/testsuite/g++.dg/cpp0x/constexpr-odr1.C
> new file mode 100644
> index 000..cf3f95f0565
> --- /dev/null
> +++ gcc/testsuite/g++.dg/cpp0x/constexpr-odr1.C
> @@ -0,0 +1,19 @@
> +// PR c++/92062 - ODR-use ignored for static member of class template.
> +// { dg-do run { target c++11 } }
> +
> +template<int> struct A {
> +  static const bool x;
> +  static_assert(&x, ""); // odr-uses A<...>::x
> +};
> +
> +int g;
> +
> +template<int N>
> +const bool A<N>::x = (g = 42, false);
> +
> +void f(A<0>) {}// A<0> must be complete, so x is instantiated
> +int main()
> +{
> +  if (g != 42)
> +__builtin_abort ();
> +}
> diff --git gcc/testsuite/g++.dg/cpp0x/constexpr-odr2.C 
> gcc/testsuite/g++.dg/cpp0x/constexpr-odr2.C
> new file mode 100644
> index 000..0927488e569
> --- /dev/null
> +++ gcc/testsuite/g++.dg/cpp0x/constexpr-odr2.C
> @@ -0,0 +1,19 @@
> +// PR c++/92062 - ODR-use ignored for static member of class template.
> +// { dg-do run { target c++11 } }
> +
> +template<int> struct A {
> +  static const bool x;
> +  enum { force_instantiation = !&x }; // odr-uses A<...>::x
> +};
> +
> +int g;
> +
> +template<int N>
> +const bool A<N>::x = (g = 42, false);
> +
> +void f(A<0>) {}// A<0> must be complete, so x is instantiated
> +int main()
> +{
> +  if (g != 42)
> +__builtin_abort ();
> +}

Marek


Re: Ping: Add a simulate_builin_function_decl langhook

2019-10-18 Thread Richard Sandiford
Here's a version rebased on top of Nathan's C++ patch yesterday.
This actually makes the patch simpler; the changes to the frontends
are now pure additions, so no existing frontend code should be affected.

FWIW, this patch and the enum one:

  https://gcc.gnu.org/ml/gcc-patches/2019-09/msg01523.html
  pinged with more rationale here:
  https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01003.html

are the only remaining prerequisites we need before adding (fixed-length
support for) the SVE intrinsics and calling conventions.

Thanks,
Richard

Richard Sandiford  writes:
> Ping
>
> See also https://gcc.gnu.org/ml/gcc-patches/2019-10/msg00413.html
> for more details about why this seemed like a good idea.  A shorter
> version of that message (well, it started out that way :-)) is that
> it lets us use:
>
>   #ifndef _ARM_SVE_H_
>   #define _ARM_SVE_H_
>
>   #include 
>
>   typedef __fp16 float16_t;
>   typedef float float32_t;
>   typedef double float64_t;
>
>   #pragma GCC aarch64 "arm_sve.h"
>
>   #endif
>
> as the full intrinsics header file.  Rather than have GCC call:
>
>   add_builtin_function ("__builtin_aarch64_svarsd_s16_n_x",
> fntype, code, BUILT_IN_MD, NULL, attrs);
>
> at start-up and having the header file provide an inline wrapper like:
>
>   __extension__ extern __inline svint8_t
>   __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>   svasrd_n_s16_x (svbool_t __a, svint8_t __b, int64_t __c)
>   {
> return __builtin_aarch64_svasrd_n_s16_x (__a, __b, __c);
>   }
>
> we can just have GCC call:
>
>   simulate_builtin_function_decl (input_location, "svasrd_n_s16_x",
>   fntype, code, NULL, attrs);
>
> when the header pragma is invoked.
>
> When repeated for thousands of functions, this significantly reduces
> the amount of code needed.  It also gives quicker start-up times
> (because functions are only registered when they're needed) and makes
> arm_sve.h compile more quickly (because there's less code to parse).
>
> Another benefit is that inline wrappers don't show up in error messages.
> E.g.:
>
>   #include 
>   int8x8_t foo (int16x8_t x, int y)
>   { return vqrshrn_n_s16 (x, y); }
>
> gives:
>
>   In file included from foo.c:3:10:
>   In function 'vqrshrn_n_s16',
>   inlined from 'foo' at foo.c:2:
>   include/arm_neon.h:24419:10: error: argument 2 must be a constant immediate
>   24419 |   return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
> |  ^
>
> where the quoted line is the inline wrapper in arm_neon.h rather than
> the user's code.  An SVE example like:
>
>   #include 
>   svint16_t foo (svbool_t pg, svint16_t x, int y)
>   { return svasrd_x (pg, x, y); }
>
> instead gives:
>
>   foo.c: In function 'foo':
>   foo.c:3:10: error: argument 3 of 'svasrd_x' must be an integer constant 
> expression
>   3 | { return svasrd_x (pg, x, y); }
> |  ^~~~
>
> which seems more user-friendly.
>
> Thanks,
> Richard
>
> Richard Sandiford  writes:
>> Although it's possible to define the SVE intrinsics in a normal header
>> file, it's much more convenient to define them directly in the compiler.
>> This also speeds up compilation and gives better error messages.
>>
>> The idea is therefore for arm_sve.h (the main intrinsics header file)
>> to have the pragma:
>>
>> #pragma GCC aarch64 "arm_sve.h"
>>
>> telling GCC to define (almost) everything arm_sve.h needs to define.
>> The target then needs a way of injecting new built-in function
>> declarations during compilation.
>>
>> The main hook for defining built-in functions is add_builtin_function.
>> This is designed for use at start-up, and so has various features that
>> are correct in that context but not for the pragma above:
>>
>>   (1) the location is always BUILTINS_LOCATION, whereas for arm_sve.h
>>   it ought to be the location of the pragma.
>>
>>   (2) the function is only immediately visible if it's in the implementation
>>   namespace, whereas the pragma is deliberately injecting functions
>>   into the general namespace.
>>
>>   (3) there's no attempt to emulate a normal function declaration in
>>   C or C++, whereas functions declared by the pragma should be
>>   checked in the same way as an open-coded declaration would be.
>>   E.g. we should get an error if there was a previous incompatible
>>   declaration.
>>
>>   (4) in C++, the function is treated as extern "C" and so can't be
>>   overloaded, whereas SVE intrinsics do use function overloading.
>>
>> This patch therefore adds a hook that targets can use to inject
>> the equivalent of a source-level function declaration, but bound
>> to a BUILT_IN_MD function.
>>
>> The main SVE intrinsic patch has tests to make sure that we report an
>> error for conflicting definitions that appear either before or after
>> including arm_sve.h.
>>
>> Tested on aarch64-linux-gnu and 

Re: [PATCH] Multibyte awareness for diagnostics (PR 49973)

2019-10-18 Thread Lewis Hyatt
On Fri, Sep 27, 2019 at 4:41 PM Lewis Hyatt  wrote:
>
> On Thu, Sep 26, 2019 at 08:46:56PM +, Joseph Myers wrote:
> > On Thu, 26 Sep 2019, Lewis Hyatt wrote:
> >
> > > A couple notes:
> > > - In order to avoid any portability problems with wchar_t, the
> > > equivalent of wcwidth() from libc is implemented in-house.
> >
> > I'm uneasy about contrib/gen_wcwidth.cpp doing the generation using host
> > libc's wcwidth.  The effect is that libcpp/generated_cpp_wcwidth.h is
> > *not* reproducible unless you know exactly what host (libc version, locale
> > used when running the program, distribution patches to libc and locale
> > data) was used to run the program.  I think we need a generator that works
> > from Unicode data in some way so we can explicitly say what version of the
> > (unmodified) Unicode data was used.
>
> Here is a revised patch that hopefully addresses your concerns. I borrowed the
> relevant Python code for parsing Unicode's data files from glibc, then added a
> new script that parses the locale data they output into the same sort of 
> simply
> searchable tables I was creating before. The new generated table is very close
> to the old one, but there are some differences due to improvements that have
> been made to glibc recently, affecting 200 or so codepoints.

This patch (https://gcc.gnu.org/ml/gcc-patches/2019-09/msg01627.html)
now requires some minor merge resolution, after recent other commits
affecting diagnostic-show-locus.c. I wasn't sure what's the preferred
practice, should I resend it now so that you have something that can
be applied to trunk, or it's better to wait for comments on what was
already sent? Thank you!

-Lewis


Re: [Patch][Fortran] OpenACC – permit common blocks in some clauses

2019-10-18 Thread Thomas Schwinge
Hi!

On 2019-10-15T23:32:32+0200, Tobias Burnus  wrote:
> This OpenACC-only patch extends the support for /common/ blocks.

I'll be quick to note that I don't have any first-hand experience with
Fortran common blocks.  :-P

> [In OpenMP (4.0 to 5.0, unchanged) and gfortran, common blocks are supported 
> in copyin/copyprivate, in firstprivate/lastprivate/private/shared, in 
> threadprivate and in declare target.]
>
> For OpenACC, gfortran already supports common blocks for 
> device_resident/usedevice/cache/flush/link.
>
> This patch adds them (for OpenACC only) to copy/copyin/copyout, create/delete,
> host, pcopy/pcopy_in/pcopy_out, present_or_copy, present_or_copy_in,
> present_or_copy_out, present_or_create and self.
> [Of those, only "copy()" is also an OpenMP clause name.]

I'm confused: in
 Jakub stated
that "OpenMP doesn't have a copy clause, so I'd expect true here":

| @@ -1051,7 +1052,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
| if ((mask & OMP_CLAUSE_COPY)
| && gfc_match ("copy ( ") == MATCH_YES
| && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP],
| -OMP_MAP_TOFROM))
| +OMP_MAP_TOFROM, openacc))
|   continue;

> [Cf. OpenACC 2.7 in 1.9 (for the p* variants) and 2.13; the latter is new 
> since OpenACC 2.0.]
>
>
>
> I think the Fortran part is obvious, once one agrees on the list of clauses; 
> and OK from a Fortran's maintainer view.

I'll defer to your judgement there, but just one comment: I noticed that
OpenACC 2.7 in 2.7. "Data Clauses" states that "For all clauses except
'deviceptr' and 'present', the list argument may include a Fortran
_common block_ name enclosed within slashes, if that _common block_ name
also appears in a 'declare' directive 'link' clause".

Are we already properly handling the aspect that requires that the "that
_common block_ name also appears in a 'declare' directive 'link' clause"?

The libgomp execution test cases you're adding all state that "This test
does not exercise ACC DECLARE", yet they supposedly already do work fine.
Or am I understanding the OpenACC specification wrongly here?

I'm certainly aware of (big) deficiencies in the OpenACC 'declare'
handling, so I guess my question here may be whether these test cases are
valid after all?

> gcc/gimplify.c: oacc_default_clause contains some changes; there are 
> additionally two lines which only differ for ORT_ACC – Hence, it is an 
> OpenACC-only change!
> The ME change is about privatizing common blocks (I haven't studied this part 
> closer.)

So, please do study that closer.  ;-P

In 
I raised some questions, got a bit of an answer, and in

asked further, didn't get an answer.

All the rationale from Cesar's original submission email should be
transferred into 'gcc/gimplify.c' as much as feasible, to make that
"voodoo code" better understandable.

> @Jakub, all: comments and approvals are welcome.

Indeed.  :-)

>   gcc/
>   * gimplify.c (oacc_default_clause): Privatize fortran common blocks.
>   (omp_notice_variable): Defer the expansion of DECL_VALUE_EXPR for
>   common block decls.

> --- a/gcc/gimplify.c
> +++ b/gcc/gimplify.c
> @@ -7218,15 +7218,20 @@ oacc_default_clause (struct gimplify_omp_ctx *ctx, 
> tree decl, unsigned flags)
>  {
>const char *rkind;
>bool on_device = false;
> +  bool is_private = false;
>bool declared = is_oacc_declared (decl);
>tree type = TREE_TYPE (decl);
>  
>if (lang_hooks.decls.omp_privatize_by_reference (decl))
>  type = TREE_TYPE (type);
>  
> +  if (RECORD_OR_UNION_TYPE_P (type))
> +is_private = lang_hooks.decls.omp_disregard_value_expr (decl, false);
> +
>if ((ctx->region_type & (ORT_ACC_PARALLEL | ORT_ACC_KERNELS)) != 0
>&& is_global_var (decl)
> -  && device_resident_p (decl))
> +  && device_resident_p (decl)
> +  && !is_private)
>  {
>on_device = true;
>flags |= GOVD_MAP_TO_ONLY;
> @@ -7237,7 +7242,9 @@ oacc_default_clause (struct gimplify_omp_ctx *ctx, tree 
> decl, unsigned flags)
>  case ORT_ACC_KERNELS:
>rkind = "kernels";
>  
> -  if (AGGREGATE_TYPE_P (type))
> +  if (is_private)
> + flags |= GOVD_MAP;
> +  else if (AGGREGATE_TYPE_P (type))
>   {
> /* Aggregates default to 'present_or_copy', or 'present'.  */
> if (ctx->default_kind != OMP_CLAUSE_DEFAULT_PRESENT)
> @@ -7254,7 +7261,9 @@ oacc_default_clause (struct gimplify_omp_ctx *ctx, tree 
> decl, unsigned flags)
>  case ORT_ACC_PARALLEL:
>rkind = "parallel";
>  
> -  if (on_device || declared)
> +  if (is_private)
> + flags |= GOVD_FIRSTPRIVATE;
> +  else if (on_device || declared)
>   flags |= 

Re: Type representation in CTF and DWARF

2019-10-18 Thread Richard Biener
On October 18, 2019 1:59:36 PM GMT+02:00, Pedro Alves  wrote:
>On 10/17/19 7:59 PM, Nick Alcock wrote:
>> On 17 Oct 2019, Richard Biener verbalised:
>> 
>>> On Thu, Oct 17, 2019 at 7:36 PM Nick Alcock 
>wrote:

 On 11 Oct 2019, Indu Bhagat stated:
> Compile with -g -gdwarf-like-ctf and use dwz -o <outfile> <infile> (using
> (using
> dwz compiled from the master branch) on the generated binaries:
>
> (coreutils-0.22)
>  .debug_info(D1) | .debug_abbrev(D2) | .debug_str(D4) | .ctf
>(uncompressed) | ratio (.ctf/(D1+D2+0.5*D4))
> ls   30616   |1136   |21098   | 26240 
> | 0.62
> pwd  10734   |788|10433   | 13929 
> | 0.83
> groups 10706 |811|10249   | 13378 
> | 0.80
>
> (emacs-26.3)
>  .debug_info(D1) | .debug_abbrev(D2) | .debug_str(D4) | .ctf
>(uncompressed) | ratio (.ctf/(D1+D2+0.5*D4))
> emacs-26.3.1 674657  |6402   |   273963   |  
>273910| 0.33
>>>
>>> Btw, for a fair comparison you have to remove all DW_TAG_subroutine
>>> children as well since CTF doesn't represent scopes or local
>variables
>>> at all (nor types only used by locals). It seems CTF only represents
>>> function entry points.
>> 
>> Good point: I'll have to hack up a DWARF trimmer to do this
>comparison
>> properly, I think. (Though CTF does represent global variables,
>> including file-scope statics.)
>
>Wouldn't it be possible to extend the -gdwarf-like-ctf hack to skip
>emitting those things?

Sure. 

>> 
>> In most cases local types etc are a fairly small contributor to the
>> total volume -- but macros can contribute a lot in some codebases.
>(The
>> Linux kernel's READ_ONCE macro is one I've personally been bitten by
>in
>> the past, with a new local struct in every use. GCC doesn't
>deduplicate
>> any of those so the resulting bloat from tens of thousands of
>instances
>> of this identical structure is quite incredible...)
>> 
>
>Sounds like something that would be beneficial to do with DWARF too.

Otoh those are distinct types according to the C standard and since dwarf is a 
source level representation we should preserve this (source locations also 
differ). 

Richard. 

>Thanks,
>Pedro Alves



Re: [PATCH] Fix PR c++/92024

2019-10-18 Thread Bernd Edlinger
Ping...

for the c++ FE and testsuite changes in the updated patch
here: https://gcc.gnu.org/ml/gcc-patches/2019-10/msg00916.html


Thanks
Bernd.




On 10/12/19 8:10 PM, Bernd Edlinger wrote:
> On 10/11/19 6:31 PM, Jason Merrill wrote:
>> On 10/10/19 2:06 PM, Bernd Edlinger wrote:
>>> On 10/10/19 7:49 PM, Jason Merrill wrote:
>>>
>>> if -Wshadow=compatible-local is used, the can_convert function crashes
>>> in instantiate_class_template_1.
>>
>> Right, checking can_convert is problematic here, as it can cause template 
>> instantiations that change the semantics of the program.  Or, in this case, 
>> crash.
>>
> 
> So I try to make C++ behave more consistently with the code in c-decl.c,
> thus dependent on warn_shadow but not on warn_shadow_local and/or
> warn_shadow_compatible_local:
> 
>if (warn_shadow)
>   warning_code = OPT_Wshadow;
> else if (comptypes (TREE_TYPE (old_decl), TREE_TYPE (new_decl)))
>   warning_code = OPT_Wshadow_compatible_local;
> else
>   warning_code = OPT_Wshadow_local;
> warned = warning_at (DECL_SOURCE_LOCATION (new_decl), 
> warning_code,
>  "declaration of %qD shadows a parameter",
>  new_decl);
> 
> I cannot remove the if (warn_shadow) since this breaks gcc.dg/pr48062.c
> which uses:
> 
> #pragma GCC diagnostic ignored "-Wshadow"
> 
> to disable a -Wshadow=compatible-local warning, but while -Wno-shadow on the
> command line disables also dependent warnings the pragma does not (always) do 
> that.
> 
> So instead I'd like to adjust the doc of -Wshadow to reflect the 
> implementation
> and remove the if(warn_shadow_local) to have C and C++ behave identical and
> hopefully now in sync with the doc.
> 
> 
> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> Is it OK for trunk?
> 
> 
> Thanks
> Bernd.
> 


[PATCH 1/2] [ARM,testsuite] Skip tests incompatible with -mpure-code

2019-10-18 Thread Christophe Lyon
Hi,

All these tests fail when using -mpure-code:
* some force A or R profile
* some use Neon
* some use -fpic/-fPIC
all of which are not supported by this option.

OK?

Thanks,

Christophe
2019-10-18  Christophe Lyon  

gcc/testsuite/
* gcc.target/arm/attr-crypto.c: Skip if -mpure-code is used.
* gcc.target/arm/attr-neon-fp16.c: Likewise.
* gcc.target/arm/combine-cmp-shift.c: Likewise.
* gcc.target/arm/data-rel-1.c: Likewise.
* gcc.target/arm/data-rel-2.c: Likewise.
* gcc.target/arm/data-rel-3.c: Likewise.
* gcc.target/arm/ftest-armv7a-arm.c: Likewise.
* gcc.target/arm/ftest-armv7a-thumb.c: Likewise.
* gcc.target/arm/ftest-armv7r-arm.c: Likewise.
* gcc.target/arm/ftest-armv7r-thumb.c: Likewise.
* gcc.target/arm/ftest-armv7ve-arm.c: Likewise.
* gcc.target/arm/ftest-armv7ve-thumb.c: Likewise.
* gcc.target/arm/ftest-armv8a-arm.c: Likewise.
* gcc.target/arm/ftest-armv8a-thumb.c: Likewise.
* gcc.target/arm/lceil-vcvt_1.c: Likewise.
* gcc.target/arm/lfloor-vcvt_1.c: Likewise.
* gcc.target/arm/lround-vcvt_1.c: Likewise.
* gcc.target/arm/memset-inline-10.c: Likewise.
* gcc.target/arm/mod_2.c: Likewise.
* gcc.target/arm/mod_256.c: Likewise.
* gcc.target/arm/pr40657-1.c: Likewise.
* gcc.target/arm/pr44788.c: Likewise.
* gcc.target/arm/pr50305.c: Likewise.
* gcc.target/arm/pr51835.c: Likewise.
* gcc.target/arm/pr51915.c: Likewise.
* gcc.target/arm/pr52006.c: Likewise.
* gcc.target/arm/pr53187.c: Likewise.
* gcc.target/arm/pr58784.c: Likewise.
* gcc.target/arm/pr59575.c: Likewise.
* gcc.target/arm/pr59923.c: Likewise.
* gcc.target/arm/pr60650-2.c: Likewise.
* gcc.target/arm/pr60657.c: Likewise.
* gcc.target/arm/pr60663.c: Likewise.
* gcc.target/arm/pr67439_1.c: Likewise.
* gcc.target/arm/pr68620.c: Likewise.
* gcc.target/arm/pr7676.c: Likewise.
* gcc.target/arm/pr79239.c: Likewise.
* gcc.target/arm/pr81863.c: Likewise.
* gcc.target/arm/pragma_attribute.c: Likewise.
* gcc.target/arm/pragma_cpp_fma.c: Likewise.
* gcc.target/arm/require-pic-register-loc.c: Likewise.
* gcc.target/arm/thumb-ltu.c: Likewise.
* gcc.target/arm/thumb1-pic-high-reg.c: Likewise.
* gcc.target/arm/thumb1-pic-single-base.c: Likewise.
* gcc.target/arm/tlscall.c: Likewise.
* gcc.target/arm/unsigned-float.c: Likewise.
* gcc.target/arm/vrinta-ce.c: Likewise.
From 2ee4c65a4d308fa48c0bfff69e4670feeb649227 Mon Sep 17 00:00:00 2001
From: Christophe Lyon 
Date: Thu, 17 Oct 2019 11:11:48 +0200
Subject: [PATCH 1/2] [ARM,testsuite] Skip tests incompatible with -mpure-code.

All these tests fail when using -mpure-code:
* some force A or R profile
* some use Neon
* some use -fpic/-fPIC
all of which are not supported by this option.

2019-10-18  Christophe Lyon  

gcc/testsuite/
* gcc.target/arm/attr-crypto.c: Skip if -mpure-code is used.
* gcc.target/arm/attr-neon-fp16.c: Likewise.
* gcc.target/arm/combine-cmp-shift.c: Likewise.
* gcc.target/arm/data-rel-1.c: Likewise.
* gcc.target/arm/data-rel-2.c: Likewise.
* gcc.target/arm/data-rel-3.c: Likewise.
* gcc.target/arm/ftest-armv7a-arm.c: Likewise.
* gcc.target/arm/ftest-armv7a-thumb.c: Likewise.
* gcc.target/arm/ftest-armv7r-arm.c: Likewise.
* gcc.target/arm/ftest-armv7r-thumb.c: Likewise.
* gcc.target/arm/ftest-armv7ve-arm.c: Likewise.
* gcc.target/arm/ftest-armv7ve-thumb.c: Likewise.
* gcc.target/arm/ftest-armv8a-arm.c: Likewise.
* gcc.target/arm/ftest-armv8a-thumb.c: Likewise.
* gcc.target/arm/lceil-vcvt_1.c: Likewise.
* gcc.target/arm/lfloor-vcvt_1.c: Likewise.
* gcc.target/arm/lround-vcvt_1.c: Likewise.
* gcc.target/arm/memset-inline-10.c: Likewise.
* gcc.target/arm/mod_2.c: Likewise.
* gcc.target/arm/mod_256.c: Likewise.
* gcc.target/arm/pr40657-1.c: Likewise.
* gcc.target/arm/pr44788.c: Likewise.
* gcc.target/arm/pr50305.c: Likewise.
* gcc.target/arm/pr51835.c: Likewise.
* gcc.target/arm/pr51915.c: Likewise.
* gcc.target/arm/pr52006.c: Likewise.
* gcc.target/arm/pr53187.c: Likewise.
* gcc.target/arm/pr58784.c: Likewise.
* gcc.target/arm/pr59575.c: Likewise.
* gcc.target/arm/pr59923.c: Likewise.
* gcc.target/arm/pr60650-2.c: Likewise.
* gcc.target/arm/pr60657.c: Likewise.
* gcc.target/arm/pr60663.c: Likewise.
* gcc.target/arm/pr67439_1.c: Likewise.
* gcc.target/arm/pr68620.c: Likewise.
* gcc.target/arm/pr7676.c: Likewise.
* gcc.target/arm/pr79239.c: Likewise.
* gcc.target/arm/pr81863.c: Likewise.
* 

[PATCH 2/2] [ARM] Add support for -mpure-code in thumb-1 (v6m)

2019-10-18 Thread Christophe Lyon
Hi,

This patch extends support for -mpure-code to all thumb-1 processors,
by removing the need for MOVT.

Symbol addresses are built using upper8_15, upper0_7, lower8_15 and
lower0_7 relocations, and constants are built using sequences of
movs/adds and lsls instructions.

The extension of the *thumb1_movhf pattern uses always the same size
(6) although it can emit a shorter sequence when possible. This is
similar to what *arm32_movhf already does.

CASE_VECTOR_PC_RELATIVE is now false with -mpure-code, to avoid
generating invalid assembly code with differences from symbols from
two different sections (the difference cannot be computed by the
assembler).

Tests pr45701-[12].c needed a small adjustment to avoid matching
upper8_15 when looking for the r8 register.

Test no-literal-pool.c is augmented with __fp16, so it now uses
-mfp16-format=ieee.

Test thumb1-Os-mult.c generates an inline code sequence with
-mpure-code and computes the multiplication by using a sequence of
add/shift rather than using the multiply instruction, so we skip it in
presence of -mpure-code.

With -mcpu=cortex-m0, the pure-code/no-literal-pool.c fails because
code like:
static char *p = "Hello World";
char *
testchar ()
{
  return p + 4;
}
generates 2 indirections (I removed non-essential directives/code)
.section.rodata
.LC0:
.ascii  "Hello World\000"
.data
p:
.word   .LC0
.section.rodata
.LC2:
.word   p
.section .text,"0x2006",%progbits
testchar:
push{r7, lr}
add r7, sp, #0
movs r3, #:upper8_15:#.LC2
lsls r3, #8
adds r3, #:upper0_7:#.LC2
lsls r3, #8
adds r3, #:lower8_15:#.LC2
lsls r3, #8
adds r3, #:lower0_7:#.LC2
ldr r3, [r3]
ldr r3, [r3]
adds r3, r3, #4
movs r0, r3
mov sp, r7
@ sp needed
pop {r7, pc}

By contrast, when using -mcpu=cortex-m4, the code looks like:
.section.rodata
.LC0:
.ascii  "Hello World\000"
.data
p:
.word   .LC0
testchar:
push{r7}
add r7, sp, #0
movw r3, #:lower16:p
movt r3, #:upper16:p
ldr r3, [r3]
addsr3, r3, #4
mov r0, r3
mov sp, r7
pop {r7}
bx  lr

I haven't found yet how to make code for cortex-m0 apply upper/lower
relocations to "p" instead of .LC2. The current code looks functional,
but could be improved.

OK as-is?

Thanks,

Christophe
From 8c57d721ee94d813553a203bcca5ee31b7ad1a31 Mon Sep 17 00:00:00 2001
From: Christophe Lyon 
Date: Fri, 18 Oct 2019 12:15:12 +
Subject: [PATCH 2/2] [ARM] Add support for -mpure-code in thumb-1 (v6m)

This patch extends support for -mpure-code to all thumb-1 processors,
by removing the need for MOVT.

Symbol addresses are built using upper8_15, upper0_7, lower8_15 and
lower0_7 relocations, and constants are built using sequences of
movs/adds and lsls instructions.

The extension of the *thumb1_movhf pattern uses always the same size
(6) although it can emit a shorter sequence when possible. This is
similar to what *arm32_movhf already does.

CASE_VECTOR_PC_RELATIVE is now false with -mpure-code, to avoid
generating invalid assembly code with differences from symbols from
two different sections (the difference cannot be computed by the
assembler).

Tests pr45701-[12].c needed a small adjustment to avoid matching
upper8_15 when looking for the r8 register.

Test no-literal-pool.c is augmented with __fp16, so it now uses
-mfp16-format=ieee.

Test thumb1-Os-mult.c generates an inline code sequence with
-mpure-code and computes the multiplication by using a sequence of
add/shift rather than using the multiply instruction, so we skip it in
presence of -mpure-code.

With -mcpu=cortex-m0, the pure-code/no-literal-pool.c fails because
code like:
static char *p = "Hello World";
char *
testchar ()
{
  return p + 4;
}
generates 2 indirections (I removed non-essential directives/code)
.section.rodata
.LC0:
.ascii  "Hello World\000"
.data
p:
.word   .LC0
.section.rodata
.LC2:
.word   p
.section .text,"0x2006",%progbits
testchar:
push{r7, lr}
add r7, sp, #0
movs r3, #:upper8_15:#.LC2
lsls r3, #8
adds r3, #:upper0_7:#.LC2
lsls r3, #8
adds r3, #:lower8_15:#.LC2
lsls r3, #8
adds r3, #:lower0_7:#.LC2
ldr r3, [r3]
ldr r3, [r3]
adds r3, r3, #4
movs r0, r3
mov sp, r7
@ sp needed
pop {r7, pc}

By contrast, when using -mcpu=cortex-m4, the code looks like:
.section.rodata
.LC0:
.ascii  "Hello World\000"
.data
p:
.word   .LC0
testchar:
push{r7}
add r7, sp, #0
movw r3, #:lower16:p
movt r3, #:upper16:p
ldr r3, [r3]
addsr3, r3, #4
mov r0, r3
mov sp, r7
pop {r7}
bx  lr

I haven't found yet how to make code for cortex-m0 apply upper/lower
relocations to "p" 

[Patch,committed,Fortran] PR91586 Fix ICE on invalid code with CLASS

2019-10-18 Thread Tobias Burnus

Re-instate the error message which got lost by dereferencing a NULL pointer.

Commited as r277153 to the trunk.
Committed as r277154 to the GCC 9 branch (as it was a 9/10 regression).

Thanks,

Tobias

commit f9aef7e7d320df8560e602d863e97ea324e68644
Author: burnus 
Date:   Fri Oct 18 12:04:31 2019 +

[Fortran] PR91586 Fix ICE on invalid code with CLASS

gcc/fortran/
PR fortran/91586
* class.c (gfc_find_derived_vtab): Return NULL
instead of deref'ing NULL pointer.

gcc/testsuite/
PR fortran/91586
* gfortran.dg/class_71.f90: New.



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@277153 138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 1dbc890f5da..8a1beeafa01 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,9 @@
+2019-10-18  Tobias Burnus  
+
+	PR fortran/91586
+	* class.c (gfc_find_derived_vtab): Return NULL
+	instead of deref'ing NULL pointer.
+
 2019-10-15  James Norris  
 	Tobias Burnus  
 
diff --git a/gcc/fortran/class.c b/gcc/fortran/class.c
index 8a1f43f116c..9117121e5fe 100644
--- a/gcc/fortran/class.c
+++ b/gcc/fortran/class.c
@@ -2241,6 +2241,9 @@ gfc_find_derived_vtab (gfc_symbol *derived)
   if (!derived->attr.unlimited_polymorphic && derived->attr.is_class)
 derived = gfc_get_derived_super_type (derived);
 
+  if (!derived)
+return NULL;
+
   /* Find the gsymbol for the module of use associated derived types.  */
   if ((derived->attr.use_assoc || derived->attr.used_in_submodule)
&& !derived->attr.vtype && !derived->attr.is_class)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b783c8dede3..4e817eb1371 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-10-18  Tobias Burnus  
+
+	PR fortran/91586
+	* gfortran.dg/class_71.f90: New.
+
 2019-10-18  Georg-Johann Lay  
 
 	Fix some fallout for small targets.
diff --git a/gcc/testsuite/gfortran.dg/class_71.f90 b/gcc/testsuite/gfortran.dg/class_71.f90
new file mode 100644
index 000..bc0ffcd2305
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/class_71.f90
@@ -0,0 +1,13 @@
+! { dg-do compile }
+!
+! PR fortran/91586
+!
+! Contributed by G. Steinmetz
+!
+program p
+   type t
+  class(*), allocatable :: a
+   end type
+   class(t) :: x, y  ! { dg-error "must be dummy, allocatable or pointer" }
+   y = x  ! { dg-error "Nonallocatable variable must not be polymorphic in intrinsic assignment" }
+end


[C++ PATCH] anon type names

2019-10-18 Thread Nathan Sidwell
I noticed that we use a bitfield flag to note types with names for 
linkage purposes:

  typedef struct {} foo;
but, we can infer this by comparing TYPE_STUB_DECL and TYPE_DECL of the 
main variant.  It's only checked in two places -- the C++ parser and the 
objective C++ encoder.


Committing this to trunk.

nathan
--
Nathan Sidwell
2019-10-18  Nathan Sidwell  

	* cp-tree.h (struct lang_type): Remove was_anonymous.
	(TYPE_WAS_UNNAMED): Implement by checking TYPE_DECL &
	TYPE_STUB_DECL.
	* decl.c (name_unnamed_type): Don't set TYPE_WAS_UNNAMED.

Index: gcc/cp/cp-tree.h
===
--- gcc/cp/cp-tree.h	(revision 277149)
+++ gcc/cp/cp-tree.h	(working copy)
@@ -2153,5 +2153,4 @@ struct GTY(()) lang_type {
   unsigned ptrmemfunc_flag : 1;
 
-  unsigned was_anonymous : 1;
   unsigned lazy_default_ctor : 1;
   unsigned lazy_copy_ctor : 1;
@@ -2161,6 +2160,6 @@ struct GTY(()) lang_type {
   unsigned has_complex_copy_ctor : 1;
   unsigned has_complex_copy_assign : 1;
-
   unsigned non_aggregate : 1;
+
   unsigned has_complex_dflt : 1;
   unsigned has_list_ctor : 1;
@@ -2170,6 +2169,6 @@ struct GTY(()) lang_type {
   unsigned lazy_move_assign : 1;
   unsigned has_complex_move_ctor : 1;
-
   unsigned has_complex_move_assign : 1;
+
   unsigned has_constexpr_ctor : 1;
   unsigned unique_obj_representations : 1;
@@ -2183,5 +2182,5 @@ struct GTY(()) lang_type {
  of this by updating the size of this bitfield whenever you add or
  remove a flag.  */
-  unsigned dummy : 4;
+  unsigned dummy : 5;
 
   tree primary_base;
@@ -4586,6 +4585,10 @@ more_aggr_init_expr_args_p (const aggr_i
 /* Define fields and accessors for nodes representing declared names.  */
 
-/* Nonzero if TYPE is an unnamed class with a typedef for linkage purposes.  */
-#define TYPE_WAS_UNNAMED(NODE) (LANG_TYPE_CLASS_CHECK (NODE)->was_anonymous)
+/* True if TYPE is an unnamed structured type with a typedef for
+   linkage purposes.  In that case TYPE_NAME and TYPE_STUB_DECL of the
+   MAIN-VARIANT are different. */
+#define TYPE_WAS_UNNAMED(NODE)\
+  (TYPE_NAME (TYPE_MAIN_VARIANT (NODE))			\
+   != TYPE_STUB_DECL (TYPE_MAIN_VARIANT (NODE)))
 
 /* C++: all of these are overloaded!  These apply only to TYPE_DECLs.  */
Index: gcc/cp/decl.c
===
--- gcc/cp/decl.c	(revision 277149)
+++ gcc/cp/decl.c	(working copy)
@@ -10442,7 +10442,4 @@ name_unnamed_type (tree type, tree decl)
   TYPE_NAME (t) = decl;
 
-  if (TYPE_LANG_SPECIFIC (type))
-TYPE_WAS_UNNAMED (type) = 1;
-
   /* If this is a typedef within a template class, the nested
  type is a (non-primary) template.  The name for the


Re: [PATCH 1/4] Add function for pretty-printing OpenACC clause names

2019-10-18 Thread Thomas Schwinge
Hi Julian!

On 2019-10-06T15:32:34-0700, Julian Brown  wrote:
> This patch adds a function to pretty-print OpenACC clause names from
> OMP_CLAUSE_MAP_KINDs, for error output.

Indeed talking about (OpenMP) 'map' clauses in an OpenACC context is not
quite ideal -- that's what PR65095 is about, so please mention that one
in your ChangeLog updates.

> The function is used by subsequent
> patches.

Ah, I somehow had assumed you'd also adapt existing code to use it.  ;-)

> Previously approved as part of:
>
>   https://gcc.gnu.org/ml/gcc-patches/2018-12/msg01292.html
>
> FAOD, OK for trunk?

Still fine.  To record the review effort, please include "Reviewed-by:
Thomas Schwinge " in the commit log, see
.


A few more comments, for later:

>  gcc/c-family/c-common.h |  1 +
>  gcc/c-family/c-omp.c| 33 +

As I'd mentioned before: 'Eventually (that is, later), this should move
into generic code, next to the other "clause printing".  Also to be
shared with Fortran.'

> --- a/gcc/c-family/c-omp.c
> +++ b/gcc/c-family/c-omp.c

> +/* For OpenACC, the OMP_CLAUSE_MAP_KIND of an OMP_CLAUSE_MAP is used 
> internally
> +   to distinguish clauses as seen by the user.  Return the "friendly" clause
> +   name for error messages etc., where possible.  See also
> +   c/c-parser.c:c_parser_oacc_data_clause and
> +   cp/parser.c:cp_parser_oacc_data_clause.  */
> +
> +const char *
> +c_omp_map_clause_name (tree clause, bool oacc)
> +{
> +  if (oacc && OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP)
> +switch (OMP_CLAUSE_MAP_KIND (clause))
> +{
> +case GOMP_MAP_FORCE_ALLOC:
> +case GOMP_MAP_ALLOC: return "create";
> +case GOMP_MAP_FORCE_TO:
> +case GOMP_MAP_TO: return "copyin";
> +case GOMP_MAP_FORCE_FROM:
> +case GOMP_MAP_FROM: return "copyout";
> +case GOMP_MAP_FORCE_TOFROM:
> +case GOMP_MAP_TOFROM: return "copy";
> +case GOMP_MAP_RELEASE: return "delete";
> +case GOMP_MAP_FORCE_PRESENT: return "present";
> +case GOMP_MAP_ATTACH: return "attach";
> +case GOMP_MAP_FORCE_DETACH:
> +case GOMP_MAP_DETACH: return "detach";
> +case GOMP_MAP_DEVICE_RESIDENT: return "device_resident";
> +case GOMP_MAP_LINK: return "link";
> +case GOMP_MAP_FORCE_DEVICEPTR: return "deviceptr";
> +default: break;
> +}
> +  return omp_clause_code_name[OMP_CLAUSE_CODE (clause)];
> +}

Indeed nearby (after) the 'omp_clause_code_name' definition in
'gcc/tree.c' would probably be a better place for this, as that's where
the current clause names are coming from.

I did wonder whether we need to explicitly translate from (OpenMP) "'map'
clause" into (OpenACC) "'create' clause" etc., or if a generic (OpenACC)
"data clause" would be sufficient?  (After all, in diagnostics we also
print out the original code, so the user can then see which specific data
clause is being complained about.  But -- somewhat funnily! -- the way
you're doing this might actually be better in terms of translatability in
diagnostics printing: "%qs clause" might require a different translation
when its "%s" can be "'map'" (doesn't get translated) vs. "data" (gets
translated), but remains the same when "%s" is "'map'" vs. "'create'"
etc.

Do we at all still generate 'GOMP_MAP_FORCE_*' anywhere, or should these
in fact be 'gcc_unreachable'?

Generally, I prefer if all possible 'case's are listed explicitly, and
then the 'default' (and here OpenMP-only ones, too) be 'gcc_unreachable',
so that we easily catch the case that new 'GOMP_MAP_*' get added but such
functions not updated, for example.


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: Type representation in CTF and DWARF

2019-10-18 Thread Pedro Alves
On 10/17/19 7:59 PM, Nick Alcock wrote:
> On 17 Oct 2019, Richard Biener verbalised:
> 
>> On Thu, Oct 17, 2019 at 7:36 PM Nick Alcock  wrote:
>>>
>>> On 11 Oct 2019, Indu Bhagat stated:
Compile with -g -gdwarf-like-ctf and use dwz -o <outfile> <infile> (using
 dwz compiled from the master branch) on the generated binaries:

 (coreutils-0.22)
  .debug_info(D1) | .debug_abbrev(D2) | .debug_str(D4) | .ctf 
 (uncompressed) | ratio (.ctf/(D1+D2+0.5*D4))
 ls   30616   |1136   |21098   | 26240  
  | 0.62
 pwd  10734   |788|10433   | 13929  
  | 0.83
 groups 10706 |811|10249   | 13378  
  | 0.80

 (emacs-26.3)
  .debug_info(D1) | .debug_abbrev(D2) | .debug_str(D4) | .ctf 
 (uncompressed) | ratio (.ctf/(D1+D2+0.5*D4))
 emacs-26.3.1 674657  |6402   |   273963   |   273910   
  | 0.33
>>
>> Btw, for a fair comparison you have to remove all DW_TAG_subroutine
>> children as well since CTF doesn't represent scopes or local variables
>> at all (nor types only used by locals). It seems CTF only represents
>> function entry points.
> 
> Good point: I'll have to hack up a DWARF trimmer to do this comparison
> properly, I think. (Though CTF does represent global variables,
> including file-scope statics.)

Wouldn't it be possible to extend the -gdwarf-like-ctf hack to skip
emitting those things?

> 
> In most cases local types etc are a fairly small contributor to the
> total volume -- but macros can contribute a lot in some codebases. (The
> Linux kernel's READ_ONCE macro is one I've personally been bitten by in
> the past, with a new local struct in every use. GCC doesn't deduplicate
> any of those so the resulting bloat from tens of thousands of instances
> of this identical structure is quite incredible...)
> 

Sounds like something that would be beneficial to do with DWARF too.

Thanks,
Pedro Alves


Re: Type representation in CTF and DWARF

2019-10-18 Thread Pedro Alves
On 10/17/19 6:36 PM, Nick Alcock wrote:
> A side note here: the sizes given above are uncompressed sizes, but in
> the real world CTF is almost always compressed: the threshold for
> compression is in theory customizable but at the moment is hardwired at
> 4KiB-uncompressed in the linker. I usually see compression ratios of
> roughly 3 or 4 to 1: e.g. I just tried it with a randomly chosen binary,
> /usr/lib/libgtk-3.so.0.2404.3, and got these sizes:

DWARF can be compressed too, with --compress-debug-sections.

Thanks,
Pedro Alves


[PATCH] PR libstdc++/92143 adjust for OS X aligned_alloc behaviour

2019-10-18 Thread Jonathan Wakely

OS X 10.15 adds aligned_alloc but it has the same restriction as the AIX
version, namely that alignments smaller than sizeof(void*) are not
supported.

PR libstdc++/92143
* libsupc++/new_opa.cc (operator new) [__APPLE__]: Increase alignment
to at least sizeof(void*).

Tested powerpc64le-linux, committed to trunk.

commit 356a59f69de9896d5b517cb28790da4335fd758f
Author: Jonathan Wakely 
Date:   Fri Oct 18 11:45:20 2019 +0100

PR libstdc++/92143 adjust for OS X aligned_alloc behaviour

OS X 10.15 adds aligned_alloc but it has the same restriction as the AIX
version, namely that alignments smaller than sizeof(void*) are not
supported.

PR libstdc++/92143
* libsupc++/new_opa.cc (operator new) [__APPLE__]: Increase 
alignment
to at least sizeof(void*).

diff --git a/libstdc++-v3/libsupc++/new_opa.cc 
b/libstdc++-v3/libsupc++/new_opa.cc
index aa5d2e14455..80eb343a1c8 100644
--- a/libstdc++-v3/libsupc++/new_opa.cc
+++ b/libstdc++-v3/libsupc++/new_opa.cc
@@ -108,9 +108,10 @@ operator new (std::size_t sz, std::align_val_t al)
 sz = 1;
 
 #if _GLIBCXX_HAVE_ALIGNED_ALLOC
-# ifdef _AIX
+# if defined _AIX || defined __APPLE__
   /* AIX 7.2.0.0 aligned_alloc incorrectly has posix_memalign's requirement
-   * that alignment is a multiple of sizeof(void*).  */
+   * that alignment is a multiple of sizeof(void*).
+   * OS X 10.15 has the same requirement.  */
   if (align < sizeof(void*))
 align = sizeof(void*);
 # endif


Re: [PATCH] Define [range.cmp] comparisons for C++20

2019-10-18 Thread Jonathan Wakely

On 17/10/19 16:40 +0100, Jonathan Wakely wrote:

Define std::identity, std::ranges::equal_to, std::ranges::not_equal_to,
std::ranges::greater, std::ranges::less, std::ranges::greater_equal and
std::ranges::less_equal.

* include/Makefile.am: Add new header.
* include/Makefile.in: Regenerate.
* include/bits/range_cmp.h: New header for C++20 function objects.
* include/std/functional: Include new header.
* testsuite/20_util/function_objects/identity/1.cc: New test.
* testsuite/20_util/function_objects/range.cmp/equal_to.cc: New test.
* testsuite/20_util/function_objects/range.cmp/greater.cc: New test.
* testsuite/20_util/function_objects/range.cmp/greater_equal.cc: New
test.
* testsuite/20_util/function_objects/range.cmp/less.cc: New test.
* testsuite/20_util/function_objects/range.cmp/less_equal.cc: New test.
* testsuite/20_util/function_objects/range.cmp/not_equal_to.cc: New
test.


This removes the dependency on std::less, so that ranges::less doesn't
need to instantiate another template, and  doesn't
need to include . Various components in 
and  require ranges::less but don't necessarily need to depend
on everything in , so this removes unnecessary
coupling between the new ranges world and the old STL world.

Tested powerpc64le-linux, committed to trunk.

commit cc43d4d492ecc64cc3aac8b47aae759942d9e57f
Author: Jonathan Wakely 
Date:   Thu Oct 17 18:56:46 2019 +0100

Implement std::ranges::less without std::less

* include/bits/range_cmp.h (ranges::less::operator()): Inline the
logic from std::less::operator() to remove the dependency on it.

diff --git a/libstdc++-v3/include/bits/range_cmp.h b/libstdc++-v3/include/bits/range_cmp.h
index 3e5bb8847ab..a77fd5274b9 100644
--- a/libstdc++-v3/include/bits/range_cmp.h
+++ b/libstdc++-v3/include/bits/range_cmp.h
@@ -121,10 +121,19 @@ namespace ranges
   noexcept(noexcept(std::declval<_Tp>() < std::declval<_Up>()))
   {
 	if constexpr (__detail::__less_builtin_ptr_cmp<_Tp, _Up>)
-	  return std::less<const volatile void*>{}(
-	  static_cast<const volatile void*>(std::forward<_Tp>(__t)),
+	  {
+#ifdef __cpp_lib_is_constant_evaluated
+	if (std::is_constant_evaluated())
+	  return __t < __u;
+#endif
+	auto __x = reinterpret_cast<__UINTPTR_TYPE__>(
+	  static_cast<const volatile void*>(std::forward<_Tp>(__t)));
+	auto __y = reinterpret_cast<__UINTPTR_TYPE__>(
  	  static_cast<const volatile void*>(std::forward<_Up>(__u)));
-	return std::forward<_Tp>(__t) < std::forward<_Up>(__u);
+	return __x < __y;
+	  }
+	else
+	  return std::forward<_Tp>(__t) < std::forward<_Up>(__u);
   }
 
 using is_transparent = __is_transparent;


Re: [PATCH][Arm] Fix multilibs for Armv7-R

2019-10-18 Thread Richard Earnshaw (lists)

On 18/10/2019 11:53, Andre Vieira (lists) wrote:

Hi

This patch maps multilibs using -march=armv7-r+vfpv3-d16-fp16 and
-march=armv7-r+vfpv3-d16-fp16+idiv to v7+fp.  This patch also adds a new
multilib for armv7-r+fp.sp and maps -march=armv7-r+fp.sp+idiv,
-march=armv7-r+vfpv3xd-fp16 and -march=armv7-r+vfpv3xd-fp16+idiv to it.

This solves issues encountered when trying to link for
-mcpu=cortex-r8 -mfloat-abi=hard.

Built arm-none-eabi and compared testsuite run of 
-march=armv7e-m+fp/-mfloat-abi=hard to 
-march=armv7-r+fp.sp/-mfloat-abi=hard which looked alright.


Is this OK for trunk?

gcc/ChangeLog:
2019-10-18  Andre Vieira  

     * config/arm/t-multilib: Add new multilib variants and new
     mappings.

gcc/testsuite/ChangeLog:
2019-10-18  Andre Vieira  

     * gcc.target/arm/multilib.exp: Add extra tests.


Cheers,
Andre


+s-mlib: $(srcdir)/config/arm/t-multilib $(srcdir)/config/arm/t-aprofile 
$(srcdir)/config/arm/t-rmprofile


Needs to be mentioned in the ChangeLog

+MULTILIB_MATCHES	+= $(foreach ARCH,+fp +fp+idiv +vfpv3-d16-fp16 
+vfpv3-d16-fp16+idiv, \

+march?armv7+fp=march?armv7-r$(ARCH))

The list here is the DP dual of v7_r_sp_variants, so it would make more 
sense to define v7_r_dp_variants and use it here.


+   {-march=armv7-r+fp -mfpu=auto -mfloat-abi=softfp} "thumb/v7+fp/softfp"
+   {-march=armv7-r+fp -mfpu=auto -mfloat-abi=hard} "thumb/v7+fp/hard"
+	{-march=armv7-r+fp+idiv -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7+fp/softfp"

+   {-march=armv7-r+fp+idiv -mfpu=auto -mfloat-abi=hard} "thumb/v7+fp/hard"
+	{-march=armv7-r+vfpv3-d16-fp16 -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7+fp/softfp"
+	{-march=armv7-r+vfpv3-d16-fp16 -mfpu=auto -mfloat-abi=hard} 
"thumb/v7+fp/hard"
+	{-march=armv7-r+vfpv3-d16-fp16+idiv -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7+fp/softfp"
+	{-march=armv7-r+vfpv3-d16-fp16+idiv -mfpu=auto -mfloat-abi=hard} 
"thumb/v7+fp/hard"
+{-march=armv7-r+fp.sp -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7-r+fp.sp/softfp"
+{-march=armv7-r+fp.sp -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"
+{-march=armv7-r+fp.sp+idiv -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7-r+fp.sp/softfp"
+{-march=armv7-r+fp.sp+idiv -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"
+{-march=armv7-r+vfpv3xd -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7-r+fp.sp/softfp"
+{-march=armv7-r+vfpv3xd -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"
+{-march=armv7-r+vfpv3xd+idiv -mfpu=auto -mfloat-abi=softfp} 
"thumb/v7-r+fp.sp/softfp"
+{-march=armv7-r+vfpv3xd+idiv -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"
+{-march=armv7-r+vfpv3xd-fp16+idiv -mfpu=auto 
-mfloat-abi=softfp} "thumb/v7-r+fp.sp/softfp"
+{-march=armv7-r+vfpv3xd-fp16+idiv -mfpu=auto -mfloat-abi=hard} 
"thumb/v7-r+fp.sp/hard"


8 spaces -> 1 tab.



OK with those changes.

R.


Re: [PATCH] Relax integer condition reduction, simplify vect_is_simple_reduction

2019-10-18 Thread Richard Biener
On Fri, 18 Oct 2019, Christophe Lyon wrote:

> On Wed, 16 Oct 2019 at 15:09, Richard Biener  wrote:
> >
> >
> > It happens we cannot have different typed data and index for
> > integer condition reductions right now, for whatever reason.
> > The following makes that work, even for double data and integer index.
> > There's hope this enables some relevant amount of extra vectorization.
> >
> > Actually this is fallout from simplifying vect_is_simple_reduction
> > down to SSA cycle detection and moving reduction validity / handling
> > checks to vectorizable_reduction (thus a single place).
> >
> > I've decided to take an intermediate step here as I enable more
> > vectorization.  Which also needed the vect_transform_stmt change.
> >
> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> >
> > Richard.
> >
> > * tree-vect-loop.c (vect_valid_reduction_input_p): Remove.
> > (vect_is_simple_reduction): Delay checking to
> > vectorizable_reduction and relax the checking.
> > (vectorizable_reduction): Check we have a simple use.  Check
> > for bogus condition reductions.
> > * tree-vect-stmts.c (vect_transform_stmt): Make sure we
> > are looking at the last stmt in a pattern sequence when
> > filling in backedge PHI values.
> >
> > * gcc.dg/vect/vect-cond-reduc-3.c: New testcase.
> > * gcc.dg/vect/vect-cond-reduc-4.c: Likewise.
> >
> 
> Hi Richard,
> 
> The new test vect-cond-reduc-3.c fails on arm*linux-gnueabihf when
> configured --with-fpu neon-*:
> FAIL: gcc.dg/vect/vect-cond-reduc-3.c -flto -ffat-lto-objects
> scan-tree-dump-times vect "LOOP VECTORIZED" 2
> FAIL: gcc.dg/vect/vect-cond-reduc-3.c -flto -ffat-lto-objects
> scan-tree-dump-times vect "condition expression based on integer
> induction." 2
> FAIL: gcc.dg/vect/vect-cond-reduc-3.c scan-tree-dump-times vect "LOOP
> VECTORIZED" 2
> FAIL: gcc.dg/vect/vect-cond-reduc-3.c scan-tree-dump-times vect
> "condition expression based on integer induction." 2
> 
> vect_float is true in such cases, so is that a vectorization failure
> or is that expected on this target and the test should be disabled?

Hmm, maybe it needs vect_cond_mixed, arm doesn't have it but aarch64 does.

Richard.

> Thanks,
> 
> Christophe
> 
> 
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c 
> > b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
> > new file mode 100644
> > index 000..a5b3849a8c3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
> > @@ -0,0 +1,45 @@
> > +/* { dg-require-effective-target vect_condition } */
> > +/* { dg-require-effective-target vect_float } */
> > +
> > +#include "tree-vect.h"
> > +
> > +extern void abort (void) __attribute__ ((noreturn));
> > +
> > +#define N 27
> > +
> > +/* Condition reduction with different types.  */
> > +
> > +int
> > +condition_reduction (float *a, float min_v)
> > +{
> > +  int last = 0;
> > +
> > +  for (int i = 0; i < N; i++)
> > +if (a[i] < min_v)
> > +  last = i;
> > +
> > +  return last;
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > +  float a[N] = {
> > +  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
> > +  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
> > +  21, 22, 23, 24, 25, 26, 27
> > +  };
> > +
> > +  check_vect ();
> > +
> > +  int ret = condition_reduction (a, 10);
> > +  if (ret != 18)
> > +abort ();
> > +
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
> > +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with 
> > FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
> > +/* { dg-final { scan-tree-dump-times "condition expression based on 
> > integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
> > +
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c 
> > b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
> > new file mode 100644
> > index 000..6b6d17fb93c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
> > @@ -0,0 +1,45 @@
> > +/* { dg-require-effective-target vect_condition } */
> > +/* { dg-require-effective-target vect_double } */
> > +
> > +#include "tree-vect.h"
> > +
> > +extern void abort (void) __attribute__ ((noreturn));
> > +
> > +#define N 27
> > +
> > +/* Condition reduction with different types.  */
> > +
> > +int
> > +condition_reduction (double *a, double min_v)
> > +{
> > +  int last = 0;
> > +
> > +  for (int i = 0; i < N; i++)
> > +if (a[i] < min_v)
> > +  last = i;
> > +
> > +  return last;
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > +  double a[N] = {
> > +  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
> > +  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
> > +  21, 22, 23, 24, 25, 26, 27
> > +  };
> > +
> > +  check_vect ();
> > +
> > +  int ret = condition_reduction (a, 10);
> > +  if (ret != 18)
> > +abort ();
> > +
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 

[PATCH][Arm] Fix multilibs for Armv7-R

2019-10-18 Thread Andre Vieira (lists)

Hi

This patch maps multilibs using -march=armv7-r+vfpv3-d16-fp16 and
-march=armv7-r+vfpv3-d16-fp16+idiv to v7+fp.  This patch also adds a new
multilib for armv7-r+fp.sp and maps -march=armv7-r+fp.sp+idiv,
-march=armv7-r+vfpv3xd-fp16 and -march=armv7-r+vfpv3xd-fp16+idiv to it.

This solves issues encountered when trying to link for
-mcpu=cortex-r8 -mfloat-abi=hard.

Built arm-none-eabi and compared testsuite run of 
-march=armv7e-m+fp/-mfloat-abi=hard to 
-march=armv7-r+fp.sp/-mfloat-abi=hard which looked alright.


Is this OK for trunk?

gcc/ChangeLog:
2019-10-18  Andre Vieira  

* config/arm/t-multilib: Add new multilib variants and new
mappings.

gcc/testsuite/ChangeLog:
2019-10-18  Andre Vieira  

* gcc.target/arm/multilib.exp: Add extra tests.


Cheers,
Andre
diff --git a/gcc/config/arm/t-multilib b/gcc/config/arm/t-multilib
index 08526302283eea03e4a8f22a2a049e85bd7bb6af..84ba5c9d32c839851ce9d75572e43c269f19722d 100644
--- a/gcc/config/arm/t-multilib
+++ b/gcc/config/arm/t-multilib
@@ -24,6 +24,8 @@
 # values during the configure step.  We enforce this during the
 # top-level configury.
 
+s-mlib: $(srcdir)/config/arm/t-multilib $(srcdir)/config/arm/t-aprofile $(srcdir)/config/arm/t-rmprofile
+
 MULTILIB_OPTIONS =
 MULTILIB_DIRNAMES=
 MULTILIB_EXCEPTIONS  =
@@ -63,6 +65,7 @@ all_early_arch		:= armv5tej armv6 armv6j armv6k armv6z armv6kz \
 v7_a_arch_variants	:= $(call all_feat_combs, mp sec)
 v7_a_nosimd_variants	:= +fp +vfpv3 +vfpv3-d16-fp16 +vfpv3-fp16 +vfpv4-d16 +vfpv4
 v7_a_simd_variants	:= +simd +neon-fp16 +neon-vfpv4
+v7_r_sp_variants	:= +fp.sp +fp.sp+idiv +vfpv3xd-fp16 +vfpv3xd-fp16+idiv
 v7ve_nosimd_variants	:= +vfpv3-d16 +vfpv3 +vfpv3-d16-fp16 +vfpv3-fp16 +fp +vfpv4
 v7ve_vfpv3_simd_variants := +neon +neon-fp16
 v7ve_vfpv4_simd_variants := +simd
@@ -86,8 +89,8 @@ SEP := $(and $(HAS_APROFILE),$(HAS_RMPROFILE),/)
 MULTILIB_OPTIONS	+= marm/mthumb
 MULTILIB_DIRNAMES	+= arm thumb
 
-MULTILIB_OPTIONS	+= march=armv5te+fp/march=armv7/march=armv7+fp/$(MULTI_ARCH_OPTS_A)$(SEP)$(MULTI_ARCH_OPTS_RM)
-MULTILIB_DIRNAMES	+= v5te v7 v7+fp $(MULTI_ARCH_DIRS_A) $(MULTI_ARCH_DIRS_RM)
+MULTILIB_OPTIONS	+= march=armv5te+fp/march=armv7/march=armv7+fp/march=armv7-r+fp.sp/$(MULTI_ARCH_OPTS_A)$(SEP)$(MULTI_ARCH_OPTS_RM)
+MULTILIB_DIRNAMES	+= v5te v7 v7+fp v7-r+fp.sp $(MULTI_ARCH_DIRS_A) $(MULTI_ARCH_DIRS_RM)
 
 MULTILIB_OPTIONS	+= mfloat-abi=soft/mfloat-abi=softfp/mfloat-abi=hard
 MULTILIB_DIRNAMES	+= nofp softfp hard
@@ -100,22 +103,31 @@ MULTILIB_REQUIRED	+= mthumb/march=armv7/mfloat-abi=soft
 MULTILIB_REQUIRED	+= mthumb/march=armv7+fp/mfloat-abi=softfp
 MULTILIB_REQUIRED	+= mthumb/march=armv7+fp/mfloat-abi=hard
 
-# Map v7-r down onto common v7 code.
+MULTILIB_REQUIRED	+= mthumb/march=armv7-r+fp.sp/mfloat-abi=softfp
+MULTILIB_REQUIRED	+= mthumb/march=armv7-r+fp.sp/mfloat-abi=hard
+
+# Map v7-r with double precision down onto common v7 code.
 MULTILIB_MATCHES	+= march?armv7=march?armv7-r
 MULTILIB_MATCHES	+= march?armv7=march?armv7-r+idiv
-MULTILIB_MATCHES	+= march?armv7+fp=march?armv7-r+fp
-MULTILIB_MATCHES	+= march?armv7+fp=march?armv7-r+fp+idiv
+MULTILIB_MATCHES	+= $(foreach ARCH,+fp +fp+idiv +vfpv3-d16-fp16 +vfpv3-d16-fp16+idiv, \
+			 march?armv7+fp=march?armv7-r$(ARCH))
+
+# Map v7-r single precision variants to v7-r with single precision.
+MULTILIB_MATCHES	+= $(foreach ARCH, \
+			 $(filter-out +fp.sp, $(v7_r_sp_variants)), \
+			 march?armv7-r+fp.sp=march?armv7-r$(ARCH))
 
 MULTILIB_MATCHES	+= $(foreach ARCH, $(all_early_arch), \
 			 march?armv5te+fp=march?$(ARCH)+fp)
-# Map v8-r down onto common v7 code.
+# Map v8-r down onto common v7 code or v7-r sp.
 MULTILIB_MATCHES	+= march?armv7=march?armv8-r
 MULTILIB_MATCHES	+= $(foreach ARCH, $(v8_r_nosimd_variants), \
 			 march?armv7=march?armv8-r$(ARCH))
 MULTILIB_MATCHES	+= $(foreach ARCH,+simd +crypto, \
 			 march?armv7+fp=march?armv8-r$(ARCH) \
 			 march?armv7+fp=march?armv8-r+crc$(ARCH))
-
+MULTILIB_MATCHES	+= march?armv7-r+fp.sp=march?armv8-r+fp.sp
+MULTILIB_MATCHES	+= march?armv7-r+fp.sp=march?armv8-r+crc+fp.sp
 
 ifeq (,$(HAS_APROFILE))
 # Map all v7-a
@@ -177,7 +189,7 @@ MULTILIB_MATCHES	+= $(foreach ARCH, $(v8_5_a_simd_variants), \
 MULTILIB_REUSE		+= mthumb/march.armv7/mfloat-abi.soft=marm/march.armv7/mfloat-abi.soft
 
 MULTILIB_REUSE		+= $(foreach ABI, hard softfp, \
-			 $(foreach ARCH, armv7+fp, \
+			 $(foreach ARCH, armv7+fp armv7-r+fp\.sp, \
 			   mthumb/march.$(ARCH)/mfloat-abi.$(ABI)=marm/march.$(ARCH)/mfloat-abi.$(ABI)))
 
 # Softfp but no FP, use the soft-float libraries.
diff --git a/gcc/testsuite/gcc.target/arm/multilib.exp b/gcc/testsuite/gcc.target/arm/multilib.exp
index d82306ed630f2df0645ccaa43ba1f9dd3d5c72ed..7d8fc3a57e9bb6e568e9e04224535487c8c72289 100644
--- a/gcc/testsuite/gcc.target/arm/multilib.exp
+++ b/gcc/testsuite/gcc.target/arm/multilib.exp
@@ -753,6 +753,28 @@ if {[multilib_config "rmprofile"] } {
 	{-march=armv8-m.main+fp.dp -mfpu=fpv5-d16 

Re: [PATCH] Move jump threading before reload

2019-10-18 Thread Segher Boessenkool
On Fri, Oct 18, 2019 at 11:06:45AM +0200, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on x86_64-redhat-linux, s390x-redhat-linux
> and ppc64le-redhat-linux.  The offending patch is in gcc-9_1_0-release
> and gcc-9_2_0-release - do I need to backport this fix to gcc-9-branch?

It is a regression on 9 (or so I assume), so yes please.

>   PR rtl-optimization/92007
>   * cfgcleanup.c (thread_jump): Add an assertion that we don't
>   call it after reload if hot/cold partitioning has been done.
>   (class pass_postreload_jump): Rename to
>   pass_jump_after_combine.

This fits on one line just fine.

>   (make_pass_postreload_jump): Rename to
>   make_pass_jump_after_combine.
>   * passes.def(pass_postreload_jump): Move before reload, rename

Space before (.

> --- a/gcc/cfgcleanup.c
> +++ b/gcc/cfgcleanup.c
> @@ -259,6 +259,10 @@ thread_jump (edge e, basic_block b)
>bool failed = false;
>reg_set_iterator rsi;
>  
> +  /* Jump threading may cause fixup_partitions to introduce new crossing 
> edges,
> + which is not allowed after reload.  */
> +  gcc_checking_assert (!reload_completed || !crtl->has_bb_partition);

Thanks for the assert, that will help prevent people from running into
this again.

The patch looks fine to me, but I'm not a global reviewer :-)


Segher


[Patch][Fortran/OpenMP] Don't create "alloc:" for 'target exit data'

2019-10-18 Thread Tobias Burnus

Currently, one has for
  !$omp target exit data map(delete:x)
in the original dump:
  #pragma omp target exit data map(delete:*x) map(alloc:x [pointer 
assign, bias: 0])


The "alloc:" not only does not make sense but also gives run-time 
messages like:

libgomp: GOMP_target_enter_exit_data unhandled kind 0x04

[Depending on the data type, in gfc_trans_omp_clauses's OMP_LIST_MAP, 
add map clauses of type GOMP_MAP_POINTER and/or GOMP_MAP_TO_PSET.]


That's for release:/delete:. However, for 'target exit data' 
(GOMP_target_enter_exit_data) the same issue occurs for "from:"/"always, 
from:".  But "from:" implies "alloc:". – While "alloc:" does not make 
sense for "target exit data" or "update", for "target" or "target data" 
it surely matters. Hence, I only exclude "from:" for exit data and update.


See attached patch. I have additionally Fortran-fied 
libgomp.c/target-20.c to have at least one 'enter/exit target data' test 
case for Fortran.


Build + regtested on x86_64-gnu-linux w/o offloading. And I have tested 
the new test case with nvptx.


Tobias

 	gcc/fortran/
	* trans-openmp.c (gfc_trans_omp_clauses): Do not create
	map(alloc:) for map(delete:/release:) and for
	(from:/always,from:) only if new arg require_from_alloc is true,
	which is the default.
	(gfc_trans_omp_target_exit_data, gfc_trans_omp_target_update):
	Call it with require_from_alloc = false.

	libgomp/
	* testsuite/libgomp.fortran/target9.f90: New.

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index dad11a24430..f890629c73d 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -1852,7 +1852,8 @@ static vec *doacross_steps;
 
 static tree
 gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
-		   locus where, bool declare_simd = false)
+		   locus where, bool declare_simd = false,
+		   bool require_from_alloc = true)
 {
   tree omp_clauses = NULL_TREE, chunk_size, c;
   int list, ifc;
@@ -2163,6 +2164,16 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
 	  if (!n->sym->attr.referenced)
 		continue;
 
+	  /* map(alloc:) etc. is not needed for delete/release
+		 For 'from:', it is needed when setting up the environment
+		 but not for updating or copying out of the data.  */
+	  bool no_extra_pointer = n->u.map_op == OMP_MAP_DELETE
+  || n->u.map_op == OMP_MAP_RELEASE
+  || (!require_from_alloc
+	  && (n->u.map_op == OMP_MAP_FROM
+	  || n->u.map_op
+		 == OMP_MAP_ALWAYS_FROM));
+
 	  tree node = build_omp_clause (input_location, OMP_CLAUSE_MAP);
 	  tree node2 = NULL_TREE;
 	  tree node3 = NULL_TREE;
@@ -2172,7 +2183,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
 		TREE_ADDRESSABLE (decl) = 1;
 	  if (n->expr == NULL || n->expr->ref->u.ar.type == AR_FULL)
 		{
-		  if (POINTER_TYPE_P (TREE_TYPE (decl))
+		  if (!no_extra_pointer
+		  && POINTER_TYPE_P (TREE_TYPE (decl))
 		  && (gfc_omp_privatize_by_reference (decl)
 			  || GFC_DECL_GET_SCALAR_POINTER (decl)
 			  || GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)
@@ -2208,17 +2220,20 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
 	  ptr);
 		  ptr = build_fold_indirect_ref (ptr);
 		  OMP_CLAUSE_DECL (node) = ptr;
-		  node2 = build_omp_clause (input_location,
-		OMP_CLAUSE_MAP);
-		  OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_TO_PSET);
-		  OMP_CLAUSE_DECL (node2) = decl;
-		  OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type);
-		  node3 = build_omp_clause (input_location,
-		OMP_CLAUSE_MAP);
-		  OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER);
-		  OMP_CLAUSE_DECL (node3)
-			= gfc_conv_descriptor_data_get (decl);
-		  OMP_CLAUSE_SIZE (node3) = size_int (0);
+		  if (!no_extra_pointer)
+			{
+			  node2 = build_omp_clause (input_location,
+		OMP_CLAUSE_MAP);
+			  OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_TO_PSET);
+			  OMP_CLAUSE_DECL (node2) = decl;
+			  OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type);
+			  node3 = build_omp_clause (input_location,
+		OMP_CLAUSE_MAP);
+			  OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER);
+			  OMP_CLAUSE_DECL (node3)
+= gfc_conv_descriptor_data_get (decl);
+			  OMP_CLAUSE_SIZE (node3) = size_int (0);
+			}
 
 		  /* We have to check for n->sym->attr.dimension because
 			 of scalar coarrays.  */
@@ -2302,6 +2317,9 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
   ptr);
 		  OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (ptr);
 
+		  if (no_extra_pointer)
+		goto skip_extra_map_pointer;
+
 		  if (POINTER_TYPE_P (TREE_TYPE (decl))
 		  && GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (TREE_TYPE (decl
 		{
@@ -2346,6 +2364,9 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
 		  OMP_CLAUSE_SIZE (node3)
 		= fold_build2 (MINUS_EXPR, sizetype, ptr, ptr2);
 		}
+
+	  

[PATCH] Move jump threading before reload

2019-10-18 Thread Ilya Leoshkevich
Bootstrapped and regtested on x86_64-redhat-linux, s390x-redhat-linux
and ppc64le-redhat-linux.  The offending patch is in gcc-9_1_0-release
and gcc-9_2_0-release - do I need to backport this fix to gcc-9-branch?


r266734 has introduced a new instance of jump threading pass in order to
take advantage of opportunities that combine opens up.  It was perceived
back then that it was beneficial to delay it after reload, since that
might produce even more such opportunities.

Unfortunately jump threading interferes with hot/cold partitioning.  In
the code from PR92007, it converts the following

  +-- 2/HOT +
  | |
  v v
3/HOT --> 5/HOT --> 8/HOT --> 11/COLD --> 6/HOT --EH--> 16/HOT
|   ^
|   |
+---+

into the following:

  +-- 2/HOT --+
  |   |
  v   v
3/HOT --> 8/HOT --> 11/COLD --> 6/COLD --EH--> 16/HOT

This makes hot bb 6 dominated by cold bb 11, and because of this
fixup_partitions makes bb 6 cold as well, which in turn makes EH edge
6->16 a crossing one.  Not only can't we have crossing EH edges, we are
also not allowed to introduce new crossing edges after reload in
general, since it might require extra registers on some targets.

Therefore, move the jump threading pass between combine and hot/cold
partitioning.  Building SPEC 2006 and SPEC 2017 with the old and the new
code indicates that:

* When doing jump threading right after reload, 3889 edges are threaded.
* When doing jump threading right after combine, 3918 edges are
  threaded.

This means this change will not introduce performance regressions.

gcc/ChangeLog:

2019-10-17  Ilya Leoshkevich  

PR rtl-optimization/92007
* cfgcleanup.c (thread_jump): Add an assertion that we don't
call it after reload if hot/cold partitioning has been done.
(class pass_postreload_jump): Rename to
pass_jump_after_combine.
(make_pass_postreload_jump): Rename to
make_pass_jump_after_combine.
* passes.def(pass_postreload_jump): Move before reload, rename
to pass_jump_after_combine.
* tree-pass.h (make_pass_postreload_jump): Rename to
make_pass_jump_after_combine.

gcc/testsuite/ChangeLog:

2019-10-17  Ilya Leoshkevich  

PR rtl-optimization/92007
* g++.dg/opt/pr92007.C: New test (from Arseny Solokha).
---
 gcc/cfgcleanup.c   | 22 +++-
 gcc/passes.def |  2 +-
 gcc/testsuite/g++.dg/opt/pr92007.C | 32 ++
 gcc/tree-pass.h|  2 +-
 4 files changed, 47 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/opt/pr92007.C

diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
index ced7e0a4283..835f7d79ea4 100644
--- a/gcc/cfgcleanup.c
+++ b/gcc/cfgcleanup.c
@@ -259,6 +259,10 @@ thread_jump (edge e, basic_block b)
   bool failed = false;
   reg_set_iterator rsi;
 
+  /* Jump threading may cause fixup_partitions to introduce new crossing edges,
+ which is not allowed after reload.  */
+  gcc_checking_assert (!reload_completed || !crtl->has_bb_partition);
+
   if (b->flags & BB_NONTHREADABLE_BLOCK)
 return NULL;
 
@@ -3280,10 +3284,10 @@ make_pass_jump (gcc::context *ctxt)
 
 namespace {
 
-const pass_data pass_data_postreload_jump =
+const pass_data pass_data_jump_after_combine =
 {
   RTL_PASS, /* type */
-  "postreload_jump", /* name */
+  "jump_after_combine", /* name */
   OPTGROUP_NONE, /* optinfo_flags */
   TV_JUMP, /* tv_id */
   0, /* properties_required */
@@ -3293,20 +3297,20 @@ const pass_data pass_data_postreload_jump =
   0, /* todo_flags_finish */
 };
 
-class pass_postreload_jump : public rtl_opt_pass
+class pass_jump_after_combine : public rtl_opt_pass
 {
 public:
-  pass_postreload_jump (gcc::context *ctxt)
-: rtl_opt_pass (pass_data_postreload_jump, ctxt)
+  pass_jump_after_combine (gcc::context *ctxt)
+: rtl_opt_pass (pass_data_jump_after_combine, ctxt)
   {}
 
   /* opt_pass methods: */
   virtual unsigned int execute (function *);
 
-}; // class pass_postreload_jump
+}; // class pass_jump_after_combine
 
 unsigned int
-pass_postreload_jump::execute (function *)
+pass_jump_after_combine::execute (function *)
 {
   cleanup_cfg (flag_thread_jumps ? CLEANUP_THREADING : 0);
   return 0;
@@ -3315,9 +3319,9 @@ pass_postreload_jump::execute (function *)
 } // anon namespace
 
 rtl_opt_pass *
-make_pass_postreload_jump (gcc::context *ctxt)
+make_pass_jump_after_combine (gcc::context *ctxt)
 {
-  return new pass_postreload_jump (ctxt);
+  return new pass_jump_after_combine (ctxt);
 }
 
 namespace {
diff --git a/gcc/passes.def 

Re: [SVE] PR91272

2019-10-18 Thread Richard Sandiford
Prathamesh Kulkarni  writes:
> Hi,
> The attached patch tries to fix PR91272.
> Does it look OK ?
>
> With patch, I see following failures for aarch64-sve.exp:
> FAIL: gcc.target/aarch64/sve/clastb_1.c -march=armv8.2-a+sve
> scan-assembler \\tclastb\\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\\.s
> FAIL: gcc.target/aarch64/sve/clastb_2.c -march=armv8.2-a+sve
> scan-assembler \\tclastb\\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\\.s
> FAIL: gcc.target/aarch64/sve/clastb_3.c -march=armv8.2-a+sve
> scan-assembler \\tclastb\\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\\.b
> FAIL: gcc.target/aarch64/sve/clastb_5.c -march=armv8.2-a+sve
> scan-assembler \\tclastb\\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\\.d
>
> For instance, in clastb_1.c, it now emits:
> clastb  s1, p1, s1, z0.s
> while using a fully predicated loop.
> Should I adjust the tests ?

Yeah, that's an improvement really.

> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index acdd90784dc..2cad2cb94c8 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -10016,7 +10016,8 @@ vectorizable_condition (stmt_vec_info stmt_info, 
> gimple_stmt_iterator *gsi,
>/* See whether another part of the vectorized code applies a loop
>mask to the condition, or to its inverse.  */
>  
> -  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
> +  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
> +   && reduction_type != EXTRACT_LAST_REDUCTION)
>   {
> scalar_cond_masked_key cond (cond_expr, ncopies);
> if (loop_vinfo->scalar_cond_masked_set.contains (cond))

The context here is:

  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
{
  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
}
  else
{
  bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
  cond.code = invert_tree_comparison (cond.code, honor_nans);
  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
{
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
  vectype, j);
  cond_code = cond.code;
  swap_cond_operands = true;
}
}

Rather than have another instance of vect_get_loop_mask, I think
it would cleaner to use a nonnull "masks" to decide whether to apply
the loop mask:

  vec_loop_masks *masks = NULL;
  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
{
  if (reduction_type == EXTRACT_LAST_REDUCTION
  || loop_vinfo->scalar_cond_masked_set.contains (cond))
masks = &LOOP_VINFO_MASKS (loop_vinfo);
  else
{
  ...
}

Then:

> @@ -10116,6 +10117,15 @@ vectorizable_condition (stmt_vec_info stmt_info, 
> gimple_stmt_iterator *gsi,
>vec_then_clause = vec_oprnds2[i];
>vec_else_clause = vec_oprnds3[i];
>  
> +  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
> +   && reduction_type == EXTRACT_LAST_REDUCTION)
> + {
> +   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
> +   unsigned vec_num = vec_oprnds0.length ();
> +   loop_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
> +   vectype, vec_num * j + i);
> + }
> +

...do this vect_get_loop_mask under the condition of "if (masks)".

> if (swap_cond_operands)
>   std::swap (vec_then_clause, vec_else_clause);
>  
> @@ -10180,7 +10190,7 @@ vectorizable_condition (stmt_vec_info stmt_info, 
> gimple_stmt_iterator *gsi,
>vec != { 0, ... } (masked in the MASK_LOAD,
>unmasked in the VEC_COND_EXPR).  */
>  
> -   if (loop_mask)
> +   if (loop_mask && reduction_type != EXTRACT_LAST_REDUCTION)
>   {
> if (COMPARISON_CLASS_P (vec_compare))
>   {
> @@ -10220,6 +10230,16 @@ vectorizable_condition (stmt_vec_info stmt_info, 
> gimple_stmt_iterator *gsi,
> vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
> vec_compare = vec_compare_name;
>   }

The code above here:

  if (!is_gimple_val (vec_compare))
{
  tree vec_compare_name = make_ssa_name (vec_cmp_type);
  gassign *new_stmt = gimple_build_assign (vec_compare_name,
   vec_compare);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
  vec_compare = vec_compare_name;
}

is doing something similar to the new COND_EXPR handling:

  if (COMPARISON_CLASS_P (vec_compare))
{
  tree tmp = 

Re: [PATCH] Relax integer condition reduction, simplify vect_is_simple_reduction

2019-10-18 Thread Christophe Lyon
On Wed, 16 Oct 2019 at 15:09, Richard Biener  wrote:
>
>
> It happens we cannot have different typed data and index for
> integer condition reductions right now, for whatever reason.
> The following makes that work, even for double data and integer index.
> There's hope this enables some relevant amount of extra vectorization.
>
> Actually this is fallout from simplifying vect_is_simple_reduction
> down to SSA cycle detection and moving reduction validity / handling
> checks to vectorizable_reduction (thus a single place).
>
> I've decided to take an intermediate step here as I enable more
> vectorization.  Which also needed the vect_transform_stmt change.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Richard.
>
> * tree-vect-loop.c (vect_valid_reduction_input_p): Remove.
> (vect_is_simple_reduction): Delay checking to
> vectorizable_reduction and relax the checking.
> (vectorizable_reduction): Check we have a simple use.  Check
> for bogus condition reductions.
> * tree-vect-stmts.c (vect_transform_stmt): Make sure we
> are looking at the last stmt in a pattern sequence when
> filling in backedge PHI values.
>
> * gcc.dg/vect/vect-cond-reduc-3.c: New testcase.
> * gcc.dg/vect/vect-cond-reduc-4.c: Likewise.
>

Hi Richard,

The new test vect-cond-reduc-3.c fails on arm*linux-gnueabihf when
configured --with-fpu neon-*:
FAIL: gcc.dg/vect/vect-cond-reduc-3.c -flto -ffat-lto-objects
scan-tree-dump-times vect "LOOP VECTORIZED" 2
FAIL: gcc.dg/vect/vect-cond-reduc-3.c -flto -ffat-lto-objects
scan-tree-dump-times vect "condition expression based on integer
induction." 2
FAIL: gcc.dg/vect/vect-cond-reduc-3.c scan-tree-dump-times vect "LOOP
VECTORIZED" 2
FAIL: gcc.dg/vect/vect-cond-reduc-3.c scan-tree-dump-times vect
"condition expression based on integer induction." 2

vect_float is true in such cases, so is that a vectorization failure
or is that expected on this target and the test should be disabled?

Thanks,

Christophe


> diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c 
> b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
> new file mode 100644
> index 000..a5b3849a8c3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
> @@ -0,0 +1,45 @@
> +/* { dg-require-effective-target vect_condition } */
> +/* { dg-require-effective-target vect_float } */
> +
> +#include "tree-vect.h"
> +
> +extern void abort (void) __attribute__ ((noreturn));
> +
> +#define N 27
> +
> +/* Condition reduction with different types.  */
> +
> +int
> +condition_reduction (float *a, float min_v)
> +{
> +  int last = 0;
> +
> +  for (int i = 0; i < N; i++)
> +if (a[i] < min_v)
> +  last = i;
> +
> +  return last;
> +}
> +
> +int
> +main (void)
> +{
> +  float a[N] = {
> +  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
> +  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
> +  21, 22, 23, 24, 25, 26, 27
> +  };
> +
> +  check_vect ();
> +
> +  int ret = condition_reduction (a, 10);
> +  if (ret != 18)
> +abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with 
> FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
> +/* { dg-final { scan-tree-dump-times "condition expression based on integer 
> induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c 
> b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
> new file mode 100644
> index 000..6b6d17fb93c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
> @@ -0,0 +1,45 @@
> +/* { dg-require-effective-target vect_condition } */
> +/* { dg-require-effective-target vect_double } */
> +
> +#include "tree-vect.h"
> +
> +extern void abort (void) __attribute__ ((noreturn));
> +
> +#define N 27
> +
> +/* Condition reduction with different types.  */
> +
> +int
> +condition_reduction (double *a, double min_v)
> +{
> +  int last = 0;
> +
> +  for (int i = 0; i < N; i++)
> +if (a[i] < min_v)
> +  last = i;
> +
> +  return last;
> +}
> +
> +int
> +main (void)
> +{
> +  double a[N] = {
> +  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
> +  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
> +  21, 22, 23, 24, 25, 26, 27
> +  };
> +
> +  check_vect ();
> +
> +  int ret = condition_reduction (a, 10);
> +  if (ret != 18)
> +abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with 
> FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
> +/* { dg-final { scan-tree-dump-times "condition expression based on integer 
> induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
> +
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 455251070d0..0530d6643b4 100644
> --- 

Re: [PATCH] Fix objsz ICE (PR tree-optimization/92056)

2019-10-18 Thread Jakub Jelinek
On Thu, Oct 17, 2019 at 06:07:37PM -0600, Martin Sebor wrote:
> On 10/17/19 1:00 AM, Jakub Jelinek wrote:
> > Hi!
> > 
> > The following bug has been introduced when cond_expr_object_size has been
> > added in 2007.  We want to treat a COND_EXPR like a PHI with 2 arguments,
> > and PHI is handled in a loop that breaks if the lhs value is unknown, and
> > then does the if (TREE_CODE (arg) == SSA_NAME) merge_object_sizes else
> > expr_object_size which is used even in places that handle just a single
> > operand (with the lhs value initialized to the opposite value of unknown
> > first).  At least expr_object_size asserts that the lhs value is not
> > unknown at the start.
> > 
> > Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> > trunk?
> 
> I'm not sure the other change (r277134) is the right way to fix
> the problem with the missing initialization.  It was introduced
> with the merger of the sprintf pass.  The latter still calls
> init_object_sizes in get_destination_size.  I think the call
> should be moved from there into the new combined sprintf/strlen
> printf_strlen_execute function that also calls fini_object_sizes,
> and the one from determine_min_objsize should be removed.  I can
> take care of it unless you think it needs to stay the way it is
> now for some reason.

Why?  As I said, init_object_sizes is designed to be called multiple times
and is cheap (load + comparison + early return) if it has been already
called, so is meant to be called only when needed, rather than at the
beginning of a pass just in case something appears.  The objsz pass does the
same.  No need to allocate bitmaps/vectors if nothing will need them.

Jakub


[PATCH] [MIPS] Mark built-in functions as pure

2019-10-18 Thread Mihailo Stojanovic
MIPS built-in functions are currently not marked as pure, which
invalidates pointers across built-in function calls. If a pointer is
alive across a built-in call, dereferencing it before and after the call
will generate two load instructions instead of one.

This marks the built-ins as pure, which removes the unnecessary load.

Tested on mips-mti-linux-gnu.

gcc/ChangeLog:

* config/mips/mips.c (DIRECT_BUILTIN_PURE): New macro. Add a
pure qualifier to the built-in.
(MSA_BUILTIN_PURE): New macro. Add a pure qualifier to the MSA
built-ins.
(struct mips_builtin_description): Add is_pure flag.
(mips_init_builtins): Mark built-in as pure if the flag in the
corresponding mips_builtin_description struct is set.

gcc/testsuite/ChangeLog:

* gcc.target/mips/mips-builtins-pure.c: New test.
---
 gcc/config/mips/mips.c | 1306 ++--
 gcc/testsuite/gcc.target/mips/mips-builtins-pure.c |   20 +
 2 files changed, 684 insertions(+), 642 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/mips-builtins-pure.c

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 3a77097..e337b82 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -15242,6 +15242,9 @@ struct mips_builtin_description {
 
   /* Whether the function is available.  */
   unsigned int (*avail) (void);
+
+  /* Whether the function is pure.  */
+  bool is_pure;
 };
 
 AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI)
@@ -15273,24 +15276,33 @@ AVAIL_NON_MIPS16 (msa, TARGET_MSA)
AVAIL is the name of the availability predicate, without the leading
mips_builtin_avail_.  */
 #define MIPS_BUILTIN(INSN, COND, NAME, BUILTIN_TYPE,   \
-FUNCTION_TYPE, AVAIL)  \
+FUNCTION_TYPE, AVAIL, PURE)\
   { CODE_FOR_mips_ ## INSN, MIPS_FP_COND_ ## COND, \
 "__builtin_mips_" NAME, BUILTIN_TYPE, FUNCTION_TYPE,   \
-mips_builtin_avail_ ## AVAIL }
+mips_builtin_avail_ ## AVAIL, PURE }
 
 /* Define __builtin_mips_, which is a MIPS_BUILTIN_DIRECT function
mapped to instruction CODE_FOR_mips_,  FUNCTION_TYPE and AVAIL
are as for MIPS_BUILTIN.  */
 #define DIRECT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \
-  MIPS_BUILTIN (INSN, f, #INSN, MIPS_BUILTIN_DIRECT, FUNCTION_TYPE, AVAIL)
+  MIPS_BUILTIN (INSN, f, #INSN, MIPS_BUILTIN_DIRECT, FUNCTION_TYPE,\
+   AVAIL, false)
+
+/* Define __builtin_mips_, which is a MIPS_BUILTIN_DIRECT pure function
+   mapped to instruction CODE_FOR_mips_,  FUNCTION_TYPE and AVAIL
+   are as for MIPS_BUILTIN.  */
+#define DIRECT_BUILTIN_PURE(INSN, FUNCTION_TYPE, AVAIL)\
+  MIPS_BUILTIN (INSN, f, #INSN, MIPS_BUILTIN_DIRECT, FUNCTION_TYPE,\
+   AVAIL, true)
 
 /* Define __builtin_mips___{s,d} functions, both of which
are subject to mips_builtin_avail_.  */
 #define CMP_SCALAR_BUILTINS(INSN, COND, AVAIL) \
   MIPS_BUILTIN (INSN ## _cond_s, COND, #INSN "_" #COND "_s",   \
-   MIPS_BUILTIN_CMP_SINGLE, MIPS_INT_FTYPE_SF_SF, AVAIL),  \
+   MIPS_BUILTIN_CMP_SINGLE, MIPS_INT_FTYPE_SF_SF, AVAIL,   \
+   false), \
   MIPS_BUILTIN (INSN ## _cond_d, COND, #INSN "_" #COND "_d",   \
-   MIPS_BUILTIN_CMP_SINGLE, MIPS_INT_FTYPE_DF_DF, AVAIL)
+   MIPS_BUILTIN_CMP_SINGLE, MIPS_INT_FTYPE_DF_DF, AVAIL, false)
 
 /* Define __builtin_mips_{any,all,upper,lower}___ps.
The lower and upper forms are subject to mips_builtin_avail_
@@ -15298,36 +15310,36 @@ AVAIL_NON_MIPS16 (msa, TARGET_MSA)
 #define CMP_PS_BUILTINS(INSN, COND, AVAIL) \
   MIPS_BUILTIN (INSN ## _cond_ps, COND, "any_" #INSN "_" #COND "_ps",  \
MIPS_BUILTIN_CMP_ANY, MIPS_INT_FTYPE_V2SF_V2SF, \
-   mips3d),\
+   mips3d, false), \
   MIPS_BUILTIN (INSN ## _cond_ps, COND, "all_" #INSN "_" #COND "_ps",  \
MIPS_BUILTIN_CMP_ALL, MIPS_INT_FTYPE_V2SF_V2SF, \
-   mips3d),\
+   mips3d, false), \
   MIPS_BUILTIN (INSN ## _cond_ps, COND, "lower_" #INSN "_" #COND "_ps",
\
MIPS_BUILTIN_CMP_LOWER, MIPS_INT_FTYPE_V2SF_V2SF,   \
-   AVAIL), \
+   AVAIL, false),  \
   MIPS_BUILTIN (INSN ## _cond_ps, COND, "upper_" #INSN "_" #COND "_ps",
\
MIPS_BUILTIN_CMP_UPPER, MIPS_INT_FTYPE_V2SF_V2SF,   \
-   AVAIL)
+   AVAIL, false)
 
 /* Define