Re:

2021-06-01 Thread Hongtao Liu via Gcc-patches
Please discard this one, sorry for disturbing.
Obviously I'm new to git send-email.

On Wed, Jun 2, 2021 at 1:40 PM liuhongt via Gcc-patches
 wrote:
>
> This is the updated patch.
>
>


-- 
BR,
Hongtao


[PATCH] Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)).

2021-06-01 Thread liuhongt via Gcc-patches
For i386, it will enable below opt

from
notl    %edi
vpbroadcastd    %edi, %xmm0
vpand   %xmm1, %xmm0, %xmm0
to
vpbroadcastd    %edi, %xmm0
vpandn   %xmm1, %xmm0, %xmm0

gcc/ChangeLog:

PR target/100711
* simplify-rtx.c (simplify_unary_operation_1):
Canonicalize (vec_duplicate (not A)) to
(not (vec_duplicate A)).
* doc/md.texi (Insn Canonicalizations): Document
canonicalization of vec_duplicate.

gcc/testsuite/ChangeLog:

PR target/100711
* gcc.target/i386/avx2-pr100711.c: New test.
* gcc.target/i386/avx512bw-pr100711.c: New test.
---
 gcc/doc/md.texi   |  5 ++
 gcc/simplify-rtx.c|  6 ++
 gcc/testsuite/gcc.target/i386/avx2-pr100711.c | 73 +++
 .../gcc.target/i386/avx512bw-pr100711.c   | 48 
 4 files changed, 132 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr100711.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 0e65b3ae663..06b42901413 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -8297,6 +8297,11 @@ operand of @code{mult} is also a shift, then that is 
extended also.
 This transformation is only applied when it can be proven that the
 original operation had sufficient precision to prevent overflow.
 
+@cindex @code{vec_duplicate}, canonicalization of
+@item
+@code{(vec_duplicate (not @var{a}))} is converted to
+@code{(not (vec_duplicate @var{a}))}.
+
 @end itemize
 
 Further canonicalization rules are defined in the function
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 04423bbd195..171fc447d50 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1708,6 +1708,12 @@ simplify_context::simplify_unary_operation_1 (rtx_code 
code, machine_mode mode,
 #endif
   break;
 
+  /* Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)).  */
+case VEC_DUPLICATE:
+  if (GET_CODE (op) == NOT)
+   return gen_rtx_NOT (mode, gen_rtx_VEC_DUPLICATE (mode, XEXP (op, 0)));
+  break;
+
 default:
   break;
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr100711.c 
b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
new file mode 100644
index 000..5b144623873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
@@ -0,0 +1,73 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 8 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+typedef char v16qi __attribute__((vector_size(16)));
+typedef char v32qi __attribute__((vector_size(32)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+
+v16qi
+f1 (char a, v16qi c)
+{
+  char b = ~a;
+  return (__extension__(v16qi) {b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b}) & c;
+}
+
+v32qi
+f2 (char a, v32qi c)
+{
+  char b = ~a;
+  return (__extension__(v32qi) {b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b}) & c;
+}
+
+v8hi
+f3 (short a, v8hi c)
+{
+  short b = ~a;
+  return (__extension__(v8hi) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v16hi
+f4 (short a, v16hi c)
+{
+  short b = ~a;
+  return (__extension__(v16hi) {b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b}) & c;
+}
+
+v4si
+f5 (int a, v4si c)
+{
+  int b = ~a;
+  return (__extension__(v4si) {b, b, b, b}) & c;
+}
+
+v8si
+f6 (int a, v8si c)
+{
+  int b = ~a;
+  return (__extension__(v8si) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v2di
+f7 (long long a, v2di c)
+{
+  long long b = ~a;
+  return (__extension__(v2di) {b, b}) & c;
+}
+
+v4di
+f8 (long long a, v4di c)
+{
+  long long b = ~a;
+  return (__extension__(v4di) {b, b, b, b}) & c;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c 
b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
new file mode 100644
index 000..f0a103d0bc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 4 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+
+typedef char v64qi __attribute__((vector_size(64)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef long long v8di __attribute__((vector_size(64)));
+
+v64qi
+f1 (char a, v64qi c)
+{
+  char b = ~a;
+  return (__extension__(v64qi) {b, b, b, b, b, b, b, b,
+  

[PATCH] Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)).

2021-06-01 Thread liuhongt via Gcc-patches
For i386, it will enable below opt

from
notl    %edi
vpbroadcastd    %edi, %xmm0
vpand   %xmm1, %xmm0, %xmm0
to
vpbroadcastd    %edi, %xmm0
vpandn   %xmm1, %xmm0, %xmm0

gcc/ChangeLog:

PR target/100711
* simplify-rtx.c (simplify_unary_operation_1):
Canonicalize (vec_duplicate (not A)) to
(not (vec_duplicate A)).
* doc/md.texi (Insn Canonicalizations): Document
canonicalization of vec_duplicate.

gcc/testsuite/ChangeLog:

PR target/100711
* gcc.target/i386/avx2-pr100711.c: New test.
* gcc.target/i386/avx512bw-pr100711.c: New test.
---
 gcc/doc/md.texi   |  5 ++
 gcc/simplify-rtx.c|  6 ++
 gcc/testsuite/gcc.target/i386/avx2-pr100711.c | 73 +++
 .../gcc.target/i386/avx512bw-pr100711.c   | 48 
 4 files changed, 132 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr100711.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 0e65b3ae663..06b42901413 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -8297,6 +8297,11 @@ operand of @code{mult} is also a shift, then that is 
extended also.
 This transformation is only applied when it can be proven that the
 original operation had sufficient precision to prevent overflow.
 
+@cindex @code{vec_duplicate}, canonicalization of
+@item
+@code{(vec_duplicate (not @var{a}))} is converted to
+@code{(not (vec_duplicate @var{a}))}.
+
 @end itemize
 
 Further canonicalization rules are defined in the function
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 04423bbd195..171fc447d50 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1708,6 +1708,12 @@ simplify_context::simplify_unary_operation_1 (rtx_code 
code, machine_mode mode,
 #endif
   break;
 
+  /* Canonicalize (vec_duplicate (not A)) to (not (vec_duplicate A)).  */
+case VEC_DUPLICATE:
+  if (GET_CODE (op) == NOT)
+   return gen_rtx_NOT (mode, gen_rtx_VEC_DUPLICATE (mode, XEXP (op, 0)));
+  break;
+
 default:
   break;
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr100711.c 
b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
new file mode 100644
index 000..5b144623873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c
@@ -0,0 +1,73 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 8 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+typedef char v16qi __attribute__((vector_size(16)));
+typedef char v32qi __attribute__((vector_size(32)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef short v16hi __attribute__((vector_size(32)));
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef long long v2di __attribute__((vector_size(16)));
+typedef long long v4di __attribute__((vector_size(32)));
+
+v16qi
+f1 (char a, v16qi c)
+{
+  char b = ~a;
+  return (__extension__(v16qi) {b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b}) & c;
+}
+
+v32qi
+f2 (char a, v32qi c)
+{
+  char b = ~a;
+  return (__extension__(v32qi) {b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b}) & c;
+}
+
+v8hi
+f3 (short a, v8hi c)
+{
+  short b = ~a;
+  return (__extension__(v8hi) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v16hi
+f4 (short a, v16hi c)
+{
+  short b = ~a;
+  return (__extension__(v16hi) {b, b, b, b, b, b, b, b,
+b, b, b, b, b, b, b, b}) & c;
+}
+
+v4si
+f5 (int a, v4si c)
+{
+  int b = ~a;
+  return (__extension__(v4si) {b, b, b, b}) & c;
+}
+
+v8si
+f6 (int a, v8si c)
+{
+  int b = ~a;
+  return (__extension__(v8si) {b, b, b, b, b, b, b, b}) & c;
+}
+
+v2di
+f7 (long long a, v2di c)
+{
+  long long b = ~a;
+  return (__extension__(v2di) {b, b}) & c;
+}
+
+v4di
+f8 (long long a, v4di c)
+{
+  long long b = ~a;
+  return (__extension__(v4di) {b, b, b, b}) & c;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c 
b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
new file mode 100644
index 000..f0a103d0bc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "pandn" 4 } } */
+/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */
+
+typedef char v64qi __attribute__((vector_size(64)));
+typedef short v32hi __attribute__((vector_size(64)));
+typedef int v16si __attribute__((vector_size(64)));
+typedef long long v8di __attribute__((vector_size(64)));
+
+v64qi
+f1 (char a, v64qi c)
+{
+  char b = ~a;
+  return (__extension__(v64qi) {b, b, b, b, b, b, b, b,
+  

[no subject]

2021-06-01 Thread liuhongt via Gcc-patches
This is the updated patch.




Re: [PATCH] IBM Z: Remove match_scratch workaround

2021-06-01 Thread Jeff Law via Gcc-patches




On 6/1/2021 8:21 PM, Ilya Leoshkevich via Gcc-patches wrote:

Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?



Since commit dd1ef00c45ba ("Fix bug in the define_subst handling that
made match_scratch unusable for multi-alternative patterns.") the
workaround for that bug in *ashrdi3_31 is not only no
longer necessary, but actually breaks the build.

Get rid of it by using only one alternative in (match_scratch).  It
will be replicated as many times as needed in order to match the
pattern with which (define_subst) is used.

gcc/ChangeLog:

* config/s390/s390.md(*ashrdi3_31): Use a single
constraint.
* config/s390/subst.md(cconly_subst): Use a single constraint
in (match_scratch).

gcc/testsuite/ChangeLog:

* gcc.target/s390/ashr.c: New test.

Presumably this fixes:

../../../gcc/gcc/config/s390/s390.md:9335:1: alternative number mismatch: 
operand 0 has 4, operand 1 has 2
../../../gcc/gcc/config/s390/s390.md:9335:1: alternative number mismatch: 
operand 0 has 4, operand 2 has 2
../../../gcc/gcc/config/s390/s390.md:9335:1: wrong number of alternatives in 
the output template
../../../gcc/gcc/config/s390/s390.md:9349:1: alternative number mismatch: 
operand 0 has 4, operand 1 has 2
../../../gcc/gcc/config/s390/s390.md:9349:1: alternative number mismatch: 
operand 0 has 4, operand 2 has 2
../../../gcc/gcc/config/s390/s390.md:9349:1: wrong number of alternatives in 
the output template
../../../gcc/gcc/config/s390/s390.md:9349:1: alternative number mismatch: 
operand 0 has 4, operand 1 has 2
../../../gcc/gcc/config/s390/s390.md:9349:1: alternative number mismatch: 
operand 0 has 4, operand 2 has 2
../../../gcc/gcc/config/s390/s390.md:9349:1: wrong number of alternatives in 
the output template


The tester has been tripping over that for about a week.

I'll let the s390 maintainers chime in on the correctness of the change.

Jeff


Re: [PATCH 10/11] sh: Update unexpected empty split condition

2021-06-01 Thread Oleg Endo
On Wed, 2021-06-02 at 00:05 -0500, Kewen Lin wrote:
> gcc/ChangeLog:
> 
>   * config/sh/sh.md (doloop_end_split): Fix empty split condition.
> ---
>  gcc/config/sh/sh.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
> index e3af9ae21c1..93ee7c9a7de 100644
> --- a/gcc/config/sh/sh.md
> +++ b/gcc/config/sh/sh.md
> @@ -6424,7 +6424,7 @@ (define_insn_and_split "doloop_end_split"
> (clobber (reg:SI T_REG))]
>"TARGET_SH2"
>"#"
> -  ""
> +  "&& 1"
>[(parallel [(set (reg:SI T_REG)
>  (eq:SI (match_dup 2) (const_int 1)))
> (set (match_dup 0) (plus:SI (match_dup 2) (const_int -1)))])

This is OK (obvious).

Cheers,
Oleg



[PATCH 04/11] cris: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/cris/cris.md (*addi_reload): Fix empty split condition.
---
 gcc/config/cris/cris.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 7de0ec63fcf..d5a3c703a83 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -1311,7 +1311,7 @@ (define_insn_and_split "*addi_reload"
&& (INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)
&& (reload_in_progress || reload_completed)"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0)
(plus:SI (ashift:SI (match_dup 2) (match_dup 3)) (match_dup 1)))]
   "operands[3] = operands[3] == const2_rtx ? const1_rtx : const2_rtx;")
-- 
2.17.1



[PATCH 11/11] sparc: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/sparc/sparc.md (*snedi_zero_vis3,
*neg_snedi_zero_subxc, *plus_snedi_zero,
*plus_plus_snedi_zero, *minus_snedi_zero,
*minus_minus_snedi_zero): Fix empty split condition.
---
 gcc/config/sparc/sparc.md | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index c5d369626cc..0f85cb192c8 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -855,7 +855,7 @@ (define_insn_and_split "*snedi_zero_vis3"
(clobber (reg:CCX CC_REG))]
   "TARGET_ARCH64 && TARGET_VIS3"
   "#"
-  ""
+  "&& 1"
   [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
(set (match_dup 0) (ltu:W (reg:CCXC CC_REG) (const_int 0)))]
   ""
@@ -882,7 +882,7 @@ (define_insn_and_split "*neg_snedi_zero_subxc"
(clobber (reg:CCX CC_REG))]
   "TARGET_ARCH64 && TARGET_SUBXC"
   "#"
-  ""
+  "&& 1"
   [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
(set (match_dup 0) (neg:W (ltu:W (reg:CCXC CC_REG) (const_int 0]
   ""
@@ -984,7 +984,7 @@ (define_insn_and_split "*plus_snedi_zero"
(clobber (reg:CCX CC_REG))]
   "TARGET_ARCH64 && TARGET_VIS3"
   "#"
-  ""
+  "&& 1"
   [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
(set (match_dup 0) (plus:W (ltu:W (reg:CCXC CC_REG) (const_int 0))
  (match_dup 2)))]
@@ -1000,7 +1000,7 @@ (define_insn_and_split "*plus_plus_snedi_zero"
(clobber (reg:CCX CC_REG))]
   "TARGET_ARCH64 && TARGET_VIS3"
   "#"
-  ""
+  "&& 1"
   [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
(set (match_dup 0) (plus:W (plus:W (ltu:W (reg:CCXC CC_REG) (const_int 0))
  (match_dup 2))
@@ -1048,7 +1048,7 @@ (define_insn_and_split "*minus_snedi_zero"
(clobber (reg:CCX CC_REG))]
   "TARGET_ARCH64 && TARGET_SUBXC"
   "#"
-  ""
+  "&& 1"
   [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
(set (match_dup 0) (minus:W (match_dup 2)
   (ltu:W (reg:CCXC CC_REG) (const_int 0]
@@ -1064,7 +1064,7 @@ (define_insn_and_split "*minus_minus_snedi_zero"
(clobber (reg:CCX CC_REG))]
   "TARGET_ARCH64 && TARGET_SUBXC"
   "#"
-  ""
+  "&& 1"
   [(set (reg:CCXC CC_REG) (compare:CCXC (not:DI (match_dup 1)) (const_int -1)))
(set (match_dup 0) (minus:W (minus:W (match_dup 2)
(ltu:W (reg:CCXC CC_REG) (const_int 0)))
-- 
2.17.1



[PATCH 10/11] sh: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/sh/sh.md (doloop_end_split): Fix empty split condition.
---
 gcc/config/sh/sh.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index e3af9ae21c1..93ee7c9a7de 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -6424,7 +6424,7 @@ (define_insn_and_split "doloop_end_split"
(clobber (reg:SI T_REG))]
   "TARGET_SH2"
   "#"
-  ""
+  "&& 1"
   [(parallel [(set (reg:SI T_REG)
   (eq:SI (match_dup 2) (const_int 1)))
  (set (match_dup 0) (plus:SI (match_dup 2) (const_int -1)))])
-- 
2.17.1



[PATCH 09/11] or1k: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/or1k/or1k.md (*movdi): Fix empty split condition.
---
 gcc/config/or1k/or1k.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/or1k/or1k.md b/gcc/config/or1k/or1k.md
index eb94efba0e4..495b3e277ba 100644
--- a/gcc/config/or1k/or1k.md
+++ b/gcc/config/or1k/or1k.md
@@ -351,7 +351,7 @@ (define_insn_and_split "*movdi"
   "register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode)"
   "#"
-  ""
+  "&& 1"
   [(const_int 0)]
 {
   rtx l0 = operand_subword (operands[0], 0, 0, DImode);
-- 
2.17.1



[PATCH 08/11] mips: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/mips/mips.md (<anonymous>, bswapsi2, bswapdi2): Fix empty
split condition.
---
 gcc/config/mips/mips.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index eef3cfd50a8..455b9b802f6 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -5835,7 +5835,7 @@ (define_insn_and_split ""
 (match_operand:SI 2 "immediate_operand" "I")))]
   "TARGET_MIPS16"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0) (match_dup 1))
(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 2)))]
   ""
@@ -5871,7 +5871,7 @@ (define_insn_and_split "bswapsi2"
(bswap:SI (match_operand:SI 1 "register_operand" "d")))]
   "ISA_HAS_WSBH && ISA_HAS_ROR"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_WSBH))
(set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))]
   ""
@@ -5882,7 +5882,7 @@ (define_insn_and_split "bswapdi2"
(bswap:DI (match_operand:DI 1 "register_operand" "d")))]
   "TARGET_64BIT && ISA_HAS_WSBH"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_DSBH))
(set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_DSHD))]
   ""
-- 
2.17.1



[PATCH 05/11] h8300: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/h8300/combiner.md (*andsi3_lshiftrt_n_sb): Fix empty split
condition.
---
 gcc/config/h8300/combiner.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/h8300/combiner.md b/gcc/config/h8300/combiner.md
index 20e19da0419..e31bd507a6f 100644
--- a/gcc/config/h8300/combiner.md
+++ b/gcc/config/h8300/combiner.md
@@ -271,7 +271,7 @@ (define_insn_and_split "*andsi3_lshiftrt_n_sb"
   "exact_log2 (INTVAL (operands[3])) < 16
&& INTVAL (operands[2]) + exact_log2 (INTVAL (operands[3])) == 31"
   "#"
-  ""
+  "&& 1"
   [(parallel [(set (match_dup 0)
   (and:SI (lshiftrt:SI (match_dup 1) (match_dup 2))
   (match_dup 3)))
-- 
2.17.1



[PATCH 03/11] arm: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/arm/vfp.md (no_literal_pool_df_immediate,
no_literal_pool_sf_immediate): Fix empty split condition.
---
 gcc/config/arm/vfp.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index f97af92716b..55b6c1ac585 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -2129,7 +2129,7 @@ (define_insn_and_split "no_literal_pool_df_immediate"
&& !arm_const_double_rtx (operands[1])
&& !(TARGET_VFP_DOUBLE && vfp3_const_double_rtx (operands[1]))"
   "#"
-  ""
+  "&& 1"
   [(const_int 0)]
 {
   long buf[2];
@@ -2154,7 +2154,7 @@ (define_insn_and_split "no_literal_pool_sf_immediate"
&& TARGET_VFP_BASE
&& !vfp3_const_double_rtx (operands[1])"
   "#"
-  ""
+  "&& 1"
   [(const_int 0)]
 {
   long buf;
-- 
2.17.1



[PATCH 06/11] i386: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/i386/i386.md (*load_tp_x32_zext, *add_tp_x32_zext,
*tls_dynamic_gnu2_combine_32): Fix empty split condition.
* config/i386/sse.md (*<sse2_avx2>_pmovmskb_lt,
*<sse2_avx2>_pmovmskb_zext_lt, *sse2_pmovmskb_ext_lt,
*<sse4_1_avx2>_pblendvb_lt): Likewise.
---
 gcc/config/i386/i386.md | 6 +++---
 gcc/config/i386/sse.md  | 8 
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9ff35d9a607..545d048906d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15712,7 +15712,7 @@ (define_insn_and_split "*load_tp_x32_zext"
  (unspec:SI [(const_int 0)] UNSPEC_TP)))]
   "TARGET_X32"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0)
(zero_extend:DI (match_dup 1)))]
 {
@@ -15750,7 +15750,7 @@ (define_insn_and_split "*add_tp_x32_zext"
(clobber (reg:CC FLAGS_REG))]
   "TARGET_X32"
   "#"
-  ""
+  "&& 1"
   [(parallel
  [(set (match_dup 0)
   (zero_extend:DI
@@ -15841,7 +15841,7 @@ (define_insn_and_split "*tls_dynamic_gnu2_combine_32"
(clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && TARGET_GNU2_TLS"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0) (match_dup 5))]
 {
   operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9d3728d1cb0..a9d78030119 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16467,7 +16467,7 @@ (define_insn_and_split "*_pmovmskb_lt"
  UNSPEC_MOVMSK))]
   "TARGET_SSE2"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0)
(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
   ""
@@ -16489,7 +16489,7 @@ (define_insn_and_split "*_pmovmskb_zext_lt"
UNSPEC_MOVMSK)))]
   "TARGET_64BIT && TARGET_SSE2"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0)
(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
   ""
@@ -16511,7 +16511,7 @@ (define_insn_and_split "*sse2_pmovmskb_ext_lt"
UNSPEC_MOVMSK)))]
   "TARGET_64BIT && TARGET_SSE2"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0)
(sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
   ""
@@ -17769,7 +17769,7 @@ (define_insn_and_split "*_pblendvb_lt"
  UNSPEC_BLENDV))]
   "TARGET_SSE4_1"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0)
(unspec:VI1_AVX2
 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
-- 
2.17.1



[PATCH 07/11] m68k: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/m68k/m68k.md (*zero_extend_inc, *zero_extend_dec,
*zero_extendsidi2): Fix empty split condition.
---
 gcc/config/m68k/m68k.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
index 59a456cd496..82d075e8bf0 100644
--- a/gcc/config/m68k/m68k.md
+++ b/gcc/config/m68k/m68k.md
@@ -1693,7 +1693,7 @@ (define_insn_and_split "*zero_extend_inc"
GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT &&
GET_MODE_SIZE (GET_MODE (operands[0])) == GET_MODE_SIZE (GET_MODE 
(operands[1])) * 2"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0)
(const_int 0))
(set (match_dup 0)
@@ -1710,7 +1710,7 @@ (define_insn_and_split "*zero_extend_dec"
GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT &&
GET_MODE_SIZE (GET_MODE (operands[0])) == GET_MODE_SIZE (GET_MODE 
(operands[1])) * 2"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0)
(match_dup 1))
(set (match_dup 0)
@@ -1764,7 +1764,7 @@ (define_insn_and_split "*zero_extendsidi2"
(zero_extend:DI (match_operand:SI 1 "nonimmediate_src_operand" "")))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 2)
(match_dup 1))
(set (match_dup 3)
-- 
2.17.1



[PATCH 02/11] arc: Update unexpected empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
gcc/ChangeLog:

* config/arc/arc.md (*bbit_di): Fix empty split condition.
---
 gcc/config/arc/arc.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 7a52551eef5..a03840c4c36 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -5020,7 +5020,7 @@ (define_insn_and_split "*bbit_di"
(clobber (reg:CC_ZN CC_REG))]
   "!CROSSING_JUMP_P (insn)"
   "#"
-  ""
+  "&& 1"
   [(parallel
  [(set (pc) (if_then_else (match_dup 3) (label_ref (match_dup 0)) (pc)))
   (clobber (reg:CC_ZN CC_REG))])]
-- 
2.17.1



[PATCH 01/11] gen: Emit error msg for empty split condition

2021-06-01 Thread Kewen Lin via Gcc-patches
As Segher suggested, this patch makes gensupport emit an error
message if the split condition of a define_insn_and_split is empty
while the insn condition isn't.

gcc/ChangeLog:

* gensupport.c (process_rtx): Emit error message for empty
split condition in define_insn_and_split while the insn
condition isn't.
---
 gcc/gensupport.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/gensupport.c b/gcc/gensupport.c
index 0f19bd70664..52cee120215 100644
--- a/gcc/gensupport.c
+++ b/gcc/gensupport.c
@@ -620,6 +620,9 @@ process_rtx (rtx desc, file_location loc)
  }
else if (GET_CODE (desc) == DEFINE_INSN_AND_REWRITE)
  error_at (loc, "the rewrite condition must start with `&&'");
+   else if (split_cond[0] == '\0' && strlen (XSTR (desc, 2)) != 0)
+ error_at (loc, "the split condition mustn't be empty if the "
+"insn condition isn't empty");
XSTR (split, 1) = split_cond;
if (GET_CODE (desc) == DEFINE_INSN_AND_REWRITE)
  XVEC (split, 2) = gen_rewrite_sequence (XVEC (desc, 1));
-- 
2.17.1



[RFC/PATCH 00/11] Fix up some unexpected empty split conditions

2021-06-01 Thread Kewen Lin via Gcc-patches
Hi all,

define_insn_and_split should avoid using an empty split condition
if the condition for define_insn isn't empty; otherwise it can
sometimes have unexpected consequences, since the split
will always be done even if the insn condition doesn't hold.

To avoid forgetting to add "&& 1" to the split condition, as
Segher suggested in thread[1], this series adds a check
that raises an error when it catches such unexpected cases.  With
this new check, we have to fix up some existing
define_insn_and_split patterns which are now reported as errors.  I hope
none of these split conditions were intentionally left empty.

Any comments are highly appreciated.

BR,
Kewen

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566970.html

Kewen Lin (11):
  gen: Emit error msg for empty split condition
  arc: Update unexpected empty split condition
  arm: Update unexpected empty split condition
  cris: Update unexpected empty split condition
  h8300: Update unexpected empty split condition
  i386: Update unexpected empty split condition
  m68k: Update unexpected empty split condition
  mips: Update unexpected empty split condition
  or1k: Update unexpected empty split condition
  sh: Update unexpected empty split condition
  sparc: Update unexpected empty split condition

 gcc/config/arc/arc.md|  2 +-
 gcc/config/arm/vfp.md|  4 ++--
 gcc/config/cris/cris.md  |  2 +-
 gcc/config/h8300/combiner.md |  2 +-
 gcc/config/i386/i386.md  |  6 +++---
 gcc/config/i386/sse.md   |  8 
 gcc/config/m68k/m68k.md  |  6 +++---
 gcc/config/mips/mips.md  |  6 +++---
 gcc/config/or1k/or1k.md  |  2 +-
 gcc/config/sh/sh.md  |  2 +-
 gcc/config/sparc/sparc.md| 12 ++--
 gcc/gensupport.c |  3 +++
 12 files changed, 29 insertions(+), 26 deletions(-)

-- 
2.17.1



[committed] Fix minor H8 bug and prepare for more redundant test/compare elimination

2021-06-01 Thread Jeff Law
These are some minor changes to the H8 port to fix a latent bug and 
prepare for having logical operations participate in redundant 
test/compare elimination that I've been sitting on for a while.


These have been through my tester with testing timeouts dramatically 
increased to give as much coverage as possible (runs are taking nearly 
18 hours, even after some hacks to dramatically improve the long-running 
__builtin__overflow tests).


Conceptually the idea here is to move away from using match_operator and 
instead towards an iterator for the logical ops.  That allows me to use 
the existing define_subst to enable redundant test/compare elimination 
in a later patch.  Along the way this also starts to consolidate the 
QImode logicals with the HI/SI mode logicals and consolidate AND 
handling with IOR/XOR.


There's one bugfix buried in here.  In particular the 
define_insn_and_split patterns were using "reload_completed" in their 
split condition.  It should have been "&& reload_completed".  I strongly 
suspect there's other cases of this bug lurking and fixing them is on my 
TODO list.




Installing on the trunk,
Jeff

commit 40e484885c10a0c8bd994c5cf7bf247998a4ad6b
Author: Jeff Law 
Date:   Wed Jun 2 00:56:38 2021 -0400

Fix minor bugs in H8 port logical ops.  Prepare for more compare/test 
removal

gcc/
* config/h8300/h8300-protos.h (compute_a_shift_length): Drop unused
argument from prototype.
(output_logical_op): Add rtx_code argument.
(compute_logical_op_length): Likewise.
* config/h8300/h8300.c (h8300_and_costs): Pass additional argument
to compute_logical_op_length.
(output_logical_op): New argument with the rtx code rather than
extracting it from an operand.  Handle QImode too.
(compute_logical_op_length): Similarly.
(compute_a_shift_length): Drop unused argument.
* config/h8300/h8300.md (logicals): New code iterator.
* config/h8300/logical.md (3 expander): Combine
the "and" expander with the "ior"/"xor" expander.
(bclrmsx): Combine the QI/HI mode patterns.
(3 insns): Use code iterator rather than 
match_operator.
Handle QImode as well.   Update call to output_logical_op and
compute_logical_op_length to pass in rtx_code
Fix split condition on all define_insn_and_split patterns.
(one_cmpl2): Use  to support both clobbering
the flags and setting ZN via existing define_subst.
* config/h8300/shiftrotate.md: Drop unused argument from
calls to compute_a_shift_length.

Signed-off-by: Jeff Law 

diff --git a/gcc/config/h8300/h8300-protos.h b/gcc/config/h8300/h8300-protos.h
index 45e7dec3c7d..af653292a9d 100644
--- a/gcc/config/h8300/h8300-protos.h
+++ b/gcc/config/h8300/h8300-protos.h
@@ -29,16 +29,15 @@ extern unsigned int compute_mov_length (rtx *);
 extern const char *output_plussi (rtx *, bool);
 extern unsigned int compute_plussi_length (rtx *, bool);
 extern const char *output_a_shift (rtx *);
-extern unsigned int compute_a_shift_length (rtx, rtx *);
+extern unsigned int compute_a_shift_length (rtx *);
 extern const char *output_a_rotate (enum rtx_code, rtx *);
 extern unsigned int compute_a_rotate_length (rtx *);
 extern const char *output_simode_bld (int, rtx[]);
 extern void final_prescan_insn (rtx_insn *, rtx *, int);
 extern int h8300_expand_movsi (rtx[]);
 extern machine_mode  h8300_select_cc_mode (RTX_CODE, rtx, rtx);
-extern const char *output_logical_op (machine_mode, rtx *);
-extern unsigned int compute_logical_op_length (machine_mode,
-  rtx *);
+extern const char *output_logical_op (machine_mode, rtx_code code, rtx *);
+extern unsigned int compute_logical_op_length (machine_mode, rtx_code, rtx *);
 
 extern int compute_logical_op_cc (machine_mode, rtx *);
 extern int compute_a_shift_cc (rtx, rtx *);
diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index ba2b9daf487..ef947aa468a 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -1100,7 +1100,7 @@ h8300_and_costs (rtx x)
   operands[1] = XEXP (x, 0);
   operands[2] = XEXP (x, 1);
   operands[3] = x;
-  return compute_logical_op_length (GET_MODE (x), operands) / 2;
+  return compute_logical_op_length (GET_MODE (x), AND, operands) / 2;
 }
 
 /* Compute the cost of a shift insn.  */
@@ -1119,7 +1119,7 @@ h8300_shift_costs (rtx x)
   operands[1] = NULL;
   operands[2] = XEXP (x, 1);
   operands[3] = x;
-  return compute_a_shift_length (NULL, operands) / 2;
+  return compute_a_shift_length (operands) / 2;
 }
 
 /* Worker function for TARGET_RTX_COSTS.  */
@@ -2879,10 +2879,8 @@ compute_plussi_cc (rtx *operands)
 /* Output a logical insn.  */
 
 const char *
-output_logical_op (machine_mode mode, rtx *operands)
+output_logical_op (machine_mode mode, rtx_code code, rtx *operands)
 

[PATCH] IBM Z: Remove match_scratch workaround

2021-06-01 Thread Ilya Leoshkevich via Gcc-patches
Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?



Since commit dd1ef00c45ba ("Fix bug in the define_subst handling that
made match_scratch unusable for multi-alternative patterns.") the
workaround for that bug in *ashrdi3_31 is not only no
longer necessary, but actually breaks the build.

Get rid of it by using only one alternative in (match_scratch).  It
will be replicated as many times as needed in order to match the
pattern with which (define_subst) is used.

gcc/ChangeLog:

* config/s390/s390.md (*ashrdi3_31): Use a single
constraint.
* config/s390/subst.md (cconly_subst): Use a single constraint
in (match_scratch).

gcc/testsuite/ChangeLog:

* gcc.target/s390/ashr.c: New test.
---
 gcc/config/s390/s390.md  | 14 --
 gcc/config/s390/subst.md |  2 +-
 gcc/testsuite/gcc.target/s390/ashr.c | 11 +++
 3 files changed, 16 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/ashr.c

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 7faf775fbf2..0c5b4dc9029 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -9328,19 +9328,13 @@
   ""
   "")
 
-; FIXME: The number of alternatives is doubled here to match the fix
-; number of 2 in the subst pattern for the (clobber (match_scratch...
-; The right fix should be to support match_scratch in the output
-; pattern of a define_subst.
 (define_insn "*ashrdi3_31"
-  [(set (match_operand:DI 0 "register_operand"   "=d, d")
-(ashiftrt:DI (match_operand:DI 1 "register_operand"   "0, 0")
- (match_operand:QI 2 "shift_count_operand" "jsc,jsc")))
+  [(set (match_operand:DI 0 "register_operand"   "=d")
+(ashiftrt:DI (match_operand:DI 1 "register_operand"   "0")
+ (match_operand:QI 2 "shift_count_operand" "jsc")))
(clobber (reg:CC CC_REGNUM))]
   "!TARGET_ZARCH"
-  "@
-   srda\t%0,%Y2
-   srda\t%0,%Y2"
+  "srda\t%0,%Y2"
   [(set_attr "op_type" "RS")
(set_attr "atype"   "reg")])
 
diff --git a/gcc/config/s390/subst.md b/gcc/config/s390/subst.md
index 384af11c198..3ea6fc40ba8 100644
--- a/gcc/config/s390/subst.md
+++ b/gcc/config/s390/subst.md
@@ -45,7 +45,7 @@
   "s390_match_ccmode(insn, CCSmode)"
   [(set (reg CC_REGNUM)
(compare (match_dup 1) (const_int 0)))
-   (clobber (match_scratch:DSI 0 "=d,d"))])
+   (clobber (match_scratch:DSI 0 "=d"))])
 
 (define_subst_attr "cconly" "cconly_subst" "" "_cconly")
 
diff --git a/gcc/testsuite/gcc.target/s390/ashr.c b/gcc/testsuite/gcc.target/s390/ashr.c
new file mode 100644
index 00000000000..8cffdfa9a1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/ashr.c
@@ -0,0 +1,11 @@
+/* Test the arithmetic shift right pattern.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int e(void);
+
+int f (long c, int b)
+{
+  return (c >> b) && e ();
+}
-- 
2.31.1



Re: [PATCH v2] Add vec_const_duplicate optab and TARGET_GEN_MEMSET_SCRATCH_RTX

2021-06-01 Thread H.J. Lu via Gcc-patches
On Tue, Jun 1, 2021 at 6:17 PM Hongtao Liu  wrote:
>
> On Wed, Jun 2, 2021 at 7:07 AM H.J. Lu via Gcc-patches
>  wrote:
> >
> > On Tue, Jun 1, 2021 at 7:21 AM Jeff Law  wrote:
> > >
> > >
> > >
> > > On 6/1/2021 7:29 AM, H.J. Lu via Gcc-patches wrote:
> > > > On Tue, Jun 1, 2021 at 6:25 AM Richard Biener
> > > >  wrote:
> > > >> On Tue, Jun 1, 2021 at 3:05 PM H.J. Lu  wrote:
> > > >>> On Mon, May 31, 2021 at 11:54:53PM -0600, Jeff Law wrote:
> > > 
> > >  On 5/31/2021 11:50 PM, Richard Sandiford wrote:
> > > > "H.J. Lu via Gcc-patches"  writes:
> > > >> On Mon, May 31, 2021 at 06:32:04AM -0700, H.J. Lu wrote:
> > > >>> On Mon, May 31, 2021 at 6:26 AM Richard Biener
> > > >>>  wrote:
> > >  On Mon, May 31, 2021 at 3:12 PM H.J. Lu  
> > >  wrote:
> > > > On Mon, May 31, 2021 at 5:46 AM Richard Biener
> > > >  wrote:
> > > >> On Mon, May 31, 2021 at 2:09 PM H.J. Lu  
> > > >> wrote:
> > > >>> On Wed, May 26, 2021 at 10:28:16AM +0200, Richard Biener 
> > > >>> wrote:
> > > >>>-- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, 
> > > >>> scalar_int_mode
> > > >>> MODE)
> > > >>>This function returns the RTL of a register 
> > > >>> containing
> > > >>>'GET_MODE_SIZE (MODE)' consecutive copies of the 
> > > >>> unsigned char
> > > >>>value given in the RTL register DATA.  For 
> > > >>> example, if MODE is 4
> > > >>>bytes wide, return the RTL for 0x01010101*DATA.
> > > >> For this one I wonder if it should be an optab instead.  
> > > >> Couldn't you
> > > >> use the existing vec_duplicate for this by using 
> > > >> (paradoxical) subregs
> > > >> like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI (reg:QI 
> > > >> ...)))?
> > > > I tried.   It doesn't even work on x86.  See:
> > > >
> > > > https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html
> > >  Not sure what I should read from there...
> > > 
> > > > There are special cases to subreg HI, SI and DI modes of TI 
> > > > mode in
> > > > ix86_gen_memset_value_from_prev.   simplify_gen_subreg 
> > > > doesn't
> > > > work here.   Each backend may need its own special handling.
> > >  OK, I guess I'm not (RTL) qualified enough to further review 
> > >  these parts,
> > >  sorry.  Since we're doing code generation the canonical way 
> > >  to communicate
> > >  with backends should be optabs, not some set of disconnected 
> > >  target hooks.
> > >  But as said, I probably don't know enough of RTL to see why 
> > >  it's the only way.
> > > 
> > >  Richard.
> > > >>> Here is the patch to add optabs instead.  Does it look OK?
> > > >>>
> > > >>> Thanks.
> > > >>>
> > > >>> H.J.
> > > >>> ---
> > > >>> Add 2 optabs:
> > > >>>
> > > >>> 1. integer_extract: Extract lower bit value from the integer 
> > > >>> value in
> > > >>> TImode, OImode or XImode.
> > > >> That sounds very specific, esp. the restriction to 
> > > >> {TI,OI,XI}mode.
> > > >> It also sounds like it matches (subreg:{TI,OI,XI} (...) 0).  
> > > >> There are
> > > >> existing target hooks verifying subreg validity - why's that 
> > > >> not a good
> > > >> fit here?  ISTR you say gen_lowpart () doesn't work (or was it
> > > >> simplify_gen_subreg?), why's that so?
> > > > {TI,OI,XI}mode are storage only integer types.   subreg doesn't 
> > > > work
> > > > well on them.  I got
> > > >
> > > > [hjl@gnu-cfl-2 pieces]$ cat s2.i
> > > > extern void *ops;
> > > >
> > > > void
> > > > foo (int c)
> > > > {
> > > > __builtin_memset (ops, c, 34);
> > > > }
> > > > [hjl@gnu-cfl-2 pieces]$ make s2.s
> > > > /export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
> > > > -B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/
> > > > -O2 -march=haswell -S s2.i
> > > > during RTL pass: reload
> > > > s2.i: In function ‘foo’:
> > > > s2.i:7:1: internal compiler error: maximum number of generated 
> > > > reload
> > > > insns per insn achieved (90)
> > > >   7 | }
> > > > | ^
> > > > 0x1050734 lra_constraints(bool)
> > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra-constraints.c:5091
> > > > 0x1039536 lra(_IO_FILE*)
> > > > 

Re: [PATCH v2] Add vec_const_duplicate optab and TARGET_GEN_MEMSET_SCRATCH_RTX

2021-06-01 Thread Hongtao Liu via Gcc-patches
On Wed, Jun 2, 2021 at 7:07 AM H.J. Lu via Gcc-patches
 wrote:
>
> On Tue, Jun 1, 2021 at 7:21 AM Jeff Law  wrote:
> >
> >
> >
> > On 6/1/2021 7:29 AM, H.J. Lu via Gcc-patches wrote:
> > > On Tue, Jun 1, 2021 at 6:25 AM Richard Biener
> > >  wrote:
> > >> On Tue, Jun 1, 2021 at 3:05 PM H.J. Lu  wrote:
> > >>> On Mon, May 31, 2021 at 11:54:53PM -0600, Jeff Law wrote:
> > 
> >  On 5/31/2021 11:50 PM, Richard Sandiford wrote:
> > > "H.J. Lu via Gcc-patches"  writes:
> > >> On Mon, May 31, 2021 at 06:32:04AM -0700, H.J. Lu wrote:
> > >>> On Mon, May 31, 2021 at 6:26 AM Richard Biener
> > >>>  wrote:
> >  On Mon, May 31, 2021 at 3:12 PM H.J. Lu  
> >  wrote:
> > > On Mon, May 31, 2021 at 5:46 AM Richard Biener
> > >  wrote:
> > >> On Mon, May 31, 2021 at 2:09 PM H.J. Lu  
> > >> wrote:
> > >>> On Wed, May 26, 2021 at 10:28:16AM +0200, Richard Biener wrote:
> > >>>-- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, 
> > >>> scalar_int_mode
> > >>> MODE)
> > >>>This function returns the RTL of a register 
> > >>> containing
> > >>>'GET_MODE_SIZE (MODE)' consecutive copies of the 
> > >>> unsigned char
> > >>>value given in the RTL register DATA.  For example, 
> > >>> if MODE is 4
> > >>>bytes wide, return the RTL for 0x01010101*DATA.
> > >> For this one I wonder if it should be an optab instead.  
> > >> Couldn't you
> > >> use the existing vec_duplicate for this by using 
> > >> (paradoxical) subregs
> > >> like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI (reg:QI 
> > >> ...)))?
> > > I tried.   It doesn't even work on x86.  See:
> > >
> > > https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html
> >  Not sure what I should read from there...
> > 
> > > There are special cases to subreg HI, SI and DI modes of TI 
> > > mode in
> > > ix86_gen_memset_value_from_prev.   simplify_gen_subreg doesn't
> > > work here.   Each backend may need its own special handling.
> >  OK, I guess I'm not (RTL) qualified enough to further review 
> >  these parts,
> >  sorry.  Since we're doing code generation the canonical way to 
> >  communicate
> >  with backends should be optabs, not some set of disconnected 
> >  target hooks.
> >  But as said, I probably don't know enough of RTL to see why 
> >  it's the only way.
> > 
> >  Richard.
> > >>> Here is the patch to add optabs instead.  Does it look OK?
> > >>>
> > >>> Thanks.
> > >>>
> > >>> H.J.
> > >>> ---
> > >>> Add 2 optabs:
> > >>>
> > >>> 1. integer_extract: Extract lower bit value from the integer 
> > >>> value in
> > >>> TImode, OImode or XImode.
> > >> That sounds very specific, esp. the restriction to 
> > >> {TI,OI,XI}mode.
> > >> It also sounds like it matches (subreg:{TI,OI,XI} (...) 0).  
> > >> There are
> > >> existing target hooks verifying subreg validity - why's that not 
> > >> a good
> > >> fit here?  ISTR you say gen_lowpart () doesn't work (or was it
> > >> simplify_gen_subreg?), why's that so?
> > > {TI,OI,XI}mode are storage only integer types.   subreg doesn't 
> > > work
> > > well on them.  I got
> > >
> > > [hjl@gnu-cfl-2 pieces]$ cat s2.i
> > > extern void *ops;
> > >
> > > void
> > > foo (int c)
> > > {
> > > __builtin_memset (ops, c, 34);
> > > }
> > > [hjl@gnu-cfl-2 pieces]$ make s2.s
> > > /export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
> > > -B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/
> > > -O2 -march=haswell -S s2.i
> > > during RTL pass: reload
> > > s2.i: In function ‘foo’:
> > > s2.i:7:1: internal compiler error: maximum number of generated 
> > > reload
> > > insns per insn achieved (90)
> > >   7 | }
> > > | ^
> > > 0x1050734 lra_constraints(bool)
> > > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra-constraints.c:5091
> > > 0x1039536 lra(_IO_FILE*)
> > > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:2336
> > > 0xfe1140 do_reload
> > > /export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:5822
> > > 0xfe162e execute
> > > /export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:6008
> > > Please submit a full bug report,
> > 

Re: rs6000: Require ELFv2 ABI for ROP test (PR100750)

2021-06-01 Thread Segher Boessenkool
On Tue, Jun 01, 2021 at 11:18:05AM -0500, Bill Schmidt wrote:
> Hi!  PR100750 reports a failure on my part to require the ELFv2 ABI for
> one of the ROP tests.  This fixes that.

It would be nice if we had a selector for when we can use -mrop-protect,
instead of assuming it is only for ELFv2.


Segher


Ping #2: [PATCH] Change rs6000_const_f32_to_i32 return type.

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping patch again.

Original patch (Change rs6000_const_f32_to_i32 return type)

| Date: Tue, 18 May 2021 16:39:28 -0400
| Subject: [PATCH] Change rs6000_const_f32_to_i32 return type.
| Message-ID: <20210518203928.ga15...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570680.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Ping patch #2: [PATCH 2/2] Fix tests when running on power10, PR testsuite/100166

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping patch again.

Original patch (Fix tests when running on power10, PR testsuite/100166).

| Date: Tue, 18 May 2021 16:59:12 -0400
| Subject: [PATCH 2/2] Fix tests when running on power10, PR testsuite/100166
| Message-ID: <20210518205912.gb18...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570688.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Ping #2: [PATCH 1/2] Deal with prefixed loads/stores in tests, PR testsuite/100166

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping patch again.

Original patch (Deal with prefixed loads/stores in tests, PR testsuite/100166):

| Date: Tue, 18 May 2021 16:57:59 -0400
| Subject: [PATCH 1/2] Deal with prefixed loads/stores in tests, PR 
testsuite/100166
| Message-ID: <20210518205759.ga18...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570686.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Ping #2: [PATCH] Fix vec-splati-runnable.c test.

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping patch again.

Original patch (Fix vec-splati-runnable.c test)

| Date: Tue, 18 May 2021 16:49:58 -0400
| Subject: [PATCH] Fix vec-splati-runnable.c test.
| Message-ID: <20210518204958.ga17...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570684.html

Note as Will points out I need to change the check-in message to eliminate
adding abort () since in the current version of the test, the abort has been
changed to storing the value in a global volatile variable.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Re: [PATCH 2/2] Fix xxeval predicates.

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping patch again.

Original patch (fix xxeval predicates):

| Date: Tue, 18 May 2021 16:47:58 -0400
| Subject: [PATCH 2/2] Fix xxeval predicates.
| Message-ID: <20210518204758.gb16...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570683.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Ping #2: [PATCH 0/2] Move xx* builtins to vsx.md.

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping patch again.

Original patch (move xx* builtins to vsx.md)

| Date: Tue, 18 May 2021 16:46:47 -0400
| Subject: [PATCH 1/2] Move xx* builtins to vsx.md.
| Message-ID: <20210518204647.ga16...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570682.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Ping #2: [PATCH] Allow __ibm128 on older PowerPC systems.

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping patch again:

Original patch (Allow __ibm128 on older PowerPC systems):

| Date: Tue, 18 May 2021 16:36:32 -0400
| Subject: [PATCH] Allow __ibm128 on older PowerPC systems.
| Message-ID: <20210518203632.ga15...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570679.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Ping #2: [PATCH] Fix long double tests when default long double is not IBM.

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping patch again.

| Date: Tue, 18 May 2021 16:32:33 -0400
| Subject: [PATCH] Fix long double tests when default long double is not IBM.
| Message-ID: <20210518203233.ga15...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570678.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Ping #2: [PATCH 2/2] Add IEEE 128-bit fp conditional move on PowerPC.

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping patch again.

Original patch (Add IEEE 128-bit fp conditional move on PowerPC):

| Date: Tue, 18 May 2021 16:28:27 -0400
| Subject: [PATCH 2/2] Add IEEE 128-bit fp conditional move on PowerPC.
| Message-ID: <20210518202827.gb14...@ibm-toto.the-meissners.org>

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Ping #2: [PATCH 1/2] Add IEEE 128-bit min/max support on PowerPC.

2021-06-01 Thread Michael Meissner via Gcc-patches
Ping again.

Original patch (Add IEEE 128-bit min/max support on PowerPC):

| Date: Tue, 18 May 2021 16:26:06 -0400
| Subject: [PATCH 1/2] Add IEEE 128-bit min/max support on PowerPC.
| Message-ID: <20210518202606.ga14...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570675.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Re: [PATCH v2] Add vec_const_duplicate optab and TARGET_GEN_MEMSET_SCRATCH_RTX

2021-06-01 Thread H.J. Lu via Gcc-patches
On Tue, Jun 1, 2021 at 7:21 AM Jeff Law  wrote:
>
>
>
> On 6/1/2021 7:29 AM, H.J. Lu via Gcc-patches wrote:
> > On Tue, Jun 1, 2021 at 6:25 AM Richard Biener
> >  wrote:
> >> On Tue, Jun 1, 2021 at 3:05 PM H.J. Lu  wrote:
> >>> On Mon, May 31, 2021 at 11:54:53PM -0600, Jeff Law wrote:
> 
>  On 5/31/2021 11:50 PM, Richard Sandiford wrote:
> > "H.J. Lu via Gcc-patches"  writes:
> >> On Mon, May 31, 2021 at 06:32:04AM -0700, H.J. Lu wrote:
> >>> On Mon, May 31, 2021 at 6:26 AM Richard Biener
> >>>  wrote:
>  On Mon, May 31, 2021 at 3:12 PM H.J. Lu  wrote:
> > On Mon, May 31, 2021 at 5:46 AM Richard Biener
> >  wrote:
> >> On Mon, May 31, 2021 at 2:09 PM H.J. Lu  
> >> wrote:
> >>> On Wed, May 26, 2021 at 10:28:16AM +0200, Richard Biener wrote:
> >>>-- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, 
> >>> scalar_int_mode
> >>> MODE)
> >>>This function returns the RTL of a register containing
> >>>'GET_MODE_SIZE (MODE)' consecutive copies of the 
> >>> unsigned char
> >>>value given in the RTL register DATA.  For example, if 
> >>> MODE is 4
> >>>bytes wide, return the RTL for 0x01010101*DATA.
> >> For this one I wonder if it should be an optab instead.  
> >> Couldn't you
> >> use the existing vec_duplicate for this by using (paradoxical) 
> >> subregs
> >> like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI (reg:QI 
> >> ...)))?
> > I tried.   It doesn't even work on x86.  See:
> >
> > https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html
>  Not sure what I should read from there...
> 
> > There are special cases to subreg HI, SI and DI modes of TI 
> > mode in
> > ix86_gen_memset_value_from_prev.   simplify_gen_subreg doesn't
> > work here.   Each backend may need its own special handling.
>  OK, I guess I'm not (RTL) qualified enough to further review 
>  these parts,
>  sorry.  Since we're doing code generation the canonical way to 
>  communicate
>  with backends should be optabs, not some set of disconnected 
>  target hooks.
>  But as said, I probably don't know enough of RTL to see why it's 
>  the only way.
> 
>  Richard.
> >>> Here is the patch to add optabs instead.  Does it look OK?
> >>>
> >>> Thanks.
> >>>
> >>> H.J.
> >>> ---
> >>> Add 2 optabs:
> >>>
> >>> 1. integer_extract: Extract lower bit value from the integer 
> >>> value in
> >>> TImode, OImode or XImode.
> >> That sounds very specific, esp. the restriction to {TI,OI,XI}mode.
> >> It also sounds like it matches (subreg:{TI,OI,XI} (...) 0).  There 
> >> are
> >> existing target hooks verifying subreg validity - why's that not a 
> >> good
> >> fit here?  ISTR you say gen_lowpart () doesn't work (or was it
> >> simplify_gen_subreg?), why's that so?
> > {TI,OI,XI}mode are storage only integer types.   subreg doesn't work
> > well on them.  I got
> >
> > [hjl@gnu-cfl-2 pieces]$ cat s2.i
> > extern void *ops;
> >
> > void
> > foo (int c)
> > {
> > __builtin_memset (ops, c, 34);
> > }
> > [hjl@gnu-cfl-2 pieces]$ make s2.s
> > /export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
> > -B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/
> > -O2 -march=haswell -S s2.i
> > during RTL pass: reload
> > s2.i: In function ‘foo’:
> > s2.i:7:1: internal compiler error: maximum number of generated 
> > reload
> > insns per insn achieved (90)
> >   7 | }
> > | ^
> > 0x1050734 lra_constraints(bool)
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra-constraints.c:5091
> > 0x1039536 lra(_IO_FILE*)
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:2336
> > 0xfe1140 do_reload
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:5822
> > 0xfe162e execute
> > /export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:6008
> > Please submit a full bug report,
> > with preprocessed source if appropriate.
> > Please include the complete backtrace with any bug report.
> > See <https://gcc.gnu.org/bugs/> for instructions.
> > make: *** [Makefile:32: s2.s] Error 1
> > [hjl@gnu-cfl-2 pieces]$
> >
> > due to
> >
> > (insn 12 11 0 (set 

Re: [RFC][patch for gcc12][version 1] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-06-01 Thread Kees Cook via Gcc-patches
On Tue, Jun 01, 2021 at 04:35:53PM -0400, David Malcolm wrote:
> [...]
> Did this patch get reviewed/approved?

It's still under review, but I think it's close.

> Is the latest version still this one:
>   https://gcc.gnu.org/pipermail/gcc-patches/2021-February/565581.html
> or is there a more recent version that should be reviewed?

Yup, here's the latest (v3):
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570208.html

> (I don't think I'm qualified to approve the patch, I'm just a fan of
> the approach.  FWIW I've been experimenting with extending -fanalyzer
> to detect infoleaks in the kernel, whereas AIUI this patch is about
> mitigating them)

Thanks for your interest! If you patch your GCC with this, it should
Just Work in the kernel (i.e. you can set CONFIG_INIT_STACK_ALL_ZERO=y)

> Hope this is constructive

Yup! Please report back any testing; that'll help show people are
interested in the feature. :)

-- 
Kees Cook


[wwwdocs] lists: Fix thinko

2021-06-01 Thread Segher Boessenkool
Brown paper bag time.  The List-Id: should look like a hostname, not
like an email address.  Somehow I put in an at-sign when changing my
gcc-patches example to the match-all example we have here.

Note that the "." in the procmail match are RE wildcards btw.  This is
common practice in procmailrcs, although not necessarily a good idea :-)

Thanks to Andreas Schwab for noticing.  Committed.


Segher
---
 htdocs/lists.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/htdocs/lists.html b/htdocs/lists.html
index 4ac5d5a84cd1..e9636198b732 100644
--- a/htdocs/lists.html
+++ b/htdocs/lists.html
@@ -226,7 +226,7 @@ our lists into a single folder named INLIST.gcc:
 
 
 :0
-* ^List-Id: .*<.*@gcc.gnu.org>$
+* ^List-Id: .*<.*.gcc.gnu.org>$
 INLIST.gcc
 
 
@@ -235,7 +235,7 @@ can use the following recipe (Use at your own risk!):
 
 
 :0 Wh: msgid.lock
-* ^List-Id: .*<.*@gcc.gnu.org>$
+* ^List-Id: .*<.*.gcc.gnu.org>$
 | formail -D 8192 msgid.cache
 
 
-- 
1.8.3.1



Re: [PATCH] define auto_vec copy ctor and assignment (PR 90904)

2021-06-01 Thread Jason Merrill via Gcc-patches

On 6/1/21 3:56 PM, Martin Sebor wrote:

On 5/27/21 2:53 PM, Jason Merrill wrote:

On 4/27/21 11:52 AM, Martin Sebor via Gcc-patches wrote:

On 4/27/21 8:04 AM, Richard Biener wrote:

On Tue, Apr 27, 2021 at 3:59 PM Martin Sebor  wrote:


On 4/27/21 1:58 AM, Richard Biener wrote:

On Tue, Apr 27, 2021 at 2:46 AM Martin Sebor via Gcc-patches
 wrote:


PR 90904 notes that auto_vec is unsafe to copy and assign because
the class manages its own memory but doesn't define (or delete)
either special function.  Since I first ran into the problem,
auto_vec has grown a move ctor and move assignment from
a dynamically-allocated vec but still no copy ctor or copy
assignment operator.

The attached patch adds the two special functions to auto_vec along
with a few simple tests.  It makes auto_vec safe to use in 
containers
that expect copyable and assignable element types and passes 
bootstrap

and regression testing on x86_64-linux.


The question is whether we want such uses to appear since those
can be quite inefficient?  Thus the option is to delete those 
operators?


I would strongly prefer the generic vector class to have the 
properties

expected of any other generic container: copyable and assignable.  If
we also want another vector type with this restriction I suggest to 
add
another "noncopyable" type and make that property explicit in its 
name.

I can submit one in a followup patch if you think we need one.


I'm not sure (and not strictly against the copy and assign).  
Looking around

I see that vec<> does not do deep copying.  Making auto_vec<> do it
might be surprising (I added the move capability to match how vec<>
is used - as "reference" to a vector)


The vec base classes are special: they have no ctors at all (because
of their use in unions).  That's something we might have to live with
but it's not a model to follow in ordinary containers.


I don't think we have to live with it anymore, now that we're writing 
C++11.



The auto_vec class was introduced to fill the need for a conventional
sequence container with a ctor and dtor.  The missing copy ctor and
assignment operators were an oversight, not a deliberate feature.
This change fixes that oversight.

The revised patch also adds a copy ctor/assignment to the auto_vec
primary template (that's also missing it).  In addition, it adds
a new class called auto_vec_ncopy that disables copying and
assignment as you prefer.


Hmm, adding another class doesn't really help with the confusion richi 
mentions.  And many uses of auto_vec will pass them as vec, which will 
still do a shallow copy.  I think it's probably better to disable the 
copy special members for auto_vec until we fix vec<>.


There are at least a couple of problems that get in the way of fixing
all of vec to act like a well-behaved C++ container:

1) The embedded vec has a trailing "flexible" array member with its
instances having different size.  They're initialized by memset and
copied by memcpy.  The class can't have copy ctors or assignments
but it should disable/delete them instead.

2) The heap-based vec is used throughout GCC with the assumption of
shallow copy semantics (not just as function arguments but also as
members of other such POD classes).  This can be changed by providing
copy and move ctors and assignment operators for it, and also for
some of the classes in which it's a member and that are used with
the same assumption.

3) The heap-based vec::block_remove() assumes its elements are PODs.
That breaks in VEC_ORDERED_REMOVE_IF (used in gcc/dwarf2cfi.c:2862
and tree-vect-patterns.c).

I took a stab at both and while (1) is easy, (2) is shaping up to
be a big and tricky project.  Tricky because it involves using
std::move in places where what's moved is subsequently still used.
I can keep plugging away at it but it won't change the fact that
the embedded and heap-based vecs have different requirements.

It doesn't seem to me that having a safely copyable auto_vec needs
to be put on hold until the rat's nest above is untangled.  It won't
make anything worse than it is.  (I have a project that depends on
a sane auto_vec working).

A couple of alternatives to solving this are to use std::vector or
write an equivalent vector class just for GCC.


It occurs to me that another way to work around the issue of passing an 
auto_vec by value as a vec, and thus doing a shallow copy, would be to 
add a vec ctor taking an auto_vec, and delete that.  This would mean if 
you want to pass an auto_vec to a vec interface, it needs to be by 
reference.  We might as well do the same for operator=, though that 
isn't as important.


Jason



Re: [PATCH, rs6000] Fix alias set of link reg save MEM

2021-06-01 Thread Segher Boessenkool
On Tue, Jun 01, 2021 at 03:34:50PM -0500, Pat Haugen wrote:
> Make sure link reg save MEM has frame alias set, to match other link reg
> save/restore code.

Okay for trunk and any backports (please do at least GCC 11).  Thanks!


Segher


Re: [PATCH] Add gnu::diagnose_as attribute

2021-06-01 Thread Matthias Kretz
On Tuesday, 1 June 2021 21:12:18 CEST Jason Merrill wrote:
> On 5/28/21 3:42 AM, Matthias Kretz wrote:
> > On Friday, 28 May 2021 05:05:52 CEST Jason Merrill wrote:
> >> I'd think you could get the same effect from a hypothetical
> >> 
> >> namespace [[gnu::diagnose_as]] stdx = std::experimental;
> >> 
> >> though we'll need to add support for attributes on namespace aliases to
> >> the grammar.
> > 
> > Right, but then two of my design goals can't be met:
> > 
> > 1. Diagnostics have an improved signal-to-noise ratio out of the box.
> > 
> > 2. We can use replacement names that are not valid identifiers.
> 
> This is the basic disconnect: I think that these goals are
> contradictory, and that replacement names that are not valid identifiers
> will just confuse users that don't know about them.

With signal-to-noise ratio I meant the ratio (averaged over all GCC users - so 
yes, we can't give actual numbers for these):

  #characters one needs to read to understand / #total diagnostic characters.

Or more specifically

  1 - #characters that are distracting from understanding the issue / #total 
diagnostic characters.

Consider that for the stdx::simd case I regularly hit the problem that vim's 
QuickFix truncates at 4095 characters and the message basically just got 
started (i.e. it's sometimes impossible to use vim's QuickFix to understand 
errors involving stdx::simd). There's *a lot* of noise that must be removed 
*per default*.

WRT "invalid identifiers", there are two types:
(1) string of characters that is not a valid C++ identifier
(2) valid C++ identifier, but not defined for the given TU

(2) can be confusing, I agree, but doesn't have to be. (1) provides a stronger 
hint that something is either abbreviated or intentionally hidden from the 
user.

If I write `std::experimental::simd` in my code and get a diagnostic 
that says 'stdₓ::simd' then it's relatively easy to 
make the connection what happened here: 'stdₓ' clearly must mean something 
else than a literal 'stdₓ' in my code. The user knows there's no `std::simd` 
so it must be `std::experimental::simd`. (Note that once 
std::experimental::simd goes into the IS, I would be the first to propose a 
change for 'stdₓ::simd' back to 'std::experimental::simd'.)

> If a user sees stdx::foo in a diagnostic and then tries to refer to
> stdx::foo and gets an error, the diagnostic is not more helpful than one
> that uses the fully qualified name.

Hmm, if GCC prints an actual suggestion like "write 'stdₓ::foo' here" then 
yes, I agree. That should not make use of diagnose_as.

> Jonathan, David, any thoughts on this issue?
>
> > I can imagine using it to make _Internal __names more readable while at
> > the
> > same time discouraging users to utter them in their own code. Sorry for
> > the
> > bad code obfuscation example above.
> > 
> > An example for consideration from stdx::simd:
> >namespace std {
> >namespace experimental {
> >namespace parallelism_v2 [[gnu::diagnose_as("stdx")]] {
> >namespace simd_abi [[gnu::diagnose_as("simd_abi")]] {
> >
> >  template 
> >  
> >struct _VecBuiltin;
> >  
> >  template 
> >  
> >struct _VecBltnBtmsk;
> >
> >#if x86
> >
> >  using __ignore_me_0 [[gnu::diagnose_as("[SSE]")]] = _VecBuiltin<16>;
> >  using __ignore_me_1 [[gnu::diagnose_as("[AVX]")]] = _VecBuiltin<32>;
> >  using __ignore_me_2 [[gnu::diagnose_as("[AVX512]")]] =
> >  _VecBltnBtmsk<64>;
> >
> >#endif
> >
> > 
> > Then diagnostics would print 'stdx::simd'
> > instead of 'stdx::simd>'. (Users utter
> > the type by saying e.g. 'stdx::native_simd', while compiling with
> > AVX512 flags.)
>
> Wouldn't it be better to print stdx::native_simd if that's how
> the users write the type?

No. For example, I might expect that native_simd maps to AVX-512 vectors but 
forgot the relevant -m flag(s). If the diagnostics show 'simd' I have a good chance of catching that issue.
And the other way around: If I wrote `stdx::simd` and it happens to be 
the same type as the native_simd typedef, it would show the latter in 
diagnostics. Similar issue with asking for a simd ABI with 
`simd_abi::deduce_t`: I typically don't want to know whether that's 
also native_simd but rather what exact simd_abi I got. And no, as a 
user I don't typically care about the libstdc++ implementation details but 
what those details mean.

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 std::experimental::simd  https://github.com/VcDevel/std-simd
──





Re: [RFC][patch for gcc12][version 1] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-06-01 Thread David Malcolm via Gcc-patches
On Mon, 2021-03-15 at 12:14 -0500, Qing Zhao via Gcc-patches wrote:
> (CC’ing gcc-patch alias).
> 
> Hi, Kees,
> 
> 
> > On Mar 12, 2021, at 3:55 PM, Kees Cook  wrote:
> > 
> > On Fri, Mar 12, 2021 at 03:35:28PM -0600, Qing Zhao wrote:
> > > Hi, Kees,
> > > 
> > > I am looking at the structure padding initialization issue. And
> > > also have some questions:
> > > 
> > > 
> > > > On Feb 24, 2021, at 10:41 PM, Kees Cook 
> > > > wrote:
> > > > 
> > > > > It looks like there are still some issues with padding and pre-
> > > > case
> > > > switch variables. Here's the test output, FWIW:
> > > > 
> > > > 
> > > > test_stackinit: small_hole_static_all FAIL (uninit bytes: 3)
> > > > test_stackinit: big_hole_static_all FAIL (uninit bytes: 61)
> > > > test_stackinit: trailing_hole_static_all FAIL (uninit bytes: 7)
> > > > test_stackinit: small_hole_dynamic_all FAIL (uninit bytes: 3)
> > > > test_stackinit: big_hole_dynamic_all FAIL (uninit bytes: 61)
> > > > test_stackinit: trailing_hole_dynamic_all FAIL (uninit bytes: 7)
> > > > 
> > > > test_stackinit: switch_1_none FAIL (uninit bytes: 8)
> > > > test_stackinit: switch_2_none FAIL (uninit bytes: 8)
> > > > test_stackinit: failures: 8
> > > > 
> > > > 
> > > > /* Simple structure with padding likely to be covered by
> > > > compiler. */
> > > > struct test_small_hole {
> > > > size_t one;
> > > > char two;
> > > > /* 3 byte padding hole here. */
> > > > int three;
> > > > unsigned long four;
> > > > };
> > > > 
> > > > /* Try to trigger unhandled padding in a structure. */
> > > > struct test_aligned {
> > > > u32 internal1;
> > > > u64 internal2;
> > > > } __aligned(64);
> > > > 
> > > > struct test_big_hole {
> > > > u8 one;
> > > > u8 two;
> > > > u8 three;
> > > > /* 61 byte padding hole here. */
> > > > struct test_aligned four;
> > > > } __aligned(64);
> > > > 
> > > > struct test_trailing_hole {
> > > > char *one;
> > > > char *two;
> > > > char *three;
> > > > char four;
> > > > /* "sizeof(unsigned long) - 1" byte padding hole here. */
> > > > };
> > > > 
> > > > They fail when they're statically initialized (either fully or
> > > > partially),
> > > 
> > > So, when the structure is not statically initialized,  the compiler
> > > initialization is good?
> > > 
> > > For the failing cases, what’s the behavior of the LLVM -ftrivial-
> > > auto-var-init?
> > > 
> > > From the LLVM patch: 
> > > (https://reviews.llvm.org/D54604 )
> > > 
> > > 
> > > To keep the patch simple, only some undef is removed for now, see
> > > replaceUndef. The padding-related infoleaks are therefore not all
> > > gone yet.
> > > This will be addressed in a follow-up, mainly because addressing
> > > padding-related
> > > leaks should be a stand-alone option which is implied by variable
> > > initialization.
> > > 
> > 
> > Right, padding init happened in:
> > https://github.com/llvm/llvm-project/commit/4f7bc0eee7e6099b1abd57dac3c83529944ab23c
> > 
> > And was further clarified that, IIUC, padding _must be zero_
> > regardless
> > of pattern-vs-zero in:
> > https://github.com/llvm/llvm-project/commit/d39fbc7e20d84364e409ce59724ce20625637062
> 
> Thanks a lot for the above information, they are very useful.
> I will take a look at the LLVM patch and try to implement this feature
> into GCC as well.
> 
> > 
> > > Yes, in GCC’s implementation, I think that  fixing all padding-
> > > > related leaks also requires a
> > > separate patch.
> > 
> > That's fine -- but it'll need to be tied to -ftrivial-auto-var-init,
> > since otherwise the memory isn't actually fully initialized. :)
> 
> Okay, will do that.
> 
> Thanks again.
> 
> Qing
> 

Did this patch get reviewed/approved?

Is the latest version still this one:
  https://gcc.gnu.org/pipermail/gcc-patches/2021-February/565581.html
or is there a more recent version that should be reviewed?

(I don't think I'm qualified to approve the patch, I'm just a fan of
the approach.  FWIW I've been experimenting with extending -fanalyzer
to detect infoleaks in the kernel, whereas AIUI this patch is about
mitigating them)

Hope this is constructive
Dave



[PATCH, rs6000] Fix alias set of link reg save MEM

2021-06-01 Thread Pat Haugen via Gcc-patches
Make sure link reg save MEM has frame alias set, to match other link reg
save/restore code.

Bootstrap/regtest on powerpc64/powerpc64le with no new regressions. Ok for
trunk?

-Pat


2021-06-01  Pat Haugen  

gcc/ChangeLog:

* config/rs6000/rs6000-logue.c (rs6000_emit_prologue): Use
gen_frame_store.



diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c
index 13c00e740d6..07337c4836a 100644
--- a/gcc/config/rs6000/rs6000-logue.c
+++ b/gcc/config/rs6000/rs6000-logue.c
@@ -3257,7 +3257,7 @@ rs6000_emit_prologue (void)
   if (!WORLD_SAVE_P (info) && info->lr_save_p
   && !cfun->machine->lr_is_wrapped_separately)
 {
-  rtx addr, reg, mem;
+  rtx reg;
 
   reg = gen_rtx_REG (Pmode, 0);
   START_USE (0);
@@ -3267,13 +3267,8 @@ rs6000_emit_prologue (void)
   if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
| SAVE_NOINLINE_FPRS_SAVES_LR)))
{
- addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
-  GEN_INT (info->lr_save_offset + frame_off));
- mem = gen_rtx_MEM (Pmode, addr);
- /* This should not be of rs6000_sr_alias_set, because of
-__builtin_return_address.  */
-
- insn = emit_move_insn (mem, reg);
+ insn = emit_insn (gen_frame_store (reg, frame_reg_rtx,
+info->lr_save_offset + frame_off));
  rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
NULL_RTX, NULL_RTX);
  END_USE (0);


Re: [PATCH] Hashtable PR96088

2021-06-01 Thread François Dumont via Gcc-patches

On 01/06/21 8:10 pm, Jonathan Wakely wrote:

On 01/06/21 18:47 +0100, Jonathan Wakely wrote:

On 01/06/21 18:45 +0100, Jonathan Wakely wrote:

On 22/05/21 18:35 +0200, François Dumont wrote:
diff --git 
a/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc

new file mode 100644
index 000..53bb754dab6
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
@@ -0,0 +1,271 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is 
free

+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public 
License along

+// with this library; see the file COPYING3.  If not see
+// .
+
+// libstdc++/96088
+
+#include 


This is a c++11 test, but it uses .

The test fails for make check 
RUNTESTFLAGS=--target_board=unix/-std=gnu++11


I assume it should use { target c++17 } instead?


Same for 23_containers/unordered_map/96088.cc


I've pushed this fix.

Tested x86_64-linux.


I wonder why the same tests in unordered_multimap/unordered_multiset are not 
wrong too.


Thanks



Re: [PATCH] define auto_vec copy ctor and assignment (PR 90904)

2021-06-01 Thread Martin Sebor via Gcc-patches

On 5/27/21 2:53 PM, Jason Merrill wrote:

On 4/27/21 11:52 AM, Martin Sebor via Gcc-patches wrote:

On 4/27/21 8:04 AM, Richard Biener wrote:

On Tue, Apr 27, 2021 at 3:59 PM Martin Sebor  wrote:


On 4/27/21 1:58 AM, Richard Biener wrote:

On Tue, Apr 27, 2021 at 2:46 AM Martin Sebor via Gcc-patches
 wrote:


PR 90904 notes that auto_vec is unsafe to copy and assign because
the class manages its own memory but doesn't define (or delete)
either special function.  Since I first ran into the problem,
auto_vec has grown a move ctor and move assignment from
a dynamically-allocated vec but still no copy ctor or copy
assignment operator.

The attached patch adds the two special functions to auto_vec along
with a few simple tests.  It makes auto_vec safe to use in containers
that expect copyable and assignable element types and passes 
bootstrap

and regression testing on x86_64-linux.


The question is whether we want such uses to appear since those
can be quite inefficient?  Thus the option is to delete those 
operators?


I would strongly prefer the generic vector class to have the properties
expected of any other generic container: copyable and assignable.  If
we also want another vector type with this restriction I suggest to add
another "noncopyable" type and make that property explicit in its name.
I can submit one in a followup patch if you think we need one.


I'm not sure (and not strictly against the copy and assign).  Looking 
around

I see that vec<> does not do deep copying.  Making auto_vec<> do it
might be surprising (I added the move capability to match how vec<>
is used - as "reference" to a vector)


The vec base classes are special: they have no ctors at all (because
of their use in unions).  That's something we might have to live with
but it's not a model to follow in ordinary containers.


I don't think we have to live with it anymore, now that we're writing 
C++11.



The auto_vec class was introduced to fill the need for a conventional
sequence container with a ctor and dtor.  The missing copy ctor and
assignment operators were an oversight, not a deliberate feature.
This change fixes that oversight.

The revised patch also adds a copy ctor/assignment to the auto_vec
primary template (that's also missing it).  In addition, it adds
a new class called auto_vec_ncopy that disables copying and
assignment as you prefer.


Hmm, adding another class doesn't really help with the confusion richi 
mentions.  And many uses of auto_vec will pass them as vec, which will 
still do a shallow copy.  I think it's probably better to disable the 
copy special members for auto_vec until we fix vec<>.


There are at least a couple of problems that get in the way of fixing
all of vec to act like a well-behaved C++ container:

1) The embedded vec has a trailing "flexible" array member with its
instances having different size.  They're initialized by memset and
copied by memcpy.  The class can't have copy ctors or assignments
but it should disable/delete them instead.

2) The heap-based vec is used throughout GCC with the assumption of
shallow copy semantics (not just as function arguments but also as
members of other such POD classes).  This can be changed by providing
copy and move ctors and assignment operators for it, and also for
some of the classes in which it's a member and that are used with
the same assumption.

3) The heap-based vec::block_remove() assumes its elements are PODs.
That breaks in VEC_ORDERED_REMOVE_IF (used in gcc/dwarf2cfi.c:2862
and tree-vect-patterns.c).

I took a stab at both and while (1) is easy, (2) is shaping up to
be a big and tricky project.  Tricky because it involves using
std::move in places where what's moved is subsequently still used.
I can keep plugging away at it but it won't change the fact that
the embedded and heap-based vecs have different requirements.

It doesn't seem to me that having a safely copyable auto_vec needs
to be put on hold until the rat's nest above is untangled.  It won't
make anything worse than it is.  (I have a project that depends on
a sane auto_vec working).

A couple of alternatives to solving this are to use std::vector or
write an equivalent vector class just for GCC.

Martin



Jason





[PATCH] ARC: gcc driver default to hs38_linux

2021-06-01 Thread Vineet Gupta via Gcc-patches
arc700 is legacy and there's no active development for it, so switch to
latest hs38_linux as default

Signed-off-by: Vineet Gupta 
---
 gcc/config/arc/arc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index bd1fe0abd7af..252241a858c9 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -34,7 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #define SYMBOL_FLAG_CMEM   (SYMBOL_FLAG_MACH_DEP << 3)
 
 #ifndef TARGET_CPU_DEFAULT
-#define TARGET_CPU_DEFAULT PROCESSOR_arc700
+#define TARGET_CPU_DEFAULT PROCESSOR_hs38_linux
 #endif
 
 /* Check if this symbol has a long_call attribute in its declaration */
-- 
2.25.1



[PATCH] rtl: constm64_rtx..const64_rtx

2021-06-01 Thread Segher Boessenkool
Since time immemorial there has been const_int_rtx for all values from
-64 to 64, but only constm1_rtx..const2_rtx have been available for
convenient use.  Change this, so that we can use all values in
{-64,...,64} in RTL easily.  This matters, because then we can just say
  if (XEXP (x, 1) == const16_rtx)
and things like that, since all const_int in that range are unique.  We
already do this for -1, 0, 1, 2, but we could do it for every value in
that range.

2021-06-01  Segher Boessenkool  
* rtl.h (constm64_rtx, ..., constm2_rtx): New.
(const3_rtx, ..., const64_rtx): New.

doc/
* rtl.texi (Constants): Document the new names.
---
Tested on powerpc64-linux {-m32,-m64}, but this of course doesn't mean
all that much until any of the new names are actually used.

Is this okay for trunk?


Segher


 gcc/doc/rtl.texi |  20 +
 gcc/rtl.h| 127 ++-
 2 files changed, 137 insertions(+), 10 deletions(-)

diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
index 5af71137a878..5dbfb6028095 100644
--- a/gcc/doc/rtl.texi
+++ b/gcc/doc/rtl.texi
@@ -1658,19 +1658,21 @@ copies of the top bit.  Note however that values are 
neither
 inherently signed nor inherently unsigned; where necessary, signedness
 is determined by the rtl operation instead.
 
+@findex constm64_rtx
+@findex constm1_rtx
 @findex const0_rtx
 @findex const1_rtx
 @findex const2_rtx
-@findex constm1_rtx
-There is only one expression object for the integer value zero; it is
+@findex const64_rtx
+There is only one expression object for the integer value zero: it is
 the value of the variable @code{const0_rtx}.  Likewise, the only
-expression for integer value one is found in @code{const1_rtx}, the only
-expression for integer value two is found in @code{const2_rtx}, and the
-only expression for integer value negative one is found in
-@code{constm1_rtx}.  Any attempt to create an expression of code
-@code{const_int} and value zero, one, two or negative one will return
-@code{const0_rtx}, @code{const1_rtx}, @code{const2_rtx} or
-@code{constm1_rtx} as appropriate.
+expression for integer value one is found in @code{const1_rtx}, and more
+generally, the only expression for integer value @var{N} is found in
+@code{const@var{N}_rtx}, and the only expression for integer value negative
+@var{N} is found in @code{constm@var{N}_rtx}, both for any @var{N} from 1 up
+to 64 inclusive.  Any attempt to create an expression of code
+@code{const_int} and value in that range will return @code{const0_rtx},
+@code{const1_rtx}, and so on, as appropriate.
 
 @findex const_true_rtx
 Similarly, there is only one object for the integer whose value is
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 35178b5bfaca..5429b7a3f4ac 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3768,10 +3768,135 @@ extern unsigned int split_all_insns_noflow (void);
 #define MAX_SAVED_CONST_INT 64
 extern GTY(()) rtx const_int_rtx[MAX_SAVED_CONST_INT * 2 + 1];
 
+#define constm64_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-64])
+#define constm63_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-63])
+#define constm62_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-62])
+#define constm61_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-61])
+#define constm60_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-60])
+#define constm59_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-59])
+#define constm58_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-58])
+#define constm57_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-57])
+#define constm56_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-56])
+#define constm55_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-55])
+#define constm54_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-54])
+#define constm53_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-53])
+#define constm52_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-52])
+#define constm51_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-51])
+#define constm50_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-50])
+#define constm49_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-49])
+#define constm48_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-48])
+#define constm47_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-47])
+#define constm46_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-46])
+#define constm45_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-45])
+#define constm44_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-44])
+#define constm43_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-43])
+#define constm42_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-42])
+#define constm41_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-41])
+#define constm40_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-40])
+#define constm39_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-39])
+#define constm38_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-38])
+#define constm37_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-37])
+#define constm36_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-36])
+#define constm35_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-35])
+#define constm34_rtx   (const_int_rtx[MAX_SAVED_CONST_INT-34])
+#define constm33_rtx   

Re: [PATCH] Add gnu::diagnose_as attribute

2021-06-01 Thread Jason Merrill via Gcc-patches

On 5/28/21 3:42 AM, Matthias Kretz wrote:

On Friday, 28 May 2021 05:05:52 CEST Jason Merrill wrote:

On 5/27/21 6:07 PM, Matthias Kretz wrote:

On Thursday, 27 May 2021 23:15:46 CEST Jason Merrill wrote:

On 5/27/21 2:54 PM, Matthias Kretz wrote:

namespace Vir {
 inline namespace foo {
   struct A {};
 }
 struct A {};
}
using Vir::A;

:7:12: error: reference to 'A' is ambiguous
:3:12: note: candidates are: 'struct Vir::A'
:5:10: note: 'struct Vir::A'


That doesn't seem so bad.


As long as you ignore the line numbers, it's a puzzling diagnostic.


Only briefly puzzling, I think; Vir::A is a valid way of referring to
both types.


True. But that's also what led to the error. GCC easily clears it up
nowadays, but wouldn't anymore if inline namespaces were hidden by default.


I'd think you could get the same effect from a hypothetical

namespace [[gnu::diagnose_as]] stdx = std::experimental;

though we'll need to add support for attributes on namespace aliases to
the grammar.


Right, but then two of my design goals can't be met:

1. Diagnostics have an improved signal-to-noise ratio out of the box.

2. We can use replacement names that are not valid identifiers.


This is the basic disconnect: I think that these goals are 
contradictory, and that replacement names that are not valid identifiers 
will just confuse users that don't know about them.


If a user sees stdx::foo in a diagnostic and then tries to refer to 
stdx::foo and gets an error, the diagnostic is not more helpful than one 
that uses the fully qualified name.


Jonathan, David, any thoughts on this issue?


I don't think libstdc++ would ship with a namespace alias outside of the std
namespace. So we'd place the "burden" of using diagnose_as correctly on our
users. Also as a user you'd possibly have to repeat the namespace alias in
every source file and/or place it in your application's/library's namespace.


Here it seems like you want to say "use this typedef as the true name of
the type".  Is it useful to have to repeat the name?  Allowing people to
use names that don't correspond to actual declarations seems unnecessary.


Yes, but you could also use it to apply diagnose_as to a template
instantiation without introducing a name for users. E.g.

using __only_to_apply_the_attribute [[gnu::diagnose_as("intvector")]]

  = std::vector;


Now all diagnostics of 'std::vector' would print 'intvector' instead.


Yes, but why would you want to?  Making diagnostics print names that the
user can't use in their own code seems obfuscatory, and requiring users
to write the same names in two places seems like extra work.


I can imagine using it to make _Internal __names more readable while at the
same time discouraging users to utter them in their own code. Sorry for the
bad code obfuscation example above.

An example for consideration from stdx::simd:

   namespace std {
   namespace experimental {
   namespace parallelism_v2 [[gnu::diagnose_as("stdx")]] {
   namespace simd_abi [[gnu::diagnose_as("simd_abi")]] {
 template 
   struct _VecBuiltin;

 template 
   struct _VecBltnBtmsk;

   #if x86
 using __ignore_me_0 [[gnu::diagnose_as("[SSE]")]] = _VecBuiltin<16>;
 using __ignore_me_1 [[gnu::diagnose_as("[AVX]")]] = _VecBuiltin<32>;
 using __ignore_me_2 [[gnu::diagnose_as("[AVX512]")]] = _VecBltnBtmsk<64>;
   #endif
   

Then diagnostics would print 'stdx::simd' instead
of 'stdx::simd>'. (Users utter the type by
saying e.g. 'stdx::native_simd', while compiling with AVX512 flags.)


Wouldn't it be better to print stdx::native_simd if that's how 
the users write the type?



But in general, I tend to agree, for type aliases there's rarely a case
where the names wouldn't match.

However, I didn't want to special-case the attribute parameters for type
aliases (or introduce another attribute just for this case). The attribute
works consistently and with the same interface independent of where it's
used. I tried to build a generic, broad feature instead of a narrow
one-problem solution.


"Treat this declaration as the name of the type/namespace it refers to
in diagnostics" also seems consistent to me.


Sure. In general, I think

   namespace foo [[gnu::this_is_the_name_I_want]] = bar;
   using foo [[gnu::this_is_the_name_I_want]] = bar;

is not a terribly bad idea on its own. But it's not the solution for the
problems I set out to solve.


Still, perhaps it would be better to store these aliases in a separate hash
table instead of *_ATTRIBUTES.


Maybe. For performance reasons or for simplification of the implementation?


I was thinking for not messing with the type after it's defined, but 
there are other things that do that (such as lazy declaration of 
implicit constructors) so I wouldn't worry about it.


Jason



Re: [PATCH 1/2] c-family: Copy DECL_USER_ALIGN even if DECL_ALIGN is similar.

2021-06-01 Thread Jason Merrill via Gcc-patches

On 6/1/21 9:20 AM, Robin Dapp wrote:

As you say, the logic is convoluted.  Let's simplify it rather than make
it more convoluted.  One possibility would be to change || to | to avoid
the shortcut, and then

bool note = lastalign > curalign;
if (note)
    curalign = lastalign;


I went with your suggestion in the attached v2.  Regtested and 
bootstrapped on s390x, x86 and ppc64le.


OK.

Jason



Re: [PATCH] c++: value init vs zero init in expand_aggr_init_1 [PR65816]

2021-06-01 Thread Jason Merrill via Gcc-patches

On 6/1/21 1:37 PM, Patrick Palka wrote:

In the case of value-initializing an object of class type T,
[dcl.init.general]/8 says:

   - if T has either no default constructor ([class.default.ctor]) or
 a default constructor that is user-provided or deleted, then the
 object is default-initialized;
   - otherwise, the object is zero-initialized and ...  if T has a
 non-trivial default constructor, the object is default-initialized;

But when determining whether to first zero-initialize the object,
expand_aggr_init_1 incorrectly looks for _any_ user provided
constructor, not just for a user-provided _default_ constructor.  This
causes us to incorrectly skip the zero-initialization step when the
class type has a trivial default constructor alongside another
user-defined constructor.

It seems the predicate type_has_non_user_provided_default_constructor
accurately captures the above rule for when to first perform a
zero-initialization during value-initialization, so this patch corrects
the check in expand_aggr_init_1 using this predicate instead.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?


OK.


PR c++/65816

gcc/cp/ChangeLog:

* init.c (expand_aggr_init_1): Check
type_has_non_user_provided_default_constructor instead of
type_has_user_provided_constructor.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-delegating3.C: New test.
* g++.dg/cpp0x/dc10.C: New test.
* g++.dg/cpp0x/initlist-base4.C: New test.
* g++.dg/cpp2a/constexpr-init22.C: New test.

libstdc++-v3/ChangeLog:

* testsuite/23_containers/deque/allocator/default_init.cc,
testsuite/23_containers/forward_list/allocator/default_init.cc,
testsuite/23_containers/list/allocator/default_init.cc,
testsuite/23_containers/map/allocator/default_init.cc,
testsuite/23_containers/set/allocator/default_init.cc,
testsuite/23_containers/vector/allocator/default_init.cc,
testsuite/23_containers/vector/bool/allocator/default_init.cc:
Remove xfail.
---
  gcc/cp/init.c |  4 +--
  .../g++.dg/cpp0x/constexpr-delegating3.C  | 10 +++
  gcc/testsuite/g++.dg/cpp0x/dc10.C | 19 ++
  gcc/testsuite/g++.dg/cpp0x/initlist-base4.C   | 26 +++
  gcc/testsuite/g++.dg/cpp2a/constexpr-init22.C | 14 ++
  .../deque/allocator/default_init.cc   |  1 -
  .../forward_list/allocator/default_init.cc|  1 -
  .../list/allocator/default_init.cc|  1 -
  .../map/allocator/default_init.cc |  1 -
  .../set/allocator/default_init.cc |  1 -
  .../vector/allocator/default_init.cc  |  1 -
  .../vector/bool/allocator/default_init.cc |  1 -
  12 files changed, 71 insertions(+), 9 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-delegating3.C
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/dc10.C
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/initlist-base4.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-init22.C

diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index a85f4d50750..7aba5c60b32 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -2078,9 +2078,9 @@ expand_aggr_init_1 (tree binfo, tree true_exp, tree exp, 
tree init, int flags,
   that's value-initialization.  */
if (init == void_type_node)
  {
-  /* If the type has data but no user-provided ctor, we need to zero
+  /* If the type has data but no user-provided default ctor, we need to 
zero
 out the object.  */
-  if (!type_has_user_provided_constructor (type)
+  if (type_has_non_user_provided_default_constructor (type)
  && !is_really_empty_class (type, /*ignore_vptr*/true))
{
  tree field_size = NULL_TREE;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-delegating3.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-delegating3.C
new file mode 100644
index 000..2263ec89488
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-delegating3.C
@@ -0,0 +1,10 @@
+// PR c++/65816
+// { dg-do compile { target c++11 } }
+
+struct test {
+  int m;
+  test() = default;
+  constexpr test(int) : test() {}
+};
+
+static_assert(test(0).m == 0, "");
diff --git a/gcc/testsuite/g++.dg/cpp0x/dc10.C 
b/gcc/testsuite/g++.dg/cpp0x/dc10.C
new file mode 100644
index 000..c008a1703e8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/dc10.C
@@ -0,0 +1,19 @@
+// PR c++/65816
+// { dg-do run { target c++11 } }
+
+void* operator new(decltype(sizeof(int)), void* ptr) { return ptr; }
+
+struct test {
+  int i;
+  test() = default;
+  test(int) : test() {}
+};
+
+int main() {
+  alignas(test) unsigned char space[sizeof(test)];
+  for (auto& c : space) c = 0xff;
+
+  auto ptr = ::new() test(42);
+  int& i = static_cast(*ptr).i;
+  if (i != 0) __builtin_abort();
+}
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-base4.C 

[PATCH] Improve match_simplify_replacement in phi-opt

2021-06-01 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This improves match_simplify_replacement in phi-opt to handle the
case where there is one cheap preparation statement in the
middle basic block, similar to xor_replacement and others.
This allows removing xor_replacement too.

OK?  Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR tree-optimization/25290
* tree-ssa-phiopt.c (xor_replacement): Delete.
(tree_ssa_phiopt_worker): Delete use of xor_replacement.
(match_simplify_replacement): Allow one cheap preparation
statement that can be moved to before the if.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr96928-1.c: Update testcase now that ~
happens on the outside of the bit_xor.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c |   4 +-
 gcc/tree-ssa-phiopt.c | 136 +-
 2 files changed, 28 insertions(+), 112 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
index a2770e5e896..2e86620da11 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
@@ -1,9 +1,9 @@
 /* PR tree-optimization/96928 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-options "-O2 -fdump-tree-phiopt2 -fdump-tree-optimized" } */
 /* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } 
} */
 /* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } 
*/
-/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 
"phiopt2" } } */
 /* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
 
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 969b868397e..4f98e505029 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -63,8 +63,6 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
 static bool abs_replacement (basic_block, basic_block,
 edge, edge, gphi *, tree, tree);
-static bool xor_replacement (basic_block, basic_block,
-edge, edge, gphi *, tree, tree);
 static bool spaceship_replacement (basic_block, basic_block,
   edge, edge, gphi *, tree, tree);
 static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
@@ -352,9 +350,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool 
do_hoist_loads, bool early_p)
cfgchanged = true;
  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (!early_p
-  && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
-   cfgchanged = true;
  else if (!early_p
   && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
e2, phi, arg0,
@@ -801,9 +796,23 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
   edge true_edge, false_edge;
   gimple_seq seq = NULL;
   tree result;
+  gimple *stmt_to_move = NULL;
 
+  /* If the basic block only has a cheap preparation statement. */
   if (!empty_block_p (middle_bb))
-return false;
+{
+  stmt_to_move = last_and_only_stmt (middle_bb);
+  if (!stmt_to_move)
+   return false;
+  if (gimple_could_trap_p (stmt_to_move)
+ || gimple_has_side_effects (stmt_to_move))
+   return false;
+  if ((!is_gimple_assign (stmt_to_move)
+  || TREE_CODE (gimple_assign_lhs (stmt_to_move)) != SSA_NAME)
+ && (!is_gimple_call (stmt_to_move)
+ || !gimple_inexpensive_call_p (as_a   (stmt_to_move
+   return false;
+}
 
   /* Special case A ? B : B as this will always simplify to B. */
   if (operand_equal_for_phi_arg_p (arg0, arg1))
@@ -844,7 +853,17 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
 return false;
 
   gsi = gsi_last_bb (cond_bb);
-
+  if (stmt_to_move)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+ fprintf (dump_file, "statement un-sinked:\n");
+ print_gimple_stmt (dump_file, stmt_to_move, 0,
+  TDF_VOPS|TDF_MEMSYMS);
+   }
+  gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move);
+  gsi_move_before (, );
+}
   if (seq)
 gsi_insert_seq_before (, seq, GSI_SAME_STMT);
 
@@ -2592,109 +2611,6 @@ abs_replacement (basic_block cond_bb, basic_block 
middle_bb,
   return true;
 }
 
-/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y.  */
-
-static bool
-xor_replacement (basic_block cond_bb, basic_block middle_bb,
-edge e0 ATTRIBUTE_UNUSED, edge e1,
-gphi *phi, tree arg0, tree arg1)
-{
-  if (!INTEGRAL_TYPE_P (TREE_TYPE 

Re: [PATCH] Hashtable PR96088

2021-06-01 Thread Jonathan Wakely via Gcc-patches

On 01/06/21 18:47 +0100, Jonathan Wakely wrote:

On 01/06/21 18:45 +0100, Jonathan Wakely wrote:

On 22/05/21 18:35 +0200, François Dumont wrote:

diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
new file mode 100644
index 000..53bb754dab6
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
@@ -0,0 +1,271 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// libstdc++/96088
+
+#include 


This is a c++11 test, but it uses <string_view>.

The test fails for make check RUNTESTFLAGS=--target_board=unix/-std=gnu++11

I assume it should use { target c++17 } instead?


Same for 23_containers/unordered_map/96088.cc


I've pushed this fix.

Tested x86_64-linux.


commit 833d348aec154f231525ad2bf4c8a51c8d16b213
Author: Jonathan Wakely 
Date:   Tue Jun 1 19:05:03 2021

libstdc++: Fix effective target for new tests [PR 96088]

These tests use <string_view> so must not run for anything older than
C++17.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* testsuite/23_containers/unordered_map/96088.cc: Change
effective target to c++17.
* testsuite/23_containers/unordered_set/96088.cc: Likewise.

diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc
index 062c8316a9e..e552b04f8c8 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_map/96088.cc
@@ -1,4 +1,4 @@
-// { dg-do run { target c++11 } }
+// { dg-do run { target c++17 } }
 
 // Copyright (C) 2021 Free Software Foundation, Inc.
 //
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
index 53bb754dab6..efb2f9eb6b1 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
@@ -1,4 +1,4 @@
-// { dg-do run { target c++11 } }
+// { dg-do run { target c++17 } }
 
 // Copyright (C) 2021 Free Software Foundation, Inc.
 //


Re: [PATCH] PR libstdc++/89728 diagnose some missuses of [locale.convenience] functions

2021-06-01 Thread Jonathan Wakely via Gcc-patches

On 17/05/21 18:14 +0100, Jonathan Wakely wrote:

On 12/05/21 17:16 +0100, Jonathan Wakely wrote:

On 12/05/21 18:51 +0300, Antony Polukhin via Libstdc++ wrote:

ср, 12 мая 2021 г. в 18:38, Antony Polukhin :


ср, 12 мая 2021 г. в 17:44, Jonathan Wakely :


On 12/05/21 12:58 +0300, Antony Polukhin wrote:

ср, 12 мая 2021 г. в 12:18, Jonathan Wakely :
<...>

Or just leave it undefined, as libc++ seems to do according to your
comment in PR 89728:

error: implicit instantiation of undefined template 
'std::__1::ctype >'

Was your aim to have a static_assert that gives a more descriptive
error? We could leave it undefined in C++98 and have the static assert
for C++11 and up.


Leaving it undefined would be the best. It would allow SFINAE on ctype
and a compile time error is informative enough.

However, there may be users who instantiate ctype in a
shared library without ctype template specializations in
the main executable. Making the default ctype undefined would break
their compilation:

#include 
// no ctype specialization
c = std::tolower(ThierChar{42}, locale_from_shared_library()); // OK
right now in libstdc++, fails on libc++


What I meant was leaving the partial specialization undefined, not the
primary template, i.e.

--- a/libstdc++-v3/include/bits/locale_facets.h
+++ b/libstdc++-v3/include/bits/locale_facets.h
@@ -1476,6 +1476,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 };
 #endif //_GLIBCXX_USE_WCHAR_T

+  template
+class ctype >;
+
   /// class ctype_byname [22.2.1.2].
   template
 class ctype_byname : public ctype<_CharT>

This makes your test fail with errors like this:

In file included from /home/jwakely/gcc/12/include/c++/12.0.0/locale:40,
 from loc.C:1:
/home/jwakely/gcc/12/include/c++/12.0.0/bits/locale_facets.h: In instantiation of 'bool 
std::isspace(_CharT, const std::locale&) [with _CharT = 
std::__cxx11::basic_string]':
loc.C:16:15:   required from here
/home/jwakely/gcc/12/include/c++/12.0.0/bits/locale_facets.h:2600:47: error: invalid use of 
incomplete type 'const class std::ctype >'
 2600 | { return use_facet >(__loc).is(ctype_base::space, 
__c); }
  |  ~^~

But it shouldn't affect the uses of ctype.

What do you think?


Good idea. That way the compiler message points directly to the
misused function.

Patch is in attachment


Replaced {} with () in test to be C++98 compatible


Looks great, thanks.

I'll test and commit this tomorrow.


Not quite "tomorrow", but it's pushed to trunk now. Thanks again!


I've also pushed this fix for the new test, so it passes with
-std=gnu++98.

Tested x86_64-linux.


commit b514fce354b5309a9c232a3fe9347e3abde4385f
Author: Jonathan Wakely 
Date:   Tue Jun 1 19:01:37 2021

libstdc++: Fix new test for C++98 mode [PR 89728]

The isblank class is not supported until C++11.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* testsuite/22_locale/ctype/is/string/89728_neg.cc: Only test
isblank for C++11 and later.

diff --git a/libstdc++-v3/testsuite/22_locale/ctype/is/string/89728_neg.cc b/libstdc++-v3/testsuite/22_locale/ctype/is/string/89728_neg.cc
index 9f15620c9a8..89843b68494 100644
--- a/libstdc++-v3/testsuite/22_locale/ctype/is/string/89728_neg.cc
+++ b/libstdc++-v3/testsuite/22_locale/ctype/is/string/89728_neg.cc
@@ -45,7 +45,9 @@ void test01()
   std::isxdigit(make_str(), loc);	// { dg-error "required from here" }
   std::isalnum(make_str(), loc);	// { dg-error "required from here" }
   std::isgraph(make_str(), loc);	// { dg-error "required from here" }
-  std::isblank(make_str(), loc);	// { dg-error "required from here" }
+#if __cplusplus >= 201103
+  std::isblank(make_str(), loc);	// { dg-error "required from here" "" { target c++11 } }
+#endif
   std::toupper(make_str(), loc);	// { dg-error "required from here" }
   std::tolower(make_str(), loc);	// { dg-error "required from here" }
 }
@@ -66,7 +68,9 @@ void test02()
   std::isxdigit(make_str(), loc);	// { dg-error "required from here" }
   std::isalnum(make_str(), loc);	// { dg-error "required from here" }
   std::isgraph(make_str(), loc);	// { dg-error "required from here" }
-  std::isblank(make_str(), loc);	// { dg-error "required from here" }
+#if __cplusplus >= 201103
+  std::isblank(make_str(), loc);	// { dg-error "required from here" "" { target c++11 } }
+#endif
   std::toupper(make_str(), loc);	// { dg-error "required from here" }
   std::tolower(make_str(), loc);	// { dg-error "required from here" }
 }


Re: [PATCH] icf: Fix memory leak of a vector.

2021-06-01 Thread Jeff Law via Gcc-patches




On 6/1/2021 8:28 AM, Martin Liška wrote:

Simple leak fix.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

* ipa-icf.h: Use auto_vec for memory_access_types.

OK
jeff



Re: [PATCH] Hashtable PR96088

2021-06-01 Thread Jonathan Wakely via Gcc-patches

On 01/06/21 18:45 +0100, Jonathan Wakely wrote:

On 22/05/21 18:35 +0200, François Dumont wrote:

diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
new file mode 100644
index 000..53bb754dab6
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
@@ -0,0 +1,271 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// libstdc++/96088
+
+#include 


This is a c++11 test, but it uses <string_view>.

The test fails for make check RUNTESTFLAGS=--target_board=unix/-std=gnu++11

I assume it should use { target c++17 } instead?


Same for 23_containers/unordered_map/96088.cc





Re: [PATCH] Hashtable PR96088

2021-06-01 Thread Jonathan Wakely via Gcc-patches

On 22/05/21 18:35 +0200, François Dumont wrote:

diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
new file mode 100644
index 000..53bb754dab6
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/96088.cc
@@ -0,0 +1,271 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2021 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// libstdc++/96088
+
+#include 


This is a c++11 test, but it uses <string_view>.

The test fails for make check RUNTESTFLAGS=--target_board=unix/-std=gnu++11

I assume it should use { target c++17 } instead?



[PATCH] c++: value init vs zero init in expand_aggr_init_1 [PR65816]

2021-06-01 Thread Patrick Palka via Gcc-patches
In the case of value-initializing an object of class type T,
[dcl.init.general]/8 says:

  - if T has either no default constructor ([class.default.ctor]) or
a default constructor that is user-provided or deleted, then the
object is default-initialized;
  - otherwise, the object is zero-initialized and ...  if T has a
non-trivial default constructor, the object is default-initialized;

But when determining whether to first zero-initialize the object,
expand_aggr_init_1 incorrectly looks for _any_ user provided
constructor, not just for a user-provided _default_ constructor.  This
causes us to incorrectly skip the zero-initialization step when the
class type has a trivial default constructor alongside another
user-defined constructor.

It seems the predicate type_has_non_user_provided_default_constructor
accurately captures the above rule for when to first perform a
zero-initialization during value-initialization, so this patch corrects
the check in expand_aggr_init_1 using this predicate instead.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/65816

gcc/cp/ChangeLog:

* init.c (expand_aggr_init_1): Check
type_has_non_user_provided_default_constructor instead of
type_has_user_provided_constructor.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-delegating3.C: New test.
* g++.dg/cpp0x/dc10.C: New test.
* g++.dg/cpp0x/initlist-base4.C: New test.
* g++.dg/cpp2a/constexpr-init22.C: New test.

libstdc++-v3/ChangeLog:

* testsuite/23_containers/deque/allocator/default_init.cc,
testsuite/23_containers/forward_list/allocator/default_init.cc,
testsuite/23_containers/list/allocator/default_init.cc,
testsuite/23_containers/map/allocator/default_init.cc,
testsuite/23_containers/set/allocator/default_init.cc,
testsuite/23_containers/vector/allocator/default_init.cc,
testsuite/23_containers/vector/bool/allocator/default_init.cc:
Remove xfail.
---
 gcc/cp/init.c |  4 +--
 .../g++.dg/cpp0x/constexpr-delegating3.C  | 10 +++
 gcc/testsuite/g++.dg/cpp0x/dc10.C | 19 ++
 gcc/testsuite/g++.dg/cpp0x/initlist-base4.C   | 26 +++
 gcc/testsuite/g++.dg/cpp2a/constexpr-init22.C | 14 ++
 .../deque/allocator/default_init.cc   |  1 -
 .../forward_list/allocator/default_init.cc|  1 -
 .../list/allocator/default_init.cc|  1 -
 .../map/allocator/default_init.cc |  1 -
 .../set/allocator/default_init.cc |  1 -
 .../vector/allocator/default_init.cc  |  1 -
 .../vector/bool/allocator/default_init.cc |  1 -
 12 files changed, 71 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-delegating3.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/dc10.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/initlist-base4.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-init22.C

diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index a85f4d50750..7aba5c60b32 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -2078,9 +2078,9 @@ expand_aggr_init_1 (tree binfo, tree true_exp, tree exp, 
tree init, int flags,
  that's value-initialization.  */
   if (init == void_type_node)
 {
-  /* If the type has data but no user-provided ctor, we need to zero
+  /* If the type has data but no user-provided default ctor, we need to 
zero
 out the object.  */
-  if (!type_has_user_provided_constructor (type)
+  if (type_has_non_user_provided_default_constructor (type)
  && !is_really_empty_class (type, /*ignore_vptr*/true))
{
  tree field_size = NULL_TREE;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-delegating3.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-delegating3.C
new file mode 100644
index 000..2263ec89488
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-delegating3.C
@@ -0,0 +1,10 @@
+// PR c++/65816
+// { dg-do compile { target c++11 } }
+
+struct test {
+  int m;
+  test() = default;
+  constexpr test(int) : test() {}
+};
+
+static_assert(test(0).m == 0, "");
diff --git a/gcc/testsuite/g++.dg/cpp0x/dc10.C 
b/gcc/testsuite/g++.dg/cpp0x/dc10.C
new file mode 100644
index 000..c008a1703e8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/dc10.C
@@ -0,0 +1,19 @@
+// PR c++/65816
+// { dg-do run { target c++11 } }
+
+void* operator new(decltype(sizeof(int)), void* ptr) { return ptr; }
+
+struct test {
+  int i;
+  test() = default;
+  test(int) : test() {}
+};
+
+int main() {
+  alignas(test) unsigned char space[sizeof(test)];
+  for (auto& c : space) c = 0xff;
+
+  auto ptr = ::new() test(42);
+  int& i = static_cast(*ptr).i;
+  if (i != 0) __builtin_abort();
+}
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-base4.C 
b/gcc/testsuite/g++.dg/cpp0x/initlist-base4.C
new file mode 100644
index 000..4a02af92799
--- /dev/null

RE: [GCC][Patch] arm: Fix the mve multilib for the broken cmse support (pr99939).

2021-06-01 Thread Srinath Parvathaneni via Gcc-patches
Hi Richard,

> -Original Message-
> From: Richard Earnshaw 
> Sent: 13 April 2021 14:55
> To: Srinath Parvathaneni ; gcc-
> patc...@gcc.gnu.org
> Cc: Richard Earnshaw 
> Subject: Re: [GCC][Patch] arm: Fix the mve multilib for the broken cmse
> support (pr99939).
> 
> 
> 
> On 12/04/2021 14:04, Srinath Parvathaneni via Gcc-patches wrote:
> > Hi,
> >
> > The current CMSE support in the multilib build for "-march=armv8.1-
> m.main+mve -mfloat-abi=hard -mfpu=auto"
> > is broken as specified in PR99939 and this patch fixes the issue.
> >
> > Regression tested on arm-none-eabi and found no regressions.
> >
> > Ok for master? and Ok for GCC-10 branch?
> >
> > Regards,
> > Srinath.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 2021-04-12  Srinath Parvathaneni  
> >
> > PR target/99939
> > * gcc.target/arm/cmse/cmse-20.c: New test.
> >
> > libgcc/ChangeLog:
> >
> > 2021-04-12  Srinath Parvathaneni  
> >
> > PR target/99939
> > * config/arm/t-arm: Make changes to use cmse.c for all the
> > armv8.1-m.main mulitlibs.
> >
> >
> >
> > ### Attachment also inlined for ease of reply
> ###
> >
> >
> > diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-20.c
> > b/gcc/testsuite/gcc.target/arm/cmse/cmse-20.c
> > new file mode 100644
> > index
> >
> ..7e2739e14792624adf5b428
> 0ca58
> > a5d8320acbf0
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-20.c
> > @@ -0,0 +1,28 @@
> > +/* { dg-do run } */
> > +/* { dg-additional-options "-mcmse
> > +-Wl,--section-start,.gnu.sgstubs=0x0019" } */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +
> > +void __attribute__((cmse_nonsecure_entry))
> > +secure_fun (int a, int *p)
> > +{
> > +  void *b = cmse_check_address_range ((void *)p, a, 1);
> > +
> > +  if (b == NULL)
> > +   __builtin_abort ();
> > +  printf("%d", *((int *)b));
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > +  int *ptr;
> > +  int size = 1;
> > +  ptr = (int *) calloc (1, sizeof(int *));
> > +  *ptr = 1315852292;
> > +  secure_fun (size, ptr);
> > +  free (ptr);
> > +  return 0;
> > +}
> > diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm index
> >
> 3625a2590beec4e4e0e0881be9ad284c595c7190..949e2ee06653680211ff2dcf
> 0b55
> > a41a6aedc31c 100644
> > --- a/libgcc/config/arm/t-arm
> > +++ b/libgcc/config/arm/t-arm
> > @@ -9,11 +9,12 @@ CMSE_OPTS:=-mcmse
> >   endif
> >
> >   ifdef HAVE_CMSE
> > -ifndef HAVE_V81M
> > -libgcc-objects += cmse.o cmse_nonsecure_call.o
> > +libgcc-objects += cmse.o
> >
> >   cmse.o: $(srcdir)/config/arm/cmse.c
> > $(gcc_compile) -c $(CMSE_OPTS) $<
> > +ifndef HAVE_V81M
> > +libgcc-objects += cmse_nonsecure_call.o
> >   cmse_nonsecure_call.o: $(srcdir)/config/arm/cmse_nonsecure_call.S
> >$(gcc_compile) -c $<
> >   endif
> >
> 
> So if I have two object files using CMSE and one is built with v8m, but the
> other with v8.1m, when I link them, the needed additional support for the
> v8m object file will be missing the library support.
> 
> Wouldn't it be better to just build the cmse_nonsecure_call code
> unconditionally?  It won't be called if it's not needed, but will be there if
> something does require it.

I have modified the patch to build the cmse_nonsecure_call code unconditionally
and have attached the diff and cover letter to this email.

Please let me know if it is OK for master.

Regards,
Srinath.
> 
> R.

Hi,

The current CMSE support in the multilib build for "-march=armv8.1-m.main+mve 
-mfloat-abi=hard -mfpu=auto"
is broken as specified in PR99939 and this patch fixes the issue.

Regression tested on arm-none-eabi and found no regressions.

Ok for master? and Ok for GCC-10 branch?

Regards,
Srinath.

gcc/testsuite/ChangeLog:

2021-06-01  Srinath Parvathaneni  

* gcc.target/arm/cmse/cmse-18.c: Modify
* gcc.target/arm/cmse/cmse-20.c: New test.

libgcc/ChangeLog:

2021-06-01  Srinath Parvathaneni  

* config/arm/cmse_nonsecure_call.S: Modify to add
__ARM_FEATURE_MVE macro check.
* config/arm/t-arm: Make changes to link cmse.o and
cmse_nonsecure_call.o on finding -mcmse gcc options.

diff (2).patch
Description: diff (2).patch


[GCC][PATCH] arm: Fix multilib mapping for CDE extensions.

2021-06-01 Thread Srinath Parvathaneni via Gcc-patches
Hi All,

When a +cdecp[0-7] extension is passed in the -march string on the command line,
multilib linking fails, as described in PR100856. This patch fixes the issue by
generating a separate -march string that is used only for multilib comparison.

Regression tested on arm-none-eabi and found no regressions.

Ok for master?

Regards,
Srinath.

gcc/ChangeLog:

2021-06-01  Srinath Parvathaneni  

PR target/100856
* common/config/arm/arm-common.c (arm_canon_arch_option): Modify
function to generate canonical march string after removing cde related
compiler extensions.
(arm_canon_arch_multilib_option): Define function.
* config/arm/arm-cpus.in (CDE_LIST): Define fgroup.
* config/arm/arm.h (arm_canon_arch_multilib_option): Define macro.
(CANON_ARCH_MULTILIB_SPEC_FUNCTION): Define macro.
(ARCH_CANONICAL_MULTILIB_SPECS): Define macro.
(TARGET_MULTLILIB_ARCH): Define macro.
* gcc.c (used_arg_t::operator ()): Add condition to generate separate
march string for multilib matching.

gcc/testsuite/ChangeLog:

2021-06-01  Srinath Parvathaneni  

PR target/100856
* gcc.target/arm/acle/pr100856.c: New test.
* gcc.target/arm/multilib.exp: Modify.



### Attachment also inlined for ease of reply###


diff --git a/gcc/common/config/arm/arm-common.c 
b/gcc/common/config/arm/arm-common.c
index 
9980af6885c3dfe68f61fa0f39b23022b4e59c19..7d8c6e5253f3f1683eed99f479a09186a46c2d22
 100644
--- a/gcc/common/config/arm/arm-common.c
+++ b/gcc/common/config/arm/arm-common.c
@@ -616,6 +616,8 @@ public:
 }
 };
 
+static int multilib_arch = 0;
+
 /* Generate a canonical representation of the -march option from the
current -march string (if given) and other options on the command
line that might affect the architecture.  This aids multilib selection
@@ -703,6 +705,14 @@ arm_canon_arch_option (int argc, const char **argv)
   arm_initialize_isa (target_isa, selected_arch->common.isa_bits);
   arm_parse_option_features (target_isa, _arch->common,
 strchr (arch, '+'));
+  if (multilib_arch == 1)
+   {
+ const enum isa_feature cde_bitlist[] = {ISA_ALL_CDE, isa_nobit};
+ sbitmap isa_cdebits = sbitmap_alloc (isa_num_bits);
+ arm_initialize_isa (isa_cdebits, cde_bitlist);
+ bitmap_and_compl (target_isa, target_isa, isa_cdebits);
+   }
+
   if (fpu && strcmp (fpu, "auto") != 0)
{
  /* We assume that architectures do not have any FPU bits
@@ -786,18 +796,27 @@ arm_canon_arch_option (int argc, const char **argv)
 
   arm_initialize_isa (base_isa, selected_arch->common.isa_bits);
 
-  /* Architecture has no extension options, so just return the canonical
- architecture name.  */
-  if (selected_arch->common.extensions == NULL)
-return selected_arch->common.name;
-
   /* We're only interested in extension bits.  */
   bitmap_and_compl (target_isa, target_isa, base_isa);
 
+  /* Architecture has no extension options, so just return the canonical
+ architecture name.  */
+  if (multilib_arch == 0 && selected_arch->common.extensions == NULL)
+return selected_arch->common.name;
   /* There are no extensions needed.  Just return the canonical architecture
  name.  */
-  if (bitmap_empty_p (target_isa))
+  else if (multilib_arch == 0 && bitmap_empty_p (target_isa))
 return selected_arch->common.name;
+  else if (multilib_arch == 1
+  && (selected_arch->common.extensions == NULL
+  || bitmap_empty_p (target_isa)))
+{
+  canonical_arch = (char *) xmalloc (strlen (selected_arch->common.name)
++ strlen ("march="));
+  strcpy (canonical_arch, "march=");
+  strcat (canonical_arch, selected_arch->common.name);
+  return canonical_arch;
+}
 
   /* What is left is the architecture that the compiler will target.  We
  now need to map that back into a suitable option+features list.
@@ -899,10 +918,20 @@ arm_canon_arch_option (int argc, const char **argv)
}
 }
 
-  canonical_arch
-= (char *) xmalloc (len + strlen (selected_arch->common.name));
-
-  strcpy (canonical_arch, selected_arch->common.name);
+  if (multilib_arch == 1)
+{
+  canonical_arch
+   = (char *) xmalloc (len + strlen (selected_arch->common.name)
+   + strlen ("march="));
+  strcpy (canonical_arch, "march=");
+  strcat (canonical_arch, selected_arch->common.name);
+}
+  else
+{
+  canonical_arch
+   = (char *) xmalloc (len + strlen (selected_arch->common.name));
+  strcpy (canonical_arch, selected_arch->common.name);
+}
 
   for (std::list::iterator iter = extensions.begin ();
iter != extensions.end (); ++iter)
@@ -1069,3 +1098,15 @@ arm_asm_auto_mfpu (int argc, const char **argv)
 #define TARGET_EXCEPT_UNWIND_INFO  arm_except_unwind_info
 
 struct 

Re: [wwwdocs] lists: Correct procmail recipe

2021-06-01 Thread Andreas Schwab
On Jun 01 2021, Segher Boessenkool wrote:

> @@ -226,7 +226,7 @@ our lists into a single folder named INLIST.gcc:
>  
>  
>  :0
> -* ^Sender: .*-ow...@gcc.gnu.org
> +* ^List-Id: .*<.*@gcc.gnu.org>$

That will never match.

List-Id: Gcc-patches mailing list <gcc-patches.gcc.gnu.org>

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


rs6000: Require ELFv2 ABI for ROP test (PR100750)

2021-06-01 Thread Bill Schmidt via Gcc-patches

Hi!  PR100750 reports a failure on my part to require the ELFv2 ABI for
one of the ROP tests.  This fixes that.

Tested on powerpc64-linux-gnu, committed as obvious.

Thanks!
Bill


PR100750: Require ELFv2 ABI for ROP test

2021-06-01  Bill Schmidt  

gcc/testsuite/
PR testsuite/100750
* gcc.target/powerpc/rop-5.c: Require ELFv2 ABI.

diff --git a/gcc/testsuite/gcc.target/powerpc/rop-5.c 
b/gcc/testsuite/gcc.target/powerpc/rop-5.c
index cf04ea90eeb..f2594df8a44 100644
--- a/gcc/testsuite/gcc.target/powerpc/rop-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/rop-5.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target powerpc_elfv2 } */
 /* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
 
 /* Verify that __ROP_PROTECT__ is predefined for -mrop-protect.  */




[committed] libstdc++: Fix return value of std::ranges::advance [PR 100833]

2021-06-01 Thread Jonathan Wakely via Gcc-patches
The three-argument form of ranges::advance is supposed to return the
difference between the second argument and the distance the iterator was
advanced. When a non-random-access iterator is not advanced (because it
already equals the sentinel) we were returning 0 rather than n - 0.

libstdc++-v3/ChangeLog:

PR libstdc++/100833
* include/bits/ranges_base.h (ranges::advance(iter, n, sentinel)):
Fix return value for no-op case.
* testsuite/24_iterators/range_operations/advance.cc: Test
return values of three-argument overload.

Tested powerpc64le-linux. Committed to trunk.

commit d8326291695c0f13124c232ddf4fd34e3310e649
Author: Jonathan Wakely 
Date:   Tue Jun 1 16:02:45 2021

libstdc++: Fix return value of std::ranges::advance [PR 100833]

The three-argument form of ranges::advance is supposed to return the
difference between the second argument and the distance the iterator was
advanced. When a non-random-access iterator is not advanced (because it
already equals the sentinel) we were returning 0 rather than n - 0.

libstdc++-v3/ChangeLog:

PR libstdc++/100833
* include/bits/ranges_base.h (ranges::advance(iter, n, sentinel)):
Fix return value for no-op case.
* testsuite/24_iterators/range_operations/advance.cc: Test
return values of three-argument overload.

diff --git a/libstdc++-v3/include/bits/ranges_base.h 
b/libstdc++-v3/include/bits/ranges_base.h
index 11f05fa4e2d..32d664f1a86 100644
--- a/libstdc++-v3/include/bits/ranges_base.h
+++ b/libstdc++-v3/include/bits/ranges_base.h
@@ -749,7 +749,7 @@ namespace ranges
  }
  }
else if (__it == __bound || __n == 0)
- return iter_difference_t<_It>(0);
+ return __n;
else if (__n > 0)
  {
iter_difference_t<_It> __m = 0;
diff --git a/libstdc++-v3/testsuite/24_iterators/range_operations/advance.cc 
b/libstdc++-v3/testsuite/24_iterators/range_operations/advance.cc
index e4abf83d59e..feee08b58c4 100644
--- a/libstdc++-v3/testsuite/24_iterators/range_operations/advance.cc
+++ b/libstdc++-v3/testsuite/24_iterators/range_operations/advance.cc
@@ -49,10 +49,12 @@ test01()
   std::ranges::advance(iter, r.begin());
   VERIFY( iter == r.begin() );
 
-  std::ranges::advance(iter, 99, r.end());
+  auto diff = std::ranges::advance(iter, 99, r.end());
   VERIFY( iter == r.end() );
-  std::ranges::advance(iter, -222, r.begin());
+  VERIFY( diff == 97 );
+  diff = std::ranges::advance(iter, -222, r.begin());
   VERIFY( iter == r.begin() );
+  VERIFY( diff == -220 );
 }
 
 void
@@ -77,10 +79,12 @@ test02()
   std::ranges::advance(iter, r.begin());
   VERIFY( iter == r.begin() );
 
-  std::ranges::advance(iter, 99, r.end());
+  auto diff = std::ranges::advance(iter, 99, r.end());
   VERIFY( iter == r.end() );
-  std::ranges::advance(iter, -222, r.begin());
+  VERIFY( diff == 97 );
+  diff = std::ranges::advance(iter, -222, r.begin());
   VERIFY( iter == r.begin() );
+  VERIFY( diff == -220 );
 }
 
 void
@@ -108,15 +112,19 @@ test03()
   std::ranges::advance(iter, r.end());
   VERIFY( iter == r.end() );
 
-  std::ranges::advance(iter, 99, r.end());
+  auto diff = std::ranges::advance(iter, 99, r.end());
   VERIFY( iter == r.end() );
-  std::ranges::advance(iter, 99, r.end());
+  VERIFY( diff == 99 ); // PR libstdc++/100833
+  diff = std::ranges::advance(iter, 99, r.end());
   VERIFY( iter == r.end() );
+  VERIFY( diff == 99 );
   iter = r.begin();
-  std::ranges::advance(iter, 99, r.end());
+  diff = std::ranges::advance(iter, 99, r.end());
   VERIFY( iter == r.end() );
-  std::ranges::advance(iter, 99, r.end());
+  VERIFY( diff == 97 );
+  diff = std::ranges::advance(iter, 99, r.end());
   VERIFY( iter == r.end() );
+  VERIFY( diff == 99 );
 }
 
 void
@@ -145,17 +153,21 @@ test04()
   std::ranges::advance(iter, r2.end());
   VERIFY( iter == r2.end() );
 
-  std::ranges::advance(iter, 99, r2.end());
+  auto diff = std::ranges::advance(iter, 99, r2.end());
   VERIFY( iter == r2.end() );
-  std::ranges::advance(iter, 99, r2.end());
+  VERIFY( diff == 99 );
+  diff = std::ranges::advance(iter, 99, r2.end());
   VERIFY( iter == r2.end() );
+  VERIFY( diff == 99 );
 
   test_range r3(a);
   iter = r3.begin();
-  std::ranges::advance(iter, 99, r3.end());
+  diff = std::ranges::advance(iter, 99, r3.end());
   VERIFY( iter == r3.end() );
-  std::ranges::advance(iter, 99, r3.end());
+  VERIFY( diff == 97 );
+  diff = std::ranges::advance(iter, 99, r3.end());
   VERIFY( iter == r3.end() );
+  VERIFY( diff == 99 );
 }
 
 void
@@ -180,17 +192,21 @@ test05()
   std::ranges::advance(iter, r2.end());
   VERIFY( iter == r2.end() );
 
-  std::ranges::advance(iter, 99, r2.end());
+  auto diff = std::ranges::advance(iter, 99, r2.end());
   VERIFY( iter == r2.end() );
-  std::ranges::advance(iter, 99, r2.end());
+  VERIFY( diff == 99 );
+  diff = std::ranges::advance(iter, 99, r2.end());
   VERIFY( 

[wwwdocs] lists: Correct procmail recipe

2021-06-01 Thread Segher Boessenkool
We haven't had Sender: for a while now.  Use the standard List-Id:
header in its place.

Committed.


Segher
---
 htdocs/lists.html | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/htdocs/lists.html b/htdocs/lists.html
index 42a44ab83d9b..4ac5d5a84cd1 100644
--- a/htdocs/lists.html
+++ b/htdocs/lists.html
@@ -217,8 +217,8 @@ with enough prodding, that sending mail to the address 
listed in
 Filtering
 
 If you want to use procmail or similar tools to process the GCC
-mailing lists, you can filter using the Sender: header, as well as all
-of the RFC2369 headers (List-Subscribe, List-Unsubscribe, List-Post,
+mailing lists, you can filter using the List-Id: header from RFC2919, as well
+as all of the RFC2369 headers (List-Subscribe, List-Unsubscribe, List-Post,
 List-Archive, etc.).
 
 For example, the following procmail rule will sort all mail from
@@ -226,7 +226,7 @@ our lists into a single folder named INLIST.gcc:
 
 
 :0
-* ^Sender: .*-ow...@gcc.gnu.org
+* ^List-Id: .*<.*@gcc.gnu.org>$
 INLIST.gcc
 
 
@@ -235,7 +235,7 @@ can use the following recipe (Use at your own risk!):
 
 
 :0 Wh: msgid.lock
-* ^Sender: .*-ow...@gcc.gnu.org
+* ^List-Id: .*<.*@gcc.gnu.org>$
 | formail -D 8192 msgid.cache
 
 
-- 
1.8.3.1



Re: [PATCH 06/57] rs6000: Add helper functions for parsing

2021-06-01 Thread Bill Schmidt via Gcc-patches



On 5/21/21 6:43 PM, Segher Boessenkool wrote:


Yes, wrappers is a no-go.  But you could just have added the features
you need to the generic code?  Was there a technical reason not to do
that?  It sounds useful in many places, not just here.
I agree it would be nice if all the gen* tools had line/column 
reporting.  Maybe we could look at that as a follow-up?  I was trying to 
keep the patch series as simple as possible, since it's already pretty 
large.



+static int
+match_integer ()
+{
+  int startpos = pos;
+  if (linebuf[pos] == '-')
+safe_inc_pos ();
+
+  int lastpos = pos - 1;
+  while (isdigit (linebuf[lastpos + 1]))
+if (++lastpos >= LINELEN - 1)
+  {
+   (*diag) ("line length overrun in match_integer.\n");
+   exit (EC_INTERR);
+  }
+
+  if (lastpos < pos)
+return MININT;
+
+  pos = lastpos + 1;
+  char *buf = (char *) malloc (lastpos - startpos + 2);
+  memcpy (buf, &linebuf[startpos], lastpos - startpos + 1);
+  buf[lastpos - startpos + 1] = '\0';
+
+  int x;
+  sscanf (buf, "%d", &x);
+  return x;
+}

Can't you just use strtol?



<...my extraneous response and follow-ups deleted...>

Sorry, I wrote this a long time ago and forgot what I was doing here.  
Yes, that would be a reasonable thing to do.  Even more reasonable would 
be to do what I thought I remembered doing...


It would be best to just save these as strings, not integers, since I'm 
just going to fprintf them back to a file later anyway. All of these are 
strings of at most two digits (with possible leading minus), so just 
using isdigit to find the strings is efficient.  Using sscanf (or 
strtol) and fprintf is just silly. I'll rework to remove that.



Frankly I don't see it...and I don't see anything in the GNU or GCC
coding conventions about this.  I'd rather keep what I have.

The most used side effect in conditionals is assignment.  This saves a
few keystrokes, and maybe a whole line in the code, but it makes the
code a lot harder to comprehend.

In your code it does not matter so very much, since you exit if there
is an error anyway, but it makes it a lot harder to verify what the code
does in all cases, and to check that that is what is wanted.

ok.  Not worth a big argument. ;-)



+  {
+   (*diag) ("line length overrun.\n");
+   exit (EC_INTERR);
+  }

I don't think you should check for line length overrun in any of these
functions, btw?  Just check where you read them in, and that is plenty?

Yes -- I think if I check in advance_line for a line that doesn't end in
\n, that will make a lot of those things superfluous.

I've been a little reluctant to do that, since eventually I want to
support escape-newline processing to avoid long input lines, but I can
still work around that when I get to it.

Aha, that explains.  Yeah you should be able to check the length again
when concatenating two lines, and that is all you need?


Mostly.  I'll have to make sure the line/column reporting is still 
sensible, which is the tricky bit.  But that's for later...


Thanks again for the review!
Bill



Checking for errors repeatedly is so error-prone :-(


Segher


[pushed] c++: no clobber for C++20 destroying delete [PR91859]

2021-06-01 Thread Jason Merrill via Gcc-patches
Before C++20 added destroying operator delete, by the time we called
operator delete for a pointer, the object would already be gone.  But that
isn't true for destroying delete.  Since the optimizers' assumptions about
operator delete are based on either DECL_IS_REPLACEABLE_OPERATOR (which
already is not set) or CALL_FROM_NEW_OR_DELETE_P, let's avoid setting the
latter flag in this case.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR c++/91859

gcc/ChangeLog:

* tree.h (CALL_FROM_NEW_OR_DELETE_P): Adjust comment.

gcc/cp/ChangeLog:

* call.c (build_op_delete_call): Don't set CALL_FROM_NEW_OR_DELETE_P
for destroying delete.
* init.c (build_delete): Don't clobber before destroying delete.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/destroying-delete5.C: New test.
---
 gcc/tree.h|  2 +-
 gcc/cp/call.c |  4 ++-
 gcc/cp/init.c |  5 ++-
 .../g++.dg/cpp2a/destroying-delete5.C | 36 +++
 4 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/destroying-delete5.C

diff --git a/gcc/tree.h b/gcc/tree.h
index 260a3ae6c83..62b2de46479 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -937,7 +937,7 @@ extern void omp_clause_range_check_failed (const_tree, 
const char *, int,
 
 /* In a CALL_EXPR, if the function being called is DECL_IS_OPERATOR_NEW_P or
DECL_IS_OPERATOR_DELETE_P, true for allocator calls from C++ new or delete
-   expressions.  */
+   expressions.  Not set for C++20 destroying delete operators.  */
 #define CALL_FROM_NEW_OR_DELETE_P(NODE) \
   (CALL_EXPR_CHECK (NODE)->base.protected_flag)
 
diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index bf524b50bc4..90192b1b8aa 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -7206,8 +7206,10 @@ build_op_delete_call (enum tree_code code, tree addr, 
tree size,
 treat that as an implicit delete-expression.  This is also called for
 the delete if the constructor throws in a new-expression, and for a
 deleting destructor (which implements a delete-expression).  */
+  /* But leave this flag off for destroying delete to avoid wrong
+assumptions in the optimizers.  */
   tree call = extract_call_expr (ret);
-  if (TREE_CODE (call) == CALL_EXPR)
+  if (TREE_CODE (call) == CALL_EXPR && !destroying_delete_p (fn))
CALL_FROM_NEW_OR_DELETE_P (call) = 1;
 
   return ret;
diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index a85f4d50750..04d495807ef 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -4881,7 +4881,10 @@ build_delete (location_t loc, tree otype, tree addr,
complain);
 }
 
-  if (!destroying_delete && type_build_dtor_call (type))
+  if (destroying_delete)
+/* The operator delete will call the destructor.  */
+expr = addr;
+  else if (type_build_dtor_call (type))
 expr = build_dtor_call (cp_build_fold_indirect_ref (addr),
auto_delete, flags, complain);
   else
diff --git a/gcc/testsuite/g++.dg/cpp2a/destroying-delete5.C 
b/gcc/testsuite/g++.dg/cpp2a/destroying-delete5.C
new file mode 100644
index 000..553c964b9e9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/destroying-delete5.C
@@ -0,0 +1,36 @@
+// PR c++/91859
+// { dg-do run { target c++20 } }
+// { dg-additional-options -O2 }
+
+#include 
+#include 
+
+struct Expression {
+  int i = 0;
+  void *operator new(std::size_t);
+  void operator delete(Expression *, std::destroying_delete_t);
+};
+
+void * Expression::operator new(std::size_t sz)
+{
+  return std::malloc(sz);
+}
+
+int i;
+
+void Expression::operator delete(Expression *p, std::destroying_delete_t)
+{
+  Expression * e = p;
+  ::i = e->i;
+  p->~Expression();
+  std::free(p);
+}
+
+int main()
+{
+  auto p = new Expression();
+  p->i = 1;
+  delete p;
+  if (i != 1)
+__builtin_abort();
+}
-- 
2.27.0



Re: [PATCH] ARM: reset arm_fp16_format

2021-06-01 Thread Richard Earnshaw via Gcc-patches




On 01/06/2021 15:05, Martin Liška wrote:

Hello.

The patch fixes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98636#c20 
where

target option restore can be called and arm_fp16_format should be reset
to ARM_FP16_FORMAT_NONE.

It fixes the ICE in the PR.

Could the ARM folks please test the patch for me on an Arm machine?
Thanks,
Martin

gcc/ChangeLog:

 PR target/98636
 * config/arm/arm.c (arm_option_reconfigure_globals): Reset
 the option if isa_bit_fp16 is not set.
---
  gcc/config/arm/arm.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7b37e1b602c..4543f3c6b55 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3765,6 +3765,8 @@ arm_option_reconfigure_globals (void)
  error ("selected fp16 options are incompatible");
    arm_fp16_format = ARM_FP16_FORMAT_IEEE;
  }
+  else
+    arm_fp16_format = ARM_FP16_FORMAT_NONE;

    arm_arch_cde = 0;
    arm_arch_cde_coproc = 0;


My initial reaction is 'that can't be right'.

How would -mfp16-format=alternative ever work in this case?

R.


Re: [PATCH] Revert patch that disabled exporting of global ranges.

2021-06-01 Thread Aldy Hernandez via Gcc-patches

On 6/1/21 4:54 PM, Andrew MacLeod wrote:

On 6/1/21 10:51 AM, Aldy Hernandez wrote:

Andrew's last set of changes fixes the bootstrap problem on i686 when
global ranges are exported from evrp.  The specific patch that fixes the
problem is 715914d3:

Author: Andrew MacLeod 
Date:   Mon May 31 12:13:50 2021 -0400

    Do not calculate new values when evaluating a debug statement.

    Add a flag to enable/disable immediately improving poor values 
found during
    cache propagation. Then disable it when processing debug 
statements.


This patch reverts commit 2364b58 now that exporting of global ranges 
works.


Tested on x86-64 Linux with default flags, and on i686 with the flags in
the PR: --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++ --enable-cet i686-linux 
--enable-bootstrap --with-fpmath=sse --disable-libcc1 
--disable-libcilkrts --disable-libsanitizer


OK for trunk?
Aldy


OK.  you have a follow up too with the other global export tweaks right?



Yes, but I didn't want to mix this patch with that one, so I'll test and 
post independently.


Aldy



Re: [PATCH RFA (diagnostic)] c++: -Wdeprecated-copy and #pragma diagnostic [PR94492]

2021-06-01 Thread David Malcolm via Gcc-patches
On Wed, 2021-05-26 at 10:00 -0400, Jason Merrill via Gcc-patches wrote:
> On 5/13/21 9:07 PM, Jason Merrill wrote:
> > On 5/13/21 7:38 PM, Martin Sebor wrote:
> > > On 5/13/21 1:28 PM, Jason Merrill via Gcc-patches wrote:
> > > > Ping.
> > > > 
> > > > On 4/28/21 9:32 AM, Jason Merrill wrote:
> > > > >   -Wdeprecated-copy was depending only on the state of the
> > > > > warning 
> > > > > at the
> > > > > point where we call the function, making it hard to use
> > > > > #pragma 
> > > > > diagnostic
> > > > > to suppress the warning for a particular implicitly declared
> > > > > function.
> > > > > 
> > > > > But checking whether the warning is enabled at the location
> > > > > of the 
> > > > > implicit
> > > > > declaration turned out to be a bit complicated;
> > > > > option_enabled only 
> > > > > tests
> > > > > whether it was enabled at the start of compilation, the
> > > > > actual test 
> > > > > only
> > > > > existed in the middle of diagnostic_report_diagnostic.  So
> > > > > this patch
> > > > > factors it out and adds a new warning_enabled function to
> > > > > diagnostic.h.
> > > 
> > > There is a bit of overlap in this patch with my work here:
> > >   https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563862.html
> > > but nothing that concerns me, for whatever it's worth.
> > > 
> > > In my ongoing work to extend TREE_NO_WARNING to more than one bit
> > > I've been thinking of introducing a function (actually a pair of
> > > them) similar to warning_enabled().  The overloads will take
> > > a tree and gimple* rather than a location so that they can
> > > consider
> > > the inlining context.  This is only useful in the middle end so
> > > front ends can still use location_t.
> > > 
> > > Just one suggestion: since warning_at() takes location_t and int
> > > for the option in that order, I would recommend doing the same
> > > for warning_enabled(), just to reduce the risk of confusion.
> > > (It would be nice if location_t could be something other than
> > > an arithmetic type).
> > 
> > Sure.  I'd probably rename it to warning_enabled_at, in that case,
> > and 
> > drop the default argument.
> 
> Looks like I never sent the (slightly) updated patch.

The diagnostic changes look good to me, thanks.

Dave



Re: [PATCH] Revert patch that disabled exporting of global ranges.

2021-06-01 Thread Andrew MacLeod via Gcc-patches

On 6/1/21 10:51 AM, Aldy Hernandez wrote:

Andrew's last set of changes fixes the bootstrap problem on i686 when
global ranges are exported from evrp.  The specific patch that fixes the
problem is 715914d3:

Author: Andrew MacLeod 
Date:   Mon May 31 12:13:50 2021 -0400

Do not calculate new values when evaluating a debug statement.

Add a flag to enable/disable immediately improving poor values 
found during
cache propagation. Then disable it when processing debug statements.

This patch reverts commit 2364b58 now that exporting of global ranges works.

Tested on x86-64 Linux with default flags, and on i686 with the flags in
the PR: --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++ --enable-cet i686-linux 
--enable-bootstrap --with-fpmath=sse --disable-libcc1 --disable-libcilkrts 
--disable-libsanitizer

OK for trunk?
Aldy


OK.  you have a follow up too with the other global export tweaks right?



[PATCH] Revert patch that disabled exporting of global ranges.

2021-06-01 Thread Aldy Hernandez via Gcc-patches
Andrew's last set of changes fixes the bootstrap problem on i686 when
global ranges are exported from evrp.  The specific patch that fixes the
problem is 715914d3:

Author: Andrew MacLeod 
Date:   Mon May 31 12:13:50 2021 -0400

Do not calculate new values when evaluating a debug statement.

Add a flag to enable/disable immediately improving poor values 
found during
cache propagation. Then disable it when processing debug statements.

This patch reverts commit 2364b58 now that exporting of global ranges works.

Tested on x86-64 Linux with default flags, and on i686 with the flags in
the PR: --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++ --enable-cet i686-linux 
--enable-bootstrap --with-fpmath=sse --disable-libcc1 --disable-libcilkrts 
--disable-libsanitizer

OK for trunk?
Aldy

gcc/ChangeLog:

PR/tree-optimization 100787
* gimple-ssa-evrp.c: Enable exporting of global ranges.

gcc/testsuite/ChangeLog:

* gcc.dg/Wstringop-overflow-55.c: Adjust for global ranges changes.
* gcc.dg/pr80776-1.c: Same.
---
 gcc/gimple-ssa-evrp.c| 6 ++
 gcc/testsuite/gcc.dg/Wstringop-overflow-55.c | 8 
 gcc/testsuite/gcc.dg/pr80776-1.c | 4 +---
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/gcc/gimple-ssa-evrp.c b/gcc/gimple-ssa-evrp.c
index f1eea206afd..118d10365a0 100644
--- a/gcc/gimple-ssa-evrp.c
+++ b/gcc/gimple-ssa-evrp.c
@@ -127,8 +127,7 @@ public:
 if (dump_file && (dump_flags & TDF_DETAILS))
   m_ranger->dump (dump_file);
 
-// FIXME: Do not export ranges until PR100787 is fixed.
-//m_ranger->export_global_ranges ();
+m_ranger->export_global_ranges ();
 disable_ranger (cfun);
   }
 
@@ -194,8 +193,7 @@ public:
 if (dump_file && (dump_flags & TDF_DETAILS))
   m_ranger->dump (dump_file);
 
-// FIXME: Do not export ranges until PR100787 is fixed.
-//m_ranger->export_global_ranges ();
+m_ranger->export_global_ranges ();
 disable_ranger (cfun);
   }
 
diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-55.c 
b/gcc/testsuite/gcc.dg/Wstringop-overflow-55.c
index 5f83af7c57f..c3c2dbe06dd 100644
--- a/gcc/testsuite/gcc.dg/Wstringop-overflow-55.c
+++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-55.c
@@ -66,7 +66,7 @@ void warn_ptrdiff_anti_range_add (ptrdiff_t i)
 {
   i |= 1;
 
-  char ca5[5];  // { dg-message "at offset \\\[1, 5]" "pr?" { 
xfail *-*-* } }
+  char ca5[5];  // { dg-message "at offset \\\[1, 5]" "pr?" }
   char *p0 = ca5;   // offset
   char *p1 = p0 + i;//  1-5
   char *p2 = p1 + i;//  2-5
@@ -74,7 +74,7 @@ void warn_ptrdiff_anti_range_add (ptrdiff_t i)
   char *p4 = p3 + i;//  4-5
   char *p5 = p4 + i;//   5
 
-  memset (p5, 0, 5);// { dg-warning "writing 5 bytes into a region of 
size 0" "pr?" { xfail *-*-* } }
+  memset (p5, 0, 5);// { dg-warning "writing 5 bytes into a region of 
size" "pr?" }
 
   sink (p0, p1, p2, p3, p4, p5);
 }
@@ -83,7 +83,7 @@ void warn_int_anti_range (int i)
 {
   i |= 1;
 
-  char ca5[5];  // { dg-message "at offset \\\[1, 5]" "pr?" { 
xfail *-*-* } }
+  char ca5[5];  // { dg-message "at offset \\\[1, 5]" "pr?" }
   char *p0 = ca5;   // offset
   char *p1 = p0 + i;//  1-5
   char *p2 = p1 + i;//  2-5
@@ -91,7 +91,7 @@ void warn_int_anti_range (int i)
   char *p4 = p3 + i;//  4-5
   char *p5 = p4 + i;//   5
 
-  memset (p5, 0, 5);// { dg-warning "writing 5 bytes into a region of 
size 0" "pr?" { xfail *-*-* } }
+  memset (p5, 0, 5);// { dg-warning "writing 5 bytes into a region of 
size" "pr?" }
 
   sink (p0, p1, p2, p3, p4, p5);
 }
diff --git a/gcc/testsuite/gcc.dg/pr80776-1.c b/gcc/testsuite/gcc.dg/pr80776-1.c
index af41c0c2ffa..f3a120b6744 100644
--- a/gcc/testsuite/gcc.dg/pr80776-1.c
+++ b/gcc/testsuite/gcc.dg/pr80776-1.c
@@ -17,7 +17,5 @@ Foo (void)
 __builtin_unreachable ();
   if (! (0 <= i && i <= 99))
 __builtin_unreachable ();
-  /* The correctness bits for [E]VRP cannot handle chained conditionals
- when deciding to ignore a unreachable branch for setting SSA range info. 
*/
-  sprintf (number, "%d", i); /* { dg-bogus "writing" "" { xfail *-*-* } } */
+  sprintf (number, "%d", i); /* { dg-bogus "writing" "" } */
 }
-- 
2.31.1



[PATCH] icf: Fix memory leak of a vector.

2021-06-01 Thread Martin Liška

Simple leak fix.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

* ipa-icf.h: Use auto_vec for memory_access_types.
---
 gcc/ipa-icf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h
index 9f21a2018fc..4b4d4924512 100644
--- a/gcc/ipa-icf.h
+++ b/gcc/ipa-icf.h
@@ -372,7 +372,7 @@ public:
   hashval_t gcode_hash;
 
   /* Vector of subpart of memory access types.  */

-  vec memory_access_types;
+  auto_vec memory_access_types;
 
   /* Total number of SSA names used in the function.  */

   unsigned ssa_names_size;
--
2.31.1



Re: [PATCH] PR tree-optimization/100781 - Do not calculate new values when evaluating a debug, statement.

2021-06-01 Thread Andrew MacLeod via Gcc-patches

On 6/1/21 3:34 AM, Richard Biener wrote:

On Tue, Jun 1, 2021 at 3:38 AM Andrew MacLeod via Gcc-patches
 wrote:

An ongoing issue is that the order we evaluate things in can affect
decisions along the way. As ranger isn't a fully iterative pass, we can
sometimes come up with different results if back edges are processed in
different orders.

One of the ways this can happen is when the cache is propagating
on-entry values for an SSA_NAME. It calculates outgoing edge values and
the gori-compute engine can flag ssa-names that were involved in a range
calculation that have not yet been initialized.  When the propagation
for the original name is done, it goes back and examines the "poor
values" and tries to quickly calculate a better range, and if it comes
up with one, immediately tries to go back  and update the location/range
gori_compute flagged.   This produces better ranges earlier.

However, when we do this in different orders, we can get different
results.  We were processing the uses on is_gimple_debug statements just
like normal uses, and this would sometimes cause a difference in how
things were resolved.

This patch adds a flag to enable/disable this attempt to look up new
values, and when range_of_expr is processing the use on a debug
statement, turns it off for the query.  This means the query will never
cause a new lookup, and this should resolve all the -fcompare-debug issues.

Bootstrapped on x86_64-pc-linux-gnu, with no new regressions. Pushed.

Please check if such fixes also apply to the GCC 11 branch.

Richard.


I've checked both testcases against gcc11 release, and neither is an 
issue there.  Much of this was triggered by changes to the export list.  
That said, is there potential for it to surface? The potential is 
probably there.   We'd have to address it differently tho.  For the 
gcc11 release, since we always run in hybrid mode it doesn't really 
matter if ranger looks up ranges for debug statements... EVRP will still 
pick up what we used to get for them.  We could simply disable looking 
for contextual ranges for is_gimple_stmt and simply pick up the best 
known global/on-entry value available..   I can either provide a patch 
for that now, or deal with it if we ever get a PR.  I'm ok either way.


btw, when is the next point release? I added an infrastructure patch to 
trunk (https://gcc.gnu.org/pipermail/gcc-patches/2021-May/569884.html) 
to enable replacing the on-entry cache to deal with memory consumption 
issues like in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100299 .  I 
specifically put it in early before the other changes so that it could 
be directly applied to gcc11 as well, but I need to follow up with one 
of the replacements I have queued up to look at if we are interested in 
fixing this in gcc 11.  I'll bump the priority to try to hit the next 
release if that's the case.


Andrew



Re: [PATCH v2] Add vec_const_duplicate optab and TARGET_GEN_MEMSET_SCRATCH_RTX

2021-06-01 Thread Jeff Law via Gcc-patches




On 6/1/2021 7:29 AM, H.J. Lu via Gcc-patches wrote:

On Tue, Jun 1, 2021 at 6:25 AM Richard Biener
 wrote:

On Tue, Jun 1, 2021 at 3:05 PM H.J. Lu  wrote:

On Mon, May 31, 2021 at 11:54:53PM -0600, Jeff Law wrote:


On 5/31/2021 11:50 PM, Richard Sandiford wrote:

"H.J. Lu via Gcc-patches"  writes:

On Mon, May 31, 2021 at 06:32:04AM -0700, H.J. Lu wrote:

On Mon, May 31, 2021 at 6:26 AM Richard Biener
 wrote:

On Mon, May 31, 2021 at 3:12 PM H.J. Lu  wrote:

On Mon, May 31, 2021 at 5:46 AM Richard Biener
 wrote:

On Mon, May 31, 2021 at 2:09 PM H.J. Lu  wrote:

On Wed, May 26, 2021 at 10:28:16AM +0200, Richard Biener wrote:

   -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx DATA, scalar_int_mode
MODE)
   This function returns the RTL of a register containing
   'GET_MODE_SIZE (MODE)' consecutive copies of the unsigned char
   value given in the RTL register DATA.  For example, if MODE is 4
   bytes wide, return the RTL for 0x01010101*DATA.

For this one I wonder if it should be an optab instead.  Couldn't you
use the existing vec_duplicate for this by using (paradoxical) subregs
like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI (reg:QI ...)))?

I tried.   It doesn't even work on x86.  See:

https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html

Not sure what I should read from there...


There are special cases to subreg HI, SI and DI modes of TI mode in
ix86_gen_memset_value_from_prev.   simplify_gen_subreg doesn't
work here.   Each backend may need its own special handling.

OK, I guess I'm not (RTL) qualified enough to further review these parts,
sorry.  Since we're doing code generation the canonical way to communicate
with backends should be optabs, not some set of disconnected target hooks.
But as said, I probably don't know enough of RTL to see why it's the only way.

Richard.

Here is the patch to add optabs instead.  Does it look OK?

Thanks.

H.J.
---
Add 2 optabs:

1. integer_extract: Extract lower bit value from the integer value in
TImode, OImode or XImode.

That sounds very specific, esp. the restriction to {TI,OI,XI}mode.
It also sounds like it matches (subreg:{TI,OI,XI} (...) 0).  There are
existing target hooks verifying subreg validity - why's that not a good
fit here?  ISTR you say gen_lowpart () doesn't work (or was it
simplify_gen_subreg?), why's that so?

{TI,OI,XI}mode are storage only integer types.   subreg doesn't work
well on them.  I got

[hjl@gnu-cfl-2 pieces]$ cat s2.i
extern void *ops;

void
foo (int c)
{
__builtin_memset (ops, c, 34);
}
[hjl@gnu-cfl-2 pieces]$ make s2.s
/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/
-O2 -march=haswell -S s2.i
during RTL pass: reload
s2.i: In function ‘foo’:
s2.i:7:1: internal compiler error: maximum number of generated reload
insns per insn achieved (90)
  7 | }
| ^
0x1050734 lra_constraints(bool)
/export/gnu/import/git/gitlab/x86-gcc/gcc/lra-constraints.c:5091
0x1039536 lra(_IO_FILE*)
/export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:2336
0xfe1140 do_reload
/export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:5822
0xfe162e execute
/export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:6008
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See  for instructions.
make: *** [Makefile:32: s2.s] Error 1
[hjl@gnu-cfl-2 pieces]$

due to

(insn 12 11 0 (set (mem:HI (plus:DI (reg/f:DI 84)
  (const_int 32 [0x20])) [0 MEM  [(void
*)ops.0_1]+32 S2 A8])
  (subreg:HI (reg:OI 51 xmm15) 0)) "s2.i":6:3 -1
   (nil))

The new optab gives us

(insn 12 11 13 2 (set (reg:TI 88)
  (reg:TI 51 xmm15)) "s2.i":6:3 -1
   (nil))
(insn 13 12 14 2 (set (reg:SI 89)
  (subreg:SI (reg:TI 88) 0)) "s2.i":6:3 -1
   (nil))
(insn 14 13 15 2 (set (reg:HI 87)
  (subreg:HI (reg:SI 89) 0)) "s2.i":6:3 -1
   (nil))

that looks odd to me - what's the final result after LRA?  I think

I got:

vmovd %edi, %xmm15
movq ops(%rip), %rdx
vpbroadcastb %xmm15, %ymm15
vmovq %xmm15, %rax move to GPR
vmovdqu %ymm15, (%rdx)
movw %ax, 32(%rdx)    subreg of GPR
vzeroupper
ret


we should see to make lowpart_subreg work on {XI,OI,TI}mode.
Only two steps should be necessary at most:
xmm -> gpr, gpr -> subreg, or gpr -> subreg.  So the expander
code in memset should try to generate the subreg directly

subreg didn't fail on x86 when I tried.


and if that fails, try a word_mode subreg followed by the subreg.

I will try word_mode subreg.


Here is the v2 patch to use word_mode subreg.  For

---
extern void *ops;

void
foo (int c)
{
__builtin_memset (ops, 4, 32);
}
---

without vec_const_duplicate, I got

   movl$4, %eax
   movqops(%rip), %rdx
   movd%eax, %xmm0
   punpcklbw   %xmm0, %xmm0
   punpcklwd   %xmm0, %xmm0
   pshufd  $0, %xmm0, %xmm0
   movups 

[PATCH] Introduce -Wcoverage-invalid-line-number

2021-06-01 Thread Martin Liška

Hello.

As seen in the PR, one can easily corrupt line number information and
we can end up with a function that ends before it starts ;)
I'm adding a new warning for that instead of the ICE.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

PR gcov-profile/100788

gcc/ChangeLog:

* common.opt: Add new option.
* coverage.c (coverage_begin_function): Emit warning instead of
the internal compiler error.
* doc/invoke.texi: Document the option.
* toplev.c (process_options): Enable it by default.

gcc/testsuite/ChangeLog:

* gcc.dg/pr100788.c: New test.
---
 gcc/common.opt  |  4 
 gcc/coverage.c  | 31 ---
 gcc/doc/invoke.texi | 11 +++
 gcc/testsuite/gcc.dg/pr100788.c | 13 +
 gcc/toplev.c| 19 +--
 5 files changed, 61 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr100788.c

diff --git a/gcc/common.opt b/gcc/common.opt
index ffb968d90f8..509937da24f 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -826,6 +826,10 @@ Wcoverage-mismatch
 Common Var(warn_coverage_mismatch) Init(1) Warning
 Warn in case profiles in -fprofile-use do not match.
 
+Wcoverage-invalid-line-number

+Common Var(warn_coverage_invalid_linenum) Init(1) Warning
+Warn in case a function ends earlier than it begins due to an invalid linenum 
macros.
+
 Wmissing-profile
 Common Var(warn_missing_profile) Init(1) Warning
 Warn in case profiles in -fprofile-use do not exist.
diff --git a/gcc/coverage.c b/gcc/coverage.c
index 5a344cdfc17..dfc8108d5d8 100644
--- a/gcc/coverage.c
+++ b/gcc/coverage.c
@@ -622,18 +622,16 @@ coverage_compute_cfg_checksum (struct function *fn)
 int
 coverage_begin_function (unsigned lineno_checksum, unsigned cfg_checksum)
 {
-  expanded_location xloc;
-  unsigned long offset;
-
   /* We don't need to output .gcno file unless we're under -ftest-coverage
  (e.g. -fprofile-arcs/generate/use don't need .gcno to work). */
   if (no_coverage || !bbg_file_name)
 return 0;
 
-  xloc = expand_location (DECL_SOURCE_LOCATION (current_function_decl));

+  expanded_location startloc
+= expand_location (DECL_SOURCE_LOCATION (current_function_decl));
 
   /* Announce function */

-  offset = gcov_write_tag (GCOV_TAG_FUNCTION);
+  unsigned long offset = gcov_write_tag (GCOV_TAG_FUNCTION);
   if (param_profile_func_internal_id)
 gcov_write_unsigned (current_function_funcdef_no + 1);
   else
@@ -650,16 +648,27 @@ coverage_begin_function (unsigned lineno_checksum, 
unsigned cfg_checksum)
   gcov_write_unsigned (DECL_ARTIFICIAL (current_function_decl)
   && !DECL_FUNCTION_VERSIONED (current_function_decl)
   && !DECL_LAMBDA_FUNCTION_P (current_function_decl));
-  gcov_write_filename (xloc.file);
-  gcov_write_unsigned (xloc.line);
-  gcov_write_unsigned (xloc.column);
+  gcov_write_filename (startloc.file);
+  gcov_write_unsigned (startloc.line);
+  gcov_write_unsigned (startloc.column);
 
   expanded_location endloc = expand_location (cfun->function_end_locus);
 
   /* Function can start in a single file and end in another one.  */

-  int end_line = endloc.file == xloc.file ? endloc.line : xloc.line;
-  int end_column = endloc.file == xloc.file ? endloc.column: xloc.column;
-  gcc_assert (xloc.line <= end_line);
+  int end_line
+= endloc.file == startloc.file ? endloc.line : startloc.line;
+  int end_column
+= endloc.file == startloc.file ? endloc.column: startloc.column;
+
+  if (startloc.line > end_line)
+{
+  warning_at (DECL_SOURCE_LOCATION (current_function_decl),
+ OPT_Wcoverage_invalid_line_number,
+ "function starts on a higher line number than it ends");
+  end_line = startloc.line;
+  end_column = startloc.column;
+}
+
   gcov_write_unsigned (end_line);
   gcov_write_unsigned (end_column);
   gcov_write_length (offset);
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 08c3206b719..e91680ab329 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -5795,6 +5795,17 @@ poorly optimized code and is useful only in the
 case of very minor changes such as bug fixes to an existing code-base.
 Completely disabling the warning is not recommended.
 
+@item -Wno-coverage-invalid-line-number

+@opindex Wno-coverage-invalid-line-number
+@opindex Wcoverage-invalid-line-number
+Warn in case a function ends earlier than it begins due
+to invalid linenum macros.  The warning is emitted only
+with @option{--coverage} enabled.
+ By default, this warning is enabled and is treated as an
+error.  @option{-Wno-coverage-invalid-line-number} can be used to disable the
+warning or @option{-Wno-error=coverage-invalid-line-number} can be used to
+disable the error.
+
 @item -Wno-cpp
 @r{(C, Objective-C, C++, Objective-C++ and Fortran only)}
 @opindex 

[PATCH] ARM: reset arm_fp16_format

2021-06-01 Thread Martin Liška

Hello.

The patch fixes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98636#c20 where
target option restore can be called and arm_fp16_format should be reset
to ARM_FP16_FORMAT_NONE.

It fixes the ICE in the PR.

Could the ARM folks please test the patch for me on an Arm machine?
Thanks,
Martin

gcc/ChangeLog:

PR target/98636
* config/arm/arm.c (arm_option_reconfigure_globals): Reset
the option if isa_bit_fp16 is not set.
---
 gcc/config/arm/arm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7b37e1b602c..4543f3c6b55 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3765,6 +3765,8 @@ arm_option_reconfigure_globals (void)
error ("selected fp16 options are incompatible");
   arm_fp16_format = ARM_FP16_FORMAT_IEEE;
 }
+  else
+arm_fp16_format = ARM_FP16_FORMAT_NONE;
 
   arm_arch_cde = 0;

   arm_arch_cde_coproc = 0;
--
2.31.1



Re: [PATCH][i386] Split not+broadcast+pand to broadcast+pandn.

2021-06-01 Thread Segher Boessenkool
>   PR target/100711
>   * simplify-rtx.c (simplify_unary_operation_1):
>   Simplify (vec_duplicate (not op)) to (not (vec_duplicate op)).

This is not a simplification.  If we want to do this we need to document
this canonicalisation (in md.texi, "Insn Canonicalizations").

> +/* Prefer (not (vec_duplicate (nonimmedaite_operand)))
> +   to (vec_duplicate (not (nonimmedaite_operand))).  */

What Andrew said here (also, it's misspelled :-) )

> +case VEC_DUPLICATE:
> +  if (GET_CODE (op) == NOT)
> + return gen_rtx_NOT (mode, gen_rtx_VEC_DUPLICATE (mode, XEXP (op, 0)));
> +  break;

If it isn't a canonicalisation you need to simplify the result, and then
only do it if it does in fact simplify.  You risk "simplification" loops
if you don't.


Segher


Re: GCC documentation: porting to Sphinx

2021-06-01 Thread Michael Matz
Hello,

On Tue, 1 Jun 2021, Martin Liška wrote:

> On 5/31/21 5:49 PM, Michael Matz wrote:
> > Hello Martin,
> > 
> > On Mon, 31 May 2021, Martin Liška wrote:
> > 
> >> I've made quite some progress with the porting of the documentation and
> >> I would like to present it to the community now:
> >> https://splichal.eu/scripts/sphinx/
> >>   Note the documentation is automatically ([1]) generated from texinfo with
> >> a
> >> GitHub workflow ([2]).
> > 
> > One other thing I was recently thinking about, in the Sphinx vs. texinfo
> > discussion: locally available documentation browsable/searchable in
> > terminal with info(1) (or equivalents).
> 
> Yes, that's handy.
> 
> > I think the above (i.e. generating .rst from the texinfo file) would 
> > immediately nullify all my worries.  So, just to be extra sure: your 
> > proposal now is to generate the .rst files, and that .texinfo remains 
> > the maintained sources, right?
> 
> No, .texinfo files will be gone. However, Sphinx can output to info 
> format: 
> https://www.sphinx-doc.org/en/master/man/sphinx-build.html#cmdoption-sphinx-build-M

I see, that's good to hear.

> And I've just added the generated Info pages here:
> https://splichal.eu/scripts/sphinx/

Okay, but there's something amiss, just compare a local gcc.info with 
that.  The sphinx generated one seems to only contain command line 
options, but none of the other topics, in particular it seems to contain 
the "Invoking GCC" chapter (and only that) as top-level, and all other 
ones are missing (like "C implementation", "C++ implementation", "C 
extension", and so on).

Looking at gccint.info I also see quite some confusion, it's unclear to 
me if content is missing or not.  But e.g. the top-level structure has a 
different order (a less logical one, this one is btw. shared with the 
order of the HTML generated docu, so it's probably specific to sphinx 
setup or such).

Ignoring that missing content what is there right now does seem somewhat 
acceptable for local use, though.


Ciao,
Michael.


Re: [PATCH][RFC] tree-optimization/100801 - perform final value replacement from VRP

2021-06-01 Thread Aldy Hernandez via Gcc-patches
I have some old numbers from late April.

VRP vs ranger was more difficult to compare than evrp, since the
gimple is different (ASSERT_EXPRs).  What I did was run a late evrp
pass before each VRP pass and compared branches that were folded, for
an estimate.

On its own VRP1 could fold 5482 branches versus 6338 for a late evrp
pass running before VRP1, so 15% more.  If late evrp runs before VRP1,
VRP1 can find something late evrp can't in 77 out of 388 .ii files
distilled from a bootstrap.  So, VRP1 can fold a branch in 1 out of 5
files.

For VRP2, the numbers are 376 branches on its own versus 467 for late
evrp2.  That's 24% more branches for late evrp2.  VRP2 can find
something late evrp2 can't in 199 out of 388 files, so one in every 2
files.

I didn't dig deep into the VRP[12] foldable branches.  Some of them
looked like duplicates, and others looked like stuff we could get with
minor tweaks with range-ops.  But I really don't have hard data here.

And yes, I agree, we should re-run these stats when the dust settles
in the next week or two.

Aldy



Re: RFA: save/restore target options in handle_optimize_attribute

2021-06-01 Thread Martin Liška

On 5/19/21 11:48 PM, Joern Wolfgang Rennecke wrote:

We set default for some target options in TARGET_OPTION_OPTIMIZATION_TABLE,
but these can be overridden by specifying the corresponding explicit
-mXXX / -mno-XXX options.
When a function bears the attribute
__attribute__ ((optimize("02")))
the target options are set to the default for that optimization level,
which can be different from what was selected for the file as a whole.
As handle_optimize_attribute is right now, it will thus clobber the
target options, and with enable_checking it will then abort.

The attached patch makes it save and restore the target options.

Bootstrapped and regression tested on x86_64-pc-linux-gnu.



Btw. do you have a test-case we can add for this patch?

Thanks,
Martin


Re: [PATCH][i386] Split not+broadcast+pand to broadcast+pandn.

2021-06-01 Thread Segher Boessenkool
On Tue, Jun 01, 2021 at 04:32:42PM +0800, Hongtao Liu wrote:

[ no attachment to reply to ]

Please send this with either the patch actually inline, or as
attachment with content-disposition inline, no encoding, and a valid
text mimetype.  So that people can see it, also on the archives, and
actually reply to it!

I'll see if I can fix it up this time.


Segher


Re: Fallout: save/restore target options in handle_optimize_attribute

2021-06-01 Thread Richard Biener via Gcc-patches
On Tue, Jun 1, 2021 at 1:17 PM Martin Liška  wrote:
>
> On 5/28/21 2:46 PM, Richard Biener wrote:
> > On Fri, May 28, 2021 at 11:48 AM Martin Liška  wrote:
> >>
> >> Hi.
> >>
> >> There's a fallout after my revision 
> >> ebd5e86c0f41dc1d692f9b2b68a510b1f6835a3e. I would like to analyze
> >> all cases and discuss possible solutions. To be honest it's a can of worms 
> >> and reverting the commit
> >> is an option on the table.
> >>
> >> So the cases:
> >>
> >> 1) PR100759 - ppc64le
> >>
> >> $ cat pr.C
> >> #pragma GCC optimize 0
> >> void main();
> >>
> >> $ ./xgcc -B. -Os pr.C
> >> pr.C:2:11: internal compiler error: ‘global_options’ are modified in local 
> >> context
> >>   2 | void main();
> >>
> >> What happens: we change from -Os to -O0 and rs6000_isa_flags differ in 
> >> cl_optimization_compare.
> >> Problem is that OPTION_MASK_SAVE_TOC_INDIRECT is set based on optimize 
> >> flag:
> >>
> >> /* If we can shrink-wrap the TOC register save separately, then use
> >>-msave-toc-indirect unless explicitly disabled.  */
> >> if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
> >> && flag_shrink_wrap_separate
> >> && optimize_function_for_speed_p (cfun))
> >>   rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
> >
> > So that means that
> >
> >/* Restore current options.  */
> >cl_optimization_restore (_options, _options_set,
> > _opts);
> >cl_target_option_restore (_options, _options_set,
> >  TREE_TARGET_OPTION (prev_target_node));
> >
> > does not result in the same outcome as the original command-line processing?
> >
> > Given both restore processes could interact (not sure if that's the issue 
> > here)
> > shouldn't we just have a single restore operation and a single target
> > hook instead of both targetm.override_options_after_change and
> > targetm.target_option.restore?
>
> That's not this case. But it can be a unification approach for the future.
>
> >
> > Likewise we should probably _always_ set both, DECL_FUNCTION_SPECIFIC_OPT
> > and _TARGET as a step towards unifying them.
>
> Yes, that's basically what's happening at various places.
>
> >
> > That said, for the above case a more detailed run-down as to how things go 
> > wrong
> > would be nice to see.
>
> Anyway, a detailed analysis of this issue is:
>
> 1) one provides -Os on the command-line, thus global_options.x_optimize_size 
> == 1
> 2) then we reach #pragma GCC optimize 0, at this point parse_optimize_options 
> is called
> and thus global_options are modified (global_options.x_optimize_size)
> That's reflected in optimization_current_node, which is now different 
> from optimization_default_node.
> 3) targetm.override_options_after_change is not called, so 
> global_options.x_rs6000_isa_flags
> is not changed to 1.
> 4) for all subsequent functions, handle_optimize_attribute is called as we 
> are in a 'pragma optimize'
> 5) here the sanity checking code saves saved_global_options, parsing happens 
> and cl_*_restore is done
> 6) as cl_target_option_restore calls targetm.override_options_after_change, 
> the global_options.x_rs6000_isa_flags
> has OPTION_MASK_SAVE_TOC_INDIRECT set
> 7) and the cl_optimization_compare complains
>
> I have a patch that reflects that. In fact, the global options state is 
> correct for each function.
> Apart from that, PR100759 mentions a test-case that fails due to a missing 
> cl_target_option_restore
> for 'pragma pop'.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. And it 
> survives tests on ppc64-linux-gnu.
>
> Ready to be installed?

It sounds like a clear progression so OK.

I still don't get

+  /* When #pragma GCC optimize pragma is used, it modifies global_options
+without calling targetm.override_options_after_change.  That can leave
+target flags inconsistent for comparison.  */

fully, esp. as to why we cannot fix pragma handling and thus why the
"inconsistent"
state is actually OK.

Richard.

> Thanks,
> Martin
>
> >
> >> Suggested solution is doing:
> >>
> >> if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
> >> && flag_shrink_wrap_separate
> >>   rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
> >>
> >> and add '&& optimize_function_for_speed_p (cfun)' to the place where the 
> >> option mask is used.
> >>
> >> 2) Joseph's case:
> >>
> >> $ cat ~/Programming/testcases/opts-bug.i
> >> extern unsigned long int x;
> >> extern float f (float);
> >> extern __typeof (f) f_power8;
> >> extern __typeof (f) f_power9;
> >> extern __typeof (f) f __attribute__ ((ifunc ("f_ifunc")));
> >> static __attribute__ ((optimize ("-fno-stack-protector"))) __typeof (f) *
> >> f_ifunc (void)
> >> {
> >> __typeof (f) *res = x ? f_power9 : f_power8;
> >> return res;
> >> }
> >>
> >> $ ./xgcc -B. ~/Programming/testcases/opts-bug.i -c -S -O2 
> >> 

Re: [PATCH][RFC] tree-optimization/100801 - perform final value replacement from VRP

2021-06-01 Thread Andrew MacLeod via Gcc-patches

On 6/1/21 6:01 AM, Richard Biener wrote:

On Mon, 31 May 2021, Andrew MacLeod wrote:


On 5/28/21 11:25 AM, Richard Biener wrote:

This makes sure to perform final value replacement of constants
when we also are sure to propagate those, like in VRP.  This avoids
spurious diagnostics when doing so only from within SCCP which can
leave unreachable loops in the IL triggering bogus diagnostics.

The choice is VRP because it already defers to SCEV for PHI nodes
so the following makes it analyze final values for loop exit PHIs.
To make that work it arranges for loop-closed SSA to be built only
after inserting ASSERT_EXPRs.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

It does

FAIL: gcc.dg/ubsan/pr94423.c   -O2  (internal compiler error)

where VRP somehow propagates abnormals in a bogus way.

OK, so I analyzed this some more and it results from the hunk moving
loop-closed SSA construction after ASSERT_EXPR insertion in
execute_vrp.  The motivation for this is that we end up splitting
the loop exit edge when inserting the ASSERT_EXPR, creating
non-loop-closed SSA and thus fail to pick up the final value.

Now with swapping insert and loop-closed SSA build we get LC
SSA PHIs on an abnormal loop exit in the above testcase which
messes up assert expr removal which does

 /* Propagate the RHS into every use of the LHS.  For SSA names
also propagate abnormals as it merely restores the original
IL in this case (an replace_uses_by would assert).  */

in remove_range_assertions, explicitly ignoring constraints around
abnormals.  But since LC SSA PHIs remain we fail IL verification.
Note the LC SSA PHI is only required because we insert a SSA def
via the ASSERT_EXPR in the loop body.  We can fix up the IL detail
by marking the ASSERT_EXPR source appropriately via

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 450926d5f9b..705e2489eb1 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -3809,6 +3809,8 @@ vrp_asserts::remove_range_assertions ()
 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
   FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
 SET_USE (use_p, var);
+   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (var) = 1;
   }
 else
   replace_uses_by (lhs, var);

but we also never get rid of a SSA_NAME_OCCURS_IN_ABNORMAL_PHI marking.

One option would be to keep the order as-is but fixup assert expr
insertion to update/honor loop-closed SSA.  But then - how far are you
with removing all this ASSERT_EXPR stuff?

Thanks,
Richard.

I hope we can replace the VRP pass as well this release.. which will 
then kill off all the need for the assert expr stuff.  Aldy did some 
preliminary comparisons with VRP on our branch about a month ago... 
running a ranger EVRP pass right before vrp1 and vrp2 then seeing what 
VRP got that we missed.   and I believe we were doing pretty well, but 
we haven't had a chance to look at it in detail to see if there is 
anything major we miss.  Maybe he can comment on where we were.  Once I 
get the rest of the branch into trunk, we'll take another stab at the 
comparison because what is going into trunk is more consistent than what 
was on the branch.


Right now I only have 3 remaining bits to bring over .. relations, 
application of equivalences, and recomputations.  With any luck I'll get 
them in this week. we can then do another run by of how we compare to VRP.


Andrew




Re: [PATCH v2] Add vec_const_duplicate optab and TARGET_GEN_MEMSET_SCRATCH_RTX

2021-06-01 Thread H.J. Lu via Gcc-patches
On Mon, May 31, 2021 at 11:54:53PM -0600, Jeff Law wrote:
> 
> 
> On 5/31/2021 11:50 PM, Richard Sandiford wrote:
> > "H.J. Lu via Gcc-patches"  writes:
> > > On Mon, May 31, 2021 at 06:32:04AM -0700, H.J. Lu wrote:
> > > > On Mon, May 31, 2021 at 6:26 AM Richard Biener
> > > >  wrote:
> > > > > On Mon, May 31, 2021 at 3:12 PM H.J. Lu  wrote:
> > > > > > On Mon, May 31, 2021 at 5:46 AM Richard Biener
> > > > > >  wrote:
> > > > > > > On Mon, May 31, 2021 at 2:09 PM H.J. Lu  
> > > > > > > wrote:
> > > > > > > > On Wed, May 26, 2021 at 10:28:16AM +0200, Richard Biener wrote:
> > > > > > > > > > > >   -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx 
> > > > > > > > > > > > DATA, scalar_int_mode
> > > > > > > > > > > >MODE)
> > > > > > > > > > > >   This function returns the RTL of a register 
> > > > > > > > > > > > containing
> > > > > > > > > > > >   'GET_MODE_SIZE (MODE)' consecutive copies of the 
> > > > > > > > > > > > unsigned char
> > > > > > > > > > > >   value given in the RTL register DATA.  For 
> > > > > > > > > > > > example, if MODE is 4
> > > > > > > > > > > >   bytes wide, return the RTL for 0x01010101*DATA.
> > > > > > > > > > > For this one I wonder if it should be an optab instead.  
> > > > > > > > > > > Couldn't you
> > > > > > > > > > > use the existing vec_duplicate for this by using 
> > > > > > > > > > > (paradoxical) subregs
> > > > > > > > > > > like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI (reg:QI 
> > > > > > > > > > > ...)))?
> > > > > > > > > > I tried.   It doesn't even work on x86.  See:
> > > > > > > > > > 
> > > > > > > > > > https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html
> > > > > > > > > Not sure what I should read from there...
> > > > > > > > > 
> > > > > > > > > > There are special cases to subreg HI, SI and DI modes of TI 
> > > > > > > > > > mode in
> > > > > > > > > > ix86_gen_memset_value_from_prev.   simplify_gen_subreg 
> > > > > > > > > > doesn't
> > > > > > > > > > work here.   Each backend may need its own special handling.
> > > > > > > > > OK, I guess I'm not (RTL) qualified enough to further review 
> > > > > > > > > these parts,
> > > > > > > > > sorry.  Since we're doing code generation the canonical way 
> > > > > > > > > to communicate
> > > > > > > > > with backends should be optabs, not some set of disconnected 
> > > > > > > > > target hooks.
> > > > > > > > > But as said, I probably don't know enough of RTL to see why 
> > > > > > > > > it's the only way.
> > > > > > > > > 
> > > > > > > > > Richard.
> > > > > > > > Here is the patch to add optabs instead.  Does it look OK?
> > > > > > > > 
> > > > > > > > Thanks.
> > > > > > > > 
> > > > > > > > H.J.
> > > > > > > > ---
> > > > > > > > Add 2 optabs:
> > > > > > > > 
> > > > > > > > 1. integer_extract: Extract lower bit value from the integer 
> > > > > > > > value in
> > > > > > > > TImode, OImode or XImode.
> > > > > > > That sounds very specific, esp. the restriction to {TI,OI,XI}mode.
> > > > > > > It also sounds like it matches (subreg:{TI,OI,XI} (...) 0).  
> > > > > > > There are
> > > > > > > existing target hooks verifying subreg validity - why's that not 
> > > > > > > a good
> > > > > > > fit here?  ISTR you say gen_lowpart () doesn't work (or was it
> > > > > > > simplify_gen_subreg?), why's that so?
> > > > > > {TI,OI,XI}mode are storage only integer types.   subreg doesn't work
> > > > > > well on them.  I got
> > > > > > 
> > > > > > [hjl@gnu-cfl-2 pieces]$ cat s2.i
> > > > > > extern void *ops;
> > > > > > 
> > > > > > void
> > > > > > foo (int c)
> > > > > > {
> > > > > >__builtin_memset (ops, c, 34);
> > > > > > }
> > > > > > [hjl@gnu-cfl-2 pieces]$ make s2.s
> > > > > > /export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
> > > > > > -B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/
> > > > > > -O2 -march=haswell -S s2.i
> > > > > > during RTL pass: reload
> > > > > > s2.i: In function ‘foo’:
> > > > > > s2.i:7:1: internal compiler error: maximum number of generated 
> > > > > > reload
> > > > > > insns per insn achieved (90)
> > > > > >  7 | }
> > > > > >| ^
> > > > > > 0x1050734 lra_constraints(bool)
> > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra-constraints.c:5091
> > > > > > 0x1039536 lra(_IO_FILE*)
> > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:2336
> > > > > > 0xfe1140 do_reload
> > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:5822
> > > > > > 0xfe162e execute
> > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:6008
> > > > > > Please submit a full bug report,
> > > > > > with preprocessed source if appropriate.
> > > > > > Please include the complete backtrace with any bug report.
> > > > > > See  for instructions.
> > > > > > make: *** [Makefile:32: s2.s] Error 1
> > > > > > [hjl@gnu-cfl-2 pieces]$
> > > > > > 
> > > > > > due to
> > > > > > 
> > > > > > (insn 12 11 0 (set (mem:HI (plus:DI 

Re: [PATCH 1/2] c-family: Copy DECL_USER_ALIGN even if DECL_ALIGN is similar.

2021-06-01 Thread Robin Dapp via Gcc-patches
This is the revised testsuite change with v2 adding a check for no 
separate note for


 __attribute__((aligned (32), aligned (4)).

Regards
 Robin
diff --git a/gcc/testsuite/c-c++-common/Wattributes.c b/gcc/testsuite/c-c++-common/Wattributes.c
index 4ad90441b4d..a97e5ad5f74 100644
--- a/gcc/testsuite/c-c++-common/Wattributes.c
+++ b/gcc/testsuite/c-c++-common/Wattributes.c
@@ -97,6 +97,8 @@ fnoinline1 (void);   /* { dg-message "previous declaration here" } */
 /* Verify a warning for always_inline conflict.  */
 void ATTR ((always_inline))
 fnoinline1 (void) { }/* { dg-warning "ignoring attribute .always_inline. because it conflicts with attribute .noinline." } */
+			 /* { dg-message "note: previous declaration here" "" { target *-*-* } .-1 } */
+			 /* { dg-message "note: previous definition" "" { target *-*-* } .-2 } */
 
 /* Verify a warning for gnu_inline conflict.  */
 inline void ATTR ((gnu_inline))
@@ -364,13 +366,15 @@ inline int ATTR ((cold))
 finline_cold_noreturn (int);
 
 inline int ATTR ((noreturn))
-finline_cold_noreturn (int);
+finline_cold_noreturn (int);	/* { dg-message "note: previous declaration here" } */
 
 inline int ATTR ((noinline))
 finline_cold_noreturn (int);/* { dg-warning "ignoring attribute .noinline. because it conflicts with attribute .always_inline." } */
+/* { dg-message "note: previous declaration here" "" { target *-*-* } .-1 } */
 
 inline int ATTR ((hot))
 finline_cold_noreturn (int);/* { dg-warning "ignoring attribute .hot. because it conflicts with attribute .cold." } */
+/* { dg-message "note: previous declaration here" "" { target *-*-* } .-1 } */
 
 inline int ATTR ((warn_unused_result))
 finline_cold_noreturn (int);/* { dg-warning "ignoring attribute .warn_unused_result. because it conflicts with attribute .noreturn." } */
@@ -389,23 +393,24 @@ finline_cold_noreturn (int i) { (void) __builtin_abort (); }
and some on distinct declarations.  */
 
 inline int ATTR ((always_inline, hot))
-finline_hot_noret_align (int);
+finline_hot_noret_align (int);	/* { dg-message "note: previous declaration here" } */
 
 inline int ATTR ((noreturn, noinline))
 finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .noinline. because it conflicts with attribute .always_inline." } */
+/* { dg-message "note: previous declaration here" "" { target *-*-* } .-1 } */
 
 inline int ATTR ((cold, aligned (8)))
 finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .cold. because it conflicts with attribute .hot." } */
+/* { dg-message "note: previous declaration here" "" { target *-*-* } .-1 } */
 
 inline int ATTR ((warn_unused_result))
 finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .warn_unused_result. because it conflicts with attribute .noreturn." } */
 
 inline int ATTR ((aligned (4)))
-  finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .aligned \\(4\\). because it conflicts with attribute .aligned \\(8\\)." "" { target { ! { hppa*64*-*-* s390*-*-* } } } } */
-/* { dg-error "alignment for '.*finline_hot_noret_align.*' must be at least 8" "" { target s390*-*-* } .-1 } */
+  finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .aligned \\(4\\). because it conflicts with attribute .aligned \\(8\\)." "" { target { ! { hppa*64*-*-* } } } } */
 
 inline int ATTR ((aligned (8)))
-finline_hot_noret_align (int);
+finline_hot_noret_align (int);  /* { dg-message "note: previous declaration here" } */
 
 inline int ATTR ((const))
 finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .const. because it conflicts with attribute .noreturn." } */
@@ -416,6 +421,26 @@ inline int ATTR ((noreturn))
 finline_hot_noret_align (int i) { (void) __builtin_abort (); }
 
 
+/* Expect a warning about conflicting alignment but without
+   other declarations inbetween.  */
+inline int ATTR ((aligned (32)))
+finline_align (int);	/* { dg-message "note: previous declaration here" } */
+
+inline int ATTR ((aligned (4)))
+finline_align (int);  /* { dg-warning "ignoring attribute .aligned \\(4\\). because it conflicts with attribute .aligned \\(32\\)." "" } */
+
+inline int ATTR ((noreturn))
+finline_align (int i) { (void) __builtin_abort (); }
+
+
+/* Expect no note that would refer to the same declaration.  */
+inline int ATTR ((aligned (32), aligned (4)))
+finline_double_align (int); /* { dg-warning "ignoring attribute .aligned \\(4\\). because it conflicts with attribute .aligned \\(32\\)." } */
+
+inline int ATTR ((noreturn))
+finline_double_align (int i) { (void) __builtin_abort (); }
+
+
 /* Exercise variable attributes.  */
 
 extern int ATTR ((common))
diff --git a/gcc/testsuite/gcc.dg/Wattributes-6.c b/gcc/testsuite/gcc.dg/Wattributes-6.c
index 4ba59bf2806..c6b2225943d 100644
--- a/gcc/testsuite/gcc.dg/Wattributes-6.c
+++ b/gcc/testsuite/gcc.dg/Wattributes-6.c
@@ -401,8 +401,7 @@ inline int ATTR ((warn_unused_result))
 finline_hot_noret_align (int);  /* { 

Re: [PATCH v2] Add vec_const_duplicate optab and TARGET_GEN_MEMSET_SCRATCH_RTX

2021-06-01 Thread H.J. Lu via Gcc-patches
On Tue, Jun 1, 2021 at 6:25 AM Richard Biener
 wrote:
>
> On Tue, Jun 1, 2021 at 3:05 PM H.J. Lu  wrote:
> >
> > On Mon, May 31, 2021 at 11:54:53PM -0600, Jeff Law wrote:
> > >
> > >
> > > On 5/31/2021 11:50 PM, Richard Sandiford wrote:
> > > > "H.J. Lu via Gcc-patches"  writes:
> > > > > On Mon, May 31, 2021 at 06:32:04AM -0700, H.J. Lu wrote:
> > > > > > On Mon, May 31, 2021 at 6:26 AM Richard Biener
> > > > > >  wrote:
> > > > > > > On Mon, May 31, 2021 at 3:12 PM H.J. Lu  
> > > > > > > wrote:
> > > > > > > > On Mon, May 31, 2021 at 5:46 AM Richard Biener
> > > > > > > >  wrote:
> > > > > > > > > On Mon, May 31, 2021 at 2:09 PM H.J. Lu  
> > > > > > > > > wrote:
> > > > > > > > > > On Wed, May 26, 2021 at 10:28:16AM +0200, Richard Biener 
> > > > > > > > > > wrote:
> > > > > > > > > > > > > >   -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx 
> > > > > > > > > > > > > > DATA, scalar_int_mode
> > > > > > > > > > > > > >MODE)
> > > > > > > > > > > > > >   This function returns the RTL of a register 
> > > > > > > > > > > > > > containing
> > > > > > > > > > > > > >   'GET_MODE_SIZE (MODE)' consecutive copies of 
> > > > > > > > > > > > > > the unsigned char
> > > > > > > > > > > > > >   value given in the RTL register DATA.  For 
> > > > > > > > > > > > > > example, if MODE is 4
> > > > > > > > > > > > > >   bytes wide, return the RTL for 
> > > > > > > > > > > > > > 0x01010101*DATA.
> > > > > > > > > > > > > For this one I wonder if it should be an optab 
> > > > > > > > > > > > > instead.  Couldn't you
> > > > > > > > > > > > > use the existing vec_duplicate for this by using 
> > > > > > > > > > > > > (paradoxical) subregs
> > > > > > > > > > > > > like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI 
> > > > > > > > > > > > > (reg:QI ...)))?
> > > > > > > > > > > > I tried.   It doesn't even work on x86.  See:
> > > > > > > > > > > >
> > > > > > > > > > > > https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html
> > > > > > > > > > > Not sure what I should read from there...
> > > > > > > > > > >
> > > > > > > > > > > > There are special cases to subreg HI, SI and DI modes 
> > > > > > > > > > > > of TI mode in
> > > > > > > > > > > > ix86_gen_memset_value_from_prev.   simplify_gen_subreg 
> > > > > > > > > > > > doesn't
> > > > > > > > > > > > work here.   Each backend may need its own special 
> > > > > > > > > > > > handling.
> > > > > > > > > > > OK, I guess I'm not (RTL) qualified enough to further 
> > > > > > > > > > > review these parts,
> > > > > > > > > > > sorry.  Since we're doing code generation the canonical 
> > > > > > > > > > > way to communicate
> > > > > > > > > > > with backends should be optabs, not some set of 
> > > > > > > > > > > disconnected target hooks.
> > > > > > > > > > > But as said, I probably don't know enough of RTL to see 
> > > > > > > > > > > why it's the only way.
> > > > > > > > > > >
> > > > > > > > > > > Richard.
> > > > > > > > > > Here is the patch to add optabs instead.  Does it look OK?
> > > > > > > > > >
> > > > > > > > > > Thanks.
> > > > > > > > > >
> > > > > > > > > > H.J.
> > > > > > > > > > ---
> > > > > > > > > > Add 2 optabs:
> > > > > > > > > >
> > > > > > > > > > 1. integer_extract: Extract lower bit value from the 
> > > > > > > > > > integer value in
> > > > > > > > > > TImode, OImode or XImode.
> > > > > > > > > That sounds very specific, esp. the restriction to 
> > > > > > > > > {TI,OI,XI}mode.
> > > > > > > > > It also sounds like it matches (subreg:{TI,OI,XI} (...) 0).  
> > > > > > > > > There are
> > > > > > > > > existing target hooks verifying subreg validity - why's that 
> > > > > > > > > not a good
> > > > > > > > > fit here?  ISTR you say gen_lowpart () doesn't work (or was it
> > > > > > > > > simplify_gen_subreg?), why's that so?
> > > > > > > > {TI,OI,XI}mode are storage only integer types.   subreg doesn't 
> > > > > > > > work
> > > > > > > > well on them.  I got
> > > > > > > >
> > > > > > > > [hjl@gnu-cfl-2 pieces]$ cat s2.i
> > > > > > > > extern void *ops;
> > > > > > > >
> > > > > > > > void
> > > > > > > > foo (int c)
> > > > > > > > {
> > > > > > > >__builtin_memset (ops, c, 34);
> > > > > > > > }
> > > > > > > > [hjl@gnu-cfl-2 pieces]$ make s2.s
> > > > > > > > /export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
> > > > > > > > -B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/
> > > > > > > > -O2 -march=haswell -S s2.i
> > > > > > > > during RTL pass: reload
> > > > > > > > s2.i: In function ‘foo’:
> > > > > > > > s2.i:7:1: internal compiler error: maximum number of generated 
> > > > > > > > reload
> > > > > > > > insns per insn achieved (90)
> > > > > > > >  7 | }
> > > > > > > >| ^
> > > > > > > > 0x1050734 lra_constraints(bool)
> > > > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra-constraints.c:5091
> > > > > > > > 0x1039536 lra(_IO_FILE*)
> > > > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:2336
> > 

Re: [Patch] Fortran/OpenMP: Support (parallel) master taskloop (simd) [PR99928]

2021-06-01 Thread Tobias Burnus

On 01.06.21 12:02, Jakub Jelinek wrote:

 * gfortran.dg/gomp/scan-1.f90: Likewise.
 * gfortran.dg/gomp/pr99928-3.f90: New test.
 * gfortran.dg/gomp/taskloop-1.f90: New test.



Otherwise LGTM.
  Jakub


Thanks for the review! – I just saw that I forgot to include another
testcase – unless there are comments, I will commit it later.

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
Fortran/OpenMP: Add gfortran.dg/gomp/taskloop-2.f90 [PR99928]

	PR middle-end/99928

gcc/testsuite/ChangeLog

	* gfortran.dg/gomp/taskloop-2.f90: New.

diff --git a/gcc/testsuite/gfortran.dg/gomp/taskloop-2.f90 b/gcc/testsuite/gfortran.dg/gomp/taskloop-2.f90
new file mode 100644
index 000..21427623584
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/taskloop-2.f90
@@ -0,0 +1,72 @@
+subroutine foo()
+implicit none
+integer :: i, r
+!$omp taskloop reduction(task, +: r)  ! { dg-error "Only DEFAULT permitted as reduction-modifier in REDUCTION clause" }
+do i = 1, 64
+end do
+!$omp taskloop simd reduction(task, +: r)  ! { dg-error "Only DEFAULT permitted as reduction-modifier in REDUCTION clause" }
+do i = 1, 64
+end do
+!$omp master taskloop reduction(task, +: r)  ! { dg-error "Only DEFAULT permitted as reduction-modifier in REDUCTION clause" }
+do i = 1, 64
+end do
+!$omp master taskloop simd reduction(task, +: r)  ! { dg-error "Only DEFAULT permitted as reduction-modifier in REDUCTION clause" }
+do i = 1, 64
+end do
+!$omp parallel master taskloop reduction(task, +: r)  ! { dg-error "Only DEFAULT permitted as reduction-modifier in REDUCTION clause" }
+do i = 1, 64
+end do
+!$omp parallel master taskloop simd reduction(task, +: r)  ! { dg-error "Only DEFAULT permitted as reduction-modifier in REDUCTION clause" }
+do i = 1, 64
+end do
+
+!$omp taskloop reduction(inscan, +: r)  ! { dg-error "'inscan' REDUCTION clause on construct other than DO, SIMD, DO SIMD, PARALLEL DO, PARALLEL DO SIMD" }
+do i = 1, 64! { dg-error "OMP SCAN between two structured-block-sequences" "" { target *-*-* } .-1 }
+end do
+!$omp taskloop simd reduction(inscan, +: r)  ! { dg-error "'inscan' REDUCTION clause on construct other than DO, SIMD, DO SIMD, PARALLEL DO, PARALLEL DO SIMD" }
+do i = 1, 64 ! { dg-error "OMP SCAN between two structured-block-sequences" "" { target *-*-* } .-1 }
+end do
+!$omp master taskloop reduction(inscan, +: r) ! { dg-error "'inscan' REDUCTION clause on construct other than DO, SIMD, DO SIMD, PARALLEL DO, PARALLEL DO SIMD" }
+do i = 1, 64
+end do
+!$omp master taskloop simd reduction(inscan, +: r)  ! { dg-error "'inscan' REDUCTION clause on construct other than DO, SIMD, DO SIMD, PARALLEL DO, PARALLEL DO SIMD" }
+do i = 1, 64
+end do
+!$omp parallel master taskloop reduction(inscan, +: r)  ! { dg-error "'inscan' REDUCTION clause on construct other than DO, SIMD, DO SIMD, PARALLEL DO, PARALLEL DO SIMD" }
+do i = 1, 64 ! { dg-error "OMP SCAN between two structured-block-sequences" "" { target *-*-* } .-1 }
+end do
+!$omp parallel master taskloop simd reduction(inscan, +: r)   ! { dg-error "'inscan' REDUCTION clause on construct other than DO, SIMD, DO SIMD, PARALLEL DO, PARALLEL DO SIMD" }
+do i = 1, 64 ! { dg-error "OMP SCAN between two structured-block-sequences" "" { target *-*-* } .-1 }
+end do
+end
+
+
+subroutine bar()
+implicit none
+integer :: i, r
+r = 0
+!$omp parallel reduction(+:r)
+  !$omp master taskloop in_reduction(+:r)
+do i = 1, 64
+end do
+  !$omp master taskloop simd in_reduction(+:r)
+do i = 1, 64
+end do
+  !$omp master
+!$omp taskloop in_reduction(+:r)
+  do i = 1, 64
+  end do
+!$omp taskloop simd in_reduction(+:r)
+  do i = 1, 64
+  end do
+  !$omp end master
+!$omp end parallel
+
+!$omp parallel master taskloop in_reduction(+:r)  ! { dg-error "Failed to match clause" }
+do i = 1, 64
+end do
+
+!$omp parallel master taskloop simd in_reduction(+:r)  ! { dg-error "Failed to match clause" }
+do i = 1, 64
+end do
+end


Re: [PATCH 1/2] c-family: Copy DECL_USER_ALIGN even if DECL_ALIGN is similar.

2021-06-01 Thread Robin Dapp via Gcc-patches

As you say, the logic is convoluted.  Let's simplify it rather than make
it more convoluted.  One possibility would be to change || to | to avoid
the shortcut, and then

bool note = lastalign > curalign;
if (note)
curalign = lastalign;


I went with your suggestion in the attached v2.  Regtested and 
bootstrapped on s390x, x86 and ppc64le.


Regards
 Robin
diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index ccf9e4ccf0b..f9d1c17f8ef 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -2314,14 +2314,14 @@ common_handle_aligned_attribute (tree *node, tree name, tree args, int flags,
   *no_add_attrs = true;
 }
   else if (TREE_CODE (decl) == FUNCTION_DECL
-	   && ((curalign = DECL_ALIGN (decl)) > bitalign
-	   || ((lastalign = DECL_ALIGN (last_decl)) > bitalign)))
+	   && (((curalign = DECL_ALIGN (decl)) > bitalign)
+	   | ((lastalign = DECL_ALIGN (last_decl)) > bitalign)))
 {
   /* Either a prior attribute on the same declaration or one
 	 on a prior declaration of the same function specifies
 	 stricter alignment than this attribute.  */
-  bool note = lastalign != 0;
-  if (lastalign)
+  bool note = lastalign > curalign;
+  if (note)
 	curalign = lastalign;
 
   curalign /= BITS_PER_UNIT;
@@ -2366,25 +2366,6 @@ common_handle_aligned_attribute (tree *node, tree name, tree args, int flags,
   This formally comes from the c++11 specification but we are
   doing it for the GNU attribute syntax as well.  */
 *no_add_attrs = true;
-  else if (!warn_if_not_aligned_p
-	   && TREE_CODE (decl) == FUNCTION_DECL
-	   && DECL_ALIGN (decl) > bitalign)
-{
-  /* Don't warn for function alignment here if warn_if_not_aligned_p
-	 is true.  It will be warned about later.  */
-  if (DECL_USER_ALIGN (decl))
-	{
-	  /* Only reject attempts to relax/override an alignment
-	 explicitly specified previously and accept declarations
-	 that appear to relax the implicit function alignment for
-	 the target.  Both increasing and increasing the alignment
-	 set by -falign-functions setting is permitted.  */
-	  error ("alignment for %q+D was previously specified as %d "
-		 "and may not be decreased", decl,
-		 DECL_ALIGN (decl) / BITS_PER_UNIT);
-	  *no_add_attrs = true;
-	}
-}
   else if (warn_if_not_aligned_p
 	   && TREE_CODE (decl) == FIELD_DECL
 	   && !DECL_C_BIT_FIELD (decl))
diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index 53b2b5b637d..40488585052 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -2620,6 +2620,9 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype)
 	  SET_DECL_ALIGN (newdecl, DECL_ALIGN (olddecl));
 	  DECL_USER_ALIGN (newdecl) |= DECL_USER_ALIGN (olddecl);
 	}
+  else if (DECL_ALIGN (olddecl) == DECL_ALIGN (newdecl)
+	   && DECL_USER_ALIGN (olddecl) != DECL_USER_ALIGN (newdecl))
+	DECL_USER_ALIGN (newdecl) = 1;
   if (DECL_WARN_IF_NOT_ALIGN (olddecl)
 	  > DECL_WARN_IF_NOT_ALIGN (newdecl))
 	SET_DECL_WARN_IF_NOT_ALIGN (newdecl,
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index e7268d5ad18..f774c75228d 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -2794,6 +2794,10 @@ duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden)
   SET_DECL_ALIGN (newdecl, DECL_ALIGN (olddecl));
   DECL_USER_ALIGN (newdecl) |= DECL_USER_ALIGN (olddecl);
 }
+  else if (DECL_ALIGN (olddecl) == DECL_ALIGN (newdecl)
+  && DECL_USER_ALIGN (olddecl) != DECL_USER_ALIGN (newdecl))
+DECL_USER_ALIGN (newdecl) = 1;
+
   DECL_USER_ALIGN (olddecl) = DECL_USER_ALIGN (newdecl);
   if (DECL_WARN_IF_NOT_ALIGN (olddecl)
   > DECL_WARN_IF_NOT_ALIGN (newdecl))


Re: [Patch] Fortran/OpenMP: Support (parallel) master taskloop (simd) [PR99928]

2021-06-01 Thread Jakub Jelinek via Gcc-patches
On Tue, Jun 01, 2021 at 11:53:12AM +0200, Tobias Burnus wrote:
> Fortran/OpenMP: Support (parallel) master taskloop (simd) [PR99928]
> 
>   PR middle-end/99928
> 
> gcc/fortran/ChangeLog:
> 
>   * dump-parse-tree.c (show_omp_node, show_code_node): Handle
>   (parallel) master taskloop (simd).
>   * frontend-passes.c (gfc_code_walker): Set in_omp_workshare
>   to false for parallel master taskloop (simd).
>   * gfortran.h (enum gfc_statement):
>   Add ST_OMP_(END_)(PARALLEL_)MASTER_TASKLOOP(_SIMD).
>   (enum gfc_exec_op): EXEC_OMP_(PARALLEL_)MASTER_TASKLOOP(_SIMD).
>   * match.h (gfc_match_omp_master_taskloop,
>   gfc_match_omp_master_taskloop_simd,
>   gfc_match_omp_parallel_master_taskloop,
>   gfc_match_omp_parallel_master_taskloop_simd): New prototype.
>   * openmp.c (gfc_match_omp_parallel_master_taskloop,
>   gfc_match_omp_parallel_master_taskloop_simd,
>   gfc_match_omp_master_taskloop,
>   gfc_match_omp_master_taskloop_simd): New.
>   (gfc_match_omp_taskloop_simd): Permit 'reduction' clause.
>   (resolve_omp_clauses): Handle new combined directives; remove
>   inscan-reduction check to reduce multiple errors; add
>   task-reduction error for 'taskloop simd'.
> (gfc_resolve_omp_parallel_blocks,
>   resolve_omp_do, omp_code_to_statement,
>   gfc_resolve_omp_directive): Handle new combined constructs.
>   * parse.c (decode_omp_directive, next_statement,
>   gfc_ascii_statement, parse_omp_do, parse_omp_structured_block,
>   parse_executable): Likewise.
>   * resolve.c (gfc_resolve_blocks, gfc_resolve_code): Likewise.
>   * st.c (gfc_free_statement): Likewise.
>   * trans.c (trans_code): Likewise.
>   * trans-openmp.c (gfc_split_omp_clauses,
>   gfc_trans_omp_directive): Likewise.
>   (gfc_trans_omp_parallel_master): Move after 
> gfc_trans_omp_master_taskloop;
>   handle parallel master taskloop (simd) as well.
>   (gfc_trans_omp_taskloop): Take gfc_exec_op as arg.
>   (gfc_trans_omp_master_taskloop): New.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gfortran.dg/gomp/reduction5.f90: Remove dg-error the issue is
>   now diagnosed with error output less. 

The above sentence is weird.  I'd expect a semicolon or comma before
"the issue", and "less error output" instead of "error output less".

>   * gfortran.dg/gomp/scan-1.f90: Likewise.
>   * gfortran.dg/gomp/pr99928-3.f90: New test.
>   * gfortran.dg/gomp/taskloop-1.f90: New test.

Otherwise LGTM.

Jakub



Re: [PATCH v2] Add vec_const_duplicate optab and TARGET_GEN_MEMSET_SCRATCH_RTX

2021-06-01 Thread Richard Biener via Gcc-patches
On Tue, Jun 1, 2021 at 3:05 PM H.J. Lu  wrote:
>
> On Mon, May 31, 2021 at 11:54:53PM -0600, Jeff Law wrote:
> >
> >
> > On 5/31/2021 11:50 PM, Richard Sandiford wrote:
> > > "H.J. Lu via Gcc-patches"  writes:
> > > > On Mon, May 31, 2021 at 06:32:04AM -0700, H.J. Lu wrote:
> > > > > On Mon, May 31, 2021 at 6:26 AM Richard Biener
> > > > >  wrote:
> > > > > > On Mon, May 31, 2021 at 3:12 PM H.J. Lu  wrote:
> > > > > > > On Mon, May 31, 2021 at 5:46 AM Richard Biener
> > > > > > >  wrote:
> > > > > > > > On Mon, May 31, 2021 at 2:09 PM H.J. Lu  
> > > > > > > > wrote:
> > > > > > > > > On Wed, May 26, 2021 at 10:28:16AM +0200, Richard Biener 
> > > > > > > > > wrote:
> > > > > > > > > > > > >   -- Target Hook: rtx TARGET_GEN_MEMSET_VALUE (rtx 
> > > > > > > > > > > > > DATA, scalar_int_mode
> > > > > > > > > > > > >MODE)
> > > > > > > > > > > > >   This function returns the RTL of a register 
> > > > > > > > > > > > > containing
> > > > > > > > > > > > >   'GET_MODE_SIZE (MODE)' consecutive copies of 
> > > > > > > > > > > > > the unsigned char
> > > > > > > > > > > > >   value given in the RTL register DATA.  For 
> > > > > > > > > > > > > example, if MODE is 4
> > > > > > > > > > > > >   bytes wide, return the RTL for 0x01010101*DATA.
> > > > > > > > > > > > For this one I wonder if it should be an optab instead. 
> > > > > > > > > > > >  Couldn't you
> > > > > > > > > > > > use the existing vec_duplicate for this by using 
> > > > > > > > > > > > (paradoxical) subregs
> > > > > > > > > > > > like (subreg:TI (vec_duplicate:VnQI (subreg:VnQI 
> > > > > > > > > > > > (reg:QI ...)))?
> > > > > > > > > > > I tried.   It doesn't even work on x86.  See:
> > > > > > > > > > >
> > > > > > > > > > > https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570661.html
> > > > > > > > > > Not sure what I should read from there...
> > > > > > > > > >
> > > > > > > > > > > There are special cases to subreg HI, SI and DI modes of 
> > > > > > > > > > > TI mode in
> > > > > > > > > > > ix86_gen_memset_value_from_prev.   simplify_gen_subreg 
> > > > > > > > > > > doesn't
> > > > > > > > > > > work here.   Each backend may need its own special 
> > > > > > > > > > > handling.
> > > > > > > > > > OK, I guess I'm not (RTL) qualified enough to further 
> > > > > > > > > > review these parts,
> > > > > > > > > > sorry.  Since we're doing code generation the canonical way 
> > > > > > > > > > to communicate
> > > > > > > > > > with backends should be optabs, not some set of 
> > > > > > > > > > disconnected target hooks.
> > > > > > > > > > But as said, I probably don't know enough of RTL to see why 
> > > > > > > > > > it's the only way.
> > > > > > > > > >
> > > > > > > > > > Richard.
> > > > > > > > > Here is the patch to add optabs instead.  Does it look OK?
> > > > > > > > >
> > > > > > > > > Thanks.
> > > > > > > > >
> > > > > > > > > H.J.
> > > > > > > > > ---
> > > > > > > > > Add 2 optabs:
> > > > > > > > >
> > > > > > > > > 1. integer_extract: Extract lower bit value from the integer 
> > > > > > > > > value in
> > > > > > > > > TImode, OImode or XImode.
> > > > > > > > That sounds very specific, esp. the restriction to 
> > > > > > > > {TI,OI,XI}mode.
> > > > > > > > It also sounds like it matches (subreg:{TI,OI,XI} (...) 0).  
> > > > > > > > There are
> > > > > > > > existing target hooks verifying subreg validity - why's that 
> > > > > > > > not a good
> > > > > > > > fit here?  ISTR you say gen_lowpart () doesn't work (or was it
> > > > > > > > simplify_gen_subreg?), why's that so?
> > > > > > > {TI,OI,XI}mode are storage only integer types.   subreg doesn't 
> > > > > > > work
> > > > > > > well on them.  I got
> > > > > > >
> > > > > > > [hjl@gnu-cfl-2 pieces]$ cat s2.i
> > > > > > > extern void *ops;
> > > > > > >
> > > > > > > void
> > > > > > > foo (int c)
> > > > > > > {
> > > > > > >__builtin_memset (ops, c, 34);
> > > > > > > }
> > > > > > > [hjl@gnu-cfl-2 pieces]$ make s2.s
> > > > > > > /export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
> > > > > > > -B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/
> > > > > > > -O2 -march=haswell -S s2.i
> > > > > > > during RTL pass: reload
> > > > > > > s2.i: In function ‘foo’:
> > > > > > > s2.i:7:1: internal compiler error: maximum number of generated 
> > > > > > > reload
> > > > > > > insns per insn achieved (90)
> > > > > > >  7 | }
> > > > > > >| ^
> > > > > > > 0x1050734 lra_constraints(bool)
> > > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra-constraints.c:5091
> > > > > > > 0x1039536 lra(_IO_FILE*)
> > > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/lra.c:2336
> > > > > > > 0xfe1140 do_reload
> > > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:5822
> > > > > > > 0xfe162e execute
> > > > > > > /export/gnu/import/git/gitlab/x86-gcc/gcc/ira.c:6008
> > > > > > > Please submit a full bug report,
> > > > > > > with preprocessed source if appropriate.
> > 

Re: [PATCH] Try LTO partial linking. (Was: Speed of compiling gimple-match.c)

2021-06-01 Thread Richard Biener via Gcc-patches
On Tue, Jun 1, 2021 at 1:25 PM Martin Liška  wrote:
>
> On 6/1/21 9:42 AM, Richard Biener wrote:
> > On Tue, Jun 1, 2021 at 9:33 AM Martin Liška  wrote:
> >>
> >> @Richi: Can you please reply to this email?
> >
> > Not sure what I should add here?  Honza suggested to mangle the
> > promoted symbol names.
>
> Sure and I sent a patch for that.
>
> > I don't
> > really like the idea to compile multiple TUs into one object.  Also
>
> What's problematic is that we'll have to wait for one another release to make 
> it useful
> (if you don't want to build the current master with a snapshot compiler).

IMHO it's a bugfix.  Note that I'm not sure what the intent of the change is.
If it is to speed up bootstrap then using LTO bootstrap would do the trick
as well (and better) if we'd simply process all of libbackend.a this way
(and thus avoid re-linking that once for each frontend).  If it is to speed up
dev (re-)builds then dragging in more files will make it build longer since
for example insn-recog.c may be unchanged but gimple-match.c not.

> > +LTO_LINKER_FLAGS = -flto=auto --param=lto-partitions=16
> > -flinker-output=nolto-rel -r
> >
> > why hard-code to 16 partitions?  You're side-stepping the driver
> > diagnostic by doing
> > compile & link separately, but in the end we're going to want sth like 
> > Giulianos
> > -fparallel-compile that works transparently from within the driver, so
> > the "manual"
> > operation should try to follow that or alternatively a driver-only
> > wrapper around the
> > "manual" processing could be added whose implementation can be optimized 
> > later.
>
> All right. Do you want me refreshing his -fparallel-compile option 
> introduction?

I'm not sure if we've arrived at a mergeable state - but if it's reasonably
possible to hide s/-fparallel-compile/-flto -r -flinker-output=nolto-rel/
split into compile & link parts (avoiding the diagnostic on -flinker-output)
in the driver then I think that's a very reasonable first step (after fixing
the symbol privatization issue).  The GSOC project then was to elide the IL
streaming from the high-level operation.

Richard.

> >
> > Why do you use -flto=auto?  There should be a jobserver active.
>
> Yes, that should not be needed.
>
> Martin
>
> >
> >> On 5/21/21 10:43 AM, Martin Liška wrote:
> >>> On 5/20/21 2:54 PM, Richard Biener wrote:
>  On Thu, May 20, 2021 at 2:34 PM Martin Liška  wrote:
> >
> > Hello.
> >
> > I've got a patch candidate that leverages partial linking for a couple 
> > of selected object files.
> >
> > I'm sending make all-host- jX results for my machine:
> >
> > before: 3m18s (user 32m52s)
> > https://gist.githubusercontent.com/marxin/223890df4d8d8e490b6b2918b77dacad/raw/1dd5eae5001295ba0230a689f7edc67284c9b742/gcc-all-host.svg
> >
> > after: 2m57m (user 35m)
> > https://gist.githubusercontent.com/marxin/223890df4d8d8e490b6b2918b77dacad/raw/d659b2187cf622167841efbbe6bc93cb33855fa9/gcc-all-host-partial-lto.svg
> >
> > One can utilize it with:
> > make -j16 all-host PARTIAL_LTO=1
> >
> > @Segher, Andrew: Can you please measure time improvement for your slow 
> > bootstrap?
> > One can also tweak --param=lto-partitions=16 param value.
> >
> > Thoughts?
> 
>  You're LTO linking multiple objects here - that's almost as if you
>  were doing this
>  for the whole of libbackend.a ... so $(OBJS)_CLFAGS += -flto and in the
>  libbackend.a rule do a similar partial link trick.
> >>>
> >>> Yeah, apart from that one can't likely do partial linking for an archive:
> >>>
> >>> $ g++ -no-pie -flto=auto --param=lto-partitions=16 
> >>> -flinker-output=nolto-rel -r libbackend.a
> >>> collect2: fatal error: ld terminated with signal 11 [Segmentation fault], 
> >>> core dumped
> >>> compilation terminated.
> >>>
> >>> while ld.bfd immediately finishes.
> >>>
> 
>  That gets you half of a LTO bootstrap then.
> 
>  So why did you go from applying this per-file to multiple files?  Does 
>  $(LINKER)
>  have a proper rule to pick up a jobserver?
> 
>  When upstreaming in any form you probably have to gate it on 
>  bootstrap-lto
>  being not active.
> >>>
> >>> Sure, that's reasonable, we can likely detect a -flto option in 
> >>> $(COMPILE), right?
> >>>
> >>> One more thing I face is broken dependency:
> >>> $ make clean && make -j32 PARTIAL_LTO=1
> >>>
> >>> g++ -fcf-protection -fno-PIE -c   -g   -DIN_GCC -fPIC-fno-exceptions 
> >>> -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing 
> >>> -Wwrite-strings -Wcast-qual -Wno-error=format-diag 
> >>> -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long 
> >>> -Wno-variadic-macros -Wno-overlength-strings -fno-common -Wno-unused 
> >>> -DHAVE_CONFIG_H -I. -I. -I/home/marxin/Programming/gcc/gcc 
> >>> -I/home/marxin/Programming/gcc/gcc/. 
> >>> -I/home/marxin/Programming/gcc/gcc/../include 
> >>> 

Re: Fallout: save/restore target options in handle_optimize_attribute

2021-06-01 Thread Martin Liška

On 6/1/21 3:11 PM, Richard Biener wrote:

On Tue, Jun 1, 2021 at 1:17 PM Martin Liška  wrote:


On 5/28/21 2:46 PM, Richard Biener wrote:

On Fri, May 28, 2021 at 11:48 AM Martin Liška  wrote:


Hi.

There's fallout after my revision ebd5e86c0f41dc1d692f9b2b68a510b1f6835a3e.
I would like to analyze all cases and discuss possible solutions. To be honest
it's a can of worms and reverting the commit is an option on the table.

So the cases:

1) PR100759 - ppc64le

$ cat pr.C
#pragma GCC optimize 0
void main();

$ ./xgcc -B. -Os pr.C
pr.C:2:11: internal compiler error: ‘global_options’ are modified in local 
context
   2 | void main();

What happens: we change from -Os to -O0 and rs6000_isa_flags differ in 
cl_optimization_compare.
Problem is that OPTION_MASK_SAVE_TOC_INDIRECT is set based on optimize flag:

 /* If we can shrink-wrap the TOC register save separately, then use
-msave-toc-indirect unless explicitly disabled.  */
 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
 && flag_shrink_wrap_separate
 && optimize_function_for_speed_p (cfun))
   rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;


So that means that

/* Restore current options.  */
cl_optimization_restore (&global_options, &global_options_set,
 &cur_opts);
cl_target_option_restore (&global_options, &global_options_set,
  TREE_TARGET_OPTION (prev_target_node));

does not result in the same outcome as the original command-line processing?

Given both restore processes could interact (not sure if that's the issue here)
shouldn't we just have a single restore operation and a single target
hook instead of both targetm.override_options_after_change and
targetm.target_option.restore?


That's not this case. But it can be a unification approach for the future.



Likewise we should probably _always_ set both, DECL_FUNCTION_SPECIFIC_OPT
and _TARGET as a step towards unifying them.


Yes, that's basically what's happening at various places.



That said, for the above case a more detailed run-down as to how things go wrong
would be nice to see.


Anyway, a detailed analysis of this issue is:

1) one provides -Os on the command-line, thus global_options.x_optimize_size == 
1
2) then we reach #pragma GCC optimize 0, at this point parse_optimize_options 
is called
 and thus global_options are modified (global_options.x_optimize_size)
 That's reflected in optimization_current_node, which is now different from 
optimization_default_node.
3) targetm.override_options_after_change is not called, so 
global_options.x_rs6000_isa_flags
 is not changed to 1.
4) for all subsequent functions, handle_optimize_attribute is called as we are 
in a 'pragma optimize'
5) here the sanity checking code saves saved_global_options, parsing happens 
and cl_*_restore is done
6) as cl_target_option_restore calls targetm.override_options_after_change, the 
global_options.x_rs6000_isa_flags
 has OPTION_MASK_SAVE_TOC_INDIRECT set
7) and the cl_optimization_compare complains

I have a patch that reflects that. In fact, the global options state is correct
for each function.
Apart from that, PR100759 mentions a test-case that fails due to a missing 
cl_target_option_restore
for 'pragma pop'.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests. And it 
survives tests on ppc64-linux-gnu.

Ready to be installed?


It sounds like a clear progression so OK.


Good, I'm going to install it.



I still don't get

+  /* When #pragma GCC optimize pragma is used, it modifies global_options
+without calling targetm.override_options_after_change.  That can leave
+target flags inconsistent for comparison.  */

fully, esp. as to why we cannot fix pragma handling and thus why the
"inconsistent"
state is actually OK.


Well, the sanity check is designed simply: it saves global_options, then
parse_optimize_options happens and cl_*_restore is done. After that we want
to be sure that global_options is equal to the saved one.

And here comes the problem. We saved global_options modified after '#pragma GCC 
optimize 0'.

Martin



Richard.


Thanks,
Martin




Suggested solution is doing:

 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
 && flag_shrink_wrap_separate)
   rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;

and add '&& optimize_function_for_speed_p (cfun)' to the place where the option 
mask is used.

2) Joseph's case:

$ cat ~/Programming/testcases/opts-bug.i
extern unsigned long int x;
extern float f (float);
extern __typeof (f) f_power8;
extern __typeof (f) f_power9;
extern __typeof (f) f __attribute__ ((ifunc ("f_ifunc")));
static __attribute__ ((optimize ("-fno-stack-protector"))) __typeof (f) *
f_ifunc (void)
{
 __typeof (f) *res = x ? f_power9 : f_power8;
 return res;
}

$ ./xgcc -B. ~/Programming/testcases/opts-bug.i -c -S -O2 -mlong-double-128 
-mabi=ibmlongdouble

Re: [PATCH] Try LTO partial linking. (Was: Speed of compiling gimple-match.c)

2021-06-01 Thread Martin Liška

On 6/1/21 9:42 AM, Richard Biener wrote:

On Tue, Jun 1, 2021 at 9:33 AM Martin Liška  wrote:


@Richi: Can you please reply to this email?


Not sure what I should add here?  Honza suggested to mangle the
promoted symbol names.


Sure and I sent a patch for that.


I don't
really like the idea to compile multiple TUs into one object.  Also


What's problematic is that we'll have to wait for another release to make it
useful (if you don't want to build the current master with a snapshot compiler).



+LTO_LINKER_FLAGS = -flto=auto --param=lto-partitions=16
-flinker-output=nolto-rel -r

why hard-code to 16 partitions?  You're side-stepping the driver diagnostic
by doing compile & link separately, but in the end we're going to want
something like Giuliano's -fparallel-compile that works transparently from
within the driver, so the "manual" operation should try to follow that, or
alternatively a driver-only wrapper around the "manual" processing could be
added whose implementation can be optimized later.


All right. Do you want me refreshing his -fparallel-compile option introduction?



Why do you use -flto=auto?  There should be a jobserver active.


Yes, that should not be needed.

Martin




On 5/21/21 10:43 AM, Martin Liška wrote:

On 5/20/21 2:54 PM, Richard Biener wrote:

On Thu, May 20, 2021 at 2:34 PM Martin Liška  wrote:


Hello.

I've got a patch candidate that leverages partial linking for a couple of 
selected object files.

I'm sending make all-host- jX results for my machine:

before: 3m18s (user 32m52s)
https://gist.githubusercontent.com/marxin/223890df4d8d8e490b6b2918b77dacad/raw/1dd5eae5001295ba0230a689f7edc67284c9b742/gcc-all-host.svg

after: 2m57s (user 35m)
https://gist.githubusercontent.com/marxin/223890df4d8d8e490b6b2918b77dacad/raw/d659b2187cf622167841efbbe6bc93cb33855fa9/gcc-all-host-partial-lto.svg

One can utilize it with:
make -j16 all-host PARTIAL_LTO=1

@Segher, Andrew: Can you please measure time improvement for your slow 
bootstrap?
One can also tweak --param=lto-partitions=16 param value.

Thoughts?


You're LTO linking multiple objects here - that's almost as if you were doing
this for the whole of libbackend.a ... so $(OBJS)_CFLAGS += -flto and in the
libbackend.a rule do a similar partial link trick.


Yeah, apart from that one can't likely do partial linking for an archive:

$ g++ -no-pie -flto=auto --param=lto-partitions=16 -flinker-output=nolto-rel -r 
libbackend.a
collect2: fatal error: ld terminated with signal 11 [Segmentation fault], core 
dumped
compilation terminated.

while ld.bfd immediately finishes.



That gets you half of a LTO bootstrap then.

So why did you go from applying this per-file to multiple files?  Does $(LINKER)
have a proper rule to pick up a jobserver?

When upstreaming in any form you probably have to gate it on bootstrap-lto
being not active.


Sure, that's reasonable, we can likely detect a -flto option in $(COMPILE), 
right?

One more thing I face is broken dependency:
$ make clean && make -j32 PARTIAL_LTO=1

g++ -fcf-protection -fno-PIE -c   -g   -DIN_GCC -fPIC-fno-exceptions 
-fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
-Wcast-qual -Wno-error=format-diag -Wmissing-format-attribute 
-Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
-Wno-overlength-strings -fno-common -Wno-unused -DHAVE_CONFIG_H -I. -I. 
-I/home/marxin/Programming/gcc/gcc -I/home/marxin/Programming/gcc/gcc/. 
-I/home/marxin/Programming/gcc/gcc/../include 
-I/home/marxin/Programming/gcc/gcc/../libcpp/include 
-I/home/marxin/Programming/gcc/gcc/../libcody  
-I/home/marxin/Programming/gcc/gcc/../libdecnumber 
-I/home/marxin/Programming/gcc/gcc/../libdecnumber/bid -I../libdecnumber 
-I/home/marxin/Programming/gcc/gcc/../libbacktrace   -o gimple-match-lto.o -MT 
gimple-match-lto.o -MMD -MP -MF ./.deps/gimple-match-lto.TPo gimple-match.c 
-flto
g++ -fcf-protection -fno-PIE -c   -g   -DIN_GCC -fPIC-fno-exceptions 
-fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
-Wcast-qual -Wno-error=format-diag -Wmissing-format-attribute 
-Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
-Wno-overlength-strings -fno-common -Wno-unused -DHAVE_CONFIG_H -I. -I. 
-I/home/marxin/Programming/gcc/gcc -I/home/marxin/Programming/gcc/gcc/. 
-I/home/marxin/Programming/gcc/gcc/../include 
-I/home/marxin/Programming/gcc/gcc/../libcpp/include 
-I/home/marxin/Programming/gcc/gcc/../libcody  
-I/home/marxin/Programming/gcc/gcc/../libdecnumber 
-I/home/marxin/Programming/gcc/gcc/../libdecnumber/bid -I../libdecnumber 
-I/home/marxin/Programming/gcc/gcc/../libbacktrace   -o generic-match-lto.o -MT 
generic-match-lto.o -MMD -MP -MF ./.deps/generic-match-lto.TPo generic-match.c 
-flto

In file included from ./tm.h:26,
   from /home/marxin/Programming/gcc/gcc/backend.h:28,
   from 
/home/marxin/Programming/gcc/gcc/generic-match-head.c:23,
   from 

[PATCH][OBVIOUS] docs: Mention that -fgcse-after-reload is enabled with -O3.

2021-06-01 Thread Martin Liška

Pushed as obvious where I verified that it's really enabled with -O3.

Martin

gcc/ChangeLog:

PR other/100826
* doc/invoke.texi: Mention that -fgcse-after-reload
is enabled with -O3.
---
 gcc/doc/invoke.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 5cd4e2d993c..08c3206b719 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10708,7 +10708,7 @@ When @option{-fgcse-after-reload} is enabled, a 
redundant load elimination
 pass is performed after reload.  The purpose of this pass is to clean up
 redundant spilling.
 
-Enabled by @option{-fprofile-use} and @option{-fauto-profile}.

+Enabled by @option{-O3}, @option{-fprofile-use} and @option{-fauto-profile}.
 
 @item -faggressive-loop-optimizations

 @opindex faggressive-loop-optimizations
--
2.31.1



[PATCH PR100740]Fix overflow check in simplifying exit cond comparing two IVs.

2021-06-01 Thread bin.cheng via Gcc-patches
Hi,
As described in the patch summary, this fixes the wrong-code issue by adding
an overflow check for iv1.step - iv2.step.

Bootstrapped and tested on x86_64.  Any comments?

Thanks,
bin

pr100740-20210525.txt
Description: Binary data


Re: [PATCH][RFC] tree-optimization/100801 - perform final value replacement from VRP

2021-06-01 Thread Richard Biener
On Mon, 31 May 2021, Andrew MacLeod wrote:

> On 5/28/21 11:25 AM, Richard Biener wrote:
> > This makes sure to perform final value replacement of constants
> > when we also are sure to propagate those, like in VRP.  This avoids
> > spurious diagnostics when doing so only from within SCCP which can
> > leave unreachable loops in the IL triggering bogus diagnostics.
> >
> > The choice is VRP because it already defers to SCEV for PHI nodes
> > so the following makes it analyze final values for loop exit PHIs.
> > To make that work it arranges for loop-closed SSA to be built only
> > after inserting ASSERT_EXPRs.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> > It does
> >
> > FAIL: gcc.dg/ubsan/pr94423.c   -O2  (internal compiler error)
> >
> > where VRP somehow propagates abnormals in a bogus way.

OK, so I analyzed this some more and it results from the hunk moving
loop-closed SSA construction after ASSERT_EXPR insertion in
execute_vrp.  The motivation for this is that we end up splitting
the loop exit edge when inserting the ASSERT_EXPR, creating
non-loop-closed SSA and thus fail to pick up the final value.

Now with swapping insert and loop-closed SSA build we get LC
SSA PHIs on an abnormal loop exit in the above testcase which
messes up assert expr removal which does

/* Propagate the RHS into every use of the LHS.  For SSA names
   also propagate abnormals as it merely restores the original
   IL in this case (an replace_uses_by would assert).  */

in remove_range_assertions, explicitly ignoring constraints around
abnormals.  But since LC SSA PHIs remain we fail IL verification.
Note the LC SSA PHI is only required because we insert a SSA def
via the ASSERT_EXPR in the loop body.  We can fix up the IL detail
by marking the ASSERT_EXPR source appropriately via

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 450926d5f9b..705e2489eb1 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -3809,6 +3809,8 @@ vrp_asserts::remove_range_assertions ()
FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
SET_USE (use_p, var);
+   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (var) = 1;
  }
else
  replace_uses_by (lhs, var);

but we also never get rid of a SSA_NAME_OCCURS_IN_ABNORMAL_PHI marking.

One option would be to keep the order as-is but fixup assert expr
insertion to update/honor loop-closed SSA.  But then - how far are you
with removing all this ASSERT_EXPR stuff?

Thanks,
Richard.


> >  It also
> > needs adjustment for a couple of fails like
> >
> > FAIL: gcc.dg/vect/no-scevccp-outer-11.c scan-tree-dump-times vect "OUTER
> > LOOP VECTORIZED." 1
> >
> > where the testcases do -fno-tree-scev-cprop but that no longer has
> > the desired effect then (might just guard the final value replacement
> > in VRP with this).
> >
> > Any comments?  I probably can plug final_value_at_exit elsewhere
> > than VRP (to avoid the issues with asserts) but it somewhat felt
> > naturally because that already uses SCEV.
> 
> I think it's OK.  I'll keep track of this change as we may need to incorporate the
> changes into fold_using_range::range_of_phi() when we move towards being on
> par with VRP, but the testsuite will show a difference if we miss it then
> anyway.
> 
> 
> > Thanks,
> > Richard.
> >
> > 2021-05-28  Richard Biener  
> >
> >  PR tree-optimization/100801
> >  * tree-scalar-evolution.h (final_value_at_exit): Declare.
> >  * tree-scalar-evolution.c (final_value_at_exit): New API,
> >  split out from ...
> >  (final_value_replacement_loop): ... here.
> >  * tree-vrp.c (execute_vrp): Build loop-closed SSA only
> >  after inserting assert expressions.
> >  (vrp_asserts::process_assert_insertions_for): Avoid inserting
> >  asserts for RESULT_DECLs, when building loop-closed SSA
> >  after assert insertion we're not able to propagate out all
> >  PHIs and thus trigger IL validation.
> >  * vr-values.c (vr_values::extract_range_from_phi_node):
> >  For loop exit PHIs also perform final value analysis using
> >  SCEV.
> >
> > * gcc.target/i386/pr100801.c: New testcase.
> > ---
> >   gcc/testsuite/gcc.target/i386/pr100801.c | 29 
> >   gcc/tree-scalar-evolution.c  | 28 +++
> >   gcc/tree-scalar-evolution.h  |  1 +
> >   gcc/tree-vrp.c   | 12 --
> >   gcc/vr-values.c  | 23 +++
> >   5 files changed, 77 insertions(+), 16 deletions(-)
> >   create mode 100644 gcc/testsuite/gcc.target/i386/pr100801.c
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pr100801.c
> > b/gcc/testsuite/gcc.target/i386/pr100801.c
> > new file mode 100644
> > index 000..c58f9be6898
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr100801.c
> > @@ -0,0 +1,29 

[Patch] Fortran/OpenMP: Support (parallel) master taskloop (simd) [PR99928]

2021-06-01 Thread Tobias Burnus

(PR 99928 is mainly about handling clauses in combined constructs,
but it also sees some related commits like this one. The PR is not
fully fixed for C/C++, and for Fortran a bit more is missing.)

This patch adds '(parallel) master taskloop (simd)' support to gfortran.

For full parsing support of the Fortran versions of the C/C++ testcases
pr99928-*.c, parsing support for 'loop' (+ combined constructs) and for
reduction with an array (slice) is still missing – as are, of course, the
FE fixes for handling the clauses, similar to what has been/is being done
for C/C++.
(No attempt has been made in this patch to fix long-standing
combined-clause handling issues.)

As a bug fix, 'taskloop simd reduction(...)' is now supported and
'taskloop simd reduction(task,...)' is now diagnosed by the FE.

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
Fortran/OpenMP: Support (parallel) master taskloop (simd) [PR99928]

	PR middle-end/99928

gcc/fortran/ChangeLog:

	* dump-parse-tree.c (show_omp_node, show_code_node): Handle
	(parallel) master taskloop (simd).
	* frontend-passes.c (gfc_code_walker): Set in_omp_workshare
	to false for parallel master taskloop (simd).
	* gfortran.h (enum gfc_statement):
	Add ST_OMP_(END_)(PARALLEL_)MASTER_TASKLOOP(_SIMD).
	(enum gfc_exec_op): EXEC_OMP_(PARALLEL_)MASTER_TASKLOOP(_SIMD).
	* match.h (gfc_match_omp_master_taskloop,
	gfc_match_omp_master_taskloop_simd,
	gfc_match_omp_parallel_master_taskloop,
	gfc_match_omp_parallel_master_taskloop_simd): New prototype.
	* openmp.c (gfc_match_omp_parallel_master_taskloop,
	gfc_match_omp_parallel_master_taskloop_simd,
	gfc_match_omp_master_taskloop,
	gfc_match_omp_master_taskloop_simd): New.
	(gfc_match_omp_taskloop_simd): Permit 'reduction' clause.
	(resolve_omp_clauses): Handle new combined directives; remove
	inscan-reduction check to reduce multiple errors; add
	task-reduction error for 'taskloop simd'.
(gfc_resolve_omp_parallel_blocks,
	resolve_omp_do, omp_code_to_statement,
	gfc_resolve_omp_directive): Handle new combined constructs.
	* parse.c (decode_omp_directive, next_statement,
	gfc_ascii_statement, parse_omp_do, parse_omp_structured_block,
	parse_executable): Likewise.
	* resolve.c (gfc_resolve_blocks, gfc_resolve_code): Likewise.
	* st.c (gfc_free_statement): Likewise.
	* trans.c (trans_code): Likewise.
	* trans-openmp.c (gfc_split_omp_clauses,
	gfc_trans_omp_directive): Likewise.
	(gfc_trans_omp_parallel_master): Move after gfc_trans_omp_master_taskloop;
	handle parallel master taskloop (simd) as well.
	(gfc_trans_omp_taskloop): Take gfc_exec_op as arg.
	(gfc_trans_omp_master_taskloop): New.

gcc/testsuite/ChangeLog:

	* gfortran.dg/gomp/reduction5.f90: Remove dg-error as the issue is
	now diagnosed with less error output.
	* gfortran.dg/gomp/scan-1.f90: Likewise.
	* gfortran.dg/gomp/pr99928-3.f90: New test.
	* gfortran.dg/gomp/taskloop-1.f90: New test.

 gcc/fortran/dump-parse-tree.c |  12 +++
 gcc/fortran/frontend-passes.c |   2 +
 gcc/fortran/gfortran.h|  10 +-
 gcc/fortran/match.h   |   4 +
 gcc/fortran/openmp.c  |  85 ++--
 gcc/fortran/parse.c   |  73 +-
 gcc/fortran/resolve.c |  10 ++
 gcc/fortran/st.c  |   4 +
 gcc/fortran/trans-openmp.c| 112 -
 gcc/fortran/trans.c   |   4 +
 gcc/testsuite/gfortran.dg/gomp/pr99928-3.f90  | 139 ++
 gcc/testsuite/gfortran.dg/gomp/reduction5.f90 |   4 +-
 gcc/testsuite/gfortran.dg/gomp/scan-1.f90 |   4 +-
 gcc/testsuite/gfortran.dg/gomp/taskloop-1.f90 | 126 +++
 14 files changed, 550 insertions(+), 39 deletions(-)

 gcc/fortran/dump-parse-tree.c |  12 +++
 gcc/fortran/frontend-passes.c |   2 +
 gcc/fortran/gfortran.h|  10 +-
 gcc/fortran/match.h   |   4 +
 gcc/fortran/openmp.c  |  85 ++--
 gcc/fortran/parse.c   |  73 +-
 gcc/fortran/resolve.c |  10 ++
 gcc/fortran/st.c  |   4 +
 gcc/fortran/trans-openmp.c| 112 -
 gcc/fortran/trans.c   |   4 +
 gcc/testsuite/gfortran.dg/gomp/pr99928-3.f90  | 139 ++
 gcc/testsuite/gfortran.dg/gomp/reduction5.f90 |   4 +-
 gcc/testsuite/gfortran.dg/gomp/scan-1.f90 |   4 +-
 gcc/testsuite/gfortran.dg/gomp/taskloop-1.f90 | 126 +++
 14 files changed, 550 insertions(+), 39 deletions(-)

diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c
index 93ff572cbd2..0e7fe1cc3f3 100644
--- 

[committed] libstdc++: Fix installation of python hooks [PR 99453]

2021-06-01 Thread Jonathan Wakely via Gcc-patches
When no shared library is installed, the new code to determine the name
of the -gdb.py file yields an empty string. Use the name of the static
library in that case.

libstdc++-v3/ChangeLog:

PR libstdc++/99453
* python/Makefile.am: Use archive name for printer hook if no
dynamic library name is available.
* python/Makefile.in: Regenerate.

Tested x86_64-linux. Committed to trunk. This is needed for gcc-11
too.

commit 9f7bc160b4a0f27dce248d1226e3ae7104b0e67b
Author: Jonathan Wakely 
Date:   Tue Jun 1 11:00:16 2021

libstdc++: Fix installation of python hooks [PR 99453]

When no shared library is installed, the new code to determine the name
of the -gdb.py file yields an empty string. Use the name of the static
library in that case.

libstdc++-v3/ChangeLog:

PR libstdc++/99453
* python/Makefile.am: Use archive name for printer hook if no
dynamic library name is available.
* python/Makefile.in: Regenerate.

diff --git a/libstdc++-v3/python/Makefile.am b/libstdc++-v3/python/Makefile.am
index 0c2b207b86e..8efefa5725c 100644
--- a/libstdc++-v3/python/Makefile.am
+++ b/libstdc++-v3/python/Makefile.am
@@ -48,5 +48,9 @@ install-data-local: gdb.py
 ## the correct name.
@libname=`sed -ne "/^library_names=/{s/.*='//;s/'$$//;s/ .*//;p;}" \
  $(DESTDIR)$(toolexeclibdir)/libstdc++.la`; \
+   if [ -z "$$libname" ]; then \
+ libname=`sed -ne "/^old_library=/{s/.*='//;s/'$$//;s/ .*//;p;}" \
+ $(DESTDIR)$(toolexeclibdir)/libstdc++.la`; \
+   fi; \
echo " $(INSTALL_DATA) gdb.py 
$(DESTDIR)$(toolexeclibdir)/$$libname-gdb.py"; \
$(INSTALL_DATA) gdb.py $(DESTDIR)$(toolexeclibdir)/$$libname-gdb.py


Re: [PATCH] Replace conditional_replacement with match and simplify

2021-06-01 Thread Richard Biener via Gcc-patches
On Tue, Jun 1, 2021 at 8:06 AM apinski--- via Gcc-patches
 wrote:
>
> From: Andrew Pinski 
>
> This is the first of a series of patches to simplify phi-opt
> to use match and simplify in many cases.  This simplification
> will allow more things to be optimized.
>
> This is what Richard requested in
> https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571197.html
> and I think it is the right thing to do too.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

Richard.

> gcc/ChangeLog:
>
> * tree-ssa-phiopt.c (match_simplify_replacement):
> New function.
> (tree_ssa_phiopt_worker): Use match_simplify_replacement.
> (two_value_replacement): Change the comment about
> conditional_replacement.
> (conditional_replacement): Delete.
> ---
>  gcc/tree-ssa-phiopt.c | 144 --
>  1 file changed, 39 insertions(+), 105 deletions(-)
>
> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> index e3bd18023a0..969b868397e 100644
> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -53,8 +53,8 @@ along with GCC; see the file COPYING3.  If not see
>  static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
>  static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
>tree, tree);
> -static bool conditional_replacement (basic_block, basic_block,
> -edge, edge, gphi *, tree, tree);
> +static bool match_simplify_replacement (basic_block, basic_block,
> +   edge, edge, gphi *, tree, tree);
>  static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, 
> tree,
> gimple *);
>  static int value_replacement (basic_block, basic_block,
> @@ -347,8 +347,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool 
> do_hoist_loads, bool early_p)
>   if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, 
> arg1))
> cfgchanged = true;
>   else if (!early_p
> -  && conditional_replacement (bb, bb1, e1, e2, phi,
> -  arg0, arg1))
> +  && match_simplify_replacement (bb, bb1, e1, e2, phi,
> + arg0, arg1))
> cfgchanged = true;
>   else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
> cfgchanged = true;
> @@ -675,7 +675,7 @@ two_value_replacement (basic_block cond_bb, basic_block 
> middle_bb,
>  }
>
>/* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
> - conditional_replacement.  */
> + match_simplify_replacement.  */
>if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
>&& (integer_zerop (arg0)
>   || integer_zerop (arg1)
> @@ -784,137 +784,71 @@ two_value_replacement (basic_block cond_bb, 
> basic_block middle_bb,
>return true;
>  }
>
> -/*  The function conditional_replacement does the main work of doing the
> -conditional replacement.  Return true if the replacement is done.
> +/*  The function match_simplify_replacement does the main work of doing the
> +replacement using match and simplify.  Return true if the replacement is 
> done.
>  Otherwise return false.
>  BB is the basic block where the replacement is going to be done on.  ARG0
>  is argument 0 from PHI.  Likewise for ARG1.  */
>
>  static bool
> -conditional_replacement (basic_block cond_bb, basic_block middle_bb,
> -edge e0, edge e1, gphi *phi,
> -tree arg0, tree arg1)
> +match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
> +   edge e0, edge e1, gphi *phi,
> +   tree arg0, tree arg1)
>  {
> -  tree result;
>gimple *stmt;
> -  gassign *new_stmt;
>tree cond;
>gimple_stmt_iterator gsi;
>edge true_edge, false_edge;
> -  tree new_var, new_var2;
> -  bool neg = false;
> -  int shift = 0;
> -  tree nonzero_arg;
> -
> -  /* FIXME: Gimplification of complex type is too hard for now.  */
> -  /* We aren't prepared to handle vectors either (and it is a question
> - if it would be worthwhile anyway).  */
> -  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
> -   || POINTER_TYPE_P (TREE_TYPE (arg0)))
> -  || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
> -  || POINTER_TYPE_P (TREE_TYPE (arg1
> -return false;
> +  gimple_seq seq = NULL;
> +  tree result;
>
> -  /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
> - 0 and (1 << cst), then convert it to the conditional.  */
> -  if (integer_zerop (arg0))
> -nonzero_arg = arg1;
> -  else if (integer_zerop (arg1))
> -nonzero_arg = arg0;
> -  else
> -return false;
> -  if (integer_pow2p (nonzero_arg))
> -{
> -  shift = tree_log2 (nonzero_arg);
> -  if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))

[DWARF] Couple of pending patches

2021-06-01 Thread Eric Botcazou
Hi,

They are at:
  https://gcc.gnu.org/pipermail/gcc-patches/2021-April/568731.html
and
  https://gcc.gnu.org/pipermail/gcc-patches/2021-April/568916.html

I can devise a gnat.dg/ testcase for each of them if need be, although they 
will be exercised by the GDB testsuite.  They are essentially the last step 
before we can drop the historical GNAT encodings in favor of (GNU) DWARF.

Thanks in advance.

-- 
Eric Botcazou




[ARM] PR97906 - Missed lowering abs(a) >= abs(b) to vacge

2021-06-01 Thread Prathamesh Kulkarni via Gcc-patches
Hi,
As mentioned in the PR, for the following test-case:

#include <arm_neon.h>

uint32x2_t f1(float32x2_t a, float32x2_t b)
{
  return vabs_f32 (a) >= vabs_f32 (b);
}

uint32x2_t f2(float32x2_t a, float32x2_t b)
{
  return (uint32x2_t) __builtin_neon_vcagev2sf (a, b);
}

We generate vacge for f2, but with -ffast-math, we generate the following for f1:
f1:
vabs.f32d1, d1
vabs.f32d0, d0
vcge.f32d0, d0, d1
bx  lr

This happens because the middle-end inverts the comparison to b <= a,
.optimized dump:
 _8 = __builtin_neon_vabsv2sf (a_4(D));
  _7 = __builtin_neon_vabsv2sf (b_5(D));
  _1 = _7 <= _8;
  _2 = VIEW_CONVERT_EXPR(_1);
  _6 = VIEW_CONVERT_EXPR(_2);
  return _6;

and combine fails to match the following pattern:
(set (reg:V2SI 121)
(neg:V2SI (le:V2SI (abs:V2SF (reg:V2SF 123))
(abs:V2SF (reg:V2SF 122)

because neon_vca pattern has GTGE code iterator.
The attached patch adjusts the neon_vca patterns to use GLTE instead
similar to neon_vca_fp16insn, and removes NEON_VACMP iterator.
Code-gen with patch:
f1:
vacle.f32   d0, d1, d0
bx  lr

Bootstrapped + tested on arm-linux-gnueabihf and cross-tested on arm*-*-*.
OK to commit ?

Thanks,
Prathamesh
2021-06-01  Prathamesh Kulkarni  

PR target/97906
* config/arm/iterators.md (NEON_VACMP): Remove.
* config/arm/neon.md (neon_vca): Use GLTE instead of GTGE 
iterator.
(neon_vca_insn): Likewise.
(neon_vca_insn_unspec): Use NEON_VAGLTE instead of 
NEON_VACMP.

testsuite/
* gcc.target/arm/simd/pr97906.c: New test.

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 3042bafc6c6..0e0e825225a 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -364,8 +364,6 @@
 (define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE
UNSPEC_VCLT UNSPEC_VCLE])
 
-(define_int_iterator NEON_VACMP [UNSPEC_VCAGE UNSPEC_VCAGT])
-
 (define_int_iterator NEON_VAGLTE [UNSPEC_VCAGE UNSPEC_VCAGT
  UNSPEC_VCALE UNSPEC_VCALT])
 
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index cc82d068a1c..ccc99603531 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2400,7 +2400,7 @@
 (define_expand "neon_vca"
   [(set (match_operand: 0 "s_register_operand")
 (neg:
-  (GTGE:
+  (GLTE:
 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")]
   "TARGET_NEON"
@@ -2419,7 +2419,7 @@
 (define_insn "neon_vca_insn"
   [(set (match_operand: 0 "s_register_operand" "=w")
 (neg:
-  (GTGE:
+  (GLTE:
 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")]
   "TARGET_NEON && flag_unsafe_math_optimizations"
@@ -2431,7 +2431,7 @@
   [(set (match_operand: 0 "s_register_operand" "=w")
 (unspec: [(match_operand:VCVTF 1 "s_register_operand" 
"w")
(match_operand:VCVTF 2 "s_register_operand" 
"w")]
-   NEON_VACMP))]
+   NEON_VAGLTE))]
   "TARGET_NEON"
   "vac.\t%0, %1, %2"
   [(set_attr "type" "neon_fp_compare_s")]
diff --git a/gcc/testsuite/gcc.target/arm/simd/pr97906.c 
b/gcc/testsuite/gcc.target/arm/simd/pr97906.c
new file mode 100644
index 000..7c972e311ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/pr97906.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+uint32x2_t f1(float32x2_t a, float32x2_t b)
+{
+  return vabs_f32 (a) >= vabs_f32 (b);
+}
+
+/* { dg-final { scan-assembler-times {\tvacle.f32\td[0-9]+, d[0-9]+, d[0-9]+} 
1 } } */
+/* { dg-final { scan-assembler-not "vabs" } } */


Re: [PATCH] Simplify (view_convert ~a) < 0 to (view_convert a) >= 0 [PR middle-end/100738]

2021-06-01 Thread Marc Glisse

On Tue, 1 Jun 2021, Hongtao Liu via Gcc-patches wrote:


Hi:
 This patch simplifies (view_convert:type ~a) < 0 to
(view_convert:type a) >= 0 when type is signed integer. Similar for
(view_convert:type ~a) >= 0.
 Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
 Ok for the trunk?

gcc/ChangeLog:

   PR middle-end/100738
   * match.pd ((view_convert ~a) < 0 --> (view_convert a) >= 0,
   (view_convert ~a) >= 0 --> (view_convert a) < 0): New GIMPLE
   simplification.


We already have

/* Fold ~X op C as X op' ~C, where op' is the swapped comparison.  */
(for cmp (simple_comparison)
 scmp (swapped_simple_comparison)
 (simplify
  (cmp (bit_not@2 @0) CONSTANT_CLASS_P@1)
  (if (single_use (@2)
   && (TREE_CODE (@1) == INTEGER_CST || TREE_CODE (@1) == VECTOR_CST))
   (scmp @0 (bit_not @1)

Would it make sense to try and generalize it a bit, say with

(cmp (nop_convert1? (bit_not @0)) CONSTANT_CLASS_P)

(scmp (view_convert:XXX @0) (bit_not @1))

(I still believe that it is a bad idea that SSA_NAMEs are strongly typed, 
encoding the type in operations would be more convenient, but I think the 
time for that choice has long gone)


--
Marc Glisse


Re: Fallout: save/restore target options in handle_optimize_attribute

2021-06-01 Thread Martin Liška

On 5/28/21 2:46 PM, Richard Biener wrote:

On Fri, May 28, 2021 at 11:48 AM Martin Liška  wrote:


Hi.

There's some fallout after my revision ebd5e86c0f41dc1d692f9b2b68a510b1f6835a3e.
I would like to analyze all the cases and discuss possible solutions. To be
honest, it's a can of worms and reverting the commit is an option on the table.

So the cases:

1) PR100759 - ppc64le

$ cat pr.C
#pragma GCC optimize 0
void main();

$ ./xgcc -B. -Os pr.C
pr.C:2:11: internal compiler error: ‘global_options’ are modified in local 
context
  2 | void main();

What happens: we change from -Os to -O0 and rs6000_isa_flags differ in 
cl_optimization_compare.
Problem is that OPTION_MASK_SAVE_TOC_INDIRECT is set based on optimize flag:

/* If we can shrink-wrap the TOC register save separately, then use
   -msave-toc-indirect unless explicitly disabled.  */
if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
&& flag_shrink_wrap_separate
&& optimize_function_for_speed_p (cfun))
  rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;


So that means that

   /* Restore current options.  */
   cl_optimization_restore (&global_options, &global_options_set,
                            &cur_opts);
   cl_target_option_restore (&global_options, &global_options_set,
                             TREE_TARGET_OPTION (prev_target_node));

does not result in the same outcome as the original command-line processing?

Given both restore processes could interact (not sure if that's the issue here)
shouldn't we just have a single restore operation and a single target
hook instead of both targetm.override_options_after_change and
targetm.target_option.restore?


That's not this case. But it can be a unification approach for the future.



Likewise we should probably _always_ set both, DECL_FUNCTION_SPECIFIC_OPT
and _TARGET as a step towards unifying them.


Yes, that's basically what's happening at various places.



That said, for the above case a more detailed run-down as to how things go wrong
would be nice to see.


Anyway, a detailed analysis of this issue is:

1) one provides -Os on the command-line, thus global_options.x_optimize_size == 
1
2) then we reach #pragma GCC optimize 0, at this point parse_optimize_options 
is called
   and thus global_options are modified (global_options.x_optimize_size)
   That's reflected in optimization_current_node, which is now different from 
optimization_default_node.
3) targetm.override_options_after_change is not called, so 
global_options.x_rs6000_isa_flags
   is not changed to 1.
4) for all subsequent functions, handle_optimize_attribute is called as we are 
in a 'pragma optimize'
5) here the sanity checking code saves saved_global_options, parsing happens 
and cl_*_restore is done
6) as cl_target_option_restore calls targetm.override_options_after_change, the 
global_options.x_rs6000_isa_flags
   has OPTION_MASK_SAVE_TOC_INDIRECT set
7) and the cl_optimization_compare complains

I have a patch that reflects that. In fact, the global options state is then
correct for each function.
Apart from that, PR100759 mentions a test-case that fails due to a missing 
cl_target_option_restore
for 'pragma pop'.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests. And it 
survives tests on ppc64-linux-gnu.

Ready to be installed?
Thanks,
Martin




Suggested solution is doing:

if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
&& flag_shrink_wrap_separate)
  rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;

and add '&& optimize_function_for_speed_p (cfun)' to the place where the option 
mask is used.

2) Joseph's case:

$ cat ~/Programming/testcases/opts-bug.i
extern unsigned long int x;
extern float f (float);
extern __typeof (f) f_power8;
extern __typeof (f) f_power9;
extern __typeof (f) f __attribute__ ((ifunc ("f_ifunc")));
static __attribute__ ((optimize ("-fno-stack-protector"))) __typeof (f) *
f_ifunc (void)
{
__typeof (f) *res = x ? f_power9 : f_power8;
return res;
}

$ ./xgcc -B. ~/Programming/testcases/opts-bug.i -c -S -O2 -mlong-double-128 
-mabi=ibmlongdouble
/home/marxin/Programming/testcases/opts-bug.i:8:1: error: ‘-mabi=ibmlongdouble’ 
requires ‘-mlong-double-128’

This is caused by a weird option override:

else if (rs6000_long_double_type_size == 128)
  rs6000_long_double_type_size = FLOAT_PRECISION_TFmode; (it's 127)

later when rs6000_option_override_internal is called for saved target flags 
(127), it complains.
Possible fix:

else if (rs6000_long_double_type_size == 128
|| rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)

3) ARM issue reported here:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98636#c20

arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
if (arm_fp16_inst)
  {
if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
 error ("selected fp16 options are incompatible");
arm_fp16_format = ARM_FP16_FORMAT_IEEE;
  

  1   2   >