Re: [PATCH] LoongArch: Support loading floating-point zero into MEM[base + index].

2023-09-01 Thread Xi Ruoyao via Gcc-patches
LGTM.

Nit: it should be "storing" floating-point zero into MEM, not "loading".

On Sat, 2023-09-02 at 12:47 +0800, Guo Jie wrote:
> gcc/ChangeLog:
> 
> * config/loongarch/loongarch.md: Support 'G' -> 'k' in
> movsf_hardfloat and movdf_hardfloat.
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.target/loongarch/const-double-zero-stx.c: New test.
> 
> ---
>  gcc/config/loongarch/loongarch.md  | 12 ++--
>  .../loongarch/const-double-zero-stx.c  | 18 ++
>  2 files changed, 24 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c
> 
> diff --git a/gcc/config/loongarch/loongarch.md 
> b/gcc/config/loongarch/loongarch.md
> index b37e070660f..6f47c23a79c 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -1915,13 +1915,13 @@ (define_expand "movsf"
>  })
>  
>  (define_insn "*movsf_hardfloat"
> -  [(set (match_operand:SF 0 "nonimmediate_operand" 
> "=f,f,f,m,f,k,m,*f,*r,*r,*r,*m")
> -   (match_operand:SF 1 "move_operand" "f,G,m,f,k,f,G,*r,*f,*G*r,*m,*r"))]
> +  [(set (match_operand:SF 0 "nonimmediate_operand" 
> "=f,f,f,m,f,k,m,k,*f,*r,*r,*r,*m")
> +   (match_operand:SF 1 "move_operand" 
> "f,G,m,f,k,f,G,G,*r,*f,*G*r,*m,*r"))]
>    "TARGET_HARD_FLOAT
>     && (register_operand (operands[0], SFmode)
>     || reg_or_0_operand (operands[1], SFmode))"
>    { return loongarch_output_move (operands[0], operands[1]); }
> -  [(set_attr "move_type" 
> "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,mgtf,mftg,move,load,store")
> +  [(set_attr "move_type" 
> "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,store,mgtf,mftg,move,load,store")
>     (set_attr "mode" "SF")])
>  
>  (define_insn "*movsf_softfloat"
> @@ -1946,13 +1946,13 @@ (define_expand "movdf"
>  })
>  
>  (define_insn "*movdf_hardfloat"
> -  [(set (match_operand:DF 0 "nonimmediate_operand" 
> "=f,f,f,m,f,k,m,*f,*r,*r,*r,*m")
> -   (match_operand:DF 1 "move_operand" "f,G,m,f,k,f,G,*r,*f,*r*G,*m,*r"))]
> +  [(set (match_operand:DF 0 "nonimmediate_operand" 
> "=f,f,f,m,f,k,m,k,*f,*r,*r,*r,*m")
> +   (match_operand:DF 1 "move_operand" 
> "f,G,m,f,k,f,G,G,*r,*f,*r*G,*m,*r"))]
>    "TARGET_DOUBLE_FLOAT
>     && (register_operand (operands[0], DFmode)
>     || reg_or_0_operand (operands[1], DFmode))"
>    { return loongarch_output_move (operands[0], operands[1]); }
> -  [(set_attr "move_type" 
> "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,mgtf,mftg,move,load,store")
> +  [(set_attr "move_type" 
> "fmove,mgtf,fpload,fpstore,fpload,fpstore,store,store,mgtf,mftg,move,load,store")
>     (set_attr "mode" "DF")])
>  
>  (define_insn "*movdf_softfloat"
> diff --git a/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c 
> b/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c
> new file mode 100644
> index 000..8fb04be8ff5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-times {stx\..\t\$r0} 2 } } */
> +
> +extern float arr_f[];
> +extern double arr_d[];
> +
> +void
> +test_f (int base, int index)
> +{
> +  arr_f[base + index] = 0.0;
> +}
> +
> +void
> +test_d (int base, int index)
> +{
> +  arr_d[base + index] = 0.0;
> +}

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH] LoongArch: Support loading floating-point zero into MEM[base + index].

2023-09-01 Thread Guo Jie
gcc/ChangeLog:

* config/loongarch/loongarch.md: Support 'G' -> 'k' in
movsf_hardfloat and movdf_hardfloat.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/const-double-zero-stx.c: New test.

---
 gcc/config/loongarch/loongarch.md  | 12 ++--
 .../loongarch/const-double-zero-stx.c  | 18 ++
 2 files changed, 24 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index b37e070660f..6f47c23a79c 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1915,13 +1915,13 @@ (define_expand "movsf"
 })
 
 (define_insn "*movsf_hardfloat"
-  [(set (match_operand:SF 0 "nonimmediate_operand" 
"=f,f,f,m,f,k,m,*f,*r,*r,*r,*m")
-   (match_operand:SF 1 "move_operand" "f,G,m,f,k,f,G,*r,*f,*G*r,*m,*r"))]
+  [(set (match_operand:SF 0 "nonimmediate_operand" 
"=f,f,f,m,f,k,m,k,*f,*r,*r,*r,*m")
+   (match_operand:SF 1 "move_operand" "f,G,m,f,k,f,G,G,*r,*f,*G*r,*m,*r"))]
   "TARGET_HARD_FLOAT
&& (register_operand (operands[0], SFmode)
|| reg_or_0_operand (operands[1], SFmode))"
   { return loongarch_output_move (operands[0], operands[1]); }
-  [(set_attr "move_type" 
"fmove,mgtf,fpload,fpstore,fpload,fpstore,store,mgtf,mftg,move,load,store")
+  [(set_attr "move_type" 
"fmove,mgtf,fpload,fpstore,fpload,fpstore,store,store,mgtf,mftg,move,load,store")
(set_attr "mode" "SF")])
 
 (define_insn "*movsf_softfloat"
@@ -1946,13 +1946,13 @@ (define_expand "movdf"
 })
 
 (define_insn "*movdf_hardfloat"
-  [(set (match_operand:DF 0 "nonimmediate_operand" 
"=f,f,f,m,f,k,m,*f,*r,*r,*r,*m")
-   (match_operand:DF 1 "move_operand" "f,G,m,f,k,f,G,*r,*f,*r*G,*m,*r"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" 
"=f,f,f,m,f,k,m,k,*f,*r,*r,*r,*m")
+   (match_operand:DF 1 "move_operand" "f,G,m,f,k,f,G,G,*r,*f,*r*G,*m,*r"))]
   "TARGET_DOUBLE_FLOAT
&& (register_operand (operands[0], DFmode)
|| reg_or_0_operand (operands[1], DFmode))"
   { return loongarch_output_move (operands[0], operands[1]); }
-  [(set_attr "move_type" 
"fmove,mgtf,fpload,fpstore,fpload,fpstore,store,mgtf,mftg,move,load,store")
+  [(set_attr "move_type" 
"fmove,mgtf,fpload,fpstore,fpload,fpstore,store,store,mgtf,mftg,move,load,store")
(set_attr "mode" "DF")])
 
 (define_insn "*movdf_softfloat"
diff --git a/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c 
b/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c
new file mode 100644
index 000..8fb04be8ff5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/const-double-zero-stx.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times {stx\..\t\$r0} 2 } } */
+
+extern float arr_f[];
+extern double arr_d[];
+
+void
+test_f (int base, int index)
+{
+  arr_f[base + index] = 0.0;
+}
+
+void
+test_d (int base, int index)
+{
+  arr_d[base + index] = 0.0;
+}
-- 
2.20.1



[PATCH] ssa_name_has_boolean_range vs signed-boolean:31 types

2023-09-01 Thread Andrew Pinski via Gcc-patches
This turns out to be a latent bug in ssa_name_has_boolean_range
where it would return true for all boolean types but all of the
uses of ssa_name_has_boolean_range were expecting 0/1 as the range
rather than [-1,0].
So when I fixed vector lower to do all comparisons in boolean_type
rather than still in the signed-boolean:31 type (to fix a different issue),
the pattern in match for `-(type)!A -> (type)A - 1.` would assume A (which
was signed-boolean:31) had a range of [0,1] which broke down and sometimes
gave us -1/-2 as values rather than what we were expecting of -1/0.

This was the simplest patch I found while testing.

We have another way of matching [0,1] range which we could use instead
of ssa_name_has_boolean_range except that uses only the global ranges
rather than the local range (during VRP).
I tried to clean this up slightly by using gimple_match_zero_one_valuedp
inside ssa_name_has_boolean_range but that failed due to using
only the global ranges. I then tried to change get_nonzero_bits to use
the local ranges at the optimization time but that failed also because
we would remove branches to __builtin_unreachable during evrp and lose
information as we don't set the global ranges during evrp.

OK? Bootstrapped and tested on x86_64-linux-gnu.

PR 110817

gcc/ChangeLog:

* tree-ssanames.cc (ssa_name_has_boolean_range): Remove the
check for boolean type as they don't have "[0,1]" range.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/pr110817-1.c: New test.
* gcc.c-torture/execute/pr110817-2.c: New test.
* gcc.c-torture/execute/pr110817-3.c: New test.
---
 gcc/testsuite/gcc.c-torture/execute/pr110817-1.c | 13 +
 gcc/testsuite/gcc.c-torture/execute/pr110817-2.c | 16 
 gcc/testsuite/gcc.c-torture/execute/pr110817-3.c | 14 ++
 gcc/tree-ssanames.cc |  4 
 4 files changed, 43 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr110817-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr110817-2.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr110817-3.c

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr110817-1.c 
b/gcc/testsuite/gcc.c-torture/execute/pr110817-1.c
new file mode 100644
index 000..1d33fa9a207
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr110817-1.c
@@ -0,0 +1,13 @@
+typedef unsigned long __attribute__((__vector_size__ (8))) V;
+
+
+V c;
+
+int
+main (void)
+{
+  V v = ~((V) { } <=0);
+  if (v[0])
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr110817-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr110817-2.c
new file mode 100644
index 000..1f759178425
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr110817-2.c
@@ -0,0 +1,16 @@
+
+typedef unsigned char u8;
+typedef unsigned __attribute__((__vector_size__ (8))) V;
+
+V v;
+unsigned char c;
+
+int
+main (void)
+{
+  V x = (v > 0) > (v != c);
+ // V x = foo ();
+  if (x[0] || x[1])
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr110817-3.c 
b/gcc/testsuite/gcc.c-torture/execute/pr110817-3.c
new file mode 100644
index 000..36f09c88dd9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr110817-3.c
@@ -0,0 +1,14 @@
+typedef unsigned __attribute__((__vector_size__ (1*sizeof(unsigned)))) V;
+
+V v;
+unsigned char c;
+
+int
+main (void)
+{
+  V x = (v > 0) > (v != c);
+  volatile signed int t = x[0];
+  if (t)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc
index 23387b90fe3..6c362995c1a 100644
--- a/gcc/tree-ssanames.cc
+++ b/gcc/tree-ssanames.cc
@@ -521,10 +521,6 @@ ssa_name_has_boolean_range (tree op)
 {
   gcc_assert (TREE_CODE (op) == SSA_NAME);
 
-  /* Boolean types always have a range [0..1].  */
-  if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE)
-return true;
-
   /* An integral type with a single bit of precision.  */
   if (INTEGRAL_TYPE_P (TREE_TYPE (op))
   && TYPE_UNSIGNED (TREE_TYPE (op))
-- 
2.31.1



Re: [PATCH v6] LoongArch:Implement 128-bit floating point functions in gcc.

2023-09-01 Thread chenglulu

Hi,RuoYao:

 I have merged the V6 patch into trunk (r14-3635). If the generic
optimization of copysignf128 cannot be solved, we will bring up the
architecture-specific optimization code again.

Thanks!


在 2023/9/1 上午11:22, chenxiaolong 写道:

Brief version history of patch set:

v1 -> v2:
According to the GNU code specification, adjust the format of the
function implementation with "q" as the suffix function.

v2 -> v3:

1.On the LoongArch architecture, refer to the functionality of 64-bit
functions and modify the underlying implementation of __builtin_{nanq, nansq}
functions in libgcc.

2.Modify the function's instruction template to use some instructions such
as "bstrins.d" to implement the 128-bit __builtin_{fabsq, copysignq} function
instead of calling libgcc library support, so as to better play the machine's
performance.

v3 -> v4:

1.The above v1,v2, and v3 all implement 128-bit floating-point functions
with "q" as the suffix, but it is an older implementation. The v4 version
completely abandoned the old implementation by associating the 128-bit
floating-point function with the "q" suffix with the "f128" function that
already existed in GCC.

2.Modify the code so that both "__float128" and "_Float128" function types
can be supported in compiler gcc.

3.Associating a function with the suffix "q" to the "f128" function allows
two different forms of the function to produce the same effect, For example,
__builtin_{huge_{valq/valf128},{infq/inff128},{nanq/nanf128},{nansq/nansf128},
{fabsq/fabsf128}}.

4.For the __builtin_copysignq function, do not call the new "f128"
implementation, but use "bstrins" and other instructions in the machine
description file to implement the function. As a result, the number of
assembly instructions can be reduced and the function is optimized to
achieve the best effect.

v4 -> v5:

Removed the v4 implementation of the __builtin_fabsf128() function added
to LoongArch.md.

v5 -> v6:

1.Modify the test cases in the math-float-128.c file.

2.Removed the v5 implementation of the __builtin_copysignf128() function
added to LoongArch.md.

During implementation, float128_type_node is bound with the type "__float128"
so that the compiler can correctly identify the type   of the function. The
"q" suffix is associated with the "f128" function, which makes GCC more
flexible to support different user input cases, implementing functions such
as __builtin_{huge_valq, infq, fabsq, copysignq, nanq, nansq}.

gcc/ChangeLog:

* config/loongarch/loongarch-builtins.cc (loongarch_init_builtins):
Associate the __float128 type to float128_type_node so that it can
be recognized by the compiler.
* config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins):
Add the flag "FLOAT128_TYPE" to gcc and associate a function
 with the suffix "q" to "f128".
* doc/extend.texi:Added support for 128-bit floating-point functions on
the LoongArch architecture.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/math-float-128.c: New test.
---
  gcc/config/loongarch/loongarch-builtins.cc|  5 ++
  gcc/config/loongarch/loongarch-c.cc   | 11 +++
  gcc/doc/extend.texi   | 20 -
  .../gcc.target/loongarch/math-float-128.c | 81 +++
  4 files changed, 114 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/math-float-128.c

diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc
index b929f224dfa..58b612bf445 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -256,6 +256,11 @@ loongarch_init_builtins (void)
unsigned int i;
tree type;
  
+  /* Register the type float128_type_node as a built-in type and

+ give it an alias "__float128".  */
+  (*lang_hooks.types.register_builtin_type) (float128_type_node,
+   "__float128");
+
/* Iterate through all of the bdesc arrays, initializing all of the
   builtin functions.  */
for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++)
diff --git a/gcc/config/loongarch/loongarch-c.cc 
b/gcc/config/loongarch/loongarch-c.cc
index 67911b78f28..6ffbf748316 100644
--- a/gcc/config/loongarch/loongarch-c.cc
+++ b/gcc/config/loongarch/loongarch-c.cc
@@ -99,6 +99,17 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
else
  builtin_define ("__loongarch_frlen=0");
  
+  /* Add support for FLOAT128_TYPE on the LoongArch architecture.  */

+  builtin_define ("__FLOAT128_TYPE__");
+
+  /* Map the old _Float128 'q' builtins into the new 'f128' builtins.  */
+  builtin_define ("__builtin_fabsq=__builtin_fabsf128");
+  builtin_define ("__builtin_copysignq=__builtin_copysignf128");
+  builtin_define ("__builtin_nanq=__builtin_nanf128");
+  builtin_define ("__builtin_nansq=__builtin_nansf128");
+  builtin_define 

Re: [PATCH] c++: improve verify_constant diagnostic [PR91483]

2023-09-01 Thread Marek Polacek via Gcc-patches
On Fri, Sep 01, 2023 at 08:00:01PM -0400, Marek Polacek via Gcc-patches wrote:
>if (TREE_OVERFLOW_P (t))
> diff --git a/gcc/testsuite/g++.dg/diagnostic/constexpr3.C 
> b/gcc/testsuite/g++.dg/diagnostic/constexpr3.C
> new file mode 100644
> index 000..b6e43a93664
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/diagnostic/constexpr3.C
> @@ -0,0 +1,32 @@
> +// { dg-do compile { target c++14 } }

I've added the missing PR c++/91483 here and in the ChangeLog in my local repo.

Marek



[PATCH] c++: improve verify_constant diagnostic [PR91483]

2023-09-01 Thread Marek Polacek via Gcc-patches
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --

When verify_constant complains, it's pretty terse.  Consider

  void test ()
  {
constexpr int i = 42;
    constexpr const int *p = &i;
  }

where it says "'& i' is not a constant expression".  OK, but why?

With this patch, we say:

b.C:5:28: error: '& i' is not a constant expression
5 |   constexpr const int *p = &i;
  |^~
b.C:5:28: note: pointer to 'i' is not a constant expression
b.C:4:17: note: address of non-static constexpr variable 'i' may differ on each 
invocation of the enclosing function; add 'static' to give it a constant address
4 |   constexpr int i = 42;
  | ^
  | static

which brings g++ on par with clang++.

gcc/cp/ChangeLog:

* constexpr.cc (verify_constant_explain_r): New.
(verify_constant): Call it.

gcc/testsuite/ChangeLog:

* g++.dg/diagnostic/constexpr3.C: New test.
---
 gcc/cp/constexpr.cc  | 56 +++-
 gcc/testsuite/g++.dg/diagnostic/constexpr3.C | 32 +++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/diagnostic/constexpr3.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 8bd5c4a47f8..6d5aed82377 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -3381,6 +3381,54 @@ ok:
 }
 }
 
+/* *TP was not deemed constant by reduced_constant_expression_p.  Explain
+   why and suggest what could be done about it.  */
+
+static tree
+verify_constant_explain_r (tree *tp, int *, void *)
+{
+  bool ref_p = false;
+
+  switch (TREE_CODE (*tp))
+{
+CASE_CONVERT:
+  if (TREE_CODE (TREE_OPERAND (*tp, 0)) != ADDR_EXPR)
+   break;
+  ref_p = TYPE_REF_P (TREE_TYPE (*tp));
+  *tp = TREE_OPERAND (*tp, 0);
+  gcc_fallthrough ();
+case ADDR_EXPR:
+  {
+   tree op = TREE_OPERAND (*tp, 0);
+   if (VAR_P (op)
+   && DECL_DECLARED_CONSTEXPR_P (op)
+   && !TREE_STATIC (op)
+   /* ??? We should also say something about temporaries.  */
+   && !DECL_ARTIFICIAL (op))
+ {
+   if (ref_p)
+ inform (location_of (*tp), "reference to %qD is not a constant "
+ "expression", op);
+   else
+ inform (location_of (*tp), "pointer to %qD is not a constant "
+ "expression", op);
+   const location_t op_loc = DECL_SOURCE_LOCATION (op);
+   rich_location richloc (line_table, op_loc);
+   richloc.add_fixit_insert_before (op_loc, "static ");
+   inform (&richloc,
+   "address of non-static constexpr variable %qD may differ on "
+   "each invocation of the enclosing function; add %<static%> "
+   "to give it a constant address", op);
+ }
+   break;
+  }
+default:
+  break;
+}
+
+  return NULL_TREE;
+}
+
 /* Some expressions may have constant operands but are not constant
themselves, such as 1/0.  Call this function to check for that
condition.
@@ -3398,7 +3446,13 @@ verify_constant (tree t, bool allow_non_constant, bool 
*non_constant_p,
   && t != void_node)
 {
   if (!allow_non_constant)
-   error ("%q+E is not a constant expression", t);
+   {
+ auto_diagnostic_group d;
+ error_at (cp_expr_loc_or_input_loc (t),
+   "%q+E is not a constant expression", t);
+ cp_walk_tree_without_duplicates (&t, verify_constant_explain_r,
+  nullptr);
+   }
   *non_constant_p = true;
 }
   if (TREE_OVERFLOW_P (t))
diff --git a/gcc/testsuite/g++.dg/diagnostic/constexpr3.C 
b/gcc/testsuite/g++.dg/diagnostic/constexpr3.C
new file mode 100644
index 000..b6e43a93664
--- /dev/null
+++ b/gcc/testsuite/g++.dg/diagnostic/constexpr3.C
@@ -0,0 +1,32 @@
+// { dg-do compile { target c++14 } }
+
+struct X {
+  int const& var;
+};
+
+struct A {
+  A *ap = this;
+};
+
+constexpr A
+foo ()
+{
+  return {};
+}
+
+void
+test ()
+{
+  constexpr int i = 42; // { dg-message "may differ on each invocation" }
+
+  constexpr X x{i}; // { dg-error "not a constant expression" }
+  // { dg-message "reference to .i. is not a constant expression" "" { target 
*-*-* } .-1 }
+  constexpr const int *p = &i; // { dg-error "not a constant expression" }
+  // { dg-message "pointer to .i. is not a constant expression" "" { target 
*-*-* } .-1 }
+
+  constexpr A a = foo (); // { dg-error "not a constant expression" }
+  // { dg-message "pointer to .a. is not a constant expression|may differ" "" 
{ target *-*-* } .-1 }
+
+  constexpr const int *q = __builtin_launder (&i); // { dg-error "not a 
constant expression" }
+  // { dg-message "pointer to .i. is not a constant expression" "" { target 
*-*-* } .-1 }
+}

base-commit: 6f06152541d62ae7c8579b7d7bf552be19e15b05
-- 
2.41.0



Re: [PATCH] diagnostics: Delete config pointer before overwriting it.

2023-09-01 Thread David Malcolm via Gcc-patches
On Fri, 2023-09-01 at 21:16 +0200, Mikael Morin via Gcc-patches wrote:
> Hello,
> 
> this is a fix for a small memory leak in the fortran frontend.
> Tested on x86_64-pc-linux-gnu, nothing stands out besides the
> apparently well-known guality instability.
> OK for master ? 

LGTM, thanks!

Dave



Re: [PATCH] analyzer: call off a superseding when diagnostics are unrelated [PR110830]

2023-09-01 Thread David Malcolm via Gcc-patches
On Fri, 2023-09-01 at 21:59 +0200, priour...@gmail.com wrote:
> From: benjamin priour 
> 
> Hi,
> 
> Patch successfully regstrapped off trunk
> 7f2ed06ddc825e8a4e0edfd1d66b5156e6dc1d34
> on x86_64-linux-gnu.
> 
> Is it OK for trunk ?
> 
> Thanks,
> Benjamin.
> 

[...snip...]

>  
> +/* Walk up the two paths to each of their common conditional
> +   branching.  At each branching, make sure both diagnostics'
> +   paths branched similarly.  If there is at least one where
> +   both paths go down a different outcome, then the paths
> +   are incompatible and this function returns FALSE.
> +   Otherwise return TRUE.
> +
> +   Incompatible paths:
> +
> +   
> +   /  \
> +  /    \
> +    true  false
> + |  |
> +    ...    ...
> + |  |
> +    ...   stmt x
> + |
> +   stmt x
> +
> +   Both LHS_PATH and RHS_PATH final enodes should be
> +   over the same gimple statement.  */
> +
> +static bool
> +compatible_epath_p (const exploded_path *lhs_path,
> +   const exploded_path *rhs_path)
> +{
> +  gcc_assert (lhs_path);
> +  gcc_assert (rhs_path);
> +  int i;
> +  const exploded_edge *outer_eedge;
> +  FOR_EACH_VEC_ELT_REVERSE (lhs_path->m_edges, i, outer_eedge)
> +    {
> +  const superedge *outer_sedge = outer_eedge->m_sedge;
> +  if (!outer_sedge || !outer_eedge->m_src)
> +   continue;
> +  const program_point &outer_src_point = outer_eedge->m_src->get_point 
> ();
> +  switch (outer_src_point.get_kind ())
> +   {
> + case PK_AFTER_SUPERNODE:
> +   if (const cfg_superedge *cfg_outer_sedge
> +   = outer_sedge->dyn_cast_cfg_superedge ())
> + {
> +   int j;
> +   const exploded_edge *inner_eedge;
> +   FOR_EACH_VEC_ELT_REVERSE (rhs_path->m_edges, j, inner_eedge)
> + {
> +   const superedge *inner_sedge = inner_eedge->m_sedge;
> +   if (!inner_sedge || !inner_eedge->m_src)
> + continue;
> +   const program_point &inner_src_point
> + = inner_eedge->m_src->get_point ();
> +   switch (inner_src_point.get_kind ())
> + {
> +   case PK_AFTER_SUPERNODE:
> + if (inner_src_point.get_stmt ()
> + != outer_src_point.get_stmt ())
> +   continue;
> + if (const cfg_superedge *cfg_inner_sedge
> + = inner_sedge->dyn_cast_cfg_superedge ())
> +   {
> + if (cfg_inner_sedge->true_value_p ()
> + != cfg_outer_sedge->true_value_p ())
> +   return false;
> +   }
> + break;
> +   default:
> + break;
> + }
> + }
> + }
> +   break;
> +
> + default:
> +   break;
> +   }
> +    }
> +    return true;
> +}

[...snip...]

Thanks for the patch.  I think the high-level idea is good, but I'm not
sure the implementation is correct:

- it is O(n^2), where n is the length of exploded_path.
- it walks backwards through the LHS path, and for each eedge from a
PK_AFTER_SUPERNODE it walks backwards from the end of the RHS epath; it
only looks at the "true" flag on CFG edges.  I think this works for
simple cases, but the way it restarts the rhs_path iteration from the
end of the rhs_path each time "feels" incorrect.

An eedge from a PK_AFTER_SUPERNODE is presumably just an eedge that has
a non-NULL m_sedge i.e. an exploded edge relating to an edge in the
supergraph.  Rather than looking at flags, can we simply compare
superedge pointers?  For example, if we care that we followed the
"true" path of a conditional in both lhs and rhs epaths, we can look to
see if both have an eedge where the superedge is the cfg_superedge
wrapping the CFG "true" edge i.e. I think we can simply compare the
superedge pointers.

Or is there some detail here that I'm misunderstanding?

I *think* it's possible to implement it in O(n) with something like
this:  (warning: untested code follows!)

  /* For compatibility, there should effectively be the same
 vector of superedges followed in both epaths.
 Walk backwards through each epath, looking at the superedges.  */
  // FIXME: really?  Benjamin, have I understood this correctly?

  gcc_assert (lhs_path->length () > 0);
  gcc_assert (rhs_path->length () > 0);

  int lhs_idx = lhs_path->length () - 1;
  int rhs_idx = rhs_path->length () - 1;

  while (lhs_idx >= 0 && rhs_idx >= 0)
{
  /* Find next LHS superedge, if any.  */
  while (lhs_idx >= 0)
{
  const exploded_edge *lhs_eedge = lhs_path->m_edges[lhs_idx];
  if (lhs_eedge->m_sedge)
break;
  else
 

[pushed] wwwdocs: gcc-12: Improve language around vectorizer and -O2

2023-09-01 Thread Gerald Pfeifer
Pushed.

Gerald
---
 htdocs/gcc-12/changes.html | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/htdocs/gcc-12/changes.html b/htdocs/gcc-12/changes.html
index 3816d06f..b10f2aa4 100644
--- a/htdocs/gcc-12/changes.html
+++ b/htdocs/gcc-12/changes.html
@@ -127,10 +127,12 @@ You may also want to check out our
 General Improvements
 
 
-  Vectorization is enabled at -O2 which is now equivalent to 
the
-  original -O2 -ftree-vectorize -fvect-cost-model=very-cheap.
-  Note that default vectorizer cost model has been changed which used to 
behave
-  as -fvect-cost-model=cheap were specified.
+  Vectorization is enabled at -O2 which is now
+  equivalent to what would have been
+  -O2 -ftree-vectorize -fvect-cost-model=very-cheap
+  in the past. Note that the default vectorizer cost model has
+  been changed; it used to behave as if
+  -fvect-cost-model=cheap had been specified.
   
   
 GCC now supports the
-- 
2.41.0


Re: RFC: Introduce -fhardened to enable security-related flags

2023-09-01 Thread Qing Zhao via Gcc-patches


> On Aug 29, 2023, at 3:42 PM, Marek Polacek via Gcc-patches 
>  wrote:
> 
> Improving the security of software has been a major trend in the recent
> years.  Fortunately, GCC offers a wide variety of flags that enable extra
> hardening.  These flags aren't enabled by default, though.  And since
> there are a lot of hardening flags, with more to come, it's been difficult
> to keep on top of them; more so for the users of GCC who ought not to be
> expected to keep track of all the new options.
> 
> To alleviate some of the problems I mentioned, we thought it would
> be useful to provide a new umbrella option that enables a reasonable set
> of hardening flags.  What's "reasonable" in this context is not easy to
> pin down.  Surely, there must be no ABI impact, the option cannot cause
> severe performance issues, and, I suspect, it should not cause build
> errors by enabling stricter compile-time errors (such as, -Wimplicit-int,
> -Wint-conversion).  Including a controversial option in -fhardened
> would likely cause that users would not use -fhardened at all.  It's
> roughly akin to -Wall or -O2 -- those also enable a reasonable set of
> options, and evolve over time, and are not kept in sync with other
> compilers.
> 
> Currently, -fhardened enables:
> 
>  -D_FORTIFY_SOURCE=3 (or =2 for older glibcs)
>  -D_GLIBCXX_ASSERTIONS
>  -ftrivial-auto-var-init=zero
>  -fPIE  -pie  -Wl,-z,relro,-z,now
>  -fstack-protector-strong
>  -fstack-clash-protection
>  -fcf-protection=full (x86 GNU/Linux only)
> 
> -fsanitize=undefined is specifically not enabled.  -fstrict-flex-arrays is
> also liable to break a lot of code so I didn't include it.
> 
> Appended is a proof-of-concept patch.  It doesn't implement --help=hardened
> yet.  A fairly crucial point is that -fhardened will not override options
> that were specified on the command line (before or after -fhardened).  For
> example,
> 
> -D_FORTIFY_SOURCE=1 -fhardened
> 
> means that _FORTIFY_SOURCE=1 will be used.  Similarly,
> 
>  -fhardened -fstack-protector
> 
> will not enable -fstack-protector-strong.
> 
> Thoughts?

In general, I think that it is a very good idea to provide umbrella options
 for software security purpose.  Thanks a lot for this work!

1. I do agree with Martin: multi-level control for this purpose might be
needed, similar to the multiple levels for warnings and for optimizations.

As with the optimization options, can we organize all the security options
together in our manual, so that users have a good central place to get more
complete information about the security features our compiler provides?

2. What are the major criteria for deciding which security features should go
into this list?
Later, when we have new security features, how do we decide whether to add
them to this list or not?
I am wondering why -fzero-call-used-regs is not included in the list, and also
why -ftrivial-auto-var-init=zero was chosen instead of
-ftrivial-auto-var-init=pattern.

3. It looks like -fhardened currently excludes all compile-time warning
options for security purposes
(for example, -Warray-bounds, -Wstringop-overflow...)
and also excludes all sanitizer options for security purposes
(-fsanitize=undefined).

So, shall we also provide an umbrella option for compile-time warning
options for security purposes,
and an umbrella option for sanitizer options (is -fsanitize=undefined this
one)?

Just some thoughts. -:).

Qing









Re: [PATCH 9/12] libgcc _BitInt support [PR102989]

2023-09-01 Thread Joseph Myers
On Wed, 9 Aug 2023, Jakub Jelinek via Gcc-patches wrote:

> I know that soft-fp is owned by glibc and I think the op-common.h change
> should be propagated there, but the bitint stuff is really GCC specific
> and IMHO doesn't belong into the glibc copy.

The op-common.h change is OK for glibc.

Some additional tests I think should be added to the testsuite for 
floating-point functionality in this patch, that I didn't spot in the 
testsuite patches - if any of these aren't included initially, there 
should at least be bugs filed in Bugzilla for the omissions:

1. Test overflowing conversions to integers (including from inf or NaN) 
raise FE_INVALID.  (Note: it's not specified in the standard whether 
inexact conversions to integers raise FE_INEXACT or not, so testing that 
seems less important.)

2. Test conversions from integers to floating point raise FE_INEXACT when 
inexact, together with FE_OVERFLOW when overflowing (while exact 
conversions don't raise exceptions).

3. Test conversions from integers to floating point respect the rounding 
mode.

4. Test converting floating-point values in the range (-1.0, 0.0] to both 
unsigned and signed _BitInt; I didn't see such tests for binary floating 
types, only for decimal types, and the decimal tests didn't include tests 
of negative zero itself as the value converted to _BitInt.

5. Test conversions of noncanonical BID zero to integers (these tests 
would be specific to BID).  See below for a bug in this area.

For points 2 and 3 above, it's probably appropriate to test only for 
binary floating point, to avoid any issues with the separate DFP rounding 
mode and with DFP arithmetic operations not necessarily working correctly 
with exceptions - but then a bug should be filed in Bugzilla noting the 
omission of such tests for DFP.

For points 1, 2 and 3 above, if the conversions for types such as 
_BitInt(32) might end up using the same conversions as for types such as 
int, then tests for such types should probably be omitted (again with a 
bug filed) given the range of known bugs about exceptions from such 
operations with types such as int.

> +__bid_fixtdbitint (UBILtype *r, SItype rprec, _Decimal128 a)
> +{
> +  FP_DECL_EX;
> +  USItype arprec = rprec < 0 ? -rprec : rprec;
> +  USItype rn = (arprec + BIL_TYPE_SIZE - 1) / BIL_TYPE_SIZE;
> +  union { _Decimal128 d; UDItype u[2]; } u;
> +  UDItype mantissahi, mantissalo, t;
> +  SItype sgn;
> +  SItype exponent;
> +  USItype exp_bits, mant_bits;
> +  UBILtype *pow10v, *resv;
> +  USItype pow10_limbs, res_limbs, min_limbs, mant_limbs, low_zeros;
> +
> +  FP_INIT_EXCEPTIONS;
> +  u.d = a;
> +  mantissahi = u.u[__FLOAT_WORD_ORDER__ != __ORDER_BIG_ENDIAN__];
> +  mantissalo = u.u[__FLOAT_WORD_ORDER__ == __ORDER_BIG_ENDIAN__];
> +  t = mantissahi >> 47;
> +  sgn = (DItype) mantissahi < 0;
> +  if ((t & (3 << 14)) != (3 << 14))
> +{
> +  mantissahi &= ((((UDItype) 1) << 49) - 1);
> +  exponent = (t >> 2) & 0x3fff;

Overflow (thus producing a noncanonical zero) is possible in this case for 
TDmode.  An appropriate test of a noncanonical zero that goes through this 
case should thus be added to the testsuite.

> +}
> +  else if ((t & (3 << 12)) != (3 << 12))
> +{
> +  mantissahi &= ((((UDItype) 1) << 47) - 1);
> +  mantissahi |= ((UDItype) 1) << 49;
> +  exponent = t & 0x3fff;
> +  if (mantissahi > (UDItype) 0x1ed09bead87c0
> +   || (mantissahi == (UDItype) 0x1ed09bead87c0
> +   && mantissalo > 0x378d8e63))
> + {
> +   mantissahi = 0;
> +   mantissalo = 0;
> + }

And in this case, overflow is guaranteed; the check for the overflow 
threshold should thus move to the previous case.

This patch is OK with these fixes.

Note for powerpc architecture maintainers: adding _BitInt support on that 
architecture will mean, as well as adding support for the conversions 
to/from DPD (if S/390 doesn't get there first), also adding support for 
conversions to/from IBM long double.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 14/12] libgcc _BitInt helper documentation [PR102989]

2023-09-01 Thread Joseph Myers
On Tue, 22 Aug 2023, Jakub Jelinek via Gcc-patches wrote:

> +significant limb if @var{N} is not divisible by

@var{N} should be @var{n}, throughout.

> +@deftypefn {Runtime Function} void __bid_fixsdbitint (@code{UBILtype} 
> *@var{r}, int32_t @var{rprec}, _Decimal32 @var{a})
> +@deftypefnx {Runtime Function} void __bid_fixddbitint (@code{UBILtype} 
> *@var{r}, int32_t @var{rprec}, _Decimal64 @var{a})
> +@deftypefnx {Runtime Function} void __bid_fixtdbitint (@code{UBILtype} 
> *@var{r}, int32_t @var{rprec}, _Decimal128 @var{a})
> +These functions convert @var{a} to bit-precise integer @var{r}, rounding 
> toward zero.
> +If @var{rprec} is positive, it converts to unsigned bit-precise integer and
> +negative values all become zero, if @var{rprec} is negative, it converts
> +to signed bit-precise integer.
> +@end deftypefn
> +
> +@deftypefn {Runtime Function} _Decimal32 __bid_floatbitintsd 
> (@code{UBILtype} *@var{i}, int32_t @var{iprec})
> +@deftypefnx {Runtime Function} _Decimal64 __bid_floatbitintdd 
> (@code{UBILtype} *@var{i}, int32_t @var{iprec})
> +@deftypefnx {Runtime Function} _Decimal128 __bid_floatbitinttd 
> (@code{UBILtype} *@var{i}, int32_t @var{iprec})
> +These functions convert bit-precise integer @var{i} to decimal floating 
> point.  If
> +@var{iprec} is positive, it is conversion from unsigned bit-precise integer,
> +otherwise from signed bit-precise integer.
> +@end deftypefn

The documentation for __bid_* should say explicitly that these functions 
are for BID format (assuming it's intended that functions for DPD format 
should use __dpd_* when support is added for an architecture using DPD).

> +/* Common final part of __fix?fbitint conversion functions.
> +   The A floating point value should have been converted using
> +   soft-fp macros into RV, U##DI##type DI##_BITS precise normal
> +   integral type and SHIFT, how many bits should that value be
> +   shifted to the left.  R is pointer to limbs array passed to the
> +   function, RN number of limbs in it, ARPREC absolute value of
> +   RPREC argument passed to it, RSIZE number of significant bits in RV.
> +   RSIGNED is non-zero if the result is signed bit-precise integer,
> +   otherwise zero.  If OVF is true, instead of storing RV shifted left
> +   by SHIFT bits and zero or sign extended store minimum or maximum
> +   of the signed or unsigned bit-precise integer type depending on if
> +   RV contains the minimum or maximum signed or unsigned value.  */

As I understand it, OVF is also for the case of a zero result from input 
close to zero, for signed types (when that's not the maximum or minimum) 
in addition to unsigned types.

> +/* Common initial part of __floatbitint?f conversion functions.
> +   I and IPREC are arguments passed to those functions, convert that
> +   into a pair of DI##type IV integer and SHIFT, such that converting
> +   IV to floating point and multiplicating that by pow (2, SHIFT)
> +   gives the expected result.  IV size needs to be chosen such that
> +   it is large than number of bits in floating-point mantissa and

"large than" -> "larger than".

This patch is OK with those fixes.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 8/12] libgcc: Generated tables for _BitInt <-> _Decimal* conversions [PR102989]

2023-09-01 Thread Joseph Myers
On Wed, 9 Aug 2023, Jakub Jelinek via Gcc-patches wrote:

> Hi!
> 
> The following patch adds a header with generated helper tables to support
> computation of powers of 10 from 10^0 to 10^6111 inclusive into a
> sufficiently large array of _BitInt limbs.  This is split from the rest
> of the libgcc _BitInt support because it is quite large and together it
> would run into gcc-patches mail length limits.

This patch is OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] analyzer: implement reference count checking for CPython plugin [PR107646]

2023-09-01 Thread Eric Feng via Gcc-patches
Thank you for the patch!

On Fri, Sep 1, 2023 at 10:51 AM David Malcolm  wrote:
>
> On Fri, 2023-09-01 at 04:49 +0200, Hans-Peter Nilsson wrote:
> > (Looks like this was committed as r14-3580-g597b9ec69bca8a)
> >
> > > Cc: g...@gcc.gnu.org, gcc-patches@gcc.gnu.org, Eric Feng
> > > 
> > > From: Eric Feng via Gcc 
> >
> > > gcc/testsuite/ChangeLog:
> > >   PR analyzer/107646
> > > * gcc.dg/plugin/analyzer_cpython_plugin.c: Implements
> > > reference count
> > >   * checking for PyObjects.
> > > * gcc.dg/plugin/cpython-plugin-test-2.c: Moved to...
> > > * gcc.dg/plugin/cpython-plugin-test-PyList_Append.c:
> > > ...here (and
> > >   * added more tests).
> > > * gcc.dg/plugin/cpython-plugin-test-1.c: Moved to...
> > > * gcc.dg/plugin/cpython-plugin-test-no-plugin.c: ...here
> > > (and added
> > >   * more tests).
> > > * gcc.dg/plugin/plugin.exp: New tests.
> > > * gcc.dg/plugin/cpython-plugin-test-PyList_New.c: New test.
> > > * gcc.dg/plugin/cpython-plugin-test-PyLong_FromLong.c: New
> > > test.
> > > * gcc.dg/plugin/cpython-plugin-test-refcnt-checking.c: New
> > > test.
> >
> > It seems this was more or less a rewrite, but that said,
> > it's generally preferable to always *add* tests, never *modify* them.
> >
> > >  .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 376
> > > +-
> >
> > ^^^ Ouch!  Was it not within reason to keep that test as it
> > was, and just add another test?
Thanks for the feedback. To clarify, 'analyzer_cpython_plugin.c' is
not a test itself but rather a plugin that currently lives within the
testsuite. The core of the test cases were also not modified, rather I
renamed certain filenames containing them for clarity (unless this is
what you meant in terms of modification, in which case noted) and
added to them. However, I understand the preference and will keep that
in mind.
> >
> > Anyway, the test after rewrite fails, and for some targets
> > like cris-elf and apparently m68k-linux, yields an error.
> > I see a PR was already opened.
> >
> > Also, mostly for future reference, several files in the
> > patch miss a final newline, as seen by a "\ No newline at
> > end of file"-marker.
Noted.
> >
> > I think I found the problem; a mismatch between default C++
> > language standard between host-gcc and target-gcc.
> >
> > (It's actually *not* as simple as "auto var = typeofvar()"
> > not being recognized in C++11 --or else there'd be an error
> > for the hash_set declaration too, which I just changed for
> > consistency-- but it's close enough for me.)
> >
> > With this, retesting plugin.exp for cris-elf works.
Sounds good, thanks again! I was also curious about why hash_map had
an issue here with that syntax whilst hash_set did not, so I tried to
investigate a bit further. I believe the issue was due to the compiler
having trouble disambiguating between the hash_map constructors in
C++11.

From the error message we received:

test/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c:480:58:
error: no matching function for call to 'hash_map::hash_map(hash_map)'
   auto region_to_refcnt = hash_map ();

I think the compiler is mistakenly interpreting the call here as we
would like to create a new hash_map object using its copy constructor,
but we "forgot" to provide the object to be copied, rather than our
intention of using the default constructor.

Looking at hash_map.h and hash_set.h seems to support this hypothesis,
as hash_map has two constructors, one of which resembles a copy
constructor with additional arguments:
https://github.com/gcc-mirror/gcc/blob/master/gcc/hash-map.h#L147.
Perhaps the default arguments here further complicated the ambiguity
as to which constructor to use in the presence of the empty
parenthesis.

On the other hand, hash_set has only the default constructor with
default parameters, and thus there is no ambiguity:
https://github.com/gcc-mirror/gcc/blob/master/gcc/hash-set.h#L40.

I assume this ambiguity was cleared up by later versions, and thus we
observed no problems in C++17. However, I am certainly still a
relative newbie of C++, so please anyone feel free to correct my
reasoning and chime in!
> >
> > Ok to commit?
>
> Sorry about the failing tests.
>
> Thanks for the patch; please go ahead and commit.
>
> Dave
>
> >
> > -- >8 --
> > From: Hans-Peter Nilsson 
> > Date: Fri, 1 Sep 2023 04:36:03 +0200
> > Subject: [PATCH] testsuite: Fix analyzer_cpython_plugin.c
> > declarations, PR testsuite/111264
> >
> > Also, add missing newline at end of file.
> >
> > PR testsuite/111264
> > * gcc.dg/plugin/analyzer_cpython_plugin.c: Make declarations
> > C++11-compatible.
> > ---
> >  gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c | 8 
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > index 

Re: [PATCH] Fortran: runtime bounds-checking in presence of array constructors [PR31059]

2023-09-01 Thread Harald Anlauf via Gcc-patches

Hi Mikael,

On 9/1/23 10:41, Mikael Morin via Gcc-patches wrote:

Le 31/08/2023 à 22:42, Harald Anlauf via Fortran a écrit :

Dear all,

gfortran's array bounds-checking code does a mostly reasonable
job for array sections in expressions and assignments, but
forgot the case that (rank-1) expressions can involve array
constructors, which have a shape ;-)

The attached patch walks over the loops generated by the
scalarizer, checks for the presence of a constructor, and
takes the first shape found as reference.  (If several
constructors are present, discrepancies in their shape
seems to be already detected at compile time).

For more details on what will be caught now see testcase.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?


This is OK.


I've pushed this as the first step.


May I suggest to handle functions the same way?


I'll have a look at them, but will need to gather a few
suitable testcases first.

Thanks for the review!

Harald



Thanks.


Thanks,
Harald









[PATCH] analyzer: call off a superseding when diagnostics are unrelated [PR110830]

2023-09-01 Thread Benjamin Priour via Gcc-patches
From: benjamin priour 

Hi,

Patch successfully regstrapped off trunk 7f2ed06ddc825e8a4e0edfd1d66b5156e6dc1d34
on x86_64-linux-gnu.

Is it OK for trunk ?

Thanks,
Benjamin.

Patch below.
---

Before this patch, a saved_diagnostic would supersede another at
the same statement if and only if its vfunc supercedes_p returned true
for the other diagnostic's kind.
That both warnings were unrelated, that is, that resolving one would not fix
the other, was not considered in making the above choice.

This patch makes it so that two saved_diagnostics taking a different
outcome of at least one common conditional branching cannot supersede
each other.

Signed-off-by: benjamin priour 

gcc/analyzer/ChangeLog:

PR analyzer/110830
* diagnostic-manager.cc
(compatible_epath_p): New function.
(saved_diagnostic::supercedes_p): Now calls the above
to determine if the diagnostics do overlap and the superseding
may proceed.

gcc/testsuite/ChangeLog:

PR analyzer/110830
* c-c++-common/analyzer/pr110830.c: New test.
---
 gcc/analyzer/diagnostic-manager.cc|  89 +-
 .../c-c++-common/analyzer/pr110830.c  | 111 ++
 2 files changed, 199 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/c-c++-common/analyzer/pr110830.c

diff --git a/gcc/analyzer/diagnostic-manager.cc 
b/gcc/analyzer/diagnostic-manager.cc
index 10fea486b8c..7cf181e7972 100644
--- a/gcc/analyzer/diagnostic-manager.cc
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -887,6 +887,87 @@ saved_diagnostic::add_duplicate (saved_diagnostic *other)
   m_duplicates.safe_push (other);
 }
 
+/* Walk up the two paths to each of their common conditional
+   branching.  At each branching, make sure both diagnostics'
+   paths branched similarly.  If there is at least one where
+   both paths go down a different outcome, then the paths
+   are incompatible and this function returns FALSE.
+   Otherwise return TRUE.
+
+   Incompatible paths:
+
+   
+   /  \
+  /\
+true  false
+ |  |
+......
+ |  |
+...   stmt x
+ |
+   stmt x
+
+   Both LHS_PATH and RHS_PATH final enodes should be
+   over the same gimple statement.  */
+
+static bool
+compatible_epath_p (const exploded_path *lhs_path,
+   const exploded_path *rhs_path)
+{
+  gcc_assert (lhs_path);
+  gcc_assert (rhs_path);
+  int i;
+  const exploded_edge *outer_eedge;
+  FOR_EACH_VEC_ELT_REVERSE (lhs_path->m_edges, i, outer_eedge)
+{
+  const superedge *outer_sedge = outer_eedge->m_sedge;
+  if (!outer_sedge || !outer_eedge->m_src)
+   continue;
+  const program_point &outer_src_point = outer_eedge->m_src->get_point ();
+  switch (outer_src_point.get_kind ())
+   {
+ case PK_AFTER_SUPERNODE:
+   if (const cfg_superedge *cfg_outer_sedge
+   = outer_sedge->dyn_cast_cfg_superedge ())
+ {
+   int j;
+   const exploded_edge *inner_eedge;
+   FOR_EACH_VEC_ELT_REVERSE (rhs_path->m_edges, j, inner_eedge)
+ {
+   const superedge *inner_sedge = inner_eedge->m_sedge;
+   if (!inner_sedge || !inner_eedge->m_src)
+ continue;
+   const program_point &inner_src_point
+ = inner_eedge->m_src->get_point ();
+   switch (inner_src_point.get_kind ())
+ {
+   case PK_AFTER_SUPERNODE:
+ if (inner_src_point.get_stmt ()
+ != outer_src_point.get_stmt ())
+   continue;
+ if (const cfg_superedge *cfg_inner_sedge
+ = inner_sedge->dyn_cast_cfg_superedge ())
+   {
+ if (cfg_inner_sedge->true_value_p ()
+ != cfg_outer_sedge->true_value_p ())
+   return false;
+   }
+ break;
+   default:
+ break;
+ }
+ }
+ }
+   break;
+
+ default:
+   break;
+   }
+}
+return true;
+}
+
+
 /* Return true if this diagnostic supercedes OTHER, and that OTHER should
therefore not be emitted.  */
 
@@ -896,7 +977,13 @@ saved_diagnostic::supercedes_p (const saved_diagnostic 
&other) const
   /* They should be at the same stmt.  */
   if (m_stmt != other.m_stmt)
 return false;
-  return m_d->supercedes_p (*other.m_d);
+  /* return early if OTHER won't be superseded anyway.  */
+  if (!m_d->supercedes_p (*other.m_d))
+return false;
+
+  /* If the two saved_diagnostics' path are not compatible
+ then they cannot supersede one another.  */
+  return compatible_epath_p (m_best_epath.get (), other.m_best_epath.get ());
 }
 
 /* 

[PATCH v2] RISC-V: zicond: Fix opt2 pattern

2023-09-01 Thread Vineet Gupta
This was tripping up gcc.c-torture/execute/pr60003.c at -O1 since in
failing case, pattern's asm czero.nez gets both rs2 and rs1 as non zero.

We start with the following src code snippet:

  if (a == 0)
return 0;
  else
return x;
}

which is equivalent to:  "x = (a != 0) ? x : a" where x is NOT 0.


and matches define_insn "*czero.nez.<GPR:mode><X:mode>.opt2"

| (insn 41 20 38 3 (set (reg/v:DI 136 [ x ])
|(if_then_else:DI (ne (reg/v:DI 134 [ a ])
|(const_int 0 [0]))
|(reg/v:DI 136 [ x ])
|(reg/v:DI 134 [ a ]))) {*czero.nez.didi.opt2}

The corresponding asm pattern generates
czero.nez x, x, a   ; %0, %2, %1

which implies
"x = (a != 0) ? 0 : a"

clearly not what the pattern wants to do.

Essentially "(a != 0) ? x : a" cannot be expressed with CZERO.nez if X
is not guaranteed to be 0.

However this can be fixed with a small tweak

"x = (a != 0) ? x : a"

   is same as

"x = (a == 0) ? a : x" since middle operand is 0 when a == 0.

which can be expressed with CZERO.eqz

before fix  after fix
-   -
lia5,1  lia5,1
lda4,8(sp)  lda4,8(sp)   # a4 is runtime non zero
czero.nez a0,a4,a5 # a0=0 NOK   czero.eqz a0,a4,a5   # a0=a4!=0 OK

The issue only happens at -O1 as at higher optimization levels, the
whole conditional move gets optimized away.

This fixes 4 testsuite failures in a zicond build:

FAIL: gcc.c-torture/execute/pr60003.c   -O1  execution test
FAIL: gcc.dg/setjmp-3.c execution test
FAIL: gcc.dg/torture/stackalign/setjmp-3.c   -O1  execution test
FAIL: gcc.dg/torture/stackalign/setjmp-3.c   -O1 -fpic execution test

gcc/ChangeLog:
* config/riscv/zicond.md: Fix opt2 pattern.

Fixes: 1d5bc3285e8a ("[committed][RISC-V] Fix 20010221-1.c with zicond")
Signed-off-by: Vineet Gupta 
---
changes since v1
   - instead of discarding opt2 pattern, fix the asm
---
 gcc/config/riscv/zicond.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/zicond.md b/gcc/config/riscv/zicond.md
index 4619220ef8ac..1721e1011ea8 100644
--- a/gcc/config/riscv/zicond.md
+++ b/gcc/config/riscv/zicond.md
@@ -60,7 +60,7 @@
   (match_operand:GPR 2 "register_operand" "r")
   (match_operand:GPR 3 "register_operand" "1")))]
   "TARGET_ZICOND && rtx_equal_p (operands[1], operands[3])"
-  "czero.nez\t%0,%2,%1"
+  "czero.eqz\t%0,%2,%1"
 )
 
 ;; Combine creates this form in some cases (particularly the coremark
-- 
2.34.1



Re: [PATCH] RISC-V: zicond: remove bogus opt2 pattern

2023-09-01 Thread Vineet Gupta




On 9/1/23 10:40, Palmer Dabbelt wrote:


Just working through this in email, as there's a lot of 
double-negatives and I managed to screw up my Linux PR this morning so 
I may not be thinking that well...


The docs say "(if_then_else test true-value false-value)".  So in this 
case it's


   test:  (ne (match_operand:X 1 "register_operand" "r") (const_int 0))
   true:  (match_operand:GPR 2 "register_operand" "r")
   false: (match_operand:GPR 3 "register_operand" "1") == 
(match_operand:X 1 "register_operand" "r")


and we're encoding it as

   czero.nez %0,%2,%1

so that's

   rd:  output
   rs1: on-true
   rs2: condition (the value inside the ne in RTL)

That looks correct to me: the instruction's condition source register 
is inside a "(ne ... 0)", but we're doing the cmov.nez so it looks OK.


Yes it is fine, until you end up having both operand 2 and operand 3 
have non-zero values at runtime and somehow match this pattern. Then the 
semantics of czero* are not honored.


It might be easier for everyone to understand if you add a specific 
testcase for just the broken codegen.  I'm not having luck 
constructing a small reproducer (though I don't have a clean tree 
lying around, so I might have screwed something up here).


IIUC something like

   long func(long x, long a) {
   if (a != 0)
 return x;
   return 0;
   }

should do it, but I'm getting

   func:
   czero.eqz   a0,a0,a1
   ret


Unfortunately tests any simpler don't trigger it - the code seqs just 
get optimized away - otherwise Jeff would have found this 3 weeks ago ;-)

Just use gcc/testsuite/gcc.c-torture/execute/pr60003.c

Thx,
-Vineet


[PATCH] diagnostics: Delete config pointer before overwriting it.

2023-09-01 Thread Mikael Morin via Gcc-patches
Hello,

this is a fix for a small memory leak in the fortran frontend.
Tested on x86_64-pc-linux-gnu, nothing stands out besides the
apparently well-known guality instability.
OK for master ?

-- >8 --

Delete m_client_data_hooks before it is reassigned in
tree_diagnostics_defaults.  This fixes a small memory leak in the fortran
frontend, which restores the diagnostics configurations to their default
values with a call to tree_diagnostics_defaults at the end of the main parse
hook.

gcc/ChangeLog:

* tree-diagnostic.cc (tree_diagnostics_defaults): Delete allocated
pointer before overwriting it.
---
 gcc/tree-diagnostic.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/tree-diagnostic.cc b/gcc/tree-diagnostic.cc
index 731e3559cd8..d2f6637b6d9 100644
--- a/gcc/tree-diagnostic.cc
+++ b/gcc/tree-diagnostic.cc
@@ -377,5 +377,6 @@ tree_diagnostics_defaults (diagnostic_context *context)
   context->print_path = default_tree_diagnostic_path_printer;
   context->make_json_for_path = default_tree_make_json_for_path;
   context->set_locations_cb = set_inlining_locations;
+  delete context->m_client_data_hooks;
   context->m_client_data_hooks = make_compiler_data_hooks ();
 }
-- 
2.40.1



[PATCH v2] c: don't emit -Wmissing-variable-declarations for register variables [PR110947]

2023-09-01 Thread Hamza Mahfooz
Resolves:
PR c/110947 - Should -Wmissing-variable-declarations not trigger on
register variables?

gcc/c/ChangeLog:

PR c/110947
* c-decl.cc (start_decl): don't emit
-Wmissing-variable-declarations for DECL_REGISTER VAR_DECLs.

gcc/testsuite/ChangeLog:

PR c/110947
* gcc.dg/pr110947.c: New test.

Signed-off-by: Hamza Mahfooz 
---
Please push this for me if you think it looks good. Since, I don't have
write access to the repository.

v2: put "target" before the relevant architectures in pr110947.c.
---
 gcc/c/c-decl.cc | 3 ++-
 gcc/testsuite/gcc.dg/pr110947.c | 4 
 2 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr110947.c

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 1f9eb44dbaa..819af6aa050 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -5376,7 +5376,8 @@ start_decl (struct c_declarator *declarator, struct 
c_declspecs *declspecs,
 warning (OPT_Wmain, "%q+D is usually a function", decl);
 
   if (warn_missing_variable_declarations && VAR_P (decl)
-  && !DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) && old_decl == NULL_TREE)
+  && !DECL_EXTERNAL (decl) && !DECL_REGISTER (decl) && TREE_PUBLIC (decl)
+  && old_decl == NULL_TREE)
 warning_at (DECL_SOURCE_LOCATION (decl), 
OPT_Wmissing_variable_declarations,
"no previous declaration for %qD", decl);
 
diff --git a/gcc/testsuite/gcc.dg/pr110947.c b/gcc/testsuite/gcc.dg/pr110947.c
new file mode 100644
index 000..3c0b8a82ab3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr110947.c
@@ -0,0 +1,4 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-Wmissing-variable-declarations" } */
+
+register unsigned long current_stack_pointer asm("rsp");
-- 
2.41.0



Re: [PATCH] RISC-V: zicond: remove bogus opt2 pattern

2023-09-01 Thread Vineet Gupta



On 9/1/23 06:13, Jeff Law wrote:
I could very well be mistaken, but define_insn is a pattern match and 
opt2 has *ne* so the expression has to be in != form and thus needs 
to work with that condition. No ?

My point was  that

x = (a != 0) ? x : 0

is equivalent to

x = (a == 0) ? 0 : x

You can invert the condition and swap the arms and get the same 
semantics.  Thus if one can be supported, so can the other as they're 
functionally equivalent. 


Ah I see what you mean. Indeed the pattern is fine, it just doesn't map 
to the right asm.

So we certainly need a fix but it could just very well be this:

(define_insn "*czero.nez.<GPR:mode><X:mode>.opt2"
  [(set (match_operand:GPR 0 "register_operand" "=r")
    (if_then_else:GPR (ne (match_operand:X 1 "register_operand" "r")
  (const_int 0))
  (match_operand:GPR 2 "register_operand" "r")
  (match_operand:GPR 3 "register_operand" "1")))]
  "TARGET_ZICOND && rtx_equal_p (operands[1], operands[3])"
-  "czero.nez\t%0,%2,%1"
}  "czero.eqz\t%0,%2,%1"
)

It may be that we've goof'd something in handling the inverted case, 
but conceptually we ought to be able to handle both.


Indeed there's a small goof as shown above.



I don't doubt you've got a failure, but it's also the case that I'm 
not seeing the same failure when I turn on zicond and run the 
execute.exp tests.  So clearly there's a difference somewhere in what 
we're doing.


It doesn't show up in execute.exp but as following (perhaps I should add 
that to commit log too).


FAIL: gcc.c-torture/execute/pr60003.c   -O1  execution test
FAIL: gcc.dg/setjmp-3.c execution test
FAIL: gcc.dg/torture/stackalign/setjmp-3.c   -O1  execution test
FAIL: gcc.dg/torture/stackalign/setjmp-3.c   -O1 -fpic execution test




So perhaps we should start with comparing assembly output for the test 
in question.  Can you pass yours along, I'll diff them this afternoon 
and see what we find.


Attached is slightly modified pr60003.c (to differentiate 'X' and 'a') 
and the failing asm and with fix (both the deleted pattern and modified 
pattern produce correct, if slightly different code).


Thx,
-Vineet

/* PR tree-optimization/60003 */
/* { dg-require-effective-target indirect_jumps } */

extern void abort (void);

unsigned long long jmp_buf[5];

__attribute__((noinline, noclone)) void
baz (void)
{
  __builtin_longjmp (&jmp_buf, 1);
}

void
bar (void)
{
  baz ();
}

__attribute__((noinline, noclone)) int
foo (int x)
{
  int a = 0;

  if (__builtin_setjmp (&jmp_buf) == 0)
{
  while (1)
	{
	  a = 1;
	  bar ();  /* OK if baz () instead */
	}
}
  else
{
  if (a == 0)
	return 0;
  else
	return x;
}
}

int
main ()
{
  if (foo (2) == 0)	// orig test has foo (1)
return 1;

  return 0;
}
.file   "pr60003.c"
.option nopic
# GNU C17 (GCC) version 14.0.0 20230830 (experimental) (riscv-unknown-elf)
#   compiled by GNU C version 11.4.0, GMP version 6.1.0, MPFR version 
3.1.4, MPC version 1.0.3, isl version isl-0.18-GMP

# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -march=rv64gc_zba_zbb_zbs_zicond -mabi=lp64d -O1
.text
.align  1
.globl  baz
.type   baz, @function
baz:
addisp,sp,-16   #,,
sd  ra,8(sp)#,
sd  s0,0(sp)#,
addis0,sp,16#,,
# pr60003.c:11:   __builtin_longjmp (&jmp_buf, 1);
lui a5,%hi(.LANCHOR0)   # tmp135,
addia5,a5,%lo(.LANCHOR0)# tmp134, tmp135,
ld  a4,8(a5)# tmp136,
ld  a3,0(a5)# tmp137,
ld  sp,16(a5)   #,
mv  s0,a3   #, tmp137
jr  a4  # tmp136
.size   baz, .-baz
.align  1
.globl  bar
.type   bar, @function
bar:
addisp,sp,-16   #,,
sd  ra,8(sp)#,
# pr60003.c:17:   baz ();
callbaz #
.size   bar, .-bar
.align  1
.globl  foo
.type   foo, @function
foo:
addisp,sp,-224  #,,
sd  ra,216(sp)  #,
sd  s0,208(sp)  #,
sd  s1,200(sp)  #,
sd  s2,192(sp)  #,
sd  s3,184(sp)  #,
sd  s4,176(sp)  #,
sd  s5,168(sp)  #,
sd  s6,160(sp)  #,
sd  s7,152(sp)  #,
sd  s8,144(sp)  #,
sd  s9,136(sp)  #,
sd  s10,128(sp) #,
sd  s11,120(sp) #,
fsd fs0,104(sp) #,
fsd fs1,96(sp)  #,
fsd fs2,88(sp)  #,
fsd fs3,80(sp)  #,
fsd fs4,72(sp)  #,
fsd fs5,64(sp)  #,
fsd fs6,56(sp)  #,
fsd fs7,48(sp)  #,
fsd fs8,40(sp)  #,
fsd fs9,32(sp)  #,
fsd fs10,24(sp) 

[PATCH] c: don't emit -Wmissing-variable-declarations for register variables [PR110947]

2023-09-01 Thread Hamza Mahfooz
Resolves:
PR c/110947 - Should -Wmissing-variable-declarations not trigger on
register variables?

gcc/c/ChangeLog:

PR c/110947
* c-decl.cc (start_decl): don't emit
-Wmissing-variable-declarations for DECL_REGISTER VAR_DECLs.

gcc/testsuite/ChangeLog:

PR c/110947
* gcc.dg/pr110947.c: New test.

Signed-off-by: Hamza Mahfooz 
---
Please push this for me if you think it looks good. Since, I don't have
write access to the repository.
---
 gcc/c/c-decl.cc | 3 ++-
 gcc/testsuite/gcc.dg/pr110947.c | 4 
 2 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr110947.c

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 1f9eb44dbaa..819af6aa050 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -5376,7 +5376,8 @@ start_decl (struct c_declarator *declarator, struct 
c_declspecs *declspecs,
 warning (OPT_Wmain, "%q+D is usually a function", decl);
 
   if (warn_missing_variable_declarations && VAR_P (decl)
-  && !DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) && old_decl == NULL_TREE)
+  && !DECL_EXTERNAL (decl) && !DECL_REGISTER (decl) && TREE_PUBLIC (decl)
+  && old_decl == NULL_TREE)
 warning_at (DECL_SOURCE_LOCATION (decl), 
OPT_Wmissing_variable_declarations,
"no previous declaration for %qD", decl);
 
diff --git a/gcc/testsuite/gcc.dg/pr110947.c b/gcc/testsuite/gcc.dg/pr110947.c
new file mode 100644
index 000..19e38ed4d18
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr110947.c
@@ -0,0 +1,4 @@
+/* { dg-do compile { i?86-*-* target x86_64-*-* } } */
+/* { dg-options "-Wmissing-variable-declarations" } */
+
+register unsigned long current_stack_pointer asm("rsp");
-- 
2.41.0



Re: [PATCH] RISC-V: Add Types to Un-Typed Risc-v Instructions:

2023-09-01 Thread Jeff Law via Gcc-patches




On 8/31/23 11:32, Edwin Lu wrote:

Related Discussion:
https://inbox.sourceware.org/gcc-patches/12fb5088-3f28-0a69-de1e-f387371a5...@gmail.com/

This patch updates the riscv instructions to ensure that no insn is left
without a type attribute. Added new types: "trap" (self-explanatory) and "cbo"
(for cache related instructions)

Tested for regressions using rv32/64 multilib for linux/newlib. Also tested
rv32/64 gcv for linux.

gcc/Changelog:

* config/riscv/riscv.md: Update/Add types

OK.

jeff


Re: [PATCH] RISC-V: zicond: remove bogus opt2 pattern

2023-09-01 Thread Palmer Dabbelt

On Thu, 31 Aug 2023 10:57:52 PDT (-0700), Vineet Gupta wrote:



On 8/31/23 06:51, Jeff Law wrote:



On 8/30/23 15:57, Vineet Gupta wrote:

This was tripping up gcc.c-torture/execute/pr60003.c at -O1 since the
pattern semantics can't be expressed by zicond instructions.

This involves test code snippet:

   if (a == 0)
return 0;
   else
return x;
 }

which is equivalent to:  "x = (a != 0) ? x : a"

Isn't it

x = (a == 0) ? 0 : x

Which seems like it ought to fit zicond just fine.


Logically they are equivalent, but 



If we take yours;

x = (a != 0) ? x : a

And simplify with the known value of a on the false arm we get:

x = (a != 0 ) ? x : 0;

Which is equivalent to

x = (a == 0) ? 0 : x;

So ISTM this does fit zicond just fine.


I could very well be mistaken, but define_insn is a pattern match and
opt2 has *ne* so the expression has to be in != form and thus needs to
work with that condition. No ?


and matches define_insn "*czero.nez..opt2"

| (insn 41 20 38 3 (set (reg/v:DI 136 [ x ])
|    (if_then_else:DI (ne (reg/v:DI 134 [ a ])
|    (const_int 0 [0]))
|    (reg/v:DI 136 [ x ])
|    (reg/v:DI 134 [ a ]))) {*czero.nez.didi.opt2}

The corresponding asm pattern generates
 czero.nez x, x, a   ; %0, %2, %1
implying
 "x = (a != 0) ? 0 : a"

I get this from the RTL pattern:

x = (a != 0) ? x : a
x = (a != 0) ? x : 0


This is the issue, for ne, czero.nez can only express
x = (a != 0) ? 0 : x



I think you got the arms reversed.


Just working through this in email, as there's a lot of 
double-negatives and I managed to screw up my Linux PR this morning so I 
may not be thinking that well...


The docs say "(if_then_else test true-value false-value)".  So in this 
case it's


   test:  (ne (match_operand:X 1 "register_operand" "r") (const_int 0))
   true:  (match_operand:GPR 2 "register_operand" "r")
   false: (match_operand:GPR 3 "register_operand" "1") == (match_operand:X 1 
"register_operand" "r")

and we're encoding it as

   czero.nez %0,%2,%1

so that's

   rd:  output
   rs1: on-true
   rs2: condition (the value inside the ne in RTL)

That looks correct to me: the instruction's condition source register is 
inside a "(ne ... 0)", but we're doing the cmov.nez so it looks OK.


The rest of the zero juggling looks sane as well -- I'm not sure if the 
X vs GPR mismatch will confuse something else, but it should be caught 
by the rtx_equal_p() and thus should at least be safe.



What I meant was czero.nez as specified in RTL pattern would generate x
= (a != 0) ? 0 : a, whereas pattern's desired semantics is (a != 0) ? x : 0
And that is a problem because after all equivalents/simplifications, a
ternary operation's middle operand has to be zero to map to czero*, but
it doesn't for the opt2 RTL semantics.

I've sat on this for 2 days, trying to convince myself I was wrong, but
as it stands, it was generating wrong code in the test which is fixed
after the patch.


It might be easier for everyone to understand if you add a specific 
testcase for just the broken codegen.  I'm not having luck constructing 
a small reproducer (though I don't have a clean tree lying around, so I 
might have screwed something up here).


IIUC something like

   long func(long x, long a) {
   if (a != 0)
 return x;
   return 0;
   }

should do it, but I'm getting

   func:
   czero.eqz   a0,a0,a1
   ret

which looks right to me -- though it's not triggering this pattern, so 
not sure that means much.




Thx,
-Vineet


Re: [PATCH] c++: Move consteval folding to cp_fold_r

2023-09-01 Thread Marek Polacek via Gcc-patches
On Fri, Sep 01, 2023 at 01:23:48PM -0400, Marek Polacek via Gcc-patches wrote:
> --- a/gcc/cp/cp-gimplify.cc
> +++ b/gcc/cp/cp-gimplify.cc
[...]
>  case ADDR_EXPR:
>if (TREE_CODE (TREE_OPERAND (stmt, 0)) == FUNCTION_DECL
> -   && DECL_IMMEDIATE_FUNCTION_P (TREE_OPERAND (stmt, 0)))
> +   && DECL_IMMEDIATE_FUNCTION_P (TREE_OPERAND (stmt, 0))
> +   && !in_immediate_context ())

This hunk isn't actually necessary.  I'm happy to drop it.  Or add the
in_immediate_context check into case PTRMEM_CST too.

Marek



[PATCH 2/2] VR-VALUES: Rewrite test_for_singularity using range_op_handler

2023-09-01 Thread Andrew Pinski via Gcc-patches
So it turns out there was a simpler way of starting to
improve VRP to start to fix PR 110131, PR 108360, and PR 108397.
That was rewrite test_for_singularity to use range_op_handler
and Value_Range.

This patch implements that.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* vr-values.cc (test_for_singularity): Add edge argument
and rewrite using range_op_handler.
(simplify_compare_using_range_pairs): Use Value_Range
instead of value_range and update test_for_singularity call.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/vrp124.c: New test.
* gcc.dg/tree-ssa/vrp125.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/vrp124.c | 44 
 gcc/testsuite/gcc.dg/tree-ssa/vrp125.c | 44 
 gcc/vr-values.cc   | 99 --
 3 files changed, 117 insertions(+), 70 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/vrp124.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/vrp125.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp124.c 
b/gcc/testsuite/gcc.dg/tree-ssa/vrp124.c
new file mode 100644
index 000..6ccbda35d1b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp124.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* Should be optimized to a == -100 */
+int g(int a)
+{
+  if (a == -100 || a >= 0)
+;
+  else
+return 0;
+  return a < 0;
+}
+
+/* Should optimize to a == 0 */
+int f(int a)
+{
+  if (a == 0 || a > 100)
+;
+  else
+return 0;
+  return a < 50;
+}
+
+/* Should be optimized to a == 0. */
+int f2(int a)
+{
+  if (a == 0 || a > 100)
+;
+  else
+return 0;
+  return a < 100;
+}
+
+/* Should optimize to a == 100 */
+int f1(int a)
+{
+  if (a < 0 || a == 100)
+;
+  else
+return 0;
+  return a > 50;
+}
+
+/* { dg-final { scan-tree-dump-not "goto " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp125.c 
b/gcc/testsuite/gcc.dg/tree-ssa/vrp125.c
new file mode 100644
index 000..f6c2f8e35f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp125.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* Should be optimized to a == -100 */
+int g(int a)
+{
+  if (a == -100 || a == -50 || a >= 0)
+;
+  else
+return 0;
+  return a < -50;
+}
+
+/* Should optimize to a == 0 */
+int f(int a)
+{
+  if (a == 0 || a == 50 || a > 100)
+;
+  else
+return 0;
+  return a < 50;
+}
+
+/* Should be optimized to a == 0. */
+int f2(int a)
+{
+  if (a == 0 || a == 50 || a > 100)
+;
+  else
+return 0;
+  return a < 25;
+}
+
+/* Should optimize to a == 100 */
+int f1(int a)
+{
+  if (a < 0 || a == 50 || a == 100)
+;
+  else
+return 0;
+  return a > 50;
+}
+
+/* { dg-final { scan-tree-dump-not "goto " "optimized" } } */
diff --git a/gcc/vr-values.cc b/gcc/vr-values.cc
index 52ab4fe6109..2474e57ee90 100644
--- a/gcc/vr-values.cc
+++ b/gcc/vr-values.cc
@@ -904,69 +904,33 @@ simplify_using_ranges::simplify_bit_ops_using_ranges
 }
 
 /* We are comparing trees OP1 and OP2 using COND_CODE.  OP1 has
-   a known value range VR.
+   a known value range OP1_RANGE.
 
If there is one and only one value which will satisfy the
-   conditional, then return that value.  Else return NULL.
-
-   If signed overflow must be undefined for the value to satisfy
-   the conditional, then set *STRICT_OVERFLOW_P to true.  */
+   conditional on the EDGE, then return that value.
+   Else return NULL.  */
 
 static tree
 test_for_singularity (enum tree_code cond_code, tree op1,
- tree op2, const value_range *vr)
+ tree op2, const int_range_max _range, bool edge)
 {
-  tree min = NULL;
-  tree max = NULL;
-
-  /* Extract minimum/maximum values which satisfy the conditional as it was
- written.  */
-  if (cond_code == LE_EXPR || cond_code == LT_EXPR)
+  /* This is already a singularity.  */
+  if (cond_code == NE_EXPR || cond_code == EQ_EXPR)
+return NULL;
+  auto range_op = range_op_handler (cond_code);
+  wide_int w = wi::to_wide (op2);
+  int_range<1> op2_range (TREE_TYPE (op2), w, w);
+  int_range_max vr;
+  if (range_op.op1_range (vr, TREE_TYPE (op1),
+ edge ? range_true () : range_false (),
+ op2_range))
 {
-  min = TYPE_MIN_VALUE (TREE_TYPE (op1));
-
-  max = op2;
-  if (cond_code == LT_EXPR)
-   {
- tree one = build_int_cst (TREE_TYPE (op1), 1);
- max = fold_build2 (MINUS_EXPR, TREE_TYPE (op1), max, one);
- /* Signal to compare_values_warnv this expr doesn't overflow.  */
- if (EXPR_P (max))
-   suppress_warning (max, OPT_Woverflow);
-   }
-}
-  else if (cond_code == GE_EXPR || cond_code == GT_EXPR)
-{
-  max = TYPE_MAX_VALUE (TREE_TYPE (op1));
-
-  min = op2;
-  if (cond_code == GT_EXPR)
-   {
- tree one = build_int_cst 

[PATCH 1/2] VR-VALUES: Rename op0/op1 to op1/op2 for test_for_singularity

2023-09-01 Thread Andrew Pinski via Gcc-patches
As requested, and to make the code easier to understand alongside the
new ranger code, rename the arguments op0/op1 to op1/op2.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions

gcc/ChangeLog:

* vr-values.cc (test_for_singularity): Rename
arguments op0/op1 to op1/op2.
---
 gcc/vr-values.cc | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/gcc/vr-values.cc b/gcc/vr-values.cc
index a4fddd62841..52ab4fe6109 100644
--- a/gcc/vr-values.cc
+++ b/gcc/vr-values.cc
@@ -903,7 +903,7 @@ simplify_using_ranges::simplify_bit_ops_using_ranges
   return true;
 }
 
-/* We are comparing trees OP0 and OP1 using COND_CODE.  OP0 has
+/* We are comparing trees OP1 and OP2 using COND_CODE.  OP1 has
a known value range VR.
 
If there is one and only one value which will satisfy the
@@ -913,8 +913,8 @@ simplify_using_ranges::simplify_bit_ops_using_ranges
the conditional, then set *STRICT_OVERFLOW_P to true.  */
 
 static tree
-test_for_singularity (enum tree_code cond_code, tree op0,
- tree op1, const value_range *vr)
+test_for_singularity (enum tree_code cond_code, tree op1,
+ tree op2, const value_range *vr)
 {
   tree min = NULL;
   tree max = NULL;
@@ -923,13 +923,13 @@ test_for_singularity (enum tree_code cond_code, tree op0,
  written.  */
   if (cond_code == LE_EXPR || cond_code == LT_EXPR)
 {
-  min = TYPE_MIN_VALUE (TREE_TYPE (op0));
+  min = TYPE_MIN_VALUE (TREE_TYPE (op1));
 
-  max = op1;
+  max = op2;
   if (cond_code == LT_EXPR)
{
- tree one = build_int_cst (TREE_TYPE (op0), 1);
- max = fold_build2 (MINUS_EXPR, TREE_TYPE (op0), max, one);
+ tree one = build_int_cst (TREE_TYPE (op1), 1);
+ max = fold_build2 (MINUS_EXPR, TREE_TYPE (op1), max, one);
  /* Signal to compare_values_warnv this expr doesn't overflow.  */
  if (EXPR_P (max))
suppress_warning (max, OPT_Woverflow);
@@ -937,13 +937,13 @@ test_for_singularity (enum tree_code cond_code, tree op0,
 }
   else if (cond_code == GE_EXPR || cond_code == GT_EXPR)
 {
-  max = TYPE_MAX_VALUE (TREE_TYPE (op0));
+  max = TYPE_MAX_VALUE (TREE_TYPE (op1));
 
-  min = op1;
+  min = op2;
   if (cond_code == GT_EXPR)
{
- tree one = build_int_cst (TREE_TYPE (op0), 1);
- min = fold_build2 (PLUS_EXPR, TREE_TYPE (op0), min, one);
+ tree one = build_int_cst (TREE_TYPE (op1), 1);
+ min = fold_build2 (PLUS_EXPR, TREE_TYPE (op1), min, one);
  /* Signal to compare_values_warnv this expr doesn't overflow.  */
  if (EXPR_P (min))
suppress_warning (min, OPT_Woverflow);
@@ -951,10 +951,10 @@ test_for_singularity (enum tree_code cond_code, tree op0,
 }
 
   /* Now refine the minimum and maximum values using any
- value range information we have for op0.  */
+ value range information we have for op1.  */
   if (min && max)
 {
-  tree type = TREE_TYPE (op0);
+  tree type = TREE_TYPE (op1);
   tree tmin = wide_int_to_tree (type, vr->lower_bound ());
   tree tmax = wide_int_to_tree (type, vr->upper_bound ());
   if (compare_values (tmin, min) == 1)
-- 
2.31.1



[PATCH] c++: Move consteval folding to cp_fold_r

2023-09-01 Thread Marek Polacek via Gcc-patches
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --

In the review of P2564:

it turned out that in order to correctly handle an example in the paper,
we should stop doing immediate evaluation in build_over_call and
bot_replace, and instead do it in cp_fold_r.  This patch does that.

Another benefit is that this is a pretty significant simplification, at
least in my opinion.  Also, this fixes the c++/110997 ICE (but the test
doesn't compile yet).

The main drawback seems to be that cp_fold_r doesn't process as much
code as we did before: uninstantiated templates and things like
"false ? foo () : 1".

You'll see that I've reintroduced ADDR_EXPR_DENOTES_CALL_P here.  This
is to detect

  (*(&foo)) ()
  (s.*&S::foo) ()

which were deemed ill-formed.

gcc/cp/ChangeLog:

* call.cc (in_immediate_context): No longer static.
(build_over_call): Set ADDR_EXPR_DENOTES_CALL_P.  Don't handle
immediate_invocation_p here.
* constexpr.cc (cxx_eval_call_expression): Use mce_true for
immediate_invocation_p.
* cp-gimplify.cc (cp_fold_r): Expand immediate invocations.
* cp-tree.h (ADDR_EXPR_DENOTES_CALL_P): Define.
(immediate_invocation_p): Declare.
* tree.cc (bot_replace): Don't handle immediate invocations here.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/consteval-if2.C: Add xfail.
* g++.dg/cpp2a/consteval-memfn1.C: Adjust.
* g++.dg/cpp2a/consteval11.C: Remove dg-message.
* g++.dg/cpp2a/consteval3.C: Remove dg-message and dg-error.
* g++.dg/cpp2a/consteval9.C: Remove dg-message.
* g++.dg/cpp2a/consteval32.C: New test.
* g++.dg/cpp2a/consteval33.C: New test.

libstdc++-v3/ChangeLog:

* testsuite/20_util/allocator/105975.cc: Add dg-error.
---
 gcc/cp/call.cc| 42 +++
 gcc/cp/constexpr.cc   |  5 +++
 gcc/cp/cp-gimplify.cc | 14 ++-
 gcc/cp/cp-tree.h  |  6 +++
 gcc/cp/tree.cc| 23 +-
 gcc/testsuite/g++.dg/cpp23/consteval-if2.C|  2 +-
 gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C |  7 
 gcc/testsuite/g++.dg/cpp2a/consteval11.C  | 37 
 gcc/testsuite/g++.dg/cpp2a/consteval3.C   |  3 +-
 gcc/testsuite/g++.dg/cpp2a/consteval32.C  |  4 ++
 gcc/testsuite/g++.dg/cpp2a/consteval33.C  | 34 +++
 gcc/testsuite/g++.dg/cpp2a/consteval9.C   |  2 +-
 .../testsuite/20_util/allocator/105975.cc |  2 +-
 13 files changed, 100 insertions(+), 81 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval32.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval33.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 40d9fdc0516..abdbc8fff8c 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -9763,7 +9763,7 @@ in_immediate_context ()
 /* Return true if a call to FN with number of arguments NARGS
is an immediate invocation.  */
 
-static bool
+bool
 immediate_invocation_p (tree fn)
 {
   return (TREE_CODE (fn) == FUNCTION_DECL
@@ -10471,6 +10471,10 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
   fn = build_addr_func (fn, complain);
   if (fn == error_mark_node)
return error_mark_node;
+
+  /* We're actually invoking the function.  (Immediate functions get an
+& when invoking it even though the user didn't use &.)  */
+  ADDR_EXPR_DENOTES_CALL_P (fn) = true;
 }
 
   tree call = build_cxx_call (fn, nargs, argarray, complain|decltype_flag);
@@ -10488,41 +10492,7 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
   if (TREE_CODE (c) == CALL_EXPR)
suppress_warning (c /* Suppress all warnings.  */);
 }
-  if (TREE_CODE (fn) == ADDR_EXPR)
-{
-  tree fndecl = STRIP_TEMPLATE (TREE_OPERAND (fn, 0));
-  if (immediate_invocation_p (fndecl))
-   {
- tree obj_arg = NULL_TREE;
- /* Undo convert_from_reference called by build_cxx_call.  */
- if (REFERENCE_REF_P (call))
-   call = TREE_OPERAND (call, 0);
- if (DECL_CONSTRUCTOR_P (fndecl))
-   obj_arg = cand->first_arg ? cand->first_arg : (*args)[0];
- if (obj_arg && is_dummy_object (obj_arg))
-   {
- call = build_cplus_new (DECL_CONTEXT (fndecl), call, complain);
- obj_arg = NULL_TREE;
-   }
- /* Look through *(const T *)  */
- else if (obj_arg && INDIRECT_REF_P (obj_arg))
-   {
- tree addr = TREE_OPERAND (obj_arg, 0);
- STRIP_NOPS (addr);
- if (TREE_CODE (addr) == ADDR_EXPR)
-   {
- tree typeo = TREE_TYPE (obj_arg);
- tree typei = TREE_TYPE (TREE_OPERAND (addr, 0));
- if (same_type_ignoring_top_level_qualifiers_p 

[committed] libstdc++: Fix debug-mode tests for constexpr algorithms

2023-09-01 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

These tests started failing at some point:
FAIL: 25_algorithms/copy/debug/constexpr_neg.cc  (test for errors, line 49)
FAIL: 25_algorithms/copy/debug/constexpr_neg.cc (test for excess errors)
FAIL: 25_algorithms/equal/debug/constexpr_neg.cc  (test for errors, line 47)
FAIL: 25_algorithms/equal/debug/constexpr_neg.cc (test for excess errors)

They only run with -D_GLIBCXX_DEBUG or make check-debug so seem to have
gone unnoticed until now.

libstdc++-v3/ChangeLog:

* testsuite/25_algorithms/copy/debug/constexpr_neg.cc: Adjust
expected errors.
* testsuite/25_algorithms/equal/debug/constexpr_neg.cc:
Likewise.
---
 .../25_algorithms/copy/debug/constexpr_neg.cc  |  8 +++-
 .../25_algorithms/equal/debug/constexpr_neg.cc | 10 --
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/libstdc++-v3/testsuite/25_algorithms/copy/debug/constexpr_neg.cc 
b/libstdc++-v3/testsuite/25_algorithms/copy/debug/constexpr_neg.cc
index 6981c470666..bf3c4939bfb 100644
--- a/libstdc++-v3/testsuite/25_algorithms/copy/debug/constexpr_neg.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/copy/debug/constexpr_neg.cc
@@ -34,6 +34,7 @@ test1()
 }
 
 static_assert(test1()); // { dg-error "non-constant condition" }
+// { dg-error "builtin_unreachable" "" { target *-*-* } 0 }
 
 constexpr bool
 test2()
@@ -46,8 +47,5 @@ test2()
   return out6 == ma0.begin() + 18;
 }
 
-static_assert(test2()); // { dg-error "is outside the bounds" }
-
-// { dg-prune-output "in 'constexpr' expansion" }
-// { dg-prune-output "builtin_unreachable" }
-// { dg-prune-output "non-constant condition" }
+static_assert(test2()); // { dg-error "non-constant condition" }
+// { dg-error "is outside the bounds" "" { target *-*-* } 0 }
diff --git a/libstdc++-v3/testsuite/25_algorithms/equal/debug/constexpr_neg.cc 
b/libstdc++-v3/testsuite/25_algorithms/equal/debug/constexpr_neg.cc
index bb613bef03b..f5e46e58e49 100644
--- a/libstdc++-v3/testsuite/25_algorithms/equal/debug/constexpr_neg.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/equal/debug/constexpr_neg.cc
@@ -32,7 +32,8 @@ test01()
   return outa;
 }
 
-static_assert(test01()); // { dg-error }
+static_assert(test01()); // { dg-error "non-constant condition" }
+// { dg-error "builtin_unreachable" "" { target *-*-* } 0 }
 
 constexpr bool
 test02()
@@ -44,8 +45,5 @@ test02()
   return outa;
 }
 
-static_assert(test02()); // { dg-error "outside the bounds" }
-
-// { dg-prune-output "non-constant condition" }
-// { dg-prune-output "in 'constexpr'" }
-// { dg-prune-output "builtin_unreachable" }
+static_assert(test02()); // { dg-error "non-constant condition" }
+// { dg-error "is outside the bounds" "" { target *-*-* } 0 }
-- 
2.41.0



[committed] libstdc++: Add -Wno-self-move to two filesystem tests

2023-09-01 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* testsuite/27_io/filesystem/iterators/91067.cc: Add
-Wno-self-move to options.
* testsuite/27_io/filesystem/path/assign/copy.cc: Likewise.
---
 libstdc++-v3/testsuite/27_io/filesystem/iterators/91067.cc  | 1 +
 libstdc++-v3/testsuite/27_io/filesystem/path/assign/copy.cc | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libstdc++-v3/testsuite/27_io/filesystem/iterators/91067.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/iterators/91067.cc
index b960ee7c798..2bf1e081c25 100644
--- a/libstdc++-v3/testsuite/27_io/filesystem/iterators/91067.cc
+++ b/libstdc++-v3/testsuite/27_io/filesystem/iterators/91067.cc
@@ -17,6 +17,7 @@
 
 // { dg-do link { target c++17 } }
 // { dg-require-filesystem-ts "" }
+// { dg-options "-Wno-self-move" }
 
 #include 
 
diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/assign/copy.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/path/assign/copy.cc
index dc147b1cf3b..6ec347531bc 100644
--- a/libstdc++-v3/testsuite/27_io/filesystem/path/assign/copy.cc
+++ b/libstdc++-v3/testsuite/27_io/filesystem/path/assign/copy.cc
@@ -1,4 +1,5 @@
 // { dg-do run { target c++17 } }
+// { dg-options "-Wno-self-move" }
 
 // Copyright (C) 2014-2023 Free Software Foundation, Inc.
 //
-- 
2.41.0



Re: [PATCH] libstdc++: fix memory clobbering in std::vector [PR110879]

2023-09-01 Thread Jonathan Wakely via Gcc-patches
At Marek and Jason's suggestion I've moved the new test to a subdir:

   c++: Move new test to 'opt' sub-directory

   gcc/testsuite/ChangeLog:

   * g++.dg/pr110879.C: Moved to...
   * g++.dg/opt/pr110879.C: ...here.



Re: [RFC] libstdc++: Make --enable-libstdcxx-backtrace=auto default to yes

2023-09-01 Thread Jonathan Wakely via Gcc-patches
On Fri, 1 Sept 2023 at 12:16, Jonathan Wakely  wrote:
>
> On Wed, 23 Aug 2023 at 17:03, Jonathan Wakely via Libstdc++
>  wrote:
> >
> > Any objections to this? It's a C++23 feature, so should be enabled by
> > default.
>
> I've pushed this to trunk, so let's see what breaks!

This modules header broke on aarch64, of course:
FAIL: g++.dg/modules/xtreme-header_b.C -std=c++2b (test for excess errors)

>
>
> >
> > -- >8 --
> >
> > This causes libstdc++_libbacktrace.a to be built by default. This might
> > fail on some targets, in which case we can make the 'auto' choice expand
> > to either 'yes' or 'no' depending on the target.
> >
> > libstdc++-v3/ChangeLog:
> >
> > * acinclude.m4 (GLIBCXX_ENABLE_BACKTRACE): Default to yes.
> > * configure: Regenerate.
> > ---
> >  libstdc++-v3/acinclude.m4 | 2 +-
> >  libstdc++-v3/configure| 2 +-
> >  2 files changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
> > index b25378eaace..50c808c6b2d 100644
> > --- a/libstdc++-v3/acinclude.m4
> > +++ b/libstdc++-v3/acinclude.m4
> > @@ -5481,7 +5481,7 @@ BACKTRACE_CPPFLAGS="$BACKTRACE_CPPFLAGS 
> > -DBACKTRACE_ELF_SIZE=$elfsize"
> >
> >AC_MSG_CHECKING([whether to build libbacktrace support])
> >if test "$enable_libstdcxx_backtrace" = "auto"; then
> > -enable_libstdcxx_backtrace=no
> > +enable_libstdcxx_backtrace=yes
> >fi
> >AC_MSG_RESULT($enable_libstdcxx_backtrace)
> >if test "$enable_libstdcxx_backtrace" = "yes"; then
> >



Re: [PATCH] analyzer: Add support of placement new and improved operator new [PR105948,PR94355]

2023-09-01 Thread David Malcolm via Gcc-patches
On Fri, 2023-09-01 at 16:48 +0200, Benjamin Priour wrote:
> Patch has been updated as per your suggestions and successfully
> regstrapped
> on x86_64-linux-gnu.
> 
> call_details::maybe_get_arg_region is now
> /* If argument IDX's svalue at the callsite is of pointer type,
>     return the region it points to.
>     Otherwise return NULL.  */
> 
> const region *
>  call_details::deref_ptr_arg (unsigned idx) const
>  {
>    const svalue *ptr_sval = get_arg_svalue (idx);
>    return m_model->deref_rvalue (ptr_sval, get_arg_tree (idx),
> m_ctxt);
>  }
> 
> 
> New test is
> 
> +
> +void test_binop ()
> +{
> +  char *p = (char *) malloc (4);
> +  if (!p)
> +    return;
> +  int32_t *i = ::new (p + 1) int32_t; /* { dg-warning "heap-based
> buffer
> overflow" } */
> +  *i = 42; /* { dg-warning "heap-based buffer overflow" } */
> +  free (p);
> +}
> 
> Is it OK for trunk ?
> I didn't resend the whole patch as it otherwise was OK.

Yes, thanks.

Dave



Re: [committed] libstdc++: Fix compare_exchange_padding.cc test for std::atomic_ref

2023-09-01 Thread Jonathan Wakely via Gcc-patches
On Mon, 31 Oct 2022 at 15:34, Eric Botcazou wrote:
>
> > The test was only failing for me with -m32 (and not -m64), so I didn't
> > notice until now. That probably means we should make the test fail more
> > reliably if the padding isn't being cleared.
>
> The tests fail randomly for me on SPARC64/Linux:
>
> FAIL: 29_atomics/atomic/compare_exchange_padding.cc execution test
> FAIL: 29_atomics/atomic_ref/compare_exchange_padding.cc execution test
>
> /home/ebotcazou/src/libstdc++-v3/testsuite/29_atomics/atomic_ref/
> compare_exchange_padding.cc:34: int main(): Assertion 'compare_struct(ts, es)'
> failed.
> FAIL: 29_atomics/atomic_ref/compare_exchange_padding.cc execution test
>
>   std::atomic as{ s };
>   auto ts = as.load();
>   VERIFY( !compare_struct(s, ts) ); // padding cleared on construction
>   as.exchange(s);
>   auto es = as.load();
>   VERIFY( compare_struct(ts, es) ); // padding cleared on exchange
>
> How is it supposed to pass exactly?  AFAICS you have no control on the padding
> bits of ts or es and, indeed, at -O2 the loads are scalarized:
>
>   __buf$c_81 = MEM[(struct S *)&__buf].c;
>   __buf$s_59 = MEM[(struct S *)&__buf].s;
>   __buf ={v} {CLOBBER(eol)};
>   ts.c = __buf$c_81;
>   ts.s = __buf$s_59;
> [...]
>   __buf$c_100 = MEM[(struct S *)&__buf].c;
>   __buf$s_35 = MEM[(struct S *)&__buf].s;
>   __buf ={v} {CLOBBER(eol)};
>   es.c = __buf$c_100;
>   es.s = __buf$s_35;
>   _66 = MEM  [(char * {ref-all})];
>   _101 = MEM  [(char * {ref-all})];
>   if (_66 != _101)
> goto ; [0.04%]
>   else
> goto ; [99.96%]
>
> so the result of the 4-byte comparison is random.

This should be fixed now. I rewrote the test to check the padding byte
directly, instead of inspecting a copy of it which might not preserve
the padding bits.



Re: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering

2023-09-01 Thread Richard Sandiford via Gcc-patches
Tamar Christina  writes:
>> -Original Message-
>> From: Richard Sandiford 
>> Sent: Friday, September 1, 2023 2:36 PM
>> To: Tamar Christina 
>> Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
>> ; Marcus Shawcroft
>> ; Kyrylo Tkachov 
>> Subject: Re: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering
>> 
>> Tamar Christina  writes:
>> > Hi All,
>> >
>> > In GCC-9 our scalar xorsign pattern broke and we didn't notice it
>> > because the testcase was not strong enough.  With this commit
>> >
>> > 8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
>> > commit 8d2d39587d941a40f25ea0144cceb677df115040
>> > Author: Segher Boessenkool 
>> > Date:   Mon Oct 22 22:23:39 2018 +0200
>> >
>> > combine: Do not combine moves from hard registers
>> >
>> > combine started introducing useless moves on hard registers,  when one
>> > of the arguments to our scalar xorsign is a hardreg we get an additional 
>> > move
>> inserted.
>> >
>> > This leads to combine forming an AND with the immediate inside and
>> > using the superfluous move to do the r->w move, instead of what we
>> > wanted before which was for the `and` to be a vector `and` and have reload
>> pick the right alternative.
>> 
>> IMO, the xorsign optab ought to go away.  IIRC it was just a stop-gap measure
>> that (like most stop-gap measures) never got cleaned up later.
>> 
>> But that's not important now. :)
>> 
>> > To fix this the patch just forces the use of the vector version
>> > directly and so combine has no chance to mess it up.
>> >
>> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >
>> > Ok for master?
>> >
>> > Thanks,
>> > Tamar
>> >
>> > gcc/ChangeLog:
>> >
>> >* config/aarch64/aarch64-simd.md (xorsign3): Renamed to..
>> >(@xorsign3): ...This.
>> >* config/aarch64/aarch64.md (xorsign3): Renamed to...
>> >(@xorsign3): ..This and emit vectors directly
>> >* config/aarch64/iterators.md (VCONQ): Add SF and DF.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> >* gcc.target/aarch64/xorsign.c:
>> >
>> > --- inline copy of patch --
>> > diff --git a/gcc/config/aarch64/aarch64-simd.md
>> > b/gcc/config/aarch64/aarch64-simd.md
>> > index
>> >
>> f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc2
>> 3746511
>> > 9764ce2a4942 100644
>> > --- a/gcc/config/aarch64/aarch64-simd.md
>> > +++ b/gcc/config/aarch64/aarch64-simd.md
>> > @@ -500,7 +500,7 @@ (define_expand "ctz2"
>> >}
>> >  )
>> >
>> > -(define_expand "xorsign3"
>> > +(define_expand "@xorsign3"
>> >[(match_operand:VHSDF 0 "register_operand")
>> > (match_operand:VHSDF 1 "register_operand")
>> > (match_operand:VHSDF 2 "register_operand")] diff --git
>> > a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index
>> >
>> 01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aede
>> cdc84
>> > 62c94bf1a769 100644
>> > --- a/gcc/config/aarch64/aarch64.md
>> > +++ b/gcc/config/aarch64/aarch64.md
>> > @@ -6953,31 +6953,20 @@ (define_insn "copysign3_insn"
>> >  ;; EOR   v0.8B, v0.8B, v3.8B
>> >  ;;
>> >
>> > -(define_expand "xorsign3"
>> > +(define_expand "@xorsign3"
>> >[(match_operand:GPF 0 "register_operand")
>> > (match_operand:GPF 1 "register_operand")
>> > (match_operand:GPF 2 "register_operand")]
>> >"TARGET_SIMD"
>> >  {
>> > -
>> > -  machine_mode imode = mode;
>> > -  rtx mask = gen_reg_rtx (imode);
>> > -  rtx op1x = gen_reg_rtx (imode);
>> > -  rtx op2x = gen_reg_rtx (imode);
>> > -
>> > -  int bits = GET_MODE_BITSIZE (mode) - 1;
>> > -  emit_move_insn (mask, GEN_INT (trunc_int_for_mode
>> (HOST_WIDE_INT_M1U << bits,
>> > -   imode)));
>> > -
>> > -  emit_insn (gen_and3 (op2x, mask,
>> > -  lowpart_subreg (imode, operands[2],
>> > -  mode)));
>> > -  emit_insn (gen_xor3 (op1x,
>> > -  lowpart_subreg (imode, operands[1],
>> > -  mode),
>> > -  op2x));
>> > +  rtx tmp = gen_reg_rtx (mode);  rtx op1 = gen_reg_rtx
>> > + (mode);  rtx op2 = gen_reg_rtx (mode);
>> emit_move_insn
>> > + (op1, lowpart_subreg (mode, operands[1], mode));
>> > + emit_move_insn (op2, lowpart_subreg (mode, operands[2],
>> > + mode));  emit_insn (gen_xorsign3(mode, tmp, op1,
>> op2));
>> 
>> Do we need the extra moves into op1 and op2?  I would have expected the
>> subregs to be acceptable as direct operands of the xorsign3.  Making them
>> direct operands should be better, since there's then less risk of having the
>> same value live in different registers at the same time.
>> 
>
> That was the first thing I tried but it doesn't work because validate_subreg 
> seems
> to have the invariant that you can either change mode between the same size
> or make it paradoxical but not both at the same time.
>
> i.e. it rejects subreg:V2DI (subreg:DI (reg:DF))), and lowpart_subreg folds 
> it to
> 

Re: [PATCH] libstdc++: fix memory clobbering in std::vector [PR110879]

2023-09-01 Thread Jonathan Wakely via Gcc-patches
On Thu, 17 Aug 2023 at 08:43, Vladimir Palevich  wrote:
>
> On Thu, 17 Aug 2023 at 01:51, Jonathan Wakely  wrote:
> >
> > On 09/08/23 01:34 +0300, Vladimir Palevich wrote:
> > >Because of the recent change in _M_realloc_insert and _M_default_append, 
> > >call
> > >to deallocate was ordered after assignment to class members of std::vector
> > >(in the guard destructor), which is causing said members to be 
> > >call-clobbered.
> > >This is preventing further optimization, the compiler is unable to move 
> > >memory
> > >read out of a hot loop in this case.
> > >This patch reorders the call to before assignments by putting guard in its 
> > >own
> > >block. Plus a new testsuite for this case.
> > >I'm not very happy with the new testsuite, but I don't know how to properly
> > >test this.
> > >
> > >Tested on x86_64-pc-linux-gnu.
> > >
> > >Maybe something could be done so that the compiler would be able to 
> > >optimize
> > >such cases anyway. Reads could be moved just after the clobbering calls in
> > >unlikely branches, for example. This should be a fairly common case with
> > >destructors at the end of a function.
> > >
> > >Note: I don't have write access.
> > >
> > >-- >8 --
> > >
> > >Fix ordering to prevent clobbering of class members by a call to deallocate
> > >in _M_realloc_insert and _M_default_append.
> > >
> > >libstdc++-v3/ChangeLog:
> > >PR libstdc++/110879
> > >* include/bits/vector.tcc: End guard lifetime just before assignment to
> > >class members.
> > >* testsuite/libstdc++-dg/conformance.exp: Load scantree.exp.
> > >* testsuite/23_containers/vector/110879.cc: New test.
> > >
> > >Signed-off-by: Vladimir Palevich  
> > >---
> > > libstdc++-v3/include/bits/vector.tcc  | 220 +-
> > > .../testsuite/23_containers/vector/110879.cc  |  35 +++
> > > .../testsuite/libstdc++-dg/conformance.exp|  13 ++
> > > 3 files changed, 163 insertions(+), 105 deletions(-)
> > > create mode 100644 libstdc++-v3/testsuite/23_containers/vector/110879.cc
> > >
> > >diff --git a/libstdc++-v3/include/bits/vector.tcc 
> > >b/libstdc++-v3/include/bits/vector.tcc
> > >index ada396c9b30..80631d1e2a1 100644
> > >--- a/libstdc++-v3/include/bits/vector.tcc
> > >+++ b/libstdc++-v3/include/bits/vector.tcc
> > >@@ -488,78 +488,83 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
> > >   private:
> > >   _Guard(const _Guard&);
> > >   };
> > >-  _Guard __guard(__new_start, __len, _M_impl);
> > >
> > >-  // The order of the three operations is dictated by the C++11
> > >-  // case, where the moves could alter a new element belonging
> > >-  // to the existing vector.  This is an issue only for callers
> > >-  // taking the element by lvalue ref (see last bullet of C++11
> > >-  // [res.on.arguments]).
> > >+  {
> > >+  _Guard __guard(__new_start, __len, _M_impl);
> > >
> > >-  // If this throws, the existing elements are unchanged.
> > >+  // The order of the three operations is dictated by the C++11
> > >+  // case, where the moves could alter a new element belonging
> > >+  // to the existing vector.  This is an issue only for callers
> > >+  // taking the element by lvalue ref (see last bullet of C++11
> > >+  // [res.on.arguments]).
> > >+
> > >+  // If this throws, the existing elements are unchanged.
> > > #if __cplusplus >= 201103L
> > >-  _Alloc_traits::construct(this->_M_impl,
> > >- std::__to_address(__new_start + 
> > >__elems_before),
> > >- std::forward<_Args>(__args)...);
> > >+  _Alloc_traits::construct(this->_M_impl,
> > >+   std::__to_address(__new_start + 
> > >__elems_before),
> > >+   std::forward<_Args>(__args)...);
> > > #else
> > >-  _Alloc_traits::construct(this->_M_impl,
> > >- __new_start + __elems_before,
> > >- __x);
> > >+  _Alloc_traits::construct(this->_M_impl,
> > >+   __new_start + __elems_before,
> > >+   __x);
> > > #endif
> > >
> > > #if __cplusplus >= 201103L
> > >-  if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
> > >-  {
> > >-// Relocation cannot throw.
> > >-__new_finish = _S_relocate(__old_start, __position.base(),
> > >-   __new_start, _M_get_Tp_allocator());
> > >-++__new_finish;
> > >-__new_finish = _S_relocate(__position.base(), __old_finish,
> > >-   __new_finish, _M_get_Tp_allocator());
> > >-  }
> > >-  else
> > >+  if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
> > >+{
> > >+  // Relocation cannot throw.
> > >+  __new_finish = _S_relocate(__old_start, __position.base(),
> > >+ __new_start, _M_get_Tp_allocator());
> > >+  ++__new_finish;
> > >+  

[committed] libstdc++: Use std::string::__resize_and_overwrite in std::filesystem

2023-09-01 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

There are a few places in the std::filesystem code that use a string as
a buffer for OS APIs to write to. We can use the new extension
__resize_and_overwrite to avoid redundant initialization of those
buffers.

libstdc++-v3/ChangeLog:

* src/c++17/fs_ops.cc (fs::absolute) [FILESYSTEM_IS_WINDOWS]:
Use __resize_and_overwrite to fill buffer.
(fs::read_symlink) [HAVE_READLINK]: Likewise.
* src/filesystem/ops-common.h (get_temp_directory_from_env)
[FILESYSTEM_IS_WINDOWS]: Likewise.
---
 libstdc++-v3/src/c++17/fs_ops.cc | 45 
 libstdc++-v3/src/filesystem/ops-common.h |  7 ++--
 2 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc
index c94d260632f..6cdeac17c33 100644
--- a/libstdc++-v3/src/c++17/fs_ops.cc
+++ b/libstdc++-v3/src/c++17/fs_ops.cc
@@ -112,18 +112,17 @@ fs::absolute(const path& p, error_code& ec)
   wstring buf;
   do
 {
-  buf.resize(len);
-  len = GetFullPathNameW(s.data(), len, buf.data(), nullptr);
+  buf.__resize_and_overwrite(len, [&s, &len](wchar_t* p, unsigned n) {
+   len = GetFullPathNameW(s.data(), n, p, nullptr);
+   return len > n ? 0 : len;
+  });
 }
   while (len > buf.size());
 
   if (len == 0)
 ec = __last_system_error();
   else
-{
-  buf.resize(len);
-  ret = std::move(buf);
-}
+ret = std::move(buf);
 #else
   ret = current_path(ec);
   ret /= p;
@@ -1187,31 +1186,33 @@ fs::path fs::read_symlink(const path& p, error_code& ec)
   return result;
 }
 
-  std::string buf(st.st_size ? st.st_size + 1 : 128, '\0');
+  std::string buf;
+  size_t bufsz = st.st_size ? st.st_size + 1 : 128;
   do
 {
-  ssize_t len = ::readlink(p.c_str(), buf.data(), buf.size());
-  if (len == -1)
+  ssize_t len;
+  buf.__resize_and_overwrite(bufsz, [&p, &len](char* ptr, size_t n) {
+   len = ::readlink(p.c_str(), ptr, n);
+   return size_t(len) < n ? len : 0;
+  });
+  if (buf.size())
+   {
+ result.assign(std::move(buf));
+ ec.clear();
+ break;
+   }
+  else if (len == -1)
{
  ec.assign(errno, std::generic_category());
  return result;
}
-  else if (len == (ssize_t)buf.size())
+  else if (bufsz > 4096)
{
- if (buf.size() > 4096)
-   {
- ec.assign(ENAMETOOLONG, std::generic_category());
- return result;
-   }
- buf.resize(buf.size() * 2);
+ ec.assign(ENAMETOOLONG, std::generic_category());
+ return result;
}
   else
-   {
- buf.resize(len);
- result.assign(buf);
- ec.clear();
- break;
-   }
+   bufsz *= 2;
 }
   while (true);
 #else
diff --git a/libstdc++-v3/src/filesystem/ops-common.h 
b/libstdc++-v3/src/filesystem/ops-common.h
index 2e4331bb682..79dcb756453 100644
--- a/libstdc++-v3/src/filesystem/ops-common.h
+++ b/libstdc++-v3/src/filesystem/ops-common.h
@@ -700,8 +700,10 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
 std::wstring buf;
 do
   {
-   buf.resize(len);
-   len = GetTempPathW(buf.size(), buf.data());
+   buf.__resize_and_overwrite(len, [&len](wchar_t* p, unsigned n) {
+ len = GetTempPathW(n, p);
+ return len > n ? 0 : len;
+   });
   }
 while (len > buf.size());
 
@@ -710,7 +712,6 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM
 else
   ec.clear();
 
-buf.resize(len);
 return buf;
   }
 #else
-- 
2.41.0



[committed] libstdc++: Use a loop in atomic_ref::compare_exchange_strong [PR111077]

2023-09-01 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk. Backport to gcc-13 needed too.

-- >8 --

We need to use a loop in std::atomic_ref::compare_exchange_strong in
order to properly implement the C++20 requirement that padding bits do
not participate when checking the value for equality. The variable being
modified by a std::atomic_ref might have an initial value with non-zero
padding bits, so when the __atomic_compare_exchange built-in returns
false we need to check whether that was only because of non-equal
padding bits that are not part of the value representation. If the value
bits differ, it's just a failed compare-exchange. If the value bits are
the same, we need to retry the __atomic_compare_exchange using the value
that was just read by the previous failed call. As noted in the
comments, it's possible for that second try to also fail due to another
thread storing the same value but with differences in padding.

Because it's undefined to access a variable directly while it's held by
a std::atomic_ref, and because std::atomic_ref will only ever store
values with zeroed padding, we know that padding bits will never go from
zero to non-zero during the lifetime of a std::atomic_ref. They can only
go from an initial non-zero state to zero. This means the loop will
terminate, rather than looping indefinitely as padding bits flicker on
and off. In theory users could call __atomic_store etc. directly and
write a value with non-zero padding bits, but we don't need to support
that. Users doing that should ensure they do not write non-zero padding,
to be compatible with our std::atomic_ref's invariants.

This isn't a problem for std::atomic::compare_exchange_strong because
the initial value (and all later stores to the variable) are performed
by the library, so we ensure that stored values always have padding bits
cleared. That means we can simply clear the padding bits of the
'expected' value and we will be comparing two values with equal padding
bits. This means we don't need the loop for std::atomic, so update the
__atomic_impl::__compare_exchange function to take a bool parameter that
says whether it's being used by std::atomic_ref. If not, we can use a
simpler, non-looping implementation.

libstdc++-v3/ChangeLog:

PR libstdc++/111077
* include/bits/atomic_base.h (__atomic_impl::__compare_exchange):
Add _AtomicRef non-type template parameter and use a loop if it
is true.
(__atomic_impl::compare_exchange_weak): Add _AtomicRef NTTP.
(__atomic_impl::compare_exchange_strong): Likewise.
(atomic_ref::compare_exchange_weak): Use true for NTTP.
(atomic_ref::compare_exchange_strong): Use true for NTTP.
* testsuite/29_atomics/atomic_ref/compare_exchange_padding.cc:
Fix test to not rely on atomic_ref::load() to return an object
with padding preserved.
---
 libstdc++-v3/include/bits/atomic_base.h   | 147 --
 .../atomic_ref/compare_exchange_padding.cc|  75 ++---
 2 files changed, 150 insertions(+), 72 deletions(-)

diff --git a/libstdc++-v3/include/bits/atomic_base.h 
b/libstdc++-v3/include/bits/atomic_base.h
index 4ce04a02dd0..974872ad7a6 100644
--- a/libstdc++-v3/include/bits/atomic_base.h
+++ b/libstdc++-v3/include/bits/atomic_base.h
@@ -985,7 +985,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 template
   using _Val = typename remove_volatile<_Tp>::type;
 
-template
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++17-extensions"
+
+template
   _GLIBCXX_ALWAYS_INLINE bool
   __compare_exchange(_Tp& __val, _Val<_Tp>& __e, _Val<_Tp>& __i,
 bool __is_weak,
@@ -994,27 +997,79 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
 
using _Vp = _Val<_Tp>;
+   _Tp* const __pval = std::__addressof(__val);
 
-   if _GLIBCXX17_CONSTEXPR (__atomic_impl::__maybe_has_padding<_Vp>())
+   if constexpr (!__atomic_impl::__maybe_has_padding<_Vp>())
  {
-   // We must not modify __e on success, so cannot clear its padding.
-   // Copy into a buffer and clear that, then copy back on failure.
-   alignas(_Vp) unsigned char __buf[sizeof(_Vp)];
-   _Vp* __exp = ::new((void*)__buf) _Vp(__e);
-   __atomic_impl::__clear_padding(*__exp);
-   if (__atomic_compare_exchange(std::__addressof(__val), __exp,
- __atomic_impl::__clear_padding(__i),
+   return __atomic_compare_exchange(__pval, std::__addressof(__e),
+std::__addressof(__i), __is_weak,
+int(__s), int(__f));
+ }
+   else if constexpr (!_AtomicRef) // std::atomic
+ {
+   // Clear padding of the value we want to set:
+   _Vp* const __pi = __atomic_impl::__clear_padding(__i);
+   // Only allowed to modify __e on failure, so make a 

Re: [PATCH] analyzer: implement reference count checking for CPython plugin [PR107646]

2023-09-01 Thread David Malcolm via Gcc-patches
On Fri, 2023-09-01 at 04:49 +0200, Hans-Peter Nilsson wrote:
> (Looks like this was committed as r14-3580-g597b9ec69bca8a)
> 
> > Cc: g...@gcc.gnu.org, gcc-patches@gcc.gnu.org, Eric Feng
> > 
> > From: Eric Feng via Gcc 
> 
> > gcc/testsuite/ChangeLog:
> >   PR analyzer/107646
> > * gcc.dg/plugin/analyzer_cpython_plugin.c: Implements
> > reference count
> >   * checking for PyObjects.
> > * gcc.dg/plugin/cpython-plugin-test-2.c: Moved to...
> > * gcc.dg/plugin/cpython-plugin-test-PyList_Append.c:
> > ...here (and
> >   * added more tests).
> > * gcc.dg/plugin/cpython-plugin-test-1.c: Moved to...
> > * gcc.dg/plugin/cpython-plugin-test-no-plugin.c: ...here
> > (and added
> >   * more tests).
> > * gcc.dg/plugin/plugin.exp: New tests.
> > * gcc.dg/plugin/cpython-plugin-test-PyList_New.c: New test.
> > * gcc.dg/plugin/cpython-plugin-test-PyLong_FromLong.c: New
> > test.
> > * gcc.dg/plugin/cpython-plugin-test-refcnt-checking.c: New
> > test.
> 
> It seems this was more or less a rewrite, but that said,
> it's generally preferable to always *add* tests, never *modify* them.
> 
> >  .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 376
> > +-
> 
> ^^^ Ouch!  Was it not within reason to keep that test as it
> was, and just add another test?
> 
> Anyway, the test after rewrite fails, and for some targets
> like cris-elf and apparently m68k-linux, yields an error.
> I see a PR was already opened.
> 
> Also, mostly for future reference, several files in the
> patch miss a final newline, as seen by a "\ No newline at
> end of file"-marker.
> 
> I think I found the problem; a mismatch between default C++
> language standard between host-gcc and target-gcc.
> 
> (It's actually *not* as simple as "auto var = typeofvar()"
> not being recognized in C++11 --or else there'd be an error
> for the hash_set declaration too, which I just changed for
> consistency-- but it's close enough for me.)
> 
> With this, retesting plugin.exp for cris-elf works.
> 
> Ok to commit?

Sorry about the failing tests.

Thanks for the patch; please go ahead and commit.

Dave

> 
> -- >8 --
> From: Hans-Peter Nilsson 
> Date: Fri, 1 Sep 2023 04:36:03 +0200
> Subject: [PATCH] testsuite: Fix analyzer_cpython_plugin.c
> declarations, PR testsuite/111264
> 
> Also, add missing newline at end of file.
> 
> PR testsuite/111264
> * gcc.dg/plugin/analyzer_cpython_plugin.c: Make declarations
> C++11-compatible.
> ---
>  gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> index 7af520436549..bf1982e79c37 100644
> --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> @@ -477,8 +477,8 @@ pyobj_refcnt_checker (const region_model *model,
>    if (!ctxt)
>  return;
>  
> -  auto region_to_refcnt = hash_map ();
> -  auto seen_regions = hash_set ();
> +  hash_map region_to_refcnt;
> +  hash_set seen_regions;
>  
>    count_pyobj_references (model, region_to_refcnt, retval,
> seen_regions);
>    check_refcnts (model, old_model, retval, ctxt, region_to_refcnt);
> @@ -561,7 +561,7 @@ public:
>  if (!ctxt)
>    return;
>  region_model *model = cd.get_model ();
> -    auto region_to_refcnt = hash_map ();
> +    hash_map region_to_refcnt;
>  count_all_references(model, region_to_refcnt);
>  dump_refcnt_info(region_to_refcnt, model, ctxt);
>    }
> @@ -1330,4 +1330,4 @@ plugin_init (struct plugin_name_args
> *plugin_info,
>    sorry_no_analyzer ();
>  #endif
>    return 0;
> -}
> \ No newline at end of file
> +}



Re: [PATCH] analyzer: Add support of placement new and improved operator new [PR105948,PR94355]

2023-09-01 Thread Benjamin Priour via Gcc-patches
Patch has been updated as per your suggestions and successfully regstrapped
on x86_64-linux-gnu.

call_details::maybe_get_arg_region is now
/* If argument IDX's svalue at the callsite is of pointer type,
return the region it points to.
Otherwise return NULL.  */

const region *
 call_details::deref_ptr_arg (unsigned idx) const
 {
   const svalue *ptr_sval = get_arg_svalue (idx);
   return m_model->deref_rvalue (ptr_sval, get_arg_tree (idx), m_ctxt);
 }


New test is

+
+void test_binop ()
+{
+  char *p = (char *) malloc (4);
+  if (!p)
+return;
+  int32_t *i = ::new (p + 1) int32_t; /* { dg-warning "heap-based buffer
overflow" } */
+  *i = 42; /* { dg-warning "heap-based buffer overflow" } */
+  free (p);
+}

Is it OK for trunk ?
I didn't resend the whole patch as it otherwise was OK.

Thanks,
Benjamin.

On Fri, Sep 1, 2023 at 12:07 PM Benjamin Priour  wrote:

> Hi David,
>
> On Fri, Sep 1, 2023 at 1:59 AM David Malcolm  wrote:
>
>> On Fri, 2023-09-01 at 00:04 +0200, priour...@gmail.com wrote:
>>
>>
> [..snip..]
>
>
>> ...which will only fire if arg 1 is a region_svalue.  This won't
>> trigger if you have e.g. a binop_svalue for pointer arithmetic.
>>
>> What happens e.g. for this one-off-the-end bug:
>>
>>   void *p = malloc (4);
>>   if (!p)
>> return;
>>   int32_t *i = ::new (p + 1) int32_t;
>>   *i = 42;
>>
>> So maybe call_details::maybe_get_arg_region should instead be:
>>
>> /* Return the region that argument IDX points to.  */
>>
>> const region *
>> call_details::deref_ptr_arg (unsigned idx) const
>> {
>>   const svalue *ptr_sval = get_arg_svalue (idx);
>>   return m_model->deref_rvalue (ptr_sval, get_arg_tree (idx), m_ctxt);
>> }
>>
>> (caveat: I didn't test this)
>>
>> > + const region *base_reg = ptr_reg->get_base_region ();
>> > + const svalue *num_bytes_sval = cd.get_arg_svalue (0);
>> > + const region *sized_new_reg
>> > + = mgr->get_sized_region (base_reg,
>> > +  cd.get_lhs_type (),
>> > +  num_bytes_sval);
>>
>> Why do you use the base_reg here, rather than just ptr_reg?
>>
>> In the example above, the *(p + 1) has base region
>> heap_allocated_region, but the ptr_reg is one byte higher; hence
>> check_region_for_write of 4 bytes ought to detect a problem with
>> writing 4 bytes to *(p + 1), but wouldn't complain about the write to
>> *p.
>>
>> ...assuming that I'm reading this code correctly.
>>
>> > + model->check_region_for_write (sized_new_reg,
>> > +nullptr,
>> > +ctxt);
>> > + const svalue *ptr_sval
>> > +   = mgr->get_ptr_svalue (cd.get_lhs_type (), sized_new_reg);
>> > + cd.maybe_set_lhs (ptr_sval);
>> > +   }
>> > +  }
>>
>> [...snip...]
>>
>> The patch is OK for trunk as is; but please can you look into the
>> above.
>>
>>
> Thanks for the test case David, it exposed a missing heap-based buffer
> overflow warning on the placement new statement.
> I've updated the code as per your suggestions, and it now works properly.
>
>
>> If the above is a problem, you can either do another version of the
>> patch, or do it as a followup patch (whichever you're more comfortable
>> with, but it might be best to get the patch into trunk as-is, given
>> that the GSoC period is nearly over).
>>
>> Thanks
>> Dave
>>
>>
> I will update the patch and regstrap it, so that it is done at once.
> I've compared the new test case to a "C" version of it, resulting in
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111266
>
> I will attempt to fix it while I'm regstrapping everything else,
> I still have 4 patches in queue.
> It will give me a brief break from transitioning the tests :)
>
> Thanks for the review,
> Benjamin.
>


[PATCH] c++, v3: Diagnose [basic.scope.block]/2 violations even in compound-stmt of function-try-block [PR52953]

2023-09-01 Thread Jakub Jelinek via Gcc-patches
On Fri, Sep 01, 2023 at 03:24:54PM +0200, Jakub Jelinek via Gcc-patches wrote:
> So like this?
> 
> It actually changes behaviour on the
> void foo (int x) try {} catch (int x) {} case, where previously
> this triggered the
>|| (TREE_CODE (old) == PARM_DECL
>&& (current_binding_level->kind == sk_catch
>|| current_binding_level->level_chain->kind == 
> sk_catch)
>&& in_function_try_handler))
> {
>   auto_diagnostic_group d;
>   if (permerror (DECL_SOURCE_LOCATION (decl),
>  "redeclaration of %q#D", decl))
> inform (DECL_SOURCE_LOCATION (old),
> "%q#D previously declared here", old);
> diagnostics (note, just the current_binding_level->kind == sk_catch
> case), while now it triggers already the earlier
>   if (b->kind == sk_function_parms)
> {
>   error_at (DECL_SOURCE_LOCATION (decl),
> "declaration of %q#D shadows a parameter", decl);
>   inform (DECL_SOURCE_LOCATION (old),
>   "%q#D previously declared here", old);
> error.  If you think it is important to differentiate that,
> I guess I could guard the while (b->artificial) loop with say
> +   if (!in_function_try_handler
> +   || current_binding_level->kind != sk_catch)
>   while (b->artificial)
> b = b->level_chain;
> and adjust the 2 testcases.

BTW, that in_function_try_handler case doesn't work correctly
(before/after this patch),
void
foo (int x)
try {
}
catch (int)
{
  try {
  } catch (int x)
  {
  }
  try {
  } catch (int)
  {
int x;
  }
}
(which is valid) is rejected, because
|| (TREE_CODE (old) == PARM_DECL
&& (current_binding_level->kind == sk_catch
|| current_binding_level->level_chain->kind == sk_catch)
&& in_function_try_handler))
is true but nothing verified that for the first case
current_binding_level->level_chain->kind == sk_function_parms
(with perhaps artificial scopes in between and in the latter case
with one extra level in between).

Here is an untested variant of the patch which does diagnostics of the
in_function_try_handler cases only if it is proven there are no intervening
non-artificial scopes, but uses the old wording + permerror for that case
like before.

Another possibility would be to use permerror for that case but the
"declaration of %q#D shadows a parameter" wording (then we'd need to adjust
both testcases, each on one line).

2023-09-01  Jakub Jelinek  

PR c++/52953
* name-lookup.h (struct cp_binding_level): Add artificial bit-field.
Formatting fixes.
* name-lookup.cc (check_local_shadow): Skip artificial bindings when
checking if parameter scope is parent scope.  Don't special case
FUNCTION_NEEDS_BODY_BLOCK.  Diagnose the in_function_try_handler
cases in the b->kind == sk_function_parms test, verify no
non-artificial intervening scopes but use permerror for that case with
different wording.  Add missing auto_diagnostic_group.
* decl.cc (begin_function_body): Set
current_binding_level->artificial.
* semantics.cc (begin_function_try_block): Likewise.

* g++.dg/diagnostic/redeclaration-3.C: New test.

--- gcc/cp/name-lookup.h.jj 2023-09-01 12:15:22.574619674 +0200
+++ gcc/cp/name-lookup.h2023-09-01 16:11:47.838401045 +0200
@@ -292,11 +292,11 @@ struct GTY(()) cp_binding_level {
   only valid if KIND == SK_TEMPLATE_PARMS.  */
   BOOL_BITFIELD explicit_spec_p : 1;
 
-  /* true means make a BLOCK for this level regardless of all else.  */
+  /* True means make a BLOCK for this level regardless of all else.  */
   unsigned keep : 1;
 
   /* Nonzero if this level can safely have additional
-  cleanup-needing variables added to it.  */
+ cleanup-needing variables added to it.  */
   unsigned more_cleanups_ok : 1;
   unsigned have_cleanups : 1;
 
@@ -308,9 +308,13 @@ struct GTY(()) cp_binding_level {
   unsigned defining_class_p : 1;
 
   /* True for SK_FUNCTION_PARMS of a requires-expression.  */
-  unsigned requires_expression: 1;
+  unsigned requires_expression : 1;
 
-  /* 22 bits left to fill a 32-bit word.  */
+  /* True for artificial blocks which should be ignored when finding
+ parent scope.  */
+  unsigned artificial : 1;
+
+  /* 21 bits left to fill a 32-bit word.  */
 };
 
 /* The binding level currently in effect.  */
--- gcc/cp/name-lookup.cc.jj2023-09-01 12:15:22.566619785 +0200
+++ gcc/cp/name-lookup.cc   2023-09-01 16:19:12.567335710 +0200
@@ -3146,18 +3146,34 @@ check_local_shadow (tree decl)
 them there.  */
  cp_binding_level *b = current_binding_level->level_chain;
 
- if (FUNCTION_NEEDS_BODY_BLOCK (current_function_decl))
-   /* Skip the ctor/dtor cleanup level.  */
+ 

RE: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering

2023-09-01 Thread Tamar Christina via Gcc-patches



> -Original Message-
> From: Richard Sandiford 
> Sent: Friday, September 1, 2023 2:36 PM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
> ; Marcus Shawcroft
> ; Kyrylo Tkachov 
> Subject: Re: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering
> 
> Tamar Christina  writes:
> > Hi All,
> >
> > In GCC-9 our scalar xorsign pattern broke and we didn't notice it
> > because the testcase was not strong enough.  With this commit
> >
> > 8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
> > commit 8d2d39587d941a40f25ea0144cceb677df115040
> > Author: Segher Boessenkool 
> > Date:   Mon Oct 22 22:23:39 2018 +0200
> >
> > combine: Do not combine moves from hard registers
> >
> > combine started introducing useless moves on hard registers,  when one
> > of the arguments to our scalar xorsign is a hardreg we get an additional 
> > move
> inserted.
> >
> > This leads to combine forming an AND with the immediate inside and
> > using the superfluous move to do the r->w move, instead of what we
> > wanted before which was for the `and` to be a vector and and have reload
> pick the right alternative.
> 
> IMO, the xorsign optab ought to go away.  IIRC it was just a stop-gap measure
> that (like most stop-gap measures) never got cleaned up later.
> 
> But that's not important now. :)
> 
> > To fix this the patch just forces the use of the vector version
> > directly and so combine has no chance to mess it up.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64-simd.md (xorsign3): Renamed to..
> > (@xorsign3): ...This.
> > * config/aarch64/aarch64.md (xorsign3): Renamed to...
> > (@xorsign3): ..This and emit vectors directly
> > * config/aarch64/iterators.md (VCONQ): Add SF and DF.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/aarch64/xorsign.c:
> >
> > --- inline copy of patch --
> > diff --git a/gcc/config/aarch64/aarch64-simd.md
> > b/gcc/config/aarch64/aarch64-simd.md
> > index
> >
> f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc2
> 3746511
> > 9764ce2a4942 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -500,7 +500,7 @@ (define_expand "ctz2"
> >}
> >  )
> >
> > -(define_expand "xorsign3"
> > +(define_expand "@xorsign3"
> >[(match_operand:VHSDF 0 "register_operand")
> > (match_operand:VHSDF 1 "register_operand")
> > (match_operand:VHSDF 2 "register_operand")] diff --git
> > a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index
> >
> 01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aede
> cdc84
> > 62c94bf1a769 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -6953,31 +6953,20 @@ (define_insn "copysign3_insn"
> >  ;; EOR   v0.8B, v0.8B, v3.8B
> >  ;;
> >
> > -(define_expand "xorsign3"
> > +(define_expand "@xorsign3"
> >[(match_operand:GPF 0 "register_operand")
> > (match_operand:GPF 1 "register_operand")
> > (match_operand:GPF 2 "register_operand")]
> >"TARGET_SIMD"
> >  {
> > -
> > -  machine_mode imode = mode;
> > -  rtx mask = gen_reg_rtx (imode);
> > -  rtx op1x = gen_reg_rtx (imode);
> > -  rtx op2x = gen_reg_rtx (imode);
> > -
> > -  int bits = GET_MODE_BITSIZE (mode) - 1;
> > -  emit_move_insn (mask, GEN_INT (trunc_int_for_mode
> (HOST_WIDE_INT_M1U << bits,
> > -imode)));
> > -
> > -  emit_insn (gen_and3 (op2x, mask,
> > -   lowpart_subreg (imode, operands[2],
> > -   mode)));
> > -  emit_insn (gen_xor3 (op1x,
> > -   lowpart_subreg (imode, operands[1],
> > -   mode),
> > -   op2x));
> > +  rtx tmp = gen_reg_rtx (mode);  rtx op1 = gen_reg_rtx
> > + (mode);  rtx op2 = gen_reg_rtx (mode);
> emit_move_insn
> > + (op1, lowpart_subreg (mode, operands[1], mode));
> > + emit_move_insn (op2, lowpart_subreg (mode, operands[2],
> > + mode));  emit_insn (gen_xorsign3(mode, tmp, op1,
> op2));
> 
> Do we need the extra moves into op1 and op2?  I would have expected the
> subregs to be acceptable as direct operands of the xorsign3.  Making them
> direct operands should be better, since there's then less risk of having the
> same value live in different registers at the same time.
> 

That was the first thing I tried but it doesn't work because validate_subreg 
seems
to have the invariant that you can either change mode between the same size
or make it paradoxical but not both at the same time.

i.e. it rejects subreg:V2DI (subreg:DI (reg:DF))), and lowpart_subreg folds it 
to
NULL_RTX. Because the lowering when the input is a subreg takes the mode of
the original RTX. i.e. the above is folded to subreg:V2DI (reg:DF) which is 

[PING][PATCH v2] Add clang's invalid-noreturn warning flag (gnu.org)

2023-09-01 Thread Julian Waters via Gcc-patches
Hi all, this is a gentle ping for the following patch:

https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627913.html


Re: [PING][PATCH] LoongArch: initial ada support on linux

2023-09-01 Thread Arnaud Charlet via Gcc-patches
> gcc/ChangeLog:
> 
>   * ada/Makefile.rtl: Add LoongArch support.
>   * ada/libgnarl/s-linux__loongarch.ads: New.
>   * ada/libgnat/system-linux-loongarch.ads: New.
>   * config/loongarch/loongarch.h: mark normalized options
>   passed from driver to gnat1 as explicit for multilib.
> ---
> --- a/gcc/ada/Makefile.rtl
> +++ b/gcc/ada/Makefile.rtl
> @@ -2111,6 +2111,55 @@ ifeq ($(strip $(filter-out cygwin% mingw32% 
> pe,$(target_os))),)
>LIBRARY_VERSION := $(LIB_VERSION)
>  endif
>  
> +# LoongArch Linux
> +ifeq ($(strip $(filter-out loongarch% linux%,$(target_cpu) $(target_os))),)
> +  LIBGNAT_TARGET_PAIRS = \
> +  a-exetim.adb +  a-exetim.ads +  a-intnam.ads +  a-nallfl.ads +  a-synbar.adb +  a-synbar.ads +  s-inmaop.adb +  s-intman.adb +  s-linux.ads +  s-mudido.adb +  s-osinte.ads +  s-osinte.adb +  s-osprim.adb +  s-taprop.adb +  s-tasinf.ads +  s-tasinf.adb +  s-tpopsp.adb +  s-taspri.ads +  g-sercom.adb +  $(TRASYM_DWARF_UNIX_PAIRS) \
> +  $(GNATRTL_128BIT_PAIRS) \
> +  s-tsmona.adb +  $(ATOMICS_TARGET_PAIRS) \
> +  $(ATOMICS_BUILTINS_TARGET_PAIRS) \
> +  system.ads +
> +  TOOLS_TARGET_PAIRS = indepsw.adb +
> +  EXTRA_GNATRTL_NONTASKING_OBJS += $(TRASYM_DWARF_UNIX_OBJS)
> +  EXTRA_GNATRTL_NONTASKING_OBJS += $(GNATRTL_128BIT_OBJS)
> +  EXTRA_GNATRTL_TASKING_OBJS = s-linux.o a-exetim.o
> +
> +  EH_MECHANISM = -gcc
> +  THREADSLIB = -lpthread
> +  MISCLIB = -ldl
> +  GNATLIB_SHARED = gnatlib-shared-dual
> +  GMEM_LIB = gmemlib
> +  LIBRARY_VERSION := $(LIB_VERSION)
> +  # Temporarily disable strict alignment -- for some reason, it causes
> +  # infinite loops during stack unwinding (libgcc) and indefinite hang
> +  # in some futex system calls.
> +  GNATLIBCFLAGS := $(GNATLIBCFLAGS) -mno-strict-align
> +  GNATLIBCFLAGS_FOR_C := $(GNATLIBCFLAGS_FOR_C) -mno-strict-align

Patch looks indeed OK.
A small nit above: I'd suggest using += instead of := $(XXX) to make things
clearer.

Arno


Re: [PATCH][Ada] Fix syntax errors in expect.c

2023-09-01 Thread Arnaud Charlet via Gcc-patches
Change is OK, thanks!

> Noticed trivial syntax errors in gcc/ada/expect.c when tried to compile gcc
> 13.2 as cross-compiler for target i686-pc-msdosdjgpp.
> 
> Errors were there since
> 
> Tiedostossa, joka sisällytettiin kohdasta expect.c:54:
> expect.c:Funktio ”__gnat_waitpid”:
> expect.c:353:13:virhe: expected ”(” before numeric constant
>  353 |   } else if WIFSTOPPED(status) {
>  | ^~
> expect.c:358:1:varoitus: ei-void-tyyppisen funktion loppu saavutettu 
> [-Wreturn-type]
>  358 | }
>  | ^
> make[5]: *** [../gcc-interface/Makefile:297: expect.o] Error 1
> 
> Errors were there since commit 9e6274e0a3b60e77a42784c3fb6ef2aa3cfc071a(Wed
> Dec 15 19:26:50 2021 +0600)
> 
> Fixing these errors (attached patch for master branch) was not sufficient
> for building Ada cross-compiler, but it fixed compiler errors.
> 
> This would perhaps qualify as a trivial change, but it seems that I no longer
> have write access (I got it in 2015, but have not used it for a long time.
> Perhaps I do not really need it)
> 
> Andris
> 
> commit 64c48aa99656e06d5728bf5837da3bbc50ae4cc5
> Author: Andris Pavēnis 
> Date:   Sat Aug 19 10:40:22 2023 +0300
> 
> Fix syntax error
> 
> gcc/ada/expect.c(__gnat_waitpid):
> fix syntax errors


Re: [PATCH]AArch64 xorsign: Fix scalar xorsign lowering

2023-09-01 Thread Richard Sandiford via Gcc-patches
Tamar Christina  writes:
> Hi All,
>
> In GCC-9 our scalar xorsign pattern broke and we didn't notice it because the
> testcase was not strong enough.  With this commit
>
> 8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
> commit 8d2d39587d941a40f25ea0144cceb677df115040
> Author: Segher Boessenkool 
> Date:   Mon Oct 22 22:23:39 2018 +0200
>
> combine: Do not combine moves from hard registers
>
> combine started introducing useless moves on hard registers,  when one of the
> arguments to our scalar xorsign is a hardreg we get an additional move 
> inserted.
>
> This leads to combine forming an AND with the immediate inside and using the
> superfluous move to do the r->w move, instead of what we wanted before, which
> was for the `and` to be a vector `and` and have reload pick the right
> alternative.

IMO, the xorsign optab ought to go away.  IIRC it was just a stop-gap
measure that (like most stop-gap measures) never got cleaned up later.

But that's not important now. :)

> To fix this the patch just forces the use of the vector version directly and
> so combine has no chance to mess it up.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-simd.md (xorsign3): Renamed to..
>   (@xorsign3): ...This.
>   * config/aarch64/aarch64.md (xorsign3): Renamed to...
>   (@xorsign3): ..This and emit vectors directly
>   * config/aarch64/iterators.md (VCONQ): Add SF and DF.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/xorsign.c:
>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc237465119764ce2a4942
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -500,7 +500,7 @@ (define_expand "ctz2"
>}
>  )
>  
> -(define_expand "xorsign3"
> +(define_expand "@xorsign3"
>[(match_operand:VHSDF 0 "register_operand")
> (match_operand:VHSDF 1 "register_operand")
> (match_operand:VHSDF 2 "register_operand")]
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 
> 01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aedecdc8462c94bf1a769
>  100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -6953,31 +6953,20 @@ (define_insn "copysign3_insn"
>  ;; EOR   v0.8B, v0.8B, v3.8B
>  ;;
>  
> -(define_expand "xorsign3"
> +(define_expand "@xorsign3"
>[(match_operand:GPF 0 "register_operand")
> (match_operand:GPF 1 "register_operand")
> (match_operand:GPF 2 "register_operand")]
>"TARGET_SIMD"
>  {
> -
> -  machine_mode imode = mode;
> -  rtx mask = gen_reg_rtx (imode);
> -  rtx op1x = gen_reg_rtx (imode);
> -  rtx op2x = gen_reg_rtx (imode);
> -
> -  int bits = GET_MODE_BITSIZE (mode) - 1;
> -  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << 
> bits,
> -  imode)));
> -
> -  emit_insn (gen_and3 (op2x, mask,
> - lowpart_subreg (imode, operands[2],
> - mode)));
> -  emit_insn (gen_xor3 (op1x,
> - lowpart_subreg (imode, operands[1],
> - mode),
> - op2x));
> +  rtx tmp = gen_reg_rtx (mode);
> +  rtx op1 = gen_reg_rtx (mode);
> +  rtx op2 = gen_reg_rtx (mode);
> +  emit_move_insn (op1, lowpart_subreg (mode, operands[1], 
> mode));
> +  emit_move_insn (op2, lowpart_subreg (mode, operands[2], 
> mode));
> +  emit_insn (gen_xorsign3(mode, tmp, op1, op2));

Do we need the extra moves into op1 and op2?  I would have expected the
subregs to be acceptable as direct operands of the xorsign3.  Making
them direct operands should be better, since there's then less risk of
having the same value live in different registers at the same time.

OK with that change if it works.

Also, nit: missing space before "(".

Thanks,
Richard

>emit_move_insn (operands[0],
> -   lowpart_subreg (mode, op1x, imode));
> +   lowpart_subreg (mode, tmp, mode));
>DONE;
>  }
>  )
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 
> 9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac8339eed9bc975cf203fa4c
>  100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
>(V4HF "V8HF") (V8HF "V8HF")
>(V2SF "V4SF") (V4SF "V4SF")
>(V2DF "V2DF") (SI   "V4SI")
> -  (HI   "V8HI") (QI   "V16QI")])
> +  (HI   "V8HI") (QI   "V16QI")
> +  (SF   "V4SF") (DF   "V2DF")])
>  
>  

[PATCH] c++, v2: Diagnose [basic.scope.block]/2 violations even for block externs [PR52953]

2023-09-01 Thread Jakub Jelinek via Gcc-patches
On Thu, Aug 31, 2023 at 05:46:28PM -0400, Jason Merrill wrote:
> I've suggested this to Core.

Thanks.

> > So, I'm not really sure what to do.  Intuitively the patch seems right
> > because even block externs redeclare stuff and change meaning of the
> > identifiers and void foo () { int i; extern int i (int); } is rejected
> > by all compilers.
> 
> I think this direction makes sense, though we might pedwarn on these rather
> than error to reduce possible breakage.

It wasn't clear to me whether you want to make those pedwarns just for the
DECL_EXTERNAL cases, ones that actually changed, or all others as well
(which were errors or permerrors depending on the case).
I've implemented the former, kept existing behavior of !DECL_EXTERNAL.

> > 2023-08-31  Jakub Jelinek  
> > 
> > PR c++/52953
> > * name-lookup.cc (check_local_shadow): Defer punting on
> > DECL_EXTERNAL (decl) from the start of function to right before
> > the -Wshadow* checks.
> 
> Don't we want to consider externs for the -Wshadow* checks as well?

I think that is a good idea (though dunno how much it will trigger in
real-world), but there is one case I've excluded, the global variable
shadowing case, because warning that
int z;
void foo () { extern int z; z = 1; }
shadows the global var would be incorrect, it is the same var.
It is true that
int y; namespace N { void bar () { extern int y; y = 1; } }
shadows ::y but it is unclear how to differentiate those two cases with
the information we have at check_local_shadow time.

I've also found one spot which wasn't using auto_diagnostic_group d;
on a pair of error_at/inform.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-09-01  Jakub Jelinek  

PR c++/52953
* name-lookup.cc (check_local_shadow): Don't punt early for
DECL_EXTERNAL decls, instead just disable the shadowing of namespace
decls check for those and emit a pedwarn rather than error_at for
those.  Add missing auto_diagnostic_group.  Formatting fix.

* g++.dg/diagnostic/redeclaration-4.C: New test.
* g++.dg/diagnostic/redeclaration-5.C: New test.
* g++.dg/warn/Wshadow-19.C: New test.

--- gcc/cp/name-lookup.cc.jj2023-09-01 10:21:03.658118594 +0200
+++ gcc/cp/name-lookup.cc   2023-09-01 11:30:10.868516494 +0200
@@ -3096,10 +3096,6 @@ check_local_shadow (tree decl)
   if (TREE_CODE (decl) == PARM_DECL && !DECL_CONTEXT (decl))
 return;
 
-  /* External decls are something else.  */
-  if (DECL_EXTERNAL (decl))
-return;
-
   tree old = NULL_TREE;
   cp_binding_level *old_scope = NULL;
   if (cxx_binding *binding = outer_binding (DECL_NAME (decl), NULL, true))
@@ -3130,11 +3126,9 @@ check_local_shadow (tree decl)
  && DECL_CONTEXT (old) == lambda_function (current_lambda_expr ())
  && TREE_CODE (old) == PARM_DECL
  && DECL_NAME (decl) != this_identifier)
-   {
- error_at (DECL_SOURCE_LOCATION (old),
-   "lambda parameter %qD "
-   "previously declared as a capture", old);
-   }
+   error_at (DECL_SOURCE_LOCATION (old),
+ "lambda parameter %qD "
+ "previously declared as a capture", old);
  return;
}
   /* Don't complain if it's from an enclosing function.  */
@@ -3156,10 +3150,18 @@ check_local_shadow (tree decl)
 in the outermost block of the function definition.  */
  if (b->kind == sk_function_parms)
{
- error_at (DECL_SOURCE_LOCATION (decl),
-   "declaration of %q#D shadows a parameter", decl);
- inform (DECL_SOURCE_LOCATION (old),
- "%q#D previously declared here", old);
+ auto_diagnostic_group d;
+ bool emit = true;
+ if (DECL_EXTERNAL (decl))
+   emit = pedwarn (DECL_SOURCE_LOCATION (decl), OPT_Wpedantic,
+   "declaration of %q#D shadows a parameter",
+   decl);
+ else
+   error_at (DECL_SOURCE_LOCATION (decl),
+ "declaration of %q#D shadows a parameter", decl);
+ if (emit)
+   inform (DECL_SOURCE_LOCATION (old),
+   "%q#D previously declared here", old);
  return;
}
}
@@ -3185,10 +3187,16 @@ check_local_shadow (tree decl)
   && (old_scope->kind == sk_cond || old_scope->kind == sk_for))
{
  auto_diagnostic_group d;
- error_at (DECL_SOURCE_LOCATION (decl),
-   "redeclaration of %q#D", decl);
- inform (DECL_SOURCE_LOCATION (old),
- "%q#D previously declared here", old);
+ bool emit = true;
+ if (DECL_EXTERNAL (decl))
+   emit = pedwarn (DECL_SOURCE_LOCATION (decl), OPT_Wpedantic,
+   "redeclaration of 

Re: [PATCH] ipa: Self-DCE of uses of removed call LHSs (PR 108007)

2023-09-01 Thread Martin Jambor
Hello

and ping.

Thanks,

Martin


On Fri, May 12 2023, Martin Jambor wrote:
> Hi,
>
> PR 108007 is another manifestation where we rely on DCE to clean-up
> after IPA-SRA and if the user explicitly switches DCE off, IPA-SRA
> can leave behind statements which are fed uninitialized values and
> trap, even though their results are themselves never used.
>
> I have already fixed this for unused parameters in callees, this bug
> shows that almost the same thing can happen for removed returns, on
> the side of callers.  This means that the issue has to be fixed
> elsewhere, in call redirection.  This patch adds a function which
> recursively looks for uses of operations fed specific SSA names and
> removes them all.
>
> That would have been easy if it wasn't for debug statements during
> tree-inline (from which call redirection is also invoked).  Debug
> statements are decoupled from the rest at this point and iterating
> over uses of SSAs does not bring them up.  During tree-inline they are
> handled especially at the end, I assume in order to make sure that
> relative ordering of UIDs are the same with and without debug info.
>
> This means that during tree-inline we need to make a hash of killed
> SSAs, that we already have in copy_body_data, available to the
> function making the purging.  So the patch duly does also that, making
> the interface slightly ugly.
>
> Bootstrapped and tested on x86_64-linux.  OK for master?  (I am not sure
> the problem is grave enough to warrant backporting to release branches
> but can do that as well if people think I should.)
>
> Thanks,
>
> Martin
>
>
> gcc/ChangeLog:
>
> 2023-05-11  Martin Jambor  
>
>   PR ipa/108007
>   * cgraph.h (cgraph_edge): Add a parameter to
>   redirect_call_stmt_to_callee.
>   * ipa-param-manipulation.h (ipa_param_adjustments): Added a
>   parameter to modify_call.
>   * cgraph.cc (cgraph_edge::redirect_call_stmt_to_callee): New
>   parameter killed_ssas, pass it to padjs->modify_call.
>   * ipa-param-manipulation.cc (purge_transitive_uses): New function.
>   (ipa_param_adjustments::modify_call): New parameter killed_ssas.
>   Instead of substituting uses, invoke purge_transitive_uses.  If
>   hash of killed SSAs has not been provided, create a temporary one
>   and release SSAs that have been added to it.
>   * tree-inline.cc (redirect_all_calls): Create
>   id->killed_new_ssa_names earlier, pass it to edge redirection,
>   adjust a comment.
>   (copy_body): Release SSAs in id->killed_new_ssa_names.
>
> gcc/testsuite/ChangeLog:
>
> 2023-05-11  Martin Jambor  
>
>   PR ipa/108007
>   * gcc.dg/ipa/pr108007.c: New test.
> ---
>  gcc/cgraph.cc   | 10 +++-
>  gcc/cgraph.h|  9 ++-
>  gcc/ipa-param-manipulation.cc   | 85 +
>  gcc/ipa-param-manipulation.h|  3 +-
>  gcc/testsuite/gcc.dg/ipa/pr108007.c | 32 +++
>  gcc/tree-inline.cc  | 28 ++
>  6 files changed, 129 insertions(+), 38 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/ipa/pr108007.c
>
> diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
> index e8f9bec8227..5e923bf0557 100644
> --- a/gcc/cgraph.cc
> +++ b/gcc/cgraph.cc
> @@ -1403,11 +1403,17 @@ cgraph_edge::redirect_callee (cgraph_node *n)
> speculative indirect call, remove "speculative" of the indirect call and
> also redirect stmt to it's final direct target.
>  
> +   When called from within tree-inline, KILLED_SSAs has to contain the 
> pointer
> +   to killed_new_ssa_names within the copy_body_data structure and SSAs
> +   discovered to be useless (if LHS is removed) will be added to it, 
> otherwise
> +   it needs to be NULL.
> +
> It is up to caller to iteratively transform each "speculative"
> direct call as appropriate.  */
>  
>  gimple *
> -cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge *e)
> +cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge *e,
> +hash_set  *killed_ssas)
>  {
>tree decl = gimple_call_fndecl (e->call_stmt);
>gcall *new_stmt;
> @@ -1527,7 +1533,7 @@ cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge 
> *e)
>   remove_stmt_from_eh_lp (e->call_stmt);
>  
>tree old_fntype = gimple_call_fntype (e->call_stmt);
> -  new_stmt = padjs->modify_call (e, false);
> +  new_stmt = padjs->modify_call (e, false, killed_ssas);
>cgraph_node *origin = e->callee;
>while (origin->clone_of)
>   origin = origin->clone_of;
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index f5f54769eda..c1a3691b6f5 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -1833,9 +1833,16 @@ public:
>   speculative indirect call, remove "speculative" of the indirect call and
>   also redirect stmt to it's final direct target.
>  
> + When called from within tree-inline, KILLED_SSAs has to contain the
> + pointer to killed_new_ssa_names 

Re: [PATCH] ipa-sra: Allow IPA-SRA in presence of returns which will be removed

2023-09-01 Thread Martin Jambor
Hello

and ping.

Thanks,

Martin


On Fri, Aug 18 2023, Martin Jambor wrote:
> Hi,
>
> testing on 32bit arm revealed that even the simplest case of PR 110378
> was still not resolved there because destructors were returning this
> pointer.  Needless to say, the return value of those destructors often
> is just not used, which IPA-SRA can already detect in time.  Since
> such enhancement seems generally useful, here it is.
>
> The patch simply adds two flags to the respective summaries to mark down
> situations when it encounters either a simple direct use of a default
> definition SSA_NAME of a parameter, which means that the parameter may
> still be split when return value is removed, and when any derived use
> of it is returned, allowing for complete removal in that case, instead
> of discarding it as a candidate for removal or splitting like we do
> now.  The IPA phase then simply checks that we indeed plan to remove
> the return value before allowing any transformation to be considered
> in such cases.
>
> Bootstrapped, LTO-bootstrapped and tested on x86_64-linux.  OK for
> master?
>
> Thanks,
>
> Martin
>
>
> gcc/ChangeLog:
>
> 2023-08-18  Martin Jambor  
>
>   PR ipa/110378
>   * ipa-param-manipulation.cc
>   (ipa_param_body_adjustments::mark_dead_statements): Verify that any
>   return uses of PARAM will be removed.
>   (ipa_param_body_adjustments::mark_clobbers_dead): Likewise.
>   * ipa-sra.cc (isra_param_desc): New fields
>   remove_only_when_retval_removed and split_only_when_retval_removed.
>   (struct gensum_param_desc): Likewise.  Fix comment long line.
>   (ipa_sra_function_summaries::duplicate): Copy the new flags.
>   (dump_gensum_param_descriptor): Dump the new flags.
>   (dump_isra_param_descriptor): Likewise.
>   (isra_track_scalar_value_uses): New parameter desc.  Set its flag
>   remove_only_when_retval_removed when encountering a simple return.
>   (isra_track_scalar_param_local_uses): Replace parameter call_uses_p
>   with desc.  Pass it to isra_track_scalar_value_uses and set its
>   call_uses.
>   (ptr_parm_has_nonarg_uses): Accept parameter descriptor as a
>   parameter.  If there is a direct return use, mark any..
>   (create_parameter_descriptors): Pass the whole parameter descriptor to
>   isra_track_scalar_param_local_uses and ptr_parm_has_nonarg_uses.
>   (process_scan_results): Copy the new flags.
>   (isra_write_node_summary): Stream the new flags.
>   (isra_read_node_info): Likewise.
>   (adjust_parameter_descriptions): Check that transformations
>   requiring return removal only happen when return value is removed.
>   Restructure main loop.  Adjust dump message.
>
> gcc/testsuite/ChangeLog:
>
> 2023-08-18  Martin Jambor  
>
>   PR ipa/110378
>   * gcc.dg/ipa/ipa-sra-32.c: New test.
>   * gcc.dg/ipa/pr110378-4.c: Likewise.
>   * gcc.dg/ipa/ipa-sra-4.c: Use a return value.
> ---
>  gcc/ipa-param-manipulation.cc |   7 +-
>  gcc/ipa-sra.cc| 247 +-
>  gcc/testsuite/gcc.dg/ipa/ipa-sra-32.c |  30 
>  gcc/testsuite/gcc.dg/ipa/ipa-sra-4.c  |   4 +-
>  gcc/testsuite/gcc.dg/ipa/pr110378-4.c |  50 ++
>  5 files changed, 251 insertions(+), 87 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-sra-32.c
>  create mode 100644 gcc/testsuite/gcc.dg/ipa/pr110378-4.c
>
> diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc
> index 4a185ddbdf4..ae52f17b2c9 100644
> --- a/gcc/ipa-param-manipulation.cc
> +++ b/gcc/ipa-param-manipulation.cc
> @@ -1163,6 +1163,8 @@ ipa_param_body_adjustments::mark_dead_statements (tree 
> dead_param,
>   stack.safe_push (lhs);
>   }
>   }
> +   else if (gimple_code (stmt) == GIMPLE_RETURN)
> + gcc_assert (m_adjustments && m_adjustments->m_skip_return);
> else
>   /* IPA-SRA does not analyze other types of statements.  */
>   gcc_unreachable ();
> @@ -1182,7 +1184,8 @@ ipa_param_body_adjustments::mark_dead_statements (tree 
> dead_param,
>  }
>  
>  /* Put all clobbers of of dereference of default definition of PARAM into
> -   m_dead_stmts.  */
> +   m_dead_stmts.  If there are returns among uses of the default definition 
> of
> +   PARAM, verify they will be stripped off the return value.  */
>  
>  void
>  ipa_param_body_adjustments::mark_clobbers_dead (tree param)
> @@ -1200,6 +1203,8 @@ ipa_param_body_adjustments::mark_clobbers_dead (tree 
> param)
>   gimple *stmt = USE_STMT (use_p);
>   if (gimple_clobber_p (stmt))
> m_dead_stmts.add (stmt);
> + else if (gimple_code (stmt) == GIMPLE_RETURN)
> +   gcc_assert (m_adjustments && m_adjustments->m_skip_return);
> }
>  }
>  
> diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
> index edba364f56e..817f29ea62f 100644
> --- a/gcc/ipa-sra.cc
> +++ b/gcc/ipa-sra.cc
> @@ -185,6 +185,13 @@ struct GTY(()) 

[PATCH] c++, v2: Diagnose [basic.scope.block]/2 violations even in compound-stmt of function-try-block [PR52953]

2023-09-01 Thread Jakub Jelinek via Gcc-patches
On Thu, Aug 31, 2023 at 03:52:22PM -0400, Jason Merrill wrote:
> On 8/31/23 03:20, Jakub Jelinek wrote:
> > As the following testcase shows, while check_local_shadow diagnoses most of
> > the [basic.scope.block]/2 violations, it doesn't diagnose when parameter's
> > name is redeclared inside of the compound-stmt of a function-try-block.
> > 
> > There is in that case an extra scope (sk_try, whose parent is an artificial
> > sk_block, then for FUNCTION_NEEDS_BODY_BLOCK another sk_block, and only then
> > sk_function_parms).
> > 
> > The following patch fixes that.
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > 
> > 2023-08-31  Jakub Jelinek  
> > 
> > PR c++/52953
> > * cp-tree.h (struct language_function): Add x_in_function_try_block
> > member.
> 
> How about adding a flag to cp_binding_level instead?  Maybe to mark the
> artificial sk_block level as such, which we could use for both this case and
> the FUNCTION_NEEDS_BODY_BLOCK cases.

So like this?

It actually changes behaviour on the
void foo (int x) try {} catch (int x) {} case, where previously
this triggered the
   || (TREE_CODE (old) == PARM_DECL
   && (current_binding_level->kind == sk_catch
   || current_binding_level->level_chain->kind == sk_catch)
   && in_function_try_handler))
{
  auto_diagnostic_group d;
  if (permerror (DECL_SOURCE_LOCATION (decl),
 "redeclaration of %q#D", decl))
inform (DECL_SOURCE_LOCATION (old),
"%q#D previously declared here", old);
diagnostics (note, just the current_binding_level->kind == sk_catch
case), while now it triggers already the earlier
  if (b->kind == sk_function_parms)
{
  error_at (DECL_SOURCE_LOCATION (decl),
"declaration of %q#D shadows a parameter", decl);
  inform (DECL_SOURCE_LOCATION (old),
  "%q#D previously declared here", old);
error.  If you think it is important to differentiate that,
I guess I could guard the while (b->artificial) loop with say
+ if (!in_function_try_handler
+ || current_binding_level->kind != sk_catch)
while (b->artificial)
  b = b->level_chain;
and adjust the 2 testcases.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
or with modification?

2023-09-01  Jakub Jelinek  

PR c++/52953
* name-lookup.h (struct cp_binding_level): Add artificial bit-field.
Formatting fixes.
* name-lookup.cc (check_local_shadow): Skip artificial bindings when
checking if parameter scope is parent scope.  Don't special case
FUNCTION_NEEDS_BODY_BLOCK.
* decl.cc (begin_function_body): Set
current_binding_level->artificial.
* semantics.cc (begin_function_try_block): Likewise.

* g++.dg/diagnostic/redeclaration-3.C: New test.
* g++.dg/parse/pr31952-3.C: Expect different diagnostic wording.

--- gcc/cp/name-lookup.h.jj 2023-08-21 11:57:33.105460770 +0200
+++ gcc/cp/name-lookup.h2023-09-01 10:15:20.137943395 +0200
@@ -292,11 +292,11 @@ struct GTY(()) cp_binding_level {
   only valid if KIND == SK_TEMPLATE_PARMS.  */
   BOOL_BITFIELD explicit_spec_p : 1;
 
-  /* true means make a BLOCK for this level regardless of all else.  */
+  /* True means make a BLOCK for this level regardless of all else.  */
   unsigned keep : 1;
 
   /* Nonzero if this level can safely have additional
-  cleanup-needing variables added to it.  */
+ cleanup-needing variables added to it.  */
   unsigned more_cleanups_ok : 1;
   unsigned have_cleanups : 1;
 
@@ -308,9 +308,13 @@ struct GTY(()) cp_binding_level {
   unsigned defining_class_p : 1;
 
   /* True for SK_FUNCTION_PARMS of a requires-expression.  */
-  unsigned requires_expression: 1;
+  unsigned requires_expression : 1;
 
-  /* 22 bits left to fill a 32-bit word.  */
+  /* True for artificial blocks which should be ignored when finding
+ parent scope.  */
+  unsigned artificial : 1;
+
+  /* 21 bits left to fill a 32-bit word.  */
 };
 
 /* The binding level currently in effect.  */
--- gcc/cp/name-lookup.cc.jj2023-08-31 14:31:06.055762306 +0200
+++ gcc/cp/name-lookup.cc   2023-09-01 10:21:03.658118594 +0200
@@ -3146,8 +3146,10 @@ check_local_shadow (tree decl)
 them there.  */
  cp_binding_level *b = current_binding_level->level_chain;
 
- if (FUNCTION_NEEDS_BODY_BLOCK (current_function_decl))
-   /* Skip the ctor/dtor cleanup level.  */
+ /* Skip artificially added scopes which aren't present
+in the C++ standard, e.g. for function-try-block or
+ctor/dtor cleanups.  */
+ while (b->artificial)
b = b->level_chain;
 
  /* [basic.scope.param] A parameter name shall not be redeclared
--- gcc/cp/decl.cc.jj   2023-08-31 

Re: [PATCH] Add Types to Un-Typed Pic Instructions:

2023-09-01 Thread Jeff Law via Gcc-patches




On 8/31/23 17:01, Edwin Lu wrote:

Related Discussion:
https://inbox.sourceware.org/gcc-patches/12fb5088-3f28-0a69-de1e-f387371a5...@gmail.com/

This patch updates the pic instructions to ensure that no insn is left
without a type attribute.

Tested for regressions using rv32/64 multilib with newlib/linux.

gcc/Changelog:

* config/riscv/pic.md: Update types

OK.  Thanks.
jeff


Re: [PATCH] RISC-V: zicond: remove bogus opt2 pattern

2023-09-01 Thread Jeff Law via Gcc-patches




On 8/31/23 11:57, Vineet Gupta wrote:



On 8/31/23 06:51, Jeff Law wrote:



On 8/30/23 15:57, Vineet Gupta wrote:

This was tripping up gcc.c-torture/execute/pr60003.c at -O1 since the
pattern semantics can't be expressed by zicond instructions.

This involves test code snippet:

   if (a == 0)
return 0;
   else
return x;
 }

which is equivalent to:  "x = (a != 0) ? x : a"

Isn't it

x = (a == 0) ? 0 : x

Which seems like it ought to fit zicond just fine.


Logically they are equivalent, but 



If we take yours;

x = (a != 0) ? x : a

And simplify with the known value of a on the false arm we get:

x = (a != 0 ) ? x : 0;

Which is equivalent to

x = (a == 0) ? 0 : x;

So ISTM this does fit zicond just fine.


I could very well be mistaken, but define_insn is a pattern match and 
opt2 has *ne* so the expression has to be in != form and thus needs to 
work with that condition. No ?

My point was  that

x = (a != 0) ? x : 0

is equivalent to

x = (a == 0) ? 0 : x

You can invert the condition and swap the arms and get the same 
semantics.  Thus if one can be supported, so can the other as they're 
functionally equivalent.  It may be that we've goofed something in 
handling the inverted case, but conceptually we ought to be able to 
handle both.


I don't doubt you've got a failure, but it's also the case that I'm not 
seeing the same failure when I turn on zicond and run the execute.exp 
tests.  So clearly there's a difference somewhere in what we're doing.


So perhaps we should start with comparing assembly output for the test 
in question.  Can you pass yours along, I'll diff them this afternoon 
and see what we find.


jeff


[PATCH v8 4/4] c++modules: report module mapper files as a dependency

2023-09-01 Thread Ben Boeckel via Gcc-patches
It affects the build, and if used as a static file, can reliably be
tracked using the `-MF` mechanism.

gcc/cp/:

* mapper-client.cc, mapper-client.h (open_module_client): Accept
dependency tracking and track module mapper files as
dependencies.
* module.cc (make_mapper, get_mapper): Pass the dependency
tracking class down.

gcc/testsuite/:

* g++.dg/modules/depreport-2.modmap: New test.
* g++.dg/modules/depreport-2_a.C: New test.
* g++.dg/modules/depreport-2_b.C: New test.
* g++.dg/modules/test-depfile.py: Support `:|` syntax output
when generating modules.

Signed-off-by: Ben Boeckel 
---
 gcc/cp/mapper-client.cc   |  5 +
 gcc/cp/mapper-client.h|  1 +
 gcc/cp/module.cc  | 18 -
 .../g++.dg/modules/depreport-2.modmap |  2 ++
 gcc/testsuite/g++.dg/modules/depreport-2_a.C  | 15 ++
 gcc/testsuite/g++.dg/modules/depreport-2_b.C  | 14 +
 gcc/testsuite/g++.dg/modules/test-depfile.py  | 20 +++
 7 files changed, 66 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/depreport-2.modmap
 create mode 100644 gcc/testsuite/g++.dg/modules/depreport-2_a.C
 create mode 100644 gcc/testsuite/g++.dg/modules/depreport-2_b.C

diff --git a/gcc/cp/mapper-client.cc b/gcc/cp/mapper-client.cc
index 39e80df2d25..92727195246 100644
--- a/gcc/cp/mapper-client.cc
+++ b/gcc/cp/mapper-client.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic-core.h"
 #include "mapper-client.h"
 #include "intl.h"
+#include "mkdeps.h"
 
 #include "../../c++tools/resolver.h"
 
@@ -132,6 +133,7 @@ spawn_mapper_program (char const **errmsg, std::string 
,
 
 module_client *
 module_client::open_module_client (location_t loc, const char *o,
+  class mkdeps *deps,
   void (*set_repo) (const char *),
   char const *full_program_name)
 {
@@ -285,6 +287,9 @@ module_client::open_module_client (location_t loc, const 
char *o,
  errmsg = "opening";
else
  {
+   /* Add the mapper file to the dependency tracking. */
+   if (deps)
+ deps_add_dep (deps, name.c_str ());
if (int l = r->read_tuple_file (fd, ident, false))
  {
if (l > 0)
diff --git a/gcc/cp/mapper-client.h b/gcc/cp/mapper-client.h
index b32723ce296..a3b0b8adc51 100644
--- a/gcc/cp/mapper-client.h
+++ b/gcc/cp/mapper-client.h
@@ -55,6 +55,7 @@ public:
 
 public:
   static module_client *open_module_client (location_t loc, const char *option,
+   class mkdeps *,
void (*set_repo) (const char *),
char const *);
   static void close_module_client (location_t loc, module_client *);
diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index f3acc4e02fe..77c9edcbc04 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -3969,12 +3969,12 @@ static GTY(()) vec 
*partial_specializations;
 /* Our module mapper (created lazily).  */
 module_client *mapper;
 
-static module_client *make_mapper (location_t loc);
-inline module_client *get_mapper (location_t loc)
+static module_client *make_mapper (location_t loc, class mkdeps *deps);
+inline module_client *get_mapper (location_t loc, class mkdeps *deps)
 {
   auto *res = mapper;
   if (!res)
-res = make_mapper (loc);
+res = make_mapper (loc, deps);
   return res;
 }
 
@@ -14033,7 +14033,7 @@ get_module (const char *ptr)
 /* Create a new mapper connecting to OPTION.  */
 
 module_client *
-make_mapper (location_t loc)
+make_mapper (location_t loc, class mkdeps *deps)
 {
   timevar_start (TV_MODULE_MAPPER);
   const char *option = module_mapper_name;
@@ -14041,7 +14041,7 @@ make_mapper (location_t loc)
 option = getenv ("CXX_MODULE_MAPPER");
 
   mapper = module_client::open_module_client
-(loc, option, _cmi_repo,
+(loc, option, deps, _cmi_repo,
  (save_decoded_options[0].opt_index == OPT_SPECIAL_program_name)
  && save_decoded_options[0].arg != progname
  ? save_decoded_options[0].arg : nullptr);
@@ -19506,7 +19506,7 @@ maybe_translate_include (cpp_reader *reader, line_maps 
*lmaps, location_t loc,
   dump.push (NULL);
 
   dump () && dump ("Checking include translation '%s'", path);
-  auto *mapper = get_mapper (cpp_main_loc (reader));
+  auto *mapper = get_mapper (cpp_main_loc (reader), cpp_get_deps (reader));
 
   size_t len = strlen (path);
   path = canonicalize_header_name (NULL, loc, true, path, len);
@@ -19622,7 +19622,7 @@ module_begin_main_file (cpp_reader *reader, line_maps 
*lmaps,
 static void
 name_pending_imports (cpp_reader *reader)
 {
-  auto *mapper = get_mapper (cpp_main_loc (reader));
+  auto *mapper = get_mapper (cpp_main_loc 

[PATCH v8 3/4] c++modules: report imported CMI files as dependencies

2023-09-01 Thread Ben Boeckel via Gcc-patches
They affect the build, so report them via `-MF` mechanisms.

gcc/cp/

* module.cc (do_import): Report imported CMI files as
dependencies.

gcc/testsuite/

* g++.dg/modules/depreport-1_a.C: New test.
* g++.dg/modules/depreport-1_b.C: New test.
* g++.dg/modules/test-depfile.py: New tool for validating depfile
information.
* lib/modules.exp: Support for validating depfile contents.

Signed-off-by: Ben Boeckel 
---
 gcc/cp/module.cc |   3 +
 gcc/testsuite/g++.dg/modules/depreport-1_a.C |  10 +
 gcc/testsuite/g++.dg/modules/depreport-1_b.C |  12 ++
 gcc/testsuite/g++.dg/modules/test-depfile.py | 187 +++
 gcc/testsuite/lib/modules.exp|  29 +++
 5 files changed, 241 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/modules/depreport-1_a.C
 create mode 100644 gcc/testsuite/g++.dg/modules/depreport-1_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/test-depfile.py

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 9df60d695b1..f3acc4e02fe 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -18968,6 +18968,9 @@ module_state::do_import (cpp_reader *reader, bool 
outermost)
   dump () && dump ("CMI is %s", file);
   if (note_module_cmi_yes || inform_cmi_p)
inform (loc, "reading CMI %qs", file);
+  /* Add the CMI file to the dependency tracking. */
+  if (cpp_get_deps (reader))
+   deps_add_dep (cpp_get_deps (reader), file);
   fd = open (file, O_RDONLY | O_CLOEXEC | O_BINARY);
   e = errno;
 }
diff --git a/gcc/testsuite/g++.dg/modules/depreport-1_a.C 
b/gcc/testsuite/g++.dg/modules/depreport-1_a.C
new file mode 100644
index 000..241701728a2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/depreport-1_a.C
@@ -0,0 +1,10 @@
+// { dg-additional-options -fmodules-ts }
+
+export module Foo;
+// { dg-module-cmi Foo }
+
+export class Base
+{
+public:
+  int m;
+};
diff --git a/gcc/testsuite/g++.dg/modules/depreport-1_b.C 
b/gcc/testsuite/g++.dg/modules/depreport-1_b.C
new file mode 100644
index 000..b6e317c6703
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/depreport-1_b.C
@@ -0,0 +1,12 @@
+// { dg-additional-options -fmodules-ts }
+// { dg-additional-options -MD }
+// { dg-additional-options "-MF depreport-1.d" }
+
+import Foo;
+
+void foo ()
+{
+  Base b;
+}
+
+// { dg-final { run-check-module-dep-expect-input "depreport-1.d" 
"gcm.cache/Foo.gcm" } }
diff --git a/gcc/testsuite/g++.dg/modules/test-depfile.py 
b/gcc/testsuite/g++.dg/modules/test-depfile.py
new file mode 100644
index 000..ea4edb61434
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/test-depfile.py
@@ -0,0 +1,187 @@
+import json
+
+
+# Parameters.
+ALL_ERRORS = False
+
+
+def _report_error(msg):
+'''Report an error.'''
+full_msg = 'ERROR: ' + msg
+if ALL_ERRORS:
+print(full_msg)
+else:
+raise RuntimeError(full_msg)
+
+
+class Token(object):
+pass
+
+
+class Output(Token):
+def __init__(self, path):
+self.path = path
+
+
+class Input(Token):
+def __init__(self, path):
+self.path = path
+
+
+class Colon(Token):
+pass
+
+
+class Append(Token):
+pass
+
+
+class Variable(Token):
+def __init__(self, name):
+self.name = name
+
+
+class Word(Token):
+def __init__(self, name):
+self.name = name
+
+
+def validate_depfile(depfile, expect_input=None):
+'''Validate a depfile contains some information
+
+Returns `False` if the information is not found.
+'''
+with open(depfile, 'r') as fin:
+depfile_content = fin.read()
+
+real_lines = []
+join_line = False
+for line in depfile_content.split('\n'):
+# Join the line if needed.
+if join_line:
+line = real_lines.pop() + line
+
+# Detect line continuations.
+join_line = line.endswith('\\')
+# Strip line continuation characters.
+if join_line:
+line = line[:-1]
+
+# Add to the real line set.
+real_lines.append(line)
+
+# Perform tokenization.
+tokenized_lines = []
+for line in real_lines:
+tokenized = []
+join_word = False
+for word in line.split(' '):
+if join_word:
+word = tokenized.pop() + ' ' + word
+
+# Detect word joins.
+join_word = word.endswith('\\')
+# Strip escape character.
+if join_word:
+word = word[:-1]
+
+# Detect `:` at the end of a word.
+if word.endswith(':'):
+tokenized.append(word[:-1])
+word = word[-1]
+
+# Add word to the tokenized set.
+tokenized.append(word)
+
+tokenized_lines.append(tokenized)
+
+# Parse.
+ast = []
+for line in tokenized_lines:
+kind = None
+for token in line:
+if token == ':':
+kind = 'dependency'
+

[PATCH v8 2/4] p1689r5: initial support

2023-09-01 Thread Ben Boeckel via Gcc-patches
This patch implements support for [P1689R5][] to communicate the C++20
module dependencies to build systems so that they may build `.gcm` files
in the proper order.

Support is communicated through the following three new flags:

- `-fdeps-format=` specifies the format for the output. Currently named
  `p1689r5`.

- `-fdeps-file=` specifies the path to the file to write the format to.

- `-fdeps-target=` specifies the `.o` that will be written for the TU
  that is scanned. This is required so that the build system can
  correlate the dependency output with the actual compilation that will
  occur.

CMake supports this format as of 17 Jun 2022 (to be part of 3.25.0)
using an experimental feature selection (to allow for future usage
evolution without committing to how it works today). While it remains
experimental, docs may be found in CMake's documentation for
experimental features.

Future work may include using this format for Fortran module
dependencies as well, however this is still pending work.

[P1689R5]: https://isocpp.org/files/papers/P1689R5.html
[cmake-experimental]: 
https://gitlab.kitware.com/cmake/cmake/-/blob/master/Help/dev/experimental.rst

TODO:

- header-unit information fields

Header units (including the standard library headers) are 100%
unsupported right now because the `-E` mechanism wants to import their
BMIs. A new mode (i.e., something more workable than existing `-E`
behavior) that mocks up header units as if they were imported purely
from their path and content would be required.

- non-utf8 paths

The current standard says that paths that are not unambiguously
represented using UTF-8 are not supported (because these cases are rare
and the extra complication is not worth it at this time). Future
versions of the format might have ways of encoding non-UTF-8 paths. For
now, this patch just doesn't support non-UTF-8 paths (ignoring the
"unambiguously representable in UTF-8" case).

- figure out why junk gets placed at the end of the file

Sometimes it seems like the file gets a lot of `NUL` bytes appended to
it. It happens rarely and seems to be the result of some
`ftruncate`-style call which results in extra padding in the contents.
Noting it here as an observation at least.

libcpp/

* include/cpplib.h: Add cpp_fdeps_format enum.
(cpp_options): Add fdeps_format field
(cpp_finish): Add structured dependency fdeps_stream parameter.
* include/mkdeps.h (deps_add_module_target): Add flag for
whether a module is exported or not.
(fdeps_add_target): Add function.
(deps_write_p1689r5): Add function.
* init.cc (cpp_finish): Add new preprocessor parameter used for C++
module tracking.
* mkdeps.cc (mkdeps): Implement P1689R5 output.

gcc/

* doc/invoke.texi: Document -fdeps-format=, -fdeps-file=, and
-fdeps-target= flags.
* gcc.cc: add defaults for -fdeps-target= and -fdeps-file= when
only -fdeps-format= is specified.
* json.h: Add a TODO item to refactor out to share with
`libcpp/mkdeps.cc`.

gcc/c-family/

* c-opts.cc (c_common_handle_option): Add fdeps_file variable and
-fdeps-format=, -fdeps-file=, and -fdeps-target= parsing.
* c.opt: Add -fdeps-format=, -fdeps-file=, and -fdeps-target=
flags.

gcc/cp/

* module.cc (preprocessed_module): Pass whether the module is
exported to dependency tracking.

gcc/testsuite/

* g++.dg/modules/depflags-f-MD.C: New test.
* g++.dg/modules/depflags-f.C: New test.
* g++.dg/modules/depflags-fi.C: New test.
* g++.dg/modules/depflags-fj-MD.C: New test.
* g++.dg/modules/depflags-fj.C: New test.
* g++.dg/modules/depflags-fjo-MD.C: New test.
* g++.dg/modules/depflags-fjo.C: New test.
* g++.dg/modules/depflags-fo-MD.C: New test.
* g++.dg/modules/depflags-fo.C: New test.
* g++.dg/modules/depflags-j-MD.C: New test.
* g++.dg/modules/depflags-j.C: New test.
* g++.dg/modules/depflags-jo-MD.C: New test.
* g++.dg/modules/depflags-jo.C: New test.
* g++.dg/modules/depflags-o-MD.C: New test.
* g++.dg/modules/depflags-o.C: New test.
* g++.dg/modules/p1689-1.C: New test.
* g++.dg/modules/p1689-1.exp.ddi: New test expectation.
* g++.dg/modules/p1689-2.C: New test.
* g++.dg/modules/p1689-2.exp.ddi: New test expectation.
* g++.dg/modules/p1689-3.C: New test.
* g++.dg/modules/p1689-3.exp.ddi: New test expectation.
* g++.dg/modules/p1689-4.C: New test.
* g++.dg/modules/p1689-4.exp.ddi: New test expectation.
* g++.dg/modules/p1689-5.C: New test.
* g++.dg/modules/p1689-5.exp.ddi: New test expectation.
* g++.dg/modules/modules.exp: Load new P1689 library routines.
* g++.dg/modules/test-p1689.py: New tool for validating P1689 output.
* lib/modules.exp: Support for 

[PATCH v8 0/4] P1689R5 support

2023-09-01 Thread Ben Boeckel via Gcc-patches
Hi,

This patch series adds initial support for ISO C++'s [P1689R5][], a
format for describing C++ module requirements and provisions based on
the source code. This is required because compiling C++ with modules is
not embarrassingly parallel and needs to be ordered to ensure that
`import some_module;` can be satisfied in time by making sure that any
TU with `export import some_module;` is compiled first.

[P1689R5]: https://isocpp.org/files/papers/P1689R5.html

I've also added patches to include imported module CMI files and the
module mapper file as dependencies of the compilation. I briefly looked
into adding dependencies on response files as well, but that appeared to
need some code contortions to have a `class mkdeps` available before
parsing the command line or to keep the information around until one was
made.

I'd like feedback on the approach taken here with respect to the
user-visible flags. I'll also note that header units are not supported
at this time because the current `-E` behavior with respect to
`import <some_header>;` is to search for an appropriate `.gcm` file which is not
something such a "scan" can support. A new mode will likely need to be
created (e.g., replacing `-E` with `-fc++-module-scanning` or something)
where headers are looked up "normally" and processed only as much as
scanning requires.

FWIW, Clang has taken an alternate approach with its `clang-scan-deps`
tool rather than using the compiler directly.

Thanks,

--Ben

---
v7 -> v8:

- rename `DEPS_FMT_` enum variants to `FDEPS_FMT_` to match the
  associated flag
- memory leak fix in the `join` specfunc implementation (also better
  comments), both from Jason
- formatting fix in `mkdeps.cc` for `write_make_modules_deps` assignment
- comments on new functions for P1689R5 implementation

v6 -> v7:

- rebase onto `master` (80ae426a195 (d: Fix core.volatile.volatileLoad
  discarded if result is unused, 2023-07-02))
- add test cases for patches 3 and 4 (new dependency reporting in `-MF`)
- add a Python script to test aspects of generated dependency files
- a new `join` spec function to support `-fdeps-*` defaults based on the
  `-o` flag (needed to strip the leading space that appears otherwise)
- note that JSON writing support should be factored out for use by
  `libcpp` and `gcc` (libiberty?)
- use `.ddi` for the extension of `-fdeps-*` output files by default
- support defaults for `-fdeps-file=` and `-fdeps-target=` when only
  `-fdeps-format=` is provided (with tests)
- error if `-MF` and `-fdeps-file=` are both the same (non-`stdout`)
  file as their formats are incompatible
- expand the documentation on how the `-fdeps-*` flags should be used

v5 -> v6:

- rebase onto `master` (585c660f041 (reload1: Change return type of
  predicate function from int to bool, 2023-06-06))
- fix crash related to reporting imported CMI files as dependencies
- rework utf-8 validity to patch the new `cpp_valid_utf8_p` function
  instead of the core utf-8 decoding routine to reject invalid
  codepoints (preserves higher-level error detection of invalid utf-8)
- harmonize `fdeps` spelling in flags, variables, comments, etc.
- rename `-fdeps-output=` to `-fdeps-target=`

v4 -> v5:

- add dependency tracking for imported modules to `-MF`
- add dependency tracking for static module mapper files given to
  `-fmodule-mapper=`

v3 -> v4:

- add missing spaces between function names and arguments

v2 -> v3:

- changelog entries moved to commit messages
- documentation updated/added in the UTF-8 routine editing

v1 -> v2:

- removal of the `deps_write(extra)` parameter to option-checking where
  needed
- default parameter of `cpp_finish(fdeps_stream = NULL)`
- unification of libcpp UTF-8 validity functions from v1
- test cases for flag parsing states (depflags-*) and p1689 output
  (p1689-*)

Ben Boeckel (4):
  spec: add a spec function to join arguments
  p1689r5: initial support
  c++modules: report imported CMI files as dependencies
  c++modules: report module mapper files as a dependency

 gcc/c-family/c-opts.cc|  44 +++-
 gcc/c-family/c.opt|  12 +
 gcc/cp/mapper-client.cc   |   5 +
 gcc/cp/mapper-client.h|   1 +
 gcc/cp/module.cc  |  24 +-
 gcc/doc/invoke.texi   |  27 +++
 gcc/gcc.cc|  27 ++-
 gcc/json.h|   3 +
 gcc/testsuite/g++.dg/modules/depflags-f-MD.C  |   2 +
 gcc/testsuite/g++.dg/modules/depflags-f.C |   3 +
 gcc/testsuite/g++.dg/modules/depflags-fi.C|   4 +
 gcc/testsuite/g++.dg/modules/depflags-fj-MD.C |   3 +
 .../g++.dg/modules/depflags-fj-MF-share.C |   6 +
 gcc/testsuite/g++.dg/modules/depflags-fj.C|   4 +
 .../g++.dg/modules/depflags-fjo-MD.C  |   4 +
 gcc/testsuite/g++.dg/modules/depflags-fjo.C   |   5 +
 gcc/testsuite/g++.dg/modules/depflags-fo-MD.C |   3 +
 gcc/testsuite/g++.dg/modules/depflags-fo.C 

[PATCH v8 1/4] spec: add a spec function to join arguments

2023-09-01 Thread Ben Boeckel via Gcc-patches
When passing `-o` flags to other options, the typical `-o foo` spelling
leaves a leading whitespace when replacing elsewhere. This ends up
creating flags spelled as `-some-option-with-arg= foo.ext` which doesn't
parse properly. When attempting to make a spec function to just remove
the leading whitespace, the argument splitting ends up masking the
whitespace. However, the intended extension *also* ends up being its own
argument. To perform the desired behavior, the arguments need to be
concatenated together.

gcc/:

* gcc.cc (join_spec_func): Add a spec function to join all
arguments.

Signed-off-by: Ben Boeckel 
---
 gcc/gcc.cc | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index fdfac0b4fe4..4c4e81dee50 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -447,6 +447,7 @@ static const char *greater_than_spec_func (int, const char 
**);
 static const char *debug_level_greater_than_spec_func (int, const char **);
 static const char *dwarf_version_greater_than_spec_func (int, const char **);
 static const char *find_fortran_preinclude_file (int, const char **);
+static const char *join_spec_func (int, const char **);
 static char *convert_white_space (char *);
 static char *quote_spec (char *);
 static char *quote_spec_arg (char *);
@@ -1772,6 +1773,7 @@ static const struct spec_function static_spec_functions[] 
=
   { "debug-level-gt",  debug_level_greater_than_spec_func },
   { "dwarf-version-gt",dwarf_version_greater_than_spec_func },
   { "fortran-preinclude-file", find_fortran_preinclude_file},
+  { "join",join_spec_func},
 #ifdef EXTRA_SPEC_FUNCTIONS
   EXTRA_SPEC_FUNCTIONS
 #endif
@@ -10975,6 +10977,27 @@ find_fortran_preinclude_file (int argc, const char 
**argv)
   return result;
 }
 
+/* The function takes any number of arguments and joins them together.
+
+   This seems to be necessary to build "-fjoined=foo.b" from "-fseparate foo.a"
+   with a %{fseparate*:-fjoined=%.b$*} rule without adding undesired spaces:
+   when doing $* replacement we first replace $* with the rest of the switch
+   (in this case ""), and then add any arguments as arguments after the result,
+   resulting in "-fjoined= foo.b".  Using this function with e.g.
+   %{fseparate*:-fjoined=%:join(%.b$*)} gets multiple words as separate argv
+   elements instead of separated by spaces, and we paste them together.  */
+
+static const char *
+join_spec_func (int argc, const char **argv)
+{
+  if (argc == 1)
+return argv[0];
+  for (int i = 0; i < argc; ++i)
+obstack_grow (, argv[i], strlen (argv[i]));
+  obstack_1grow (, '\0');
+  return XOBFINISH (, const char *);
+}
+
 /* If any character in ORIG fits QUOTE_P (_, P), reallocate the string
so as to precede every one of them with a backslash.  Return the
original string or the reallocated one.  */
-- 
2.41.0



[PING][PATCH v2 2/2] libstdc++: Use new built-in trait __is_signed

2023-09-01 Thread Ken Matsui via Gcc-patches
Ping for the use of __is_signed built-in.

Sincerely,
Ken Matsui


On Wed, Jul 12, 2023 at 6:45 PM Ken Matsui  wrote:
>
> This patch lets libstdc++ use new built-in trait __is_signed.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_signed): Use __is_signed built-in trait.
> (is_signed_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 15 ++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 0e7a9c9c7f3..23ab5a4b1e5 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -865,6 +865,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  : public __bool_constant<__is_abstract(_Tp)>
>  { };
>
> +  /// is_signed
> +#if __has_builtin(__is_signed)
> +  template
> +struct is_signed
> +: public __bool_constant<__is_signed(_Tp)>
> +{ };
> +#else
>/// @cond undocumented
>templatebool = is_arithmetic<_Tp>::value>
> @@ -877,11 +884,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  { };
>/// @endcond
>
> -  /// is_signed
>template
>  struct is_signed
>  : public __is_signed_helper<_Tp>::type
>  { };
> +#endif
>
>/// is_unsigned
>template
> @@ -3240,8 +3247,14 @@ template 
>  template 
>inline constexpr bool is_final_v = __is_final(_Tp);
>
> +#if __has_builtin(__is_signed)
> +template 
> +  inline constexpr bool is_signed_v = __is_signed(_Tp);
> +#else
>  template 
>inline constexpr bool is_signed_v = is_signed<_Tp>::value;
> +#endif
> +
>  template 
>inline constexpr bool is_unsigned_v = is_unsigned<_Tp>::value;
>
> --
> 2.41.0
>


Re: [x86_64 PATCH] Improve __int128 argument passing (in ix86_expand_move).

2023-09-01 Thread Manolis Tsamis
On Fri, Sep 1, 2023 at 3:35 PM Roger Sayle  wrote:
>
>
> Hi Manolis,
> Many thanks.  If you haven't already, could you create/file a
> bug report at https://gcc.gnu.org/bugzilla/ which ensures this
> doesn't get lost/forgotten.  It provides a PR number for tracking
> discussions, and patches/fixes with PR numbers are (often)
> prioritized during the review and approval process.
>
Sure, opened as PR111267.

> I'll investigate what's going on.  Either my "improvements"
> need to be disabled for V2SF arguments, or the middle/back
> end needs to figure out how to efficiently shuffle these values,
> without reload moving them via integer registers, at least as
> efficiently as we were before.  As you/clang show, we could
> do better.
>
Sounds good, thanks a lot!
Manolis

> Thanks again, and sorry for any inconvenience.
> Best regards,
> Roger
> --
>
> > -Original Message-
> > From: Manolis Tsamis 
> > Sent: 01 September 2023 11:45
> > To: Uros Bizjak 
> > Cc: Roger Sayle ; gcc-patches@gcc.gnu.org
> > Subject: Re: [x86_64 PATCH] Improve __int128 argument passing (in
> > ix86_expand_move).
> >
> > Hi Roger,
> >
> > I've (accidentally) found a codegen regression that I bisected down to this 
> > patch.
> > For these two functions:
> >
> > typedef struct {
> >   float minx, miny;
> >   float maxx, maxy;
> > } AABB;
> >
> > int TestOverlap(AABB a, AABB b) {
> >   return a.minx <= b.maxx
> >   && a.miny <= b.maxy
> >   && a.maxx >= b.minx
> >   && a.maxx >= b.minx;
> > }
> >
> > int TestOverlap2(AABB a, AABB b) {
> >   return a.miny <= b.maxy
> >   && a.maxx >= b.minx;
> > }
> >
> > GCC used to produce this code:
> >
> > TestOverlap:
> > comiss  xmm3, xmm0
> > movqrdx, xmm0
> > movqrsi, xmm1
> > movqrax, xmm3
> > jb  .L10
> > shr rdx, 32
> > shr rax, 32
> > movdxmm0, eax
> > movdxmm4, edx
> > comiss  xmm0, xmm4
> > jb  .L10
> > movdxmm1, esi
> > xor eax, eax
> > comiss  xmm1, xmm2
> > setnb   al
> > ret
> > .L10:
> > xor eax, eax
> > ret
> > TestOverlap2:
> > shufps  xmm0, xmm0, 85
> > shufps  xmm3, xmm3, 85
> > comiss  xmm3, xmm0
> > jb  .L17
> > xor eax, eax
> > comiss  xmm1, xmm2
> > setnb   al
> > ret
> > .L17:
> > xor eax, eax
> > ret
> >
> > After this patch codegen gets much worse:
> >
> > TestOverlap:
> > movqrax, xmm1
> > movqrdx, xmm2
> > movqrsi, xmm0
> > mov rdi, rax
> > movqrax, xmm3
> > mov rcx, rsi
> > xchgrdx, rax
> > movdxmm1, edx
> > mov rsi, rax
> > mov rax, rdx
> > comiss  xmm1, xmm0
> > jb  .L10
> > shr rcx, 32
> > shr rax, 32
> > movdxmm0, eax
> > movdxmm4, ecx
> > comiss  xmm0, xmm4
> > jb  .L10
> > movdxmm0, esi
> > movdxmm1, edi
> > xor eax, eax
> > comiss  xmm1, xmm0
> > setnb   al
> > ret
> > .L10:
> > xor eax, eax
> > ret
> > TestOverlap2:
> > movqrdx, xmm2
> > movqrax, xmm3
> > movqrsi, xmm0
> > xchgrdx, rax
> > mov rcx, rsi
> > mov rsi, rax
> > mov rax, rdx
> > shr rcx, 32
> > shr rax, 32
> > movdxmm4, ecx
> > movdxmm0, eax
> > comiss  xmm0, xmm4
> > jb  .L17
> > movdxmm0, esi
> > xor eax, eax
> > comiss  xmm1, xmm0
> > setnb   al
> > ret
> > .L17:
> > xor eax, eax
> > ret
> >
> > I saw that you've been improving i386 argument passing, so maybe this is 
> > just a
> > missed case of these additions?
> >
> > (Can also be seen here https://godbolt.org/z/E4xrEn6KW)
> >
> > PS: I found the code that clang generates, with cmpleps + pextrw to avoid
> > the fp->int->fp + shr, interesting. I wonder if something like this could
> > be added to GCC as well.
> >
> > Thanks!
> > Manolis
> >
> > On Thu, Jul 6, 2023 at 5:21 PM Uros Bizjak via Gcc-patches  > patc...@gcc.gnu.org> wrote:
> > >
> > > On Thu, Jul 6, 2023 at 3:48 PM Roger Sayle 
> > wrote:
> > > >
> > > > > On Thu, Jul 6, 2023 at 2:04 PM Roger Sayle
> > > > > 
> > > > > wrote:
> > > > > >
> > > > > >
> > > > > > Passing 128-bit integer (TImode) parameters on x86_64 can
> > > > > > sometimes result in surprising code.  Consider the example below 
> > > > > > (from PR
> > 43644):
> > > > > >
> > > > > > __uint128 foo(__uint128 x, unsigned long long y) {
> > > > > >   return x+y;
> > > > > > }
> > > > > >
> > > > > > which currently results in 6 consecutive movq instructions:
> > > > > >
> > > > > > foo:movq%rsi, %rax
> > > 

[PING][PATCH v6 2/2] libstdc++: Use new built-in trait __is_pointer

2023-09-01 Thread Ken Matsui via Gcc-patches
Ping for the use of __is_pointer built-in.

Sincerely,
Ken Matsui

On Thu, Jul 13, 2023 at 1:49 PM Ken Matsui  wrote:
>
> This patch lets libstdc++ use new built-in trait __is_pointer.
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/cpp_type_traits.h (__is_ptr): Use __is_pointer
> built-in trait.
> * include/std/type_traits (is_pointer): Likewise. Optimize its
> implementation.
> (is_pointer_v): Likewise.
>
> Co-authored-by: Jonathan Wakely 
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/bits/cpp_type_traits.h |  8 
>  libstdc++-v3/include/std/type_traits| 44 +
>  2 files changed, 44 insertions(+), 8 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/cpp_type_traits.h 
> b/libstdc++-v3/include/bits/cpp_type_traits.h
> index 3711e4be526..4da1e7c407c 100644
> --- a/libstdc++-v3/include/bits/cpp_type_traits.h
> +++ b/libstdc++-v3/include/bits/cpp_type_traits.h
> @@ -363,6 +363,13 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
>//
>// Pointer types
>//
> +#if __has_builtin(__is_pointer)
> +  template
> +struct __is_ptr : __truth_type<__is_pointer(_Tp)>
> +{
> +  enum { __value = __is_pointer(_Tp) };
> +};
> +#else
>template
>  struct __is_ptr
>  {
> @@ -376,6 +383,7 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
>enum { __value = 1 };
>typedef __true_type __type;
>  };
> +#endif
>
>//
>// An arithmetic type is an integer type or a floating point type
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 0e7a9c9c7f3..16b2f6de536 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -515,19 +515,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  struct is_array<_Tp[]>
>  : public true_type { };
>
> -  template
> -struct __is_pointer_helper
> +  /// is_pointer
> +#if __has_builtin(__is_pointer)
> +  template
> +struct is_pointer
> +: public __bool_constant<__is_pointer(_Tp)>
> +{ };
> +#else
> +  template
> +struct is_pointer
>  : public false_type { };
>
>template
> -struct __is_pointer_helper<_Tp*>
> +struct is_pointer<_Tp*>
>  : public true_type { };
>
> -  /// is_pointer
>template
> -struct is_pointer
> -: public __is_pointer_helper<__remove_cv_t<_Tp>>::type
> -{ };
> +struct is_pointer<_Tp* const>
> +: public true_type { };
> +
> +  template
> +struct is_pointer<_Tp* volatile>
> +: public true_type { };
> +
> +  template
> +struct is_pointer<_Tp* const volatile>
> +: public true_type { };
> +#endif
>
>/// is_lvalue_reference
>template
> @@ -3168,8 +3182,22 @@ template 
>  template 
>inline constexpr bool is_array_v<_Tp[_Num]> = true;
>
> +#if __has_builtin(__is_pointer)
> +template 
> +  inline constexpr bool is_pointer_v = __is_pointer(_Tp);
> +#else
>  template 
> -  inline constexpr bool is_pointer_v = is_pointer<_Tp>::value;
> +  inline constexpr bool is_pointer_v = false;
> +template 
> +  inline constexpr bool is_pointer_v<_Tp*> = true;
> +template 
> +  inline constexpr bool is_pointer_v<_Tp* const> = true;
> +template 
> +  inline constexpr bool is_pointer_v<_Tp* volatile> = true;
> +template 
> +  inline constexpr bool is_pointer_v<_Tp* const volatile> = true;
> +#endif
> +
>  template 
>inline constexpr bool is_lvalue_reference_v = false;
>  template 
> --
> 2.41.0
>


[PING][PATCH v3 2/2] libstdc++: use new built-in trait __is_array

2023-09-01 Thread Ken Matsui via Gcc-patches
Ping for the use of __is_array built-in.

Sincerely,
Ken Matsui

On Fri, Jul 7, 2023 at 10:24 PM Ken Matsui  wrote:
>
> This patch lets libstdc++ use new built-in trait __is_array.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_array): Use __is_array built-in trait.
> (is_array_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 12 
>  1 file changed, 12 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 0e7a9c9c7f3..f2a3a327e7d 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -503,6 +503,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  { };
>
>/// is_array
> +#if __has_builtin(__is_array)
> +  template
> +struct is_array
> +: public __bool_constant<__is_array(_Tp)>
> +{ };
> +#else
>template
>  struct is_array
>  : public false_type { };
> @@ -514,6 +520,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>template
>  struct is_array<_Tp[]>
>  : public true_type { };
> +#endif
>
>template
>  struct __is_pointer_helper
> @@ -3161,12 +3168,17 @@ template 
>  template 
>inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value;
>
> +#if __has_builtin(__is_array)
> +template 
> +  inline constexpr bool is_array_v = __is_array(_Tp);
> +#else
>  template 
>inline constexpr bool is_array_v = false;
>  template 
>inline constexpr bool is_array_v<_Tp[]> = true;
>  template 
>inline constexpr bool is_array_v<_Tp[_Num]> = true;
> +#endif
>
>  template 
>inline constexpr bool is_pointer_v = is_pointer<_Tp>::value;
> --
> 2.41.0
>


[PING][PATCH v10 5/5] libstdc++: Make std::is_object dispatch to new built-in traits

2023-09-01 Thread Ken Matsui via Gcc-patches
Ping for std::is_object.

Sincerely,
Ken Matsui

On Wed, Jul 12, 2023 at 7:42 PM Ken Matsui  wrote:
>
> This patch gets std::is_object to dispatch to new built-in traits,
> __is_function and __is_reference.
>
> libstdc++-v3/ChangeLog:
> * include/std/type_traits (is_object): Use new built-in traits,
> __is_function and __is_reference.
> (is_object_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 17 +
>  1 file changed, 17 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 7ef50a2e64f..4ff025b09fa 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -682,11 +682,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  { };
>
>/// is_object
> +#if __has_builtin(__is_function) && __has_builtin(__is_reference) \
> +&& __has_builtin(__is_void)
> +  template
> +struct is_object
> +: public __bool_constant + __is_void(_Tp))>
> +{ };
> +#else
>template
>  struct is_object
>  : public __not_<__or_, is_reference<_Tp>,
>is_void<_Tp>>>::type
>  { };
> +#endif
>
>template
>  struct is_member_pointer;
> @@ -3233,8 +3242,16 @@ template 
>inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
>  template 
>inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
> +
> +#if __has_builtin(__is_function) && __has_builtin(__is_reference)
> +template 
> +  inline constexpr bool is_object_v
> += !(__is_function(_Tp) || __is_reference(_Tp) || is_void<_Tp>::value);
> +#else
>  template 
>inline constexpr bool is_object_v = is_object<_Tp>::value;
> +#endif
> +
>  template 
>inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
>  template 
> --
> 2.41.0
>


[PING][PATCH v3 2/2] libstdc++: use new built-in trait __is_const

2023-09-01 Thread Ken Matsui via Gcc-patches
Ping for the use of __is_const built-in.

Sincerely,
Ken Matsui

On Fri, Jul 7, 2023 at 10:27 PM Ken Matsui  wrote:
>
> This patch lets libstdc++ use new built-in trait __is_const.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_const): Use __is_const built-in trait.
> (is_const_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 0e7a9c9c7f3..3a46eca5377 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -764,6 +764,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>// Type properties.
>
>/// is_const
> +#if __has_builtin(__is_const)
> +  template
> +struct is_const
> +: public __bool_constant<__is_const(_Tp)>
> +{ };
> +#else
>template
>  struct is_const
>  : public false_type { };
> @@ -771,6 +777,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>template
>  struct is_const<_Tp const>
>  : public true_type { };
> +#endif
>
>/// is_volatile
>template
> @@ -3210,10 +3217,17 @@ template 
>inline constexpr bool is_compound_v = is_compound<_Tp>::value;
>  template 
>inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value;
> +
> +#if __has_builtin(__is_const)
> +template 
> +  inline constexpr bool is_const_v = __is_const(_Tp);
> +#else
>  template 
>inline constexpr bool is_const_v = false;
>  template 
>inline constexpr bool is_const_v = true;
> +#endif
> +
>  template 
>inline constexpr bool is_volatile_v = false;
>  template 
> --
> 2.41.0
>


[PING][PATCH v3 2/2] libstdc++: use new built-in trait __is_volatile

2023-09-01 Thread Ken Matsui via Gcc-patches
Ping for the use of __is_volatile built-in.

Sincerely,
Ken Matsui

On Fri, Jul 7, 2023 at 10:19 PM Ken Matsui  wrote:
>
> This patch lets libstdc++ use new built-in trait __is_volatile.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/type_traits (is_volatile): Use __is_volatile built-in
> trait.
> (is_volatile_v): Likewise.
>
> Signed-off-by: Ken Matsui 
> ---
>  libstdc++-v3/include/std/type_traits | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/libstdc++-v3/include/std/type_traits 
> b/libstdc++-v3/include/std/type_traits
> index 0e7a9c9c7f3..db74b884b35 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -773,6 +773,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  : public true_type { };
>
>/// is_volatile
> +#if __has_builtin(__is_volatile)
> +  template
> +struct is_volatile
> +: public __bool_constant<__is_volatile(_Tp)>
> +{ };
> +#else
>template
>  struct is_volatile
>  : public false_type { };
> @@ -780,6 +786,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>template
>  struct is_volatile<_Tp volatile>
>  : public true_type { };
> +#endif
>
>/// is_trivial
>template
> @@ -3214,10 +3221,16 @@ template 
>inline constexpr bool is_const_v = false;
>  template 
>inline constexpr bool is_const_v = true;
> +
> +#if __has_builtin(__is_volatile)
> +template 
> +  inline constexpr bool is_volatile_v = __is_volatile(_Tp);
> +#else
>  template 
>inline constexpr bool is_volatile_v = false;
>  template 
>inline constexpr bool is_volatile_v = true;
> +#endif
>
>  template 
>inline constexpr bool is_trivial_v = __is_trivial(_Tp);
> --
> 2.41.0
>


[PATCH]AArch64 xorsign: Fix scalar xorsign lowering

2023-09-01 Thread Tamar Christina via Gcc-patches
Hi All,

In GCC-9 our scalar xorsign pattern broke and we didn't notice it because the
testcase was not strong enough.  With this commit

8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
commit 8d2d39587d941a40f25ea0144cceb677df115040
Author: Segher Boessenkool 
Date:   Mon Oct 22 22:23:39 2018 +0200

combine: Do not combine moves from hard registers

combine started introducing useless moves on hard registers: when one of the
arguments to our scalar xorsign is a hardreg, we get an additional move inserted.

This leads to combine forming an AND with the immediate inside and using the
superfluous move to do the r->w move, instead of what we wanted before, which was
for the `and` to be a vector `and` and have reload pick the right alternative.

To fix this the patch just forces the use of the vector version directly and
so combine has no chance to mess it up.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (xorsign3): Renamed to..
(@xorsign3): ...This.
* config/aarch64/aarch64.md (xorsign3): Renamed to...
(@xorsign3): ..This and emit vectors directly
* config/aarch64/iterators.md (VCONQ): Add SF and DF.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/xorsign.c:

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc237465119764ce2a4942
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -500,7 +500,7 @@ (define_expand "ctz2"
   }
 )
 
-(define_expand "xorsign3"
+(define_expand "@xorsign3"
   [(match_operand:VHSDF 0 "register_operand")
(match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "register_operand")]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aedecdc8462c94bf1a769
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6953,31 +6953,20 @@ (define_insn "copysign3_insn"
 ;; EOR   v0.8B, v0.8B, v3.8B
 ;;
 
-(define_expand "xorsign3"
+(define_expand "@xorsign3"
   [(match_operand:GPF 0 "register_operand")
(match_operand:GPF 1 "register_operand")
(match_operand:GPF 2 "register_operand")]
   "TARGET_SIMD"
 {
-
-  machine_mode imode = mode;
-  rtx mask = gen_reg_rtx (imode);
-  rtx op1x = gen_reg_rtx (imode);
-  rtx op2x = gen_reg_rtx (imode);
-
-  int bits = GET_MODE_BITSIZE (mode) - 1;
-  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
-imode)));
-
-  emit_insn (gen_and3 (op2x, mask,
-   lowpart_subreg (imode, operands[2],
-   mode)));
-  emit_insn (gen_xor3 (op1x,
-   lowpart_subreg (imode, operands[1],
-   mode),
-   op2x));
+  rtx tmp = gen_reg_rtx (mode);
+  rtx op1 = gen_reg_rtx (mode);
+  rtx op2 = gen_reg_rtx (mode);
+  emit_move_insn (op1, lowpart_subreg (mode, operands[1], mode));
+  emit_move_insn (op2, lowpart_subreg (mode, operands[2], mode));
+  emit_insn (gen_xorsign3(mode, tmp, op1, op2));
   emit_move_insn (operands[0],
- lowpart_subreg (mode, op1x, imode));
+ lowpart_subreg (mode, tmp, mode));
   DONE;
 }
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 
9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac8339eed9bc975cf203fa4c
 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
 (V4HF "V8HF") (V8HF "V8HF")
 (V2SF "V4SF") (V4SF "V4SF")
 (V2DF "V2DF") (SI   "V4SI")
-(HI   "V8HI") (QI   "V16QI")])
+(HI   "V8HI") (QI   "V16QI")
+(SF   "V4SF") (DF   "V2DF")])
 
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign.c 
b/gcc/testsuite/gcc.target/aarch64/xorsign.c
index 
22c5829449d932bed08de7e453c435ade3b787b2..dfb7ba7f140524507cb79cb06e12c72ad46eb753
 100644
--- a/gcc/testsuite/gcc.target/aarch64/xorsign.c
+++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c
@@ -79,8 +79,9 @@ check_l_neg_rev (long double x, long double y)
   return __builtin_copysignl (-1.0, y) * x;
 }
 
-/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */
-/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */
+/* { dg-final { scan-assembler-times {eor\tv[0-9]+\.16b, v[0-9]+\.16b, 
v[0-9]+\.16b} 8 } } */
+/* { dg-final { scan-assembler-times 

RE: [x86_64 PATCH] Improve __int128 argument passing (in ix86_expand_move).

2023-09-01 Thread Roger Sayle


Hi Manolis,
Many thanks.  If you haven't already, could you create/file a
bug report at https://gcc.gnu.org/bugzilla/ which ensures this
doesn't get lost/forgotten.  It provides a PR number for tracking
discussions, and patches/fixes with PR numbers are (often)
prioritized during the review and approval process.

I'll investigate what's going on.  Either my "improvements"
need to be disabled for V2SF arguments, or the middle/back
end needs to figure out how to efficiently shuffle these values,
without reload moving them via integer registers, at least as
efficiently as we were before.  As you/clang show, we could
do better.

Thanks again, and sorry for any inconvenience.
Best regards,
Roger
--

> -Original Message-
> From: Manolis Tsamis 
> Sent: 01 September 2023 11:45
> To: Uros Bizjak 
> Cc: Roger Sayle ; gcc-patches@gcc.gnu.org
> Subject: Re: [x86_64 PATCH] Improve __int128 argument passing (in
> ix86_expand_move).
> 
> Hi Roger,
> 
> I've (accidentally) found a codegen regression that I bisected down to this 
> patch.
> For these two functions:
> 
> typedef struct {
>   float minx, miny;
>   float maxx, maxy;
> } AABB;
> 
> int TestOverlap(AABB a, AABB b) {
>   return a.minx <= b.maxx
>   && a.miny <= b.maxy
>   && a.maxx >= b.minx
>   && a.maxx >= b.minx;
> }
> 
> int TestOverlap2(AABB a, AABB b) {
>   return a.miny <= b.maxy
>   && a.maxx >= b.minx;
> }
> 
> GCC used to produce this code:
> 
> TestOverlap:
> comiss  xmm3, xmm0
> movqrdx, xmm0
> movqrsi, xmm1
> movqrax, xmm3
> jb  .L10
> shr rdx, 32
> shr rax, 32
> movdxmm0, eax
> movdxmm4, edx
> comiss  xmm0, xmm4
> jb  .L10
> movdxmm1, esi
> xor eax, eax
> comiss  xmm1, xmm2
> setnb   al
> ret
> .L10:
> xor eax, eax
> ret
> TestOverlap2:
> shufps  xmm0, xmm0, 85
> shufps  xmm3, xmm3, 85
> comiss  xmm3, xmm0
> jb  .L17
> xor eax, eax
> comiss  xmm1, xmm2
> setnb   al
> ret
> .L17:
> xor eax, eax
> ret
> 
> After this patch codegen gets much worse:
> 
> TestOverlap:
> movqrax, xmm1
> movqrdx, xmm2
> movqrsi, xmm0
> mov rdi, rax
> movqrax, xmm3
> mov rcx, rsi
> xchgrdx, rax
> movdxmm1, edx
> mov rsi, rax
> mov rax, rdx
> comiss  xmm1, xmm0
> jb  .L10
> shr rcx, 32
> shr rax, 32
> movdxmm0, eax
> movdxmm4, ecx
> comiss  xmm0, xmm4
> jb  .L10
> movdxmm0, esi
> movdxmm1, edi
> xor eax, eax
> comiss  xmm1, xmm0
> setnb   al
> ret
> .L10:
> xor eax, eax
> ret
> TestOverlap2:
> movqrdx, xmm2
> movqrax, xmm3
> movqrsi, xmm0
> xchgrdx, rax
> mov rcx, rsi
> mov rsi, rax
> mov rax, rdx
> shr rcx, 32
> shr rax, 32
> movdxmm4, ecx
> movdxmm0, eax
> comiss  xmm0, xmm4
> jb  .L17
> movdxmm0, esi
> xor eax, eax
> comiss  xmm1, xmm0
> setnb   al
> ret
> .L17:
> xor eax, eax
> ret
> 
> I saw that you've been improving i386 argument passing, so maybe this is just 
> a
> missed case of these additions?
> 
> (Can also be seen here https://godbolt.org/z/E4xrEn6KW)
> 
> PS: I found the code that clang generates, with cmpleps + pextrw to avoid the
> fp->int->fp + shr, interesting. I wonder if something like this could be added
> to GCC as well.
> 
> Thanks!
> Manolis
> 
> On Thu, Jul 6, 2023 at 5:21 PM Uros Bizjak via Gcc-patches  patc...@gcc.gnu.org> wrote:
> >
> > On Thu, Jul 6, 2023 at 3:48 PM Roger Sayle 
> wrote:
> > >
> > > > On Thu, Jul 6, 2023 at 2:04 PM Roger Sayle
> > > > 
> > > > wrote:
> > > > >
> > > > >
> > > > > Passing 128-bit integer (TImode) parameters on x86_64 can
> > > > > sometimes result in surprising code.  Consider the example below 
> > > > > (from PR
> 43644):
> > > > >
> > > > > __uint128 foo(__uint128 x, unsigned long long y) {
> > > > >   return x+y;
> > > > > }
> > > > >
> > > > > which currently results in 6 consecutive movq instructions:
> > > > >
> > > > > foo:movq%rsi, %rax
> > > > > movq%rdi, %rsi
> > > > > movq%rdx, %rcx
> > > > > movq%rax, %rdi
> > > > > movq%rsi, %rax
> > > > > movq%rdi, %rdx
> > > > > addq%rcx, %rax
> > > > > adcq$0, %rdx
> > > > > ret
> > > > >
> > > > > The underlying issue is that during RTL expansion, we generate
> > > > > the following initial RTL for the x argument:
> > > > >
> > > > > (insn 4 3 5 2 (set 

Re: [PATCH V6] Optimize '(X - N * M) / N' to 'X / N - M' if valid

2023-09-01 Thread Richard Biener via Gcc-patches
On Fri, 1 Sep 2023, Jiufu Guo wrote:

> Hi,
> 
> Integer expression "(X - N * M) / N" can be optimized to "X / N - M" with
> the below conditions:
> 1. There is no wrap/overflow/underflow.
>wrap/overflow/underflow breaks the arithmetic operation.
> 2. "X - N * M" and "X" are not of opposite sign.
>Here, the operation "/" would be "trunc_div", the fractional part is
>discarded towards zero. If "X - N * M" and "X" are in different signs,
>then trunc_div discards the fractional parts (of /N) in different
>directions.
> 
> Compare the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624801.html
> This patch adds comments and updates the pattern on "(t + C)" to be
> tighter.
> 
> Bootstrap & regtest pass on ppc64{,le} and x86_64.
> Is this patch ok for trunk?
> 
> BR,
> Jeff (Jiufu Guo)
> 
>   PR tree-optimization/108757
> 
> gcc/ChangeLog:
> 
>   * match.pd ((X - N * M) / N): New pattern.
>   ((X + N * M) / N): New pattern.
>   ((X + C) div_rshift N): New pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/pr108757-1.c: New test.
>   * gcc.dg/pr108757-2.c: New test.
>   * gcc.dg/pr108757.h: New test.
> 
> ---
>  gcc/match.pd  |  78 ++
>  gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
>  gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
>  gcc/testsuite/gcc.dg/pr108757.h   | 233 ++
>  4 files changed, 348 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 
> fa598d5ca2e470f9cc3b82469e77d743b12f107e..863bc7299cdefc622a7806a4d32e37268c50d453
>  100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -959,6 +959,84 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  #endif
> 
>  
> +#if GIMPLE
> +(for div (trunc_div exact_div)
> + /* Simplify (X + M*N) / N -> X / N + M.  */
> + (simplify
> +  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
> +  (if (INTEGRAL_TYPE_P (type)
> +   && get_range_query (cfun)->range_of_expr (vr1, @1)
> +   && get_range_query (cfun)->range_of_expr (vr2, @2)
> +   /* "N*M" doesn't overflow.  */
> +   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
> +   && get_range_query (cfun)->range_of_expr (vr0, @0)
> +   && get_range_query (cfun)->range_of_expr (vr3, @3)
> +   /* "X+(N*M)" doesn't overflow.  */
> +   && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
> +   && get_range_query (cfun)->range_of_expr (vr4, @4)
> +   /* "X+N*M" is not with opposite sign as "X".  */
> +   && (TYPE_UNSIGNED (type)
> +|| (vr0.nonnegative_p () && vr4.nonnegative_p ())
> +|| (vr0.nonpositive_p () && vr4.nonpositive_p (
> +  (plus (div @0 @2) @1
> +
> + /* Simplify (X - M*N) / N -> X / N - M.  */
> + (simplify
> +  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
> +  (with {value_range vr0, vr1, vr2, vr3, vr4;}
> +  (if (INTEGRAL_TYPE_P (type)
> +   && get_range_query (cfun)->range_of_expr (vr1, @1)
> +   && get_range_query (cfun)->range_of_expr (vr2, @2)
> +   /* "N * M" doesn't overflow.  */
> +   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
> +   && get_range_query (cfun)->range_of_expr (vr0, @0)
> +   && get_range_query (cfun)->range_of_expr (vr3, @3)
> +   /* "X - (N*M)" doesn't overflow.  */
> +   && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
> +   && get_range_query (cfun)->range_of_expr (vr4, @4)
> +   /* "X-N*M" is not with opposite sign as "X".  */
> +   && (TYPE_UNSIGNED (type)
> +|| (vr0.nonnegative_p () && vr4.nonnegative_p ())
> +|| (vr0.nonpositive_p () && vr4.nonpositive_p (
> +  (minus (div @0 @2) @1)
> +
> +/* Simplify
> +   (X + C) / N -> X / N + C / N where C is multiple of N.
> +   (X + C) >> N -> X >> N + C>>N if low N bits of C is 0.  */
> +(for op (trunc_div exact_div rshift)
> + (simplify
> +  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
> +   (with
> +{
> +  wide_int c = wi::to_wide (@1);
> +  wide_int n = wi::to_wide (@2);
> +  bool shift = op == RSHIFT_EXPR;
> +  #define plus_op1(v) (shift ? wi::rshift (v, n, TYPE_SIGN (type)) \
> +  : wi::div_trunc (v, n, TYPE_SIGN (type)))
> +  #define exact_mod(v) (shift ? wi::ctz (v) >= n.to_shwi () \
> +   : wi::multiple_of_p (v, n, TYPE_SIGN (type)))

please indent these full left

> +  value_range vr0, vr1, vr3;
> +}
> +(if (INTEGRAL_TYPE_P (type)
> +  && get_range_query (cfun)->range_of_expr (vr0, @0))
> + (if (exact_mod (c)
> +   && get_range_query (cfun)->range_of_expr (vr1, @1)
> +   /* "X+C" doesn't overflow.  */
> +   && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
> +   && get_range_query 

[committed] testsuite: Fix vectcond-1.C FAIL on i686-linux [PR19832]

2023-09-01 Thread Jakub Jelinek via Gcc-patches
On Thu, Aug 31, 2023 at 10:24:10AM -0700, Andrew Pinski via Gcc-patches wrote:
> This patch adds the following match patterns to optimize these:
>  /* (a != b) ? (a - b) : 0 -> (a - b) */

These tests FAIL on i686-linux, with
.../gcc/testsuite/gcc.dg/pr110915-1.c:8:1: warning: MMX vector return without 
MMX enabled changes the ABI [-Wpsabi]
.../gcc/testsuite/gcc.dg/pr110915-1.c:7:15: warning: MMX vector argument 
without MMX enabled changes the ABI [-Wpsabi]
excess warnings.  I've added -Wno-psabi to quiet that up, plus I think
it is undesirable to define macros like vector before including C library
headers in case the header would use that identifier in non-obfuscated
form somewhere.

Tested with
make check-g++ RUNTESTFLAGS='--target_board=unix\{-m32,-m32/-march=i686,-m64\} 
dg.exp=vectcond-1.C'
on x86_64-linux which previously FAILed, committed to trunk as obvious.

2023-09-01  Jakub Jelinek  

PR tree-optimization/19832
* g++.dg/opt/vectcond-1.C: Add -Wno-psabi to dg-options.

--- gcc/testsuite/g++.dg/opt/vectcond-1.C.jj2023-09-01 12:15:36.072430927 
+0200
+++ gcc/testsuite/g++.dg/opt/vectcond-1.C   2023-09-01 14:20:22.688739458 
+0200
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-ccp1 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-ccp1 -fdump-tree-optimized -Wno-psabi" } */
 /* This is the vector version of these optimizations. */
 /* PR tree-optimization/19832 */
 

Jakub



[committed] testsuite: Fix up pr110915* tests on i686-linux [PR110915]

2023-09-01 Thread Jakub Jelinek via Gcc-patches
On Wed, Aug 30, 2023 at 03:25:11PM -0700, Andrew Pinski via Gcc-patches wrote:
> This simple patch extends the min_value/max_value match to vector integer 
> types.
> Using uniform_integer_cst_p makes this easy.

These tests FAIL on i686-linux, with
.../gcc/testsuite/gcc.dg/pr110915-1.c:8:1: warning: MMX vector return without 
MMX enabled changes the ABI [-Wpsabi]
.../gcc/testsuite/gcc.dg/pr110915-1.c:7:15: warning: MMX vector argument 
without MMX enabled changes the ABI [-Wpsabi]
excess warnings.  I've added -Wno-psabi to quiet that up, plus I think
it is undesirable to define macros like vector before including C library
headers in case the header would use that identifier in non-obfuscated
form somewhere.

Tested on x86_64-linux with
make check-gcc RUNTESTFLAGS='--target_board=unix\{-m32,-m32/-march=i686,-m64\} 
dg.exp=pr110915*'
which previously FAILed, committed to trunk as obvious.

2023-09-01  Jakub Jelinek  

PR tree-optimization/110915
* gcc.dg/pr110915-1.c: Add -Wno-psabi to dg-options.  Move vector
macro definition after limits.h inclusion.
* gcc.dg/pr110915-2.c: Likewise.
* gcc.dg/pr110915-3.c: Likewise.
* gcc.dg/pr110915-4.c: Likewise.
* gcc.dg/pr110915-5.c: Likewise.
* gcc.dg/pr110915-6.c: Likewise.
* gcc.dg/pr110915-7.c: Likewise.
* gcc.dg/pr110915-8.c: Likewise.
* gcc.dg/pr110915-9.c: Likewise.
* gcc.dg/pr110915-10.c: Likewise.
* gcc.dg/pr110915-11.c: Likewise.
* gcc.dg/pr110915-12.c: Likewise.

--- gcc/testsuite/gcc.dg/pr110915-1.c.jj2023-08-31 19:52:16.889305069 
+0200
+++ gcc/testsuite/gcc.dg/pr110915-1.c   2023-09-01 14:12:47.937873487 +0200
@@ -1,9 +1,10 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-ifcombine" } */
-#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+/* { dg-options "-O2 -fdump-tree-ifcombine -Wno-psabi" } */
 
 #include 
 
+#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+
 vector signed and1(vector unsigned x, vector unsigned y)
 {
   /* (x > y) & (x != 0)  --> x > y */
--- gcc/testsuite/gcc.dg/pr110915-2.c.jj2023-08-31 19:52:16.889305069 
+0200
+++ gcc/testsuite/gcc.dg/pr110915-2.c   2023-09-01 14:12:52.791808013 +0200
@@ -1,9 +1,10 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
-#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+/* { dg-options "-O2 -fdump-tree-optimized -Wno-psabi" } */
 
 #include 
 
+#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+
 vector signed and1(vector unsigned x, vector unsigned y)
 {
   /* (x > y)   &   (x != 0)  --> x > y */
--- gcc/testsuite/gcc.dg/pr110915-3.c.jj2023-08-31 19:52:16.889305069 
+0200
+++ gcc/testsuite/gcc.dg/pr110915-3.c   2023-09-01 14:12:57.514744307 +0200
@@ -1,9 +1,10 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-ifcombine" } */
-#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+/* { dg-options "-O2 -fdump-tree-ifcombine -Wno-psabi" } */
 
 #include 
 
+#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+
 vector signed and1(vector unsigned x, vector unsigned y)
 {
   /* (x > y)   &   (x == 0)  --> false */
--- gcc/testsuite/gcc.dg/pr110915-4.c.jj2023-08-31 19:52:16.889305069 
+0200
+++ gcc/testsuite/gcc.dg/pr110915-4.c   2023-09-01 14:13:02.094682529 +0200
@@ -1,9 +1,10 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
-#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+/* { dg-options "-O2 -fdump-tree-optimized -Wno-psabi" } */
 
 #include 
 
+#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+
 vector signed and1(vector unsigned x, vector unsigned y)
 {
   /* (x > y)   &   (x == 0)  --> false */
--- gcc/testsuite/gcc.dg/pr110915-5.c.jj2023-08-31 19:52:16.889305069 
+0200
+++ gcc/testsuite/gcc.dg/pr110915-5.c   2023-09-01 14:13:06.609621628 +0200
@@ -1,9 +1,10 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-ifcombine" } */
-#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+/* { dg-options "-O2 -fdump-tree-ifcombine -Wno-psabi" } */
 
 #include 
 
+#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+
 vector signed and1(vector unsigned x, vector unsigned y)
 {
   /* (x <= y)   &   (x == 0)  --> x == 0 */
--- gcc/testsuite/gcc.dg/pr110915-6.c.jj2023-08-31 19:52:16.889305069 
+0200
+++ gcc/testsuite/gcc.dg/pr110915-6.c   2023-09-01 14:13:11.175560039 +0200
@@ -1,9 +1,10 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
-#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+/* { dg-options "-O2 -fdump-tree-optimized -Wno-psabi" } */
 
 #include 
 
+#define vector __attribute__((vector_size(sizeof(unsigned)*2)))
+
 vector signed and1(vector unsigned x, vector unsigned y)
 {
   /* (x <= y)   &   (x == 0)  --> x == 0 */
--- gcc/testsuite/gcc.dg/pr110915-7.c.jj2023-08-31 

[PATCH v2] c++: Catch indirect change of active union member in constexpr [PR101631]

2023-09-01 Thread Nathaniel Shead via Gcc-patches
On Wed, Aug 30, 2023 at 04:28:18PM -0400, Jason Merrill wrote:
> On 8/29/23 09:35, Nathaniel Shead wrote:
> > This is an attempt to improve the constexpr machinery's handling of
> > union lifetime by catching more cases that cause UB. Is this approach
> > OK?
> > 
> > I'd also like some feedback on a couple of pain points with this
> > implementation; in particular, is there a good way to detect if a type
> > has a non-deleted trivial constructor? I've used 'is_trivially_xible' in
> > this patch, but that also checks for a trivial destructor which by my
> > reading of [class.union.general]p5 is possibly incorrect. Checking for a
> > trivial default constructor doesn't seem too hard but I couldn't find a
> > good way of checking if that constructor is deleted.
> 
> I guess the simplest would be
> 
> (TYPE_HAS_TRIVIAL_DFLT (t) && locate_ctor (t))
> 
> because locate_ctor returns null for a deleted default ctor.  It would be
> good to make this a separate predicate.
> 
> > I'm also generally unsatisfied with the additional complexity with the
> > third 'refs' argument in 'cxx_eval_store_expression' being pushed and
> > popped; would it be better to replace this with a vector of some
> > specific structure type for the data that needs to be passed on?
> 
> Perhaps, but what you have here is fine.  Another possibility would be to
> just have a vec of the refs and extract the index from the ref later as
> needed.
> 
> Jason
> 

Thanks for the feedback. I've kept the refs as-is for now. I've also
cleaned up a couple of other typos I'd had with comments and diagnostics.

Bootstrapped and regtested on x86_64-pc-linux-gnu.

-- 8< --

This patch adds checks for attempting to change the active member of a
union by methods other than a member access expression.

To be able to properly distinguish `*(&u.a) = ` from `u.a = `, this
patch redoes the solution for c++/59950 to avoid extraneous *&; it
seems that the only case that needed the workaround was when copying
empty classes.

Additionally, this patch ensures that constructors for a union field
mark that field as the active member before entering the call itself;
this ensures that modifications of the field within the constructor's
body don't cause false positives (as these will not appear to be member
access expressions). This means that we no longer need to start the
lifetime of empty union members after the constructor body completes.

PR c++/101631

gcc/cp/ChangeLog:

* call.cc (build_over_call): Fold more indirect refs for trivial
assignment op.
* class.cc (type_has_non_deleted_trivial_default_ctor): Create.
* constexpr.cc (cxx_eval_call_expression): Start lifetime of
union member before entering constructor.
(cxx_eval_store_expression): Check for accessing inactive union
member indirectly.
* cp-tree.h (type_has_non_deleted_trivial_default_ctor):
Forward declare.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/constexpr-union2.C: New test.
* g++.dg/cpp2a/constexpr-union3.C: New test.
* g++.dg/cpp2a/constexpr-union4.C: New test.
* g++.dg/cpp2a/constexpr-union5.C: New test.

Signed-off-by: Nathaniel Shead 
---
 gcc/cp/call.cc|  11 +-
 gcc/cp/class.cc   |   8 ++
 gcc/cp/constexpr.cc   | 105 --
 gcc/cp/cp-tree.h  |   1 +
 gcc/testsuite/g++.dg/cpp2a/constexpr-union2.C |  30 +
 gcc/testsuite/g++.dg/cpp2a/constexpr-union3.C |  45 
 gcc/testsuite/g++.dg/cpp2a/constexpr-union4.C |  29 +
 gcc/testsuite/g++.dg/cpp2a/constexpr-union5.C |  55 +
 8 files changed, 246 insertions(+), 38 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-union2.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-union3.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-union4.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-union5.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 23e458d3252..3372c88f182 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -10358,10 +10358,7 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
   && DECL_OVERLOADED_OPERATOR_IS (fn, NOP_EXPR)
   && trivial_fn_p (fn))
 {
-  /* Don't use cp_build_fold_indirect_ref, op= returns an lvalue even if
-the object argument isn't one.  */
-  tree to = cp_build_indirect_ref (input_location, argarray[0],
-  RO_ARROW, complain);
+  tree to = cp_build_fold_indirect_ref (argarray[0]);
   tree type = TREE_TYPE (to);
   tree as_base = CLASSTYPE_AS_BASE (type);
   tree arg = argarray[1];
@@ -10369,7 +10366,11 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
 
   if (is_really_empty_class (type, /*ignore_vptr*/true))
{
- /* Avoid copying empty classes.  */
+ 

Re: [PING][PATCH 1/2] Ada: Synchronized private extensions are always limited

2023-09-01 Thread Arnaud Charlet via Gcc-patches
> For some reason, your email is ending up in a strange format, I almost
> missed the .patch file attached, making the review harder.

Never mind, I was on vacation earlier this month and then busy with a seminar 
last week, so I started looking at your ping email before the original email 
which did contain the patch easily found, sorry for the noise!

Arno


Re: [PING][PATCH 1/2] Ada: Synchronized private extensions are always limited

2023-09-01 Thread Arnaud Charlet via Gcc-patches
Richard,

For some reason, your email is ending up in a strange format, I almost
missed the .patch file attached, making the review harder.

There's a typo in the comment added:

+  --  explicit limitedness implied by a synchronized private extension
+  --  the does not derive from a synchronized interface (see RM-7.3(6/2)).

the -> that does not derive...

OK with this change.

> From: Richard Wai  
> Sent: Thursday, August 10, 2023 12:55 AM
> To: 'gcc-patches@gcc.gnu.org' 
> Cc: 'Eric Botcazou' ; 'Arnaud Charlet'
> ; 'Stephen Baird' 
> Subject: [PATCH 1/2] Ada: Synchronized private extensions are always limited
> 
>  
> 
> GNAT currently considers a synchronized private extension that derives from
> an interface to be limited only when said interface is a concurrent
> interface. However it is of course legal for a synchronized private
> extension to derive from a limited interface. In this case GNAT fails to
> correctly determine that the private extension is limited.
> 
>  
> 
> This causes two separate problems that makes discriminated types in such a
> case impossible:
> 
> 1.GNAT inappropriately rejects compilation, claiming default
> discriminants on such a private extension are illegal.
> 2.GNAT fails to generate the expected discriminals for the
> unconstrained discriminated case, leading to the corresponding
> discriminants of the "corresponding record" of the underlying concurrent
> type to have no identifiers, and thus compilation fails.
> 
>  
> 
> Fairly simple fix. If "synchronized" appears in the private extension
> declaration, it is limited. This is explicit in the RM as well (7.3(6/2)).
> 
>  
> 
> Fixing this bug uncovered a related bug wrt. TSS address finalizer
> generation for constrained subtypes of synchronized private extensions with
> no default discriminants. That patch is to follow separately.
> 
>  
> 
> Patch file is attached.
> 
>  
> 
> --  Begin change log entry --
> 
>  
> 
> ada: Private extensions with the keyword "synchronized" are always limited.
> 
>  
> 
> GNAT was relying on synchronized private type extensions deriving from a
> concurrent interface to determine its limitedness. This does not cover the
> case where such an extension derives from a limited interface. RM-7.3(6/2) makes
> it clear that "synchronized" in a private extension implies the derived type
> is limited. GNAT should explicitly check for the presence of "synchronized"
> in a private extension declaration, and it should have the same effect as
> the presence of "limited".
> 
>  
> 
> gcc/ada/
> 
> * sem_ch3.adb (Build_Derived_Record_Type): Treat presence of
> keyword "synchronized" the same as "limited" when determining if a private
> extension is limited.
> 
> 
> -- End change log entry --
>  
> 
> This patch was bootstrapped on x86_64-*-freebsd13.2. Two new test cases were
> added. Note that 4 gnat test cases fail currently on master and are
> unrelated to this patch.


Re: [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns

2023-09-01 Thread Jakub Jelinek via Gcc-patches
On Fri, Sep 01, 2023 at 07:34:16PM +0800, Hongyu Wang wrote:
> > On Fri, Sep 01, 2023 at 05:07:53PM +0800, Hongyu Wang wrote:
> > > Jakub Jelinek via Gcc-patches  于2023年8月31日周四 
> > > 17:44写道:
> > > >
> > > > On Thu, Aug 31, 2023 at 04:20:19PM +0800, Hongyu Wang via Gcc-patches 
> > > > wrote:
> > > > > For vector move insns like vmovdqa/vmovdqu, their evex counterparts
> > > > > requrire explicit suffix 64/32/16/8. The usage of these instruction
> > > > > are prohibited under AVX10_1 or AVX512F, so for AVX2+APX_F we select
> > > > > vmovaps/vmovups for vector load/store insns that contains EGPR.
> > > >
> > > > Why not make it dependent on AVX512VL?
> > > > I.e. if egpr_p && TARGET_AVX512VL, still use vmovdqu16 or vmovdqa16
> > > > and the like, and only if !evex_reg_p && egpr_p && !TARGET_AVX512VL
> > > > fall back to what you're doing?
> > >
> > > I'm not sure if it is necessary, as on hardware there is no difference 
> > > between
> > > vmovdqu16/vmovups. If vmovups already has the capability to represent
> > > EGPR why do we need to distinguish them under VL?
> >
> > On the Intel HW you're currently planning.
> > Will that be the case for AMD as well?
> > Some insns are documented to move float or double vectors while others
> > integer vectors (of different element sizes).
> > Or is vmovups with GPR32 at least encoded smaller than vmovdqu{16,32,64}?
> 
> With GPR32 they have same encoding size. If we need to strictly follow
> the meaning of mnemonics,
> I will adjust as you suggested. Thanks.

I think it is useful, even if just for those who try to read the
assembler/disassembler.  Of course, if there are cases where only one of
those has to be used (say -mavx -mno-avx2 and 256-bit integer vector moves),
there is no way around that and one just uses what is available.

Jakub



Re: [PATCH v3] tree-optimization/110279- Check for nested FMA in reassoc

2023-09-01 Thread Richard Biener via Gcc-patches
On Wed, Aug 9, 2023 at 6:53 PM Di Zhao OS  wrote:
>
> Hi,
>
> The previous version of this patch tries to solve two problems
> at the same time. For better clarity, I'll separate them and
> only deal with the "nested" FMA in this version. I plan to
> propose another patch in avoiding bad shaped FMA (deferring FMA).
>
> Other changes:
>
> 1. Added new testcases for the "nested" FMA issue. For the
>following code:
>
> tmp1 = a + c * c + d * d + x * y;
> tmp2 = x * tmp1;
> result += (a + c + d + tmp2);
>
>, when "tmp1 = ..." is not rewritten, tmp1 will be result of
>an FMA, and there will be a list of consecutive FMAs:
>
> _1 = .FMA (c, c, a_39);
> _2 = .FMA (d, d, _1);
> tmp1 = .FMA (x, y, _2);
> _3 = .FMA (tmp1, x, d);
> ...
>
>If "tmp1 = ..." is rewritten to parallel, tmp1 will be result
>of a PLUS_EXPR between FMAs:
>
> _1 = .FMA (c, c, a_39);
> _2 = x * y;
> _3 = .FMA (d, d, _2);
>  tmp1 = _3 + _1;
>  _4 = .FMA (tmp1, x, d);
> ...
>
>It seems the register pressure of the latter is higher than
>the former.

Yes, that's a natural consequence of rewriting to parallel.

>On the test machines we have (including Ampere1,
>Neoverse-n1 and Intel Xeon), when "tmp1 = ..." is rewritten to
>parallel, the run time all increased significantly. In
>contrast, when "tmp1" is not the 1st or 2nd operand of another
>FMA (pr110279-1.c), rewriting it results in better performance.
>(I'll also append the testcases in the bug tracker.)
>
> 2. Enhanced checking for nested FMA by: 1) Modified
>convert_mult_to_fma so it can return multiple LHS.  2) Check
>NEGATE_EXPRs for nested FMA.
>
> (I think maybe this can be further refined by enabling rewriting
> to parallel for very long op list. )

So again, what you do applies to all operations, not just FMA.
Consider

  tmp1 = a + c + d + y;
  tmp2 = x + tmp1;
  result += (a + c + d + tmp2);
  foo (tmp2);

where I just removed all multiplications.  Since re-assoc works
on isolated single-use chains it will rewrite the tmp2 chain
to parallel and it will rewrite the result chain to parallel, in
the end this results in reassoc-width for 'result' to not be honored
because we don't see that at 'tmp2' it will fork again.  OTOH
the other 'result' arms end, so eventually just two (for reassoc
width 2) arms are "active" at any point.

That said - isn't the issue that we "overcommit" reassoc-width
this way because we apply it locally instead of globally
(of course also ignoring every other chain of instructions
reassoc isn't interested in)?

Unfortunately we work backwards when processing chains,
if we processed leaf chains first we could record the
association width applied to the chain at its new root and
honor that when such root ends up in the oplist of a consuming
chain.  But as we work backwards we'd have to record
the reassoc width used in the leaves of the associated
chain.  So if those become roots of other chains we can
then still honor that.

Would it work to attack the problem this way?  For
parallel rewritten chains record the width used?
Similar to operand_rank we could use a hash-map
from SSA leaf to width it appears in.  When we rewrite
a chain with such leaf as root we can then subtract
the incoming chain width from reassoc-width to lower
the width of its tail?

Richard.

> Bootstrapped and regression tested on x86_64-linux-gnu.
>
> Thanks,
> Di Zhao
>
> 
>
> PR tree-optimization/110279
>
> gcc/ChangeLog:
>
> * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Added
> new parameter collect_lhs.
> (struct fma_transformation_info): Moved to header.
> (class fma_deferring_state): Moved to header.
> (convert_mult_to_fma): Added new parameter collect_lhs.
> * tree-ssa-math-opts.h (struct fma_transformation_info):
> (class fma_deferring_state): Moved from .cc.
> (convert_mult_to_fma): Moved from .cc.
> * tree-ssa-reassoc.cc (enum fma_state): Defined enum to
> describe the state of FMA candidates for a list of
> operands.
> (rewrite_expr_tree_parallel): Changed boolean parameter
> to enum type.
> (has_nested_fma_p): New function to check for nested FMA
> on given multiplication statement.
> (rank_ops_for_fma): Return enum fma_state.
> (reassociate_bb): Avoid rewriting to parallel if nested
> FMAs are found.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/pr110279-1.c: New test.
> * gcc.dg/pr110279-2.c: New test.


Re: [PATCH 1/4] RISC-V: Adjust expand_cond_len_{unary,binop,op} api

2023-09-01 Thread Lehua Ding

On 2023/9/1 18:00, Robin Dapp via Gcc-patches wrote:

Thanks, LGTM.


Thanks, committed these patches.


Btw. I haven't forgotten to respond to your last refactor but just didn't find
the time yet.  I figured I should have some proper draft before suggesting
more things :)


Well, let's talk about it when you're free. Thanks in advance for more 
comments and suggestions later.


--
Best,
Lehua




Re: [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns

2023-09-01 Thread Hongyu Wang via Gcc-patches
Jakub Jelinek  于2023年9月1日周五 17:20写道:
>
> On Fri, Sep 01, 2023 at 05:07:53PM +0800, Hongyu Wang wrote:
> > Jakub Jelinek via Gcc-patches  于2023年8月31日周四 
> > 17:44写道:
> > >
> > > On Thu, Aug 31, 2023 at 04:20:19PM +0800, Hongyu Wang via Gcc-patches 
> > > wrote:
> > > > For vector move insns like vmovdqa/vmovdqu, their evex counterparts
> > > > > require explicit suffix 64/32/16/8. The usage of these instructions
> > > > are prohibited under AVX10_1 or AVX512F, so for AVX2+APX_F we select
> > > > vmovaps/vmovups for vector load/store insns that contains EGPR.
> > >
> > > Why not make it dependent on AVX512VL?
> > > I.e. if egpr_p && TARGET_AVX512VL, still use vmovdqu16 or vmovdqa16
> > > and the like, and only if !evex_reg_p && egpr_p && !TARGET_AVX512VL
> > > fall back to what you're doing?
> >
> > I'm not sure if it is necessary, as on hardware there is no difference 
> > between
> > vmovdqu16/vmovups. If vmovups already has the capability to represent
> > EGPR why do we need to distinguish them under VL?
>
> On the Intel HW you're currently planning.
> Will that be the case for AMD as well?
> Some insns are documented to move float or double vectors while others
> integer vectors (of different element sizes).
> Or is vmovups with GPR32 at least encoded smaller than vmovdqu{16,32,64}?

With GPR32 they have same encoding size. If we need to strictly follow
the meaning of mnemonics,
I will adjust as you suggested. Thanks.


>
> Jakub
>


[PATCH v5 4/4] libstdc++: Optimize is_compound trait performance

2023-09-01 Thread Ken Matsui via Gcc-patches
This patch optimizes the performance of the is_compound trait by
dispatching to the new __is_arithmetic built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_compound): Do not use __not_.
(is_compound_v): Use is_fundamental_v instead.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index cf24de2fcac..531f5a2bb3a 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -704,7 +704,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// is_compound
   template
 struct is_compound
-: public __not_>::type { };
+: public __bool_constant::value> { };
 
   /// @cond undocumented
   template
@@ -3234,7 +3234,7 @@ template 
 template 
   inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
 template 
-  inline constexpr bool is_compound_v = is_compound<_Tp>::value;
+  inline constexpr bool is_compound_v = !is_fundamental_v<_Tp>;
 template 
   inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value;
 template 
-- 
2.42.0



Re: [PATCH 06/13] [APX EGPR] Map reg/mem constraints in inline asm to non-EGPR constraint.

2023-09-01 Thread Uros Bizjak via Gcc-patches
On Fri, Sep 1, 2023 at 12:36 PM Hongtao Liu  wrote:
>
> On Fri, Sep 1, 2023 at 5:38 PM Uros Bizjak via Gcc-patches
>  wrote:
> >
> > On Fri, Sep 1, 2023 at 11:10 AM Hongyu Wang  wrote:
> > >
> > > Uros Bizjak via Gcc-patches  于2023年8月31日周四 
> > > 18:01写道:
> > > >
> > > > On Thu, Aug 31, 2023 at 11:18 AM Jakub Jelinek via Gcc-patches
> > > >  wrote:
> > > > >
> > > > > On Thu, Aug 31, 2023 at 04:20:17PM +0800, Hongyu Wang via Gcc-patches 
> > > > > wrote:
> > > > > > From: Kong Lingling 
> > > > > >
> > > > > > In inline asm, we do not know if the insn can use EGPR, so disable 
> > > > > > EGPR
> > > > > > usage by default from mapping the common reg/mem constraint to 
> > > > > > non-EGPR
> > > > > > constraints. Use a flag mapx-inline-asm-use-gpr32 to enable EGPR 
> > > > > > usage
> > > > > > for inline asm.
> > > > > >
> > > > > > gcc/ChangeLog:
> > > > > >
> > > > > >   * config/i386/i386.cc (INCLUDE_STRING): Add include for
> > > > > >   ix86_md_asm_adjust.
> > > > > >   (ix86_md_asm_adjust): When APX EGPR enabled without 
> > > > > > specifying the
> > > > > >   target option, map reg/mem constraints to non-EGPR 
> > > > > > constraints.
> > > > > >   * config/i386/i386.opt: Add option mapx-inline-asm-use-gpr32.
> > > > > >
> > > > > > gcc/testsuite/ChangeLog:
> > > > > >
> > > > > >   * gcc.target/i386/apx-inline-gpr-norex2.c: New test.
> > > > > > ---
> > > > > >  gcc/config/i386/i386.cc   |  44 +++
> > > > > >  gcc/config/i386/i386.opt  |   5 +
> > > > > >  .../gcc.target/i386/apx-inline-gpr-norex2.c   | 107 
> > > > > > ++
> > > > > >  3 files changed, 156 insertions(+)
> > > > > >  create mode 100644 
> > > > > > gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
> > > > > >
> > > > > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > > > > index d26d9ab0d9d..9460ebbfda4 100644
> > > > > > --- a/gcc/config/i386/i386.cc
> > > > > > +++ b/gcc/config/i386/i386.cc
> > > > > > @@ -17,6 +17,7 @@ You should have received a copy of the GNU 
> > > > > > General Public License
> > > > > >  along with GCC; see the file COPYING3.  If not see
> > > > > >  .  */
> > > > > >
> > > > > > +#define INCLUDE_STRING
> > > > > >  #define IN_TARGET_CODE 1
> > > > > >
> > > > > >  #include "config.h"
> > > > > > @@ -23077,6 +23078,49 @@ ix86_md_asm_adjust (vec , 
> > > > > > vec & /*inputs*/,
> > > > > >bool saw_asm_flag = false;
> > > > > >
> > > > > >start_sequence ();
> > > > > > +  /* TODO: Here we just mapped the general r/m constraints to 
> > > > > > non-EGPR
> > > > > > +   constraints, will eventually map all the usable constraints in 
> > > > > > the future. */
> > > > >
> > > > > I think there should be some constraint which explicitly has all the 
> > > > > 32
> > > > > GPRs, like there is one for just all 16 GPRs (h), so that regardless 
> > > > > of
> > > > > -mapx-inline-asm-use-gpr32 one can be explicit what the inline asm 
> > > > > wants.
> > > > >
> > > > > Also, what about the "g" constraint?  Shouldn't there be another for 
> > > > > "g"
> > > > > without r16..r31?  What about the various other memory
> > > > > constraints ("<", "o", ...)?
> > > >
> > > > I think we should leave all existing constraints as they are, so "r"
> > > > covers only GPR16, "m" and "o" to only use GPR16. We can then
> > > > introduce "h" to instructions that have the ability to handle EGPR.
> > > > This would be somehow similar to the SSE -> AVX512F transition, where
> > > > we still have "x" for SSE16 and "v" was introduced as a separate
> > > > register class for EVEX SSE registers. This way, asm will be
> > > > compatible, when "r", "m", "o" and "g" are used. The new memory
> > > > constraint "Bt", should allow new registers, and should be added to
> > > > the constraint string as a separate constraint, and conditionally
> > > > enabled by relevant "isa" (AKA "enabled") attribute.
> > >
> > > The extended constraint can work for registers, but for memory it is more
> > > complicated.
> >
> > Yes, unfortunately. The compiler assumes that an unchangeable register
> > class is used for BASE/INDEX registers. I have hit this limitation
> > when trying to implement memory support for instructions involving
> > 8-bit high registers (%ah, %bh, %ch, %dh), which do not support REX
> > registers, also inside memory operand. (You can see the "hack" in e.g.
> > *extzvqi_mem_rex64" and corresponding peephole2 with the original
> > *extzvqi pattern). I am aware that dynamic insn-dependent BASE/INDEX
> > register class is the major limitation in the compiler, so perhaps the
> > strategy on how to override this limitation should be discussed with
> > the register allocator author first. Perhaps adding an insn attribute
> > to insn RTX pattern to specify different BASE/INDEX register sets can
> > be a better solution than passing insn RTX to the register allocator.
> >
> > The above idea still 

[PATCH v5 3/4] libstdc++: Optimize is_fundamental trait performance

2023-09-01 Thread Ken Matsui via Gcc-patches
This patch optimizes the performance of the is_fundamental trait by
dispatching to the new __is_arithmetic built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_fundamental_v): Use __is_arithmetic
built-in trait.
(is_fundamental): Likewise. Optimize the original implementation.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 7ebbe04c77b..cf24de2fcac 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -668,11 +668,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 
   /// is_fundamental
+#if __has_builtin(__is_arithmetic)
+  template
+struct is_fundamental
+: public __bool_constant<__is_arithmetic(_Tp)
+ || is_void<_Tp>::value
+ || is_null_pointer<_Tp>::value>
+{ };
+#else
   template
 struct is_fundamental
-: public __or_, is_void<_Tp>,
-  is_null_pointer<_Tp>>::type
+: public __bool_constant::value
+ || is_void<_Tp>::value
+ || is_null_pointer<_Tp>::value>
 { };
+#endif
 
   /// is_object
   template
@@ -3209,13 +3219,16 @@ template 
 #if __has_builtin(__is_arithmetic)
 template 
   inline constexpr bool is_arithmetic_v = __is_arithmetic(_Tp);
+template 
+  inline constexpr bool is_fundamental_v
+= __is_arithmetic(_Tp) || is_void_v<_Tp> || is_null_pointer_v<_Tp>;
 #else
 template 
   inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
-#endif
-
 template 
   inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
+#endif
+
 template 
   inline constexpr bool is_object_v = is_object<_Tp>::value;
 template 
-- 
2.42.0



[PATCH v5 2/4] libstdc++: Optimize is_arithmetic trait performance

2023-09-01 Thread Ken Matsui via Gcc-patches
This patch optimizes the performance of the is_arithmetic trait by
dispatching to the new __is_arithmetic built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_arithmetic): Use __is_arithmetic
built-in trait.
(is_arithmetic_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 0e7a9c9c7f3..7ebbe04c77b 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -655,10 +655,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   /// is_arithmetic
+#if __has_builtin(__is_arithmetic)
+  template
+struct is_arithmetic
+: public __bool_constant<__is_arithmetic(_Tp)>
+{ };
+#else
   template
 struct is_arithmetic
 : public __or_, is_floating_point<_Tp>>::type
 { };
+#endif
 
   /// is_fundamental
   template
@@ -3198,8 +3205,15 @@ template 
   inline constexpr bool is_reference_v<_Tp&> = true;
 template 
   inline constexpr bool is_reference_v<_Tp&&> = true;
+
+#if __has_builtin(__is_arithmetic)
+template 
+  inline constexpr bool is_arithmetic_v = __is_arithmetic(_Tp);
+#else
 template 
   inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
+#endif
+
 template 
   inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
 template 
-- 
2.42.0



[PATCH v5 1/4] c++, libstdc++: Implement __is_arithmetic built-in trait

2023-09-01 Thread Ken Matsui via Gcc-patches
This patch implements built-in trait for std::is_arithmetic.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_arithmetic.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_ARITHMETIC.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_arithmetic.
* g++.dg/ext/is_arithmetic.C: New test.
* g++.dg/tm/pr46567.C (__is_arithmetic): Rename to ...
(__is_arith): ... this.
* g++.dg/torture/pr57107.C: Likewise.

libstdc++-v3/ChangeLog:

* include/bits/cpp_type_traits.h (__is_arithmetic): Rename to ...
(__is_arith): ... this.
* include/c_global/cmath: Use __is_arith instead.
* include/c_std/cmath: Likewise.
* include/tr1/cmath: Likewise.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc|  3 ++
 gcc/cp/cp-trait.def |  1 +
 gcc/cp/semantics.cc |  4 ++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C|  3 ++
 gcc/testsuite/g++.dg/ext/is_arithmetic.C| 33 ++
 gcc/testsuite/g++.dg/tm/pr46567.C   |  6 +--
 gcc/testsuite/g++.dg/torture/pr57107.C  |  4 +-
 libstdc++-v3/include/bits/cpp_type_traits.h |  4 +-
 libstdc++-v3/include/c_global/cmath | 48 ++---
 libstdc++-v3/include/c_std/cmath| 24 +--
 libstdc++-v3/include/tr1/cmath  | 24 +--
 11 files changed, 99 insertions(+), 55 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_arithmetic.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 8cf0f2d0974..bd517d08843 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3754,6 +3754,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_AGGREGATE:
   inform (loc, "  %qT is not an aggregate", t1);
   break;
+case CPTK_IS_ARITHMETIC:
+  inform (loc, "  %qT is not an arithmetic type", t1);
+  break;
 case CPTK_IS_TRIVIALLY_COPYABLE:
   inform (loc, "  %qT is not trivially copyable", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 8b7fece0cc8..a95aeeaf778 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_ARITHMETIC, "__is_arithmetic", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 /* FIXME Added space to avoid direct usage in GCC 13.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8fb47fd179e..4531f047d73 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12118,6 +12118,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_UNION:
   return type_code1 == UNION_TYPE;
 
+case CPTK_IS_ARITHMETIC:
+  return ARITHMETIC_TYPE_P (type1);
+
 case CPTK_IS_ASSIGNABLE:
   return is_xible (MODIFY_EXPR, type1, type2);
 
@@ -12296,6 +12299,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_ENUM:
 case CPTK_IS_UNION:
 case CPTK_IS_SAME:
+case CPTK_IS_ARITHMETIC:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index f343e153e56..3d63b0101d1 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -146,3 +146,6 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__is_arithmetic)
+# error "__has_builtin (__is_arithmetic) failed"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/is_arithmetic.C 
b/gcc/testsuite/g++.dg/ext/is_arithmetic.C
new file mode 100644
index 000..fd35831f646
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_arithmetic.C
@@ -0,0 +1,33 @@
+// { dg-do compile { target c++11 } }
+
+#include 
+
+using namespace __gnu_test;
+
+#define SA(X) static_assert((X),#X)
+#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT)  \
+  SA(TRAIT(TYPE) == EXPECT);   \
+  SA(TRAIT(const TYPE) == EXPECT); \
+  SA(TRAIT(volatile TYPE) == EXPECT);  \
+  SA(TRAIT(const volatile TYPE) == EXPECT)
+
+SA_TEST_CATEGORY(__is_arithmetic, void, false);
+
+SA_TEST_CATEGORY(__is_arithmetic, char, true);
+SA_TEST_CATEGORY(__is_arithmetic, signed char, true);
+SA_TEST_CATEGORY(__is_arithmetic, unsigned char, true);
+SA_TEST_CATEGORY(__is_arithmetic, wchar_t, true);
+SA_TEST_CATEGORY(__is_arithmetic, short, true);

Re: [PATCH] MATCH: `(nop_convert)-a` into -(nop_convert)a if the negate is single use and a is known not to be signed min value

2023-09-01 Thread Richard Biener via Gcc-patches
On Fri, Sep 1, 2023 at 4:27 AM Andrew Pinski via Gcc-patches
 wrote:
>
> This pushes the conversion further down the chain which allows to optimize 
> away more
> conversions in many cases.

But when building (T1)(T2)-x it will make simplifying (T1)(T2) more difficult
as we'd need a

(convert (negate (convert ...)))

pattern for that?  So I'm not convinced this is the correct approach to the
cases you want to optimize?  The testcases actually are of the
form (T1)-(T2)x so hoisting the other way around would have worked as well
(if the outer convert would have been folded).

Are there any existing cases where we push/pull (nop) conversions around
unary operations?

Should we pay the price and simply have patterns for
(convert (unary (convert ...)))?

[how nice is the RTL world without signedness of operands but
signed/unsigned operation variants ...]

Richard.

> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> PR tree-optimization/107765
> PR tree-optimization/107137
>
> gcc/ChangeLog:
>
> * match.pd (`(nop_convert)-a`): New pattern.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/neg-cast-1.c: New test.
> * gcc.dg/tree-ssa/neg-cast-2.c: New test.
> * gcc.dg/tree-ssa/neg-cast-3.c: New test.
> ---
>  gcc/match.pd   | 31 ++
>  gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c | 17 
>  gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c | 20 ++
>  gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c | 15 +++
>  4 files changed, 83 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 487a7e38719..3cff9b03d92 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -959,6 +959,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  #endif
> 
>
> +/* (nop_cast)-var -> -(nop_cast)(var)
> +   if -var is known to not overflow; that is does not include
> +   the signed integer MIN. */
> +(simplify
> + (convert (negate:s @0))
> + (if (INTEGRAL_TYPE_P (type)
> +  && tree_nop_conversion_p (type, TREE_TYPE (@0)))
> +  (with {
> +/* If the top is not set, there is no overflow happening. */
> +bool contains_signed_min = !wi::ges_p (tree_nonzero_bits (@0), 0);
> +#if GIMPLE
> +int_range_max vr;
> +if (contains_signed_min
> +&& TREE_CODE (@0) == SSA_NAME
> +   && get_range_query (cfun)->range_of_expr (vr, @0)
> +   && !vr.undefined_p ())
> +  {
> +tree stype = signed_type_for (type);
> +   auto minvalue = wi::min_value (stype);
> +   int_range_max valid_range (TREE_TYPE (@0), minvalue, minvalue);
> +   vr.intersect (valid_range);
> +   /* If the range does not include min value,
> +  then we can do this change around. */
> +   if (vr.undefined_p ())
> + contains_signed_min = false;
> +  }
> +#endif
> +   }
> +   (if (!contains_signed_min)
> +(negate (convert @0))
> +
>  (for op (negate abs)
>   /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
>   (for coss (COS COSH)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c
> new file mode 100644
> index 000..7ddf40aca29
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-evrp" } */
> +/* PR tree-optimization/107765 */
> +
> +#include 
> +
> +int a(int input)
> +{
> +if (input == INT_MIN) __builtin_unreachable();
> +unsigned t = input;
> +int tt =  -t;
> +return tt == -input;
> +}
> +
> +/* Should be able to optimize this down to just `return 1;` during evrp. */
> +/* { dg-final { scan-tree-dump "return 1;" "evrp" } } */
> +/* { dg-final { scan-tree-dump-not " - " "evrp" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c
> new file mode 100644
> index 000..ce49079e235
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-fre3 -fdump-tree-optimized" } */
> +/* part of PR tree-optimization/108397 */
> +
> +long long
> +foo (unsigned char o)
> +{
> +  unsigned long long t1 = -(long long) (o == 0);
> +  unsigned long long t2 = -(long long) (t1 != 0);
> +  unsigned long long t3 = -(long long) (t1 <= t2);
> +  return t3;
> +}
> +
> +/* Should be able to optimize this down to just `return -1;` during fre3. */
> +/* { dg-final { scan-tree-dump "return -1;" "fre3" } } */
> +/* FRE does not remove all dead statements */
> +/* { dg-final { scan-tree-dump-not " - " "fre3" { xfail *-*-* } } } */
> +
> +/* { dg-final { scan-tree-dump "return -1;" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " - " "optimized" 

Re: [RFC] libstdc++: Make --enable-libstdcxx-backtrace=auto default to yes

2023-09-01 Thread Jonathan Wakely via Gcc-patches
On Wed, 23 Aug 2023 at 17:03, Jonathan Wakely via Libstdc++
 wrote:
>
> Any objections to this? It's a C++23 feature, so should be enabled by
> default.

I've pushed this to trunk, so let's see what breaks!


>
> -- >8 --
>
> This causes libstdc++_libbacktrace.a to be built by default. This might
> fail on some targets, in which case we can make the 'auto' choice expand
> to either 'yes' or 'no' depending on the target.
>
> libstdc++-v3/ChangeLog:
>
> * acinclude.m4 (GLIBCXX_ENABLE_BACKTRACE): Default to yes.
> * configure: Regenerate.
> ---
>  libstdc++-v3/acinclude.m4 | 2 +-
>  libstdc++-v3/configure| 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
> index b25378eaace..50c808c6b2d 100644
> --- a/libstdc++-v3/acinclude.m4
> +++ b/libstdc++-v3/acinclude.m4
> @@ -5481,7 +5481,7 @@ BACKTRACE_CPPFLAGS="$BACKTRACE_CPPFLAGS 
> -DBACKTRACE_ELF_SIZE=$elfsize"
>
>AC_MSG_CHECKING([whether to build libbacktrace support])
>if test "$enable_libstdcxx_backtrace" = "auto"; then
> -enable_libstdcxx_backtrace=no
> +enable_libstdcxx_backtrace=yes
>fi
>AC_MSG_RESULT($enable_libstdcxx_backtrace)
>if test "$enable_libstdcxx_backtrace" = "yes"; then
>



[committed] libstdc++: Use dg-require-filesystem-ts in link test

2023-09-01 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

This test expects to be able to link, which fails if there are undefined
references to chdir, mkdir etc. in fs_ops.o in the libstdc++.a archive.

libstdc++-v3/ChangeLog:

* testsuite/27_io/filesystem/path/108636.cc: Add dg-require for
filesystem support.
---
 libstdc++-v3/testsuite/27_io/filesystem/path/108636.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/108636.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/path/108636.cc
index d58de461090..73742df93b0 100644
--- a/libstdc++-v3/testsuite/27_io/filesystem/path/108636.cc
+++ b/libstdc++-v3/testsuite/27_io/filesystem/path/108636.cc
@@ -1,5 +1,6 @@
 // { dg-do link { target c++17 } }
 // { dg-options "-fkeep-inline-functions" }
+// { dg-require-filesystem-ts "" }
 
 #include 
 int main()
-- 
2.41.0



[committed] libstdc++: Avoid useless dependency on read_symlink from tzdb

2023-09-01 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

chrono::tzdb::current_zone uses filesystem::read_symlink, which creates
a dependency on the fs_ops.o object in libstdc++.a, which then creates
dependencies on several OS functions if --gc-sections isn't used. For
more details see PR libstdc++/104167 comment 8 and comment 11.

In the cases where that causes linker failures, we probably don't have
readlink anyway, so the filesystem::read_symlink call will always fail.
Repeat the preprocessor conditions for filesystem::read_symlink in the
body of chrono::tzdb::current_zone so that we don't create a
dependency on fs_ops.o for a function that will always fail.

libstdc++-v3/ChangeLog:

* src/c++20/tzdb.cc (tzdb::current_zone): Check configure macros
for POSIX readlink before using filesystem::read_symlink.
---
 libstdc++-v3/src/c++20/tzdb.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/libstdc++-v3/src/c++20/tzdb.cc b/libstdc++-v3/src/c++20/tzdb.cc
index 0fcbf6a4824..d22cea7e070 100644
--- a/libstdc++-v3/src/c++20/tzdb.cc
+++ b/libstdc++-v3/src/c++20/tzdb.cc
@@ -1635,6 +1635,9 @@ namespace std::chrono
 // TODO cache this function's result?
 
 #ifndef _AIX
+// Repeat the preprocessor condition used by filesystem::read_symlink,
+// to avoid a dependency on src/c++17/fs_ops.o if it won't work anyway.
+#if defined(_GLIBCXX_HAVE_READLINK) && defined(_GLIBCXX_HAVE_SYS_STAT_H)
 error_code ec;
 // This should be a symlink to e.g. /usr/share/zoneinfo/Europe/London
 auto path = filesystem::read_symlink("/etc/localtime", ec);
@@ -1653,6 +1656,7 @@ namespace std::chrono
  return tz;
  }
   }
+#endif
 // Otherwise, look for a file naming the time zone.
 string_view files[] {
   "/etc/timezone",// Debian derivates
-- 
2.41.0



Re: [PATCH 06/13] [APX EGPR] Map reg/mem constraints in inline asm to non-EGPR constraint.

2023-09-01 Thread Richard Sandiford via Gcc-patches
Uros Bizjak via Gcc-patches  writes:
> On Thu, Aug 31, 2023 at 11:18 AM Jakub Jelinek via Gcc-patches
>  wrote:
>>
>> On Thu, Aug 31, 2023 at 04:20:17PM +0800, Hongyu Wang via Gcc-patches wrote:
>> > From: Kong Lingling 
>> >
>> > In inline asm, we do not know if the insn can use EGPR, so disable EGPR
>> > usage by default from mapping the common reg/mem constraint to non-EGPR
>> > constraints. Use a flag mapx-inline-asm-use-gpr32 to enable EGPR usage
>> > for inline asm.
>> >
>> > gcc/ChangeLog:
>> >
>> >   * config/i386/i386.cc (INCLUDE_STRING): Add include for
>> >   ix86_md_asm_adjust.
>> >   (ix86_md_asm_adjust): When APX EGPR enabled without specifying the
>> >   target option, map reg/mem constraints to non-EGPR constraints.
>> >   * config/i386/i386.opt: Add option mapx-inline-asm-use-gpr32.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> >   * gcc.target/i386/apx-inline-gpr-norex2.c: New test.
>> > ---
>> >  gcc/config/i386/i386.cc   |  44 +++
>> >  gcc/config/i386/i386.opt  |   5 +
>> >  .../gcc.target/i386/apx-inline-gpr-norex2.c   | 107 ++
>> >  3 files changed, 156 insertions(+)
>> >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
>> >
>> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
>> > index d26d9ab0d9d..9460ebbfda4 100644
>> > --- a/gcc/config/i386/i386.cc
>> > +++ b/gcc/config/i386/i386.cc
>> > @@ -17,6 +17,7 @@ You should have received a copy of the GNU General 
>> > Public License
>> >  along with GCC; see the file COPYING3.  If not see
>> >  .  */
>> >
>> > +#define INCLUDE_STRING
>> >  #define IN_TARGET_CODE 1
>> >
>> >  #include "config.h"
>> > @@ -23077,6 +23078,49 @@ ix86_md_asm_adjust (vec , vec & 
>> > /*inputs*/,
>> >bool saw_asm_flag = false;
>> >
>> >start_sequence ();
>> > +  /* TODO: Here we just mapped the general r/m constraints to non-EGPR
>> > +   constraints, will eventually map all the usable constraints in the 
>> > future. */
>>
>> I think there should be some constraint which explicitly has all the 32
>> GPRs, like there is one for just all 16 GPRs (h), so that regardless of
>> -mapx-inline-asm-use-gpr32 one can be explicit what the inline asm wants.
>>
>> Also, what about the "g" constraint?  Shouldn't there be another for "g"
>> without r16..r31?  What about the various other memory
>> constraints ("<", "o", ...)?
>
> I think we should leave all existing constraints as they are, so "r"
> covers only GPR16, "m" and "o" to only use GPR16. We can then
> introduce "h" to instructions that have the ability to handle EGPR.

Yeah.  I'm jumping in without having read the full thread, sorry,
but the current mechanism for handling this is TARGET_MEM_CONSTRAINT
(added for s390).  That is, TARGET_MEM_CONSTRAINT can be defined to some
new constraint that is more general than the traditional "m" constraint.
This constraint is then the one that is associated with memory_operand
etc.  "m" can then be defined explicitly to the old definition,
so that existing asms continue to work.

So if the port wants generic internal memory addresses to use the
EGPR set (sounds reasonable), then TARGET_MEM_CONSTRAINT would be
a new constraint that maps to those addresses.

Thanks,
Richard


RE: [PATCH] RISC-V: Enable VECT_COMPARE_COSTS by default

2023-09-01 Thread Li, Pan2 via Gcc-patches
Committed, thanks Robin.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Robin Dapp via Gcc-patches
Sent: Friday, September 1, 2023 5:58 PM
To: Juzhe-Zhong ; gcc-patches@gcc.gnu.org
Cc: kito.ch...@sifive.com; kito.ch...@gmail.com
Subject: Re: [PATCH] RISC-V: Enable VECT_COMPARE_COSTS by default

Hi Juzhe,

thanks, this is OK, we would have needed this sooner or later anyway.

Regards
 Robin



RE: [PATCH] RISC-V: Add dynamic LMUL compile option

2023-09-01 Thread Li, Pan2 via Gcc-patches
Committed, thanks Robin.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Robin Dapp via Gcc-patches
Sent: Friday, September 1, 2023 5:58 PM
To: Juzhe-Zhong ; gcc-patches@gcc.gnu.org
Cc: kito.ch...@sifive.com; kito.ch...@gmail.com
Subject: Re: [PATCH] RISC-V: Add dynamic LMUL compile option

LGTM

Regards
 Robin



[committed] libstdc++: Fix how chrono::parse handles errors for time-of-day values

2023-09-01 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

We fail to diagnose an error and extract an incorrect time for cases
like "25:59" >> parse("%H:%M", mins). The bad "25" hour value gets
ignored (on the basis that we might not care about it if trying to
extract something like a weekday or a month name), but then when we get
to the end of the function we think we have a valid time from "59" and
so the result is 00:59.

The problem is that the '__bad_h' value is used for "no hour value read
yet" as well as "bad hour value read". If we just set __h = __bad_h and
continue, we can't tell later that we read an invalid hour.

The fix is to set failbit early when we're trying to extract a
time-of-day (e.g. duration or time_point) and we encounter an invalid
hour, minute, or second value. We can still delay other error checking
to the end.

libstdc++-v3/ChangeLog:

* include/bits/chrono_io.h (_Parser::operator()): Set failbit
early if invalid values are read when _M_need & _TimeOfDay is
non-zero.
* testsuite/std/time/parse.cc: Check that "25:59" cannot be
parsed for "%H:%M".
---
 libstdc++-v3/include/bits/chrono_io.h| 53 +---
 libstdc++-v3/testsuite/std/time/parse.cc |  7 
 2 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index f359571b4db..7352df095ff 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -3327,7 +3327,16 @@ namespace __detail
  __h = __bad_h;
}
  else if (__c == 'H' && __val >= 0 && __val <= 23)
-   __h = hours(__val);
+   {
+ __h = hours(__val);
+ __h12 = __bad_h;
+   }
+ else
+   {
+ if (_M_need & _ChronoParts::_TimeOfDay)
+   __err |= ios_base::failbit;
+ break;
+   }
}
  __parts |= _ChronoParts::_TimeOfDay;
  break;
@@ -3392,9 +3401,8 @@ namespace __detail
__min = minutes(__val);
  else
{
- __h = __bad_h;
- __min = __bad_min;
- __s = __bad_sec;
+ if (_M_need & _ChronoParts::_TimeOfDay)
+   __err |= ios_base::failbit;
  break;
}
}
@@ -3481,33 +3489,31 @@ namespace __detail
  else
{
  auto __val = __read_unsigned(2);
- if (__val == -1 || __val > 23)
+ if (__val == -1 || __val > 23) [[unlikely]]
{
- __h = __bad_h;
- __min = __bad_min;
- __s = __bad_sec;
- break;
-   }
- if (!__read_chr(':'))
-   {
- __err |= ios_base::failbit;
+ if (_M_need & _ChronoParts::_TimeOfDay)
+   __err |= ios_base::failbit;
  break;
}
+ if (!__read_chr(':')) [[unlikely]]
+   break;
  __h = hours(__val);
 
  __val = __read_unsigned(2);
- if (__val == -1 || __val > 60)
+ if (__val == -1 || __val > 60) [[unlikely]]
{
- __h = __bad_h;
- __min = __bad_min;
- __s = __bad_sec;
+ if (_M_need & _ChronoParts::_TimeOfDay)
+   __err |= ios_base::failbit;
  break;
}
  __min = minutes(__val);
 
- __parts |= _ChronoParts::_TimeOfDay;
-
- if (__c != 'T' || !__read_chr(':'))
+ if (__c == 'R')
+   {
+ __parts |= _ChronoParts::_TimeOfDay;
+ break;
+   }
+ else if (!__read_chr(':')) [[unlikely]]
break;
}
  [[fallthrough]];
@@ -3527,13 +3533,12 @@ namespace __detail
   ratio<1>>)
{
  auto __val = __read_unsigned(__num ? __num : 2);
- if (0 <= __val && __val <= 59)
+ if (0 <= __val && __val <= 59) [[likely]]
__s = seconds(__val);
 

[committed] libstdc++: Simplify __format::_Sink::_M_reset

2023-09-01 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

Using an offset as the second argument instead of an iterator makes it
easier for callers, as they don't need to create an lvalue span in order
to get an iterator from it for the _M_reset call.

libstdc++-v3/ChangeLog:

* include/std/format (__format::_Sink::_M_reset): Change second
argument from iterator to offset.
---
 libstdc++-v3/include/std/format | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index fe2caa58688..128a5b79282 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -2537,11 +2537,10 @@ namespace __format
 
   // Replace the current output range.
   void
-  _M_reset(span<_CharT> __s,
-  typename span<_CharT>::iterator __next) noexcept
+  _M_reset(span<_CharT> __s, size_t __pos = 0) noexcept
   {
_M_span = __s;
-   _M_next = __next;
+   _M_next = __s.begin() + __pos;
   }
 
   // Called by the iterator for *it++ = c
@@ -2599,7 +2598,7 @@ namespace __format
   // A sink that fills a sequence (e.g. std::string, std::vector, std::deque).
   // Writes to a buffer then appends that to the sequence when it fills up.
   template
-class _Seq_sink : public _Buf_sink
+class _Seq_sink final : public _Buf_sink
 {
   using _CharT = typename _Seq::value_type;
 
@@ -2610,6 +2609,8 @@ namespace __format
   _M_overflow() override
   {
auto __s = this->_M_used();
+   if (__s.empty())
+ return;
if constexpr (__is_specialization_of<_Seq, basic_string>)
  _M_seq.append(__s.data(), __s.size());
else
@@ -2618,6 +2619,9 @@ namespace __format
   }
 
 public:
+  // TODO: for SSO string, use SSO buffer as initial span, then switch
+  // to _M_buf if it overflows? Or even do that for all unused capacity?
+
   [[__gnu__::__always_inline__]]
   _Seq_sink() noexcept(is_nothrow_default_constructible_v<_Seq>)
   { }
@@ -2722,21 +2726,20 @@ namespace __format
  return; // No need to switch to internal buffer yet.
 
auto __s = this->_M_used();
-   _M_count += __s.size();
 
if (_M_max >= 0)
  {
+   _M_count += __s.size();
// Span was already sized for the maximum character count,
// if it overflows then any further output must go to the
// internal buffer, to be discarded.
-   span<_CharT> __buf{_M_buf};
-   this->_M_reset(__buf, __buf.begin());
+   this->_M_reset(this->_M_buf);
  }
else
  {
// No maximum character count. Just extend the span to allow
// writing more characters to it.
-   this->_M_reset({__s.data(), __s.size() + 1024}, __s.end());
+   this->_M_reset({__s.data(), __s.size() + 1024}, __s.size());
  }
   }
 
@@ -3473,6 +3476,7 @@ namespace __format
 
   template
 template
+  inline
   basic_format_args<_Context>::
   basic_format_args(const _Store<_Args...>& __store) noexcept
   {
@@ -4063,7 +4067,7 @@ namespace __format
 {
 #if 1
   template
-class _Counting_sink : public _Iter_sink<_CharT, _CharT*>
+class _Counting_sink final : public _Iter_sink<_CharT, _CharT*>
 {
 public:
   _Counting_sink() : _Iter_sink<_CharT, _CharT*>(nullptr, 0) { }
-- 
2.41.0



[committed] libstdc++: Do not allow chrono::parse to overflow for %C [PR111162]

2023-09-01 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

PR libstdc++/111162
* include/bits/chrono_io.h (_Parser::operator()): Check %C
values are in range of year::min() to year::max().
* testsuite/std/time/parse.cc: Check out of range centuries.
---
 libstdc++-v3/include/bits/chrono_io.h|  9 -
 libstdc++-v3/testsuite/std/time/parse.cc | 12 
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/chrono_io.h 
b/libstdc++-v3/include/bits/chrono_io.h
index d558802e7d8..f359571b4db 100644
--- a/libstdc++-v3/include/bits/chrono_io.h
+++ b/libstdc++-v3/include/bits/chrono_io.h
@@ -3171,7 +3171,14 @@ namespace __detail
{
  auto __v = __read_signed(__num ? __num : 2);
  if (!__is_failed(__err))
-   __century = __v * 100;
+   {
+ int __cmin = (int)year::min() / 100;
+ int __cmax = (int)year::max() / 100;
+ if (__cmin <= __v && __v <= __cmax)
+   __century = __v * 100;
+ else
+   __century = -2; // This prevents guessing century.
+   }
}
  else if (__mod == 'E')
{
diff --git a/libstdc++-v3/testsuite/std/time/parse.cc 
b/libstdc++-v3/testsuite/std/time/parse.cc
index 9b36c5d7db4..46eb7f28c85 100644
--- a/libstdc++-v3/testsuite/std/time/parse.cc
+++ b/libstdc++-v3/testsuite/std/time/parse.cc
@@ -251,6 +251,18 @@ test_errors()
   is >> parse("%H:%M %3y", y); // 61min is out of range but not needed
   VERIFY( is.eof() && ! is.fail() );
   VERIFY( y == 2010y );
+
+  is.clear();
+  is.str("328 00");
+  is >> parse("%3C %y", y); // 328 is out of range for %C (PR libstdc++/111162)
+  VERIFY( is.fail() );
+  VERIFY( y == 2010y );
+
+  is.clear();
+  is.str("-328 00");
+  is >> parse("%3C %y", y); // -328 is out of range for %C
+  VERIFY( is.fail() );
+  VERIFY( y == 2010y );
 }
 
 void
-- 
2.41.0



Re: [RFC] gimple ssa: SCCP - A new PHI optimization pass

2023-09-01 Thread Richard Biener via Gcc-patches
On Fri, 1 Sep 2023, Filip Kastl wrote:

> > That's interesting.  Your placement at
> > 
> >   NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
> >   NEXT_PASS (pass_phiopt, true /* early_p */);
> > + NEXT_PASS (pass_sccp);
> > 
> > and
> > 
> >NEXT_PASS (pass_tsan);
> >NEXT_PASS (pass_dse, true /* use DR analysis */);
> >NEXT_PASS (pass_dce);
> > +  NEXT_PASS (pass_sccp);
> > 
> > isn't immediately after the "best" existing pass we have to
> > remove dead PHIs which is pass_cd_dce.  phiopt might leave
> > dead PHIs around and the second instance runs long after the
> > last CD-DCE.
> > 
> > So I wonder if your pass just detects unnecessary PHIs we'd have
> > removed by other means and what survives until RTL expansion is
> > what we should count?
> > 
> > Can you adjust your original early placement to right after
> > the cd-dce pass and for the late placement turn the dce pass
> > before it into cd-dce and re-do your measurements?
> 
> So I did this
> 
>   NEXT_PASS (pass_dse);
>   NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
>   NEXT_PASS (pass_sccp);
>   NEXT_PASS (pass_phiopt, true /* early_p */);
>   NEXT_PASS (pass_tail_recursion); 
> 
> and this
> 
>   NEXT_PASS (pass_dse, true /* use DR analysis */);
>   NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
>   NEXT_PASS (pass_sccp);
>   /* Pass group that runs when 1) enabled, 2) there are loops
> 
> and got these results:
> 
> 500.perlbench_r
> Started with (1) 30318
> Ended with (1) 26219
> Removed PHI % (1) 13.52002110957187149600
> Started with (2) 39043
> Ended with (2) 38941
> Removed PHI % (2) .26125041620777092000
> 
> 502.gcc_r
> Started with (1) 148361
> Ended with (1) 140464
> Removed PHI % (1) 5.32282742769326170700
> Started with (2) 216209
> Ended with (2) 215367
> Removed PHI % (2) .38943799749316633500
> 
> 505.mcf_r
> Started with (1) 342
> Ended with (1) 304
> Removed PHI % (1) 11.1200
> Started with (2) 437
> Ended with (2) 433
> Removed PHI % (2) .91533180778032036700
>  
> 523.xalancbmk_r
> Started with (1) 62995
> Ended with (1) 58289 
> Removed PHI % (1) 7.47043416144138423700
> Started with (2) 134026
> Ended with (2) 133193
> Removed PHI % (2) .62152119737961291100
>   
> 531.deepsjeng_r
> Started with (1) 1402
> Ended with (1) 1264
> Removed PHI % (1) 9.84308131241084165500
> Started with (2) 1928
> Ended with (2) 1920
> Removed PHI % (2) .41493775933609958600
> 
> 541.leela_r
> Started with (1) 3398
> Ended with (1) 3060
> Removed PHI % (1) 9.94702766333137139500
> Started with (2) 4473
> Ended with (2) 4453
> Removed PHI % (2) .44712720769058797300
> 
> 557.xz_r
> Started with (1) 47
> Ended with (1) 44
> Removed PHI % (1) 6.38297872340425532000
> Started with (2) 43
> Ended with (2) 43
> Removed PHI % (2) 0
> 
> These measurements don't differ very much from the previous. It seems to me
> that phiopt does output some redundant PHIs but the vast majority of the
> eliminated PHIs are generated in earlier passes and cd_dce isn't able to get
> rid of them.
> 
> A noteworthy information might be that most of the eliminated PHIs are 
> actually
> trivial PHIs. I consider a PHI to be trivial if it only references itself or
> one other SSA name.

Ah.  The early pass numbers are certainly interesting - can you elaborate
on the last bit?  We have for example loop-closed PHI nodes like

_1 = PHI <_2>

and there are non-trivial degenerate PHIs like

_1 = PHI <_2, _2>

those are generally removed by value-numbering (FRE, DOM and PRE) and SSA 
propagation (CCP and copyprop), they are not "dead" so CD-DCE doesn't
remove them.

But we do have passes removing these kind of PHIs.

The issue with the early pass is likely that we have

  NEXT_PASS (pass_fre, true /* may_iterate */);
^^
would eliminate these kinds of PHIs

  NEXT_PASS (pass_early_vrp);
^^
rewrites into loop-closed SSA, adding many such PHIs

  NEXT_PASS (pass_merge_phi);
  NEXT_PASS (pass_dse);
  NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);

and until here there's no pass eliding the LC SSA PHIs.

You could add a pass_copy_prop after early_vrp, the later sccp
pass shouldn't run into this issue I think so it must be other
passes adding such kind of PHIs.

Maybe you can count single-argument PHIs, degenerate multi-arg PHIs
and "other" PHIs separately as you remove them?


> Here is a comparison of the newest measurements (sccp after cd_dce) with the
> previous ones (sccp after phiopt and dce):
> 
> 500.perlbench_r
>  
> Started with (1-PREV) 30287
> Started with (1-NEW) 30318
>  
> Ended with (1-PREV) 26188
> Ended with (1-NEW) 26219
>  
> Removed PHI % (1-PREV) 13.53385941162875161000
> Removed PHI % (1-NEW) 

Re: [PATCH] expmed: Allow extract_bit_field via mem for low-precision modes.

2023-09-01 Thread Richard Sandiford via Gcc-patches
Robin Dapp via Gcc-patches  writes:
>> It's not just a question of which byte though.  It's also a question
>> of which bit.
>> 
>> One option would be to code-generate for even X and for odd X, and select
>> between them at runtime.  But that doesn't scale well to 2+2X and 1+1X.
>> 
>> Otherwise I think we need to treat the bit position as a variable,
>> with bitpos % 8 and bitpos / 8 being calculated at runtime.
>
> Thanks.  I worked around it with a backend vec_extractQI expander
> so we don't run into that situation directly anymore.  The problem is of
> course still latent and I'm going to look at it again after some other things
> on my plate.

Yeah, sounds like a good workaround.  If the target has an efficient way
of coping with the VLAness then the optab will probably be better than
whatever the generic code ends up being.

Thanks,
Richard


Re: [x86_64 PATCH] Improve __int128 argument passing (in ix86_expand_move).

2023-09-01 Thread Manolis Tsamis
Hi Roger,

I've (accidentally) found a codegen regression that I bisected down to
this patch.
For these two functions:

typedef struct {
  float minx, miny;
  float maxx, maxy;
} AABB;

int TestOverlap(AABB a, AABB b) {
  return a.minx <= b.maxx
  && a.miny <= b.maxy
  && a.maxx >= b.minx
  && a.maxx >= b.minx;
}

int TestOverlap2(AABB a, AABB b) {
  return a.miny <= b.maxy
  && a.maxx >= b.minx;
}

GCC used to produce this code:

TestOverlap:
comiss  xmm3, xmm0
movq    rdx, xmm0
movq    rsi, xmm1
movq    rax, xmm3
jb  .L10
shr rdx, 32
shr rax, 32
movd    xmm0, eax
movd    xmm4, edx
comiss  xmm0, xmm4
jb  .L10
movd    xmm1, esi
xor eax, eax
comiss  xmm1, xmm2
setnb   al
ret
.L10:
xor eax, eax
ret
TestOverlap2:
shufps  xmm0, xmm0, 85
shufps  xmm3, xmm3, 85
comiss  xmm3, xmm0
jb  .L17
xor eax, eax
comiss  xmm1, xmm2
setnb   al
ret
.L17:
xor eax, eax
ret

After this patch codegen gets much worse:

TestOverlap:
movq    rax, xmm1
movq    rdx, xmm2
movq    rsi, xmm0
mov rdi, rax
movq    rax, xmm3
mov rcx, rsi
xchg    rdx, rax
movd    xmm1, edx
mov rsi, rax
mov rax, rdx
comiss  xmm1, xmm0
jb  .L10
shr rcx, 32
shr rax, 32
movd    xmm0, eax
movd    xmm4, ecx
comiss  xmm0, xmm4
jb  .L10
movd    xmm0, esi
movd    xmm1, edi
xor eax, eax
comiss  xmm1, xmm0
setnb   al
ret
.L10:
xor eax, eax
ret
TestOverlap2:
movq    rdx, xmm2
movq    rax, xmm3
movq    rsi, xmm0
xchg    rdx, rax
mov rcx, rsi
mov rsi, rax
mov rax, rdx
shr rcx, 32
shr rax, 32
movd    xmm4, ecx
movd    xmm0, eax
comiss  xmm0, xmm4
jb  .L17
movd    xmm0, esi
xor eax, eax
comiss  xmm1, xmm0
setnb   al
ret
.L17:
xor eax, eax
ret

I saw that you've been improving i386 argument passing, so maybe this
is just a missed case of these additions?

(Can also be seen here https://godbolt.org/z/E4xrEn6KW)

PS: I found the code that clang generates, with cmpleps + pextrw to
avoid the fp->int->fp + shr interesting. I wonder if something like
this could be added to GCC as well.

Thanks!
Manolis

On Thu, Jul 6, 2023 at 5:21 PM Uros Bizjak via Gcc-patches
 wrote:
>
> On Thu, Jul 6, 2023 at 3:48 PM Roger Sayle  wrote:
> >
> > > On Thu, Jul 6, 2023 at 2:04 PM Roger Sayle 
> > > wrote:
> > > >
> > > >
> > > > Passing 128-bit integer (TImode) parameters on x86_64 can sometimes
> > > > result in surprising code.  Consider the example below (from PR 43644):
> > > >
> > > > __uint128 foo(__uint128 x, unsigned long long y) {
> > > >   return x+y;
> > > > }
> > > >
> > > > which currently results in 6 consecutive movq instructions:
> > > >
> > > > foo:    movq    %rsi, %rax
> > > >         movq    %rdi, %rsi
> > > >         movq    %rdx, %rcx
> > > >         movq    %rax, %rdi
> > > >         movq    %rsi, %rax
> > > >         movq    %rdi, %rdx
> > > >         addq    %rcx, %rax
> > > >         adcq    $0, %rdx
> > > >         ret
> > > >
> > > > The underlying issue is that during RTL expansion, we generate the
> > > > following initial RTL for the x argument:
> > > >
> > > > (insn 4 3 5 2 (set (reg:TI 85)
> > > > (subreg:TI (reg:DI 86) 0)) "pr43644-2.c":5:1 -1
> > > >  (nil))
> > > > (insn 5 4 6 2 (set (subreg:DI (reg:TI 85) 8)
> > > > (reg:DI 87)) "pr43644-2.c":5:1 -1
> > > >  (nil))
> > > > (insn 6 5 7 2 (set (reg/v:TI 84 [ x ])
> > > > (reg:TI 85)) "pr43644-2.c":5:1 -1
> > > >  (nil))
> > > >
> > > > which by combine/reload becomes
> > > >
> > > > (insn 25 3 22 2 (set (reg/v:TI 84 [ x ])
> > > > (const_int 0 [0])) "pr43644-2.c":5:1 -1
> > > >  (nil))
> > > > (insn 22 25 23 2 (set (subreg:DI (reg/v:TI 84 [ x ]) 0)
> > > > (reg:DI 93)) "pr43644-2.c":5:1 90 {*movdi_internal}
> > > >  (expr_list:REG_DEAD (reg:DI 93)
> > > > (nil)))
> > > > (insn 23 22 28 2 (set (subreg:DI (reg/v:TI 84 [ x ]) 8)
> > > > (reg:DI 94)) "pr43644-2.c":5:1 90 {*movdi_internal}
> > > >  (expr_list:REG_DEAD (reg:DI 94)
> > > > (nil)))
> > > >
> > > > where the heavy use of SUBREG SET_DESTs creates challenges for both
> > > > combine and register allocation.
> > > >
> > > > The improvement proposed here is to avoid these problematic SUBREGs by
> > > > adding (two) special cases to ix86_expand_move.  For insn 4, which
> > > > sets a TImode destination from a paradoxical SUBREG, to assign the
> > > > lowpart, we can use 

Re: [PATCH v4 4/4] libstdc++: Optimize is_compound trait performance

2023-09-01 Thread Ken Matsui via Gcc-patches
On Tue, Aug 8, 2023 at 1:33 PM Jonathan Wakely  wrote:
>
>
>
> On Tue, 18 Jul 2023 at 08:44, Ken Matsui via Libstdc++ 
>  wrote:
>>
>> This patch optimizes the performance of the is_compound trait by
>> dispatching to the new __is_arithmetic built-in trait.
>>
>> libstdc++-v3/ChangeLog:
>>
>> * include/std/type_traits (is_compound): Use __is_arithmetic
>> built-in trait.
>> (is_compound_v): Use is_fundamental_v instead.
>>
>> Signed-off-by: Ken Matsui 
>> ---
>>  libstdc++-v3/include/std/type_traits | 11 ++-
>>  1 file changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/libstdc++-v3/include/std/type_traits 
>> b/libstdc++-v3/include/std/type_traits
>> index cf24de2fcac..73d9a2b16fc 100644
>> --- a/libstdc++-v3/include/std/type_traits
>> +++ b/libstdc++-v3/include/std/type_traits
>> @@ -702,9 +702,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>  { };
>>
>>/// is_compound
>> +#if __has_builtin(__is_arithmetic)
>> +  template<typename _Tp>
>> +struct is_compound
>> +: public __bool_constant<!(__is_arithmetic(_Tp)
>> + || is_void<_Tp>::value
>> + || is_null_pointer<_Tp>::value)>
>> +{ };
>> +#else
>>template<typename _Tp>
>>  struct is_compound
>>  : public __not_<is_fundamental<_Tp>>::type { };
>> +#endif
>
>
> I think it would be simpler to just do this unconditionally (i.e. just a 
> single definition without using __has_builtin):
>
> template<typename _Tp>
>   struct is_compound
>   : __bool_constant<!is_fundamental<_Tp>::value>
>   { };
>
> This still avoids instantiating __not_. If is_fundamental is much more 
> efficient now, then I think it's OK to instantiate it here. Otherwise we're 
> duplicating the logic for is_fundamental, and just giving ourselves more code 
> to maintain.
>
> Nobody ever uses is_compound anyway!
>
Agreed! Will fix this patch. Thank you!

>
>
>>
>>/// @cond undocumented
>>template
>> @@ -3234,7 +3243,7 @@ template 
>>  template 
>>inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
>>  template 
>> -  inline constexpr bool is_compound_v = is_compound<_Tp>::value;
>> +  inline constexpr bool is_compound_v = !is_fundamental_v<_Tp>;
>>  template 
>>inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value;
>>  template 
>> --
>> 2.41.0
>>


Re: [PATCH] expmed: Allow extract_bit_field via mem for low-precision modes.

2023-09-01 Thread Robin Dapp via Gcc-patches
> It's not just a question of which byte though.  It's also a question
> of which bit.
> 
> One option would be to code-generate for even X and for odd X, and select
> between them at runtime.  But that doesn't scale well to 2+2X and 1+1X.
> 
> Otherwise I think we need to treat the bit position as a variable,
> with bitpos % 8 and bitpos / 8 being calculated at runtime.

Thanks.  I worked around it with a backend vec_extractQI expander
so we don't run into that situation directly anymore.  The problem is of
course still latent and I'm going to look at it again after some other things
on my plate.

Regards
 Robin


Re: [PATCH 11/13] [APX EGPR] Handle legacy insns that only support GPR16 (3/5)

2023-09-01 Thread Hongtao Liu via Gcc-patches
On Thu, Aug 31, 2023 at 5:31 PM Richard Biener via Gcc-patches
 wrote:
>
> On Thu, Aug 31, 2023 at 11:26 AM Richard Biener
>  wrote:
> >
> > On Thu, Aug 31, 2023 at 10:25 AM Hongyu Wang via Gcc-patches
> >  wrote:
> > >
> > > From: Kong Lingling 
> > >
> > > Disable EGPR usage for below legacy insns in opcode map2/3 that have vex
> > > but no evex counterpart.
> > >
> > > insn list:
> > > 1. phminposuw/vphminposuw
> > > 2. ptest/vptest
> > > 3. roundps/vroundps, roundpd/vroundpd,
> > >roundss/vroundss, roundsd/vroundsd
> > > 4. pcmpestri/vpcmpestri, pcmpestrm/vpcmpestrm
> > > 5. pcmpistri/vpcmpistri, pcmpistrm/vpcmpistrm
> >
> > How are GPRs involved in the above?  Or did I misunderstand something?
>
> Following up myself - for the memory operand alternatives I guess.  How about
> simply disabling the memory alternatives when EGPR is active?  Wouldn't
> that simplify the initial patchset a lot?  Re-enabling them when
> deemed important
> could be done as followup then?
>
There are instructions that only support memory operands but don't support
gpr32 (i.e. xsave).
We still need to handle them in the initial patch.
> Richard.
>
> > > 6. aesimc/vaesimc, aeskeygenassist/vaeskeygenassist
> > >
> > > gcc/ChangeLog:
> > >
> > > * config/i386/i386-protos.h (x86_evex_reg_mentioned_p): New
> > > prototype.
> > > * config/i386/i386.cc (x86_evex_reg_mentioned_p): New
> > > function.
> > > * config/i386/i386.md (sse4_1_round2): Set attr gpr32 0
> > > and constraint Bt/BM to all non-evex alternatives, adjust
> > > alternative outputs if evex reg is mentioned.
> > > * config/i386/sse.md (_ptest): Set attr gpr32 0
> > > and constraint Bt/BM to all non-evex alternatives.
> > > (ptesttf2): Likewise.
> > > (_round > > (sse4_1_round): Likewise.
> > > (sse4_2_pcmpestri): Likewise.
> > > (sse4_2_pcmpestrm): Likewise.
> > > (sse4_2_pcmpestr_cconly): Likewise.
> > > (sse4_2_pcmpistr): Likewise.
> > > (sse4_2_pcmpistri): Likewise.
> > > (sse4_2_pcmpistrm): Likewise.
> > > (sse4_2_pcmpistr_cconly): Likewise.
> > > (aesimc): Likewise.
> > > (aeskeygenassist): Likewise.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/i386/apx-legacy-insn-check-norex2.c: Add intrinsic
> > > tests.
> > > ---
> > >  gcc/config/i386/i386-protos.h |  1 +
> > >  gcc/config/i386/i386.cc   | 13 +++
> > >  gcc/config/i386/i386.md   |  3 +-
> > >  gcc/config/i386/sse.md| 93 +--
> > >  .../i386/apx-legacy-insn-check-norex2.c   | 55 ++-
> > >  5 files changed, 132 insertions(+), 33 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> > > index 78eb3e0f584..bbb219e3039 100644
> > > --- a/gcc/config/i386/i386-protos.h
> > > +++ b/gcc/config/i386/i386-protos.h
> > > @@ -65,6 +65,7 @@ extern bool extended_reg_mentioned_p (rtx);
> > >  extern bool x86_extended_QIreg_mentioned_p (rtx_insn *);
> > >  extern bool x86_extended_reg_mentioned_p (rtx);
> > >  extern bool x86_extended_rex2reg_mentioned_p (rtx);
> > > +extern bool x86_evex_reg_mentioned_p (rtx [], int);
> > >  extern bool x86_maybe_negate_const_int (rtx *, machine_mode);
> > >  extern machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
> > >
> > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > index f5d642948bc..ec93c5bab97 100644
> > > --- a/gcc/config/i386/i386.cc
> > > +++ b/gcc/config/i386/i386.cc
> > > @@ -22936,6 +22936,19 @@ x86_extended_rex2reg_mentioned_p (rtx insn)
> > >return false;
> > >  }
> > >
> > > +/* Return true when rtx operands mentions register that must be encoded 
> > > using
> > > +   evex prefix.  */
> > > +bool
> > > +x86_evex_reg_mentioned_p (rtx operands[], int nops)
> > > +{
> > > +  int i;
> > > +  for (i = 0; i < nops; i++)
> > > +if (EXT_REX_SSE_REG_P (operands[i])
> > > +   || x86_extended_rex2reg_mentioned_p (operands[i]))
> > > +  return true;
> > > +  return false;
> > > +}
> > > +
> > >  /* If profitable, negate (without causing overflow) integer constant
> > > of mode MODE at location LOC.  Return true in this case.  */
> > >  bool
> > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > > index 83ad01b43c1..4c305e72389 100644
> > > --- a/gcc/config/i386/i386.md
> > > +++ b/gcc/config/i386/i386.md
> > > @@ -21603,7 +21603,7 @@ (define_expand "significand2"
> > >  (define_insn "sse4_1_round2"
> > >[(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
> > > (unspec:MODEFH
> > > - [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,m,v,m")
> > > + [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,Bt,v,m")
> > >(match_operand:SI 2 "const_0_to_15_operand")]
> > >   UNSPEC_ROUND))]
> > >"TARGET_SSE4_1"
> > > 

Re: [PATCH 06/13] [APX EGPR] Map reg/mem constraints in inline asm to non-EGPR constraint.

2023-09-01 Thread Hongtao Liu via Gcc-patches
On Fri, Sep 1, 2023 at 5:38 PM Uros Bizjak via Gcc-patches
 wrote:
>
> On Fri, Sep 1, 2023 at 11:10 AM Hongyu Wang  wrote:
> >
> > Uros Bizjak via Gcc-patches  于2023年8月31日周四 18:01写道:
> > >
> > > On Thu, Aug 31, 2023 at 11:18 AM Jakub Jelinek via Gcc-patches
> > >  wrote:
> > > >
> > > > On Thu, Aug 31, 2023 at 04:20:17PM +0800, Hongyu Wang via Gcc-patches 
> > > > wrote:
> > > > > From: Kong Lingling 
> > > > >
> > > > > In inline asm, we do not know if the insn can use EGPR, so disable 
> > > > > EGPR
> > > > > usage by default from mapping the common reg/mem constraint to 
> > > > > non-EGPR
> > > > > constraints. Use a flag mapx-inline-asm-use-gpr32 to enable EGPR usage
> > > > > for inline asm.
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > >   * config/i386/i386.cc (INCLUDE_STRING): Add include for
> > > > >   ix86_md_asm_adjust.
> > > > >   (ix86_md_asm_adjust): When APX EGPR enabled without specifying 
> > > > > the
> > > > >   target option, map reg/mem constraints to non-EGPR constraints.
> > > > >   * config/i386/i386.opt: Add option mapx-inline-asm-use-gpr32.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > >   * gcc.target/i386/apx-inline-gpr-norex2.c: New test.
> > > > > ---
> > > > >  gcc/config/i386/i386.cc   |  44 +++
> > > > >  gcc/config/i386/i386.opt  |   5 +
> > > > >  .../gcc.target/i386/apx-inline-gpr-norex2.c   | 107 
> > > > > ++
> > > > >  3 files changed, 156 insertions(+)
> > > > >  create mode 100644 
> > > > > gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
> > > > >
> > > > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > > > index d26d9ab0d9d..9460ebbfda4 100644
> > > > > --- a/gcc/config/i386/i386.cc
> > > > > +++ b/gcc/config/i386/i386.cc
> > > > > @@ -17,6 +17,7 @@ You should have received a copy of the GNU General 
> > > > > Public License
> > > > >  along with GCC; see the file COPYING3.  If not see
> > > > >  .  */
> > > > >
> > > > > +#define INCLUDE_STRING
> > > > >  #define IN_TARGET_CODE 1
> > > > >
> > > > >  #include "config.h"
> > > > > @@ -23077,6 +23078,49 @@ ix86_md_asm_adjust (vec , 
> > > > > vec & /*inputs*/,
> > > > >bool saw_asm_flag = false;
> > > > >
> > > > >start_sequence ();
> > > > > +  /* TODO: Here we just mapped the general r/m constraints to 
> > > > > non-EGPR
> > > > > +   constraints, will eventually map all the usable constraints in 
> > > > > the future. */
> > > >
> > > > I think there should be some constraint which explicitly has all the 32
> > > > GPRs, like there is one for just all 16 GPRs (h), so that regardless of
> > > > -mapx-inline-asm-use-gpr32 one can be explicit what the inline asm 
> > > > wants.
> > > >
> > > > Also, what about the "g" constraint?  Shouldn't there be another for "g"
> > > > without r16..r31?  What about the various other memory
> > > > constraints ("<", "o", ...)?
> > >
> > > I think we should leave all existing constraints as they are, so "r"
> > > covers only GPR16, "m" and "o" to only use GPR16. We can then
> > > introduce "h" to instructions that have the ability to handle EGPR.
> > > This would be somehow similar to the SSE -> AVX512F transition, where
> > > we still have "x" for SSE16 and "v" was introduced as a separate
> > > register class for EVEX SSE registers. This way, asm will be
> > > compatible, when "r", "m", "o" and "g" are used. The new memory
> > > constraint "Bt", should allow new registers, and should be added to
> > > the constraint string as a separate constraint, and conditionally
> > > enabled by relevant "isa" (AKA "enabled") attribute.
> >
> > The extended constraint can work for registers, but for memory it is more
> > complicated.
>
> Yes, unfortunately. The compiler assumes that an unchangeable register
> class is used for BASE/INDEX registers. I have hit this limitation
> when trying to implement memory support for instructions involving
> 8-bit high registers (%ah, %bh, %ch, %dh), which do not support REX
> registers, also inside memory operand. (You can see the "hack" in e.g.
> *extzvqi_mem_rex64" and corresponding peephole2 with the original
> *extzvqi pattern). I am aware that dynamic insn-dependent BASE/INDEX
> register class is the major limitation in the compiler, so perhaps the
> strategy on how to override this limitation should be discussed with
> the register allocator author first. Perhaps adding an insn attribute
> to insn RTX pattern to specify different BASE/INDEX register sets can
> be a better solution than passing insn RTX to the register allocator.
>
> The above idea still does not solve the asm problem on how to select
> correct BASE/INDEX register set for memory operands.
The current approach disables gpr32 for memory operands in asm_operand
by default, but it can be turned on by the option
ix86_apx_inline_asm_use_gpr32 (users need to guarantee the instruction
supports gpr32).
Only ~ 5% of total 

Re: [PATCH 4/4] RISC-V: Add conditional autovec convert(INT<->FP) patterns

2023-09-01 Thread Robin Dapp via Gcc-patches
This one is OK as well, thanks.

Regards
 Robin


[PATCH] riscv: xtheadcondmov: Don't run tests with -Oz

2023-09-01 Thread Christoph Muellner
From: Christoph Müllner 

Recently, these xtheadcondmov tests regressed with -Oz:
* FAIL: gcc.target/riscv/xtheadcondmov-mveqz-imm-eqz.c
* FAIL: gcc.target/riscv/xtheadcondmov-mveqz-imm-not.c
* FAIL: gcc.target/riscv/xtheadcondmov-mvnez-imm-cond.c
* FAIL: gcc.target/riscv/xtheadcondmov-mvnez-imm-nez.c

As -Oz stands for "Optimize aggressively for size rather than speed.",
we need to inspect the generated code, which looks like this:

  -Oz
   :
 0:   e199        bnez    a1,6 <.L2>
 2:   40100513    li  a0,1025
  0006 <.L2>:
 6:   8082        ret

  -O2:
   :
 0:   40100793    li  a5,1025
 4:   40b7950b    th.mveqz    a0,a5,a1
 8:   8082        ret

As the generated code with -Oz consumes less size, there is nothing
wrong in the code generation. Instead, let's not run the xtheadcondmov
tests with -Oz.

Signed-off-by: Christoph Müllner 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/xtheadcondmov-mveqz-imm-eqz.c: Disable for -Oz.
* gcc.target/riscv/xtheadcondmov-mveqz-imm-not.c: Likewise.
* gcc.target/riscv/xtheadcondmov-mveqz-reg-eqz.c: Likewise.
* gcc.target/riscv/xtheadcondmov-mveqz-reg-not.c: Likewise.
* gcc.target/riscv/xtheadcondmov-mvnez-imm-cond.c: Likewise.
* gcc.target/riscv/xtheadcondmov-mvnez-imm-nez.c: Likewise.
* gcc.target/riscv/xtheadcondmov-mvnez-reg-cond.c: Likewise.
* gcc.target/riscv/xtheadcondmov-mvnez-reg-nez.c: Likewise.
---
 gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-eqz.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-not.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-eqz.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-not.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadcondmov-mvnez-imm-cond.c | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadcondmov-mvnez-imm-nez.c  | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadcondmov-mvnez-reg-cond.c | 2 +-
 gcc/testsuite/gcc.target/riscv/xtheadcondmov-mvnez-reg-nez.c  | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-eqz.c 
b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-eqz.c
index 913ae43f21b..9cc9ec1d0c7 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-eqz.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-eqz.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv32gc_xtheadcondmov" { target { rv32 } } } */
 /* { dg-options "-march=rv64gc_xtheadcondmov" { target { rv64 } } } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
 
 int
 not_int_int (int x, int cond)
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-not.c b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-not.c
index 1bc8b838233..491343370b7 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-not.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-imm-not.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv32gc_xtheadcondmov" { target { rv32 } } } */
 /* { dg-options "-march=rv64gc_xtheadcondmov" { target { rv64 } } } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
 
 int
 not_int_int (int x, int cond)
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-eqz.c b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-eqz.c
index 8ef5869a89b..d7227249e84 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-eqz.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-eqz.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv32gc_xtheadcondmov" { target { rv32 } } } */
 /* { dg-options "-march=rv64gc_xtheadcondmov" { target { rv64 } } } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
 
 int
 not_int_int (int x, int cond, int v)
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-not.c b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-not.c
index f9568bee27f..6cc98e36e71 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-not.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mveqz-reg-not.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv32gc_xtheadcondmov" { target { rv32 } } } */
 /* { dg-options "-march=rv64gc_xtheadcondmov" { target { rv64 } } } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
 
 int
 not_int_int (int x, int cond, int v)
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mvnez-imm-cond.c b/gcc/testsuite/gcc.target/riscv/xtheadcondmov-mvnez-imm-cond.c
index 8feddbeb79d..7cce2c71d18 100644

  1   2   >