date:20230825

RE: [PATCH V2] RISC-V: Support LEN_FOLD_EXTRACT_LAST auto-vectorization

2023-08-25 Thread Li, Pan2 via Gcc-patches

Committed, thanks Robin.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Robin Dapp via Gcc-patches
Sent: Thursday, August 24, 2023 6:23 PM
To: Juzhe-Zhong ; gcc-patches@gcc.gnu.org
Cc: rdapp@gmail.com; kito.ch...@gmail.com; kito.ch...@sifive.com; 
jeffreya...@gmail.com
Subject: Re: [PATCH V2] RISC-V: Support LEN_FOLD_EXTRACT_LAST auto-vectorization

LGTM.

Regards
 Robin

[PATCH] PHIOPT: Add dump for match and simplify and early phiopt

2023-08-25 Thread Andrew Pinski via Gcc-patches

This adds dump on the full result of the match-and-simplify
for phiopt and specifically to know if we are rejecting something
due to being in early phi-opt.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (gimple_simplify_phiopt): Add dump information
when resimplify returns true.
(match_simplify_replacement): Print only if accepted the 
match-and-simplify
result rather than the full sequence.
---
 gcc/tree-ssa-phiopt.cc | 70 ++
 1 file changed, 44 insertions(+), 26 deletions(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 7e63fb115db..9993bbe5b76 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -499,7 +499,6 @@ gimple_simplify_phiopt (bool early_p, tree type, gimple 
*comp_stmt,
tree arg0, tree arg1,
gimple_seq *seq)
 {
-  tree result;
   gimple_seq seq1 = NULL;
   enum tree_code comp_code = gimple_cond_code (comp_stmt);
   location_t loc = gimple_location (comp_stmt);
@@ -529,18 +528,29 @@ gimple_simplify_phiopt (bool early_p, tree type, gimple 
*comp_stmt,
 
   if (op.resimplify (, follow_all_ssa_edges))
 {
-  /* Early we want only to allow some generated tree codes. */
-  if (!early_p
- || phiopt_early_allow (seq1, op))
+  bool allowed = !early_p || phiopt_early_allow (seq1, op);
+  tree result = maybe_push_res_to_seq (, );
+  if (dump_file && (dump_flags & TDF_FOLDING))
{
- result = maybe_push_res_to_seq (, );
+ fprintf (dump_file, "\nphiopt match-simplify back:\n");
+ if (seq1)
+   print_gimple_seq (dump_file, seq1, 0, TDF_VOPS|TDF_MEMSYMS);
+ fprintf (dump_file, "result: ");
  if (result)
-   {
- if (loc != UNKNOWN_LOCATION)
-   annotate_all_with_location (seq1, loc);
- gimple_seq_add_seq_without_update (seq, seq1);
- return result;
-   }
+   print_generic_expr (dump_file, result);
+ else
+   fprintf (dump_file, " (none)");
+ fprintf (dump_file, "\n");
+ if (!allowed)
+   fprintf (dump_file, "rejected because early\n");
+   }
+  /* Early we want only to allow some generated tree codes. */
+  if (allowed && result)
+   {
+ if (loc != UNKNOWN_LOCATION)
+   annotate_all_with_location (seq1, loc);
+ gimple_seq_add_seq_without_update (seq, seq1);
+ return result;
}
 }
   gimple_seq_discard (seq1);
@@ -572,18 +582,29 @@ gimple_simplify_phiopt (bool early_p, tree type, gimple 
*comp_stmt,
 
   if (op1.resimplify (, follow_all_ssa_edges))
 {
-  /* Early we want only to allow some generated tree codes. */
-  if (!early_p
- || phiopt_early_allow (seq1, op1))
+  bool allowed = !early_p || phiopt_early_allow (seq1, op1);
+  tree result = maybe_push_res_to_seq (, );
+  if (dump_file && (dump_flags & TDF_FOLDING))
{
- result = maybe_push_res_to_seq (, );
+ fprintf (dump_file, "\nphiopt match-simplify back:\n");
+ if (seq1)
+   print_gimple_seq (dump_file, seq1, 0, TDF_VOPS|TDF_MEMSYMS);
+ fprintf (dump_file, "result: ");
  if (result)
-   {
- if (loc != UNKNOWN_LOCATION)
-   annotate_all_with_location (seq1, loc);
- gimple_seq_add_seq_without_update (seq, seq1);
- return result;
-   }
+   print_generic_expr (dump_file, result);
+ else
+   fprintf (dump_file, " (none)");
+ fprintf (dump_file, "\n");
+ if (!allowed)
+   fprintf (dump_file, "rejected because early\n");
+   }
+  /* Early we want only to allow some generated tree codes. */
+  if (allowed && result)
+   {
+ if (loc != UNKNOWN_LOCATION)
+   annotate_all_with_location (seq1, loc);
+ gimple_seq_add_seq_without_update (seq, seq1);
+ return result;
}
 }
   gimple_seq_discard (seq1);
@@ -855,6 +876,8 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
 
   if (!result)
 return false;
+  if (dump_file && (dump_flags & TDF_FOLDING))
+fprintf (dump_file, "accepted the phiopt match-simplify.\n");
 
   auto_bitmap exprs_maybe_dce;
 
@@ -881,11 +904,6 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
  if (name && TREE_CODE (name) == SSA_NAME)
bitmap_set_bit (exprs_maybe_dce, SSA_NAME_VERSION (name));
}
-  if (dump_file && (dump_flags & TDF_FOLDING))
-   {
- fprintf (dump_file, "Folded into the sequence:\n");
- print_gimple_seq (dump_file, seq, 0, TDF_VOPS|TDF_MEMSYMS);
-   }
 gsi_insert_seq_before (, seq, GSI_CONTINUE_LINKING);
   }
 
-- 
2.31.1

[PATCH] Fix phi-opt-34.c testcase

2023-08-25 Thread Andrew Pinski via Gcc-patches

Somehow when I was testing the new testcase, it was working but
when I re-ran the full testsuite it was not. Anyways the issue
was just a simple space before the `}` for dg-options directive.

Committed as obvious.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-34.c: Fix dg-options directive.
---
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c
index 157c3ea9a0b..61054231b4c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* Disable early phiopt1 as  early ccp1 does not export non-zero bits
so at the point of phiopt1, we don't know that a is [0,1] range */
-/* { dg-options "-O1 -fdisable-tree-phiopt1 -fdump-tree-phiopt2-folding"} */
+/* { dg-options "-O1 -fdisable-tree-phiopt1 -fdump-tree-phiopt2-folding" } */
 
 unsigned f(unsigned a)
 {
-- 
2.31.1

Re: [PATCH] c++: tweaks for explicit conversion fns diagnostic

2023-08-25 Thread Jason Merrill via Gcc-patches


On 8/25/23 19:37, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --

1) When saying that a conversion is erroneous because it would use
an explicit constructor, it might be nice to show where exactly
the explicit constructor is located.  For example, with this patch:

[...]
explicit.C:4:12: note: 'S::S(int)' declared here
 4 |   explicit S(int) { }
   |^

2) When a conversion doesn't work out merely because the conversion
function necessary to do the conversion couldn't be used because
it was marked explicit, it would be useful to the user to say so,
rather than just saying "cannot convert".  For example, with this patch:

explicit.C:13:12: error: cannot convert 'S' to 'bool' in initialization
13 |   bool b = S{1};
   |^~~~
   ||
   |S
explicit.C:5:12: note: explicit conversion function was not considered
 5 |   explicit operator bool() const { return true; }
   |^~~~

gcc/cp/ChangeLog:

* call.cc (convert_like_internal): Show where the conversion function
was declared.
(maybe_show_nonconverting_candidate): New.
* cp-tree.h (maybe_show_nonconverting_candidate): Declare.
* typeck.cc (convert_for_assignment): Call it.

gcc/testsuite/ChangeLog:

* g++.dg/diagnostic/explicit.C: New test.
---
  gcc/cp/call.cc | 41 +++---
  gcc/cp/cp-tree.h   |  1 +
  gcc/cp/typeck.cc   |  5 +++
  gcc/testsuite/g++.dg/diagnostic/explicit.C | 16 +
  4 files changed, 59 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/diagnostic/explicit.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 23e458d3252..09ebcf6a115 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -8459,12 +8459,21 @@ convert_like_internal (conversion *convs, tree expr, 
tree fn, int argnum,
if (pedwarn (loc, 0, "converting to %qT from initializer list "
 "would use explicit constructor %qD",
 totype, convfn))
- inform (loc, "in C++11 and above a default constructor "
- "can be explicit");
+ {
+   inform (loc, "in C++11 and above a default constructor "
+   "can be explicit");
+   inform (DECL_SOURCE_LOCATION (convfn), "%qD declared here",
+   convfn);


I'd swap these two informs.


+++ b/gcc/testsuite/g++.dg/diagnostic/explicit.C
@@ -0,0 +1,16 @@
+// { dg-do compile { target c++11 } }
+
+struct S {
+  explicit S(int) { }
+  explicit operator bool() const { return true; } // { dg-message "explicit 
conversion function was not considered" }
+  explicit operator int() const { return 42; } // { dg-message "explicit conversion 
function was not considered" }
+};
+
+void
+g ()
+{
+  S s = {1}; // { dg-error "would use explicit constructor" }
+  bool b = S{1}; // { dg-error "cannot convert .S. to .bool. in 
initialization" }
+  int i;
+  i = S{2}; // { dg-error "cannot convert .S. to .int. in assignment" }
+}


Let's also test other copy-initialization contexts: parameter passing, 
return, throw, aggregate member initialization.


Jason

Re: [Committed] RISC-V: Add Types to Un-Typed Sync Instructions:

2023-08-25 Thread Edwin Lu


On 8/25/2023 11:53 AM, Jeff Law via Gcc-patches wrote:



On 8/24/23 15:19, Edwin Lu wrote:

Related Discussion:
https://inbox.sourceware.org/gcc-patches/12fb5088-3f28-0a69-de1e-f387371a5...@gmail.com/

This patch updates the sync instructions to ensure that no insn is left
without a type attribute. Updates a total of 6 insns to have type 
"atomic"


Tested for regressions using rv32/64 multilib with newlib/linux.

gcc/Changelog:

* config/riscv/sync-rvwmo.md: updated types to "multi" or
 "atomic" based on number of assembly lines generated
* config/riscv/sync-ztso.md: likewise
* config/riscv/sync.md: likewise

OK.

You should have your write access set up already.  So go ahead and 
follow the directions in the email you received to get yourself into the 
MAINTAINERS file.  Then you can go ahead and push this change.


THanks,
Jeff


Committed!

Edwin

[PATCH] MAINTAINERS: Add myself to write after approval

2023-08-25 Thread Edwin Lu

* MAINTAINERS: Add myself

Signed-off-by: Edwin Lu 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4e706df6522..da2817a547c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -541,6 +541,7 @@ Gabor Loki  

 Manuel López-Ibáñez
 Carl Love  
 Martin v. Löwis

+Edwin Lu   
 H.J. Lu
 Xiong Hu Luo   
 Bin Bin Lv 
-- 
2.34.1

[PATCH] c++: tweaks for explicit conversion fns diagnostic

2023-08-25 Thread Marek Polacek via Gcc-patches

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --

1) When saying that a conversion is erroneous because it would use
an explicit constructor, it might be nice to show where exactly
the explicit constructor is located.  For example, with this patch:

[...]
explicit.C:4:12: note: 'S::S(int)' declared here
4 |   explicit S(int) { }
  |^

2) When a conversion doesn't work out merely because the conversion
function necessary to do the conversion couldn't be used because
it was marked explicit, it would be useful to the user to say so,
rather than just saying "cannot convert".  For example, with this patch:

explicit.C:13:12: error: cannot convert 'S' to 'bool' in initialization
   13 |   bool b = S{1};
  |^~~~
  ||
  |S
explicit.C:5:12: note: explicit conversion function was not considered
5 |   explicit operator bool() const { return true; }
  |^~~~

gcc/cp/ChangeLog:

* call.cc (convert_like_internal): Show where the conversion function
was declared.
(maybe_show_nonconverting_candidate): New.
* cp-tree.h (maybe_show_nonconverting_candidate): Declare.
* typeck.cc (convert_for_assignment): Call it.

gcc/testsuite/ChangeLog:

* g++.dg/diagnostic/explicit.C: New test.
---
 gcc/cp/call.cc | 41 +++---
 gcc/cp/cp-tree.h   |  1 +
 gcc/cp/typeck.cc   |  5 +++
 gcc/testsuite/g++.dg/diagnostic/explicit.C | 16 +
 4 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/diagnostic/explicit.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 23e458d3252..09ebcf6a115 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -8459,12 +8459,21 @@ convert_like_internal (conversion *convs, tree expr, 
tree fn, int argnum,
if (pedwarn (loc, 0, "converting to %qT from initializer list "
 "would use explicit constructor %qD",
 totype, convfn))
- inform (loc, "in C++11 and above a default constructor "
- "can be explicit");
+ {
+   inform (loc, "in C++11 and above a default constructor "
+   "can be explicit");
+   inform (DECL_SOURCE_LOCATION (convfn), "%qD declared here",
+   convfn);
+ }
  }
else
- error ("converting to %qT from initializer list would use "
-"explicit constructor %qD", totype, convfn);
+ {
+   auto_diagnostic_group d;
+   error ("converting to %qT from initializer list would use "
+  "explicit constructor %qD", totype, convfn);
+   inform (DECL_SOURCE_LOCATION (convfn), "%qD declared here",
+   convfn);
+ }
  }
 
/* If we're initializing from {}, it's value-initialization.  */
@@ -14323,4 +14332,28 @@ is_list_ctor (tree decl)
   return true;
 }
 
+/* We know that can_convert_arg_bad already said "no" when trying to convert
+   FROM to TO with ARG and FLAGS.  Try to figure out if it was because
+   an explicit conversion function was skipped when looking for a way to
+   perform the conversion.  At this point we've already printed an error.  */
+
+void
+maybe_show_nonconverting_candidate (tree to, tree from, tree arg, int flags)
+{
+  if (!(flags & LOOKUP_ONLYCONVERTING))
+return;
+
+  conversion_obstack_sentinel cos;
+  conversion *c = implicit_conversion (to, from, arg, /*c_cast_p=*/false,
+  flags & ~LOOKUP_ONLYCONVERTING, tf_none);
+  if (c && !c->bad_p && c->user_conv_p)
+/* Ay, the conversion would have worked in copy-init context.  */
+for (; c; c = next_conversion (c))
+  if (c->kind == ck_user
+ && DECL_P (c->cand->fn)
+ && DECL_NONCONVERTING_P (c->cand->fn))
+   inform (DECL_SOURCE_LOCATION (c->cand->fn), "explicit conversion "
+   "function was not considered");
+}
+
 #include "gt-cp-call.h"
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 608d6310e53..6b225ca182f 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6727,6 +6727,7 @@ extern bool cp_handle_deprecated_or_unavailable (tree, 
tsubst_flags_t = tf_warni
 extern void cp_warn_deprecated_use_scopes  (tree);
 extern tree get_function_version_dispatcher(tree);
 extern bool any_template_arguments_need_structural_equality_p (tree);
+extern void maybe_show_nonconverting_candidate (tree, tree, tree, int);
 
 /* in class.cc */
 extern tree build_vfield_ref   (tree, tree);
diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index d5c0c85ed51..bf9963db3ba 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -10262,6 +10262,8 @@ convert_for_assignment (tree type,

Re: [PATCH 3/3] PHIOPT: Allow BIT_AND and BIT_IOR in early phiopt

2023-08-25 Thread Andrew Pinski via Gcc-patches

On Thu, Aug 24, 2023 at 11:47 PM Richard Biener via Gcc-patches
 wrote:
>
> On Thu, Aug 24, 2023 at 9:16 PM Andrew Pinski via Gcc-patches
>  wrote:
> >
> > Now that MIN/MAX can sometimes be transformed into BIT_AND/BIT_IOR,
> > we should allow BIT_AND and BIT_IOR in the early phiopt.
> > Also we produce BIT_AND/BIT_IOR for things like `bool0 ? bool1 : 0`
> > which seems like a good thing to allow early on too.
>
> Hum.
>
> I think if we allow AND/IOR we should also allow XOR and NOT.

Yes, XOR and NOT most likely should be added too. Maybe even
comparisons without a conversion too.

>
> Can you add dumping for replacements we disallow?  I'm esp. curious
> for those otherwise being "singleton".  I know when doing early phiopt
> I wanted to be very conservative (also to reduce testsuite fallout), and
> I was mostly interested in MIN/MAX which I then extended to similar
> things like ABS.  But maybe we can revisit this if we understand which
> cases we definitely do not want to do early?

I have a patch which prints out the dumping of the result and will
submit it later today. In the next couple of days I will look into the
dump when compiling GCC to see if there are others that seem fine. It
might be the case where we want to reject only non single statement
ones (except for MIN/MAX were allowing 2 there too).

Thanks,
Andrew

>
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > gcc/ChangeLog:
> >
> > * tree-ssa-phiopt.cc (phiopt_early_allow): Allow
> > BIT_AND_EXPR and BIT_IOR_EXPR.
> > ---
> >  gcc/tree-ssa-phiopt.cc | 3 +++
> >  1 file changed, 3 insertions(+)
> >
> > diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
> > index 54706f4c7e7..7e63fb115db 100644
> > --- a/gcc/tree-ssa-phiopt.cc
> > +++ b/gcc/tree-ssa-phiopt.cc
> > @@ -469,6 +469,9 @@ phiopt_early_allow (gimple_seq , gimple_match_op 
> > )
> >  {
> >case MIN_EXPR:
> >case MAX_EXPR:
> > +  /* MIN/MAX could be convert into these. */
> > +  case BIT_IOR_EXPR:
> > +  case BIT_AND_EXPR:
> >case ABS_EXPR:
> >case ABSU_EXPR:
> >case NEGATE_EXPR:
> > --
> > 2.31.1
> >

[committed] RISC-V: Make stack_save_restore tests more robust

2023-08-25 Thread Jeff Law

Spurred by Jivan's patch and a desire for cleaner testresults, I went 
ahead and make the stack_save_restore tests independent of the precise 
stack size by using a regexp.


Pushed to the trunk.

Jeffcommit e1f096a3cc96c71907cfbc7b8baf67a3d863cb6d
Author: Jeff Law 
Date:   Fri Aug 25 16:34:17 2023 -0600

RISC-V: Make stack_save_restore tests more robust

Spurred by Jivan's patch and a desire for cleaner testresults, I went ahead 
and
make the stack_save_restore tests independent of the precise stack size by
using a regexp.

gcc/testsuite/
* gcc.target/riscv/stack_save_restore_1.c: Robustify.
* gcc.target/riscv/stack_save_restore_2.c: Robustify.

diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore_1.c 
b/gcc/testsuite/gcc.target/riscv/stack_save_restore_1.c
index 255ce5f40c9..d8b0668a820 100644
--- a/gcc/testsuite/gcc.target/riscv/stack_save_restore_1.c
+++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore_1.c
@@ -8,15 +8,15 @@ float getf();
 /*
 ** bar:
 ** callt0,__riscv_save_(3|4)
-** addisp,sp,-2032
+** addisp,sp,-[0-9]+
 ** ...
-** li  t0,-12288
+** li  t0,-[0-9]+
 ** add sp,sp,t0
 ** ...
-** li  t0,12288
+** li  t0,[0-9]+
 ** add sp,sp,t0
 ** ...
-** addisp,sp,2032
+** addisp,sp,[0-9]+
 ** tail__riscv_restore_(3|4)
 */
 int bar()
diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore_2.c 
b/gcc/testsuite/gcc.target/riscv/stack_save_restore_2.c
index 4ce5e0118a4..4c549cb11ae 100644
--- a/gcc/testsuite/gcc.target/riscv/stack_save_restore_2.c
+++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore_2.c
@@ -8,15 +8,15 @@ float getf();
 /*
 ** bar:
 ** callt0,__riscv_save_(3|4)
-** addisp,sp,-2032
+** addisp,sp,-[0-9]+
 ** ...
-** li  t0,-12288
+** li  t0,-[0-9]+
 ** add sp,sp,t0
 ** ...
-** li  t0,12288
+** li  t0,[0-9]+
 ** add sp,sp,t0
 ** ...
-** addisp,sp,2032
+** addisp,sp,[0-9]+
 ** tail__riscv_restore_(3|4)
 */
 int bar()

[committed] RISC-V: Fix minor testsuite problem with zicond

2023-08-25 Thread Jeff Law

I thought I had already fixed this, but clearly if I did, I didn't 
include it in any upstream commits.


With -Og the optimizers are hindered in various ways and this prevents 
using zicond.  So skip this test with -Og (it was already being skipped 
at -O0).


Pushed to the trunk.

Jeff

commit 3cd2b73079bac374ce1c542b9c9e354e00a8713d
Author: Jeff Law 
Date:   Fri Aug 25 16:23:06 2023 -0600

[committed] RISC-V: Fix minor testsuite problem with zicond

I thought I had already fixed this, but clearly if I did, I didn't include 
it
in any upstream commits.

With -Og the optimizers are hindered in various ways and this prevents using
zicond.  So skip this test with -Og (it was already being skipped at -O0).

gcc/testsuite
* gcc.target/riscv/zicond-primitiveSemantics.c: Disable for -Og.

diff --git a/gcc/testsuite/gcc.target/riscv/zicond-primitiveSemantics.c 
b/gcc/testsuite/gcc.target/riscv/zicond-primitiveSemantics.c
index 76c5019a992..47d4e4c5683 100644
--- a/gcc/testsuite/gcc.target/riscv/zicond-primitiveSemantics.c
+++ b/gcc/testsuite/gcc.target/riscv/zicond-primitiveSemantics.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gc_zicond -mabi=lp64d" { target { rv64 } } } */
 /* { dg-options "-march=rv32gc_zicond -mabi=ilp32f" { target { rv32 } } } */
-/* { dg-skip-if "" { *-*-* } {"-O0"} } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Og"} } */
 
 long primitiveSemantics_00(long a, long b) { return a == 0 ? 0 : b; }

Re: [PATCH] rs6000: Disable PCREL for unsupported targets [PR111045]

2023-08-25 Thread Peter Bergner via Gcc-patches

On 8/25/23 6:20 AM, Kewen.Lin wrote:
> Assuming the current PCREL_SUPPORTED_BY_OS unchanged, when
> PCREL_SUPPORTED_BY_OS is true, all its required conditions are
> satisfied, it should be safe.  while PCREL_SUPPORTED_BY_OS is
> false, it means the given subtarget doesn't support it, or one
> or more of conditions below don't hold:
> 
>  - TARGET_POWER10 
>  - TARGET_PREFIXED
>  - ELFv2_ABI_CHECK
>  - TARGET_CMODEL == CMODEL_MEDIUM

The pcrel instructions are 64-bit/prefix instructions, so I think
for PCREL_SUPPORTED_BY_OS, we want to keep the TARGET_POWER10 and
the TARGET_PREFIXED checks.  Then we should have the checks for
the OS that can support pcrel, in this case, only ELFv2_ABI_CHECK
for now.  I think we should remove the TARGET_CMODEL check, since
that isn't strictly required, it's a current code requirement for
ELFv2, but could change in the future.  In fact, Mike has talked
about adding pcrel support for ELFv2 and -mcmodel=large, so I think
that should move moved out of PCREL_SUPPORTED_BY_OS and into the
option override checks.

> I guess your proposal seems to drop CMODEL_MEDIUM (even PREFIX?)
> from PCREL_SUPPORTED_BY_OS, to leave it just ABI_ELFv2 & P10? (
> otherwise TARGET_CMODEL should be always CMODEL_MEDIUM for
> ABI_ELFv2 with the original PCREL_SUPPORTED_BY_OS).  And it
> seems to make the subtarget specific checking according to the
> ABI type further?

As I said above, we should also keep TARGET_PREFIX, since that is
required for the pcrel instructions, which are 64-bit/prefix insns.
I think the MCMODEL check should be removed from PCREL_SUPPORTED_BY_OS
and moved to the option override checks, since there's not technical
reason the different MCMODEL options cannot be made to work with
pcrel.

> I was expecting that when new subtargets need to be supported, we
> can move these subtarget specific checkings into macro/function
> SUB*TARGET_OVERRIDE_OPTIONS, IMHO the upside is each subtarget
> can take care of its own checkings separately.  Maybe we can
> just move them now (to rs6000_linux64_override_options).  And in
> the common code, for the cases with zero value PCREL_SUPPORTED_BY_OS,
> (assuming PCREL_SUPPORTED_BY_OS just simple as like ABI_ELFv2), we
> can emit an invalid error for explicit -mpcrel as you proposed
> below.  Thoughts?

Yeah, I think that would probably help simplify the tests, since
they're semi-target specific already.  Of course, we there is no
SUB*TARGET_OVERRIDE_OPTIONS and the other OSes, so those would
have to be added.

> btw, I was also expecting that we don't implicitly set
> OPTION_MASK_PCREL any more for Power10, that is to remove
> OPTION_MASK_PCREL from OTHER_POWER10_MASKS.

I'm on the fence about this one and would like to hear from Segher
and Mike on what they think.  In some respect, pcrel is a Power10
hardware feature, so that would seem to make sense to set the flag,
but yeah, we only have one OS that currently supports it, so maybe
not setting it makes sense?  Like I said, I think I need Segher and
Mike to chime in with their thoughts.

Peter

Re: [PATCH v10] RISC-V: Add support for the Zfa extension

2023-08-25 Thread Jeff Law via Gcc-patches





On 8/13/23 23:50, Jin Ma wrote:

This patch adds the 'Zfa' extension for riscv, which is based on:
https://github.com/riscv/riscv-isa-manual/commits/zfb

The binutils-gdb for 'Zfa' extension:
https://sourceware.org/pipermail/binutils/2023-April/127060.html

What needs special explanation is:
1, According to riscv-spec, "The FCVTMO D.W.D instruction was added principally 
to
   accelerate the processing of JavaScript Numbers.", so it seems that no 
implementation
   is required.

2, The instructions FMINM and FMAXM correspond to C23 library function fminimum 
and fmaximum.
   Therefore, this patch has simply implemented the pattern of fminm3 
and
   fmaxm3 to prepare for later.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: Add zfa extension version, which 
depends on
the F extension.
* config/riscv/constraints.md (zfli): Constrain the floating point 
number that the
instructions FLI.H/S/D can load.
* config/riscv/iterators.md (ceil): New.
* config/riscv/riscv-opts.h (MASK_ZFA): New.
(TARGET_ZFA): New.
* config/riscv/riscv-protos.h (riscv_float_const_rtx_index_for_fli): 
New.
* config/riscv/riscv.cc (riscv_float_const_rtx_index_for_fli): New.
(riscv_cannot_force_const_mem): If instruction FLI.H/S/D can be used, 
memory is
not applicable.
(riscv_const_insns): Likewise.
(riscv_legitimize_const_move): Likewise.
(riscv_split_64bit_move_p): If instruction FLI.H/S/D can be used, no 
split is
required.
(riscv_split_doubleword_move): Likewise.
(riscv_output_move): Output the mov instructions in zfa extension.
(riscv_print_operand): Output the floating-point value of the FLI.H/S/D 
immediate
in assembly.
(riscv_secondary_memory_needed): Likewise.
* config/riscv/riscv.md (fminm3): New.
(fmaxm3): New.
(movsidf2_low_rv32): New.
(movsidf2_high_rv32): New.
(movdfsisi3_rv32): New.
(f_quiet4_zfa): New.
* config/riscv/riscv.opt: New.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zfa-fleq-fltq.c: New test.
* gcc.target/riscv/zfa-fli-zfh.c: New test.
* gcc.target/riscv/zfa-fli.c: New test.
* gcc.target/riscv/zfa-fmovh-fmovp.c: New test.
* gcc.target/riscv/zfa-fround.c: New test.
Thanks.  I added Tsukasa's testcases, except for zfa-fli-5.c which 
depends on sorting out the question around zvfh to your patch.


If you and Tsukasa could sort out the zvfh situation and submit a 
follow-up patch (if needed) it would be greatly appreciated.


I've pushed this to the trunk.

Thanks for your patience,
Jeff

Re: [2/2] RISC-V: Constant FP Optimization with 'Zfa'

2023-08-25 Thread Jeff Law via Gcc-patches





On 8/14/23 06:51, Jin Ma wrote:



This code is great and completely different from the way I implemented it.
I'm not sure which one is better, but my idea is that the fli instruction
corresponds to three tables (HF, SF and DF), all of which represent
specific values. the library in gcc's real.h can very well convert
the corresponding values into the values in the table, so it is only
necessary to perform a simple binary search to look up the tables.
Yea, I was kindof amazed at how Tsukasa implemented that code.  But I 
think the tables are easier to understand, so I'd tend to prefer them.


I'm still evaluating, but in general it looks like your implementation 
is (functionally) a superset of what Tsukasa has done.  I've still got 
some testing to do with Tsukasa's tests to verify, but my inclination is 
to go with your v10 patch right now.


Jeff

[PATCH] c++: Implement C++26 P1854R4 - Making non-encodable string literals ill-formed [PR110341]

2023-08-25 Thread Jakub Jelinek via Gcc-patches

Hi!

This paper voted in as DR makes some multi-character literals ill-formed.
'abcd' stays valid, but e.g. 'á' is newly invalid in UTF-8 exec charset
while valid e.g. in ISO-8859-1, because it is a single character which needs
2 bytes to be encoded.

The following patch does that by checking (only pedantically, especially
because it is a DR) if we'd emit a -Wmultichar warning because character
constant has more than one byte in it whether the number of bytes in the
narrow string matches number of bytes in CPP_STRING32 divided by char32_t
size in bytes.  If it is, it is normal multi-character literal constant
and is diagnosed normally with -Wmultichar, if the number of bytes is
larger, at least one of the c-chars in the sequence was encoded as 2+
bytes.

Now, doing this way has 2 drawbacks, some of the diagnostics which doesn't
result in cpp_interpret_string_1 failures can be printed twice, once
when calling cpp_interpret_string_1 for CPP_CHAR, once for CPP_STRING32.
And, functionally I think it must work 100% correctly if host source
character set is UTF-8 (because all valid UTF-8 chars are encodable in
UTF-32), but might not work for some control codes in UTF-EBCDIC if
that is the source character set (though I don't know if we really actually
support it, e.g. Linux iconv certainly doesn't).
All we actually need is count the number of c-chars in the literal,
alternative would be to write custom character counter which would quietly
interpret/skip over + count escape sequences and decode UTF-8 characters
in between those escape sequences.  But we'd need to have something similar
also for UTF-EBCDIC if it works at all, and from what I've looked, we don't
have anyything like that implemented in libcpp nor anywhere else in GCC.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Or ok with some tweaks to avoid the second round of diagnostics from
cpp_interpret_string_1/convert_escape?  Or reimplement that second time and
count manually?

2023-08-25  Jakub Jelinek  

PR c++/110341
libcpp/
* charset.cc: Implement C++ 26 P1854R4 - Making non-encodable string
literals ill-formed.
(narrow_str_to_charconst): Change last type from cpp_ttype to
const cpp_token *.  For C++ if pedantic and i > 1 in CPP_CHAR
interpret token also as CPP_STRING32 and if number of characters
in the CPP_STRING32 is larger than number of bytes in CPP_CHAR,
pedwarn on it.
(cpp_interpret_charconst): Adjust narrow_str_to_charconst caller.
gcc/testsuite/
* g++.dg/cpp26/literals1.C: New test.
* g++.dg/cpp26/literals2.C: New test.
* g++.dg/cpp23/wchar-multi1.C (c, d): Expect an error rather than
warning.

--- libcpp/charset.cc.jj2023-08-24 15:36:59.0 +0200
+++ libcpp/charset.cc   2023-08-25 17:14:14.098733396 +0200
@@ -2567,18 +2567,20 @@ cpp_interpret_string_notranslate (cpp_re
 /* Subroutine of cpp_interpret_charconst which performs the conversion
to a number, for narrow strings.  STR is the string structure returned
by cpp_interpret_string.  PCHARS_SEEN and UNSIGNEDP are as for
-   cpp_interpret_charconst.  TYPE is the token type.  */
+   cpp_interpret_charconst.  TOKEN is the token.  */
 static cppchar_t
 narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
 unsigned int *pchars_seen, int *unsignedp,
-enum cpp_ttype type)
+const cpp_token *token)
 {
+  enum cpp_ttype type = token->type;
   size_t width = CPP_OPTION (pfile, char_precision);
   size_t max_chars = CPP_OPTION (pfile, int_precision) / width;
   size_t mask = width_to_mask (width);
   size_t i;
   cppchar_t result, c;
   bool unsigned_p;
+  bool diagnosed = false;
 
   /* The value of a multi-character character constant, or a
  single-character character constant whose representation in the
@@ -2602,7 +2604,37 @@ narrow_str_to_charconst (cpp_reader *pfi
 
   if (type == CPP_UTF8CHAR)
 max_chars = 1;
-  if (i > max_chars)
+  else if (i > 1 && CPP_OPTION (pfile, cplusplus) && CPP_PEDANTIC (pfile))
+{
+  /* C++ as a DR since
+P1854R4 - Making non-encodable string literals ill-formed
+makes multi-character narrow character literals if any of the
+characters in the literal isn't encodable in char/unsigned char
+ill-formed.  We need to count the number of c-chars and compare
+that to str.len.  */
+  cpp_string str2 = { 0, 0 };
+  if (cpp_interpret_string (pfile, >val.str, 1, ,
+   CPP_STRING32))
+   {
+ size_t width32 = converter_for_type (pfile, CPP_STRING32).width;
+ size_t nbwc = width32 / width;
+ size_t len = str2.len / nbwc;
+ if (str2.text != token->val.str.text)
+   free ((void *)str2.text);
+ if (str.len > len)
+   {
+ diagnosed
+   = cpp_error (pfile, CPP_DL_PEDWARN,
+

Re: [PATCH] libgccjit: Add support for `restrict` attribute on function parameters

2023-08-25 Thread Guillaume Gomez via Gcc-patches

After more investigations, it seems like the fault was on our side as
we were calling `gcc_jit_type_get_restrict` on types that weren't
pointers.
I added a check for that in `gcc_jit_type_get_restrict` directly as well.

We will continue our investigation to be sure we didn't miss anything.

Le mar. 22 août 2023 à 17:26, Antoni Boucher  a écrit :
>
> Since the tests in the PR for rustc_codegen_gcc
> (https://github.com/rust-lang/rustc_codegen_gcc/pull/312) currently
> fails, let's wait a bit before merging the patch, in case it would need
> some fixes.
>
> On Thu, 2023-08-17 at 20:09 +0200, Guillaume Gomez via Jit wrote:
> > Quick question: do you plan to make the merge or should I ask Antoni?
> >
> > Le jeu. 17 août 2023 à 17:59, Guillaume Gomez
> > 
> > a écrit :
> >
> > > Thanks for the review!
> > >
> > > Le jeu. 17 août 2023 à 17:50, David Malcolm  a
> > > écrit
> > > :
> > > >
> > > > On Thu, 2023-08-17 at 17:41 +0200, Guillaume Gomez wrote:
> > > > > And now I just discovered that a lot of commits from Antoni's
> > > > > fork
> > > > > haven't been sent upstream which is why the ABI count is so
> > > > > high in
> > > > > his repository. Fixed that as well.
> > > >
> > > > Thanks for the updated patch; I was about to comment on that.
> > > >
> > > > This version is good for gcc trunk.
> > > >
> > > > Dave
> > > >
> > > > >
> > > > > Le jeu. 17 août 2023 à 17:26, Guillaume Gomez
> > > > >  a écrit :
> > > > > >
> > > > > > Antoni spot a typo I made:
> > > > > >
> > > > > > I added `LIBGCCJIT_HAVE_gcc_jit_type_get_size` instead of
> > > > > > `LIBGCCJIT_HAVE_gcc_jit_type_get_restrict`. Fixed in this
> > > > > > patch,
> > > > > > sorry
> > > > > > for the noise.
> > > > > >
> > > > > > Le jeu. 17 août 2023 à 11:30, Guillaume Gomez
> > > > > >  a écrit :
> > > > > > >
> > > > > > > Hi Dave,
> > > > > > >
> > > > > > > > What kind of testing has the patch had? (e.g. did you run
> > > > > > > > "make
> > > > > > > > check-
> > > > > > > > jit" ?  Has this been in use on real Rust code?)
> > > > > > >
> > > > > > > I tested it as Rust backend directly on this code:
> > > > > > >
> > > > > > > ```
> > > > > > > pub fn foo(a:  i32, b:  i32, c: ) {
> > > > > > > *a += *c;
> > > > > > > *b += *c;
> > > > > > > }
> > > > > > > ```
> > > > > > >
> > > > > > > I ran it with `rustc` (and the GCC backend) with the
> > > > > > > following
> > > > > > > flags:
> > > > > > > `-C link-args=-lc --emit=asm -O --crate-type=lib` which
> > > > > > > gave the
> > > > > > > diff
> > > > > > > you can see in the attached file. Explanations: the diff on
> > > > > > > the
> > > > > > > right
> > > > > > > has the `__restrict__` attribute used whereas on the left
> > > > > > > it is
> > > > > > > the
> > > > > > > current version where we don't handle it.
> > > > > > >
> > > > > > > As for C testing, I used this code:
> > > > > > >
> > > > > > > ```
> > > > > > > void t(int *__restrict__ a, int *__restrict__ b, char
> > > > > > > *__restrict__ c) {
> > > > > > > *a += *c;
> > > > > > > *b += *c;
> > > > > > > }
> > > > > > > ```
> > > > > > >
> > > > > > > (without the `__restrict__` of course when I need to have a
> > > > > > > witness
> > > > > > > ASM). I attached the diff as well, this time the file with
> > > > > > > the
> > > > > > > use of
> > > > > > > `__restrict__` in on the left. I compiled with the
> > > > > > > following
> > > > > > > flags:
> > > > > > > `-S -O3`.
> > > > > > >
> > > > > > > > Please add a feature macro:
> > > > > > > > #define LIBGCCJIT_HAVE_gcc_jit_type_get_restrict
> > > > > > > > (see the similar ones in the header).
> > > > > > >
> > > > > > > I added `LIBGCCJIT_HAVE_gcc_jit_type_get_size` and extended
> > > > > > > the
> > > > > > > documentation as well to mention the ABI change.
> > > > > > >
> > > > > > > > Please add a new ABI tag (LIBGCCJIT_ABI_25 ?), rather
> > > > > > > > than
> > > > > > > > adding this
> > > > > > > > to ABI_0.
> > > > > > >
> > > > > > > I added `LIBGCCJIT_ABI_34` as `LIBGCCJIT_ABI_33` was the
> > > > > > > last
> > > > > > > one.
> > > > > > >
> > > > > > > > This refers to a "cold attribute"; is this a vestige of a
> > > > > > > > copy-
> > > > > > > > and-
> > > > > > > > paste from a different test case?
> > > > > > >
> > > > > > > It is a vestige indeed... Missed this one.
> > > > > > >
> > > > > > > > I see that the test scans the generated assembler.  Does
> > > > > > > > the
> > > > > > > > test
> > > > > > > > actually verify that restrict has an effect, or was that
> > > > > > > > another
> > > > > > > > vestige from a different test case?
> > > > > > >
> > > > > > > No, this time it's what I wanted. Please see the C diff I
> > > > > > > provided
> > > > > > > above to see that the ASM has a small diff that allowed me
> > > > > > > to
> > > > > > > confirm
> > > > > > > that the `__restrict__` attribute was correctly set.
> > > > > > >
> > > > > > > > If this test is meant to run at -O3 and thus can't be
> > > > > > > > part of
> > > > > >

[PATCH] testsuite: Add test for already-fixed issue with _Pragma expansion [PR90400]

2023-08-25 Thread Lewis Hyatt via Gcc-patches

Hello-

This is adding a testcase for a PR that was already incidentally fixed. OK
to commit please? Thanks...

-Lewis

-- >8 --

The PR was fixed by r12-5454. Since the fix was somewhat incidental,
although related, add a testcase from PR90400 too before closing it out.

gcc/testsuite/ChangeLog:

PR preprocessor/90400
* c-c++-common/cpp/pr90400.c: New test.
---
 gcc/testsuite/c-c++-common/cpp/pr90400.c | 14 ++
 1 file changed, 14 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/cpp/pr90400.c

diff --git a/gcc/testsuite/c-c++-common/cpp/pr90400.c 
b/gcc/testsuite/c-c++-common/cpp/pr90400.c
new file mode 100644
index 000..4f2cab8d6ab
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/pr90400.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-save-temps" } */
+/* PR preprocessor/90400 */
+
+#define OUTER(x) x
+#define FOR(x) _Pragma ("GCC unroll 0") for (x)
+void f ()
+{
+/* If the pragma were to be seen prior to the expansion of FOR, as was
+   the case before r12-5454, then the unroll pragma would complain
+   because the immediately following statement would be ";" rather than
+   a loop.  */
+OUTER (; FOR (int i = 0; i != 1; ++i);) /* { dg-bogus {statement expected 
before ';' token} } */
+}

[PATCH] c++: Implement C++ DR 2406 - [[fallthrough]] attribute and iteration statements

2023-08-25 Thread Jakub Jelinek via Gcc-patches

Hi!

The following patch implements
CWG 2406 - [[fallthrough]] attribute and iteration statements
The genericization of some loops leaves nothing at all or just a label
after a body of a loop, so if the loop is later followed by
case or default label in a switch, the fallthrough statement isn't
diagnosed.

The following patch implements it by marking the IFN_FALLTHROUGH call
in such a case, such that during gimplification it can be pedantically
diagnosed even if it is followed by case or default label or some normal
labels followed by case/default labels.

While looking into this, I've discovered other problems.
expand_FALLTHROUGH_r is removing the IFN_FALLTHROUGH calls from the IL,
but wasn't telling that to walk_gimple_stmt/walk_gimple_seq_mod, so
the callers would then skip the next statement after it, and it would
return non-NULL if the removed stmt was last in the sequence.  This could
lead to wi->callback_result being set even if it didn't appear at the very
end of switch sequence.
The patch makes use of wi->removed_stmt such that the callers properly
know what happened, and use different way to handle the end of switch
sequence case.

That change discovered a bug in the gimple-walk handling of
wi->removed_stmt.  If that flag is set, the callback is telling the callers
that the current statement has been removed and so the innermost
walk_gimple_seq_mod shouldn't gsi_next.  The problem is that
wi->removed_stmt is only reset at the start of a walk_gimple_stmt, but that
can be too late for some cases.  If we have two nested gimple sequences,
say GIMPLE_BIND as the last stmt of some gimple seq, we remove the last
statement inside of that GIMPLE_BIND, set wi->removed_stmt there, don't
do gsi_next correctly because already gsi_remove moved us to the next stmt,
there is no next stmt, so we return back to the caller, but wi->removed_stmt
is still set and so we don't do gsi_next even in the outer sequence, despite
the GIMPLE_BIND (etc.) not being removed.  That means we walk the
GIMPLE_BIND with its whole sequence again.
The patch fixes that by resetting wi->removed_stmt after we've used that
flag in walk_gimple_seq_mod.  Nothing really uses that flag after the
outermost walk_gimple_seq_mod, it is just a private notification that
the stmt callback has removed a stmt.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-08-25  Jakub Jelinek  

gcc/
* gimplify.cc (expand_FALLTHROUGH_r): Use wi->removed_stmt after
gsi_remove, change the way of passing fallthrough stmt at the end
of sequence to expand_FALLTHROUGH.  Diagnose IFN_FALLTHROUGH
with GF_CALL_NOTHROW flag.
(expand_FALLTHROUGH): Change loc into array of 2 location_t elts,
don't test wi.callback_result, instead check whether first
elt is not UNKNOWN_LOCATION and in that case pedwarn with the
second location.
* gimple-walk.cc (walk_gimple_seq_mod): Clear wi->removed_stmt
after the flag has been used.
gcc/c-family/
* c-gimplify.cc (genericize_c_loop): For C++ mark IFN_FALLTHROUGH
call at the end of loop body as TREE_NOTHROW.
gcc/testsuite/
* g++.dg/DRs/dr2406.C: New test.

--- gcc/gimplify.cc.jj  2023-08-23 11:22:28.115592483 +0200
+++ gcc/gimplify.cc 2023-08-25 13:43:58.711847414 +0200
@@ -2588,17 +2588,33 @@ expand_FALLTHROUGH_r (gimple_stmt_iterat
   *handled_ops_p = false;
   break;
 case GIMPLE_CALL:
+  static_cast(wi->info)[0] = UNKNOWN_LOCATION;
   if (gimple_call_internal_p (stmt, IFN_FALLTHROUGH))
{
+ location_t loc = gimple_location (stmt);
  gsi_remove (gsi_p, true);
+ wi->removed_stmt = true;
+
+ /* nothrow flag is added by genericize_c_loop to mark fallthrough
+statement at the end of some loop's body.  Those should be
+always diagnosed, either because they indeed don't precede
+a case label or default label, or because the next statement
+is not within the same iteration statement.  */
+ if ((stmt->subcode & GF_CALL_NOTHROW) != 0)
+   {
+ pedwarn (loc, 0, "attribute % not preceding "
+  "a case label or default label");
+ break;
+   }
+
  if (gsi_end_p (*gsi_p))
{
- *static_cast(wi->info) = gimple_location (stmt);
- return integer_zero_node;
+ static_cast(wi->info)[0] = BUILTINS_LOCATION;
+ static_cast(wi->info)[1] = loc;
+ break;
}
 
  bool found = false;
- location_t loc = gimple_location (stmt);
 
  gimple_stmt_iterator gsi2 = *gsi_p;
  stmt = gsi_stmt (gsi2);
@@ -2648,6 +2664,7 @@ expand_FALLTHROUGH_r (gimple_stmt_iterat
}
   break;
 default:
+  static_cast(wi->info)[0] = UNKNOWN_LOCATION;
   break;
 }
   return NULL_TREE;
@@ -2659,14 +2676,16 @@ static void
 expand_FALLTHROUGH

Re: [PATCH 1/2] RISC-V: Add support for the 'Zfa' extension

2023-08-25 Thread Jeff Law via Gcc-patches





On 8/13/23 23:32, Tsukasa OI via Gcc-patches wrote:

From: Tsukasa OI 

This commit adds support for the 'Zfa' extension containing additional
floating point instructions, version 0.1 (stable and approved).

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
(riscv_implied_info): Add implication 'Zfa' -> 'F'.
(riscv_ext_version_table) Add support for the 'Zfa' extension.
(riscv_ext_flag_table) Set MASK_ZFA if 'Zfa' is available.
* config/riscv/riscv-opts.h (MASK_ZFA, TARGET_ZFA): New.
So I think this and Jin Ma's most recently posted Zfa bits are almost 
functionally equivalent.  The only notable difference in this patch is 
Jin's work puts Zfa into its own subextension rather than in the 
existing zf subextension.


I think that was done in the v10 patch from Jin in response to the 
implies/depends comment from Kito.  So I'm inclined to say that's the 
preferred approach.


Given that's the only notable difference between this patch and Jin's 
patch, I'm going to consider 1/2 in this series superseded by Jin's work.


jeff

Re: [PATCH] analyzer: Move gcc.dg/analyzer tests to c-c++-common (1).

2023-08-25 Thread David Malcolm via Gcc-patches

On Fri, 2023-08-25 at 14:48 +0200, Benjamin Priour wrote:
> Hi David,
> 
> Thanks for the review.
> 
> On Fri, Aug 25, 2023 at 2:12 AM David Malcolm 
> wrote:
> 
> > > From: benjamin priour 
> > > 
> > > Hi,
> > > 
> > > Below the first batch of a serie of patches to transition
> > > the analyzer testsuite from gcc.dg/analyzer to c-c++-
> > > common/analyzer.
> > > I do not know how long this serie will be, thus the patch was not
> > > numbered.
> > > 
> > > For the grand majority of the tests, the transition only required
> > > some
> > > adjustement over the syntax and casts to be C++-friendly, or to
> > > adjust
> > > the warnings regexes to fit the C++ FE.
> > > 
> > > The most noteworthy change is in the handling of known_functions,
> > > as described in the below patch.
> > 
> > Hi Benjamin.
> > 
> > Many thanks for putting this together, it looks like it was a lot
> > of
> > work.
> > 
> > > Successfully regstrapped on x86_64-linux-gnu off trunk
> > > 18befd6f050e70f11ecca1dd58624f0ee3c68cc7.
> > 
> > Did you compare the before/after results from DejaGnu somehow?
> > 
> > Note that I've pushed 9 patches to the analyzer since
> > 18befd6f050e70f11ecca1dd58624f0ee3c68cc7 and some of those touch
> > the
> > files below, so it's worth rebasing and double-checking the
> > results.
> > 
> > 

[...snip...]

> 
> > I confess I'm still a little hazy as to the whole builtin_kf logic,
> > but
> > I trust you that this is needed.
> > 
> > Please can you add a paragraph to this comment to explain the
> > motivation here (perhaps giving examples?)
> > 
> > > +
> > > +const builtin_known_function *
> > > +region_model::get_builtin_kf (const gcall *call,
> > > +    region_model_context *ctxt /* = NULL
> > > */)
> > const
> > > +{
> > > +  region_model *mut_this = const_cast  (this);
> > > +  tree callee_fndecl = mut_this->get_fndecl_for_call (call,
> > > ctxt);
> > > +  if (! callee_fndecl)
> > > +    return NULL;
> > > +
> > > +  call_details cd (call, mut_this, ctxt);
> > > +  if (const known_function *kf = get_known_function
> > > (callee_fndecl, cd))
> > > +    return kf->dyn_cast_builtin_kf ();
> > > +
> > > +  return NULL;
> > > +}
> > > +
> > 
> > 
> The new comment is as follow:
> 
> /* Get any builtin_known_function for CALL and emit any warning to
> CTXT
>    if not NULL.
> 
>    The call must match all assumptions made by the known_function
> (such as
>    e.g. "argument 1's type must be a pointer type").
> 
>    Return NULL if no builtin_known_function is found, or it does
>    not match the assumption(s).
> 
>    Internally calls get_known_function to find a known_function and
> cast it
>    to a builtin_known_function.
> 
>    For instance, calloc is a C builtin, defined in gcc/builtins.def
>    by the DEF_LIB_BUILTIN macro. Such builtins are recognized by the
>    analyzer by their name, so that even in C++ or if the user
> redeclares
>    them but mismatch their signature, they are still recognized as
> builtins.
> 
>    Cases when a supposed builtin is not flagged as one by the FE:
> 
>     The C++ FE does not recognize calloc as a builtin if it has not
> been
>     included from a standard header, but the C FE does. Hence in C++
> if
>     CALL comes from a calloc and stdlib is not included,
>     gcc/tree.h:fndecl_built_in_p (CALL) would be false.
> 
>     In C code, a __SIZE_TYPE__ calloc (__SIZE_TYPE__, __SIZE_TYPE__)
> user
>     declaration has obviously a mismatching signature from the
> standard, and
>     its function_decl tree won't be unified by
>     gcc/c-decl.cc:match_builtin_function_types.
> 
>    Yet in both cases the analyzer should treat the calls as a builtin
> calloc
>    so that extra attributes unspecified by the standard but added by
> GCC
>    (e.g. sprintf attributes in gcc/builtins.def), useful for the
> detection
> of
>    dangerous behavior, are indeed processed.
> 
>    Therefore for those cases when a "builtin flag" is not added by
> the FE,
>    builtins' kf are derived from builtin_known_function, whose method
>    builtin_known_function::builtin_decl returns the builtin's
>    function_decl tree as defined in gcc/builtins.def, with all the
> extra
>    attributes.  */
> 
> I hope it clarifies the new kf subclass's purpose.

Thanks!

[...snip...]

> > 
> 
> > > diff --git a/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
> > b/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
> > > index f8dc806d619..e94c0561665 100644
> > > --- a/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
> > > +++ b/gcc/testsuite/gcc.dg/analyzer/sprintf-1.c
> > > @@ -1,53 +1,14 @@
> > >  /* See e.g. https://en.cppreference.com/w/c/io/fprintf
> > >     and
> > > https://www.man7.org/linux/man-pages/man3/sprintf.3.html */
> > > 
> > > +/* { dg-skip-if "C++ fpermissive already throws an error" { c++
> > > } } */
> > 
> > Given that this is in the gcc.dg directory, this directive
> > presumably
> > never skips.
> > 
> > Is the intent here to document that
> > (a) this set of tests is just

Re: [V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-08-25 Thread Kees Cook via Gcc-patches

On Fri, Aug 25, 2023 at 03:24:22PM +, Qing Zhao wrote:
> This is the 3rd version of the patch, per our discussion based on the
> review comments for the 1st and 2nd version, the major changes in this

This tests out great for me; thanks you! I'm able to build the entire
kernel tree with 201 annotations[1] added. Things work as expected. :)

-Kees

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git/log/?h=devel/next-20230825/counted_by

-- 
Kees Cook

[COMMITTED V3 5/6] OpenMP: Fortran support for imperfectly-nested loops

2023-08-25 Thread Sandra Loosemore

OpenMP 5.0 removed the restriction that multiple collapsed loops must
be perfectly nested, allowing "intervening code" (including nested
BLOCKs) before or after each nested loop.  In GCC this code is moved
into the inner loop body by the respective front ends.

In the Fortran front end, most of the semantic processing happens during
the translation phase, so the parse phase just collects the intervening
statements, checks them for errors, and splices them around the loop body.

gcc/fortran/ChangeLog
* gfortran.h (struct gfc_namespace): Add omp_structured_block bit.
* openmp.cc: Include omp-api.h.
(resolve_omp_clauses): Consolidate inscan reduction clause conflict
checking here.
(find_nested_loop_in_chain): New.
(find_nested_loop_in_block): New.
(gfc_resolve_omp_do_blocks): Set omp_current_do_collapse properly.
Handle imperfectly-nested loops when looking for nested omp scan.
Refactor to move inscan reduction clause conflict checking to
resolve_omp_clauses.
(gfc_resolve_do_iterator): Handle imperfectly-nested loops.
(struct icode_error_state): New.
(icode_code_error_callback): New.
(icode_expr_error_callback): New.
(diagnose_intervening_code_errors_1): New.
(diagnose_intervening_code_errors): New.
(make_structured_block): New.
(restructure_intervening_code): New.
(is_outer_iteration_variable): Do not assume loops are perfectly
nested.
(check_nested_loop_in_chain): New.
(check_nested_loop_in_block_state): New.
(check_nested_loop_in_block_symbol): New.
(check_nested_loop_in_block): New.
(expr_uses_intervening_var): New.
(is_intervening_var): New.
(expr_is_invariant): Do not assume loops are perfectly nested.
(resolve_omp_do): Handle imperfectly-nested loops.
* trans-stmt.cc (gfc_trans_block_construct): Generate
OMP_STRUCTURED_BLOCK if magic bit is set on block namespace.

gcc/testsuite/ChangeLog
* gfortran.dg/gomp/collapse1.f90: Adjust expected errors.
* gfortran.dg/gomp/collapse2.f90: Likewise.
* gfortran.dg/gomp/imperfect-gotos.f90: New.
* gfortran.dg/gomp/imperfect-invalid-scope.f90: New.
* gfortran.dg/gomp/imperfect1.f90: New.
* gfortran.dg/gomp/imperfect2.f90: New.
* gfortran.dg/gomp/imperfect3.f90: New.
* gfortran.dg/gomp/imperfect4.f90: New.
* gfortran.dg/gomp/imperfect5.f90: New.

libgomp/ChangeLog
* testsuite/libgomp.fortran/imperfect-destructor.f90: New.
* testsuite/libgomp.fortran/imperfect1.f90: New.
* testsuite/libgomp.fortran/imperfect2.f90: New.
* testsuite/libgomp.fortran/imperfect3.f90: New.
* testsuite/libgomp.fortran/imperfect4.f90: New.
* testsuite/libgomp.fortran/target-imperfect1.f90: New.
* testsuite/libgomp.fortran/target-imperfect2.f90: New.
* testsuite/libgomp.fortran/target-imperfect3.f90: New.
* testsuite/libgomp.fortran/target-imperfect4.f90: New.
---
 gcc/fortran/gfortran.h|   3 +
 gcc/fortran/openmp.cc | 763 +++---
 gcc/fortran/trans-stmt.cc |   7 +-
 gcc/testsuite/gfortran.dg/gomp/collapse1.f90  |   6 +-
 gcc/testsuite/gfortran.dg/gomp/collapse2.f90  |  10 +-
 .../gfortran.dg/gomp/imperfect-gotos.f90  |  69 ++
 .../gomp/imperfect-invalid-scope.f90  |  81 ++
 gcc/testsuite/gfortran.dg/gomp/imperfect1.f90 |  39 +
 gcc/testsuite/gfortran.dg/gomp/imperfect2.f90 |  56 ++
 gcc/testsuite/gfortran.dg/gomp/imperfect3.f90 |  45 ++
 gcc/testsuite/gfortran.dg/gomp/imperfect4.f90 |  36 +
 gcc/testsuite/gfortran.dg/gomp/imperfect5.f90 |  85 ++
 .../libgomp.fortran/imperfect-destructor.f90  | 142 
 .../testsuite/libgomp.fortran/imperfect1.f90  |  67 ++
 .../testsuite/libgomp.fortran/imperfect2.f90  | 102 +++
 .../testsuite/libgomp.fortran/imperfect3.f90  | 110 +++
 .../testsuite/libgomp.fortran/imperfect4.f90  | 121 +++
 .../libgomp.fortran/target-imperfect1.f90 |  72 ++
 .../libgomp.fortran/target-imperfect2.f90 | 110 +++
 .../libgomp.fortran/target-imperfect3.f90 | 116 +++
 .../libgomp.fortran/target-imperfect4.f90 | 126 +++
 21 files changed, 2058 insertions(+), 108 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/imperfect-gotos.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/imperfect-invalid-scope.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/imperfect1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/imperfect2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/imperfect3.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/imperfect4.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/imperfect5.f90
 create mode 100644 libgomp/testsuite/libgomp.fortran/imperfect-destructor.f90
 create mode 100644 libgomp/testsuite/libgomp.fortran/imperfect1.f90
 create

[COMMITTED V3 4/6] OpenMP: New C/C++ testcases for imperfectly nested loops.

2023-08-25 Thread Sandra Loosemore

gcc/testsuite/ChangeLog
* c-c++-common/gomp/imperfect-attributes.c: New.
* c-c++-common/gomp/imperfect-badloops.c: New.
* c-c++-common/gomp/imperfect-blocks.c: New.
* c-c++-common/gomp/imperfect-extension.c: New.
* c-c++-common/gomp/imperfect-gotos.c: New.
* c-c++-common/gomp/imperfect-invalid-scope.c: New.
* c-c++-common/gomp/imperfect-labels.c: New.
* c-c++-common/gomp/imperfect-legacy-syntax.c: New.
* c-c++-common/gomp/imperfect-pragmas.c: New.
* c-c++-common/gomp/imperfect1.c: New.
* c-c++-common/gomp/imperfect2.c: New.
* c-c++-common/gomp/imperfect3.c: New.
* c-c++-common/gomp/imperfect4.c: New.
* c-c++-common/gomp/imperfect5.c: New.

libgomp/ChangeLog
* testsuite/libgomp.c-c++-common/imperfect1.c: New.
* testsuite/libgomp.c-c++-common/imperfect2.c: New.
* testsuite/libgomp.c-c++-common/imperfect3.c: New.
* testsuite/libgomp.c-c++-common/imperfect4.c: New.
* testsuite/libgomp.c-c++-common/imperfect5.c: New.
* testsuite/libgomp.c-c++-common/imperfect6.c: New.
* testsuite/libgomp.c-c++-common/target-imperfect1.c: New.
* testsuite/libgomp.c-c++-common/target-imperfect2.c: New.
* testsuite/libgomp.c-c++-common/target-imperfect3.c: New.
* testsuite/libgomp.c-c++-common/target-imperfect4.c: New.
---
 .../c-c++-common/gomp/imperfect-attributes.c  |  81 
 .../c-c++-common/gomp/imperfect-badloops.c|  50 +
 .../c-c++-common/gomp/imperfect-blocks.c  |  75 
 .../c-c++-common/gomp/imperfect-extension.c   |  55 ++
 .../c-c++-common/gomp/imperfect-gotos.c   | 174 ++
 .../gomp/imperfect-invalid-scope.c|  77 
 .../c-c++-common/gomp/imperfect-labels.c  |  85 +
 .../gomp/imperfect-legacy-syntax.c|  44 +
 .../c-c++-common/gomp/imperfect-pragmas.c |  85 +
 gcc/testsuite/c-c++-common/gomp/imperfect1.c  |  38 
 gcc/testsuite/c-c++-common/gomp/imperfect2.c  |  34 
 gcc/testsuite/c-c++-common/gomp/imperfect3.c  |  52 ++
 gcc/testsuite/c-c++-common/gomp/imperfect4.c  |  33 
 gcc/testsuite/c-c++-common/gomp/imperfect5.c  |  95 ++
 .../libgomp.c-c++-common/imperfect1.c |  76 
 .../libgomp.c-c++-common/imperfect2.c | 114 
 .../libgomp.c-c++-common/imperfect3.c | 119 
 .../libgomp.c-c++-common/imperfect4.c | 117 
 .../libgomp.c-c++-common/imperfect5.c |  49 +
 .../libgomp.c-c++-common/imperfect6.c | 115 
 .../libgomp.c-c++-common/target-imperfect1.c  |  81 
 .../libgomp.c-c++-common/target-imperfect2.c  | 122 
 .../libgomp.c-c++-common/target-imperfect3.c  | 125 +
 .../libgomp.c-c++-common/target-imperfect4.c  | 122 
 24 files changed, 2018 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect-attributes.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect-badloops.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect-blocks.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect-extension.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect-gotos.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect-invalid-scope.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect-labels.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect-legacy-syntax.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect-pragmas.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect1.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect2.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect3.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect4.c
 create mode 100644 gcc/testsuite/c-c++-common/gomp/imperfect5.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/imperfect1.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/imperfect2.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/imperfect3.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/imperfect4.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/imperfect5.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/imperfect6.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/target-imperfect1.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/target-imperfect2.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/target-imperfect3.c
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/target-imperfect4.c

diff --git a/gcc/testsuite/c-c++-common/gomp/imperfect-attributes.c 
b/gcc/testsuite/c-c++-common/gomp/imperfect-attributes.c
new file mode 100644
index 000..776295ce22a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/imperfect-attributes.c
@@ -0,0 +1,81 @@
+/* { dg-do compile { target { c ||

[COMMITTED V3 2/6] OpenMP: C front end support for imperfectly-nested loops

2023-08-25 Thread Sandra Loosemore

OpenMP 5.0 removed the restriction that multiple collapsed loops must
be perfectly nested, allowing "intervening code" (including nested
BLOCKs) before or after each nested loop.  In GCC this code is moved
into the inner loop body by the respective front ends.

This patch changes the C front end to use recursive descent parsing
on nested loops within an "omp for" construct, rather than an iterative
approach, in order to preserve proper nesting of compound statements.

New common C/C++ testcases are in a separate patch.

gcc/c-family/ChangeLog
* c-common.h (c_omp_check_loop_binding_exprs): Declare.
* c-omp.cc: Include tree-iterator.h.
(find_binding_in_body): New.
(check_loop_binding_expr_r): New.
(LOCATION_OR): New.
(check_looop_binding_expr): New.
(c_omp_check_loop_binding_exprs): New.

gcc/c/ChangeLog
* c-parser.cc (struct c_parser): Add omp_for_parse_state field.
(struct omp_for_parse_data): New.
(check_omp_intervening_code): New.
(add_structured_block_stmt): New.
(c_parser_compound_statement_nostart): Recognize intervening code,
nested loops, and other things that need special handling in
OpenMP loop constructs.
(c_parser_while_statement): Error on loop in intervening code.
(c_parser_do_statement): Likewise.
(c_parser_for_statement): Likewise.
(c_parser_postfix_expression_after_primary): Error on calls to
the OpenMP runtime in intervening code.
(c_parser_pragma): Error on OpenMP pragmas in intervening code.
(c_parser_omp_loop_nest): New.
(c_parser_omp_for_loop): Rewrite to use recursive descent, calling
c_parser_omp_loop_nest to do the heavy lifting.

gcc/ChangeLog
* omp-api.h: New.
* omp-general.cc (omp_runtime_api_procname): New.
(omp_runtime_api_call): Moved here from omp-low.cc, and make
non-static.
* omp-general.h: Include omp-api.h.
* omp-low.cc (omp_runtime_api_call): Delete this copy.

gcc/testsuite/ChangeLog
* c-c++-common/goacc/collapse-1.c: Update for new C error behavior.
* c-c++-common/goacc/tile-2.c: Likewise.
* gcc.dg/gomp/collapse-1.c: Likewise.
---
 gcc/c-family/c-common.h   |   1 +
 gcc/c-family/c-omp.cc | 151 +++
 gcc/c/c-parser.cc | 860 +-
 gcc/omp-api.h |  32 +
 gcc/omp-general.cc| 134 +++
 gcc/omp-general.h |   1 +
 gcc/omp-low.cc| 129 ---
 gcc/testsuite/c-c++-common/goacc/collapse-1.c |  16 +-
 gcc/testsuite/c-c++-common/goacc/tile-2.c |   4 +-
 gcc/testsuite/gcc.dg/gomp/collapse-1.c|  10 +-
 10 files changed, 942 insertions(+), 396 deletions(-)
 create mode 100644 gcc/omp-api.h

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 78fc5248ba6..732cb4eff64 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1299,6 +1299,7 @@ extern tree c_finish_omp_for (location_t, enum tree_code, 
tree, tree, tree,
 extern bool c_omp_check_loop_iv (tree, tree, walk_tree_lh);
 extern bool c_omp_check_loop_iv_exprs (location_t, enum tree_code, tree, int,
   tree, tree, tree, walk_tree_lh);
+extern bool c_omp_check_loop_binding_exprs (tree, vec *);
 extern tree c_finish_oacc_wait (location_t, tree, tree);
 extern tree c_oacc_split_loop_clauses (tree, tree *, bool);
 extern void c_omp_split_clauses (location_t, enum tree_code, omp_clause_mask,
diff --git a/gcc/c-family/c-omp.cc b/gcc/c-family/c-omp.cc
index 4faddb00bbc..9b7d7f789e3 100644
--- a/gcc/c-family/c-omp.cc
+++ b/gcc/c-family/c-omp.cc
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify.h"
 #include "langhooks.h"
 #include "bitmap.h"
+#include "tree-iterator.h"
 
 
 /* Complete a #pragma oacc wait construct.  LOC is the location of
@@ -1728,6 +1729,156 @@ c_omp_check_loop_iv_exprs (location_t stmt_loc, enum 
tree_code code,
   return !data.fail;
 }
 
+
+/* Helper function for c_omp_check_loop_binding_exprs: look for a binding
+   of DECL in BODY.  Only traverse things that might be containers for
+   intervening code in an OMP loop.  Returns the BIND_EXPR or DECL_EXPR
+   if found, otherwise null.  */
+
+static tree
+find_binding_in_body (tree decl, tree body)
+{
+  if (!body)
+return NULL_TREE;
+
+  switch (TREE_CODE (body))
+{
+case BIND_EXPR:
+  for (tree b = BIND_EXPR_VARS (body); b; b = DECL_CHAIN (b))
+   if (b == decl)
+ return body;
+  return find_binding_in_body (decl, BIND_EXPR_BODY (body));
+
+case DECL_EXPR:
+  if (DECL_EXPR_DECL (body) == decl)
+   return body;
+  return NULL_TREE;
+
+case STATEMENT_LIST:
+  for (tree_stmt_iterator si = tsi_start (body); !tsi_end_p (si);
+  tsi_next ())
+

[COMMITTED V3 6/6] OpenMP: Document support for imperfectly-nested loops.

2023-08-25 Thread Sandra Loosemore

libgomp/ChangeLog
* libgomp.texi (OpenMP 5.0):  Imperfectly-nested loops are done.
---
 libgomp/libgomp.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 5c91163893f..4aad8cc52f4 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -200,7 +200,7 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{!=} as relational-op in canonical loop form for C/C++ @tab Y @tab
 @item @code{nonmonotonic} as default loop schedule modifier for 
worksharing-loop
   constructs @tab Y @tab
-@item Collapse of associated loops that are imperfectly nested loops @tab N 
@tab
+@item Collapse of associated loops that are imperfectly nested loops @tab Y 
@tab
 @item Clauses @code{if}, @code{nontemporal} and @code{order(concurrent)} in
   @code{simd} construct @tab Y @tab
 @item @code{atomic} constructs in @code{simd} @tab Y @tab
-- 
2.31.1

[COMMITTED V3 3/6] OpenMP: C++ support for imperfectly-nested loops

2023-08-25 Thread Sandra Loosemore

OpenMP 5.0 removed the restriction that multiple collapsed loops must
be perfectly nested, allowing "intervening code" (including nested
BLOCKs) before or after each nested loop.  In GCC this code is moved
into the inner loop body by the respective front ends.

This patch changes the C++ front end to use recursive descent parsing
on nested loops within an "omp for" construct, rather than an
iterative approach, in order to preserve proper nesting of compound
statements.  Preserving cleanups (destructors) for class objects
declared in intervening code and loop initializers complicates moving
the former into the body of the loop; this is handled by parsing the
entire construct before reassembling any of it.

gcc/cp/ChangeLog
* cp-tree.h (cp_convert_omp_range_for): Adjust declaration.
* parser.cc (struct omp_for_parse_data): New.
(cp_parser_postfix_expression): Diagnose calls to OpenMP runtime
in intervening code.
(check_omp_intervening_code): New.
(cp_parser_statement_seq_opt): Special-case nested loops, blocks,
and other constructs for OpenMP loops.
(cp_parser_iteration_statement): Reject loops in intervening code.
(cp_parser_omp_for_loop_init): Expand comments and tweak the
interface slightly to better distinguish input/output parameters.
(cp_convert_omp_range_for): Likewise.
(cp_parser_omp_loop_nest): New, split from cp_parser_omp_for_loop
and largely rewritten.  Add more comments.
(insert_structured_blocks): New.
(find_structured_blocks): New.
(struct sit_data, substitute_in_tree_walker, substitute_in_tree):
New.
(fixup_blocks_walker): New.
(cp_parser_omp_for_loop): Rewrite to use recursive descent instead
of a loop.  Add logic to reshuffle the bits of code collected
during parsing so intervening code gets moved to the loop body.
(cp_parser_omp_loop): Remove call to finish_omp_for_block, which
is now redundant.
(cp_parser_omp_simd): Likewise.
(cp_parser_omp_for): Likewise.
(cp_parser_omp_distribute): Likewise.
(cp_parser_oacc_loop): Likewise.
(cp_parser_omp_taskloop): Likewise.
(cp_parser_pragma): Reject OpenMP pragmas in intervening code.
* parser.h (struct cp_parser): Add omp_for_parse_state field.
* pt.cc (tsubst_omp_for_iterator): Adjust call to
cp_convert_omp_range_for.
* semantics.cc (finish_omp_for): Try harder to preserve location
of loop variable init expression for use in diagnostics.
(struct fofb_data, finish_omp_for_block_walker): New.
(finish_omp_for_block): Allow variables to be bound in a BIND_EXPR
nested inside BIND instead of directly in BIND itself.

gcc/testsuite/ChangeLog
* c-c++-common/goacc/tile-2.c: Adjust expected error patterns.
* g++.dg/gomp/attrs-imperfect1.C: New test.
* g++.dg/gomp/attrs-imperfect2.C: New test.
* g++.dg/gomp/attrs-imperfect3.C: New test.
* g++.dg/gomp/attrs-imperfect4.C: New test.
* g++.dg/gomp/attrs-imperfect5.C: New test.
* g++.dg/gomp/pr41967.C: Adjust expected error patterns.
* g++.dg/gomp/tpl-imperfect-gotos.C: New test.
* g++.dg/gomp/tpl-imperfect-invalid-scope.C: New test.

libgomp/ChangeLog
* testsuite/libgomp.c++/attrs-imperfect1.C: New test.
* testsuite/libgomp.c++/attrs-imperfect2.C: New test.
* testsuite/libgomp.c++/attrs-imperfect3.C: New test.
* testsuite/libgomp.c++/attrs-imperfect4.C: New test.
* testsuite/libgomp.c++/attrs-imperfect5.C: New test.
* testsuite/libgomp.c++/attrs-imperfect6.C: New test.
* testsuite/libgomp.c++/imperfect-class-1.C: New test.
* testsuite/libgomp.c++/imperfect-class-2.C: New test.
* testsuite/libgomp.c++/imperfect-class-3.C: New test.
* testsuite/libgomp.c++/imperfect-destructor.C: New test.
* testsuite/libgomp.c++/imperfect-template-1.C: New test.
* testsuite/libgomp.c++/imperfect-template-2.C: New test.
* testsuite/libgomp.c++/imperfect-template-3.C: New test.
---
 gcc/cp/cp-tree.h  |2 +-
 gcc/cp/parser.cc  | 1315 -
 gcc/cp/parser.h   |3 +
 gcc/cp/pt.cc  |3 +-
 gcc/cp/semantics.cc   |  117 +-
 gcc/testsuite/c-c++-common/goacc/tile-2.c |4 +-
 gcc/testsuite/g++.dg/gomp/attrs-imperfect1.C  |   38 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect2.C  |   34 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect3.C  |   33 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect4.C  |   33 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect5.C  |   57 +
 gcc/testsuite/g++.dg/gomp/pr41967.C   |2 +-
 .../g++.dg/gomp/tpl-imperfect-gotos.C |  161 ++
 .../g++.dg/gomp/tpl-imperfect-invalid-scope.C |

[COMMITTED V3 1/6] OpenMP: Add OMP_STRUCTURED_BLOCK and GIMPLE_OMP_STRUCTURED_BLOCK.

2023-08-25 Thread Sandra Loosemore

In order to detect invalid jumps in and out of intervening code in
imperfectly-nested loops, the front ends need to insert some sort of
marker to identify the structured block sequences that they push into
the inner body of the loop.  The error checking happens in the
diagnose_omp_blocks pass, between gimplification and OMP lowering, so
we need both GENERIC and GIMPLE representations of these markers.
They are removed in OMP lowering so no subsequent passes need to know
about them.

This patch doesn't include any front-end changes to generate the new
data structures.

gcc/cp/ChangeLog
* constexpr.cc (cxx_eval_constant_expression): Handle
OMP_STRUCTURED_BLOCK.
* pt.cc (tsubst_expr): Likewise.

gcc/ChangeLog
* doc/generic.texi (OpenMP): Document OMP_STRUCTURED_BLOCK.
* doc/gimple.texi (GIMPLE instruction set): Add
GIMPLE_OMP_STRUCTURED_BLOCK.
(GIMPLE_OMP_STRUCTURED_BLOCK): New subsection.
* gimple-low.cc (lower_stmt): Error on GIMPLE_OMP_STRUCTURED_BLOCK.
* gimple-pretty-print.cc (dump_gimple_omp_block): Handle
GIMPLE_OMP_STRUCTURED_BLOCK.
(pp_gimple_stmt_1): Likewise.
* gimple-walk.cc (walk_gimple_stmt): Likewise.
* gimple.cc (gimple_build_omp_structured_block): New.
* gimple.def (GIMPLE_OMP_STRUCTURED_BLOCK): New.
* gimple.h (gimple_build_omp_structured_block): Declare.
(gimple_has_substatements): Handle GIMPLE_OMP_STRUCTURED_BLOCK.
(CASE_GIMPLE_OMP): Likewise.
* gimplify.cc (is_gimple_stmt): Handle OMP_STRUCTURED_BLOCK.
(gimplify_expr): Likewise.
* omp-expand.cc (GIMPLE_OMP_STRUCTURED_BLOCK): Error on
GIMPLE_OMP_STRUCTURED_BLOCK.
* omp-low.cc (scan_omp_1_stmt): Handle GIMPLE_OMP_STRUCTURED_BLOCK.
(lower_omp_1): Likewise.
(diagnose_sb_1): Likewise.
(diagnose_sb_2): Likewise.
* tree-inline.cc (remap_gimple_stmt): Handle
GIMPLE_OMP_STRUCTURED_BLOCK.
(estimate_num_insns): Likewise.
* tree-nested.cc (convert_nonlocal_reference_stmt): Likewise.
(convert_local_reference_stmt): Likewise.
(convert_gimple_call): Likewise.
* tree-pretty-print.cc (dump_generic_node): Handle
OMP_STRUCTURED_BLOCK.
* tree.def (OMP_STRUCTURED_BLOCK): New.
* tree.h (OMP_STRUCTURED_BLOCK_BODY): New.
---
 gcc/cp/constexpr.cc|  1 +
 gcc/cp/pt.cc   |  1 +
 gcc/doc/generic.texi   | 14 ++
 gcc/doc/gimple.texi| 19 +++
 gcc/gimple-low.cc  |  4 
 gcc/gimple-pretty-print.cc |  6 +-
 gcc/gimple-walk.cc |  1 +
 gcc/gimple.cc  | 15 +++
 gcc/gimple.def |  5 +
 gcc/gimple.h   |  3 +++
 gcc/gimplify.cc|  6 ++
 gcc/omp-expand.cc  |  4 
 gcc/omp-low.cc | 11 +++
 gcc/tree-inline.cc |  6 ++
 gcc/tree-nested.cc |  3 +++
 gcc/tree-pretty-print.cc   |  4 
 gcc/tree.def   |  9 +
 gcc/tree.h |  3 +++
 18 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index da2c3116810..8bd5c4a47f8 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8155,6 +8155,7 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
 case OMP_SCAN:
 case OMP_SCOPE:
 case OMP_SECTION:
+case OMP_STRUCTURED_BLOCK:
 case OMP_MASTER:
 case OMP_MASKED:
 case OMP_TASKGROUP:
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index a4809f034dc..e06d11fcb75 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -19741,6 +19741,7 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
   break;
 
 case OMP_MASTER:
+case OMP_STRUCTURED_BLOCK:
   omp_parallel_combined_clauses = NULL;
   /* FALLTHRU */
 case OMP_SECTION:
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index 3f9bddd7eae..c8d6ef062f6 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -2488,6 +2488,20 @@ In some cases, @code{OMP_CONTINUE} is placed right before
 occur right after the looping body, it will be emitted between
 @code{OMP_CONTINUE} and @code{OMP_RETURN}.
 
+@item OMP_STRUCTURED_BLOCK
+
+This is another statement that doesn't correspond to an OpenMP directive.
+It is used to mark sections of code in another directive that must
+be structured block sequences, in particular for sequences of intervening code
+in the body of an @code{OMP_FOR}.  It is not necessary to use this when the
+entire body of a directive is required to be a structured block sequence,
+since that is implicit in the representation of the corresponding node.
+
+This tree node is used only to allow error checking transfers of control
+in/out of the structured block sequence after gimplification.
+It has a single operand (@code{OMP_STRUCTURED_BLOCK_BODY}) that is
+the code within the structured

[COMMITTED V3 0/6] Support for imperfectly-nested loops

2023-08-25 Thread Sandra Loosemore

V2 of these patches were previously approved with a few small changes
requested.  For the archives, here is the version I've pushed.

Parts 1 and 2 are unchanged since V2.  In part 3 (C++) I had to fix a
merge problem by hand.  In parts 4 and 5, I hacked up a couple tests
as requested by Jakub.  Part 5 (Fortran) also includes the whitespace
fix Tobias pointed out, and part 6 (documentation) is new.

-Sandra

Sandra Loosemore (6):
  OpenMP: Add OMP_STRUCTURED_BLOCK and GIMPLE_OMP_STRUCTURED_BLOCK.
  OpenMP: C front end support for imperfectly-nested loops
  OpenMP: C++ support for imperfectly-nested loops
  OpenMP: New C/C++ testcases for imperfectly nested loops.
  OpenMP: Fortran support for imperfectly-nested loops
  OpenMP: Document support for imperfectly-nested loops.

 gcc/c-family/c-common.h   |1 +
 gcc/c-family/c-omp.cc |  151 ++
 gcc/c/c-parser.cc |  860 +++
 gcc/cp/constexpr.cc   |1 +
 gcc/cp/cp-tree.h  |2 +-
 gcc/cp/parser.cc  | 1315 -
 gcc/cp/parser.h   |3 +
 gcc/cp/pt.cc  |4 +-
 gcc/cp/semantics.cc   |  117 +-
 gcc/doc/generic.texi  |   14 +
 gcc/doc/gimple.texi   |   19 +
 gcc/fortran/gfortran.h|3 +
 gcc/fortran/openmp.cc |  763 --
 gcc/fortran/trans-stmt.cc |7 +-
 gcc/gimple-low.cc |4 +
 gcc/gimple-pretty-print.cc|6 +-
 gcc/gimple-walk.cc|1 +
 gcc/gimple.cc |   15 +
 gcc/gimple.def|5 +
 gcc/gimple.h  |3 +
 gcc/gimplify.cc   |6 +
 gcc/omp-api.h |   32 +
 gcc/omp-expand.cc |4 +
 gcc/omp-general.cc|  134 ++
 gcc/omp-general.h |1 +
 gcc/omp-low.cc|  140 +-
 gcc/testsuite/c-c++-common/goacc/collapse-1.c |   16 +-
 gcc/testsuite/c-c++-common/goacc/tile-2.c |4 +-
 .../c-c++-common/gomp/imperfect-attributes.c  |   81 +
 .../c-c++-common/gomp/imperfect-badloops.c|   50 +
 .../c-c++-common/gomp/imperfect-blocks.c  |   75 +
 .../c-c++-common/gomp/imperfect-extension.c   |   55 +
 .../c-c++-common/gomp/imperfect-gotos.c   |  174 +++
 .../gomp/imperfect-invalid-scope.c|   77 +
 .../c-c++-common/gomp/imperfect-labels.c  |   85 ++
 .../gomp/imperfect-legacy-syntax.c|   44 +
 .../c-c++-common/gomp/imperfect-pragmas.c |   85 ++
 gcc/testsuite/c-c++-common/gomp/imperfect1.c  |   38 +
 gcc/testsuite/c-c++-common/gomp/imperfect2.c  |   34 +
 gcc/testsuite/c-c++-common/gomp/imperfect3.c  |   52 +
 gcc/testsuite/c-c++-common/gomp/imperfect4.c  |   33 +
 gcc/testsuite/c-c++-common/gomp/imperfect5.c  |   95 ++
 gcc/testsuite/g++.dg/gomp/attrs-imperfect1.C  |   38 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect2.C  |   34 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect3.C  |   33 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect4.C  |   33 +
 gcc/testsuite/g++.dg/gomp/attrs-imperfect5.C  |   57 +
 gcc/testsuite/g++.dg/gomp/pr41967.C   |2 +-
 .../g++.dg/gomp/tpl-imperfect-gotos.C |  161 ++
 .../g++.dg/gomp/tpl-imperfect-invalid-scope.C |   94 ++
 gcc/testsuite/gcc.dg/gomp/collapse-1.c|   10 +-
 gcc/testsuite/gfortran.dg/gomp/collapse1.f90  |6 +-
 gcc/testsuite/gfortran.dg/gomp/collapse2.f90  |   10 +-
 .../gfortran.dg/gomp/imperfect-gotos.f90  |   69 +
 .../gomp/imperfect-invalid-scope.f90  |   81 +
 gcc/testsuite/gfortran.dg/gomp/imperfect1.f90 |   39 +
 gcc/testsuite/gfortran.dg/gomp/imperfect2.f90 |   56 +
 gcc/testsuite/gfortran.dg/gomp/imperfect3.f90 |   45 +
 gcc/testsuite/gfortran.dg/gomp/imperfect4.f90 |   36 +
 gcc/testsuite/gfortran.dg/gomp/imperfect5.f90 |   85 ++
 gcc/tree-inline.cc|6 +
 gcc/tree-nested.cc|3 +
 gcc/tree-pretty-print.cc  |4 +
 gcc/tree.def  |9 +
 gcc/tree.h|3 +
 libgomp/libgomp.texi  |2 +-
 .../testsuite/libgomp.c++/attrs-imperfect1.C  |   76 +
 .../testsuite/libgomp.c++/attrs-imperfect2.C  |  114 ++
 .../testsuite/libgomp.c++/attrs-imperfect3.C  |  119 ++
 .../testsuite/libgomp.c++/attrs-imperfect4.C  |  117 ++
 .../testsuite/libgomp.c++/attrs-imperfect5.C  |   49 +
 .../testsuite/libgomp.c++/attrs-imperfect6.C  |  115 ++
 .../testsuite/libgomp.c++/imperfect-class-1.C |  169 +++
 .../testsuite/libgomp.c++/imperfect-class-2.C |  167 +++

[Committed] RISC-V: Enable Hoist to GCSE simple constants

2023-08-25 Thread Vineet Gupta

Hoist want_to_gcse_p () calls rtx_cost () to compute max distance for
hoist candidates. For a simple const (say 6 which needs seperate insn "LI 6")
backend currently returns 0, causing Hoist to bail and elide GCSE.

Note that constants requiring more than 1 insns to setup were working
fine since riscv_rtx_costs () was returning non-zero (although that
itself might need refining: see bugzilla 39).

To keep testsuite parity, some V tests need updating which started failing
in the new costing regime.

gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_rtx_costs): Adjust const_int
cost. Add some comments about different constants handling.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/gcse-const.c: New Test
* gcc.target/riscv/rvv/vsetvl/vlmax_conflict-7.c: Remove test
for Jump.
* gcc.target/riscv/rvv/vsetvl/vlmax_conflict-8.c: Ditto.

Signed-off-by: Vineet Gupta 
---
 gcc/config/riscv/riscv.cc  | 18 +-
 gcc/testsuite/gcc.target/riscv/gcse-const.c| 13 +
 .../riscv/rvv/vsetvl/vlmax_conflict-7.c|  1 -
 .../riscv/rvv/vsetvl/vlmax_conflict-8.c|  1 -
 4 files changed, 22 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/gcse-const.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 13166d19619c..98a46b00ceb5 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -2490,6 +2490,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
   switch (GET_CODE (x))
 {
 case CONST_INT:
+  /* trivial constants checked using OUTER_CODE in case they are
+encodable in insn itself w/o need for additional insn(s).  */
   if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
{
  *total = 0;
@@ -2507,17 +2509,15 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
   /* Fall through.  */
 
 case CONST:
+  /* Non trivial CONST_INT Fall through: check if need multiple insns.  */
   if ((cost = riscv_const_insns (x)) > 0)
{
- /* If the constant is likely to be stored in a GPR, SETs of
-single-insn constants are as cheap as register sets; we
-never want to CSE them.  */
- if (cost == 1 && outer_code == SET)
-   *total = 0;
- /* When we load a constant more than once, it usually is better
-to duplicate the last operation in the sequence than to CSE
-the constant itself.  */
- else if (outer_code == SET || GET_MODE (x) == VOIDmode)
+ /* 1. Hoist will GCSE constants only if TOTAL returned is non-zero.
+2. For constants loaded more than once, the approach so far has
+   been to duplicate the operation than to CSE the constant.
+3. TODO: make cost more accurate specially if riscv_const_insns
+   returns > 1.  */
+ if (outer_code == SET || GET_MODE (x) == VOIDmode)
*total = COSTS_N_INSNS (1);
}
   else /* The instruction will be fetched from the constant pool.  */
diff --git a/gcc/testsuite/gcc.target/riscv/gcse-const.c 
b/gcc/testsuite/gcc.target/riscv/gcse-const.c
new file mode 100644
index ..b04707ce9745
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/gcse-const.c
@@ -0,0 +1,13 @@
+/* Slightly modified copy of gcc.target/arm/pr40956.c.  */
+/* { dg-options "-Os" }  */
+/* Make sure the constant "6" is loaded into register only once.  */
+/* { dg-final { scan-assembler-times "\tli.*6" 1 } } */
+
+int foo(int p, int* q)
+{
+  if (p!=9)
+*q = 6;
+  else
+*(q+1) = 6;
+  return 3;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-7.c
index 60ad108666f8..085ca9db8542 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-7.c
@@ -21,5 +21,4 @@ void f (int32_t * restrict in, int32_t * restrict out, size_t 
n, size_t cond, si
 }
 
 /* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0"  
no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts 
"-g" } } } } */
-/* { dg-final { scan-assembler-times {j\s+\.L[0-9]+\s+\.L[0-9]+:\s+vlm\.v} 1 { 
target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts 
"-funroll-loops" no-opts "-g" } } } } */
 /* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 3 { target { 
no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts 
"-funroll-loops" no-opts "-g" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-8.c
index 7b9574cc332d..fbca9b00e54e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_conflict-8.c
+++

Re: [PATCH V2] RISC-V: Add Types to Un-Typed Sync Instructions:

2023-08-25 Thread Jeff Law via Gcc-patches





On 8/24/23 15:19, Edwin Lu wrote:

Related Discussion:
https://inbox.sourceware.org/gcc-patches/12fb5088-3f28-0a69-de1e-f387371a5...@gmail.com/

This patch updates the sync instructions to ensure that no insn is left
without a type attribute. Updates a total of 6 insns to have type "atomic"

Tested for regressions using rv32/64 multilib with newlib/linux.

gcc/Changelog:

* config/riscv/sync-rvwmo.md: updated types to "multi" or
 "atomic" based on number of assembly lines generated
* config/riscv/sync-ztso.md: likewise
* config/riscv/sync.md: likewise

OK.

You should have your write access set up already.  So go ahead and 
follow the directions in the email you received to get yourself into the 
MAINTAINERS file.  Then you can go ahead and push this change.


THanks,
Jeff

Re: [PATCH v2] RISC-V: Enable Hoist to GCSE simple constants

2023-08-25 Thread Jeff Law via Gcc-patches





On 8/24/23 23:16, Vineet Gupta wrote:

Hoist want_to_gcse_p () calls rtx_cost () to compute max distance for
hoist candidates. For a simple const (say 6 which needs seperate insn "LI 6")
backend currently returns 0, causing Hoist to bail and elide GCSE.

Note that constants requiring more than 1 insns to setup were working
fine since riscv_rtx_costs () was returning non-zero (although that
itself might need refining: see bugzilla 39).

To keep testsuite parity, some V tests need updating which started failing
in the new costing regime.

gcc/ChangeLog:
* gcc/config/riscv.cc (riscv_rtx_costs): Adjust const_int cost.
Add some comments about different constants handling.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/gcse-const.c: New Test
* gcc.target/riscv/rvv/vsetvl/vlmax_conflict-7.c: Remove test
  for Jump.
* gcc.target/riscv/rvv/vsetvl/vlmax_conflict-8.c: Ditto.

OK.

jeff

Re: RISC-V: Fix stack_save_restore_1/2 test cases

2023-08-25 Thread Jeff Law via Gcc-patches

On 8/24/23 09:45, Jivan Hakobyan via Gcc-patches wrote:

Subject:
RISC-V: Fix stack_save_restore_1/2 test cases
From:
Jivan Hakobyan via Gcc-patches 
Date:
8/24/23, 09:45

To:
GCC Patches , Jeff Law 

This patch fixes failing stack_save_restore_1/2 test cases.
After 6619b3d4c15c commit size of the frame was changed.

gcc/testsuite/ChangeLog:
 * gcc.target/riscv/stack_save_restore_1.c: Update frame size
 * gcc.target/riscv/stack_save_restore_2.c: Likewise.
Rather than use specific values for the size of the stack in this test, 
can we match something a little more general so that we're not 
constantly having to come back and adjust the stack offset?

I'm not real familiar with the check-function-bodies capabilities, but I 
suspect we can probably use a regexp like [0-9]+ rather than 2016, 2032, 
etc.

Jeff

[COMMITTEDv2] MATCH: Move `a ? one_zero : one_zero` matching after min/max matching

2023-08-25 Thread Andrew Pinski via Gcc-patches

In PR 106677, I noticed that on the trunk we were producing:
```
  _25 = SR.116_117 == 0;
  _27 = (unsigned char) _25;
  _32 = _27 | SR.116_117;
```
>From `SR.115_117 != 0 ? SR.115_117 : 1`
Rather than:
```
  _119 = MAX_EXPR <1, SR.115_117>;
```
Or (rather)
```
  _119 = SR.115_117 | 1;
```
Due to the order of the patterns.

Committed as approved with the new comment and testcase.
Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd (`a ? one_zero : one_zero`): Move
below detection of minmax.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-34.c: New test.
---
 gcc/match.pd   | 42 --
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c | 23 
 2 files changed, 47 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c

diff --git a/gcc/match.pd b/gcc/match.pd
index d9f35e9e25b..fa598d5ca2e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4961,24 +4961,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  )
 )
 
-(simplify
- (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
- (switch
-  /* bool0 ? bool1 : 0 -> bool0 & bool1 */
-  (if (integer_zerop (@2))
-   (bit_and (convert @0) @1))
-  /* bool0 ? 0 : bool2 -> (bool0^1) & bool2 */
-  (if (integer_zerop (@1))
-   (bit_and (bit_xor (convert @0) { build_one_cst (type); } ) @2))
-  /* bool0 ? 1 : bool2 -> bool0 | bool2 */
-  (if (integer_onep (@1))
-   (bit_ior (convert @0) @2))
-  /* bool0 ? bool1 : 1 -> (bool0^1) | bool1 */
-  (if (integer_onep (@2))
-   (bit_ior (bit_xor (convert @0) @2) @1))
- )
-)
-
 /* Optimize
# x_5 in range [cst1, cst2] where cst2 = cst1 + 1
x_5 ? cstN ? cst4 : cst3
@@ -5309,6 +5291,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, @3, 
@1)))
   (max @2 @4))
 
+#if GIMPLE
+/* These patterns should be after min/max detection as simplifications
+   of `(type)(zero_one ==/!= 0)` to `(type)(zero_one)`
+   and `(type)(zero_one^1)` are not done yet.  See PR 110637.
+   Even without those, reaching min/max/and/ior faster is better.  */
+(simplify
+ (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
+ (switch
+  /* bool0 ? bool1 : 0 -> bool0 & bool1 */
+  (if (integer_zerop (@2))
+   (bit_and (convert @0) @1))
+  /* bool0 ? 0 : bool2 -> (bool0^1) & bool2 */
+  (if (integer_zerop (@1))
+   (bit_and (bit_xor (convert @0) { build_one_cst (type); } ) @2))
+  /* bool0 ? 1 : bool2 -> bool0 | bool2 */
+  (if (integer_onep (@1))
+   (bit_ior (convert @0) @2))
+  /* bool0 ? bool1 : 1 -> (bool0^1) | bool1 */
+  (if (integer_onep (@2))
+   (bit_ior (bit_xor (convert @0) @2) @1))
+ )
+)
+#endif
+
 /* X != C1 ? -X : C2 simplifies to -X when -C1 == C2.  */
 (simplify
  (cond (ne @0 INTEGER_CST@1) (negate@3 @0) INTEGER_CST@2)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c
new file mode 100644
index 000..157c3ea9a0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-34.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* Disable early phiopt1 as  early ccp1 does not export non-zero bits
+   so at the point of phiopt1, we don't know that a is [0,1] range */
+/* { dg-options "-O1 -fdisable-tree-phiopt1 -fdump-tree-phiopt2-folding"} */
+
+unsigned f(unsigned a)
+{
+  a &= 1;
+  if (a > 0)
+return a;
+  return 1;
+}
+/* PHIOPT2 should be able to change this into just return 1;
+   (which was `MAX` or `a | 1` but since a is known to be a
+   range of [0,1], it should be folded into 1)
+   And not fold it into `(a == 0) | a`. */
+/* { dg-final { scan-tree-dump-not " == " "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not " if " "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not "Folded into the sequence:" "phiopt2" } } */
+/* { dg-final { scan-tree-dump "return 1;" "phiopt2" } } */
+/* We want to make sure that phiopt2 is happening and not some other pass
+   before it does the transformation. */
+/* { dg-final { scan-tree-dump "Removing basic block" "phiopt2" } } */
-- 
2.31.1

Re: [PATCH 1/3] MATCH: Move `a ? one_zero : one_zero` matching after min/max matching

2023-08-25 Thread Andrew Pinski via Gcc-patches

On Fri, Aug 25, 2023 at 11:11 AM Andrew Pinski  wrote:
>
> On Thu, Aug 24, 2023 at 11:39 PM Richard Biener via Gcc-patches
>  wrote:
> >
> > On Thu, Aug 24, 2023 at 9:16 PM Andrew Pinski via Gcc-patches
> >  wrote:
> > >
> > > In PR 106677, I noticed that on the trunk we were producing:
> > > ```
> > >   _25 = SR.116_117 == 0;
> > >   _27 = (unsigned char) _25;
> > >   _32 = _27 | SR.116_117;
> > > ```
> > > From `SR.115_117 != 0 ? SR.115_117 : 1`
> > > Rather than:
> > > ```
> > >   _119 = MAX_EXPR <1, SR.115_117>;
> > > ```
> > > Or (rather)
> > > ```
> > >   _119 = SR.115_117 | 1;
> > > ```
> > > Due to the order of the patterns.
> >
> > Hmm, that means the former when present in source isn't optimized?
>
> That it is correct; they are not optimized at the gimple level down to
> 1. it is sometimes (but not on all targets) optimized at the RTL level
> though.

I forgot to mention that this is recorded as PR 110637 already.

Thanks,
Andrew

>
> >
> > > OK? Bootstrapped and tested on x86_64-linux-gnu with no
> > > regressions.
> >
> > OK, but please add a comment indicating the ordering requirement.
> >
> > Can you also add a testcase?
>
> Yes and yes. Will send out a new patch in a few minutes with the added
> comment and testcase.
>
> Thanks,
> Andrew
>
> >
> > Richard.
> >
> > > gcc/ChangeLog:
> > >
> > > * match.pd (`a ? one_zero : one_zero`): Move
> > > below detection of minmax.
> > > ---
> > >  gcc/match.pd | 38 --
> > >  1 file changed, 20 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 890f050cbad..c87a0795667 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -4950,24 +4950,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >   )
> > >  )
> > >
> > > -(simplify
> > > - (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
> > > - (switch
> > > -  /* bool0 ? bool1 : 0 -> bool0 & bool1 */
> > > -  (if (integer_zerop (@2))
> > > -   (bit_and (convert @0) @1))
> > > -  /* bool0 ? 0 : bool2 -> (bool0^1) & bool2 */
> > > -  (if (integer_zerop (@1))
> > > -   (bit_and (bit_xor (convert @0) { build_one_cst (type); } ) @2))
> > > -  /* bool0 ? 1 : bool2 -> bool0 | bool2 */
> > > -  (if (integer_onep (@1))
> > > -   (bit_ior (convert @0) @2))
> > > -  /* bool0 ? bool1 : 1 -> (bool0^1) | bool1 */
> > > -  (if (integer_onep (@2))
> > > -   (bit_ior (bit_xor (convert @0) @2) @1))
> > > - )
> > > -)
> > > -
> > >  /* Optimize
> > > # x_5 in range [cst1, cst2] where cst2 = cst1 + 1
> > > x_5 ? cstN ? cst4 : cst3
> > > @@ -5298,6 +5280,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >&& integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, 
> > > @3, @1)))
> > >(max @2 @4))
> > >
> > > +#if GIMPLE
> > > +(simplify
> > > + (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
> > > + (switch
> > > +  /* bool0 ? bool1 : 0 -> bool0 & bool1 */
> > > +  (if (integer_zerop (@2))
> > > +   (bit_and (convert @0) @1))
> > > +  /* bool0 ? 0 : bool2 -> (bool0^1) & bool2 */
> > > +  (if (integer_zerop (@1))
> > > +   (bit_and (bit_xor (convert @0) { build_one_cst (type); } ) @2))
> > > +  /* bool0 ? 1 : bool2 -> bool0 | bool2 */
> > > +  (if (integer_onep (@1))
> > > +   (bit_ior (convert @0) @2))
> > > +  /* bool0 ? bool1 : 1 -> (bool0^1) | bool1 */
> > > +  (if (integer_onep (@2))
> > > +   (bit_ior (bit_xor (convert @0) @2) @1))
> > > + )
> > > +)
> > > +#endif
> > > +
> > >  /* X != C1 ? -X : C2 simplifies to -X when -C1 == C2.  */
> > >  (simplify
> > >   (cond (ne @0 INTEGER_CST@1) (negate@3 @0) INTEGER_CST@2)
> > > --
> > > 2.31.1
> > >

[wwwdocs] projects/gomp: Update implementation status and minor fixes

2023-08-25 Thread Tobias Burnus


This syncs the libgomp.texi implementation status to the webpage,
i.e. adding a few new items + marking some as 'supported'.

It also fixes a couple of bugs and adds links providing more details
for two items (a PR link as in libgomp.texi and a section in the manual).

Comments? Suggestions? If not, I will commit it tomorrow.

Current version: https://gcc.gnu.org/projects/gomp/
General suggestions about this page - like what to add, split off,
move around are also welcome. (Likewise comments to


Tobias

PS: The patch assumes that's Sandra's intervening-code support patch is
applied (should happen in a few hours).

PPS: The GCC 14 release notes still need to be updated to match the
current support. Alas, that's an on going theme as features keep getting
added :-)
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
projects/gomp/: Update implementation status and minor fixes

diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html
index 2df67403..04bfd908 100644
--- a/htdocs/projects/gomp/index.html
+++ b/htdocs/projects/gomp/index.html
@@ -38,7 +38,9 @@ OpenMP and OpenACC are supported with GCC's C, C++ and Fortran compilers.
   To enable https://www.openmp.org;>OpenMP,
   use https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html#index-fopenmp;
-  >-fopenmp. -fopenmp-simd can be used
+  >-fopenmp. https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html#index-fopenmp-simd;>
+  -fopenmp-simd can be used
   to enable only the SIMD vectorization and loop-transformation constructs
   without creating multiple threads, offloading code or adding a library
   dependency.
@@ -75,8 +77,8 @@ OpenMP and OpenACC are supported with GCC's C, C++ and Fortran compilers.
   https://www.openmp.org/specifications/;>OpenMP specification,
   including OpenMP API examples documents, reference cards and additional
   definitions specification.
-  https://www.openacc.org/specification;>OpenACC
-  specification.
+  https://www.openacc.org/specification;>OpenACC
+  specification.
   Related GCC wiki pages: https://gcc.gnu.org/wiki/openmp;
   >openmp, https://gcc.gnu.org/wiki/OpenACC;>OpenACC,
   https://gcc.gnu.org/wiki/Offloading;>Offloading.
@@ -312,7 +314,7 @@ than listed, depending on resolved corner cases and optimizations.
 
   GCC9
   GCC12
-  GCC13
+  GCC13
   GCC14
 
 
@@ -371,12 +373,13 @@ than listed, depending on resolved corner cases and optimizations.
   
 Predefined memory spaces, memory allocators, allocator traits
 GCC11
-Some are only stubs
+Some are only stubs; see manual (https://gcc.gnu.org/onlinedocs/libgomp/Memory-allocation.html;>mainline)
   
   
 Non-rectangular loop nests
 GCC11GCC13
-C/C++ (full)Fortran (partial)
+C/C++ (full)Fortran (partial, https://gcc.gnu.org/PR110735;>PR110735)
   
   
 Nested-parallel changes to max-active-levels-var ICV
@@ -446,7 +449,7 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 Mapping of Fortran pointer and allocatable variables, including pointer and allocatable components of variables
-GCC12
+GCC12
 Mapping of vars with allocatable components unsupported
   
   
@@ -471,7 +474,7 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 Collapse of associated loops that are imperfectly nested loops
-No
+GCC14
 
   
   
@@ -610,7 +613,7 @@ than listed, depending on resolved corner cases and optimizations.
 
   
   
-OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT env variables
+OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT environment variables
 GCC12
 
   
@@ -746,6 +749,26 @@ than listed, depending on resolved corner cases and optimizations.
 No
 
   
+  
+Optional comma between directive and clause in the #pragma form
+No
+
+  
+  
+indirect clause in declare target
+No
+
+  
+  
+device_type(nohost)/device_type(host) for variables
+No
+
+  
+  
+present modifier to the map, to and from clauses
+GCC14
+
+  
   
 ompt_sync_region_t enum additions
 No
@@ -818,12 +841,12 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 declare mapper with iterator and present modifiers
-No
+GCC14
 
   
   
 If a matching mapped list item is not found in the data environment, the pointer retains its original value
-No
+GCC14
 
   
   
@@ -838,7 +861,7 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 Extended list of directives permitted in Fortran pure procedures
-GCC14
+GCC14
 
   
   
@@ -928,7 +951,12 @@ than listed, depending on resolved corner cases and optimizations.
   
   
 Initial value of default-device-var ICV with

Re: [PATCH 1/3] MATCH: Move `a ? one_zero : one_zero` matching after min/max matching

2023-08-25 Thread Andrew Pinski via Gcc-patches

On Thu, Aug 24, 2023 at 11:39 PM Richard Biener via Gcc-patches
 wrote:
>
> On Thu, Aug 24, 2023 at 9:16 PM Andrew Pinski via Gcc-patches
>  wrote:
> >
> > In PR 106677, I noticed that on the trunk we were producing:
> > ```
> >   _25 = SR.116_117 == 0;
> >   _27 = (unsigned char) _25;
> >   _32 = _27 | SR.116_117;
> > ```
> > From `SR.115_117 != 0 ? SR.115_117 : 1`
> > Rather than:
> > ```
> >   _119 = MAX_EXPR <1, SR.115_117>;
> > ```
> > Or (rather)
> > ```
> >   _119 = SR.115_117 | 1;
> > ```
> > Due to the order of the patterns.
>
> Hmm, that means the former when present in source isn't optimized?

That it is correct; they are not optimized at the gimple level down to
1. it is sometimes (but not on all targets) optimized at the RTL level
though.

>
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no
> > regressions.
>
> OK, but please add a comment indicating the ordering requirement.
>
> Can you also add a testcase?

Yes and yes. Will send out a new patch in a few minutes with the added
comment and testcase.

Thanks,
Andrew

>
> Richard.
>
> > gcc/ChangeLog:
> >
> > * match.pd (`a ? one_zero : one_zero`): Move
> > below detection of minmax.
> > ---
> >  gcc/match.pd | 38 --
> >  1 file changed, 20 insertions(+), 18 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 890f050cbad..c87a0795667 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -4950,24 +4950,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   )
> >  )
> >
> > -(simplify
> > - (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
> > - (switch
> > -  /* bool0 ? bool1 : 0 -> bool0 & bool1 */
> > -  (if (integer_zerop (@2))
> > -   (bit_and (convert @0) @1))
> > -  /* bool0 ? 0 : bool2 -> (bool0^1) & bool2 */
> > -  (if (integer_zerop (@1))
> > -   (bit_and (bit_xor (convert @0) { build_one_cst (type); } ) @2))
> > -  /* bool0 ? 1 : bool2 -> bool0 | bool2 */
> > -  (if (integer_onep (@1))
> > -   (bit_ior (convert @0) @2))
> > -  /* bool0 ? bool1 : 1 -> (bool0^1) | bool1 */
> > -  (if (integer_onep (@2))
> > -   (bit_ior (bit_xor (convert @0) @2) @1))
> > - )
> > -)
> > -
> >  /* Optimize
> > # x_5 in range [cst1, cst2] where cst2 = cst1 + 1
> > x_5 ? cstN ? cst4 : cst3
> > @@ -5298,6 +5280,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >&& integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, 
> > @3, @1)))
> >(max @2 @4))
> >
> > +#if GIMPLE
> > +(simplify
> > + (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
> > + (switch
> > +  /* bool0 ? bool1 : 0 -> bool0 & bool1 */
> > +  (if (integer_zerop (@2))
> > +   (bit_and (convert @0) @1))
> > +  /* bool0 ? 0 : bool2 -> (bool0^1) & bool2 */
> > +  (if (integer_zerop (@1))
> > +   (bit_and (bit_xor (convert @0) { build_one_cst (type); } ) @2))
> > +  /* bool0 ? 1 : bool2 -> bool0 | bool2 */
> > +  (if (integer_onep (@1))
> > +   (bit_ior (convert @0) @2))
> > +  /* bool0 ? bool1 : 1 -> (bool0^1) | bool1 */
> > +  (if (integer_onep (@2))
> > +   (bit_ior (bit_xor (convert @0) @2) @1))
> > + )
> > +)
> > +#endif
> > +
> >  /* X != C1 ? -X : C2 simplifies to -X when -C1 == C2.  */
> >  (simplify
> >   (cond (ne @0 INTEGER_CST@1) (negate@3 @0) INTEGER_CST@2)
> > --
> > 2.31.1
> >

Re: [PATCH] c++: CWG 2359, wrong copy-init with designated init [PR91319]

2023-08-25 Thread Patrick Palka via Gcc-patches

On Fri, 25 Aug 2023, Marek Polacek via Gcc-patches wrote:

> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

LGTM

> 
> -- >8 --
> 
> This CWG clarifies that designated initializer support direct-initialization.
> Just be careful what Note 2 in [dcl.init.aggr]/4.2 says: "If the
> initialization is by designated-initializer-clause, its form determines
> whether copy-initialization or direct-initialization is performed."  Hence
> this patch sets CONSTRUCTOR_IS_DIRECT_INIT only when we are dealing with
> ".x{}", but not ".x = {}".
> 
>   PR c++/91319
> 
> gcc/cp/ChangeLog:
> 
>   * parser.cc (cp_parser_initializer_list): Set CONSTRUCTOR_IS_DIRECT_INIT
>   when the designated initializer is of the .x{} form.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/cpp2a/desig30.C: New test.
> ---
>  gcc/cp/parser.cc |  6 ++
>  gcc/testsuite/g++.dg/cpp2a/desig30.C | 22 ++
>  2 files changed, 28 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/cpp2a/desig30.C
> 
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index eeb22e44fb4..b3d5c65b469 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -25718,6 +25718,7 @@ cp_parser_initializer_list (cp_parser* parser, bool* 
> non_constant_p,
>tree designator;
>tree initializer;
>bool clause_non_constant_p;
> +  bool direct_p = false;
>location_t loc = cp_lexer_peek_token (parser->lexer)->location;
>  
>/* Handle the C++20 syntax, '. id ='.  */
> @@ -25740,6 +25741,8 @@ cp_parser_initializer_list (cp_parser* parser, bool* 
> non_constant_p,
> if (cp_lexer_next_token_is (parser->lexer, CPP_EQ))
>   /* Consume the `='.  */
>   cp_lexer_consume_token (parser->lexer);
> +   else
> + direct_p = true;
>   }
>/* Also, if the next token is an identifier and the following one is a
>colon, we are looking at the GNU designated-initializer
> @@ -25817,6 +25820,9 @@ cp_parser_initializer_list (cp_parser* parser, bool* 
> non_constant_p,
>if (clause_non_constant_p && non_constant_p)
>   *non_constant_p = true;
>  
> +  if (TREE_CODE (initializer) == CONSTRUCTOR)
> + CONSTRUCTOR_IS_DIRECT_INIT (initializer) |= direct_p;
> +
>/* If we have an ellipsis, this is an initializer pack
>expansion.  */
>if (cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS))
> diff --git a/gcc/testsuite/g++.dg/cpp2a/desig30.C 
> b/gcc/testsuite/g++.dg/cpp2a/desig30.C
> new file mode 100644
> index 000..d9292df9de2
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp2a/desig30.C
> @@ -0,0 +1,22 @@
> +// PR c++/91319
> +// { dg-do compile { target c++20 } }
> +
> +struct X {
> +explicit X() { }
> +};
> +
> +struct Aggr {
> +X x;
> +};
> +
> +Aggr
> +f ()
> +{
> +  return Aggr{.x{}};
> +}
> +
> +Aggr
> +f2 ()
> +{
> +  return Aggr{.x = {}}; // { dg-error "explicit constructor" }
> +}
> 
> base-commit: 54cc21eaf5f3eb7f7a508919a086f6c8bf5c4c17
> -- 
> 2.41.0
> 
>

[PATCH] c++: more dummy non_constant_p arg avoidance

2023-08-25 Thread Patrick Palka via Gcc-patches

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?  This
reduces calls to is_rvalue_constant_expression from
cp_parser_constant_expression by 10% for stdc++.h.

-- >8 --

As a follow-up to Marek's r14-3088-ga263152643bbec, this patch makes
us avoid passing an effectively dummy non_constant_p argument in two
more spots in the parser.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_parenthesized_expression_list_elt): Pass
nullptr as non_constant_p to cp_parser_braced_list if our
non_constant_p is null.
(cp_parser_initializer_list): Likewise to
cp_parser_initializer_clause.
---
 gcc/cp/parser.cc | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 774706ac607..a8cc91059c1 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -8099,7 +8099,10 @@ cp_parser_parenthesized_expression_list_elt (cp_parser 
*parser, bool cast_p,
   /* A braced-init-list.  */
   cp_lexer_set_source_position (parser->lexer);
   maybe_warn_cpp0x (CPP0X_INITIALIZER_LISTS);
-  expr = cp_parser_braced_list (parser, _non_constant_p);
+  expr = cp_parser_braced_list (parser,
+   (non_constant_p != nullptr
+   ? _non_constant_p
+   : nullptr));
   if (non_constant_p && expr_non_constant_p)
*non_constant_p = true;
 }
@@ -25812,9 +25815,11 @@ cp_parser_initializer_list (cp_parser* parser, bool* 
non_constant_p,
 
   /* Parse the initializer.  */
   initializer = cp_parser_initializer_clause (parser,
- _non_constant_p);
+ (non_constant_p != nullptr
+  ? _non_constant_p
+  : nullptr));
   /* If any clause is non-constant, so is the entire initializer.  */
-  if (clause_non_constant_p && non_constant_p)
+  if (non_constant_p && clause_non_constant_p)
*non_constant_p = true;
 
   /* If we have an ellipsis, this is an initializer pack
-- 
2.42.0.29.gcd9da15a85

Re: [PATCH] c++: use conversion_obstack_sentinel throughout

2023-08-25 Thread Marek Polacek via Gcc-patches

On Fri, Aug 25, 2023 at 12:33:31PM -0400, Patrick Palka via Gcc-patches wrote:
> Boostrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
> trunk?

Very nice.  LGTM.
 
> -- >8 --
> 
> This replaces manual memory management via conversion_obstack_alloc(0)
> and obstack_free with the recently added conversion_obstack_sentinel,
> and also uses the latter in build_user_type_conversion and
> build_operator_new_call.
> 
> gcc/cp/ChangeLog:
> 
>   * call.cc (build_user_type_conversion): Free allocated
>   conversions.
>   (build_converted_constant_expr_internal): Use
>   conversion_obstack_sentinel instead.
>   (perform_dguide_overload_resolution): Likewise.
>   (build_new_function_call): Likewise.
>   (build_operator_new_call): Free allocated conversions.
>   (build_op_call): Use conversion_obstack_sentinel instead.
>   (build_conditional_expr): Use conversion_obstack_sentinel
>   instead, and hoist it out to the outermost scope.
>   (build_new_op): Use conversion_obstack_sentinel instead
>   and set it up before the first goto.  Remove second unneeded goto.
>   (build_op_subscript): Use conversion_obstack_sentinel instead.
>   (ref_conv_binds_to_temporary): Likewise.
>   (build_new_method_call): Likewise.
>   (can_convert_arg): Likewise.
>   (can_convert_arg_bad): Likewise.
>   (perform_implicit_conversion_flags): Likewise.
>   (perform_direct_initialization_if_possible): Likewise.
>   (initialize_reference): Likewise.
> ---
>  gcc/cp/call.cc | 107 ++---
>  1 file changed, 22 insertions(+), 85 deletions(-)
> 
> diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
> index 673ec91d60e..432ac99b4bb 100644
> --- a/gcc/cp/call.cc
> +++ b/gcc/cp/call.cc
> @@ -4646,6 +4646,9 @@ build_user_type_conversion (tree totype, tree expr, int 
> flags,
>tree ret;
>  
>auto_cond_timevar tv (TV_OVERLOAD);
> +
> +  conversion_obstack_sentinel cos;
> +
>cand = build_user_type_conversion_1 (totype, expr, flags, complain);
>  
>if (cand)
> @@ -4698,15 +4701,13 @@ build_converted_constant_expr_internal (tree type, 
> tree expr,
>   int flags, tsubst_flags_t complain)
>  {
>conversion *conv;
> -  void *p;
>tree t;
>location_t loc = cp_expr_loc_or_input_loc (expr);
>  
>if (error_operand_p (expr))
>  return error_mark_node;
>  
> -  /* Get the high-water mark for the CONVERSION_OBSTACK.  */
> -  p = conversion_obstack_alloc (0);
> +  conversion_obstack_sentinel cos;
>  
>conv = implicit_conversion (type, TREE_TYPE (expr), expr,
> /*c_cast_p=*/false, flags, complain);
> @@ -4815,9 +4816,6 @@ build_converted_constant_expr_internal (tree type, tree 
> expr,
>expr = error_mark_node;
>  }
>  
> -  /* Free all the conversions we allocated.  */
> -  obstack_free (_obstack, p);
> -
>return expr;
>  }
>  
> @@ -4985,8 +4983,7 @@ perform_dguide_overload_resolution (tree dguides, const 
> vec *args,
>  
>gcc_assert (deduction_guide_p (OVL_FIRST (dguides)));
>  
> -  /* Get the high-water mark for the CONVERSION_OBSTACK.  */
> -  void *p = conversion_obstack_alloc (0);
> +  conversion_obstack_sentinel cos;
>  
>z_candidate *cand = perform_overload_resolution (dguides, args, 
> ,
>  _viable_p, complain);
> @@ -4999,9 +4996,6 @@ perform_dguide_overload_resolution (tree dguides, const 
> vec *args,
>else
>  result = cand->fn;
>  
> -  /* Free all the conversions we allocated.  */
> -  obstack_free (_obstack, p);
> -
>return result;
>  }
>  
> @@ -5015,7 +5009,6 @@ build_new_function_call (tree fn, vec 
> **args,
>  {
>struct z_candidate *candidates, *cand;
>bool any_viable_p;
> -  void *p;
>tree result;
>  
>if (args != NULL && *args != NULL)
> @@ -5028,8 +5021,7 @@ build_new_function_call (tree fn, vec 
> **args,
>if (flag_tm)
>  tm_malloc_replacement (fn);
>  
> -  /* Get the high-water mark for the CONVERSION_OBSTACK.  */
> -  p = conversion_obstack_alloc (0);
> +  conversion_obstack_sentinel cos;
>  
>cand = perform_overload_resolution (fn, *args, , _viable_p,
> complain);
> @@ -5061,9 +5053,6 @@ build_new_function_call (tree fn, vec 
> **args,
> == BUILT_IN_NORMAL)
> result = coro_validate_builtin_call (result);
>  
> -  /* Free all the conversions we allocated.  */
> -  obstack_free (_obstack, p);
> -
>return result;
>  }
>  
> @@ -5108,6 +5097,8 @@ build_operator_new_call (tree fnname, vec 
> **args,
>if (*args == NULL)
>  return error_mark_node;
>  
> +  conversion_obstack_sentinel cos;
> +
>/* Based on:
>  
> [expr.new]
> @@ -5234,7 +5225,6 @@ build_op_call (tree obj, vec **args, 
> tsubst_flags_t complain)
>tree fns, convs, first_mem_arg = NULL_TREE;
>bool any_viable_p;
>tree result = NULL_TREE;
> -  void *p;
>  
>

[PATCH] c++: CWG 2359, wrong copy-init with designated init [PR91319]

2023-08-25 Thread Marek Polacek via Gcc-patches

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --

This CWG clarifies that designated initializer support direct-initialization.
Just be careful what Note 2 in [dcl.init.aggr]/4.2 says: "If the
initialization is by designated-initializer-clause, its form determines
whether copy-initialization or direct-initialization is performed."  Hence
this patch sets CONSTRUCTOR_IS_DIRECT_INIT only when we are dealing with
".x{}", but not ".x = {}".

PR c++/91319

gcc/cp/ChangeLog:

* parser.cc (cp_parser_initializer_list): Set CONSTRUCTOR_IS_DIRECT_INIT
when the designated initializer is of the .x{} form.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/desig30.C: New test.
---
 gcc/cp/parser.cc |  6 ++
 gcc/testsuite/g++.dg/cpp2a/desig30.C | 22 ++
 2 files changed, 28 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/desig30.C

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index eeb22e44fb4..b3d5c65b469 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -25718,6 +25718,7 @@ cp_parser_initializer_list (cp_parser* parser, bool* 
non_constant_p,
   tree designator;
   tree initializer;
   bool clause_non_constant_p;
+  bool direct_p = false;
   location_t loc = cp_lexer_peek_token (parser->lexer)->location;
 
   /* Handle the C++20 syntax, '. id ='.  */
@@ -25740,6 +25741,8 @@ cp_parser_initializer_list (cp_parser* parser, bool* 
non_constant_p,
  if (cp_lexer_next_token_is (parser->lexer, CPP_EQ))
/* Consume the `='.  */
cp_lexer_consume_token (parser->lexer);
+ else
+   direct_p = true;
}
   /* Also, if the next token is an identifier and the following one is a
 colon, we are looking at the GNU designated-initializer
@@ -25817,6 +25820,9 @@ cp_parser_initializer_list (cp_parser* parser, bool* 
non_constant_p,
   if (clause_non_constant_p && non_constant_p)
*non_constant_p = true;
 
+  if (TREE_CODE (initializer) == CONSTRUCTOR)
+   CONSTRUCTOR_IS_DIRECT_INIT (initializer) |= direct_p;
+
   /* If we have an ellipsis, this is an initializer pack
 expansion.  */
   if (cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS))
diff --git a/gcc/testsuite/g++.dg/cpp2a/desig30.C 
b/gcc/testsuite/g++.dg/cpp2a/desig30.C
new file mode 100644
index 000..d9292df9de2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/desig30.C
@@ -0,0 +1,22 @@
+// PR c++/91319
+// { dg-do compile { target c++20 } }
+
+struct X {
+explicit X() { }
+};
+
+struct Aggr {
+X x;
+};
+
+Aggr
+f ()
+{
+  return Aggr{.x{}};
+}
+
+Aggr
+f2 ()
+{
+  return Aggr{.x = {}}; // { dg-error "explicit constructor" }
+}

base-commit: 54cc21eaf5f3eb7f7a508919a086f6c8bf5c4c17
-- 
2.41.0

[PATCH] c++: use conversion_obstack_sentinel throughout

2023-08-25 Thread Patrick Palka via Gcc-patches

Boostrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

-- >8 --

This replaces manual memory management via conversion_obstack_alloc(0)
and obstack_free with the recently added conversion_obstack_sentinel,
and also uses the latter in build_user_type_conversion and
build_operator_new_call.

gcc/cp/ChangeLog:

* call.cc (build_user_type_conversion): Free allocated
conversions.
(build_converted_constant_expr_internal): Use
conversion_obstack_sentinel instead.
(perform_dguide_overload_resolution): Likewise.
(build_new_function_call): Likewise.
(build_operator_new_call): Free allocated conversions.
(build_op_call): Use conversion_obstack_sentinel instead.
(build_conditional_expr): Use conversion_obstack_sentinel
instead, and hoist it out to the outermost scope.
(build_new_op): Use conversion_obstack_sentinel instead
and set it up before the first goto.  Remove second unneeded goto.
(build_op_subscript): Use conversion_obstack_sentinel instead.
(ref_conv_binds_to_temporary): Likewise.
(build_new_method_call): Likewise.
(can_convert_arg): Likewise.
(can_convert_arg_bad): Likewise.
(perform_implicit_conversion_flags): Likewise.
(perform_direct_initialization_if_possible): Likewise.
(initialize_reference): Likewise.
---
 gcc/cp/call.cc | 107 ++---
 1 file changed, 22 insertions(+), 85 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 673ec91d60e..432ac99b4bb 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -4646,6 +4646,9 @@ build_user_type_conversion (tree totype, tree expr, int 
flags,
   tree ret;
 
   auto_cond_timevar tv (TV_OVERLOAD);
+
+  conversion_obstack_sentinel cos;
+
   cand = build_user_type_conversion_1 (totype, expr, flags, complain);
 
   if (cand)
@@ -4698,15 +4701,13 @@ build_converted_constant_expr_internal (tree type, tree 
expr,
int flags, tsubst_flags_t complain)
 {
   conversion *conv;
-  void *p;
   tree t;
   location_t loc = cp_expr_loc_or_input_loc (expr);
 
   if (error_operand_p (expr))
 return error_mark_node;
 
-  /* Get the high-water mark for the CONVERSION_OBSTACK.  */
-  p = conversion_obstack_alloc (0);
+  conversion_obstack_sentinel cos;
 
   conv = implicit_conversion (type, TREE_TYPE (expr), expr,
  /*c_cast_p=*/false, flags, complain);
@@ -4815,9 +4816,6 @@ build_converted_constant_expr_internal (tree type, tree 
expr,
   expr = error_mark_node;
 }
 
-  /* Free all the conversions we allocated.  */
-  obstack_free (_obstack, p);
-
   return expr;
 }
 
@@ -4985,8 +4983,7 @@ perform_dguide_overload_resolution (tree dguides, const 
vec *args,
 
   gcc_assert (deduction_guide_p (OVL_FIRST (dguides)));
 
-  /* Get the high-water mark for the CONVERSION_OBSTACK.  */
-  void *p = conversion_obstack_alloc (0);
+  conversion_obstack_sentinel cos;
 
   z_candidate *cand = perform_overload_resolution (dguides, args, ,
   _viable_p, complain);
@@ -4999,9 +4996,6 @@ perform_dguide_overload_resolution (tree dguides, const 
vec *args,
   else
 result = cand->fn;
 
-  /* Free all the conversions we allocated.  */
-  obstack_free (_obstack, p);
-
   return result;
 }
 
@@ -5015,7 +5009,6 @@ build_new_function_call (tree fn, vec **args,
 {
   struct z_candidate *candidates, *cand;
   bool any_viable_p;
-  void *p;
   tree result;
 
   if (args != NULL && *args != NULL)
@@ -5028,8 +5021,7 @@ build_new_function_call (tree fn, vec **args,
   if (flag_tm)
 tm_malloc_replacement (fn);
 
-  /* Get the high-water mark for the CONVERSION_OBSTACK.  */
-  p = conversion_obstack_alloc (0);
+  conversion_obstack_sentinel cos;
 
   cand = perform_overload_resolution (fn, *args, , _viable_p,
  complain);
@@ -5061,9 +5053,6 @@ build_new_function_call (tree fn, vec **args,
  == BUILT_IN_NORMAL)
result = coro_validate_builtin_call (result);
 
-  /* Free all the conversions we allocated.  */
-  obstack_free (_obstack, p);
-
   return result;
 }
 
@@ -5108,6 +5097,8 @@ build_operator_new_call (tree fnname, vec 
**args,
   if (*args == NULL)
 return error_mark_node;
 
+  conversion_obstack_sentinel cos;
+
   /* Based on:
 
[expr.new]
@@ -5234,7 +5225,6 @@ build_op_call (tree obj, vec **args, 
tsubst_flags_t complain)
   tree fns, convs, first_mem_arg = NULL_TREE;
   bool any_viable_p;
   tree result = NULL_TREE;
-  void *p;
 
   auto_cond_timevar tv (TV_OVERLOAD);
 
@@ -5273,8 +5263,7 @@ build_op_call (tree obj, vec **args, 
tsubst_flags_t complain)
return error_mark_node;
 }
 
-  /* Get the high-water mark for the CONVERSION_OBSTACK.  */
-  p = conversion_obstack_alloc (0);
+  conversion_obstack_sentinel cos;
 
   if (fns)
 {
@@ -5377,9 +5366,6 @@

Re: Darwin: Replace environment runpath with embedded [PR88590]

2023-08-25 Thread Joseph Myers

On Fri, 25 Aug 2023, FX Coudert via Gcc-patches wrote:

> Hi,
> 
> Thanks Joseph for the review.
> 
> > The driver changes are OK.
> > 
> > I think the new configure options and the new -nodefaultrpaths compiler 
> > option need documenting
> 
> Doc patch was added, and okay’ed by Iain.

I see documentation for -nodefaultrpaths; not for configure options, and 
at least one of the configure options looks like it's being defined as a 
GCC-specific configure option rather than a libtool one, so should 
definitely be documented in install.texi.

> Attached is the final patchset. Joseph, is your previous OK good for 
> pushing, or does it need further review?

The driver changes are still OK.

-- 
Joseph S. Myers
jos...@codesourcery.com

[PATCH] MATCH: Move `(X & ~Y) | (~X & Y)` over to use bitwise_inverted_equal_p

2023-08-25 Thread Andrew Pinski via Gcc-patches

This moves the pattern `(X & ~Y) | (~X & Y)` to use bitwise_inverted_equal_p
so we can simplify earlier the case where X and Y are defined by comparisons.
We were able to optimize to (!X)^(!Y) in the end due to the pattern added in
r14-3110-g7fb65f102851248bafa0815 and the older pattern r13-4620-g4d9db4bdd458 .
But folding it earlier is better.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Note pr87009.c now gets `return x ^ s; in one case where the test had been 
expecting
`return s ^ x;` both are valid and would be expectly the same; just we now 
chose a slightly
different order of simplification which causes the order of the operands to be 
different.

gcc/ChangeLog:

* match.pd (`(X & ~Y) | (~X & Y)`): Use bitwise_inverted_equal_p
instead of specifically checking for ~X.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/cmpbit-3.c: New test.
* gcc.dg/pr87009.c: Update test.
---
 gcc/match.pd | 13 +-
 gcc/testsuite/gcc.dg/pr87009.c   |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c | 33 
 3 files changed, 41 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 70884bd48eb..e41403664d0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1228,12 +1228,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* Simplify (X & ~Y) |^+ (~X & Y) -> X ^ Y.  */
 (for op (bit_ior bit_xor plus)
  (simplify
-  (op (bit_and:c @0 (bit_not @1)) (bit_and:c (bit_not @0) @1))
-   (bit_xor @0 @1))
- (simplify
-  (op:c (bit_and @0 INTEGER_CST@2) (bit_and (bit_not @0) INTEGER_CST@1))
-  (if (~wi::to_wide (@2) == wi::to_wide (@1))
-   (bit_xor @0 @1
+  (op (bit_and:c @0 @2) (bit_and:c @3 @1))
+  (with { bool wascmp0, wascmp1; }
+   (if (bitwise_inverted_equal_p (@2, @1, wascmp0)
+&& bitwise_inverted_equal_p (@0, @3, wascmp1)
+   && ((!wascmp0 && !wascmp1)
+   || element_precision (type) == 1))
+   (bit_xor @0 @1)
 
 /* PR53979: Transform ((a ^ b) | a) -> (a | b) */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/pr87009.c b/gcc/testsuite/gcc.dg/pr87009.c
index eb8a4ecd920..6f0341d17cc 100644
--- a/gcc/testsuite/gcc.dg/pr87009.c
+++ b/gcc/testsuite/gcc.dg/pr87009.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-original" } */
-/* { dg-final { scan-tree-dump-times "return s \\^ x;" 4 "original" } } */
+/* { dg-final { scan-tree-dump-times "return s \\^ x;|return x \\^ s;" 4 
"original" } } */
 
 int f1 (int x, int s)
 {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c
new file mode 100644
index 000..936c0934a10
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-raw -fdump-tree-dse1-raw 
-fdump-tree-forwprop1" } */
+
+_Bool f(int a, int b)
+{
+  _Bool X = a==1, Y = b == 2;
+return (X & !Y) | (!X & Y);
+}
+
+
+_Bool f1(int a, int b)
+{
+  _Bool X = a==1, Y = b == 2;
+  _Bool c = (X & !Y);
+  _Bool d = (!X & Y);
+  return c | d;
+}
+
+/* Both of these should be optimized to (a==1) ^ (b==2) or (a != 1) ^ (b != 2) 
*/
+/* { dg-final { scan-tree-dump-not "gimple_cond " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "gimple_phi " "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ne_expr|eq_expr, " 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "gimple_assign " 6 "optimized" } } */
+
+/* Both of these should be optimized early in the pipeline after forwprop1 */
+/* { dg-final { scan-tree-dump-times "ne_expr|eq_expr, " 4 "forwprop1" { xfail 
*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 2 "forwprop1" { xfail 
*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "gimple_assign " 6 "forwprop1" { xfail 
*-*-* } } } */
+/* Note forwprop1 does not remove all unused statements sometimes so test dse1 
also. */
+/* { dg-final { scan-tree-dump-times "ne_expr|eq_expr, " 4 "dse1" } } */
+/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 2 "dse1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_assign " 6 "dse1" } } */
-- 
2.31.1

[V3][PATCH 3/3] Use the counted_by attribute information in bound sanitizer[PR108896]

2023-08-25 Thread Qing Zhao via Gcc-patches

Use the counted_by attribute information in bound sanitizer.

gcc/c-family/ChangeLog:

PR C/108896
* c-ubsan.cc (ubsan_instrument_bounds): Use counted_by attribute
information.

gcc/testsuite/ChangeLog:

PR C/108896
* gcc.dg/ubsan/flex-array-counted-by-bounds.c: New test.
* gcc.dg/ubsan/flex-array-counted-by-bounds-2.c: New test.
---
 gcc/c-family/c-ubsan.cc   | 16 +++
 .../ubsan/flex-array-counted-by-bounds-2.c| 27 +++
 .../ubsan/flex-array-counted-by-bounds.c  | 46 +++
 3 files changed, 89 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c

diff --git a/gcc/c-family/c-ubsan.cc b/gcc/c-family/c-ubsan.cc
index 51aa83a378d2..a99e8433069f 100644
--- a/gcc/c-family/c-ubsan.cc
+++ b/gcc/c-family/c-ubsan.cc
@@ -362,6 +362,10 @@ ubsan_instrument_bounds (location_t loc, tree array, tree 
*index,
 {
   tree type = TREE_TYPE (array);
   tree domain = TYPE_DOMAIN (type);
+  /* whether the array ref is a flexible array member with valid counted_by
+ attribute.  */
+  bool fam_has_count_attr = false;
+  tree counted_by = NULL_TREE;
 
   if (domain == NULL_TREE)
 return NULL_TREE;
@@ -375,6 +379,17 @@ ubsan_instrument_bounds (location_t loc, tree array, tree 
*index,
  && COMPLETE_TYPE_P (type)
  && integer_zerop (TYPE_SIZE (type)))
bound = build_int_cst (TREE_TYPE (TYPE_MIN_VALUE (domain)), -1);
+  /* If the array ref is to flexible array member field which has
+counted_by attribute.  We can use the information from the
+attribute as the bound to instrument the reference.  */
+  else if ((counted_by = component_ref_get_counted_by (array))
+   != NULL_TREE)
+   {
+ fam_has_count_attr = true;
+ bound = fold_build2 (MINUS_EXPR, TREE_TYPE (counted_by),
+  counted_by,
+  build_int_cst (TREE_TYPE (counted_by), 1));
+   }
   else
return NULL_TREE;
 }
@@ -387,6 +402,7 @@ ubsan_instrument_bounds (location_t loc, tree array, tree 
*index,
  -fsanitize=bounds-strict.  */
   tree base = get_base_address (array);
   if (!sanitize_flags_p (SANITIZE_BOUNDS_STRICT)
+  && !fam_has_count_attr
   && TREE_CODE (array) == COMPONENT_REF
   && base && (INDIRECT_REF_P (base) || TREE_CODE (base) == MEM_REF))
 {
diff --git a/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c 
b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
new file mode 100644
index ..77ec333509d0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
@@ -0,0 +1,27 @@
+/* test the attribute counted_by and its usage in
+   bounds sanitizer combined with VLA.  */
+/* { dg-do run } */
+/* { dg-options "-fsanitize=bounds" } */
+
+#include 
+
+void __attribute__((__noinline__)) setup_and_test_vla (int n, int m)
+{
+   struct foo {
+   int n;
+   int p[][n] __attribute__((counted_by(n)));
+   } *f;
+
+   f = (struct foo *) malloc (sizeof(struct foo) + m*sizeof(int[n]));
+   f->n = m;
+   f->p[m][n-1]=1;
+   return;
+}
+
+int main(int argc, char *argv[])
+{
+  setup_and_test_vla (10, 11);
+  return 0;
+}
+
+/* { dg-output "17:8: runtime error: index 11 out of bounds for type" } */
diff --git a/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c 
b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c
new file mode 100644
index ..81eaeb3f2681
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c
@@ -0,0 +1,46 @@
+/* test the attribute counted_by and its usage in
+   bounds sanitizer.  */
+/* { dg-do run } */
+/* { dg-options "-fsanitize=bounds" } */
+
+#include 
+
+struct flex {
+  int b;
+  int c[];
+} *array_flex;
+
+struct annotated {
+  int b;
+  int c[] __attribute__ ((counted_by (b)));
+} *array_annotated;
+
+void __attribute__((__noinline__)) setup (int normal_count, int 
annotated_count)
+{
+  array_flex
+= (struct flex *)malloc (sizeof (struct flex)
++ normal_count *  sizeof (int));
+  array_flex->b = normal_count;
+
+  array_annotated
+= (struct annotated *)malloc (sizeof (struct annotated)
+ + annotated_count *  sizeof (int));
+  array_annotated->b = annotated_count;
+
+  return;
+}
+
+void __attribute__((__noinline__)) test (int normal_index, int annotated_index)
+{
+  array_flex->c[normal_index] = 1;
+  array_annotated->c[annotated_index] = 2;
+}
+
+int main(int argc, char *argv[])
+{
+  setup (10, 10);   
+  test (10, 10);
+  return 0;
+}
+
+/* { dg-output "36:21: runtime error: index 10 out of bounds for type" } */
-- 
2.31.1

[V3][PATCH 2/3] Use the counted_by atribute info in builtin object size [PR108896]

2023-08-25 Thread Qing Zhao via Gcc-patches

Use the counted_by atribute info in builtin object size to compute the
subobject size for flexible array members.

gcc/ChangeLog:

PR C/108896
* tree-object-size.cc (addr_object_size): Use the counted_by
attribute info.
* tree.cc (component_ref_has_counted_by_p): New function.
(component_ref_get_counted_by): New function.
* tree.h (component_ref_has_counted_by_p): New prototype.
(component_ref_get_counted_by): New prototype.

gcc/testsuite/ChangeLog:

PR C/108896
* gcc.dg/flex-array-counted-by-2.c: New test.
* gcc.dg/flex-array-counted-by-3.c: New test.
---
 .../gcc.dg/flex-array-counted-by-2.c  |  74 ++
 .../gcc.dg/flex-array-counted-by-3.c  | 210 ++
 gcc/tree-object-size.cc   |  37 ++-
 gcc/tree.cc   |  95 +++-
 gcc/tree.h|  10 +
 5 files changed, 418 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-2.c
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-3.c

diff --git a/gcc/testsuite/gcc.dg/flex-array-counted-by-2.c 
b/gcc/testsuite/gcc.dg/flex-array-counted-by-2.c
new file mode 100644
index ..ec580c1f1f01
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/flex-array-counted-by-2.c
@@ -0,0 +1,74 @@
+/* test the attribute counted_by and its usage in
+ * __builtin_dynamic_object_size.  */ 
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "builtin-object-size-common.h"
+
+#define expect(p, _v) do { \
+size_t v = _v; \
+if (p == v) \
+   __builtin_printf ("ok:  %s == %zd\n", #p, p); \
+else \
+   {  \
+ __builtin_printf ("WAT: %s == %zd (expected %zd)\n", #p, p, v); \
+ FAIL (); \
+   } \
+} while (0);
+
+struct flex {
+  int b;
+  int c[];
+} *array_flex;
+
+struct annotated {
+  int b;
+  int c[] __attribute__ ((counted_by (b)));
+} *array_annotated;
+
+struct nested_annotated {
+  struct {
+union {
+  int b;
+  float f; 
+};
+int n;
+  };
+  int c[] __attribute__ ((counted_by (b)));
+} *array_nested_annotated;
+
+void __attribute__((__noinline__)) setup (int normal_count, int attr_count)
+{
+  array_flex
+= (struct flex *)malloc (sizeof (struct flex)
++ normal_count *  sizeof (int));
+  array_flex->b = normal_count;
+
+  array_annotated
+= (struct annotated *)malloc (sizeof (struct annotated)
+ + attr_count *  sizeof (int));
+  array_annotated->b = attr_count;
+
+  array_nested_annotated
+= (struct nested_annotated *)malloc (sizeof (struct nested_annotated)
++ attr_count *  sizeof (int));
+  array_nested_annotated->b = attr_count;
+
+  return;
+}
+
+void __attribute__((__noinline__)) test ()
+{
+expect(__builtin_dynamic_object_size(array_flex->c, 1), -1);
+expect(__builtin_dynamic_object_size(array_annotated->c, 1),
+  array_annotated->b * sizeof (int));
+expect(__builtin_dynamic_object_size(array_nested_annotated->c, 1),
+  array_nested_annotated->b * sizeof (int));
+}
+
+int main(int argc, char *argv[])
+{
+  setup (10,10);   
+  test ();
+  DONE ();
+}
diff --git a/gcc/testsuite/gcc.dg/flex-array-counted-by-3.c 
b/gcc/testsuite/gcc.dg/flex-array-counted-by-3.c
new file mode 100644
index ..a0c3cb88ec71
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/flex-array-counted-by-3.c
@@ -0,0 +1,210 @@
+/* test the attribute counted_by and its usage in
+__builtin_dynamic_object_size: what's the correct behavior when the
+allocation size mismatched with the value of counted_by attribute?  */
+/* { dg-do run } */
+/* { dg-options "-O -fstrict-flex-arrays=3" } */
+
+#include "builtin-object-size-common.h"
+
+struct annotated {
+  size_t foo;
+  char others;
+  char array[] __attribute__((counted_by (foo)));
+};
+
+#define expect(p, _v) do { \
+size_t v = _v; \
+if (p == v) \
+__builtin_printf ("ok:  %s == %zd\n", #p, p); \
+else \
+{  \
+  __builtin_printf ("WAT: %s == %zd (expected %zd)\n", #p, p, v); \
+ FAIL (); \
+} \
+} while (0);
+
+#define noinline __attribute__((__noinline__))
+#define SIZE_BUMP 10 
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/* In general, Due to type casting, the type for the pointee of a pointer
+   does not say anything about the object it points to,
+   So, __builtin_object_size can not directly use the type of the pointee
+   to decide the size of the object the pointer points to.
+
+   there are only two reliable ways:
+   A. observed allocations  (call to the allocation functions in the routine)
+   B. observed accesses (read or write access to the location of the
+ pointer points to)
+
+   that provide information about the type/existence of an object at
+   the corresponding

[V3][PATCH 0/3] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2023-08-25 Thread Qing Zhao via Gcc-patches

This is the 3rd version of the patch, per our discussion based on the
review comments for the 1st and 2nd version, the major changes in this
version are:

***Against 1st version:
1. change the name "element_count" to "counted_by";
2. change the parameter for the attribute from a STRING to an
Identifier;
3. Add logic and testing cases to handle anonymous structure/unions;
4. Clarify documentation to permit the situation when the allocation
size is larger than what's specified by "counted_by", at the same time,
it's user's error if allocation size is smaller than what's specified by
"counted_by";
5. Add a complete testing case for using counted_by attribute in
__builtin_dynamic_object_size when there is mismatch between the
allocation size and the value of "counted_by", the expecting behavior
for each case and the explanation on why in the comments. 

***Against 2rd version:
1. Identify a tree node sharing issue and fixed it in the routine
   "component_ref_get_counted_ty" of tree.cc;
2. Update the documentation and testing cases with the clear usage
   of the fomula to compute the allocation size:
MAX (sizeof (struct A), offsetof (struct A, array[0]) + counted_by * 
sizeof(element))
   (the algorithm used in tree-object-size.cc is correct).

In this set of patches, the major functionality provided is:

1. a new attribute "counted_by";
2. use this new attribute in bound sanitizer;
3. use this new attribute in dynamic object size for subobject size;

As discussed, I plan to add two more separate patches sets after this initial
patch set is approved and committed.

set 1. A new warning option and a new sanitizer option for the user error
  when the allocation size is smaller than the value of "counted_by".
set 2. An improvement to __builtin_dynamic_object_size  for whole-object
  size of the structure with FAM annaoted with counted_by. 

there are also some existing bugs in tree-object-size.cc identified
during the study, and PRs were filed to record them. these bugs will 
be fixed seperately with individual patches:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111030
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111040

Bootstrapped and regression tested on both aarch64 and X86, no issue.

Please see more details on the description of this work on:

https://gcc.gnu.org/pipermail/gcc-patches/2023-May/619708.html

and more discussions on
https://gcc.gnu.org/pipermail/gcc-patches/2023-August/626376.html

Okay for committing?

thanks.

Qing

Qing Zhao (3):
  Provide counted_by attribute to flexible array member field (PR108896)
  Use the counted_by atribute info in builtin object size [PR108896]
  Use the counted_by attribute information in bound sanitizer[PR108896]

 gcc/c-family/c-attribs.cc |  54 -
 gcc/c-family/c-common.cc  |  13 ++
 gcc/c-family/c-common.h   |   1 +
 gcc/c-family/c-ubsan.cc   |  16 ++
 gcc/c/c-decl.cc   |  79 +--
 gcc/doc/extend.texi   |  77 +++
 .../gcc.dg/flex-array-counted-by-2.c  |  74 ++
 .../gcc.dg/flex-array-counted-by-3.c  | 210 ++
 gcc/testsuite/gcc.dg/flex-array-counted-by.c  |  40 
 .../ubsan/flex-array-counted-by-bounds-2.c|  27 +++
 .../ubsan/flex-array-counted-by-bounds.c  |  46 
 gcc/tree-object-size.cc   |  37 ++-
 gcc/tree.cc   | 133 +++
 gcc/tree.h|  15 ++
 14 files changed, 797 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-2.c
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-3.c
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by.c
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c

-- 
2.31.1

[V3][PATCH 1/3] Provide counted_by attribute to flexible array member field (PR108896)

2023-08-25 Thread Qing Zhao via Gcc-patches

Provide a new counted_by attribute to flexible array member field.

'counted_by (COUNT)'
 The 'counted_by' attribute may be attached to the flexible array
 member of a structure.  It indicates that the number of the
 elements of the array is given by the field named "COUNT" in the
 same structure as the flexible array member.  GCC uses this
 information to improve the results of the array bound sanitizer and
 the '__builtin_dynamic_object_size'.

 For instance, the following code:

  struct P {
size_t count;
char other;
char array[] __attribute__ ((counted_by (count)));
  } *p;

 specifies that the 'array' is a flexible array member whose number
 of elements is given by the field 'count' in the same structure.

 The field that represents the number of the elements should have an
 integer type.  An explicit 'counted_by' annotation defines a
 relationship between two objects, 'p->array' and 'p->count', that
 'p->array' has _at least_ 'p->count' number of elements available.
 This relationship must hold even after any of these related objects
 are updated.  It's the user's responsibility to make sure this
 relationship to be kept all the time.  Otherwise the results of the
 array bound sanitizer and the '__builtin_dynamic_object_size' might
 be incorrect.

 For instance, in the following example, the allocated array has
 less elements than what's specified by the 'sbuf->count', this is
 an user error.  As a result, out-of-bounds access to the array
 might not be detected.

  #define SIZE_BUMP 10
  struct P *sbuf;
  void alloc_buf (size_t nelems)
  {
sbuf = (struct P *) malloc (MAX (sizeof (struct P),
   (offsetof (struct P, array[0])
+ nelems * sizeof (char;
sbuf->count = nelems + SIZE_BUMP;
/* This is invalid when the sbuf->array has less than sbuf->count
   elements.  */
  }

 In the following example, the 2nd update to the field 'sbuf->count'
 of the above structure will permit out-of-bounds access to the
 array 'sbuf>array' as well.

  #define SIZE_BUMP 10
  struct P *sbuf;
  void alloc_buf (size_t nelems)
  {
sbuf = (struct P *) malloc (MAX (sizeof (struct P),
   (offsetof (struct P, array[0])
+ (nelems + SIZE_BUMP) * sizeof 
(char;
sbuf->count = nelems;
/* This is valid when the sbuf->array has at least sbuf->count
   elements.  */
  }
  void use_buf (int index)
  {
sbuf->count = sbuf->count + SIZE_BUMP + 1;
/* Now the value of sbuf->count is larger than the number
   of elements of sbuf->array.  */
sbuf->array[index] = 0;
/* then the out-of-bound access to this array
   might not be detected.  */
  }

gcc/c-family/ChangeLog:

PR C/108896
* c-attribs.cc (handle_counted_by_attribute): New function.
(attribute_takes_identifier_p): Add counted_by attribute to the list.
* c-common.cc (c_flexible_array_member_type_p): ...To this.
* c-common.h (c_flexible_array_member_type_p): New prototype.

gcc/c/ChangeLog:

PR C/108896
* c-decl.cc (flexible_array_member_type_p): Renamed and moved to...
(add_flexible_array_elts_to_size): Use renamed function.
(is_flexible_array_member_p): Use renamed function.
(verify_counted_by_attribute): New function.
(finish_struct): Use renamed function and verify counted_by
attribute.

gcc/ChangeLog:

PR C/108896
* doc/extend.texi: Document attribute counted_by.
* tree.cc (get_named_field): New function.
* tree.h (get_named_field): New prototype.

gcc/testsuite/ChangeLog:

PR C/108896
* gcc.dg/flex-array-counted-by.c: New test.
---
 gcc/c-family/c-attribs.cc| 54 -
 gcc/c-family/c-common.cc | 13 
 gcc/c-family/c-common.h  |  1 +
 gcc/c/c-decl.cc  | 79 +++-
 gcc/doc/extend.texi  | 77 +++
 gcc/testsuite/gcc.dg/flex-array-counted-by.c | 40 ++
 gcc/tree.cc  | 40 ++
 gcc/tree.h   |  5 ++
 8 files changed, 291 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by.c

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index e2792ca6898b..65e4f6639109 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -103,6 +103,8 @@ static tree

Re: [RFC] -folding dumping for fold-const.cc

2023-08-25 Thread Patrick Palka via Gcc-patches

On Fri, 25 Aug 2023, Richard Biener via Gcc-patches wrote:

> 
> The following adds the capability to have fold-const.cc matched
> patterns visible in -folding dumps.  There's two choices,
> a portable one replacing return stmts like
> 
> -   return fold_build1 (tcode, ctype, fold_convert (ctype, t1));
> +   DRET (fold_build1 (tcode, ctype, fold_convert (ctype, t1)));
> 
> (carefully keeping total line length the same)
> 
> Or less portably by wrapping the return value:
> 
> -   return fold_build1 (tcode, ctype, fold_convert (ctype, t1));
> +   return DUMP_FOLD (fold_build1 (tcode, ctype, fold_convert (ctype, 
> t1)));
> 
> (requiring re-indenting)
> 
> +/* Similar to match.pd dumping support notes as part of -folding dumping
> +   by wrapping return values in DUMP_FOLD (...).  */
> +#if __GNUC__
> +#define DUMP_FOLD(X) (__extension__ ({ \
> +  auto x = (X); \
> +  if (x && dump_file && (dump_flags & TDF_FOLDING)) \
> +fprintf (dump_file, "Applying fold-const.c:%d\n", __LINE__); \
> +  x; \
> +}))  

Would using an ordinary function here work?

  static tree
  dump_fold (tree x, int line)
  {
if (...)
  fprintf (dump_file, "Applying fold-const.c:%d\n", line);
return x;
  }

  #define DUMP_FOLD(X) dump_fold ((X), __LINE__)

> +#else 
> +#define DUMP_FOLD(X) (X)  
> +#endif 
> 
> vs.
> 
> +/* Similar to match.pd dumping support notes as part of -folding dumping
> +   by changing return statements to DRET (...).  */
> +#define DRET(X) do { \
> +  auto x = (X); \
> +  if (x && dump_file && (dump_flags & TDF_FOLDING)) \
> +fprintf (dump_file, "Applying fold-const.c:%d\n", __LINE__); \
> +  return x; \
> +} while (0)
> 
> I personally prefer keeping 'return' visible and thus going the
> non-portable way.  Any C++ folks know how to avoid re-evaluating
> X portably in expression context?
> 
> Any other comments?
> 
> Thanks,
> Richard.
> 
>

[RFC] -folding dumping for fold-const.cc

2023-08-25 Thread Richard Biener via Gcc-patches



The following adds the capability to have fold-const.cc matched
patterns visible in -folding dumps.  There's two choices,
a portable one replacing return stmts like

-   return fold_build1 (tcode, ctype, fold_convert (ctype, t1));
+   DRET (fold_build1 (tcode, ctype, fold_convert (ctype, t1)));

(carefully keeping total line length the same)

Or less portably by wrapping the return value:

-   return fold_build1 (tcode, ctype, fold_convert (ctype, t1));
+   return DUMP_FOLD (fold_build1 (tcode, ctype, fold_convert (ctype, 
t1)));

(requiring re-indenting)

+/* Similar to match.pd dumping support notes as part of -folding dumping
+   by wrapping return values in DUMP_FOLD (...).  */
+#if __GNUC__
+#define DUMP_FOLD(X) (__extension__ ({ \
+  auto x = (X); \
+  if (x && dump_file && (dump_flags & TDF_FOLDING)) \
+fprintf (dump_file, "Applying fold-const.c:%d\n", __LINE__); \
+  x; \
+}))  
+#else 
+#define DUMP_FOLD(X) (X)  
+#endif 

vs.

+/* Similar to match.pd dumping support notes as part of -folding dumping
+   by changing return statements to DRET (...).  */
+#define DRET(X) do { \
+  auto x = (X); \
+  if (x && dump_file && (dump_flags & TDF_FOLDING)) \
+fprintf (dump_file, "Applying fold-const.c:%d\n", __LINE__); \
+  return x; \
+} while (0)

I personally prefer keeping 'return' visible and thus going the
non-portable way.  Any C++ folks know how to avoid re-evaluating
X portably in expression context?

Any other comments?

Thanks,
Richard.

Re: [PATCH] analyzer: Move gcc.dg/analyzer tests to c-c++-common (1).

2023-08-25 Thread Benjamin Priour via Gcc-patches

Hi David,

Thanks for the review.

On Fri, Aug 25, 2023 at 2:12 AM David Malcolm  wrote:

> > From: benjamin priour 
> >
> > Hi,
> >
> > Below the first batch of a serie of patches to transition
> > the analyzer testsuite from gcc.dg/analyzer to c-c++-common/analyzer.
> > I do not know how long this serie will be, thus the patch was not
> > numbered.
> >
> > For the grand majority of the tests, the transition only required some
> > adjustement over the syntax and casts to be C++-friendly, or to adjust
> > the warnings regexes to fit the C++ FE.
> >
> > The most noteworthy change is in the handling of known_functions,
> > as described in the below patch.
>
> Hi Benjamin.
>
> Many thanks for putting this together, it looks like it was a lot of
> work.
>
> > Successfully regstrapped on x86_64-linux-gnu off trunk
> > 18befd6f050e70f11ecca1dd58624f0ee3c68cc7.
>
> Did you compare the before/after results from DejaGnu somehow?
>
> Note that I've pushed 9 patches to the analyzer since
> 18befd6f050e70f11ecca1dd58624f0ee3c68cc7 and some of those touch the
> files below, so it's worth rebasing and double-checking the results.
>
>
Thanks for the info, I've rebased off it and I'm regstrapping.
For tests I didn't touch the warnings, I have checked they still pass in C
and C++.
Except for pr96841.c, C tests most notable update was to add a { target c }.
Every previous PASS and XFAIL remained as such in gcc.dg output.
For C++ I went  with a no failure policy. Some tests weren't making sense
in C++,
such as transparent_unions. I've just skipped those.
For some tests C++ fpermissive would throw out an error. These tests I've
tried
to smuggle them out with const_cast and the likes, but never disabled
fpermissive.

> Please add
> PR analyzer/96395
> to the ChangeLog entries, and [PR96395] to the end of the Subject of
> the commit, so that these get tracked within that bug as they get
> pushed.
>
>
Nods.

[...snip...]

> I confess I'm still a little hazy as to the whole builtin_kf logic, but
> I trust you that this is needed.
>
> Please can you add a paragraph to this comment to explain the
> motivation here (perhaps giving examples?)
>
> > +
> > +const builtin_known_function *
> > +region_model::get_builtin_kf (const gcall *call,
> > +region_model_context *ctxt /* = NULL */)
> const
> > +{
> > +  region_model *mut_this = const_cast  (this);
> > +  tree callee_fndecl = mut_this->get_fndecl_for_call (call, ctxt);
> > +  if (! callee_fndecl)
> > +return NULL;
> > +
> > +  call_details cd (call, mut_this, ctxt);
> > +  if (const known_function *kf = get_known_function (callee_fndecl, cd))
> > +return kf->dyn_cast_builtin_kf ();
> > +
> > +  return NULL;
> > +}
> > +
>
>
The new comment is as follow:

/* Get any builtin_known_function for CALL and emit any warning to CTXT
   if not NULL.

   The call must match all assumptions made by the known_function (such as
   e.g. "argument 1's type must be a pointer type").

   Return NULL if no builtin_known_function is found, or it does
   not match the assumption(s).

   Internally calls get_known_function to find a known_function and cast it
   to a builtin_known_function.

   For instance, calloc is a C builtin, defined in gcc/builtins.def
   by the DEF_LIB_BUILTIN macro. Such builtins are recognized by the
   analyzer by their name, so that even in C++ or if the user redeclares
   them but mismatch their signature, they are still recognized as builtins.

   Cases when a supposed builtin is not flagged as one by the FE:

The C++ FE does not recognize calloc as a builtin if it has not been
included from a standard header, but the C FE does. Hence in C++ if
CALL comes from a calloc and stdlib is not included,
gcc/tree.h:fndecl_built_in_p (CALL) would be false.

In C code, a __SIZE_TYPE__ calloc (__SIZE_TYPE__, __SIZE_TYPE__) user
declaration has obviously a mismatching signature from the standard, and
its function_decl tree won't be unified by
gcc/c-decl.cc:match_builtin_function_types.

   Yet in both cases the analyzer should treat the calls as a builtin calloc
   so that extra attributes unspecified by the standard but added by GCC
   (e.g. sprintf attributes in gcc/builtins.def), useful for the detection
of
   dangerous behavior, are indeed processed.

   Therefore for those cases when a "builtin flag" is not added by the FE,
   builtins' kf are derived from builtin_known_function, whose method
   builtin_known_function::builtin_decl returns the builtin's
   function_decl tree as defined in gcc/builtins.def, with all the extra
   attributes.  */

I hope it clarifies the new kf subclass's purpose.

[...snip...]
>
> > diff --git a/gcc/testsuite/gcc.dg/analyzer/aliasing-3.c
> b/gcc/testsuite/c-c++-common/analyzer/aliasing-3.c
> > similarity index 85%
> > rename from gcc/testsuite/gcc.dg/analyzer/aliasing-3.c
> > rename to gcc/testsuite/c-c++-common/analyzer/aliasing-3.c
> > index 003077ad5c1..f78fea64fbe

RE: [PATCH v1] Mode-Switching: Add optional EMIT_AFTER hook

2023-08-25 Thread Li, Pan2 via Gcc-patches

Hi Jeff,

> You might also peek at the RTL gcse/pre code which is also LCM based and 
> has the same class of problems.

I found a similar approach to take care of this in gcse.cc/pre_edge_insert with 
some comments as below.

  /* We can't insert anything on an abnormal and
   critical edge, so we insert the insn at the end of
   the previous block. There are several alternatives
   detailed in Morgans book P277 (sec 10.5) for
   handling this situation.  This one is easiest for
   now.  */

if (eg->flags & EDGE_ABNORMAL)
  insert_insn_end_basic_block (index_map[j], bb);
else
  {
  insn = process_insert_insn (index_map[j]);
  insert_insn_on_edge (insn, eg);
  }

It looks the insert_insn_end_basic_block is designed to handle the ABNORMAL 
edge by inserting at end of previous block from the comments.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Li, Pan2 via Gcc-patches
Sent: Thursday, August 24, 2023 12:54 PM
To: Jeff Law ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; Wang, Yanzhang ; 
kito.ch...@gmail.com
Subject: RE: [PATCH v1] Mode-Switching: Add optional EMIT_AFTER hook

Thanks Jeff.

> That implies a save/restore pair around the call (possibly optimized so 
> that we minimize the number of save/restores).  I would have expected 
> x86 to already be doing this.  But maybe there's some ABI thing around 
> mmx vs x86 state that allows it to be avoided

Very similar to save/restore but optional.
If no static rounding mode instrinsic here, it is unnecessary to add 
save/restore
pair around the call. I bet mode-switching take care of this already.

Pan

-Original Message-
From: Jeff Law  
Sent: Thursday, August 24, 2023 7:27 AM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; Wang, Yanzhang ; 
kito.ch...@gmail.com
Subject: Re: [PATCH v1] Mode-Switching: Add optional EMIT_AFTER hook



On 8/23/23 08:54, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> Understood.  So the natural question is why does x86/sh not need this
>> for its mode switching?   Don't all the same issues exist on those
>> targets as well?
> 
> AFAIK, it comes from the different design principle between the risc-v and 
> x86/arm intrinsic API.
> The risc-v rvv FP rounding mode intrinsic API has one abstract level above 
> the insn itself, while
> the x86/arm only indicates the semantics of the insn.
> 
> For example, if one vector instruction VFADD doesn't have static rounding 
> mode (aka encoding rm in insn),
> there is no such a intrinsic API contains rounding mode argument in x86/arm. 
> While the risc-v fp
> vector intrinsic will always have static rounding mode API if the frm is 
> honored.
> 
> In short, the risc-v intrinsic API is closer to the end-user, while the 
> x86/arm instrinsic API is closer to insn itself.
OK, but I'm still strugging to see how the distinction is important 
here.  Ultimately there's a state at a call site.  We need to make sure 
that state from the current function doesn't impact the callee and we 
need to make sure that the callee doesn't impact the state in the caller.

That implies a save/restore pair around the call (possibly optimized so 
that we minimize the number of save/restores).  I would have expected 
x86 to already be doing this.  But maybe there's some ABI thing around 
mmx vs x86 state that allows it to be avoided

> 
> For the rest part, will have a try based on your suggestion soon as I am in 
> the middle of something.
No problem.  Get to it when you can.  I think it affects you more than 
me :-)

jeff

[pushed] analyzer: fix ICE in text art strings support

2023-08-25 Thread David Malcolm via Gcc-patches

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r14-3481-g99a3fcb8ff0bf2.

gcc/analyzer/ChangeLog:
* access-diagram.cc (class string_region_spatial_item): Remove
assumption that the string is written to the start of the cluster.

gcc/testsuite/ChangeLog:
* gcc.dg/analyzer/out-of-bounds-diagram-17.c: New test.
* gcc.dg/analyzer/out-of-bounds-diagram-18.c: New test.
* gcc.dg/analyzer/out-of-bounds-diagram-19.c: New test.
---
 gcc/analyzer/access-diagram.cc| 57 ---
 .../analyzer/out-of-bounds-diagram-17.c   | 34 +++
 .../analyzer/out-of-bounds-diagram-18.c   | 38 +
 .../analyzer/out-of-bounds-diagram-19.c   | 45 +++
 4 files changed, 155 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-17.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-18.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c

diff --git a/gcc/analyzer/access-diagram.cc b/gcc/analyzer/access-diagram.cc
index d7b669a4e38e..a51d594b5b2c 100644
--- a/gcc/analyzer/access-diagram.cc
+++ b/gcc/analyzer/access-diagram.cc
@@ -1509,10 +1509,16 @@ public:
   out.add_all_bytes_in_range (m_actual_bits);
 else
   {
-   byte_range head_of_string (0, m_ellipsis_head_len);
+   byte_range bytes (0, 0);
+   bool valid = m_actual_bits.as_concrete_byte_range ();
+   gcc_assert (valid);
+   byte_range head_of_string (bytes.get_start_byte_offset (),
+  m_ellipsis_head_len);
out.add_all_bytes_in_range (head_of_string);
byte_range tail_of_string
- (TREE_STRING_LENGTH (string_cst) - m_ellipsis_tail_len,
+ ((bytes.get_start_byte_offset ()
+   + TREE_STRING_LENGTH (string_cst)
+   - m_ellipsis_tail_len),
   m_ellipsis_tail_len);
out.add_all_bytes_in_range (tail_of_string);
/* Adding the above pair of ranges will also effectively add
@@ -1535,11 +1541,14 @@ public:
 tree string_cst = get_string_cst ();
 if (m_show_full_string)
   {
-   for (byte_offset_t byte_idx = bytes.get_start_byte_offset ();
-   byte_idx < bytes.get_next_byte_offset ();
-   byte_idx = byte_idx + 1)
-add_column_for_byte (t, btm, sm, byte_idx,
- byte_idx_table_y, byte_val_table_y);
+   for (byte_offset_t byte_idx_within_cluster
+ = bytes.get_start_byte_offset ();
+   byte_idx_within_cluster < bytes.get_next_byte_offset ();
+   byte_idx_within_cluster = byte_idx_within_cluster + 1)
+add_column_for_byte
+  (t, btm, sm, byte_idx_within_cluster,
+   byte_idx_within_cluster - bytes.get_start_byte_offset (),
+   byte_idx_table_y, byte_val_table_y);
 
if (m_show_utf8)
 {
@@ -1566,10 +1575,13 @@ public:
 = decoded_char.m_start_byte - TREE_STRING_POINTER (string_cst);
   byte_size_t size_in_bytes
 = decoded_char.m_next_byte - decoded_char.m_start_byte;
-  byte_range bytes (start_byte_idx, size_in_bytes);
+  byte_range cluster_bytes_for_codepoint
+(start_byte_idx + bytes.get_start_byte_offset (),
+ size_in_bytes);
 
   const table::rect_t code_point_table_rect
-= btm.get_table_rect (_string_reg, bytes,
+= btm.get_table_rect (_string_reg,
+  cluster_bytes_for_codepoint,
   utf8_code_point_table_y, 1);
   char buf[100];
   sprintf (buf, "U+%04x", decoded_char.m_ch);
@@ -1579,7 +1591,8 @@ public:
   if (show_unichars)
 {
   const table::rect_t character_table_rect
-= btm.get_table_rect (_string_reg, bytes,
+= btm.get_table_rect (_string_reg,
+  cluster_bytes_for_codepoint,
   utf8_character_table_y, 1);
   if (cpp_is_printable_char (decoded_char.m_ch))
 t.set_cell_span (character_table_rect,
@@ -1598,12 +1611,14 @@ public:
   {
/* Head of string.  */
for (int byte_idx = 0; byte_idx < m_ellipsis_head_len; byte_idx++)
- add_column_for_byte (t, btm, sm, byte_idx,
+ add_column_for_byte (t, btm, sm,
+  byte_idx + bytes.get_start_byte_offset (),
+  byte_idx,
   byte_idx_table_y, byte_val_table_y);
 
/* Ellipsis (two rows high).  */
const byte_range ellipsis_bytes
- (m_ellipsis_head_len,
+ (m_ellipsis_head_len + bytes.get_start_byte_offset (),
   TREE_STRING_LENGTH (string_cst)
   -

[PATCH] tree-optimization/111137 - dependence checking for SLP

2023-08-25 Thread Richard Biener via Gcc-patches

The following fixes a mistake with SLP dependence checking.  When
checking whether we can hoist loads to the first load place we
special-case stores of the same instance considering them sunk
to the last store place.  But we fail to consider that stores from
other SLP instances are sunk in a similar way.  This leads us to
miss the dependence between (A) and (B) in

  b[0][1] = 0; (A)
...
  _6 = b[_5 /* 0 */][0];   (B')
  _7 = _6 ^ 1;
  b[_5 /* 0 */][0] = _7;
  b[0][2] = 0; (A')
  _10 = b[_5 /* 0 */][1];  (B)
  _11 = _10 ^ 1;
  b[_5 /* 0 */][1] = _11;

where the zeroing stores are sunk to (A') and the loads hoisted
to (B').  The following fixes this, treating grouped stores from
other instances similar to stores from our own instance.  The
difference is - and this is more conservative than necessary - that
we don't know which stores of a group are in which SLP instance
(though I believe either all of the grouped stores will be in
a single SLP instance or in none at the moment), so we don't
know which stores are sunk where.  We simply assume they are
all sunk to the last store we run into.  Likewise we do not take
into account that an SLP instance might be cancelled (or a grouped
store not actually belong to any instance).

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/37
* tree-vect-data-refs.cc (vect_slp_analyze_load_dependences):
Properly handle grouped stores from other SLP instances.

* gcc.dg/torture/pr37.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr37.c | 30 
 gcc/tree-vect-data-refs.cc  | 64 ++---
 2 files changed, 77 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr37.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr37.c 
b/gcc/testsuite/gcc.dg/torture/pr37.c
new file mode 100644
index 000..77560487926
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr37.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+
+int b[3][8];
+short d;
+volatile int t = 1;
+
+void __attribute__((noipa))
+foo()
+{
+  int  g = t;
+  for (int e = 1; e >= 0; e--)
+{
+  d = 1;
+  for (; d >= 0; d--)
+   {
+ b[0][d * 2 + 1] = 0;
+ b[g - 1 + d][0] ^= 1;
+ b[0][d * 2 + 2] = 0;
+ b[g - 1 + d][1] ^= 1;
+   }
+}
+}
+
+int
+main()
+{
+  foo ();
+  if (b[0][1] != 1)
+__builtin_abort();
+}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 0295e256a44..40ab568fe35 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -750,6 +750,7 @@ vect_slp_analyze_load_dependences (vec_info *vinfo, 
slp_tree node,
   data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
   ao_ref ref;
   bool ref_initialized_p = false;
+  hash_set grp_visited;
   for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
   gsi_stmt (gsi) != first_access_info->stmt; gsi_prev ())
{
@@ -782,26 +783,55 @@ vect_slp_analyze_load_dependences (vec_info *vinfo, 
slp_tree node,
  continue;
}
 
- /* We are hoisting a load - this means we can use TBAA for
-disambiguation.  */
- if (!ref_initialized_p)
-   ao_ref_init (, DR_REF (dr_a));
- if (stmt_may_clobber_ref_p_1 (stmt, , true))
+ auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool
{
- /* If we couldn't record a (single) data reference for this
-stmt we have to give up now.  */
- data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
- if (!dr_b)
-   return false;
- ddr_p ddr = initialize_data_dependence_relation (dr_a,
-  dr_b, vNULL);
- bool dependent
-   = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
- free_dependence_relation (ddr);
- if (dependent)
+ /* We are hoisting a load - this means we can use TBAA for
+disambiguation.  */
+ if (!ref_initialized_p)
+   ao_ref_init (, DR_REF (dr_a));
+ if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, , true))
+   {
+ /* If we couldn't record a (single) data reference for this
+stmt we have to give up now.  */
+ data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
+ if (!dr_b)
+   return false;
+ ddr_p ddr = initialize_data_dependence_relation (dr_a,
+  dr_b, vNULL);
+ bool dependent
+   = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
+ free_dependence_relation (ddr);
+ if (dependent)
+   return false;
+   }
+

Re: [PATCH] rs6000: Disable PCREL for unsupported targets [PR111045]

2023-08-25 Thread Kewen.Lin via Gcc-patches

on 2023/8/25 11:20, Peter Bergner wrote:
> On 8/24/23 12:56 AM, Kewen.Lin wrote:
>> By looking into the uses of function rs6000_pcrel_p, I think we can
>> just replace it with TARGET_PCREL.  Previously we don't require PCREL
>> unset for any unsupported target/OS, so we need rs6000_pcrel_p() to
>> ensure it's really supported in those use places, now if we can guarantee
>> TARGET_PCREL only hold for all places where it's supported, it looks
>> we can just check TARGET_PCREL only?
> 
> I think you're correct on only needing TARGET_PCREL instead of 
> rs6000_pcrel_p()
> as long as we correctly disable PCREL on the targets that either don't support
> it or those that due, but don't due to explicit options (ie, -mno-prel or
> ELFv2 and -mcmodel != medium, etc.).
> 
> 
> 
>> Then the code structure can look like:
>>
>> if (PCREL_SUPPORTED_BY_OS
>> && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
>>// enable
>> else if (TARGET_PCREL && DEFAULT_ABI != ABI_ELFv2)
>>// disable
>> else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
>>// disable
> 
> I don't like that first conditional expression. The problem is,
> PCREL_SUPPORTED_BY_OS could be true or false for the following
> tests, because it's anded with the explicit option test, and
> sometimes that won't make sense.  I think something safer is
> something like:

Assuming the current PCREL_SUPPORTED_BY_OS unchanged, when
PCREL_SUPPORTED_BY_OS is true, all its required conditions are
satisfied, it should be safe.  while PCREL_SUPPORTED_BY_OS is
false, it means the given subtarget doesn't support it, or one
or more of conditions below don't hold:

 - TARGET_POWER10 
 - TARGET_PREFIXED
 - ELFv2_ABI_CHECK
 - TARGET_CMODEL == CMODEL_MEDIUM

The two "else if" check for the last two (abi test also can
check unsupported targets), we have check code for TARGET_PCREL
without TARGET_PREFIXED support, and further Power10, so it
seems safe?

> 
> if (PCREL_SUPPORTED_BY_OS)
>   { 
> /* PCREL on ELFv2 requires -mcmodel=medium.  */
> if (DEFAULT_ABI == ABI_ELFv2 && TARGET_CMODEL != CMODEL_MEDIUM)
>   error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");

I guess your proposal seems to drop CMODEL_MEDIUM (even PREFIX?)
from PCREL_SUPPORTED_BY_OS, to leave it just ABI_ELFv2 & P10? (
otherwise TARGET_CMODEL should be always CMODEL_MEDIUM for
ABI_ELFv2 with the original PCREL_SUPPORTED_BY_OS).  And it
seems to make the subtarget specific checking according to the
ABI type further?

I was expecting that when new subtargets need to be supported, we
can move these subtarget specific checkings into macro/function
SUB*TARGET_OVERRIDE_OPTIONS, IMHO the upside is each subtarget
can take care of its own checkings separately.  Maybe we can
just move them now (to rs6000_linux64_override_options).  And in
the common code, for the cases with zero value PCREL_SUPPORTED_BY_OS,
(assuming PCREL_SUPPORTED_BY_OS just simple as like ABI_ELFv2), we
can emit an invalid error for explicit -mpcrel as you proposed
below.  Thoughts?

btw, I was also expecting that we don't implicitly set
OPTION_MASK_PCREL any more for Power10, that is to remove
OPTION_MASK_PCREL from OTHER_POWER10_MASKS.

> 
> if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
>   rs6000_isa_flags |= OPTION_MASK_PCREL;
>   } 
> else
>   {
> if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0
> && TARGET_PCREL)
>   error ("use of %qs is invalid for this target", "-mpcrel");

I like this, it's more clear!

> rs6000_isa_flags &= ~OPTION_MASK_PCREL;
>   }
> 
> ...although, even the cmodel != medium test above should probably have
> some extra tests to ensure TARGET_PCREL is true (and explicit?) and
> mcmodel != medium before giving an error???  Ie, a ELFv2 P10 compile
> with an implicit -mpcrel and explicit -mcmodel={small,large} probably
> should not error and just silently disable pcrel???  Meaning only
> when we explicitly say -mpcrel -mcmodel={small,large} should we give
> that error.  Thoughts on that?

Yeah, I think it makes more sense to only error for explicit -mpcrel
-mcmodel={small,large}, linux64 sets cmodel as medium by default, I guess
that's why the current code doesn't check if the cmodel explicitly specified,
!medium implies it's changed explicitly.  So if we move the checkings
to subtarget, it seems to have good locality (context)?

BR,
Kewen

[PATCH] Apply some TLC to vect_slp_analyze_instance_dependence

2023-08-25 Thread Richard Biener via Gcc-patches

This refactors things, separating load and store handing, adjusting
comments to reflect reality and removing some dead code.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

This is in preparation for a fix for PR37.

* tree-vect-data-refs.cc (vect_slp_analyze_store_dependences):
Split out from vect_slp_analyze_node_dependences, remove
dead code.
(vect_slp_analyze_load_dependences): Split out from
vect_slp_analyze_node_dependences, adjust comments.  Process
queued stores before any disambiguation.
(vect_slp_analyze_node_dependences): Remove.
(vect_slp_analyze_instance_dependence): Adjust.
---
 gcc/tree-vect-data-refs.cc | 238 +
 1 file changed, 108 insertions(+), 130 deletions(-)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index a2caf6cb1c7..0295e256a44 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -670,160 +670,138 @@ vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
 }
 
 
-/* Analyze dependences involved in the transform of SLP NODE.  STORES
-   contain the vector of scalar stores of this instance if we are
-   disambiguating the loads.  */
+/* Analyze dependences involved in the transform of a store SLP NODE.  */
 
 static bool
-vect_slp_analyze_node_dependences (vec_info *vinfo, slp_tree node,
-  vec stores,
-  stmt_vec_info last_store_info)
+vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node)
 {
-  /* This walks over all stmts involved in the SLP load/store done
+  /* This walks over all stmts involved in the SLP store done
  in NODE verifying we can sink them up to the last stmt in the
  group.  */
-  if (DR_IS_WRITE (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node
+  stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
+  gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info)));
+
+  for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
 {
-  stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp 
(node);
-  for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
+  stmt_vec_info access_info
+   = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
+  if (access_info == last_access_info)
+   continue;
+  data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
+  ao_ref ref;
+  bool ref_initialized_p = false;
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
+  gsi_stmt (gsi) != last_access_info->stmt; gsi_next ())
{
- stmt_vec_info access_info
-   = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
- if (access_info == last_access_info)
+ gimple *stmt = gsi_stmt (gsi);
+ if (! gimple_vuse (stmt))
continue;
- data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
- ao_ref ref;
- bool ref_initialized_p = false;
- for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
-  gsi_stmt (gsi) != last_access_info->stmt; gsi_next ())
-   {
- gimple *stmt = gsi_stmt (gsi);
- if (! gimple_vuse (stmt))
-   continue;
-
- /* If we couldn't record a (single) data reference for this
-stmt we have to resort to the alias oracle.  */
- stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
- data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
- if (!dr_b)
-   {
- /* We are moving a store - this means
-we cannot use TBAA for disambiguation.  */
- if (!ref_initialized_p)
-   ao_ref_init (, DR_REF (dr_a));
- if (stmt_may_clobber_ref_p_1 (stmt, , false)
- || ref_maybe_used_by_stmt_p (stmt, , false))
-   return false;
- continue;
-   }
-
- bool dependent = false;
- /* If we run into a store of this same instance (we've just
-marked those) then delay dependence checking until we run
-into the last store because this is where it will have
-been sunk to (and we verify if we can do that as well).  */
- if (gimple_visited_p (stmt))
-   {
- if (stmt_info != last_store_info)
-   continue;
 
- for (stmt_vec_info _info : stores)
-   {
- data_reference *store_dr
-   = STMT_VINFO_DATA_REF (store_info);
- ddr_p ddr = initialize_data_dependence_relation
-   (dr_a, store_dr, vNULL);
- dependent
-   = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
-

[PATCH v2] LoongArch: Remove redundant sign extension instructions caused by SLT instructions.

2023-08-25 Thread Lulu Cheng

v1 -> v2:
1. Modify description information


Since the SLT instruction does not distinguish between 64-bit operations and 
32-bit
operations under the 64-bit LoongArch architecture, if the operand of slt is 
SImode,
the sign extension of the operand needs to be displayed.

But similar to the test case below, the sign extension is redundant:

extern int src1, src2, src3;

int
test (void)
{
  int data1 = src1 + src2;
  int data2 = src1 + src3;
  return data1 > data2 ? data1 : data2;
}
Assembly code before optimization:
...
add.w   $r4,$r4,$r14
add.w   $r13,$r13,$r14
slli.w  $r12,$r4,0
slli.w  $r14,$r13,0
slt $r12,$r12,$r14
masknez $r4,$r4,$r12
maskeqz $r12,$r13,$r12
or  $r4,$r4,$r12
slli.w  $r4,$r4,0
...

After optimization:
...
add.w   $r12,$r12,$r14
add.w   $r13,$r13,$r14
slt $r4,$r12,$r13
masknez $r12,$r12,$r4
maskeqz $r4,$r13,$r4
or  $r4,$r12,$r4
...

Similar to this test example, the two operands of SLT are obtained by the
addition operation, and add.w implicitly sign-extends, so the two operands
of SLT do not require sign-extend.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
Optimize the function implementation.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/slt-sign-extend.c: New test.
---
 gcc/config/loongarch/loongarch.cc | 53 +--
 .../gcc.target/loongarch/slt-sign-extend.c| 14 +
 2 files changed, 63 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 86d58784113..1905599b9e8 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx *operands)
   enum rtx_code code = GET_CODE (operands[1]);
   rtx op0 = XEXP (operands[1], 0);
   rtx op1 = XEXP (operands[1], 1);
+  rtx op0_extend = op0;
+  rtx op1_extend = op1;
+
+  /* Record whether operands[2] and operands[3] modes are promoted to 
word_mode.  */
+  bool promote_p = false;
+  machine_mode mode = GET_MODE (operands[0]);
 
   if (FLOAT_MODE_P (GET_MODE (op1)))
 loongarch_emit_float_compare (, , );
   else
 {
+  if ((REGNO (op0) == REGNO (operands[2])
+  || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+ && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
+   {
+ mode = word_mode;
+ promote_p = true;
+   }
+
   loongarch_extend_comparands (code, , );
 
   op0 = force_reg (word_mode, op0);
+  op0_extend = op0;
+  op1_extend = force_reg (word_mode, op1);
 
   if (code == EQ || code == NE)
{
@@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx *operands)
   && register_operand (operands[2], VOIDmode)
   && register_operand (operands[3], VOIDmode))
 {
-  machine_mode mode = GET_MODE (operands[0]);
+  rtx op2 = operands[2];
+  rtx op3 = operands[3];
+
+  if (promote_p)
+   {
+ if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
+   op2 = op0_extend;
+ else
+   {
+ loongarch_extend_comparands (code, , _rtx);
+ op2 = force_reg (mode, op2);
+   }
+
+ if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
+   op3 = op1_extend;
+ else
+   {
+ loongarch_extend_comparands (code, , _rtx);
+ op3 = force_reg (mode, op3);
+   }
+   }
+
   rtx temp = gen_reg_rtx (mode);
   rtx temp2 = gen_reg_rtx (mode);
 
   emit_insn (gen_rtx_SET (temp,
  gen_rtx_IF_THEN_ELSE (mode, cond,
-   operands[2], const0_rtx)));
+   op2, const0_rtx)));
 
   /* Flip the test for the second operand.  */
   cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
 
   emit_insn (gen_rtx_SET (temp2,
  gen_rtx_IF_THEN_ELSE (mode, cond,
-   operands[3], const0_rtx)));
+   op3, const0_rtx)));
 
   /* Merge the two results, at least one is guaranteed to be zero.  */
-  emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
+  if (promote_p)
+   {
+ rtx temp3 = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
+ temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
+ loongarch_emit_move (operands[0], temp3);
+   }
+  else
+   emit_insn (gen_rtx_SET (operands[0],

Re: [PATCH] fortran: Rename TRUE/FALSE to true/false in *.cc files

2023-08-25 Thread Tobias Burnus


On 25.08.23 10:26, Uros Bizjak via Gcc-patches wrote:

gcc/fortran/ChangeLog:

 * match.cc (gfc_match_equivalence): Rename TRUE/FALSE to true/false.
 * module.cc (check_access): Ditto.
 * primary.cc (match_real_constant): Ditto.
 * trans-array.cc (gfc_trans_allocate_array_storage): Ditto.
 (get_array_ctor_strlen): Ditto.
 * trans-common.cc (find_equivalence): Ditto.
 (add_equivalences): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
OK for master?


OK. Not really necessary but it makes the code more consistent.

(We already heavily use true/false (5000+ times) and the patch changes
the remaining 11 TRUE/FALSE.)

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

[PATCH] fortran: Rename TRUE/FALSE to true/false in *.cc files

2023-08-25 Thread Uros Bizjak via Gcc-patches

gcc/fortran/ChangeLog:

* match.cc (gfc_match_equivalence): Rename TRUE/FALSE to true/false.
* module.cc (check_access): Ditto.
* primary.cc (match_real_constant): Ditto.
* trans-array.cc (gfc_trans_allocate_array_storage): Ditto.
(get_array_ctor_strlen): Ditto.
* trans-common.cc (find_equivalence): Ditto.
(add_equivalences): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

OK for master?

Uros.
diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc
index ba23bcd9692..c926f38058f 100644
--- a/gcc/fortran/match.cc
+++ b/gcc/fortran/match.cc
@@ -5788,7 +5788,7 @@ gfc_match_equivalence (void)
goto syntax;
 
   set = eq;
-  common_flag = FALSE;
+  common_flag = false;
   cnt = 0;
 
   for (;;)
@@ -5829,7 +5829,7 @@ gfc_match_equivalence (void)
 
  if (sym->attr.in_common)
{
- common_flag = TRUE;
+ common_flag = true;
  common_head = sym->common_head;
}
 
diff --git a/gcc/fortran/module.cc b/gcc/fortran/module.cc
index 95fdda6b2aa..c07e9dc9ba2 100644
--- a/gcc/fortran/module.cc
+++ b/gcc/fortran/module.cc
@@ -5744,9 +5744,9 @@ check_access (gfc_access specific_access, gfc_access 
default_access)
 return true;
 
   if (specific_access == ACCESS_PUBLIC)
-return TRUE;
+return true;
   if (specific_access == ACCESS_PRIVATE)
-return FALSE;
+return false;
 
   if (flag_module_private)
 return default_access == ACCESS_PUBLIC;
diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc
index 0bb440b85a9..d3aeeb89362 100644
--- a/gcc/fortran/primary.cc
+++ b/gcc/fortran/primary.cc
@@ -530,13 +530,13 @@ match_real_constant (gfc_expr **result, int signflag)
   seen_dp = 0;
   seen_digits = 0;
   exp_char = ' ';
-  negate = FALSE;
+  negate = false;
 
   c = gfc_next_ascii_char ();
   if (signflag && (c == '+' || c == '-'))
 {
   if (c == '-')
-   negate = TRUE;
+   negate = true;
 
   gfc_gobble_whitespace ();
   c = gfc_next_ascii_char ();
diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 951cecfa5d5..90a7d4e9aef 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -1121,7 +1121,7 @@ gfc_trans_allocate_array_storage (stmtblock_t * pre, 
stmtblock_t * post,
 {
   /* A callee allocated array.  */
   gfc_conv_descriptor_data_set (pre, desc, null_pointer_node);
-  onstack = FALSE;
+  onstack = false;
 }
   else
 {
@@ -2481,7 +2481,7 @@ get_array_ctor_strlen (stmtblock_t *block, 
gfc_constructor_base base, tree * len
   gfc_constructor *c;
   bool is_const;
 
-  is_const = TRUE;
+  is_const = true;
 
   if (gfc_constructor_first (base) == NULL)
 {
diff --git a/gcc/fortran/trans-common.cc b/gcc/fortran/trans-common.cc
index c83b6f930eb..91a98b30b8d 100644
--- a/gcc/fortran/trans-common.cc
+++ b/gcc/fortran/trans-common.cc
@@ -1048,7 +1048,7 @@ find_equivalence (segment_info *n)
   gfc_equiv *e1, *e2, *eq;
   bool found;
 
-  found = FALSE;
+  found = false;
 
   for (e1 = n->sym->ns->equiv; e1; e1 = e1->next)
 {
@@ -1083,7 +1083,7 @@ find_equivalence (segment_info *n)
{
  add_condition (n, eq, e2);
  e2->used = 1;
- found = TRUE;
+ found = true;
}
}
 }
@@ -1102,11 +1102,11 @@ static void
 add_equivalences (bool *saw_equiv)
 {
   segment_info *f;
-  bool more = TRUE;
+  bool more = true;
 
   while (more)
 {
-  more = FALSE;
+  more = false;
   for (f = current_segment; f; f = f->next)
{
  if (!f->sym->equiv_built)

[committed] treewide: Rename TRUE/FALSE to true/false in *.cc files

2023-08-25 Thread Uros Bizjak via Gcc-patches

gcc/c-family/ChangeLog:

* c-format.cc (read_any_format_width):
Rename TRUE/FALSE to true/false.

gcc/ChangeLog:

* caller-save.cc (new_saved_hard_reg):
Rename TRUE/FALSE to true/false.
(setup_save_areas): Ditto.
* gcc.cc (set_collect_gcc_options): Ditto.
(driver::build_multilib_strings): Ditto.
(print_multilib_info): Ditto.
* genautomata.cc (gen_cpu_unit): Ditto.
(gen_query_cpu_unit): Ditto.
(gen_bypass): Ditto.
(gen_excl_set): Ditto.
(gen_presence_absence_set): Ditto.
(gen_presence_set): Ditto.
(gen_final_presence_set): Ditto.
(gen_absence_set): Ditto.
(gen_final_absence_set): Ditto.
(gen_automaton): Ditto.
(gen_regexp_repeat): Ditto.
(gen_regexp_allof): Ditto.
(gen_regexp_oneof): Ditto.
(gen_regexp_sequence): Ditto.
(process_decls): Ditto.
(reserv_sets_are_intersected): Ditto.
(initiate_excl_sets): Ditto.
(form_reserv_sets_list): Ditto.
(check_presence_pattern_sets): Ditto.
(check_absence_pattern_sets): Ditto.
(check_regexp_units_distribution): Ditto.
(check_unit_distributions_to_automata): Ditto.
(create_ainsns): Ditto.
(output_insn_code_cases): Ditto.
(output_internal_dead_lock_func): Ditto.
(form_important_insn_automata_lists): Ditto.
* gengtype-state.cc (read_state_files_list): Ditto.
* gengtype.cc (main): Ditto.
* gimple-array-bounds.cc (array_bounds_checker::check_array_bounds):
Ditto.
* gimple.cc (gimple_build_call_from_tree): Ditto.
(preprocess_case_label_vec_for_gimple): Ditto.
* gimplify.cc (gimplify_call_expr): Ditto.
* ordered-hash-map-tests.cc (test_map_of_int_to_strings): Ditto.

gcc/cp/ChangeLog:

* call.cc (build_conditional_expr):
Rename TRUE/FALSE to true/false.
(build_new_op): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc
index b3ef2d44ce9..b9906ecc171 100644
--- a/gcc/c-family/c-format.cc
+++ b/gcc/c-family/c-format.cc
@@ -2325,13 +2325,13 @@ read_any_format_width (tree ,
 {
   /* Possibly read a numeric width.  If the width is zero,
 we complain if appropriate.  */
-  int non_zero_width_char = FALSE;
-  int found_width = FALSE;
+  int non_zero_width_char = false;
+  int found_width = false;
   while (ISDIGIT (*format_chars))
{
- found_width = TRUE;
+ found_width = true;
  if (*format_chars != '0')
-   non_zero_width_char = TRUE;
+   non_zero_width_char = true;
  ++format_chars;
}
   if (found_width && !non_zero_width_char &&
diff --git a/gcc/caller-save.cc b/gcc/caller-save.cc
index b8915dab128..9ddf9b70b21 100644
--- a/gcc/caller-save.cc
+++ b/gcc/caller-save.cc
@@ -342,7 +342,7 @@ new_saved_hard_reg (int regno, int call_freq)
   saved_reg->num = saved_regs_num++;
   saved_reg->hard_regno = regno;
   saved_reg->call_freq = call_freq;
-  saved_reg->first_p = FALSE;
+  saved_reg->first_p = false;
   saved_reg->next = -1;
 }
 
@@ -558,7 +558,7 @@ setup_save_areas (void)
+ saved_reg2->num]
  = saved_reg_conflicts[saved_reg2->num * saved_regs_num
+ saved_reg->num]
- = TRUE;
+ = true;
  }
}
}
@@ -608,7 +608,7 @@ setup_save_areas (void)
}
  if (j == i)
{
- saved_reg->first_p = TRUE;
+ saved_reg->first_p = true;
  for (best_slot_num = -1, j = 0; j < prev_save_slots_num; j++)
{
  slot = prev_save_slots[j];
diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 673ec91d60e..23e458d3252 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -6058,7 +6058,7 @@ build_conditional_expr (const op_location_t ,
   if (complain & tf_error)
 {
   auto_diagnostic_group d;
-  op_error (loc, COND_EXPR, NOP_EXPR, arg1, arg2, arg3, FALSE);
+ op_error (loc, COND_EXPR, NOP_EXPR, arg1, arg2, arg3, false);
   print_z_candidates (loc, candidates);
 }
  return error_mark_node;
@@ -7129,7 +7129,7 @@ build_new_op (const op_location_t , enum tree_code 
code, int flags,
/* ... Otherwise, report the more generic
   "no matching operator found" error */
auto_diagnostic_group d;
-   op_error (loc, code, code2, arg1, arg2, arg3, FALSE);
+   op_error (loc, code, code2, arg1, arg2, arg3, false);
print_z_candidates (loc, candidates);
  }
}
@@ -7145,7 +7145,7 @@ build_new_op (const op_location_t , enum tree_code 
code, int flags,
  if (complain & tf_error)
{
  auto_diagnostic_group d;
- op_error (loc, code, code2,

Re: RISC-V: Fix stack_save_restore_1/2 test cases

2023-08-25 Thread Jivan Hakobyan via Gcc-patches

Hi Vineet.

Do you mind sending your patches inline using git send-email or some such ?

Never thought about that, what is the purpose of sending it in that way?

Of course, if it is more convenient for the community then I will send
through git.

On Fri, Aug 25, 2023 at 9:12 AM Vineet Gupta  wrote:

> Hi Jivan,
>
> On 8/24/23 08:45, Jivan Hakobyan via Gcc-patches wrote:
> > This patch fixes failing stack_save_restore_1/2 test cases.
> > After 6619b3d4c15c commit size of the frame was changed.
> >
> >
> > gcc/testsuite/ChangeLog:
> >  * gcc.target/riscv/stack_save_restore_1.c: Update frame size
> >  * gcc.target/riscv/stack_save_restore_2.c: Likewise.
>
> Do you mind sending your patches inline using git send-email or some such ?
>
> Thx,
> -Vineet
>

-- 
With the best regards
Jivan Hakobyan

[PATCH] tree-optimization/111136 - STMT_VINFO_SLP_VECT_ONLY and stores

2023-08-25 Thread Richard Biener via Gcc-patches

vect_dissolve_slp_only_groups currently only expects loads, for stores
we have to make sure to mark the dissolved "groups" strided.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

PR tree-optimization/36
* tree-vect-loop.cc (vect_dissolve_slp_only_groups): For
stores force STMT_VINFO_STRIDED_P and also duplicate that
to all elements.
---
 gcc/tree-vect-loop.cc | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ebee8037e02..23c6e8259e7 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2453,8 +2453,13 @@ vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
  DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
  DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
  DR_GROUP_SIZE (vinfo) = 1;
- if (STMT_VINFO_STRIDED_P (first_element))
-   DR_GROUP_GAP (vinfo) = 0;
+ if (STMT_VINFO_STRIDED_P (first_element)
+ /* We cannot handle stores with gaps.  */
+ || DR_IS_WRITE (dr_info->dr))
+   {
+ STMT_VINFO_STRIDED_P (vinfo) = true;
+ DR_GROUP_GAP (vinfo) = 0;
+   }
  else
DR_GROUP_GAP (vinfo) = group_size - 1;
  /* Duplicate and adjust alignment info, it needs to
-- 
2.35.3

[PATCH] RISC-V: Refactor and clean expand_cond_len_{unop, binop, ternop}

2023-08-25 Thread Lehua Ding

Hi,

This patch refactors the codes of expand_cond_len_{unop,binop,ternop}.
Introduces a new unified function expand_cond_len_op to do the main thing.
The expand_cond_len_{unop,binop,ternop} functions only care about how
to pass the operands to the intrinsic patterns.

Best,
Lehua

gcc/ChangeLog:

* config/riscv/autovec.md: Adjust
* config/riscv/riscv-protos.h (RVV_VUNDEF): Clean.
(get_vlmax_rtx): Exported.
* config/riscv/riscv-v.cc (emit_nonvlmax_fp_ternary_tu_insn): Deleted.
(emit_vlmax_masked_gather_mu_insn): Adjust.
(get_vlmax_rtx): New func.
(expand_load_store): Adjust.
(expand_cond_len_unop): Call expand_cond_len_op.
(expand_cond_len_op): New subroutine.
(expand_cond_len_binop): Call expand_cond_len_op.
(expand_cond_len_ternop): Call expand_cond_len_op.
(expand_lanes_load_store): Adjust.

---
 gcc/config/riscv/autovec.md |   6 +-
 gcc/config/riscv/riscv-protos.h |  16 ++-
 gcc/config/riscv/riscv-v.cc | 166 ++--
 3 files changed, 60 insertions(+), 128 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index e9659b2b157..ad072ff439a 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -971,9 +971,9 @@
   rtx mask = gen_reg_rtx (mask_mode);
   riscv_vector::expand_vec_cmp (mask, LT, operands[1], zero);
 
-  rtx ops[] = {operands[0], mask, operands[1], operands[1]};
-  riscv_vector::emit_vlmax_masked_mu_insn (code_for_pred (NEG, mode),
-  riscv_vector::RVV_UNOP_MU, ops);
+  rtx ops[] = {operands[0], mask, operands[1], operands[1],
+   riscv_vector::get_vlmax_rtx (mode)};
+  riscv_vector::expand_cond_len_unop (NEG, ops);
   DONE;
 })
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2c4405c9860..6a49718b34d 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -180,25 +180,20 @@ namespace riscv_vector {
 #define RVV_VUNDEF(MODE)   
\
   gen_rtx_UNSPEC (MODE, gen_rtvec (1, gen_rtx_REG (SImode, X0_REGNUM)),
\
  UNSPEC_VUNDEF)
+
+/* The value means the number of operands for insn_expander.  */
 enum insn_type
 {
   RVV_MISC_OP = 1,
   RVV_UNOP = 2,
-  RVV_UNOP_M = RVV_UNOP + 2,
-  RVV_UNOP_MU = RVV_UNOP + 2,
-  RVV_UNOP_TU = RVV_UNOP + 2,
-  RVV_UNOP_TUMU = RVV_UNOP + 2,
+  RVV_UNOP_MASK = RVV_UNOP + 2,
   RVV_BINOP = 3,
-  RVV_BINOP_MU = RVV_BINOP + 2,
-  RVV_BINOP_TU = RVV_BINOP + 2,
-  RVV_BINOP_TUMU = RVV_BINOP + 2,
+  RVV_BINOP_MASK = RVV_BINOP + 2,
   RVV_MERGE_OP = 4,
   RVV_CMP_OP = 4,
   RVV_CMP_MU_OP = RVV_CMP_OP + 2, /* +2 means mask and maskoff operand.  */
   RVV_TERNOP = 5,
-  RVV_TERNOP_MU = RVV_TERNOP + 1,
-  RVV_TERNOP_TU = RVV_TERNOP + 1,
-  RVV_TERNOP_TUMU = RVV_TERNOP + 1,
+  RVV_TERNOP_MASK = RVV_TERNOP + 1,
   RVV_WIDEN_TERNOP = 4,
   RVV_SCALAR_MOV_OP = 4, /* +1 for VUNDEF according to vector.md.  */
   RVV_SLIDE_OP = 4,  /* Dest, VUNDEF, source and offset.  */
@@ -258,6 +253,7 @@ void emit_vlmax_masked_mu_insn (unsigned, int, rtx *);
 void emit_scalar_move_insn (unsigned, rtx *, rtx = 0);
 void emit_nonvlmax_integer_move_insn (unsigned, rtx *, rtx);
 enum vlmul_type get_vlmul (machine_mode);
+rtx get_vlmax_rtx (machine_mode);
 unsigned int get_ratio (machine_mode);
 unsigned int get_nf (machine_mode);
 machine_mode get_subpart_mode (machine_mode);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 14eda581d00..8b5bb0097bc 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -759,28 +759,6 @@ emit_vlmax_fp_ternary_insn (unsigned icode, int op_num, 
rtx *ops, rtx vl)
   e.emit_insn ((enum insn_code) icode, ops);
 }
 
-/* This function emits a {NONVLMAX, TAIL_UNDISTURBED, MASK_ANY} vsetvli 
followed
- * by the ternary operation which always has a real merge operand.  */
-static void
-emit_nonvlmax_fp_ternary_tu_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
-{
-  machine_mode dest_mode = GET_MODE (ops[0]);
-  machine_mode mask_mode = get_mask_mode (dest_mode);
-  insn_expander e (/*OP_NUM*/ op_num,
- /*HAS_DEST_P*/ true,
- /*FULLY_UNMASKED_P*/ false,
- /*USE_REAL_MERGE_P*/ true,
- /*HAS_AVL_P*/ true,
- /*VLMAX_P*/ false,
- /*DEST_MODE*/ dest_mode,
- /*MASK_MODE*/ mask_mode);
-  e.set_policy (TAIL_UNDISTURBED);
-  e.set_policy (MASK_ANY);
-  e.set_rounding_mode (FRM_DYN);
-  e.set_vl (vl);
-  e.emit_insn ((enum insn_code) icode, ops);
-}
-
 /* This function emits a {NONVLMAX, TAIL_ANY, MASK_ANY} vsetvli followed by the
  * actual operation.  */
 void
@@ -1165,8 +1143,8 @@

Re: [PATCH 2/3] MATCH: `a | C -> C` when we know that `a & ~C == 0`

2023-08-25 Thread Andrew Pinski via Gcc-patches

On Thu, Aug 24, 2023 at 11:37 PM Richard Biener via Gcc-patches
 wrote:
>
> On Thu, Aug 24, 2023 at 9:16 PM Andrew Pinski via Gcc-patches
>  wrote:
> >
> > Even though this is handled by other code inside both VRP and CCP,
> > sometimes we want to optimize this outside of VRP and CCP.
> > An example is given in PR 106677 where phiopt will happen
> > after VRP (which removes a cast for a comparison) and then
> > phiopt will optimize the phi to be `a | 1` which can then
> > be optimized to `1` due to this patch.
>
> Also works for xor, no?

No, because IOR is a saturation operation while XOR is not. So if you
know that x and C are full subsets (nonzero(x) & ~nonzero(C) == 0)
then A^C is not the constant C but rather just (~A) which we already
a pattern for that to turn it back in to A^C:
/* Simplify (~X & Y) to X ^ Y if we know that (X & ~Y) is 0.  */

The only thing you can do for XOR is that if you have `A ^ B` and A
and B are known not to share any bits in common (that is nonzero(A) &
nonzero(B) == 0), you can convert it to `A | B` (that is what
simplify-rtx.cc does). Which looks like we don't do on the gimple
level.

>
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> OK with or without adding XOR.

Ok.

Thanks,
Andrew

>
> Richard.
>
> > Note Similar code already exists in simplify_rtx for the RTL level;
> > it was moved from combine to simplify_rtx in r0-72539-gbd1ef757767f6d.
> > gcc/ChangeLog:
> >
> > * match.pd (`a | C -> C`): New pattern.
> > ---
> >  gcc/match.pd | 6 ++
> >  1 file changed, 6 insertions(+)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index c87a0795667..3bbeceb37b4 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -1456,6 +1456,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> >&& wi::bit_and_not (get_nonzero_bits (@0), wi::to_wide (@1)) == 0)
> >@0))
> > +/* x | C -> C if we know that x & ~C == 0.  */
> > +(simplify
> > + (bit_ior SSA_NAME@0 INTEGER_CST@1)
> > + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> > +  && wi::bit_and_not (get_nonzero_bits (@0), wi::to_wide (@1)) == 0)
> > +  @1))
> >  #endif
> >
> >  /* ~(~X - Y) -> X + Y and ~(~X + Y) -> X - Y.  */
> > --
> > 2.31.1
> >

Re: [PATCH V2] RISC-V: Add conditional autovec convert(INT<->INT) patterns

2023-08-25 Thread Lehua Ding


Hi Robin,

There is one issue with this patch and the next few patches, the mask 
policy for the generated vsetvl should be mu, which is currently done 
wrong. I'm going to clear the expand_cond_len* functions before sending 
the next version of these patches. Then these patches can directly use 
expand_cond_len_unary instead of emit_vlmax_*.


One thing maybe for the next patches: It seems to me that we lump all of 
the COND_... tests into the cond subdirectory when IMHO they would also 
fit into the respective directories of their operations (binop, unop 
etc). Right now we will have a lot of rather unrelated tests (or just 
related by their use of COND_) in one dir. What do you think?


I have no idea about it, I will add some more testcases for cond 
widen-binop testcases and it looks like there are too many files in the 
cond directory. Maybe it's better to subdivide unop, binop and ternop 
directories in the cond directory since all these testcases are designed 
to test the conditional patterns.


--
Best,
Lehua

MIPS: the method of getting GOT address for PIC code

2023-08-25 Thread YunQiang Su via Gcc-patches

When working on LLVM, I found this problem
https://github.com/llvm/llvm-project/issues/64974.
Maybe it's time for us to reconsider the way of getting GOT address
for PIC code.

1.Background[1]:
All of the accessing of global variables and normal function calls
needs help from GOT.
So normally, the first 3 instructions of a function are to compute the
address of GOT.
Normally like:
lui $gp,%hi(_gp_disp)
addiu   $gp,$gp,%lo(_gp_disp)
addu$gp,$gp,$t9
These 3 instructions load the value of _gp_disp, which is a link-time
constant value,
and add them with $t9, which normally contains the address of the
current function.

So, why $t9? The reason is that the pre-R6 MIPS lacks instructions to
get the PC value.
Thus, the ABI defines that, the caller should call functions with
jr/jarl $t9. So the callee
can get the address of itself by $t9.

2. What's my proposal?
2.1 For MIPSr6, its has instructions to get the current PC value, so
we can use them,
which can gain performance improvement.

2.2 For pre-R6, in fact, it can get the value of PC with NAL
instruction [2], while the performance will be some worse. For the
worst case, the decreasing of performance will be 20%-40%
My plan for pre-R6, is to add a non-default option to use NAL to get
the address of GOT.
Some software, like Linux kernel (for Kalsr support) will need it.

Any suggestions?

[1] https://refspecs.linuxfoundation.org/elf/mipsabi.pdf
[2] 
https://s3-eu-west-1.amazonaws.com/downloads-mips/documents/MD00087-2B-MIPS64BIS-AFP-6.06.pdf
Page 404


-- 
YunQiang Su

Re: [PATCH V2] RISC-V: Add conditional autovec convert(INT<->INT) patterns

2023-08-25 Thread Robin Dapp via Gcc-patches

Hi Lehua,

thanks, LGTM.

One thing maybe for the next patches:  It seems to me that we lump all of
the COND_... tests into the cond subdirectory when IMHO they would also
fit into the respective directories of their operations (binop, unop etc).
Right now we will have a lot of rather unrelated tests (or just related
by their use of COND_) in one dir.  What do you think?  

Regards
 Robin

Re: [PATCH 3/3] PHIOPT: Allow BIT_AND and BIT_IOR in early phiopt

2023-08-25 Thread Richard Biener via Gcc-patches

On Thu, Aug 24, 2023 at 9:16 PM Andrew Pinski via Gcc-patches
 wrote:
>
> Now that MIN/MAX can sometimes be transformed into BIT_AND/BIT_IOR,
> we should allow BIT_AND and BIT_IOR in the early phiopt.
> Also we produce BIT_AND/BIT_IOR for things like `bool0 ? bool1 : 0`
> which seems like a good thing to allow early on too.

Hum.

I think if we allow AND/IOR we should also allow XOR and NOT.

Can you add dumping for replacements we disallow?  I'm esp. curious
for those otherwise being "singleton".  I know when doing early phiopt
I wanted to be very conservative (also to reduce testsuite fallout), and
I was mostly interested in MIN/MAX which I then extended to similar
things like ABS.  But maybe we can revisit this if we understand which
cases we definitely do not want to do early?

> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> gcc/ChangeLog:
>
> * tree-ssa-phiopt.cc (phiopt_early_allow): Allow
> BIT_AND_EXPR and BIT_IOR_EXPR.
> ---
>  gcc/tree-ssa-phiopt.cc | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
> index 54706f4c7e7..7e63fb115db 100644
> --- a/gcc/tree-ssa-phiopt.cc
> +++ b/gcc/tree-ssa-phiopt.cc
> @@ -469,6 +469,9 @@ phiopt_early_allow (gimple_seq , gimple_match_op )
>  {
>case MIN_EXPR:
>case MAX_EXPR:
> +  /* MIN/MAX could be convert into these. */
> +  case BIT_IOR_EXPR:
> +  case BIT_AND_EXPR:
>case ABS_EXPR:
>case ABSU_EXPR:
>case NEGATE_EXPR:
> --
> 2.31.1
>

[PATCH-2, rs6000] Implement 32bit inline lrint [PR88558]

2023-08-25 Thread HAO CHEN GUI via Gcc-patches

Hi,
  This patch implements 32bit inline lrint by "fctiw". It depends on
the patch1 to do SImode move from FP register on P7.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: support 32bit inline lrint

gcc/
PR target/88558
* config/rs6000/rs6000.md (lrintdi2): Remove TARGET_FPRND
from insn condition.
(lrintsi2): New insn pattern for 32bit lrint.

gcc/testsuite/
PR target/106769
* gcc.target/powerpc/pr88558.h: New.
* gcc.target/powerpc/pr88558-p7.c: New.
* gcc.target/powerpc/pr88558-p8v.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index fd263e8dfe3..b36304de8c6 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6655,10 +6655,18 @@ (define_insn "lrintdi2"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
   UNSPEC_FCTID))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT"
   "fctid %0,%1"
   [(set_attr "type" "fp")])

+(define_insn "lrintsi2"
+  [(set (match_operand:SI 0 "gpc_reg_operand" "=d")
+   (unspec:SI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
+  UNSPEC_FCTIW))]
+  "TARGET_HARD_FLOAT && TARGET_POPCNTD"
+  "fctiw %0,%1"
+  [(set_attr "type" "fp")])
+
 (define_insn "btrunc2"
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
new file mode 100644
index 000..6437c55fa61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power7" } */
+
+#include "pr88558.h"
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mstfiwx\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c
new file mode 100644
index 000..fd22123ffb6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p8v.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=power8" } */
+
+long int foo (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long long bar (double a)
+{
+  return __builtin_llrint (a);
+}
+
+int baz (double a)
+{
+  return __builtin_irint (a);
+}
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 1 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558.h 
b/gcc/testsuite/gcc.target/powerpc/pr88558.h
new file mode 100644
index 000..0cc0c68dd4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558.h
@@ -0,0 +1,14 @@
+long int foo (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long long bar (double a)
+{
+  return __builtin_llrint (a);
+}
+
+int baz (double a)
+{
+  return __builtin_irint (a);
+}

[PATCH-1, rs6000] Enable SImode in FP register on P7 [PR88558]

2023-08-25 Thread HAO CHEN GUI via Gcc-patches

Hi,
  This patch enables SImode in FP register on P7. Instruction "fctiw"
stores its integer output in an FP register. So SImode in FP register
needs be enabled on P7 if we want support "fctiw" on P7.

  The test case is in the second patch which implements 32bit inline
lrint.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: enable SImode in FP register on P7

gcc/
PR target/88558
* config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
Enable Simode in FP register for P7.
* config/rs6000/rs6000.md (*movsi_internal1): Add fmr for SImode
move between FP register.  Set attribute isa of stfiwx to "*"
and attribute of stxsiwx to "p7".

patch.diff
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 44b448d2ba6..99085c2cdd7 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1903,7 +1903,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, 
machine_mode mode)
  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
return 1;

- if (TARGET_P8_VECTOR && (mode == SImode))
+ if (TARGET_POPCNTD && mode == SImode)
return 1;

  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index cdab49fbb91..ac5d29a2cf8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7566,7 +7566,7 @@ (define_split

 (define_insn "*movsi_internal1"
   [(set (match_operand:SI 0 "nonimmediate_operand"
- "=r, r,
+ "=r, r,  ^d,
   r,  d,  v,
   m,  ?Z, ?Z,
   r,  r,  r,  r,
@@ -7575,7 +7575,7 @@ (define_insn "*movsi_internal1"
   wa, r,
   r,  *h, *h")
(match_operand:SI 1 "input_operand"
- "r,  U,
+ "r,  U,  ^d,
   m,  ?Z, ?Z,
   r,  d,  v,
   I,  L,  eI, n,
@@ -7588,6 +7588,7 @@ (define_insn "*movsi_internal1"
   "@
mr %0,%1
la %0,%a1
+   fmr %0,%1
lwz%U1%X1 %0,%1
lfiwzx %0,%y1
lxsiwzx %x0,%y1
@@ -7611,7 +7612,7 @@ (define_insn "*movsi_internal1"
mt%0 %1
nop"
   [(set_attr "type"
- "*,  *,
+ "*,  *,  fpsimple,
   load,   fpload, fpload,
   store,  fpstore,fpstore,
   *,  *,  *,  *,
@@ -7620,7 +7621,7 @@ (define_insn "*movsi_internal1"
   mtvsr,  mfvsr,
   *,  *,  *")
(set_attr "length"
- "*,  *,
+ "*,  *,  *,
   *,  *,  *,
   *,  *,  *,
   *,  *,  *,  8,
@@ -7629,9 +7630,9 @@ (define_insn "*movsi_internal1"
   *,  *,
   *,  *,  *")
(set_attr "isa"
- "*,  *,
-  *,  p8v,p8v,
-  *,  p8v,p8v,
+ "*,  *,  *,
+  *,  p7, p8v,
+  *,  *,  p8v,
   *,  *,  p10,*,
   p8v,p9v,p9v,p8v,
   p9v,p8v,p9v,

Re: [PATCH 1/3] MATCH: Move `a ? one_zero : one_zero` matching after min/max matching

2023-08-25 Thread Richard Biener via Gcc-patches

On Thu, Aug 24, 2023 at 9:16 PM Andrew Pinski via Gcc-patches
 wrote:
>
> In PR 106677, I noticed that on the trunk we were producing:
> ```
>   _25 = SR.116_117 == 0;
>   _27 = (unsigned char) _25;
>   _32 = _27 | SR.116_117;
> ```
> From `SR.115_117 != 0 ? SR.115_117 : 1`
> Rather than:
> ```
>   _119 = MAX_EXPR <1, SR.115_117>;
> ```
> Or (rather)
> ```
>   _119 = SR.115_117 | 1;
> ```
> Due to the order of the patterns.

Hmm, that means the former when present in source isn't optimized?

> OK? Bootstrapped and tested on x86_64-linux-gnu with no
> regressions.

OK, but please add a comment indicating the ordering requirement.

Can you also add a testcase?

Richard.

> gcc/ChangeLog:
>
> * match.pd (`a ? one_zero : one_zero`): Move
> below detection of minmax.
> ---
>  gcc/match.pd | 38 --
>  1 file changed, 20 insertions(+), 18 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 890f050cbad..c87a0795667 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4950,24 +4950,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   )
>  )
>
> -(simplify
> - (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
> - (switch
> -  /* bool0 ? bool1 : 0 -> bool0 & bool1 */
> -  (if (integer_zerop (@2))
> -   (bit_and (convert @0) @1))
> -  /* bool0 ? 0 : bool2 -> (bool0^1) & bool2 */
> -  (if (integer_zerop (@1))
> -   (bit_and (bit_xor (convert @0) { build_one_cst (type); } ) @2))
> -  /* bool0 ? 1 : bool2 -> bool0 | bool2 */
> -  (if (integer_onep (@1))
> -   (bit_ior (convert @0) @2))
> -  /* bool0 ? bool1 : 1 -> (bool0^1) | bool1 */
> -  (if (integer_onep (@2))
> -   (bit_ior (bit_xor (convert @0) @2) @1))
> - )
> -)
> -
>  /* Optimize
> # x_5 in range [cst1, cst2] where cst2 = cst1 + 1
> x_5 ? cstN ? cst4 : cst3
> @@ -5298,6 +5280,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>&& integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, @3, 
> @1)))
>(max @2 @4))
>
> +#if GIMPLE
> +(simplify
> + (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
> + (switch
> +  /* bool0 ? bool1 : 0 -> bool0 & bool1 */
> +  (if (integer_zerop (@2))
> +   (bit_and (convert @0) @1))
> +  /* bool0 ? 0 : bool2 -> (bool0^1) & bool2 */
> +  (if (integer_zerop (@1))
> +   (bit_and (bit_xor (convert @0) { build_one_cst (type); } ) @2))
> +  /* bool0 ? 1 : bool2 -> bool0 | bool2 */
> +  (if (integer_onep (@1))
> +   (bit_ior (convert @0) @2))
> +  /* bool0 ? bool1 : 1 -> (bool0^1) | bool1 */
> +  (if (integer_onep (@2))
> +   (bit_ior (bit_xor (convert @0) @2) @1))
> + )
> +)
> +#endif
> +
>  /* X != C1 ? -X : C2 simplifies to -X when -C1 == C2.  */
>  (simplify
>   (cond (ne @0 INTEGER_CST@1) (negate@3 @0) INTEGER_CST@2)
> --
> 2.31.1
>

Re: [PATCH 2/3] MATCH: `a | C -> C` when we know that `a & ~C == 0`

2023-08-25 Thread Richard Biener via Gcc-patches

On Thu, Aug 24, 2023 at 9:16 PM Andrew Pinski via Gcc-patches
 wrote:
>
> Even though this is handled by other code inside both VRP and CCP,
> sometimes we want to optimize this outside of VRP and CCP.
> An example is given in PR 106677 where phiopt will happen
> after VRP (which removes a cast for a comparison) and then
> phiopt will optimize the phi to be `a | 1` which can then
> be optimized to `1` due to this patch.

Also works for xor, no?

> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK with or without adding XOR.

Richard.

> Note Similar code already exists in simplify_rtx for the RTL level;
> it was moved from combine to simplify_rtx in r0-72539-gbd1ef757767f6d.
> gcc/ChangeLog:
>
> * match.pd (`a | C -> C`): New pattern.
> ---
>  gcc/match.pd | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c87a0795667..3bbeceb37b4 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1456,6 +1456,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
>&& wi::bit_and_not (get_nonzero_bits (@0), wi::to_wide (@1)) == 0)
>@0))
> +/* x | C -> C if we know that x & ~C == 0.  */
> +(simplify
> + (bit_ior SSA_NAME@0 INTEGER_CST@1)
> + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +  && wi::bit_and_not (get_nonzero_bits (@0), wi::to_wide (@1)) == 0)
> +  @1))
>  #endif
>
>  /* ~(~X - Y) -> X + Y and ~(~X + Y) -> X - Y.  */
> --
> 2.31.1
>

Re: [PATCH v1] LoongArch: Remove the symbolic extension instruction due to the SLT directive.

2023-08-25 Thread chenglulu




在 2023/8/25 下午12:16, WANG Xuerui 写道:

On 8/25/23 12:01, Lulu Cheng wrote:
Since the slt instruction does not distinguish between 32-bit and 
64-bit operations
under the LoongArch 64-bit architecture, if the operands of slt are 
of SImode, symbol

expansion is required before operation.
Hint:“符号扩展” is "sign extension" (as noun) or "sign-extend" (as verb), 
not "symbol expansion".


But similar to the following test case, symbol expansion can be omitted:

extern int src1, src2, src3;

int
test (void)
{
  int data1 = src1 + src2;
  int data2 = src1 + src3;
  return test1 > test2 ? test1 : test2;
}
Assembly code before optimization:
  ...
add.w    $r4,$r4,$r14
add.w    $r13,$r13,$r14
slli.w    $r12,$r4,0
slli.w    $r14,$r13,0
slt    $r12,$r12,$r14
masknez    $r4,$r4,$r12
maskeqz    $r12,$r13,$r12
or    $r4,$r4,$r12
slli.w    $r4,$r4,0
...

After optimization:
...
add.w    $r12,$r12,$r14
add.w    $r13,$r13,$r14
slt    $r4,$r12,$r13
masknez    $r12,$r12,$r4
maskeqz    $r4,$r13,$r4
or    $r4,$r12,$r4
...

Similar to this test example, the two operands of SLT are obtained by 
the

addition operation, and the addition operation "add.w" is an implicit
symbolic extension function, so the two operands of SLT do not require


more naturally: "and add.w implicitly sign-extends" -- brevity are 
often desired and clearer ;-)


Sorry I'll revise it soon!

Thanks!:-)




symbolic expansion.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
Optimize the function implementation.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/slt-sign-extend.c: New test.
---
  gcc/config/loongarch/loongarch.cc | 53 +--
  .../gcc.target/loongarch/slt-sign-extend.c    | 14 +
  2 files changed, 63 insertions(+), 4 deletions(-)
  create mode 100644 
gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c


diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc

index 86d58784113..1905599b9e8 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx 
*operands)

    enum rtx_code code = GET_CODE (operands[1]);
    rtx op0 = XEXP (operands[1], 0);
    rtx op1 = XEXP (operands[1], 1);
+  rtx op0_extend = op0;
+  rtx op1_extend = op1;
+
+  /* Record whether operands[2] and operands[3] modes are promoted 
to word_mode.  */

+  bool promote_p = false;
+  machine_mode mode = GET_MODE (operands[0]);
      if (FLOAT_MODE_P (GET_MODE (op1)))
  loongarch_emit_float_compare (, , );
    else
  {
+  if ((REGNO (op0) == REGNO (operands[2])
+   || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+  && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
+    {
+  mode = word_mode;
+  promote_p = true;
+    }
+
    loongarch_extend_comparands (code, , );
      op0 = force_reg (word_mode, op0);
+  op0_extend = op0;
+  op1_extend = force_reg (word_mode, op1);
      if (code == EQ || code == NE)
  {
@@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx 
*operands)

    && register_operand (operands[2], VOIDmode)
    && register_operand (operands[3], VOIDmode))
  {
-  machine_mode mode = GET_MODE (operands[0]);
+  rtx op2 = operands[2];
+  rtx op3 = operands[3];
+
+  if (promote_p)
+    {
+  if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
+    op2 = op0_extend;
+  else
+    {
+  loongarch_extend_comparands (code, , _rtx);
+  op2 = force_reg (mode, op2);
+    }
+
+  if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
+    op3 = op1_extend;
+  else
+    {
+  loongarch_extend_comparands (code, , _rtx);
+  op3 = force_reg (mode, op3);
+    }
+    }
+
    rtx temp = gen_reg_rtx (mode);
    rtx temp2 = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (temp,
    gen_rtx_IF_THEN_ELSE (mode, cond,
-    operands[2], const0_rtx)));
+    op2, const0_rtx)));
      /* Flip the test for the second operand.  */
    cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE 
(op0), op0, op1);

      emit_insn (gen_rtx_SET (temp2,
    gen_rtx_IF_THEN_ELSE (mode, cond,
-    operands[3], const0_rtx)));
+    op3, const0_rtx)));
      /* Merge the two results, at least one is guaranteed to be 
zero.  */
-  emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, 
temp2)));

+  if (promote_p)
+    {
+  rtx temp3 = gen_reg_rtx (mode);
+  emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
+  temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
+  loongarch_emit_move (operands[0], temp3);
+    }
+  else
+

71 matches

Mail list logo