date:20240216

[patch, libgfortran] Bug 105473 - semicolon allowed when list-directed read integer with decimal='point'

2024-02-16 Thread Jerry D


Hello,

I posted the attached patch in bugzilla some time ago. This includes a 
new test case. The patch adds additional checks in key places to catch 
erroneous use of semicolons.


Regression tested on x86_64,

OK for trunk and later backport to 13?

Jerry

diff --git a/gcc/testsuite/gfortran.dg/pr105473.f90 b/gcc/testsuite/gfortran.dg/pr105473.f90
new file mode 100644
index 000..b309217540d
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr105473.f90
@@ -0,0 +1,46 @@
+! { dg-do run }
+! PR libgfortran/105473
+  implicit none
+  integer n,m,ios
+  real r
+  complex z
+  character(40):: testinput
+  n = 999; m = 777; r=1.2345
+  z = cmplx(0.0,0.0)
+
+! Check that semi-colon is not allowed as separator with decimal=point.
+  ios=0
+  testinput = '1;17;3.14159'
+  read(testinput,*,decimal='point',iostat=ios) n, m, r
+  if (ios /= 5010) print *, "stop 1"
+
+! Check that comma is not allowed as a separator with decimal=comma.
+  ios=0
+  testinput = '1,17,3,14159'
+  read(testinput,*,decimal='comma',iostat=ios) n, m, r
+  if (ios /= 5010) print *, "stop 2"
+
+! Check a good read.
+  ios=99
+  testinput = '1;17;3,14159'
+  read(testinput,*,decimal='comma',iostat=ios) n, m, r
+  if (ios /= 0) print *, "stop 3"
+
+! Check that comma is not allowed as a separator with decimal=comma.
+  ios=99; z = cmplx(0.0,0.0)
+  testinput = '1,17, (3,14159, 1,7182)'
+  read(testinput,*,decimal='comma', iostat=ios) n, m, z
+  if (ios /= 5010) stop 4
+
+! Check that semi-colon is not allowed as separator with decimal=point.
+  ios=99; z = cmplx(0.0,0.0)
+  testinput = '1,17; (3.14159; 1.7182)'
+  read(testinput,*,decimal='point', iostat=ios) n, m, z
+  if (ios /= 5010) stop 5
+
+! Check a good read.
+  ios=99;z = cmplx(0.0,0.0)
+  testinput = '1;17; (3,14159; 1,7182)'
+  read(testinput,*,decimal='comma', iostat=ios) n, m, z
+  if (ios /= 0) stop 6
+end program
diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c
index 0b7884fdda7..d2316ad6fe2 100644
--- a/libgfortran/io/list_read.c
+++ b/libgfortran/io/list_read.c
@@ -53,7 +53,6 @@ typedef unsigned char uchar;
 #define CASE_SEPARATORS /* Fall through. */ \
 			case ' ': case ',': case '/': case '\n': \
 			case '\t': case '\r': case ';'
-
 /* This macro assumes that we're operating on a variable.  */
 
 #define is_separator(c) (c == '/' ||  c == ',' || c == '\n' || c == ' ' \
@@ -475,11 +474,23 @@ eat_separator (st_parameter_dt *dtp)
 case ',':
   if (dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
 	{
+	  generate_error (&dtp->common, LIBERROR_READ_VALUE,
+	   "Comma not allowed as separator with DECIMAL='comma'");
 	  unget_char (dtp, c);
 	  break;
 	}
-  /* Fall through.  */
+  dtp->u.p.comma_flag = 1;
+  eat_spaces (dtp);
+  break;
+
 case ';':
+  if (dtp->u.p.current_unit->decimal_status == DECIMAL_POINT)
+	{
+	  generate_error (&dtp->common, LIBERROR_READ_VALUE,
+	   "Semicolon not allowed as separator with DECIMAL='point'");
+	  unget_char (dtp, c);
+	  break;
+	}
   dtp->u.p.comma_flag = 1;
   eat_spaces (dtp);
   break;
@@ -1326,8 +1337,13 @@ parse_real (st_parameter_dt *dtp, void *buffer, int length)
 {
   if ((c = next_char (dtp)) == EOF)
 	goto bad;
-  if (c == ',' && dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
-	c = '.';
+  if (dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
+	{
+	  if (c == '.')
+	goto bad;
+	  if (c == ',')
+	c = '.';
+	}
   switch (c)
 	{
 	CASE_DIGITS:
@@ -1636,8 +1652,18 @@ read_real (st_parameter_dt *dtp, void *dest, int length)
   seen_dp = 0;
 
   c = next_char (dtp);
-  if (c == ',' && dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
-c = '.';
+  if (dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
+{
+  if (c == '.')
+	goto bad_real;
+  if (c == ',')
+	c = '.';
+}
+  if (dtp->u.p.current_unit->decimal_status == DECIMAL_POINT)
+{
+  if (c == ';')
+	goto bad_real;
+}
   switch (c)
 {
 CASE_DIGITS:
@@ -1677,8 +1703,13 @@ read_real (st_parameter_dt *dtp, void *dest, int length)
   for (;;)
 {
   c = next_char (dtp);
-  if (c == ',' && dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
-	c = '.';
+  if (dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
+	{
+	  if (c == '.')
+	goto bad_real;
+	  if (c == ',')
+	c = '.';
+	}
   switch (c)
 	{
 	CASE_DIGITS:
@@ -1718,7 +1749,7 @@ read_real (st_parameter_dt *dtp, void *dest, int length)
 
 	CASE_SEPARATORS:
 	case EOF:
-  if (c != '\n' && c != ',' && c != '\r' && c != ';')
+	  if (c != '\n' && c != ',' && c != ';' && c != '\r')
 	unget_char (dtp, c);
 	  goto done;
 
diff --git a/libgfortran/io/read.c b/libgfortran/io/read.c
index e2d2f8be806..7a9e341d7d8 100644
--- a/libgfortran/io/read.c
+++ b/libgfortran/io/read.c
@@ -1062,8 +1062,17 @@ read_f (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
 	case ',':
 	  if (dtp->u.p.current_unit->decimal_status != DECIMAL_COMMA)

[patch, libgfortran] PR107068 Run-time error when reading logical arrays with a namelist

2024-02-16 Thread Jerry D


The attached patch fixes this one. See the ChangeLog below for an explanation.

OK for trunk?

I think simple enough to backport to 13 as well.

Regards,

Jerry

Author: Jerry DeLisle 
Date:   Fri Feb 16 17:06:37 2024 -0800

libgfortran: Fix namelist read.

PR libgfortran/107068

libgfortran/ChangeLog:

* io/list_read.c (read_logical): When looking for a possible
variable name, check for left paren, indicating a possible
array reference.

gcc/testsuite/ChangeLog:

* gfortran.dg/pr107068.f90: New test.
diff --git a/gcc/testsuite/gfortran.dg/pr107068.f90 b/gcc/testsuite/gfortran.dg/pr107068.f90
new file mode 100644
index 000..c5ea0c1d244
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr107068.f90
@@ -0,0 +1,22 @@
+! { dg-do run }
+program test
+  implicit none
+  integer :: error
+  logical, dimension(3,3) :: flc,flp
+  namelist/inputdata/flc, flp
+
+  flc = .false.
+  flp = .false.
+
+  open(10, file="inputfile")
+  write(10,*) ""
+  write(10,*) " FLC = T, "
+  write(10,*) " FLP(1,2) = T,"
+  write(10,*) "/"
+  rewind(10)
+  !write(*, nml=inputdata)
+  !open(10,file="inputfile")
+  read(10,inputdata,iostat=error)
+  close(10, status='delete')
+  if (error /= 0) stop 20
+end program test
diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c
index f8ca64422de..0b7884fdda7 100644
--- a/libgfortran/io/list_read.c
+++ b/libgfortran/io/list_read.c
@@ -888,6 +888,14 @@ read_logical (st_parameter_dt *dtp, int length)
   for(i = 0; i < 63; i++)
 {
   c = next_char (dtp);
+  if (c == '(')
+	{
+	  l_push_char (dtp, c);
+	  dtp->u.p.nml_read_error = 1;
+	  dtp->u.p.line_buffer_enabled = 1;
+	  dtp->u.p.line_buffer_pos = 0;
+	  return;
+	}
   if (is_separator(c))
 	{
 	  /* All done if this is not a namelist read.  */

Re: [PATCH] Turn on LRA on all targets

2024-02-16 Thread Maciej W. Rozycki

On Fri, 16 Feb 2024, Maciej W. Rozycki wrote:

> On Fri, 16 Feb 2024, Jakub Jelinek wrote:
> 
> > >  There is no function prologue to optimise in the VAX case, because all 
> > > the frame setup has already been made by the CALLS instruction itself in 
> > > the caller.  The first machine instruction of the callee is technically 
> > > already past the "prologue".  And then RET serves as the whole function 
> > > "epilogue".
> > 
> > So, what is the problem with DWARF unwinding?  Just make sure to emit
> > appropriate instructions describing the saving of the corresponding
> > registers at specific points based on CFA at the start of the function
> > (so that it appears in CIE instructions) and that should be all that is
> > needed, no?
> 
>  I may not remember all the issues correctly offhand as it's been a while 
> since I looked into it, but as I recall DWARF handling code has not been 
> prepared for all the frame to have been already allocated and initialised 
> at a function's entry point, and also at least DWARF-4 is IIRC required to 
> have statics at offsets positive from FP (for a stack growing downwards).

 There is a further complication actually where lazy binding is in use.  
In that case a function that has been jumped to indirectly from the lazy 
resolver will often have a different number of statics saved in the frame 
from where the function has been called directly via a fully resolved PLT 
GOT entry.

 This is because at the time the lazy resolver is being called it is not 
known what statics the ultimate callee wants to save, as it is not a part 
of the ABI.  Therefore the worst condition is assumed and the resolver 
requests all the statics (R6-R11) to be saved, observing that saving more 
statics than required makes no change to code semantics, it just hurts 
performance (but calls to the lazy resolver are rare, so this is not a big 
deal).  Conversely when the function has been already resolved, the PLT 
GOT entry points at the callee instead, which will then only save the 
statics it has requested itself, knowing them to be used.

 Obviously a frame that has all the statics saved will have a different 
size of its variable part and slots will have been assigned differently 
there from the case where only some statics have been saved.  Of course it 
does not matter for regular code execution as RET will always correctly 
interpret a stack frame and restore exactly these statics that have been 
saved in the frame, but for unwinding actual frame contents have to be 
interpreted.

 I am not sure if this run-time dependent frame layout can be described in 
DWARF terms even, so I am leaning towards concluding a native unwinder is 
the only feasible way to go.

 For those who are unaware how information as to what statics are to be 
saved is made available by functions with VAX hardware: it is embedded at 
the function's address in a form of a 16-bit data quantity, which is a 
register save bitmask (an entry mask in VAX-speak) for registers R0-R11;
1 in the mask requests that the corresponding register be saved in the 
callee's frame by the CALLS instruction.  Once the frame has been built by 
CALLS, control is then passed to the location immediately following the 
bitmask, which is the function's actual entry point, i.e. the PC is set 
to the function's address + 2.

  Maciej

Re: [PATCH] Fortran: deferred length of character variables shall not get lost [PR113911]

2024-02-16 Thread Jerry D


On 2/16/24 1:40 PM, Harald Anlauf wrote:

Dear all,

this patch fixes a regression which was a side-effect of r14-8947,
losing the length of a deferred-length character variable when
passed as a dummy.

The new testcase provides a workout for deferred length to improve
coverage in the testsuite.  Another temporarily disabled test was
re-enabled.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald



Yes OK for mainline.

Thanks,

Jerry

Re: [PATCH] libgcc: fix Win32 CV abnormal spurious wakeups in timed wait [PR113850]

2024-02-16 Thread Jonathan Yong


On 2/10/24 10:10, Matteo Italia wrote:

Il 09/02/24 15:18, Matteo Italia ha scritto:

The Win32 threading model uses __gthr_win32_abs_to_rel_time to convert
the timespec used in gthreads to specify the absolute time for end of
the condition variables timed wait to a milliseconds value relative to
"now" to pass to the Win32 SleepConditionVariableCS function.

Unfortunately, the conversion is incorrect, as, due to a typo, it
returns the relative time _in seconds_, so SleepConditionVariableCS
receives a timeout value 1000 times shorter than it should be, resulting
in a huge amount of spurious wakeups in calls such as
std::condition_variable::wait_for or wait_until.

Re-reading the commit message I found a few typos, and it was generally 
a bit more obscure than I like; reworded it now, hope it's better.


Thanks, pushed to master and 13.x branches.

ping: [PATCH] diagnostics: Fix behavior of permerror options after diagnostic pop [PR111918]

2024-02-16 Thread Lewis Hyatt

CCing some global reviewers as well, in case anyone has a minute to
take a look please? Thanks!
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html

On Thu, Jan 25, 2024 at 4:57 PM Lewis Hyatt  wrote:
>
> May I please ask again about this one? It's just a couple lines, and I
> think it fixes an important gap in the logic for #pragma GCC
> diagnostic. The PR was not reported by me so I think at least one
> other person does care about it :). Thanks!
>
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
>
> -Lewis
>
> On Mon, Jan 8, 2024 at 6:53 PM Lewis Hyatt  wrote:
> >
> > Can I please ping this one again? It's 3 lines or so to fix the PR. Thanks!
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> >
> > On Tue, Dec 19, 2023 at 6:20 PM Lewis Hyatt  wrote:
> > >
> > > Hello-
> > >
> > > May I please ping this one? Thanks...
> > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> > >
> > > -Lewis
> > >
> > > On Wed, Nov 29, 2023 at 7:05 PM Lewis Hyatt  wrote:
> > > >
> > > > On Thu, Nov 09, 2023 at 04:16:10PM -0500, Lewis Hyatt wrote:
> > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111918
> > > > >
> > > > > This patch fixes the behavior of `#pragma GCC diagnostic pop' for 
> > > > > permissive
> > > > > error diagnostics such as -Wnarrowing (in C++11). Those currently do 
> > > > > not
> > > > > return to the correct state after the last pop; they become 
> > > > > effectively
> > > > > simple warnings instead. Bootstrap + regtest all languages on x86-64, 
> > > > > does
> > > > > it look OK please? Thanks!
> > > >
> > > > Hello-
> > > >
> > > > May I please ping this bug fix?
> > > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635871.html
> > > >
> > > > Please note, it requires a trivial rebase on top of recent changes to
> > > > the class diagnostic_context public interface. I attached the rebased 
> > > > patch
> > > > here as well. Thanks!
> > > >
> > > > -Lewis

Re: [PATCH] RISC-V: Adjust vec unit-stride load/store costs.

2024-02-16 Thread 钟居哲

Can memrefs be computed in analyze_loop_vinfo?

juzhe.zh...@rivai.ai

From: Robin Dapp
Date: 2024-02-13 21:42
To: gcc-patches; palmer; Kito Cheng; jeffreyalaw; juzhe.zh...@rivai.ai
CC: rdapp.gcc
Subject: [PATCH] RISC-V: Adjust vec unit-stride load/store costs.
Hi,

scalar loads provide offset addressing while unit-stride vector
instructions cannot.  The offset must be loaded into a general-purpose
register before it can be used.  In order to account for this, this
patch adds an address arithmetic heuristic that keeps track of data
reference operands.  If we haven't seen the operand before we add the
cost of a scalar statement.

This helps to get rid of an lbm regression when vectorizing (roughly
0.5% fewer dynamic instructions).  gcc5 improves by 0.2% and deepsjeng
by 0.25%.  wrf and nab degrade by 0.1%.  This is because we now
adjust the cost of SLP as well as loop-vectorized instructions, whereas
we would only adjust loop-vectorized instructions before.
Considering higher scalar_to_vec costs (3 vs 1) for all vectorization
types causes some snippets not to get vectorized anymore.  Given these
costs the decisions look correct but appear worse when just counting
dynamic instructions.

In total SPECint 2017 has 4 bn fewer dynamic instructions and SPECfp 0.7
bn fewer, so not a whole lot.

Regtested on riscv64.

Regards
Robin

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (adjust_stmt_cost): Move...
(costs::adjust_stmt_cost): ... to here and add vec_load/vec_store
offset handling.
(costs::add_stmt_cost): Also adjust cost for statements without
stmt_info.
* config/riscv/riscv-vector-costs.h: Define zero constant.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/vse-slp-1.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vse-slp-2.c: New test.
---
gcc/config/riscv/riscv-vector-costs.cc| 86 ---
gcc/config/riscv/riscv-vector-costs.h | 10 +++
.../vect/costmodel/riscv/rvv/vse-slp-1.c  | 51 +++
.../vect/costmodel/riscv/rvv/vse-slp-2.c  | 53 
4 files changed, 190 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vse-slp-1.c
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vse-slp-2.c

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 7c9840df4e9..adf9c197df5 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
#include "backend.h"
#include "tree-data-ref.h"
#include "tree-ssa-loop-niter.h"
+#include "tree-hash-traits.h"
/* This file should be included last.  */
#include "riscv-vector-costs.h"
@@ -1047,18 +1048,81 @@ costs::better_main_loop_than_p (const vector_costs 
*uncast_other) const
top of riscv_builtin_vectorization_cost handling which doesn't have any
information on statement operation codes etc.  */
-static unsigned
-adjust_stmt_cost (enum vect_cost_for_stmt kind, tree vectype, int stmt_cost)
+unsigned
+costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
+ stmt_vec_info stmt_info,
+ slp_tree, tree vectype, int stmt_cost)
{
   const cpu_vector_cost *costs = get_vector_costs ();
   switch (kind)
 {
 case scalar_to_vec:
-  return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
-   : costs->regmove->GR2VR);
+  stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
+ : costs->regmove->GR2VR);
+  break;
 case vec_to_scalar:
-  return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
-   : costs->regmove->VR2GR);
+  stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
+ : costs->regmove->VR2GR);
+  break;
+case vector_load:
+case vector_store:
+ {
+   /* Unit-stride vector loads and stores do not have offset addressing
+  as opposed to scalar loads and stores.
+  If the address depends on a variable we need an additional
+  add/sub for each load/store in the worst case.  */
+   if (stmt_info && stmt_info->stmt)
+ {
+   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+   class loop *father = stmt_info->stmt->bb->loop_father;
+   if (!loop && father && !father->inner && father->superloops)
+ {
+   tree ref;
+   if (TREE_CODE (dr->ref) != MEM_REF
+   || !(ref = TREE_OPERAND (dr->ref, 0))
+   || TREE_CODE (ref) != SSA_NAME)
+ break;
+
+   if (SSA_NAME_IS_DEFAULT_DEF (ref))
+ break;
+
+   if (memrefs.contains ({ref, cst0}))
+ break;
+
+   memrefs.add ({ref, cst0});
+
+   /* In case we have not seen REF before and the base address
+  is a pointer operation try a bit harder.  */
+   tree base = DR_BASE_ADDRESS (dr);
+   if (TREE_CODE (base) == POINTER_PLUS_EXPR
+   || TREE_CODE (base) == POINTER_DIFF_EXPR)
+ {
+   /* Deconstruct BASE's first operand.  If it is a binary
+ operation, i.e. a base and an "offset" store this
+

[Patch] OpenMP/C++: Fix (first)private clause with member variables [PR110347] [was: [RFA/RFC] C++/OpenMP: Supporting (first)private for member variables [PR110347] - or VALUE_EXPR and gimplify]

2024-02-16 Thread Tobias Burnus


Hi,

your suggestion almost did the trick, but caused regressions with
lambda closures in target regions.

Jakub Jelinek wrote:

Ah, and the reason why it doesn't work on target is that it has the
everything is mapped assumption:
   if ((ctx->region_type & ORT_TARGET) != 0)
 {
   if (ctx->region_type & ORT_ACC)
 /* For OpenACC, as remarked above, defer expansion.  */
 shared = false;
   else
 shared = true;
  
   ret = lang_hooks.decls.omp_disregard_value_expr (decl, shared);


Perhaps shared = true; should be shared = (flags & GOVD_MAPPED) != 0;
now that we have private/firstprivate clauses on target?


Hence, I now use this code, but also pass a flag to distinguish target
regions (→ map) from shared usage, assuming that it is needed for the
latter (otherwise, there wouldn't be that code).

The issue only showed up for a compile-only testcase, which I have now
turned into a run-time testcase.
In order to do so, I had to fix a bogus test for is mapped (or at least
I think it is bogus) - and for sure it didn't handle shared memory.

I also modified it such that it iterates over devices. Changes to the 
dump: the 'device' clause had to be added (3x) and for the long line: 
'this' and 'iptr' swapped the order and 'map(from:mapped)' became 
'firstprivate(mapped)' due to my changes.
I appended a patch which only shows the test-case differences as "git 
diff" contains all lines as I move it to libgomp/.


Comments, remarks, suggestions?

Tobias

OpenMP/C++: Fix (first)private clause with member variables [PR110347]

OpenMP permits '(first)private' for C++ member variables, which GCC handles
by tagging those by DECL_OMP_PRIVATIZED_MEMBER, adding a temporary VAR_DECL
and DECL_VALUE_EXPR pointing to the 'this->member_var' in the C++ front end.

The idea is that in omp-low.cc, the DECL_VALUE_EXPR is used before the
region (for 'firstprivate'; ignored for 'private') while in the region,
the DECL itself is used.

In gimplify, the value expansion is suppressed and deferred if the
  lang_hooks.decls.omp_disregard_value_expr (decl, shared)
returns true - which is never the case if 'shared' is true. In OpenMP 4.5,
only 'map' and 'use_device_ptr' were permitted for the 'target' directive.
And when OpenMP 5.0's 'private'/'firstprivate' clauses were added, the
update that 'shared' is only true for 'map' was missed.

However, just enabling it for all '!shared' will cause issues with
Lambda closures ("__closure->this->...") for which also a DECL_VALUE_EXPR
exists but that is not related to DECL_OMP_PRIVATIZED_MEMBER. Solution:
Update the lang hook to take a Boolean argument, indicating whether it
is called for a target region or not.

2024-02-16  Tobias Burnus  
	Jakub Jelinek  

	PR c++/110347

gcc/cp/ChangeLog:

	* cp-gimplify.cc (cxx_omp_disregard_value_expr): Add new
	Boolean argument and use it.
	* cp-tree.h (cxx_omp_disregard_value_expr): Update prototype.

gcc/fortran/ChangeLog:

	* trans-openmp.cc (gfc_omp_disregard_value_expr): Add
	unused Boolean argument.
	* trans.h (gfc_omp_disregard_value_expr): Update
	prototype.

gcc/ChangeLog:

	* gimplify.cc (omp_notice_variable): Update call to
	lang_hooks.decls.omp_disregard_value_expr.
	(omp_notice_variable): Likewise; fix 'shared' arg for
	(first)private in target regions.
	* hooks.cc (hook_bool_tree_bool_bool_false): New.
	* hooks.h (hook_bool_tree_bool_bool_false): New.
	* langhooks-def.h (LANG_HOOKS_OMP_DISREGARD_VALUE_EXPR):
	Use it.
	* langhooks.h (struct lang_hooks_for_decls): Add second
	Boolean argument.
	* omp-low.cc (omp_member_access_dummy_var): Update
	lang_hooks.decls.omp_disregard_value_expr call.

libgomp/ChangeLog:

	* testsuite/libgomp.c++/target-lambda-3.C: Moved from
	gcc/testsuite/g++.dg/gomp/ and fixed is-mapped handling.
	* testsuite/libgomp.c++/firstprivate-c++-1.C: New test.
	* testsuite/libgomp.c++/firstprivate-c++-2.C: New test.
	* testsuite/libgomp.c++/private-c++-1.C: New test.
	* testsuite/libgomp.c++/private-c++-2.C: New test.
	* testsuite/libgomp.c++/use_device_ptr-c++-1.C: New test.

gcc/testsuite/ChangeLog:

	* g++.dg/gomp/target-lambda-1.C: Moved to become a
	run-time test under testsuite/libgomp.c++.

Co-authored-by: Jakub Jelinek 

 gcc/cp/cp-gimplify.cc  |   7 +-
 gcc/cp/cp-tree.h   |   2 +-
 gcc/fortran/trans-openmp.cc|   2 +-
 gcc/fortran/trans.h|   2 +-
 gcc/gimplify.cc|  12 +-
 gcc/hooks.cc   |   6 +
 gcc/hooks.h|   1 +
 gcc/langhooks-def.h|   2 +-
 gcc/langhooks.h|   5 +-
 gcc/omp-low.cc |   2 +-
 gcc/testsuite/g++.dg/gomp/target-lambda-1.C|  94 ---
 libgomp/testsuite/libgomp.c++/firstprivate-c++-1.C | 305 +

Re: [PATCH] c++: Fix up parameter pack diagnostics on xobj vs. varargs functions [PR113802]

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 10:47:47PM +0100, Jakub Jelinek wrote:
> The following patch works.

Or yet another option would be, instead of (sometimes) clearing
declarator->parameter_pack_p when we diagnose this bug, to ignore the
'this' specifier for error recovery.
With the following patch (testsuite patch remains the same),
I get excess errors though:
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:30:25: 
error: expansion pattern 'Selves' contains no parameter packs
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:42:26: 
error: expansion pattern 'Selves' contains no parameter packs
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:56:26: 
error: expansion pattern 'Selves&' contains no parameter packs
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:68:27: 
error: expansion pattern 'Selves&' contains no parameter packs
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:82:27: 
error: expansion pattern 'Selves&&' contains no parameter packs
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:94:28: 
error: expansion pattern 'Selves&&' contains no parameter packs
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:108:32: 
error: expansion pattern 'const Selves&' contains no parameter packs
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:120:33: 
error: expansion pattern 'const Selves&' contains no parameter packs
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:134:33: 
error: expansion pattern 'const Selves&&' contains no parameter packs
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:146:34: 
error: expansion pattern 'const Selves&&' contains no parameter packs
though, that is e.g. on
struct S0 {
  template
  void g(this Selves... selves) {}  // { dg-error "an explicit object parameter 
cannot be a function parameter pack" }
}
where such an extra error would have been emitted if the this keyword was
omitted.

--- gcc/cp/parser.cc.jj 2024-02-16 17:38:27.802845433 +0100
+++ gcc/cp/parser.cc2024-02-16 23:08:40.835437740 +0100
@@ -25734,22 +25734,6 @@ cp_parser_parameter_declaration (cp_pars
   decl_specifiers.locations[ds_this] = 0;
 }
 
-  if (xobj_param_p
-  && ((declarator && declarator->parameter_pack_p)
- || cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS)))
-{
-  location_t xobj_param
-   = make_location (decl_specifiers.locations[ds_this],
-decl_spec_token_start->location,
-input_location);
-  error_at (xobj_param,
-   "an explicit object parameter cannot "
-   "be a function parameter pack");
-  /* Suppress errors that occur down the line.  */
-  if (declarator)
-   declarator->parameter_pack_p = false;
-}
-
   /* If a function parameter pack was specified and an implicit template
  parameter was introduced during cp_parser_parameter_declaration,
  change any implicit parameters introduced into packs.  */
@@ -25762,9 +25746,10 @@ cp_parser_parameter_declaration (cp_pars
(INNERMOST_TEMPLATE_PARMS (current_template_parms));
 
   if (latest_template_parm_idx != template_parm_idx)
-   decl_specifiers.type = convert_generic_types_to_packs
- (decl_specifiers.type,
-  template_parm_idx, latest_template_parm_idx);
+   decl_specifiers.type
+ = convert_generic_types_to_packs (decl_specifiers.type,
+   template_parm_idx,
+   latest_template_parm_idx);
 }
 
   if (cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS))
@@ -25794,6 +25779,21 @@ cp_parser_parameter_declaration (cp_pars
}
 }
 
+  if (xobj_param_p
+  && (declarator ? declarator->parameter_pack_p
+: PACK_EXPANSION_P (decl_specifiers.type)))
+{
+  location_t xobj_param
+   = make_location (decl_specifiers.locations[ds_this],
+decl_spec_token_start->location,
+input_location);
+  error_at (xobj_param,
+   "an explicit object parameter cannot "
+   "be a function parameter pack");
+  xobj_param_p = false;
+  decl_specifiers.locations[ds_this] = 0;
+}
+
   /* The restriction on defining new types applies only to the type
  of the parameter, not to the default argument.  */
   parser->type_definition_forbidden_message = saved_message;

Jakub

[PATCH v3] c++: wrong looser excep spec for dep noexcept [PR113158]

2024-02-16 Thread Marek Polacek

On Fri, Feb 16, 2024 at 04:39:47PM -0500, Patrick Palka wrote:
> On Fri, 16 Feb 2024, Marek Polacek wrote:
> > +  /* We also have to defer checking when we're in a template and couldn't
> > + instantiate & evaluate the noexcept to true/false.  */
> > +  if (processing_template_decl)
> > +if ((base_throw
> > +&& (base_throw != noexcept_true_spec
> > +|| base_throw != noexcept_false_spec))
> 
> Shouldn't these innermost || be &&?

D'oh, yes, what a dumb mistake.  But that shows that we could also just
always return true in a template ;).

Fixed.  dg.exp passed so far.

-- >8 --
Here we find ourselves in maybe_check_overriding_exception_spec in
a template context where we can't instantiate a dependent noexcept.
That's OK, but we have to defer the checking otherwise we give wrong
errors.

PR c++/113158

gcc/cp/ChangeLog:

* search.cc (maybe_check_overriding_exception_spec): Defer checking
when a noexcept couldn't be instantiated & evaluated to false/true.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept83.C: New test.
---
 gcc/cp/search.cc| 11 
 gcc/testsuite/g++.dg/cpp0x/noexcept83.C | 37 +
 2 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept83.C

diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
index c948839dc53..827f48e8604 100644
--- a/gcc/cp/search.cc
+++ b/gcc/cp/search.cc
@@ -1975,6 +1975,17 @@ maybe_check_overriding_exception_spec (tree overrider, 
tree basefn)
   || UNPARSED_NOEXCEPT_SPEC_P (over_throw))
 return true;
 
+  /* We also have to defer checking when we're in a template and couldn't
+ instantiate & evaluate the noexcept to true/false.  */
+  if (processing_template_decl)
+if ((base_throw
+&& base_throw != noexcept_true_spec
+&& base_throw != noexcept_false_spec)
+   || (over_throw
+   && over_throw != noexcept_true_spec
+   && over_throw != noexcept_false_spec))
+  return true;
+
   if (!comp_except_specs (base_throw, over_throw, ce_derived))
 {
   auto_diagnostic_group d;
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept83.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
new file mode 100644
index 000..47832bbb44d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
@@ -0,0 +1,37 @@
+// PR c++/113158
+// { dg-do compile { target c++11 } }
+
+template
+struct V {
+  static constexpr bool t = false;
+};
+struct base {
+virtual int f() = 0;
+};
+
+template
+struct derived : base {
+int f() noexcept(V::t) override;
+};
+
+struct base2 {
+virtual int f() noexcept = 0;
+};
+
+template
+struct W {
+  static constexpr bool t = B;
+};
+
+template
+struct derived2 : base2 {
+int f() noexcept(W::t) override; // { dg-error "looser exception 
specification" }
+};
+
+void
+g ()
+{
+  derived d1;
+  derived2 d2; // { dg-message "required from here" }
+  derived2 d3;
+}

base-commit: cd503b0616462445381a8232fb753239d319af76
-- 
2.43.2

RE: [COMMITTED] c++: Add testcase for this PR [PR97990]

2024-02-16 Thread Andrew Pinski (QUIC)



> -Original Message-
> From: Marek Polacek 
> Sent: Friday, February 16, 2024 11:11 AM
> To: Andrew Pinski (QUIC) 
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [COMMITTED] c++: Add testcase for this PR [PR97990]
> 
> On Fri, Feb 16, 2024 at 11:00:34AM -0800, Andrew Pinski wrote:
> > This testcase was fixed by r14-5934-gf26d68d5d128c8 but we should add
> > one to make sure it does not regress again.
> >
> > Committed as obvious after a quick test on the testcase.
> >
> > PR c++/97990
> >
> > gcc/testsuite/ChangeLog:
> >
> > * g++.dg/torture/vector-struct-1.C: New test.
> >
> > Signed-off-by: Andrew Pinski 
> > ---
> >  gcc/testsuite/g++.dg/torture/vector-struct-1.C | 18
> > ++
> >  1 file changed, 18 insertions(+)
> >  create mode 100644 gcc/testsuite/g++.dg/torture/vector-struct-1.C
> >
> > diff --git a/gcc/testsuite/g++.dg/torture/vector-struct-1.C
> > b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
> > new file mode 100644
> > index 000..e2747417e2d
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
> > @@ -0,0 +1,18 @@
> > +/* PR c++/97990 */
> > +/* This used to crash with lto and strict aliasing enabled as the
> > +   vector type variant still had TYPE_ALIAS_SET set on it. */
> 
> You don't have -Wstrict-aliasing here without which the test didn't ICE.

Oh you are correct. This is what I committed after testing to make sure it even 
ICEd before the reference commit.

Thanks,
Andrew Pinski


> 
> > +typedef __attribute__((__vector_size__(sizeof(short short TSimd;
> > +TSimd hh(int); struct y6 {
> > +  TSimd VALUE;
> > +  ~y6();
> > +};
> > +template 
> > +auto f2(T1 p1, T2){
> > +  return hh(p1) <= 0;
> > +}
> > +void f1(){
> > +  f2(0, y6{});
> > +}
> > --
> > 2.43.0
> >
> 
> Marek



0001-Add-Wstrict-aliasing-to-vector-struct-1.C-testcase.patch
Description: 0001-Add-Wstrict-aliasing-to-vector-struct-1.C-testcase.patch

Re: [PATCH] c++: Fix up parameter pack diagnostics on xobj vs. varargs functions [PR113802]

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 10:20:26PM +0100, Jakub Jelinek wrote:
> I've tried that (see below), but am getting
> Excess errors:
> /usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:33:29: 
> error: parameter packs not expanded with '...':

And the reason for those is that e.g. on the reduced
struct S0 {
  void j(this auto... selves) {}  // { dg-error "an explicit object parameter 
cannot be a function parameter pack" }
};
with the first posted patch we first do:
if (xobj_param_p && (declarator && declarator->parameter_pack_p))
and clear declarator->parameter_pack_p there.
Then comes
  if (parser->implicit_template_parms
  && ((token->type == CPP_ELLIPSIS
   && declarator_can_be_parameter_pack (declarator))
  || (declarator && declarator->parameter_pack_p)))
which is true only with the second patch and not the first.

Adding
   if (parser->implicit_template_parms
   && ((token->type == CPP_ELLIPSIS
   && declarator_can_be_parameter_pack (declarator))
- || (declarator && declarator->parameter_pack_p)))
+ || (declarator && declarator->parameter_pack_p && !xobj_param_p)))
 {
   int latest_template_parm_idx = TREE_VEC_LENGTH
(INNERMOST_TEMPLATE_PARMS (current_template_parms));
fixes some of the excess errors, but not all of them, e.g. on
struct S1 {
  void h(this auto&...) {}  // { dg-error "an explicit object parameter cannot 
be a function parameter pack" }
};
it still remains.  The problem in there is that apparently we
need declarator->parameter_pack_p cleared on the xobj diagnostics
iff it was previously set before the CPP_ELLIPSIS handling above it,
but not if it was set in that handling.  I'm afraid I have no idea
why though.

The following patch works.

2024-02-16  Jakub Jelinek  

PR c++/113802
* parser.cc (cp_parser_parameter_declaration): Move the xobj_param_p
pack diagnostics after ellipsis handling, but only clear
declarator->parameter_pack_p for it if it was set before CPP_ELLIPSIS
handling.  Ignore declarator->parameter_pack_p if xobj_param_p.
Formatting fix.

* g++.dg/cpp23/explicit-obj-diagnostics3.C (S0, S1, S2, S3, S4): Don't
expect any diagnostics on f and fd member function templates, add
similar templates with ...Selves instead of Selves as k and kd and
expect diagnostics for those.

--- gcc/cp/parser.cc.jj 2024-02-16 17:38:27.802845433 +0100
+++ gcc/cp/parser.cc2024-02-16 22:42:38.393896067 +0100
@@ -25734,39 +25734,25 @@ cp_parser_parameter_declaration (cp_pars
   decl_specifiers.locations[ds_this] = 0;
 }
 
-  if (xobj_param_p
-  && ((declarator && declarator->parameter_pack_p)
- || cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS)))
-{
-  location_t xobj_param
-   = make_location (decl_specifiers.locations[ds_this],
-decl_spec_token_start->location,
-input_location);
-  error_at (xobj_param,
-   "an explicit object parameter cannot "
-   "be a function parameter pack");
-  /* Suppress errors that occur down the line.  */
-  if (declarator)
-   declarator->parameter_pack_p = false;
-}
-
   /* If a function parameter pack was specified and an implicit template
  parameter was introduced during cp_parser_parameter_declaration,
  change any implicit parameters introduced into packs.  */
   if (parser->implicit_template_parms
   && ((token->type == CPP_ELLIPSIS
   && declarator_can_be_parameter_pack (declarator))
- || (declarator && declarator->parameter_pack_p)))
+ || (declarator && declarator->parameter_pack_p && !xobj_param_p)))
 {
   int latest_template_parm_idx = TREE_VEC_LENGTH
(INNERMOST_TEMPLATE_PARMS (current_template_parms));
 
   if (latest_template_parm_idx != template_parm_idx)
-   decl_specifiers.type = convert_generic_types_to_packs
- (decl_specifiers.type,
-  template_parm_idx, latest_template_parm_idx);
+   decl_specifiers.type
+ = convert_generic_types_to_packs (decl_specifiers.type,
+   template_parm_idx,
+   latest_template_parm_idx);
 }
 
+  bool prev_parameter_pack_p = declarator && declarator->parameter_pack_p;
   if (cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS))
 {
   tree type = decl_specifiers.type;
@@ -25794,6 +25780,22 @@ cp_parser_parameter_declaration (cp_pars
}
 }
 
+  if (xobj_param_p
+  && (declarator ? declarator->parameter_pack_p
+: PACK_EXPANSION_P (decl_specifiers.type)))
+{
+  location_t xobj_param
+   = make_location (decl_specifiers.locations[ds_this],
+decl_spec_token_start->location,
+input_location);
+  error_at (xobj_param,
+   "an explicit object parameter cannot "
+

[PATCH] Fortran: deferred length of character variables shall not get lost [PR113911]

2024-02-16 Thread Harald Anlauf

Dear all,

this patch fixes a regression which was a side-effect of r14-8947,
losing the length of a deferred-length character variable when
passed as a dummy.

The new testcase provides a workout for deferred length to improve
coverage in the testsuite.  Another temporarily disabled test was
re-enabled.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald

From 07fcdf7c9f9272d8e4752c23f04795d02d4ad440 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Fri, 16 Feb 2024 22:33:16 +0100
Subject: [PATCH] Fortran: deferred length of character variables shall not get
 lost [PR113911]

	PR fortran/113911

gcc/fortran/ChangeLog:

	* trans-array.cc (gfc_trans_deferred_array): Do not clobber
	deferred length for a character variable passed as dummy argument.

gcc/testsuite/ChangeLog:

	* gfortran.dg/allocatable_length_2.f90: New test.
	* gfortran.dg/bind_c_optional-2.f90: Enable deferred-length test.
---
 gcc/fortran/trans-array.cc|   2 +-
 .../gfortran.dg/allocatable_length_2.f90  | 107 ++
 .../gfortran.dg/bind_c_optional-2.f90 |   3 +-
 3 files changed, 109 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/allocatable_length_2.f90

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 2181990aa04..3673fa40720 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -11531,7 +11531,7 @@ gfc_trans_deferred_array (gfc_symbol * sym, gfc_wrapped_block * block)
   if (sym->ts.type == BT_CHARACTER
   && !INTEGER_CST_P (sym->ts.u.cl->backend_decl))
 {
-  if (sym->ts.deferred && !sym->ts.u.cl->length)
+  if (sym->ts.deferred && !sym->ts.u.cl->length && !sym->attr.dummy)
 	gfc_add_modify (, sym->ts.u.cl->backend_decl,
 			build_zero_cst (TREE_TYPE (sym->ts.u.cl->backend_decl)));
   gfc_conv_string_length (sym->ts.u.cl, NULL, );
diff --git a/gcc/testsuite/gfortran.dg/allocatable_length_2.f90 b/gcc/testsuite/gfortran.dg/allocatable_length_2.f90
new file mode 100644
index 000..2fd64efdc25
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/allocatable_length_2.f90
@@ -0,0 +1,107 @@
+! { dg-do run }
+! PR fortran/113911
+!
+! Test that deferred length is not lost
+
+module m
+  integer, parameter:: n = 100, l = 10
+  character(l)  :: a = 'a234567890', b(n) = 'bcdefghijk'
+  character(:), allocatable :: c1, c2(:)
+end
+
+program p
+  use m, only : l, n, a, b, x => c1, y => c2
+  implicit none
+  character(:), allocatable :: d, e(:)
+  allocate (d, source=a)
+  allocate (e, source=b)
+  if (len (d) /= l .or. len (e) /= l .or. size (e) /= n) stop 12
+  call plain_deferred (d, e)
+  call optional_deferred (d, e)
+  call optional_deferred_ar (d, e)
+  if (len (d) /= l .or. len (e) /= l .or. size (e) /= n) stop 13
+  deallocate (d, e)
+  call alloc (d, e)
+  if (len (d) /= l .or. len (e) /= l .or. size (e) /= n) stop 14
+  deallocate (d, e)
+  call alloc_host_assoc ()
+  if (len (d) /= l .or. len (e) /= l .or. size (e) /= n) stop 15
+  deallocate (d, e)
+  call alloc_use_assoc ()
+  if (len (x) /= l .or. len (y) /= l .or. size (y) /= n) stop 16
+  call indirect (x, y)
+  if (len (x) /= l .or. len (y) /= l .or. size (y) /= n) stop 17
+  deallocate (x, y)
+contains
+  subroutine plain_deferred (c1, c2)
+character(:), allocatable :: c1, c2(:)
+if (.not. allocated (c1) .or. .not. allocated (c2)) stop 1
+if (len (c1) /= l) stop 2
+if (len (c2) /= l) stop 3
+if (c1(1:3)/= "a23") stop 4
+if (c2(5)(1:3) /= "bcd") stop 5
+  end
+
+  subroutine optional_deferred (c1, c2)
+character(:), allocatable, optional :: c1, c2(:)
+if (.not. present   (c1) .or. .not. present   (c2)) stop 6
+if (.not. allocated (c1) .or. .not. allocated (c2)) stop 7
+if (len (c1) /= l) stop 8
+if (len (c2) /= l) stop 9
+if (c1(1:3)/= "a23") stop 10
+if (c2(5)(1:3) /= "bcd") stop 11
+  end
+
+  ! Assumed rank
+  subroutine optional_deferred_ar (c1, c2)
+character(:), allocatable, optional :: c1(..)
+character(:), allocatable, optional :: c2(..)
+if (.not. present   (c1) .or. &
+.not. present   (c2)) stop 21
+if (.not. allocated (c1) .or. &
+.not. allocated (c2)) stop 22
+
+select rank (c1)
+rank (0)
+if (len (c1) /= l)   stop 23
+  if (c1(1:3)  /= "a23") stop 24
+rank default
+  stop 25
+end select
+
+select rank (c2)
+rank (1)
+  if (len (c2) /= l)   stop 26
+  if (c2(5)(1:3) /= "bcd") stop 27
+rank default
+  stop 28
+end select
+  end
+
+  ! Allocate dummy arguments
+  subroutine alloc (c1, c2)
+character(:), allocatable :: c1, c2(:)
+allocate (c1, source=a)
+allocate (c2, source=b)
+  end
+
+  ! Allocate host-associated variables
+  subroutine alloc_host_assoc ()
+allocate (d, source=a)
+allocate (e, source=b)
+  end
+
+  ! Allocate use-associated variables
+  subroutine alloc_use_assoc ()
+allocate (x, source=a)
+

Re: [PATCH v2] c++: wrong looser excep spec for dep noexcept [PR113158]

2024-02-16 Thread Patrick Palka

On Fri, 16 Feb 2024, Marek Polacek wrote:

> On Fri, Feb 16, 2024 at 03:58:02PM -0500, Jason Merrill wrote:
> > On 2/15/24 17:17, Marek Polacek wrote:
> > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > 
> > > By the ??? below I mean that maybe_instantiate_noexcept could return
> > > a tristate, and then maybe_check_overriding_exception_spec could check
> > > 
> > >if (maybe_instantiate_noexcept ().is_unknown ())
> > >  return true;
> > > 
> > > and we don't have to add any new checks to maybe_check_o_e_spec.
> > > 
> > > -- >8 --
> > > Here we find ourselves in maybe_check_overriding_exception_spec in
> > > a template context where we can't instantiate a dependent noexcept.
> > > That's OK, but we have to defer the checking otherwise we give wrong
> > > errors.
> > > 
> > >   PR c++/113158
> > > 
> > > gcc/cp/ChangeLog:
> > > 
> > >   * search.cc (maybe_check_overriding_exception_spec): Defer checking
> > >   when a noexcept couldn't be instantiated.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > >   * g++.dg/cpp0x/noexcept83.C: New test.
> > > ---
> > >   gcc/cp/search.cc|  7 +
> > >   gcc/testsuite/g++.dg/cpp0x/noexcept83.C | 37 +
> > >   2 files changed, 44 insertions(+)
> > >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept83.C
> > > 
> > > diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
> > > index c948839dc53..73d254d6b84 100644
> > > --- a/gcc/cp/search.cc
> > > +++ b/gcc/cp/search.cc
> > > @@ -1975,6 +1975,13 @@ maybe_check_overriding_exception_spec (tree 
> > > overrider, tree basefn)
> > > || UNPARSED_NOEXCEPT_SPEC_P (over_throw))
> > >   return true;
> > > +  /* We also have to defer checking when we're in a template and couldn't
> > > + instantiate the noexcept yet.
> > > + ??? maybe_instantiate_noexcept already checked these.  Use 
> > > tristate?  */
> > > +  if (type_dependent_expression_p (base_throw)
> > > +  || type_dependent_expression_p (over_throw))
> > 
> > I think we also want to avoid comparing value-dependent expressions, but
> > actually checking either one seems like more work than needed here; I'd
> > think we want to defer in a template if the specifiers aren't both exactly
> > true or false.
> 
> Yeah, that'll work too.  So like this?
> 
> Bootstrap/regtest running; dg.exp passed.  FWIW, the new check only
> triggered on the new test.
> 
> Thanks,
> 
> -- >8 --
> Here we find ourselves in maybe_check_overriding_exception_spec in
> a template context where we can't instantiate a dependent noexcept.
> That's OK, but we have to defer the checking otherwise we give wrong
> errors.
> 
>   PR c++/113158
> 
> gcc/cp/ChangeLog:
> 
>   * search.cc (maybe_check_overriding_exception_spec): Defer checking
>   when a noexcept couldn't be instantiated & evaluated to false/true.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/cpp0x/noexcept83.C: New test.
> ---
>  gcc/cp/search.cc| 11 
>  gcc/testsuite/g++.dg/cpp0x/noexcept83.C | 37 +
>  2 files changed, 48 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept83.C
> 
> diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
> index c948839dc53..554ba71f4a7 100644
> --- a/gcc/cp/search.cc
> +++ b/gcc/cp/search.cc
> @@ -1975,6 +1975,17 @@ maybe_check_overriding_exception_spec (tree overrider, 
> tree basefn)
>|| UNPARSED_NOEXCEPT_SPEC_P (over_throw))
>  return true;
>  
> +  /* We also have to defer checking when we're in a template and couldn't
> + instantiate & evaluate the noexcept to true/false.  */
> +  if (processing_template_decl)
> +if ((base_throw
> +  && (base_throw != noexcept_true_spec
> +  || base_throw != noexcept_false_spec))

Shouldn't these innermost || be &&?

> + || (over_throw
> + && (over_throw != noexcept_true_spec
> + || over_throw != noexcept_false_spec)))

> +  return true;
> +
>if (!comp_except_specs (base_throw, over_throw, ce_derived))
>  {
>auto_diagnostic_group d;
> diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept83.C 
> b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
> new file mode 100644
> index 000..47832bbb44d
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
> @@ -0,0 +1,37 @@
> +// PR c++/113158
> +// { dg-do compile { target c++11 } }
> +
> +template
> +struct V {
> +  static constexpr bool t = false;
> +};
> +struct base {
> +virtual int f() = 0;
> +};
> +
> +template
> +struct derived : base {
> +int f() noexcept(V::t) override;
> +};
> +
> +struct base2 {
> +virtual int f() noexcept = 0;
> +};
> +
> +template
> +struct W {
> +  static constexpr bool t = B;
> +};
> +
> +template
> +struct derived2 : base2 {
> +int f() noexcept(W::t) override; // { dg-error "looser exception 
> specification" }
> +};
> +
> +void
> +g ()
> +{
> +  derived d1;
> +  derived2 d2; // { dg-message "required from here" }
> +

Re: [PATCH v6 0/5]New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2024-02-16 Thread Kees Cook

On Fri, Feb 16, 2024 at 07:47:18PM +, Qing Zhao wrote:
> This is the 6th version of the patch.

Thanks! I've tested this and it meets all the current behavioral
expectations I've got:
https://github.com/kees/kernel-tools/blob/trunk/fortify/array-bounds.c

Additionally, this builds the Linux kernel where we have almost 300
instances of "counted_by" attributes.

Yay!

-Kees

-- 
Kees Cook

[PATCH v2] c++: wrong looser excep spec for dep noexcept [PR113158]

2024-02-16 Thread Marek Polacek

On Fri, Feb 16, 2024 at 03:58:02PM -0500, Jason Merrill wrote:
> On 2/15/24 17:17, Marek Polacek wrote:
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > By the ??? below I mean that maybe_instantiate_noexcept could return
> > a tristate, and then maybe_check_overriding_exception_spec could check
> > 
> >if (maybe_instantiate_noexcept ().is_unknown ())
> >  return true;
> > 
> > and we don't have to add any new checks to maybe_check_o_e_spec.
> > 
> > -- >8 --
> > Here we find ourselves in maybe_check_overriding_exception_spec in
> > a template context where we can't instantiate a dependent noexcept.
> > That's OK, but we have to defer the checking otherwise we give wrong
> > errors.
> > 
> > PR c++/113158
> > 
> > gcc/cp/ChangeLog:
> > 
> > * search.cc (maybe_check_overriding_exception_spec): Defer checking
> > when a noexcept couldn't be instantiated.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp0x/noexcept83.C: New test.
> > ---
> >   gcc/cp/search.cc|  7 +
> >   gcc/testsuite/g++.dg/cpp0x/noexcept83.C | 37 +
> >   2 files changed, 44 insertions(+)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept83.C
> > 
> > diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
> > index c948839dc53..73d254d6b84 100644
> > --- a/gcc/cp/search.cc
> > +++ b/gcc/cp/search.cc
> > @@ -1975,6 +1975,13 @@ maybe_check_overriding_exception_spec (tree 
> > overrider, tree basefn)
> > || UNPARSED_NOEXCEPT_SPEC_P (over_throw))
> >   return true;
> > +  /* We also have to defer checking when we're in a template and couldn't
> > + instantiate the noexcept yet.
> > + ??? maybe_instantiate_noexcept already checked these.  Use tristate?  
> > */
> > +  if (type_dependent_expression_p (base_throw)
> > +  || type_dependent_expression_p (over_throw))
> 
> I think we also want to avoid comparing value-dependent expressions, but
> actually checking either one seems like more work than needed here; I'd
> think we want to defer in a template if the specifiers aren't both exactly
> true or false.

Yeah, that'll work too.  So like this?

Bootstrap/regtest running; dg.exp passed.  FWIW, the new check only
triggered on the new test.

Thanks,

-- >8 --
Here we find ourselves in maybe_check_overriding_exception_spec in
a template context where we can't instantiate a dependent noexcept.
That's OK, but we have to defer the checking otherwise we give wrong
errors.

PR c++/113158

gcc/cp/ChangeLog:

* search.cc (maybe_check_overriding_exception_spec): Defer checking
when a noexcept couldn't be instantiated & evaluated to false/true.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept83.C: New test.
---
 gcc/cp/search.cc| 11 
 gcc/testsuite/g++.dg/cpp0x/noexcept83.C | 37 +
 2 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept83.C

diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
index c948839dc53..554ba71f4a7 100644
--- a/gcc/cp/search.cc
+++ b/gcc/cp/search.cc
@@ -1975,6 +1975,17 @@ maybe_check_overriding_exception_spec (tree overrider, 
tree basefn)
   || UNPARSED_NOEXCEPT_SPEC_P (over_throw))
 return true;
 
+  /* We also have to defer checking when we're in a template and couldn't
+ instantiate & evaluate the noexcept to true/false.  */
+  if (processing_template_decl)
+if ((base_throw
+&& (base_throw != noexcept_true_spec
+|| base_throw != noexcept_false_spec))
+   || (over_throw
+   && (over_throw != noexcept_true_spec
+   || over_throw != noexcept_false_spec)))
+  return true;
+
   if (!comp_except_specs (base_throw, over_throw, ce_derived))
 {
   auto_diagnostic_group d;
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept83.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
new file mode 100644
index 000..47832bbb44d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
@@ -0,0 +1,37 @@
+// PR c++/113158
+// { dg-do compile { target c++11 } }
+
+template
+struct V {
+  static constexpr bool t = false;
+};
+struct base {
+virtual int f() = 0;
+};
+
+template
+struct derived : base {
+int f() noexcept(V::t) override;
+};
+
+struct base2 {
+virtual int f() noexcept = 0;
+};
+
+template
+struct W {
+  static constexpr bool t = B;
+};
+
+template
+struct derived2 : base2 {
+int f() noexcept(W::t) override; // { dg-error "looser exception 
specification" }
+};
+
+void
+g ()
+{
+  derived d1;
+  derived2 d2; // { dg-message "required from here" }
+  derived2 d3;
+}

base-commit: 40b8d7b73ad2ce498758c1d9bd38ebdbc26b918b
-- 
2.43.2

Re: [PATCH] Notes on the warnings-as-errors change in GCC 14

2024-02-16 Thread Gerald Pfeifer

On Thu, 15 Feb 2024, Florian Weimer wrote:
>>> +GCC no longer casts all pointer types to all other pointer types.
>>
>> Do you mean it no longer does so implicitly, or not at all? That is,
>> there are now cases where even an explicit cast such as
>>
>>   foo_p = (foo_type*) bar_p
>>
>> no longer works? Or just
>>
>>   foo_p = bar_p
>>
>> no longer works for all combinations?
> The latter, other reviewers noted it as well, and I've got this now:
> “GCC no longer [allows implicitly casting] all pointer types to all”

Ah, got it. The wording above nicely clarifies it to me.

I am wondering whether "...every pointer type to every other pointer type" 
might be even more clear? (Open question, "no" being a very valid answer.)

>> I *think* we may need to use &gt; here instead of plain '>', though I may 
>> be wrong.
> No, only &lt; needs to be quoted.  This is true even for XML, not just
> HTML5.  Do you want me to change these to &gt;?

No, no; if it validates, we're good. :-)

> What about this?
> 
>These failed probes tend to disable program features [together with]
>their tests[], resulting in silently dropping features.
> 
> This is what I meant with “consistently”: implementations and tests are
> gone, so the testsuite doesn't flag it.

I like it!

>>> +In cases where this is a concern, generated config.log,
>>> +config.h and other source code files can be compared
>>> +using <a href="https://www.gnu.org/software/diffutils/">diff</a>,
>>> +to ensure there are no unexpected differences.
>> I wouldn't link to GNU diffutils here; just refer to the diff 
>> command - or even omit that aspect and leave it at "can be compared".
> diff is really useful for that, manual comparison isn't. 8-)
> I can drop the hyperlink.

Yes, I never would compare manually myself. :)

Let's drop the hyperlink then; people developing software would know diff.

>>> +Some build systems do not pass the CFLAGS environment
>>> +or make variable to all parts of the builds
>>
>> Is "make" a common variable? What is the context here?
> Hmm, I meant to allude $(CFLAGS) here.
> 
> “CFLAGS [] variable to all parts of the builds” should be
> sufficient.

Ah, reading this again I see it was "environment variable" or "make 
variable" - the beauty of natural languages and their ambiguity.

Yes, your suggested edit looks good!

> I need to add two more code examples to the Autoconf section, should I
> post a v2 with that, or add that in a subsequent commit?

Primarily as you prefer. My personal recommendation (not request) is to 
commit the current patch and then add on top.

Thanks again for your work documenting all this!

Gerald

Re: [PATCH] c++: Fix up parameter pack diagnostics on xobj vs. varargs functions [PR113802]

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 03:47:41PM -0500, Jason Merrill wrote:
> Can we move all the xobj handling down here (where we can trust
> declarator->parameter_pack_p) instead of adding a new variable?

I've tried that (see below), but am getting
Excess errors:
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:33:29: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:45:30: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:58:23: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:59:30: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:62:25: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:70:24: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:71:31: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:74:26: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:84:24: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:85:31: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:88:26: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:96:25: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:97:32: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:100:27: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:110:29: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:111:36: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:114:31: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:122:30: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:123:37: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:126:32: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:136:30: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:137:37: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:140:32: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:148:31: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:149:38: 
error: parameter packs not expanded with '...':
/usr/src/gcc/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C:152:33: 
error: parameter packs not expanded with '...':
with it (the other errors are emitted as expected).

2024-02-16  Jakub Jelinek  

PR c++/113802
* parser.cc (cp_parser_parameter_declaration): Move the xobj_param_p
pack diagnostics after ellipsis handling.  Formatting fix.

* g++.dg/cpp23/explicit-obj-diagnostics3.C (S0, S1, S2, S3, S4): Don't
expect any diagnostics on f and fd member function templates, add
similar templates with ...Selves instead of Selves as k and kd and
expect diagnostics for those.

--- gcc/cp/parser.cc.jj 2024-02-16 17:38:27.802845433 +0100
+++ gcc/cp/parser.cc2024-02-16 22:10:32.904353402 +0100
@@ -25734,22 +25734,6 @@ cp_parser_parameter_declaration (cp_pars
   decl_specifiers.locations[ds_this] = 0;
 }

-  if (xobj_param_p
-  && ((declarator && declarator->parameter_pack_p)
- || cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS)))
-{
-  location_t xobj_param
-   = make_location (decl_specifiers.locations[ds_this],
-decl_spec_token_start->location,
-input_location);
-  error_at (xobj_param,
-   "an explicit object parameter cannot "
-   "be a function parameter pack");
-  /* Suppress errors that occur down the line.  */
-  if (declarator)
-   declarator->parameter_pack_p = false;
-}
-
   /* If a function parameter pack was specified and an implicit template
  parameter was introduced during

Re: [PATCH] Fortran: fix passing array component to polymorphic argument [PR105658]

2024-02-16 Thread Harald Anlauf


Hi Peter,

thanks for your contribution to gfortran!  You've found indeed
a solution for a potentially annoying bug.

Am 15.02.24 um 18:50 schrieb Peter Hill:

Dear all,

The attached patch fixes PR105658 by forcing an array temporary to be
created. This is required when passing an array component, but this
didn't happen if the dummy argument was an unlimited polymorphic type.

The problem bit of code is in `gfc_conv_expr_descriptor`, near L7828:

   subref_array_target = (is_subref_array (expr)
  && (se->direct_byref
|| expr->ts.type == BT_CHARACTER));
   need_tmp = (gfc_ref_needs_temporary_p (expr->ref)
   && !subref_array_target);

where `need_tmp` is being evaluated to 0.  The logic here isn't clear
to me, and this function is used in several places, which is why I
went with setting `parmse.force_tmp = 1` in `gfc_conv_procedure_call`
and using the same conditional as the later branch for the
non-polymorphic case (near the call to `gfc_conv_subref_array_arg`)

If this patch is ok, please could someone commit it for me? This is my
first patch for GCC, so apologies in advance if the commit message is
missing something.


Your patch mostly does the right thing.  Note that when fsym is
an unlimited polymorphic, some of its attributes are buried deep
within its internal representation.  I would also prefer to move
the code to gfc_conv_intrinsic_to_class where it seems to fit better,
like:

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index a0593b76f18..db906caa52e 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -1019,6 +1019,14 @@ gfc_conv_intrinsic_to_class (gfc_se *parmse,
gfc_expr *e,
   tmp = gfc_typenode_for_spec (_ts);
   var = gfc_create_var (tmp, "class");

+  /* Force a temporary for component or substring references.  */
+  if (unlimited_poly
+  && class_ts.u.derived->components->attr.dimension
+  && !class_ts.u.derived->components->attr.class_pointer
+  && !class_ts.u.derived->components->attr.allocatable
+  && is_subref_array (e))
+parmse->force_tmp = 1;
+
   /* Set the vptr.  */
   ctree = gfc_class_vptr_get (var);

(I am not entirely sure whether we need to exclude pointer and
allocatable attributes here explicitly, given the constraints
in F2023:15.5.2.6, but others may have an opinion, too.
The above should be safe anyway.)


Tested on x86_64-pc-linux-gnu.

The bug is present in gfortran back to 4.9, so should it also be backported?


I think we'll target 14-mainline and might consider a backport to
13-branch.


Cheers,
Peter

  PR fortran/105658

gcc/fortran/ChangeLog

 * trans-expr.cc (gfc_conv_procedure_call): When passing an
 array component reference of intrinsic type to a procedure
 with an unlimited polymorphic dummy argument, a temporary
 should be created.

gcc/testsuite/ChangeLog

 * gfortran.dg/PR105658.f90: New test.
---
  gcc/fortran/trans-expr.cc  |  8 
  gcc/testsuite/gfortran.dg/PR105658.f90 | 25 +
  2 files changed, 33 insertions(+)
  create mode 100644 gcc/testsuite/gfortran.dg/PR105658.f90

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index a0593b76f18..7fd3047c4e9 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -6439,6 +6439,14 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
CLASS object for the unlimited polymorphic formal.  */
 gfc_find_vtab (>ts);
 gfc_init_se (, se);
+   /* The actual argument is a component reference to an array
+  of derived types, so we need to force creation of a
+  temporary */
+   if (e->expr_type == EXPR_VARIABLE
+   && is_subref_array (e)
+   && !(fsym && fsym->attr.pointer))
+ parmse.force_tmp = 1;
+
 gfc_conv_intrinsic_to_class (, e, fsym->ts);

   }
diff --git a/gcc/testsuite/gfortran.dg/PR105658.f90
b/gcc/testsuite/gfortran.dg/PR105658.f90
new file mode 100644
index 000..407ee25f77c
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/PR105658.f90
@@ -0,0 +1,25 @@
+! { dg-do compile }
+! { dg-options "-Warray-temporaries" }
+! Test fix for incorrectly passing array component to unlimited
polymorphic procedure
+
+module test_PR105658_mod
+  implicit none
+  type :: foo
+integer :: member1
+integer :: member2
+  end type foo
+contains
+  subroutine print_poly(array)
+class(*), dimension(:), intent(in) :: array
+select type(array)
+type is (integer)
+  print*, array
+end select
+  end subroutine print_poly
+
+  subroutine do_print(thing)
+type(foo), dimension(3), intent(in) :: thing
+call print_poly(thing%member1) ! { dg-warning "array temporary" }
+  end subroutine do_print
+
+end module test_PR105658_mod


One could extend this testcase to cover substrings as well:

module test_PR105658_mod
  implicit none
  type :: foo
integer :: member1
integer :: member2
  end type foo
contains
  subroutine print_poly(array)
class(*),

[PATCH] dwarf2out, v3: Emit DW_AT_export_symbols on anon unions/structs [PR113918]

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 03:40:39PM -0500, Jason Merrill wrote:
> > --- gcc/cp/cp-objcp-common.cc.jj2024-02-13 12:50:21.666846296 +0100
> > +++ gcc/cp/cp-objcp-common.cc   2024-02-16 20:40:51.374763528 +0100
> > @@ -410,6 +410,15 @@ cp_type_dwarf_attribute (const_tree type
> > return 1;
> > break;
> > +case DW_AT_export_symbols:
> 
> For C++ this can use ANON_AGGR_TYPE_P, so it doesn't need to involve the
> FIELD_DECL at all.  But I suppose the C front-end doesn't have a similar
> flag?

Yes, using ANON_AGGR_TYPE_P there works for C++, but C doesn't have anything
like that.  All it uses is DECL_NAME == NULL on FIELD_DECL +
RECORD_OR_UNION_TYPE_P on its type to determine anon struct/union.

The patch below has updated cp_type_dwarf_attribute, otherwise the same as
before.

2024-02-16  Jakub Jelinek  

PR debug/113918
gcc/
* dwarf2out.cc (gen_field_die): Emit DW_AT_export_symbols
on anonymous unions or structs for -gdwarf-5 or -gno-strict-dwarf.
gcc/c/
* c-tree.h (c_type_dwarf_attribute): Declare.
* c-objc-common.h (LANG_HOOKS_TYPE_DWARF_ATTRIBUTE): Redefine.
* c-objc-common.cc: Include dwarf2.h.
(c_type_dwarf_attribute): New function.
gcc/cp/
* cp-objcp-common.cc (cp_type_dwarf_attribute): Return 1
for DW_AT_export_symbols on anonymous structs or unions.
gcc/testsuite/
* c-c++-common/dwarf2/pr113918.c: New test.

--- gcc/dwarf2out.cc.jj 2024-02-15 13:54:29.284358101 +0100
+++ gcc/dwarf2out.cc2024-02-16 20:38:19.718841259 +0100
@@ -25153,6 +25153,17 @@ gen_field_die (tree decl, struct vlr_con
 
   add_accessibility_attribute (decl_die, decl);
 
+  /* Add DW_AT_export_symbols to anonymous unions or structs.  */
+  if ((dwarf_version >= 5 || !dwarf_strict) && DECL_NAME (decl) == NULL_TREE)
+if (tree type = member_declared_type (decl))
+  if (lang_hooks.types.type_dwarf_attribute (TYPE_MAIN_VARIANT (type),
+DW_AT_export_symbols) != -1)
+   {
+ dw_die_ref type_die = lookup_type_die (TYPE_MAIN_VARIANT (type));
+ if (type_die && get_AT (type_die, DW_AT_export_symbols) == NULL)
+   add_AT_flag (type_die, DW_AT_export_symbols, 1);
+   }
+
   /* Equate decl number to die, so that we can look up this decl later on.  */
   equate_decl_number_to_die (decl, decl_die);
 }
--- gcc/c/c-tree.h.jj   2024-01-31 10:46:35.164761720 +0100
+++ gcc/c/c-tree.h  2024-02-16 20:43:45.993372908 +0100
@@ -731,6 +731,7 @@ extern bool c_warn_unused_global_decl (c
 extern void c_initialize_diagnostics (diagnostic_context *);
 extern bool c_var_mod_p (tree x, tree fn);
 extern alias_set_type c_get_alias_set (tree);
+extern int c_type_dwarf_attribute (const_tree, int);
 
 /* in c-typeck.cc */
 extern int in_alignof;
--- gcc/c/c-objc-common.h.jj2024-01-03 12:06:52.973862999 +0100
+++ gcc/c/c-objc-common.h   2024-02-16 20:42:21.073535465 +0100
@@ -119,6 +119,9 @@ static const scoped_attribute_specs *con
 #undef LANG_HOOKS_GIMPLIFY_EXPR
 #define LANG_HOOKS_GIMPLIFY_EXPR c_gimplify_expr
 
+#undef LANG_HOOKS_TYPE_DWARF_ATTRIBUTE
+#define LANG_HOOKS_TYPE_DWARF_ATTRIBUTE c_type_dwarf_attribute
+
 #undef LANG_HOOKS_OMP_PREDETERMINED_SHARING
 #define LANG_HOOKS_OMP_PREDETERMINED_SHARING c_omp_predetermined_sharing
 
--- gcc/c/c-objc-common.cc.jj   2024-01-03 12:06:53.213859637 +0100
+++ gcc/c/c-objc-common.cc  2024-02-16 20:45:24.649022305 +0100
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.
 #include "gcc-rich-location.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "dwarf2.h"
 
 static bool c_tree_printer (pretty_printer *, text_info *, const char *,
int, bool, bool, bool, bool *, const char **);
@@ -446,3 +447,25 @@ instantiation_dependent_expression_p (tr
 {
   return false;
 }
+
+/* Return -1 if dwarf ATTR shouldn't be added for TYPE, or the attribute
+   value otherwise.  */
+int
+c_type_dwarf_attribute (const_tree type, int attr)
+{
+  if (type == NULL_TREE)
+return -1;
+
+  switch (attr)
+{
+case DW_AT_export_symbols:
+  if (RECORD_OR_UNION_TYPE_P (type) && TYPE_NAME (type) == NULL_TREE)
+   return 1;
+  break;
+
+default:
+  break;
+}
+
+  return -1;
+}
--- gcc/cp/cp-objcp-common.cc.jj2024-02-13 12:50:21.666846296 +0100
+++ gcc/cp/cp-objcp-common.cc   2024-02-16 21:48:33.880458318 +0100
@@ -410,6 +410,11 @@ cp_type_dwarf_attribute (const_tree type
return 1;
   break;
 
+case DW_AT_export_symbols:
+  if (ANON_AGGR_TYPE_P (type))
+   return 1;
+  break;
+
 default:
   break;
 }
--- gcc/testsuite/c-c++-common/dwarf2/pr113918.c.jj 2024-02-16 
20:27:13.996961811 +0100
+++ gcc/testsuite/c-c++-common/dwarf2/pr113918.c2024-02-16 
20:27:13.996961811 +0100
@@ -0,0 +1,33 @@
+/* PR debug/113918 */
+/* { dg-do compile } */
+/* { dg-options "-gdwarf-5 -dA -fno-merge-debug-strings" } */
+
+struct S {
+

Re: [PATCH RFA] build: drop target libs from LD_LIBRARY_PATH [PR105688]

2024-02-16 Thread Jason Merrill


On 2/14/24 18:33, Iain Sandoe wrote:



On 14 Feb 2024, at 22:59, Iain Sandoe  wrote:



On 12 Feb 2024, at 19:59, Jason Merrill  wrote:

On 2/10/24 07:30, Iain Sandoe wrote:

On 10 Feb 2024, at 12:07, Jason Merrill  wrote:

On 2/10/24 05:46, Iain Sandoe wrote:

On 9 Feb 2024, at 23:21, Iain Sandoe  wrote:




On 9 Feb 2024, at 10:56, Iain Sandoe  wrote:

On 8 Feb 2024, at 21:44, Jason Merrill  wrote:

On 2/8/24 12:55, Paolo Bonzini wrote:

On 2/8/24 18:16, Jason Merrill wrote:




Hmm.  In stage 1, when we build with the system gcc, I'd think we want the 
just-built gnat1 to find the system libgcc.

In stage 2, when we build with the stage 1 gcc, we want the just-built gnat1 to 
find the stage 1 libgcc.

In neither case do we want it to find the libgcc from the current stage.

So it seems to me that what we want is for stage2+ LD_LIBRARY_PATH to include 
the TARGET_LIB_PATH from the previous stage.  Something like the below, on top 
of the earlier patch.

Does this make sense?  Does it work on Darwin?


Oops, that was broken, please consider this one instead:

Yes, this one makes sense (and the current code would not work since it lacks 
the prev- prefix on TARGET_LIB_PATH).


Indeed, that seems like evidence that the only element of TARGET_LIB_PATH that 
has been useful in HOST_EXPORTS is the prev- part of HOST_LIB_PATH_gcc.

So, here's another patch that just includes that for post-stage1:
<0001-build-drop-target-libs-from-LD_LIBRARY_PATH-PR105688.patch>


Hmm this still fails for me with gnat1 being unable to find libgcc_s.
It seems I have to add the PREV_HOST_LIB_PATH_gcc to HOST_LIB_PATH for it to 
succeed so,
presumably, the post stage1 exports are not being forwarded to that build.  
I’ll try to analyze what
exactly is failing.


The fail is occurring in the target libada build; so, I suppose, one might say 
it’s reasonable that it
requires this host path to be added to the target exports since it’s a host 
library used during target
builds (or do folks expect the host exports to be made for target lib builds as 
well?)

Appending the prev-gcc directory to the HOST_LIB_PATH fixes this

Hmm this is still not right, in this case, I think it should actually be the 
“just built” directory;
- if we have a tool that depends on host libraries (that happen to be also 
target ones),
  then those libraries have to be built before the tool so that they can be 
linked to it.
  (we specially copy libgcc* and the CRTs to gcc/ to allow for this case)
- there is no prev-gcc in cross and --disable-bootstrap builds, but the tool 
will still be
   linked to the just-built host libraries (which will also be installed).
So, I think we have to add HOST_LIB_PATH_gcc to HOST_LIB_PATH
and HOST_PREV_LIB_PATH_gcc to POSTSTAGE1_HOST_EXPORTS (as per this patch).


I don't follow.  In a cross build, host libraries are a different architecture 
from target libraries, and certainly can't be linked into host binaries.

In a disable-bootstrap build, even before my change TARGET_LIB_PATH isn't added 
to RPATH_ENVVAR, since that has been guarded with @if gcc-bootstrap.

So in a bootstrap build, it shouldn't be needed for stage1 either.  And for 
stage2, the one we need is from stage1, that matches the compiler we're 
building host tools with.

What am I missing?

nothing, I was off on a tangent about the cross/non-bootstrap, sorry about that.
However, when doing target builds (the previous point) it seems we do have to 
make provision for gnat1 to find libgcc_s, and, at present, it seems that only 
the target exports are active.


Ah, I see: When building target libraries in stage2, we run the stage2 compiler 
that needs the stage1 libgcc_s, but we don't have the HOST_EXPORTS because 
we're building target code, so we also need to get the libgcc path into 
TARGET_EXPORTS.

Since TARGET_LIB_PATH is only added when gcc-bootstrap, I guess the previous 
libgcc is the only piece needed in TARGET_EXPORTS as well.  So, how about this 
version of the patch?


I tested this one on an affected platform version with and without 
--enable-host-shared and for all languages (less go which is not yet supported). 
 It works for me, thanks,
Iain


Incidentally, during my investigations I was looking into various parts of this 
and it seems that actually TARGET_LIB_PATH might well be effectively dead code 
now.


Good point.  Could you test this version as well?


From 6cd188de23ba5b7ac38a7902580fd861af03d3aa Mon Sep 17 00:00:00 2001
From: Jason Merrill 
Date: Wed, 24 Jan 2024 07:47:26 -0500
Subject: [PATCH] build: drop target libs from LD_LIBRARY_PATH [PR105688]
To: gcc-patches@gcc.gnu.org

The patch for PR22340 (r104978) moved the adding of TARGET_LIB_PATH to
RPATH_ENVVAR from POSTSTAGE1_HOST_EXPORTS to HOST_EXPORTS, but didn't
mention that in the ChangeLog; it also wasn't part of the patch that was
sent to gcc-patches.  I suspect it was included accidentally?

It also causes PR105688 when rebuilding stage1: once the stage1 libstdc++
has been built,

Re: [PATCH] c++: wrong looser excep spec for dep noexcept [PR113158]

2024-02-16 Thread Jason Merrill


On 2/15/24 17:17, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

By the ??? below I mean that maybe_instantiate_noexcept could return
a tristate, and then maybe_check_overriding_exception_spec could check

   if (maybe_instantiate_noexcept ().is_unknown ())
 return true;

and we don't have to add any new checks to maybe_check_o_e_spec.

-- >8 --
Here we find ourselves in maybe_check_overriding_exception_spec in
a template context where we can't instantiate a dependent noexcept.
That's OK, but we have to defer the checking otherwise we give wrong
errors.

PR c++/113158

gcc/cp/ChangeLog:

* search.cc (maybe_check_overriding_exception_spec): Defer checking
when a noexcept couldn't be instantiated.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept83.C: New test.
---
  gcc/cp/search.cc|  7 +
  gcc/testsuite/g++.dg/cpp0x/noexcept83.C | 37 +
  2 files changed, 44 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept83.C

diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
index c948839dc53..73d254d6b84 100644
--- a/gcc/cp/search.cc
+++ b/gcc/cp/search.cc
@@ -1975,6 +1975,13 @@ maybe_check_overriding_exception_spec (tree overrider, 
tree basefn)
|| UNPARSED_NOEXCEPT_SPEC_P (over_throw))
  return true;
  
+  /* We also have to defer checking when we're in a template and couldn't

+ instantiate the noexcept yet.
+ ??? maybe_instantiate_noexcept already checked these.  Use tristate?  */
+  if (type_dependent_expression_p (base_throw)
+  || type_dependent_expression_p (over_throw))


I think we also want to avoid comparing value-dependent expressions, but 
actually checking either one seems like more work than needed here; I'd 
think we want to defer in a template if the specifiers aren't both 
exactly true or false.



+return true;
+
if (!comp_except_specs (base_throw, over_throw, ce_derived))
  {
auto_diagnostic_group d;
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept83.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
new file mode 100644
index 000..47832bbb44d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept83.C
@@ -0,0 +1,37 @@
+// PR c++/113158
+// { dg-do compile { target c++11 } }
+
+template
+struct V {
+  static constexpr bool t = false;
+};
+struct base {
+virtual int f() = 0;
+};
+
+template
+struct derived : base {
+int f() noexcept(V::t) override;
+};
+
+struct base2 {
+virtual int f() noexcept = 0;
+};
+
+template
+struct W {
+  static constexpr bool t = B;
+};
+
+template
+struct derived2 : base2 {
+int f() noexcept(W::t) override; // { dg-error "looser exception 
specification" }
+};
+
+void
+g ()
+{
+  derived d1;
+  derived2 d2; // { dg-message "required from here" }
+  derived2 d3;
+}

base-commit: b3b3bd250f0a7c22b7d46d3522c8b94c6a35d22a
prerequisite-patch-id: 3beddc8cae6ef7f28cd7eac7240d5f4dad08e5f7

Re: [PATCH] c++: wrong looser exception spec with deleted fn

2024-02-16 Thread Jason Merrill


On 2/15/24 17:16, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

IMHO trivial enough to go ahead now seeing as it doesn't introduce
new errors.


OK.


-- >8 --
I noticed we don't implement the "unless the overriding function is
defined as deleted" wording added to [except.spec] via CWG 1351.

DR 1351

gcc/cp/ChangeLog:

* search.cc (maybe_check_overriding_exception_spec): Don't error about
a looser exception specification if the overrider is deleted.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept82.C: New test.
---
  gcc/cp/search.cc| 11 +--
  gcc/testsuite/g++.dg/cpp0x/noexcept82.C | 14 ++
  2 files changed, 23 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept82.C

diff --git a/gcc/cp/search.cc b/gcc/cp/search.cc
index 2b4ed5d024e..c948839dc53 100644
--- a/gcc/cp/search.cc
+++ b/gcc/cp/search.cc
@@ -1949,7 +1949,11 @@ locate_field_accessor (tree basetype_path, tree 
field_decl, bool const_p)
  }
  
  /* Check throw specifier of OVERRIDER is at least as strict as

-   the one of BASEFN.  */
+   the one of BASEFN.  This is due to [except.spec]: "If a virtual function
+   has a non-throwing exception specification, all declarations, including
+   the definition, of any function that overrides that virtual function in
+   any derived class shall have a non-throwing exception specification,
+   unless the overriding function is defined as deleted."  */
  
  bool

  maybe_check_overriding_exception_spec (tree overrider, tree basefn)
@@ -1959,7 +1963,10 @@ maybe_check_overriding_exception_spec (tree overrider, 
tree basefn)
tree base_throw = TYPE_RAISES_EXCEPTIONS (TREE_TYPE (basefn));
tree over_throw = TYPE_RAISES_EXCEPTIONS (TREE_TYPE (overrider));
  
-  if (DECL_INVALID_OVERRIDER_P (overrider))

+  if (DECL_INVALID_OVERRIDER_P (overrider)
+  /* CWG 1351 added the "unless the overriding function is defined as
+deleted" wording.  */
+  || DECL_DELETED_FN (overrider))
  return true;
  
/* Can't check this yet.  Pretend this is fine and let

diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept82.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept82.C
new file mode 100644
index 000..c996613139b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept82.C
@@ -0,0 +1,14 @@
+// DR 1351, Problems with implicitly-declared exception-specifications
+// { dg-do compile { target c++11 } }
+
+struct B {
+  virtual void f() noexcept;
+  virtual void g();
+  virtual void h() noexcept = delete;
+};
+
+struct D: B {
+  void f(); // { dg-error "looser" }
+  void g() noexcept;// OK
+  void h() = delete;// OK
+};

base-commit: 0d5d1c75f5c68b6064640c3154ae5f4c0b464905

Re: [PATCH] c++: Fix up parameter pack diagnostics on xobj vs. varargs functions [PR113802]

2024-02-16 Thread Jason Merrill


On 2/16/24 04:03, Jakub Jelinek wrote:

Hi!

The simple presence of ellipsis as next token after the parameter
declaration doesn't imply it is a parameter pack, it sometimes is, e.g.
if its type is a pack, but sometimes is not and in that case it acts
the same as if the next tokens were , ... instead of just ...
The "xobj param cannot be a function parameter pack" check, though, treats
both declarator->parameter_pack_p and token->type == CPP_ELLIPSIS as
sufficient conditions for the error.  The conditions for CPP_ELLIPSIS
are done a little bit later in the same function and complex enough that
IMHO shouldn't be repeated, on the other side for the
declarator->parameter_pack_p case we clear that flag for xobj params
for error recovery reasons.
In order to avoid diagnosing this in two spots, one at the current spot
for declarator->parameter_pack_p and one for the ellipsis case after
we decide if it is parameter pack or varargs, the following patch instead
just sets a boolean flag whether we should emit this diagnostics, does it
early for declarator->parameter_pack_p case and clears the parameter_pack_p
flag in that case like the older patch did, and for the ellipsis case
sets the flag later, then emits the diagnostics.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-16  Jakub Jelinek  

PR c++/113802
* parser.cc (cp_parser_parameter_declaration): Don't emit
PR113307 diagnostics too early, instead for the
declarator->parameter_pack_p case just set a flag it should be emitted
later.  Set that flag also when consuming following ellipsis as part
of a parameter pack and diagnose either afterwards.  Formatting fix.

* g++.dg/cpp23/explicit-obj-diagnostics3.C (S0, S1, S2, S3, S4): Don't
expect any diagnostics on f and fd member function templates, add
similar templates with ...Selves instead of Selves as k and kd and
expect diagnostics for those.

--- gcc/cp/parser.cc.jj 2024-02-14 14:26:19.0 +0100
+++ gcc/cp/parser.cc2024-02-15 11:58:27.033618967 +0100
@@ -25727,17 +25727,10 @@ cp_parser_parameter_declaration (cp_pars
bool const xobj_param_p
  = decl_spec_seq_has_spec_p (_specifiers, ds_this);
  
-  if (xobj_param_p

-  && ((declarator && declarator->parameter_pack_p)
- || cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS)))
+  bool diag_xobj_parameter_pack = false;
+  if (xobj_param_p && (declarator && declarator->parameter_pack_p))
  {
-  location_t xobj_param
-   = make_location (decl_specifiers.locations[ds_this],
-decl_spec_token_start->location,
-input_location);
-  error_at (xobj_param,
-   "an explicit object parameter cannot "
-   "be a function parameter pack");
+  diag_xobj_parameter_pack = true;
/* Suppress errors that occur down the line.  */
if (declarator)
declarator->parameter_pack_p = false;
@@ -25755,9 +25748,10 @@ cp_parser_parameter_declaration (cp_pars
(INNERMOST_TEMPLATE_PARMS (current_template_parms));
  
if (latest_template_parm_idx != template_parm_idx)

-   decl_specifiers.type = convert_generic_types_to_packs
- (decl_specifiers.type,
-  template_parm_idx, latest_template_parm_idx);
+   decl_specifiers.type
+ = convert_generic_types_to_packs (decl_specifiers.type,
+   template_parm_idx,
+   latest_template_parm_idx);
  }
  
if (cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS))

@@ -25773,6 +25767,8 @@ cp_parser_parameter_declaration (cp_pars
   || (!type && template_parm_p))
  && declarator_can_be_parameter_pack (declarator))
{
+ if (xobj_param_p)
+   diag_xobj_parameter_pack = true;
  /* Consume the `...'. */
  cp_lexer_consume_token (parser->lexer);
  maybe_warn_variadic_templates ();
@@ -25787,6 +25783,17 @@ cp_parser_parameter_declaration (cp_pars
}
  }
  
+  if (diag_xobj_parameter_pack)


Can we move all the xobj handling down here (where we can trust 
declarator->parameter_pack_p) instead of adding a new variable?



+{
+  location_t xobj_param
+   = make_location (decl_specifiers.locations[ds_this],
+decl_spec_token_start->location,
+input_location);
+  error_at (xobj_param,
+   "an explicit object parameter cannot "
+   "be a function parameter pack");
+}
+
/* The restriction on defining new types applies only to the type
   of the parameter, not to the default argument.  */
parser->type_definition_forbidden_message = saved_message;
--- gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C.jj   2024-01-17 
10:34:49.812597960 +0100
+++ gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C  2024-02-15

Re: [PATCH] dwarf2out, v2: Emit DW_AT_export_symbols on anon unions/structs [PR113918]

2024-02-16 Thread Jason Merrill


On 2/16/24 14:52, Jakub Jelinek wrote:

On Fri, Feb 16, 2024 at 04:52:20PM +0100, Jakub Jelinek wrote:

On Fri, Feb 16, 2024 at 10:48:28AM -0500, Jason Merrill wrote:

On 2/16/24 04:14, Jakub Jelinek wrote:

DWARF5 added DW_AT_export_symbols both for use on inline namespaces (where
we emit it), but also on anonymous unions/structs (and we didn't emit that
attribute there).
The following patch fixes it.


Should this involve cp_decl_dwarf_attribute like the namespace handling?


I wrote it in dwarf2out.cc because the same thing needs to be done for C and
C++ (admittedly dunno if other languages have something similar).

Sure, it could be done in cp_decl_dwarf_attribute too but then it has to be
done in c_decl_dwarf_attribute which doesn't exist.  Though, it is slightly
complicated by DW_AT_export_symbols not actually going on the DW_TAG_member
die but on the DW_TAG_{structure,class,union}_type which DW_TAG_member uses
as its DW_AT_type.  But in order to ask the langhook, we likely need to pass
the FIELD_DECL and not the type...


Anyway, here is a version of the patch that uses the langhook for it.
So that the langhook isn't totally abused and for performance reasons too
I've kept the DECL_NAME == NULL check on the dwarf2out.cc side and am
calling it on the type rather than decl, because type is where the attribute
should go.

2024-02-16  Jakub Jelinek  

PR debug/113918
gcc/
* dwarf2out.cc (gen_field_die): Emit DW_AT_export_symbols
on anonymous unions or structs for -gdwarf-5 or -gno-strict-dwarf.
gcc/c/
* c-tree.h (c_type_dwarf_attribute): Declare.
* c-objc-common.h (LANG_HOOKS_TYPE_DWARF_ATTRIBUTE): Redefine.
* c-objc-common.cc: Include dwarf2.h.
(c_type_dwarf_attribute): New function.
gcc/cp/
* cp-objcp-common.cc (cp_type_dwarf_attribute): Return 1
for DW_AT_export_symbols on anonymous structs or unions.
gcc/testsuite/
* c-c++-common/dwarf2/pr113918.c: New test.

--- gcc/dwarf2out.cc.jj 2024-02-15 13:54:29.284358101 +0100
+++ gcc/dwarf2out.cc2024-02-16 20:38:19.718841259 +0100
@@ -25153,6 +25153,17 @@ gen_field_die (tree decl, struct vlr_con
  
add_accessibility_attribute (decl_die, decl);
  
+  /* Add DW_AT_export_symbols to anonymous unions or structs.  */

+  if ((dwarf_version >= 5 || !dwarf_strict) && DECL_NAME (decl) == NULL_TREE)
+if (tree type = member_declared_type (decl))
+  if (lang_hooks.types.type_dwarf_attribute (TYPE_MAIN_VARIANT (type),
+DW_AT_export_symbols) != -1)
+   {
+ dw_die_ref type_die = lookup_type_die (TYPE_MAIN_VARIANT (type));
+ if (type_die && get_AT (type_die, DW_AT_export_symbols) == NULL)
+   add_AT_flag (type_die, DW_AT_export_symbols, 1);
+   }
+
/* Equate decl number to die, so that we can look up this decl later on.  */
equate_decl_number_to_die (decl, decl_die);
  }
--- gcc/c/c-tree.h.jj   2024-01-31 10:46:35.164761720 +0100
+++ gcc/c/c-tree.h  2024-02-16 20:43:45.993372908 +0100
@@ -731,6 +731,7 @@ extern bool c_warn_unused_global_decl (c
  extern void c_initialize_diagnostics (diagnostic_context *);
  extern bool c_var_mod_p (tree x, tree fn);
  extern alias_set_type c_get_alias_set (tree);
+extern int c_type_dwarf_attribute (const_tree, int);
  
  /* in c-typeck.cc */

  extern int in_alignof;
--- gcc/c/c-objc-common.h.jj2024-01-03 12:06:52.973862999 +0100
+++ gcc/c/c-objc-common.h   2024-02-16 20:42:21.073535465 +0100
@@ -119,6 +119,9 @@ static const scoped_attribute_specs *con
  #undef LANG_HOOKS_GIMPLIFY_EXPR
  #define LANG_HOOKS_GIMPLIFY_EXPR c_gimplify_expr
  
+#undef LANG_HOOKS_TYPE_DWARF_ATTRIBUTE

+#define LANG_HOOKS_TYPE_DWARF_ATTRIBUTE c_type_dwarf_attribute
+
  #undef LANG_HOOKS_OMP_PREDETERMINED_SHARING
  #define LANG_HOOKS_OMP_PREDETERMINED_SHARING c_omp_predetermined_sharing
  
--- gcc/c/c-objc-common.cc.jj	2024-01-03 12:06:53.213859637 +0100

+++ gcc/c/c-objc-common.cc  2024-02-16 20:45:24.649022305 +0100
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.
  #include "gcc-rich-location.h"
  #include "stringpool.h"
  #include "attribs.h"
+#include "dwarf2.h"
  
  static bool c_tree_printer (pretty_printer *, text_info *, const char *,

int, bool, bool, bool, bool *, const char **);
@@ -446,3 +447,25 @@ instantiation_dependent_expression_p (tr
  {
return false;
  }
+
+/* Return -1 if dwarf ATTR shouldn't be added for TYPE, or the attribute
+   value otherwise.  */
+int
+c_type_dwarf_attribute (const_tree type, int attr)
+{
+  if (type == NULL_TREE)
+return -1;
+
+  switch (attr)
+{
+case DW_AT_export_symbols:
+  if (RECORD_OR_UNION_TYPE_P (type) && TYPE_NAME (type) == NULL_TREE)
+   return 1;
+  break;
+
+default:
+  break;
+}
+
+  return -1;
+}
--- gcc/cp/cp-objcp-common.cc.jj2024-02-13 12:50:21.666846296 +0100
+++ gcc/cp/cp-objcp-common.cc   2024-02-16 20:40:51.374763528

[PATCH] dwarf2out, v2: Emit DW_AT_export_symbols on anon unions/structs [PR113918]

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 04:52:20PM +0100, Jakub Jelinek wrote:
> On Fri, Feb 16, 2024 at 10:48:28AM -0500, Jason Merrill wrote:
> > On 2/16/24 04:14, Jakub Jelinek wrote:
> > > DWARF5 added DW_AT_export_symbols both for use on inline namespaces (where
> > > we emit it), but also on anonymous unions/structs (and we didn't emit that
> > > attribute there).
> > > The following patch fixes it.
> > 
> > Should this involve cp_decl_dwarf_attribute like the namespace handling?
> 
> I wrote it in dwarf2out.cc because the same thing needs to be done for C and
> C++ (admittedly dunno if other languages have something similar).
> 
> Sure, it could be done in cp_decl_dwarf_attribute too but then it has to be
> done in c_decl_dwarf_attribute which doesn't exist.  Though, it is slightly
> complicated by DW_AT_export_symbols not actually going on the DW_TAG_member
> die but on the DW_TAG_{structure,class,union}_type which DW_TAG_member uses
> as its DW_AT_type.  But in order to ask the langhook, we likely need to pass
> the FIELD_DECL and not the type...

Anyway, here is a version of the patch that uses the langhook for it.
So that the langhook isn't totally abused and for performance reasons too
I've kept the DECL_NAME == NULL check on the dwarf2out.cc side and am
calling it on the type rather than decl, because type is where the attribute
should go.

2024-02-16  Jakub Jelinek  

PR debug/113918
gcc/
* dwarf2out.cc (gen_field_die): Emit DW_AT_export_symbols
on anonymous unions or structs for -gdwarf-5 or -gno-strict-dwarf.
gcc/c/
* c-tree.h (c_type_dwarf_attribute): Declare.
* c-objc-common.h (LANG_HOOKS_TYPE_DWARF_ATTRIBUTE): Redefine.
* c-objc-common.cc: Include dwarf2.h.
(c_type_dwarf_attribute): New function.
gcc/cp/
* cp-objcp-common.cc (cp_type_dwarf_attribute): Return 1
for DW_AT_export_symbols on anonymous structs or unions.
gcc/testsuite/
* c-c++-common/dwarf2/pr113918.c: New test.

--- gcc/dwarf2out.cc.jj 2024-02-15 13:54:29.284358101 +0100
+++ gcc/dwarf2out.cc2024-02-16 20:38:19.718841259 +0100
@@ -25153,6 +25153,17 @@ gen_field_die (tree decl, struct vlr_con
 
   add_accessibility_attribute (decl_die, decl);
 
+  /* Add DW_AT_export_symbols to anonymous unions or structs.  */
+  if ((dwarf_version >= 5 || !dwarf_strict) && DECL_NAME (decl) == NULL_TREE)
+if (tree type = member_declared_type (decl))
+  if (lang_hooks.types.type_dwarf_attribute (TYPE_MAIN_VARIANT (type),
+DW_AT_export_symbols) != -1)
+   {
+ dw_die_ref type_die = lookup_type_die (TYPE_MAIN_VARIANT (type));
+ if (type_die && get_AT (type_die, DW_AT_export_symbols) == NULL)
+   add_AT_flag (type_die, DW_AT_export_symbols, 1);
+   }
+
   /* Equate decl number to die, so that we can look up this decl later on.  */
   equate_decl_number_to_die (decl, decl_die);
 }
--- gcc/c/c-tree.h.jj   2024-01-31 10:46:35.164761720 +0100
+++ gcc/c/c-tree.h  2024-02-16 20:43:45.993372908 +0100
@@ -731,6 +731,7 @@ extern bool c_warn_unused_global_decl (c
 extern void c_initialize_diagnostics (diagnostic_context *);
 extern bool c_var_mod_p (tree x, tree fn);
 extern alias_set_type c_get_alias_set (tree);
+extern int c_type_dwarf_attribute (const_tree, int);
 
 /* in c-typeck.cc */
 extern int in_alignof;
--- gcc/c/c-objc-common.h.jj2024-01-03 12:06:52.973862999 +0100
+++ gcc/c/c-objc-common.h   2024-02-16 20:42:21.073535465 +0100
@@ -119,6 +119,9 @@ static const scoped_attribute_specs *con
 #undef LANG_HOOKS_GIMPLIFY_EXPR
 #define LANG_HOOKS_GIMPLIFY_EXPR c_gimplify_expr
 
+#undef LANG_HOOKS_TYPE_DWARF_ATTRIBUTE
+#define LANG_HOOKS_TYPE_DWARF_ATTRIBUTE c_type_dwarf_attribute
+
 #undef LANG_HOOKS_OMP_PREDETERMINED_SHARING
 #define LANG_HOOKS_OMP_PREDETERMINED_SHARING c_omp_predetermined_sharing
 
--- gcc/c/c-objc-common.cc.jj   2024-01-03 12:06:53.213859637 +0100
+++ gcc/c/c-objc-common.cc  2024-02-16 20:45:24.649022305 +0100
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.
 #include "gcc-rich-location.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "dwarf2.h"
 
 static bool c_tree_printer (pretty_printer *, text_info *, const char *,
int, bool, bool, bool, bool *, const char **);
@@ -446,3 +447,25 @@ instantiation_dependent_expression_p (tr
 {
   return false;
 }
+
+/* Return -1 if dwarf ATTR shouldn't be added for TYPE, or the attribute
+   value otherwise.  */
+int
+c_type_dwarf_attribute (const_tree type, int attr)
+{
+  if (type == NULL_TREE)
+return -1;
+
+  switch (attr)
+{
+case DW_AT_export_symbols:
+  if (RECORD_OR_UNION_TYPE_P (type) && TYPE_NAME (type) == NULL_TREE)
+   return 1;
+  break;
+
+default:
+  break;
+}
+
+  return -1;
+}
--- gcc/cp/cp-objcp-common.cc.jj2024-02-13 12:50:21.666846296 +0100
+++ gcc/cp/cp-objcp-common.cc   2024-02-16 20:40:51.374763528 +0100
@@ -410,6

[PATCH v6 1/5] Provide counted_by attribute to flexible array member field (PR108896)

2024-02-16 Thread Qing Zhao

'counted_by (COUNT)'
 The 'counted_by' attribute may be attached to the C99 flexible
 array member of a structure.  It indicates that the number of the
 elements of the array is given by the field named "COUNT" in the
 same structure as the flexible array member.  GCC uses this
 information to improve the results of the array bound sanitizer and
 the '__builtin_dynamic_object_size'.

 For instance, the following code:

  struct P {
size_t count;
char other;
char array[] __attribute__ ((counted_by (count)));
  } *p;

 specifies that the 'array' is a flexible array member whose number
 of elements is given by the field 'count' in the same structure.

 The field that represents the number of the elements should have an
 integer type.  Otherwise, the compiler will report a warning and
 ignore the attribute.

 When the field that represents the number of the elements is assigned a
 negative integer value, the compiler will treat the value as zero.

 An explicit 'counted_by' annotation defines a relationship between
 two objects, 'p->array' and 'p->count', and there are the following
 requirements on the relationship between this pair:

* 'p->count' should be initialized before the first reference to
  'p->array';

* 'p->array' has _at least_ 'p->count' number of elements
  available all the time.  This relationship must hold even
  after any of these related objects are updated during the
  program.

 It's the user's responsibility to make sure the above requirements
 are kept all the time.  Otherwise the compiler will report
 warnings and, at the same time, the results of the array bound
 sanitizer and the '__builtin_dynamic_object_size' are undefined.

 One important feature of the attribute is that a reference to the
 flexible array member field will use the latest value assigned to
 the field that represents the number of the elements before that
 reference.  For example,

p->count = val1;
p->array[20] = 0;  // ref1 to p->array
p->count = val2;
p->array[30] = 0;  // ref2 to p->array

 in the above, 'ref1' will use 'val1' as the number of the elements
 in 'p->array', and 'ref2' will use 'val2' as the number of elements
 in 'p->array'.

gcc/c-family/ChangeLog:

PR C/108896
* c-attribs.cc (handle_counted_by_attribute): New function.
(attribute_takes_identifier_p): Add counted_by attribute to the list.
* c-common.cc (c_flexible_array_member_type_p): ...To this.
* c-common.h (c_flexible_array_member_type_p): New prototype.

gcc/c/ChangeLog:

PR C/108896
* c-decl.cc (flexible_array_member_type_p): Renamed and moved to...
(add_flexible_array_elts_to_size): Use renamed function.
(is_flexible_array_member_p): Use renamed function.
(verify_counted_by_attribute): New function.
(finish_struct): Use renamed function and verify counted_by
attribute.
* c-tree.h (lookup_field): New prototype.
* c-typeck.cc (lookup_field): Expose as extern function.

gcc/ChangeLog:

PR C/108896
* doc/extend.texi: Document attribute counted_by.

gcc/testsuite/ChangeLog:

PR C/108896
* gcc.dg/flex-array-counted-by.c: New test.
---
 gcc/c-family/c-attribs.cc| 54 -
 gcc/c-family/c-common.cc | 13 +++
 gcc/c-family/c-common.h  |  1 +
 gcc/c/c-decl.cc  | 85 
 gcc/c/c-tree.h   |  1 +
 gcc/c/c-typeck.cc|  3 +-
 gcc/doc/extend.texi  | 64 +++
 gcc/testsuite/gcc.dg/flex-array-counted-by.c | 40 +
 8 files changed, 241 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by.c

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index 40a0cf90295d..4395c0656b14 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -105,6 +105,8 @@ static tree handle_warn_if_not_aligned_attribute (tree *, 
tree, tree,
  int, bool *);
 static tree handle_strict_flex_array_attribute (tree *, tree, tree,
 int, bool *);
+static tree handle_counted_by_attribute (tree *, tree, tree,
+  int, bool *);
 static tree handle_weak_attribute (tree *, tree, tree, int, bool *) ;
 static tree handle_noplt_attribute (tree *, tree, tree, int, bool *) ;
 static tree handle_alias_ifunc_attribute (bool, tree *, tree, tree, bool *);
@@ -412,6 +414,8 @@ const struct attribute_spec c_common_gnu_attributes[] =
  handle_warn_if_not_aligned_attribute, NULL },

[PATCH v6 5/5] Add the 6th argument to .ACCESS_WITH_SIZE

2024-02-16 Thread Qing Zhao

to carry the TYPE of the flexible array.

Such information is needed during tree-object-size.cc.

We cannot use the result type or the type of the 1st argument
of the routine .ACCESS_WITH_SIZE to decide the element type
of the original array due to possible type casting in the
source code.

gcc/c/ChangeLog:

* c-typeck.cc (build_access_with_size_for_counted_by): Add the 6th
argument to .ACCESS_WITH_SIZE.

gcc/ChangeLog:

* tree-object-size.cc (access_with_size_object_size): Use the type
of the 6th argument for the type of the element.

gcc/testsuite/ChangeLog:

* gcc.dg/flex-array-counted-by-6.c: New test.
---
 gcc/c/c-typeck.cc | 11 +++--
 .../gcc.dg/flex-array-counted-by-6.c  | 46 +++
 gcc/tree-object-size.cc   | 16 ---
 3 files changed, 64 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-6.c

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index a29a7d7ec029..c17ac6862546 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -2608,7 +2608,8 @@ build_counted_by_ref (tree datum, tree subdatum, tree 
*counted_by_type)
 
to:
 
-   (*.ACCESS_WITH_SIZE (REF, COUNTED_BY_REF, 1, (TYPE_OF_SIZE)0, -1))
+   (*.ACCESS_WITH_SIZE (REF, COUNTED_BY_REF, 1, (TYPE_OF_SIZE)0, -1,
+   (TYPE_OF_ARRAY *)0))
 
NOTE: The return type of this function is the POINTER type pointing
to the original flexible array type.
@@ -2620,6 +2621,9 @@ build_counted_by_ref (tree datum, tree subdatum, tree 
*counted_by_type)
The 4th argument of the call is a constant 0 with the TYPE of the
object pointed by COUNTED_BY_REF.
 
+   The 6th argument of the call is a constant 0 with the pointer TYPE
+   to the original flexible array type.
+
   */
 static tree
 build_access_with_size_for_counted_by (location_t loc, tree ref,
@@ -2632,12 +2636,13 @@ build_access_with_size_for_counted_by (location_t loc, 
tree ref,
 
   tree call
 = build_call_expr_internal_loc (loc, IFN_ACCESS_WITH_SIZE,
-   result_type, 5,
+   result_type, 6,
array_to_pointer_conversion (loc, ref),
counted_by_ref,
build_int_cst (integer_type_node, 1),
build_int_cst (counted_by_type, 0),
-   build_int_cst (integer_type_node, -1));
+   build_int_cst (integer_type_node, -1),
+   build_int_cst (result_type, 0));
   /* Wrap the call with an INDIRECT_REF with the flexible array type.  */
   call = build1 (INDIRECT_REF, TREE_TYPE (ref), call);
   SET_EXPR_LOCATION (call, loc);
diff --git a/gcc/testsuite/gcc.dg/flex-array-counted-by-6.c 
b/gcc/testsuite/gcc.dg/flex-array-counted-by-6.c
new file mode 100644
index ..65a401796479
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/flex-array-counted-by-6.c
@@ -0,0 +1,46 @@
+/* test the attribute counted_by and its usage in
+ * __builtin_dynamic_object_size. when the type of the flexible array member
+ * is casting to another type.  */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "builtin-object-size-common.h"
+
+typedef unsigned short u16;
+
+struct info {
+   u16 data_len;
+   char data[] __attribute__((counted_by(data_len)));
+};
+
+struct foo {
+   int a;
+   int b;
+};
+
+static __attribute__((__noinline__))
+struct info *setup ()
+{
+ struct info *p;
+ size_t bytes = 3 * sizeof(struct foo);
+
+ p = (struct info *)malloc (sizeof (struct info) + bytes);
+ p->data_len = bytes;
+
+ return p;
+}
+
+static void
+__attribute__((__noinline__)) report (struct info *p)
+{
+ struct foo *bar = (struct foo *)p->data;
+ EXPECT(__builtin_dynamic_object_size((char *)(bar + 1), 1), 16);
+ EXPECT(__builtin_dynamic_object_size((char *)(bar + 2), 1), 8);
+}
+
+int main(int argc, char *argv[])
+{
+ struct info *p = setup();
+ report(p);
+ return 0;
+}
diff --git a/gcc/tree-object-size.cc b/gcc/tree-object-size.cc
index 630b0a7aaa4b..c3098c521a43 100644
--- a/gcc/tree-object-size.cc
+++ b/gcc/tree-object-size.cc
@@ -763,17 +763,21 @@ addr_object_size (struct object_size_info *osi, 
const_tree ptr,
  2: the number of bytes;
4th argument TYPE_OF_SIZE: A constant 0 with the TYPE of the object
  refed by REF_TO_SIZE
+   6th argument: A constant 0 with the pointer TYPE to the original flexible
+ array type.
 
-   the size of the element can be retrived from the result type of the call,
-   which is the pointer to the array type.  */
+   the size of the element can be retrived from the TYPE of the 6th argument
+   of the call, which is the pointer to the array type.  */
 static tree
 access_with_size_object_size (const gcall *call, int object_size_type)
 {
   gcc_assert (gimple_call_internal_p (call,

[PATCH v6 4/5] Use the .ACCESS_WITH_SIZE in bound sanitizer.

2024-02-16 Thread Qing Zhao

gcc/c-family/ChangeLog:

* c-ubsan.cc (get_bound_from_access_with_size): New function.
(ubsan_instrument_bounds): Handle call to .ACCESS_WITH_SIZE.

gcc/testsuite/ChangeLog:

* gcc.dg/ubsan/flex-array-counted-by-bounds-2.c: New test.
* gcc.dg/ubsan/flex-array-counted-by-bounds-3.c: New test.
* gcc.dg/ubsan/flex-array-counted-by-bounds.c: New test.
---
 gcc/c-family/c-ubsan.cc   | 42 +
 .../ubsan/flex-array-counted-by-bounds-2.c| 45 ++
 .../ubsan/flex-array-counted-by-bounds-3.c| 34 ++
 .../ubsan/flex-array-counted-by-bounds.c  | 46 +++
 4 files changed, 167 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-3.c
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c

diff --git a/gcc/c-family/c-ubsan.cc b/gcc/c-family/c-ubsan.cc
index 940982819ddf..164b29845b3a 100644
--- a/gcc/c-family/c-ubsan.cc
+++ b/gcc/c-family/c-ubsan.cc
@@ -376,6 +376,40 @@ ubsan_instrument_return (location_t loc)
   return build_call_expr_loc (loc, t, 1, build_fold_addr_expr_loc (loc, data));
 }
 
+/* Get the tree that represented the number of counted_by, i.e, the maximum
+   number of the elements of the object that the call to .ACCESS_WITH_SIZE
+   points to, this number will be the bound of the corresponding array.  */
+static tree
+get_bound_from_access_with_size (tree call)
+{
+  if (!is_access_with_size_p (call))
+return NULL_TREE;
+
+  tree ref_to_size = CALL_EXPR_ARG (call, 1);
+  unsigned int type_of_size = TREE_INT_CST_LOW (CALL_EXPR_ARG (call, 2));
+  tree type = TREE_TYPE (CALL_EXPR_ARG (call, 3));
+  tree size = fold_build2 (MEM_REF, type, unshare_expr (ref_to_size),
+  build_int_cst (ptr_type_node, 0));
+  /* If size is negative value, treat it as zero.  */
+  if (!TYPE_UNSIGNED (type))
+  {
+tree cond = fold_build2 (LT_EXPR, boolean_type_node,
+unshare_expr (size), build_zero_cst (type));
+size = fold_build3 (COND_EXPR, type, cond,
+   build_zero_cst (type), size);
+  }
+
+  /* Only when type_of_size is 1,i.e, the number of the elements of
+ the object type, return the size.  */
+  if (type_of_size != 1)
+return NULL_TREE;
+  else
+size = fold_convert (sizetype, size);
+
+  return size;
+}
+
+
 /* Instrument array bounds for ARRAY_REFs.  We create special builtin,
that gets expanded in the sanopt pass, and make an array dimension
of it.  ARRAY is the array, *INDEX is an index to the array.
@@ -401,6 +435,14 @@ ubsan_instrument_bounds (location_t loc, tree array, tree 
*index,
  && COMPLETE_TYPE_P (type)
  && integer_zerop (TYPE_SIZE (type)))
bound = build_int_cst (TREE_TYPE (TYPE_MIN_VALUE (domain)), -1);
+  else if (INDIRECT_REF_P (array)
+  && is_access_with_size_p ((TREE_OPERAND (array, 0
+   {
+ bound = get_bound_from_access_with_size ((TREE_OPERAND (array, 0)));
+ bound = fold_build2 (MINUS_EXPR, TREE_TYPE (bound),
+  bound,
+  build_int_cst (TREE_TYPE (bound), 1));
+   }
   else
return NULL_TREE;
 }
diff --git a/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c 
b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
new file mode 100644
index ..148934975ee5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
@@ -0,0 +1,45 @@
+/* test the attribute counted_by and its usage in
+   bounds sanitizer combined with VLA.  */
+/* { dg-do run } */
+/* { dg-options "-fsanitize=bounds" } */
+/* { dg-output "index 11 out of bounds for type 'int 
\\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 20 out of bounds for type 'int 
\\\[\\\*\\\]\\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 11 out of bounds for type 'int 
\\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 10 out of bounds for type 'int 
\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+
+
+#include 
+
+void __attribute__((__noinline__)) setup_and_test_vla (int n, int m)
+{
+   struct foo {
+   int n;
+   int p[][n] __attribute__((counted_by(n)));
+   } *f;
+
+   f = (struct foo *) malloc (sizeof(struct foo) + m*sizeof(int[n]));
+   f->n = m;
+   f->p[m][n-1]=1;
+   return;
+}
+
+void __attribute__((__noinline__)) setup_and_test_vla_1 (int n1, int n2, int m)
+{
+  struct foo {
+int n;
+int p[][n2][n1] __attribute__((counted_by(n)));
+  } *f;
+
+  f = (struct foo *) malloc (sizeof(struct foo) + m*sizeof(int[n2][n1]));
+  f->n = m;
+  f->p[m][n2][n1]=1;
+  return;
+}
+
+int main(int argc, char *argv[])
+{
+  setup_and_test_vla (10, 11);
+  setup_and_test_vla_1 (10, 11, 20);
+  return 0;

[PATCH v6 3/5] Use the .ACCESS_WITH_SIZE in builtin object size.

2024-02-16 Thread Qing Zhao

gcc/ChangeLog:

* tree-object-size.cc (access_with_size_object_size): New function.
(call_object_size): Call the new function.

gcc/testsuite/ChangeLog:

* gcc.dg/builtin-object-size-common.h: Add a new macro EXPECT.
* gcc.dg/flex-array-counted-by-3.c: New test.
* gcc.dg/flex-array-counted-by-4.c: New test.
* gcc.dg/flex-array-counted-by-5.c: New test.
---
 .../gcc.dg/builtin-object-size-common.h   |  11 ++
 .../gcc.dg/flex-array-counted-by-3.c  |  63 +++
 .../gcc.dg/flex-array-counted-by-4.c  | 178 ++
 .../gcc.dg/flex-array-counted-by-5.c  |  48 +
 gcc/tree-object-size.cc   |  59 ++
 5 files changed, 359 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-3.c
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-4.c
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-5.c

diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-common.h 
b/gcc/testsuite/gcc.dg/builtin-object-size-common.h
index 66ff7cdd953a..b677067c6e6b 100644
--- a/gcc/testsuite/gcc.dg/builtin-object-size-common.h
+++ b/gcc/testsuite/gcc.dg/builtin-object-size-common.h
@@ -30,3 +30,14 @@ unsigned nfails = 0;
   __builtin_abort ();\
 return 0;\
   } while (0)
+
+#define EXPECT(p, _v) do {   \
+  size_t v = _v; \
+  if (p == v)\
+__builtin_printf ("ok:  %s == %zd\n", #p, p);\
+  else   \
+{\
+  __builtin_printf ("WAT: %s == %zd (expected %zd)\n", #p, p, v);\
+  FAIL ();   \
+}\
+} while (0);
diff --git a/gcc/testsuite/gcc.dg/flex-array-counted-by-3.c 
b/gcc/testsuite/gcc.dg/flex-array-counted-by-3.c
new file mode 100644
index ..0066c32ca808
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/flex-array-counted-by-3.c
@@ -0,0 +1,63 @@
+/* test the attribute counted_by and its usage in
+ * __builtin_dynamic_object_size.  */ 
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "builtin-object-size-common.h"
+
+struct flex {
+  int b;
+  int c[];
+} *array_flex;
+
+struct annotated {
+  int b;
+  int c[] __attribute__ ((counted_by (b)));
+} *array_annotated;
+
+struct nested_annotated {
+  struct {
+union {
+  int b;
+  float f; 
+};
+int n;
+  };
+  int c[] __attribute__ ((counted_by (b)));
+} *array_nested_annotated;
+
+void __attribute__((__noinline__)) setup (int normal_count, int attr_count)
+{
+  array_flex
+= (struct flex *)malloc (sizeof (struct flex)
++ normal_count *  sizeof (int));
+  array_flex->b = normal_count;
+
+  array_annotated
+= (struct annotated *)malloc (sizeof (struct annotated)
+ + attr_count *  sizeof (int));
+  array_annotated->b = attr_count;
+
+  array_nested_annotated
+= (struct nested_annotated *)malloc (sizeof (struct nested_annotated)
++ attr_count *  sizeof (int));
+  array_nested_annotated->b = attr_count;
+
+  return;
+}
+
+void __attribute__((__noinline__)) test ()
+{
+EXPECT(__builtin_dynamic_object_size(array_flex->c, 1), -1);
+EXPECT(__builtin_dynamic_object_size(array_annotated->c, 1),
+  array_annotated->b * sizeof (int));
+EXPECT(__builtin_dynamic_object_size(array_nested_annotated->c, 1),
+  array_nested_annotated->b * sizeof (int));
+}
+
+int main(int argc, char *argv[])
+{
+  setup (10,10);   
+  test ();
+  DONE ();
+}
diff --git a/gcc/testsuite/gcc.dg/flex-array-counted-by-4.c 
b/gcc/testsuite/gcc.dg/flex-array-counted-by-4.c
new file mode 100644
index ..3ce7f3545549
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/flex-array-counted-by-4.c
@@ -0,0 +1,178 @@
+/* test the attribute counted_by and its usage in
+__builtin_dynamic_object_size: what's the correct behavior when the
+allocation size mismatched with the value of counted_by attribute?
+we should always use the latest value that is hold by the counted_by
+field.  */
+/* { dg-do run } */
+/* { dg-options "-O -fstrict-flex-arrays=3" } */
+
+#include "builtin-object-size-common.h"
+
+struct annotated {
+  size_t foo;
+  char others;
+  char array[] __attribute__((counted_by (foo)));
+};
+
+#define noinline __attribute__((__noinline__))
+#define SIZE_BUMP 10 
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+/* In general, Due to type casting, the type for the pointee of a pointer
+   does not say

[PATCH v6 2/5] Convert references with "counted_by" attributes to/from .ACCESS_WITH_SIZE.

2024-02-16 Thread Qing Zhao

Including the following changes:
* The definition of the new internal function .ACCESS_WITH_SIZE
  in internal-fn.def.
* C FE converts every reference to a FAM with a "counted_by" attribute
  to a call to the internal function .ACCESS_WITH_SIZE.
  (build_component_ref in c_typeck.cc)

  This includes the case when the object is statically allocated and
  initialized.
  In order to make this working, the routines initializer_constant_valid_p_1
  and output_constant in varasm.cc are updated to handle calls to
  .ACCESS_WITH_SIZE.
  (initializer_constant_valid_p_1 and output_constant in varasm.c)

  However, for the reference inside "offsetof", the "counted_by" attribute is
  ignored since it's not useful at all.
  (c_parser_postfix_expression in c/c-parser.cc)

  In addition to "offsetof", for the reference inside operator "typeof" and
  "alignof", we ignore the counted_by attribute too.

  When building ADDR_EXPR for the .ACCESS_WITH_SIZE in C FE,
  replace the call with its first argument.

* Convert every call to .ACCESS_WITH_SIZE to its first argument.
  (expand_ACCESS_WITH_SIZE in internal-fn.cc)
* Adjust alias analysis to exclude the new internal from clobbering anything.
  (ref_maybe_used_by_call_p_1 and call_may_clobber_ref_p_1 in tree-ssa-alias.cc)
* Adjust dead code elimination to eliminate the call to .ACCESS_WITH_SIZE when
  its LHS is eliminated as dead code.
  (eliminate_unnecessary_stmts in tree-ssa-dce.cc)
* Provide the utility routines to check the call is .ACCESS_WITH_SIZE and
  get the reference from the call to .ACCESS_WITH_SIZE.
  (is_access_with_size_p and get_ref_from_access_with_size in tree.cc)

gcc/c/ChangeLog:

* c-parser.cc (c_parser_postfix_expression): Ignore the counted-by
attribute when build_component_ref inside offsetof operator.
* c-tree.h (build_component_ref): Add one more parameter.
* c-typeck.cc (build_counted_by_ref): New function.
(build_access_with_size_for_counted_by): New function.
(build_component_ref): Check the counted-by attribute and build
call to .ACCESS_WITH_SIZE.
(build_unary_op): When building ADDR_EXPR for
.ACCESS_WITH_SIZE, use its first argument.
(lvalue_p): Accept call to .ACCESS_WITH_SIZE.

gcc/ChangeLog:

* internal-fn.cc (expand_ACCESS_WITH_SIZE): New function.
* internal-fn.def (ACCESS_WITH_SIZE): New internal function.
* tree-ssa-alias.cc (ref_maybe_used_by_call_p_1): Special case
IFN_ACCESS_WITH_SIZE.
(call_may_clobber_ref_p_1): Special case IFN_ACCESS_WITH_SIZE.
* tree-ssa-dce.cc (eliminate_unnecessary_stmts): Eliminate the call
to .ACCESS_WITH_SIZE when its LHS is dead.
* tree.cc (process_call_operands): Adjust side effect for function
.ACCESS_WITH_SIZE.
(is_access_with_size_p): New function.
(get_ref_from_access_with_size): New function.
* tree.h (is_access_with_size_p): New prototype.
(get_ref_from_access_with_size): New prototype.
* varasm.cc (initializer_constant_valid_p_1): Handle call to
.ACCESS_WITH_SIZE.
(output_constant): Handle call to .ACCESS_WITH_SIZE.

gcc/testsuite/ChangeLog:

* gcc.dg/flex-array-counted-by-2.c: New test.
---
 gcc/c/c-parser.cc |  10 +-
 gcc/c/c-tree.h|   2 +-
 gcc/c/c-typeck.cc | 128 +-
 gcc/internal-fn.cc|  36 +
 gcc/internal-fn.def   |   4 +
 .../gcc.dg/flex-array-counted-by-2.c  | 112 +++
 gcc/tree-ssa-alias.cc |   2 +
 gcc/tree-ssa-dce.cc   |   5 +-
 gcc/tree.cc   |  25 +++-
 gcc/tree.h|   8 ++
 gcc/varasm.cc |  10 ++
 11 files changed, 332 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-2.c

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index c31349dae2ff..a6ed5ac43bb1 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -10850,9 +10850,12 @@ c_parser_postfix_expression (c_parser *parser)
if (c_parser_next_token_is (parser, CPP_NAME))
  {
c_token *comp_tok = c_parser_peek_token (parser);
+   /* Ignore the counted_by attribute for reference inside
+  offsetof since the information is not useful at all.  */
offsetof_ref
  = build_component_ref (loc, offsetof_ref, comp_tok->value,
-comp_tok->location, UNKNOWN_LOCATION);
+comp_tok->location, UNKNOWN_LOCATION,
+false);
c_parser_consume_token (parser);
while (c_parser_next_token_is (parser, CPP_DOT)
   ||

[PATCH v6 0/5]New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2024-02-16 Thread Qing Zhao

Hi,

This is the 6th version of the patch.

Compared with the 5th version, the only difference is:

1. Add the 6th argument to .ACCESS_WITH_SIZE
   to carry the TYPE of the flexible array.
   Such information is needed during tree-object-size.cc.

   Previously, we used the result type of the routine
   .ACCESS_WITH_SIZE to decide the element type of the
   original array; however, the result type of the routine
   might be changed during tree optimizations due to
   possible type casting in the source code.


Compared with the 4th version, the major differences are:

1. Change the return type of the routine .ACCESS_WITH_SIZE 
   FROM:
 Pointer to the type of the element of the flexible array;
   TO:
 Pointer to the type of the flexible array;
And then wrap the call with an indirection reference. 

2. Adjust all other parts with this change, (this will simplify the bound 
sanitizer instrument);

3. Add the fixes to the kernel building failures, which include:
A. The operator "typeof" cannot return the correct type for a->array;
B. The operator "&" cannot return the correct address for a->array;

4. Correctly handle the case when the value of "counted-by" is zero or
negative, as follows:
   4.1. Update the counted-by doc with the following:
When the counted-by field is assigned a negative integer value, the
compiler will treat the value as zero.
   4.2. Adjust __bdos and the array bound sanitizer to handle the case
when "counted-by" is zero correctly.


It is based on the following proposal:

https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635884.html
Represent the missing dependence for the "counted_by" attribute and its 
consumers

**The summary of the proposal is:

* Add a new internal function ".ACCESS_WITH_SIZE" to carry the size information 
for every reference to a FAM field;
* In C FE, Replace every reference to a FAM field whose TYPE has the 
"counted_by" attribute with the new internal function ".ACCESS_WITH_SIZE";
* In every consumer of the size information, for example, BDOS or array bound 
sanitizer, query the size information or ACCESS_MODE information from the new 
internal function;
* When expanding to RTL, replace the internal function with the actual
reference to the FAM field;
* Some adjustment to ipa alias analysis, and other SSA passes to mitigate the 
impact to the optimizer and code generation.


**The new internal function

  .ACCESS_WITH_SIZE (REF_TO_OBJ, REF_TO_SIZE, CLASS_OF_SIZE, TYPE_OF_SIZE, 
ACCESS_MODE, TYPE_OF_REF)

INTERNAL_FN (ACCESS_WITH_SIZE, ECF_LEAF | ECF_NOTHROW, NULL)

which returns the "REF_TO_OBJ" same as the 1st argument;

Both the return type and the type of the first argument of this function have 
been converted from the incomplete array type to the corresponding pointer type.

The call to .ACCESS_WITH_SIZE is wrapped with an INDIRECT_REF, whose type is 
the original incomplete array type.

Please see the following link for why:
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638793.html
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/639605.html

1st argument "REF_TO_OBJ": The reference to the object;
2nd argument "REF_TO_SIZE": The reference to the size of the object,
3rd argument "CLASS_OF_SIZE": The size referenced by the REF_TO_SIZE represents
   0: unknown;
   1: the number of the elements of the object type;
   2: the number of bytes;
4th argument "TYPE_OF_SIZE": A constant 0 with the TYPE of the object
  refed by REF_TO_SIZE
5th argument "ACCESS_MODE":
  -1: Unknown access semantics
   0: none
   1: read_only
   2: write_only
   3: read_write
6th argument "TYPE_OF_REF": A constant 0 with the pointer TYPE to
  the original flexible array type.

** The Patch sets included:

1. Provide counted_by attribute to flexible array member field;
  which includes:
  * "counted_by" attribute documentation;
  * C FE handling of the new attribute;
syntax checking, error reporting;
  * testing cases;

2. Convert "counted_by" attribute to/from .ACCESS_WITH_SIZE.
  which includes:
  * The definition of the new internal function .ACCESS_WITH_SIZE in 
internal-fn.def.
  * C FE converts every reference to a FAM with "counted_by" attribute to a 
call to the internal function .ACCESS_WITH_SIZE.
(build_component_ref in c_typeck.cc)
This includes the case when the object is statically allocated and 
initialized.
In order to make this working, we should update 
initializer_constant_valid_p_1 and output_constant in varasm.cc to include 
calls to .ACCESS_WITH_SIZE.

However, for the reference inside "offsetof", ignore the "counted_by" 
attribute since it's not useful at all. (c_parser_postfix_expression in 
c/c-parser.cc)
In addition to "offsetof", for the reference inside operator "typeof" and
  "alignof", we ignore the counted_by attribute too.
When building ADDR_EXPR for the .ACCESS_WITH_SIZE in C FE,
  replace the call with its first

[committed] libstdc++: Fix FAIL: 26_numerics/random/pr60037-neg.cc again [PR113961]

2024-02-16 Thread Jonathan Wakely

I had another change to  in my local tree which affected
the second dg-error and I "fixed" it unnecessarily. I've tested with a
clean tree this time.

Tested aarch64-linux. Pushed to trunk.

-- >8 --

PR libstdc++/87744
PR libstdc++/113961

libstdc++-v3/ChangeLog:

* testsuite/26_numerics/random/pr60037-neg.cc: Adjust dg-error
line number.
---
 libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc 
b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
index 9d6925fb416..3c5aa7feefc 100644
--- a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
+++ b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
@@ -12,4 +12,4 @@ auto x = std::generate_canonical

Re: [COMMITTED] c++: Add testcase for this PR [PR97990]

2024-02-16 Thread Marek Polacek

On Fri, Feb 16, 2024 at 11:00:34AM -0800, Andrew Pinski wrote:
> This testcase was fixed by r14-5934-gf26d68d5d128c8 but we should add
> one to make sure it does not regress again.
> 
> Committed as obvious after a quick test on the testcase.
> 
>   PR c++/97990
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/torture/vector-struct-1.C: New test.
> 
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/testsuite/g++.dg/torture/vector-struct-1.C | 18 ++
>  1 file changed, 18 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/torture/vector-struct-1.C
> 
> diff --git a/gcc/testsuite/g++.dg/torture/vector-struct-1.C 
> b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
> new file mode 100644
> index 000..e2747417e2d
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
> @@ -0,0 +1,18 @@
> +/* PR c++/97990 */
> +/* This used to crash with lto and strict aliasing enabled as the
> +   vector type variant still had TYPE_ALIAS_SET set on it. */

You don't have -Wstrict-aliasing here without which the test didn't ICE.

> +typedef __attribute__((__vector_size__(sizeof(short short TSimd;
> +TSimd hh(int);
> +struct y6
> +{
> +  TSimd VALUE;
> +  ~y6();
> +};
> +template 
> +auto f2(T1 p1, T2){
> +  return hh(p1) <= 0;
> +}
> +void f1(){
> +  f2(0, y6{});
> +}
> -- 
> 2.43.0
> 

Marek

[COMMITTED] c++: Add testcase for this PR [PR97990]

2024-02-16 Thread Andrew Pinski

This testcase was fixed by r14-5934-gf26d68d5d128c8 but we should add
one to make sure it does not regress again.

Committed as obvious after a quick test on the testcase.

PR c++/97990

gcc/testsuite/ChangeLog:

* g++.dg/torture/vector-struct-1.C: New test.

Signed-off-by: Andrew Pinski 
---
 gcc/testsuite/g++.dg/torture/vector-struct-1.C | 18 ++
 1 file changed, 18 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/torture/vector-struct-1.C

diff --git a/gcc/testsuite/g++.dg/torture/vector-struct-1.C 
b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
new file mode 100644
index 000..e2747417e2d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vector-struct-1.C
@@ -0,0 +1,18 @@
+/* PR c++/97990 */
+/* This used to crash with lto and strict aliasing enabled as the
+   vector type variant still had TYPE_ALIAS_SET set on it. */
+
+typedef __attribute__((__vector_size__(sizeof(short short TSimd;
+TSimd hh(int);
+struct y6
+{
+  TSimd VALUE;
+  ~y6();
+};
+template 
+auto f2(T1 p1, T2){
+  return hh(p1) <= 0;
+}
+void f1(){
+  f2(0, y6{});
+}
-- 
2.43.0

Re: [PATCH] testsuite: Fix up lra effective target

2024-02-16 Thread Mike Stump

On Feb 16, 2024, at 2:16 AM, Jakub Jelinek  wrote:
> 
> There is one special case, NVPTX, which is a TARGET_NO_REGISTER_ALLOCATION
> target.  I think claiming for it that it is a lra target is strange (even
> though it effectively returns true for targetm.lra_p ()), unsure if it
> supports asm goto with outputs or not, if it does and we want to test it,
> perhaps we should introduce asm_goto_outputs effective target and use
> lra || nvptx-*-* for that?

Since the port people have to maintain that code in general, I usually leave it 
to them to try and select a cheap, maintainable way to manage it.

If people want to pave the way, I'd tend to defer to them, since they have
thought about it more than I have.

Re: [PATCH] testsuite: Fix up lra effective target

2024-02-16 Thread Mike Stump

On Feb 16, 2024, at 2:16 AM, Jakub Jelinek  wrote:
> 
> Given the recent discussions on IRC started with Andrew P. mentioning that
> an asm goto outputs test should have { target lra } and the lra effective
> target in GCC 11/12 only returning 0 for PA and in 13/14 for PA/AVR, while
> we clearly have 14 other targets which don't support LRA and a couple of
> further ones which have an -mlra/-mno-lra switch (whatever default they
> have), seems to me the effective target is quite broken.
> 
> Ok for trunk?

Ok.

Re: [Committed] testsuite: Add support for scanning assembly with comparitor

2024-02-16 Thread Edwin Lu


Thanks! Committed

Edwin

On 2/15/2024 9:27 AM, Mike Stump wrote:

On Feb 12, 2024, at 11:38 AM, Edwin Lu  wrote:

There is currently no support for matching at least x lines of assembly
(only scan-assembler-times). This patch would allow setting upper or lower
bounds.

Use case: using different scheduler descriptions and/or cost models will change
assembler output. Testing common functionality across tunes would require a
separate testcase per tune since each assembly output would be different. If we
know a base number of lines should appear across all tunes (i.e. testing return
values: we expect at minimum n stores into register x), we can lower-bound the
test to search for scan-assembler-bound {RE for storing into register x} >= n.
This avoids artificially inflating the scan-assembler-times expected count due
to the assembler choosing to perform extra stores into register x (using it as
a temporary register).

The testcase would be more robust to cpu/tune changes at the cost of not being
as granular towards specific cpu tuning.

I didn't see an Ok?  Just in case you forgot, yes, this is ok.

Re: [PATCH] testsuite: Disable slow and unneeded test variants

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 07:52:17PM +0200, Dimitar Dimitrov wrote:
> The issue in PR112344 is triggered only at -O2, so there is little value
> in running the test at lower optimization levels.  At the same time the

That is generally not true.
We had hundreds of cases in the history where a test written for one bug
let us discover a different bug later on, often at different optimization
level etc.

If the test is too slow, perhaps the dg-skip-if should take the
run_expensive_tests effective target into account, like:
/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O1" } } */

But guess another question is if the bug can be reproduced with fewer
iterations...

Jakub

[PATCH] testsuite: Disable slow and unneeded test variants

2024-02-16 Thread Dimitar Dimitrov

The issue in PR112344 is triggered only at -O2, so there is little value
in running the test at lower optimization levels.  At the same time the
generated code at low and code-size optimization levels is taking a long
time to execute because it loops a few billion iterations.

On the PRU simulator target the non-optimized test variants take more
than 10 minutes, thus failing due to timeout.  Even a native x86_64
takes a few seconds to run the non-optimized variants.

Let's not waste cycles and run only the test configurations which
triggered the issue described in the PR.

On native x86_64 Linux:
$ time make check-gcc-c -j10 RUNTESTFLAGS="dg-torture.exp=pr112344.c"
--------------------------------------
Time    Previously    With this patch
--------------------------------------
real    0m4,786s      0m1,694s
user    0m7,031s      0m4,013s
sys     0m3,300s      0m3,234s

With PRU simulator:
$ time make -j10 check-gcc-c RUNTESTFLAGS="--target_board=pru-sim 
dg-torture.exp=pr112344.c"
--------------------------------------
Time    Previously    With this patch
--------------------------------------
real    11m32,740s    0m1,897s
user    11m34,301s    0m4,012s
sys     0m2,178s      0m2,133s


Ok for trunk?

PR middle-end/112344

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr112344.c: Run only
for expensive speed optimizations.

Signed-off-by: Dimitar Dimitrov 
---
 gcc/testsuite/gcc.dg/torture/pr112344.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/torture/pr112344.c 
b/gcc/testsuite/gcc.dg/torture/pr112344.c
index c52d2c8304b..abcef51428f 100644
--- a/gcc/testsuite/gcc.dg/torture/pr112344.c
+++ b/gcc/testsuite/gcc.dg/torture/pr112344.c
@@ -1,6 +1,8 @@
 /* { dg-do run } */
 /* { dg-require-effective-target int32plus } */
 
+/* { dg-skip-if "triggered by expensive speed optimizations" { *-*-* } { "-O0" 
"-O1" "-Os" "-Oz" } { "" } } */
+
 int
 main ()
 {
-- 
2.43.0

[pushed] c++: add fixed testcase [PR111682]

2024-02-16 Thread Patrick Palka

Fixed by the PR113612 fix r14-8960-g19ac327de421fe.

PR c++/111682

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/var-templ86.C: New test.
---
 gcc/testsuite/g++.dg/cpp1y/var-templ86.C | 23 +++
 1 file changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/var-templ86.C

diff --git a/gcc/testsuite/g++.dg/cpp1y/var-templ86.C 
b/gcc/testsuite/g++.dg/cpp1y/var-templ86.C
new file mode 100644
index 000..0c933cac26c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/var-templ86.C
@@ -0,0 +1,23 @@
+// PR c++/111682
+// { dg-do compile { target c++14 } }
+
+template struct A {
+  template struct B;
+  template struct B;
+};
+template template struct A::B {};
+template struct A;
+A::B b;
+
+
+template struct B {
+  template static const int var1;
+  template static const int var1;
+
+  template static const int var2;
+};
+template template const int B::var1 = 1;
+template template const int B::var2 = 1;
+template struct B;
+int b_test1[B::var1];
+int b_test2[B::var2];
-- 
2.44.0.rc1.15.g4fc51f00ef

Re: [PATCH v5 0/4] New attribute "counted_by" to annotate bounds for C99 FAM(PR108896)

2024-02-16 Thread Qing Zhao

An update to the 5th version of the patches:

Kees helped me to do more testing, and found one issue:

===
   We cannot use the result type or the type of the 1st argument
of the routine .ACCESS_WITH_SIZE to decide the element type
of the original array due to possible type casting in the
source code.

The element type of the original array is needed during tree-object-size.cc 
.
===

In order to resolve this issue, I added the 6th argument to the routine 
.ACCESS_WITH_SIZE
to carry the original type of the array:

-   (*.ACCESS_WITH_SIZE (REF, COUNTED_BY_REF, 1, (TYPE_OF_SIZE)0, -1))
+   (*.ACCESS_WITH_SIZE (REF, COUNTED_BY_REF, 1, (TYPE_OF_SIZE)0, -1,
+   (TYPE_OF_ARRAY *)0))

+   The 6th argument of the call is a constant 0 with the pointer TYPE
+   to the original flexible array type.
+

With this fix, the kernel (with counted-by annotation) has been built
successfully, and the gcc with counted-by
support found one kernel bug!

Other testings were all good.

I will send the 6th version of the patch soon.  (The only change of the 6th 
version compared to the 5th version
Is the above fix).

Thanks.

Qing

> On Feb 9, 2024, at 10:54 AM, Qing Zhao  wrote:
> 
> Hi,
> 
> This is the 5th version of the patch.
> 
> compare with the 4th version, the major difference are:
> 
> 1. Change the return type of the routine .ACCESS_WITH_SIZE 
>   FROM:
> Pointer to the type of the element of the flexible array;
>   TO:
> Pointer to the type of the flexible array;
>And then wrap the call with an indirection reference. 
> 
> 2. Adjust all other parts with this change, (this will simplify the bound 
> sanitizer instrument);
> 
> 3. Add the fixes to the kernel building failures, which include:
>A. The operator “typeof” cannot return correct type for a->array; 
>B. The operator “&” cannot return correct address for a->array;
> 
> 4. Correctly handle the case when the value of “counted-by” is zero or 
> negative as following
>   4.1. Update the counted-by doc with the following:
>When the counted-by field is assigned a negative integer value, the 
> compiler will treat the value as zero. 
>   4.2. Adjust __bdos and array bound sanitizer to handle correctly when 
> “counted-by” is zero. 
> 
> 
> It is based on the following proposal:
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635884.html
> Represent the missing dependence for the "counted_by" attribute and its 
> consumers
> 
> **The summary of the proposal is:
> 
> * Add a new internal function ".ACCESS_WITH_SIZE" to carry the size 
> information for every reference to a FAM field;
> * In C FE, Replace every reference to a FAM field whose TYPE has the 
> "counted_by" attribute with the new internal function ".ACCESS_WITH_SIZE";
> * In every consumer of the size information, for example, BDOS or array bound 
> sanitizer, query the size information or ACCESS_MODE information from the new 
> internal function;
> * When expanding to RTL, replace the internal function with the actual 
> reference to the FAM field;
> * Some adjustment to ipa alias analysis, and other SSA passes to mitigate the 
> impact to the optimizer and code generation.
> 
> 
> **The new internal function
> 
>  .ACCESS_WITH_SIZE (REF_TO_OBJ, REF_TO_SIZE, CLASS_OF_SIZE, TYPE_OF_SIZE, 
> ACCESS_MODE)
> 
> INTERNAL_FN (ACCESS_WITH_SIZE, ECF_LEAF | ECF_NOTHROW, NULL)
> 
> which returns the "REF_TO_OBJ" same as the 1st argument;
> 
> Both the return type and the type of the first argument of this function have 
> been converted from the incomplete array type to the corresponding pointer 
> type.
> 
> The call to .ACCESS_WITH_SIZE is wrapped with an INDIRECT_REF, whose type is 
> the original incomplete array type.
> 
> Please see the following link for why:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638793.html
> https://gcc.gnu.org/pipermail/gcc-patches/2023-December/639605.html
> 
> 1st argument "REF_TO_OBJ": The reference to the object;
> 2nd argument "REF_TO_SIZE": The reference to the size of the object,
> 3rd argument "CLASS_OF_SIZE": The size referenced by the REF_TO_SIZE 
> represents
>   0: unknown;
>   1: the number of the elements of the object type;
>   2: the number of bytes;
> 4th argument TYPE_OF_SIZE: A constant 0 with the TYPE of the object
>  refed by REF_TO_SIZE
> 5th argument "ACCESS_MODE":
>  -1: Unknown access semantics
>   0: none
>   1: read_only
>   2: write_only
>   3: read_write
> 
> ** The Patch sets included:
> 
> 1. Provide counted_by attribute to flexible array member field;
>  which includes:
>  * "counted_by" attribute documentation;
>  * C FE handling of the new attribute;
>syntax checking, error reporting;
>  * testing cases;
> 
> 2. Convert "counted_by" attribute to/from .ACCESS_WITH_SIZE.
>  which includes:
>  * The definition of the new internal function .ACCESS_WITH_SIZE in 
> internal-fn.def.
>  * C

Re: [PATCH v2] c++: implicit move with throw [PR113853]

2024-02-16 Thread Jason Merrill


On 2/16/24 10:58, Marek Polacek wrote:

On Thu, Feb 15, 2024 at 04:36:40PM -0500, Jason Merrill wrote:

On 2/15/24 10:19, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Here we have

template
auto is_throwable(T t) -> decltype(throw t, true) { ... }

where we didn't properly mark 't' as IMPLICIT_RVALUE_P, which caused
the wrong overload to have been chosen.  Jason figured out it's because
we don't correctly implement [expr.prim.id.unqual]#4.2, which post-P2266
says that an id-expression is move-eligible if

"the id-expression (possibly parenthesized) is the operand of
a throw-expression, and names an implicitly movable entity that belongs
to a scope that does not contain the compound-statement of the innermost
lambda-expression, try-block, or function-try-block (if any) whose
compound-statement or ctor-initializer contains the throw-expression."

I worked out that it's trying to say that given

struct X {
  X();
  X(const X&);
  X(X&&) = delete;
};

the following should fail: the scope of the throw is an sk_try, and it's
also x's scope S, and S "does not contain the compound-statement of the
*try-block" so x is move-eligible, so we move, so we fail.

void f ()
try {
  X x;
  throw x;  // use of deleted function
} catch (...) {
}

Whereas here:

void g (X x)
try {
  throw x;
} catch (...) {
}

the throw is again in an sk_try, but x's scope is an sk_function_parms
which *does* contain the {} of the *try-block, so x is not move-eligible,
so we don't move, so we use X(const X&), and the code is fine.

The current code also doesn't seem to handle

void h (X x) {
  void z (decltype(throw x, true));
}

where there's no enclosing lambda or sk_try so we should move.

I'm not doing anything about lambdas because we shouldn't reach the
code at the end of the function: the DECL_HAS_VALUE_EXPR_P check
shouldn't let us go further.

PR c++/113789
PR c++/113853

gcc/cp/ChangeLog:

* typeck.cc (treat_lvalue_as_rvalue_p): Update code to better
reflect [expr.prim.id.unqual]#4.2.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/sfinae69.C: Remove dg-bogus.
* g++.dg/cpp0x/sfinae70.C: New test.
* g++.dg/cpp0x/sfinae71.C: New test.
* g++.dg/cpp0x/sfinae72.C: New test.
* g++.dg/cpp2a/implicit-move4.C: New test.
---
   gcc/cp/typeck.cc| 32 +--
   gcc/testsuite/g++.dg/cpp0x/sfinae69.C   |  2 +-
   gcc/testsuite/g++.dg/cpp0x/sfinae70.C   | 16 ++
   gcc/testsuite/g++.dg/cpp0x/sfinae71.C   | 17 ++
   gcc/testsuite/g++.dg/cpp0x/sfinae72.C   | 17 ++
   gcc/testsuite/g++.dg/cpp2a/implicit-move4.C | 59 +
   6 files changed, 126 insertions(+), 17 deletions(-)
   create mode 100644 gcc/testsuite/g++.dg/cpp0x/sfinae70.C
   create mode 100644 gcc/testsuite/g++.dg/cpp0x/sfinae71.C
   create mode 100644 gcc/testsuite/g++.dg/cpp0x/sfinae72.C
   create mode 100644 gcc/testsuite/g++.dg/cpp2a/implicit-move4.C

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 132c55cfc6d..0dc44cd87ca 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -10837,37 +10837,37 @@ treat_lvalue_as_rvalue_p (tree expr, bool return_p)
parenthesized) id-expression that names an implicitly movable entity
declared in the body or parameter-declaration-clause of the innermost
enclosing function or lambda-expression, */
-  if (DECL_CONTEXT (retval) != current_function_decl)
-return NULL_TREE;
 if (return_p)
   {
+  if (DECL_CONTEXT (retval) != current_function_decl)
+   return NULL_TREE;
 expr = move (expr);
 if (expr == error_mark_node)
return NULL_TREE;
 return set_implicit_rvalue_p (expr);
   }
-  /* if the operand of a throw-expression is a (possibly parenthesized)
- id-expression that names an implicitly movable entity whose scope does not
- extend beyond the compound-statement of the innermost try-block or
- function-try-block (if any) whose compound-statement or ctor-initializer
- encloses the throw-expression, */
+  /* if the id-expression (possibly parenthesized) is the operand of
+ a throw-expression, and names an implicitly movable entity that belongs
+ to a scope that does not contain the compound-statement of the innermost
+ lambda-expression, try-block, or function-try-block (if any) whose
+ compound-statement or ctor-initializer contains the throw-expression.  */


Please also mention here why you aren't checking for lambdas.


Done.


 /* C++20 added move on throw of parms.  */
 if (TREE_CODE (retval) == PARM_DECL && cxx_dialect < cxx20)
   return NULL_TREE;
 for (cp_binding_level *b = current_binding_level;
-   ; b = b->level_chain)
-{
-  for (tree decl = b->names; decl; decl = TREE_CHAIN (decl))
-   if (decl == retval)
- return

Re: [PATCH] Turn on LRA on all targets

2024-02-16 Thread Maciej W. Rozycki

On Fri, 16 Feb 2024, Jakub Jelinek wrote:

> >  There is no function prologue to optimise in the VAX case, because all 
> > the frame setup has already been made by the CALLS instruction itself in 
> > the caller.  The first machine instruction of the callee is technically 
> > already past the "prologue".  And then RET serves as the whole function 
> > "epilogue".
> 
> So, what is the problem with DWARF unwinding?  Just make sure to emit
> appropriate instructions describing the saving of the corresponding
> registers at specific points based on CFA at the start of the function
> (so that it appears in CIE instructions) and that should be all that is
> needed, no?

 I may not remember all the issues correctly offhand as it's been a while 
since I looked into it, but as I recall DWARF handling code has not been 
prepared for all the frame to have been already allocated and initialised 
at a function's entry point, and also at least DWARF-4 is IIRC required to 
have statics at offsets positive from FP (for a stack growing downwards).

 There was an issue about restoring the caller's value of SP too, which is 
callee's AP+4*(*AP)+4 (AP being the argument pointer, which is a hardreg, 
pointing at #arguments), or alternatively it can be calculated as the sum 
of FP, the fixed frame size (20), the variable frame size (4*#statics, 
recorded as a register mask in a 12-bit field at *(FP+6)), the #arguments 
slot (4), and the argument space size (4*#arguments).  Obviously the 
former calculation is easier to make (#arguments has to be zero-extended 
from bits 7:0).

 FAOD the #arguments slot is filled by the CALLS instruction at call time 
and can vary between callers obviously for varargs, but also cases such as 
open(2), so to reconstruct the caller's SP you always need to examine the 
actual stack frame, and it does not appear to be supported by libgcc DWARF 
EH code as it stands.

 I've left out stack realignment stuff or CALLG instruction support from 
the consideration above, as we don't handle these features in our psABI, 
all of which have provisions for from hardware in the stack frame.

 As I say I may have missed or confused something, and anyway I'll take 
another stab at it, hopefully soon.

  Maciej

[PATCH v2] c++: implicit move with throw [PR113853]

2024-02-16 Thread Marek Polacek

On Thu, Feb 15, 2024 at 04:36:40PM -0500, Jason Merrill wrote:
> On 2/15/24 10:19, Marek Polacek wrote:
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> > Here we have
> > 
> >template
> >auto is_throwable(T t) -> decltype(throw t, true) { ... }
> > 
> > where we didn't properly mark 't' as IMPLICIT_RVALUE_P, which caused
> > the wrong overload to have been chosen.  Jason figured out it's because
> > we don't correctly implement [expr.prim.id.unqual]#4.2, which post-P2266
> > says that an id-expression is move-eligible if
> > 
> > "the id-expression (possibly parenthesized) is the operand of
> > a throw-expression, and names an implicitly movable entity that belongs
> > to a scope that does not contain the compound-statement of the innermost
> > lambda-expression, try-block, or function-try-block (if any) whose
> > compound-statement or ctor-initializer contains the throw-expression."
> > 
> > I worked out that it's trying to say that given
> > 
> >struct X {
> >  X();
> >  X(const X&);
> >  X(X&&) = delete;
> >};
> > 
> > the following should fail: the scope of the throw is an sk_try, and it's
> > also x's scope S, and S "does not contain the compound-statement of the
> > *try-block" so x is move-eligible, so we move, so we fail.
> > 
> >void f ()
> >try {
> >  X x;
> >  throw x;  // use of deleted function
> >} catch (...) {
> >}
> > 
> > Whereas here:
> > 
> >void g (X x)
> >try {
> >  throw x;
> >} catch (...) {
> >}
> > 
> > the throw is again in an sk_try, but x's scope is an sk_function_parms
> > which *does* contain the {} of the *try-block, so x is not move-eligible,
> > so we don't move, so we use X(const X&), and the code is fine.
> > 
> > The current code also doesn't seem to handle
> > 
> >void h (X x) {
> >  void z (decltype(throw x, true));
> >}
> > 
> > where there's no enclosing lambda or sk_try so we should move.
> > 
> > I'm not doing anything about lambdas because we shouldn't reach the
> > code at the end of the function: the DECL_HAS_VALUE_EXPR_P check
> > shouldn't let us go further.
> > 
> > PR c++/113789
> > PR c++/113853
> > 
> > gcc/cp/ChangeLog:
> > 
> > * typeck.cc (treat_lvalue_as_rvalue_p): Update code to better
> > reflect [expr.prim.id.unqual]#4.2.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp0x/sfinae69.C: Remove dg-bogus.
> > * g++.dg/cpp0x/sfinae70.C: New test.
> > * g++.dg/cpp0x/sfinae71.C: New test.
> > * g++.dg/cpp0x/sfinae72.C: New test.
> > * g++.dg/cpp2a/implicit-move4.C: New test.
> > ---
> >   gcc/cp/typeck.cc| 32 +--
> >   gcc/testsuite/g++.dg/cpp0x/sfinae69.C   |  2 +-
> >   gcc/testsuite/g++.dg/cpp0x/sfinae70.C   | 16 ++
> >   gcc/testsuite/g++.dg/cpp0x/sfinae71.C   | 17 ++
> >   gcc/testsuite/g++.dg/cpp0x/sfinae72.C   | 17 ++
> >   gcc/testsuite/g++.dg/cpp2a/implicit-move4.C | 59 +
> >   6 files changed, 126 insertions(+), 17 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/sfinae70.C
> >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/sfinae71.C
> >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/sfinae72.C
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/implicit-move4.C
> > 
> > diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
> > index 132c55cfc6d..0dc44cd87ca 100644
> > --- a/gcc/cp/typeck.cc
> > +++ b/gcc/cp/typeck.cc
> > @@ -10837,37 +10837,37 @@ treat_lvalue_as_rvalue_p (tree expr, bool 
> > return_p)
> >parenthesized) id-expression that names an implicitly movable entity
> >declared in the body or parameter-declaration-clause of the innermost
> >enclosing function or lambda-expression, */
> > -  if (DECL_CONTEXT (retval) != current_function_decl)
> > -return NULL_TREE;
> > if (return_p)
> >   {
> > +  if (DECL_CONTEXT (retval) != current_function_decl)
> > +   return NULL_TREE;
> > expr = move (expr);
> > if (expr == error_mark_node)
> > return NULL_TREE;
> > return set_implicit_rvalue_p (expr);
> >   }
> > -  /* if the operand of a throw-expression is a (possibly parenthesized)
> > - id-expression that names an implicitly movable entity whose scope 
> > does not
> > - extend beyond the compound-statement of the innermost try-block or
> > - function-try-block (if any) whose compound-statement or 
> > ctor-initializer
> > - encloses the throw-expression, */
> > +  /* if the id-expression (possibly parenthesized) is the operand of
> > + a throw-expression, and names an implicitly movable entity that 
> > belongs
> > + to a scope that does not contain the compound-statement of the 
> > innermost
> > + lambda-expression, try-block, or function-try-block (if any) whose
> > + compound-statement or ctor-initializer contains the throw-expression. 
> >  */
> 
> Please also mention here why

Re: [RFA/RFC] C++/OpenMP: Supporting (first)private for member variables [PR110347] - or VALUE_EXPR and gimplify

2024-02-16 Thread Jakub Jelinek

Hi!

Ah, and the reason why it doesn't work on target is that it has the
everything is mapped assumption:
  if ((ctx->region_type & ORT_TARGET) != 0)
{
  if (ctx->region_type & ORT_ACC)
/* For OpenACC, as remarked above, defer expansion.  */
shared = false;
  else
shared = true;
 
  ret = lang_hooks.decls.omp_disregard_value_expr (decl, shared);

Perhaps shared = true; should be shared = (flags & GOVD_MAPPED) != 0;
now that we have private/firstprivate clauses on target?

Jakub

Re: [PATCH] dwarf2out: Emit DW_AT_export_symbols on anon unions/structs [PR113918]

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 10:48:28AM -0500, Jason Merrill wrote:
> On 2/16/24 04:14, Jakub Jelinek wrote:
> > DWARF5 added DW_AT_export_symbols both for use on inline namespaces (where
> > we emit it), but also on anonymous unions/structs (and we didn't emit that
> > attribute there).
> > The following patch fixes it.
> 
> Should this involve cp_decl_dwarf_attribute like the namespace handling?

I wrote it in dwarf2out.cc because the same thing needs to be done for C and
C++ (admittedly dunno if other languages have something similar).

Sure, it could be done in cp_decl_dwarf_attribute too but then it has to be
done in c_decl_dwarf_attribute which doesn't exist.  Though, it is slightly
complicated by DW_AT_export_symbols not actually going on the DW_TAG_member
die but on the DW_TAG_{structure,class,union}_type which DW_TAG_member uses
as its DW_AT_type.  But in order to ask the langhook, we likely need to pass
the FIELD_DECL and not the type...

Jakub

Re: [PATCH] libiberty: Fix error return value in pex_unix_exec_child [PR113957].

2024-02-16 Thread Jeff Law





On 2/16/24 07:56, Iain Sandoe wrote:

tested on x86_64-darwin, so far. OK for trunk if regression test is
successful on Linux too?
thanks
Iain

--- 8< ---

r14-5310-g879cf9ff45d940 introduced some new handling for spawning sub
processes.  The return value from the generic exec_child is examined
and needs to be < 0 to signal an error. However, the unix flavour of
this routine is returning the PID value set from the posix_spawn{p}.

This latter value is undefined per the manual pages for both Darwin
and Linux, and it seems Darwin, at least, sets the value to some
usually positive number (presumably the PID that would have been used
if the fork had succeeded).

The fix proposed here is to set the pid = -1 in the relevant error
paths.

PR other/113957

libiberty/ChangeLog:

* pex-unix.c (pex_unix_exec_child): Set pid = -1 in the error
paths, since that is used to signal an erroneous outcome for
the routine.
I wonder if that's what's been causing the native emulated PA builds to 
fail.  It's definitely related to that code.  Never saw it on any of the 
other native emulated targets though.


OK
jeff

Re: [PATCH] dwarf2out: Emit DW_AT_export_symbols on anon unions/structs [PR113918]

2024-02-16 Thread Jason Merrill


On 2/16/24 04:14, Jakub Jelinek wrote:

Hi!

DWARF5 added DW_AT_export_symbols both for use on inline namespaces (where
we emit it), but also on anonymous unions/structs (and we didn't emit that
attribute there).
The following patch fixes it.


Should this involve cp_decl_dwarf_attribute like the namespace handling?


Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-16  Jakub Jelinek  

PR debug/113918
* dwarf2out.cc (gen_field_die): Emit DW_AT_export_symbols
on anonymous unions or structs for -gdwarf-5 or -gno-strict-dwarf.

* c-c++-common/dwarf2/pr113918.c: New test.

--- gcc/dwarf2out.cc.jj 2024-02-10 11:25:09.836476263 +0100
+++ gcc/dwarf2out.cc2024-02-15 13:38:14.485675460 +0100
@@ -25153,6 +25153,20 @@ gen_field_die (tree decl, struct vlr_con
  
add_accessibility_attribute (decl_die, decl);
  
+  /* Add DW_AT_export_symbols to anonymous unions or structs.  */

+  if ((dwarf_version >= 5 || !dwarf_strict) && DECL_NAME (decl) == NULL_TREE)
+if (tree type = member_declared_type (decl))
+  {
+   tree type_id = TYPE_IDENTIFIER (TYPE_MAIN_VARIANT (type));
+   if (RECORD_OR_UNION_TYPE_P (type)
+   && (type_id == NULL_TREE || IDENTIFIER_ANON_P (type_id)))
+   {
+ dw_die_ref type_die = lookup_type_die (TYPE_MAIN_VARIANT (type));
+ if (type_die && get_AT (type_die, DW_AT_export_symbols) == NULL)
+   add_AT_flag (type_die, DW_AT_export_symbols, 1);
+   }
+}
+
/* Equate decl number to die, so that we can look up this decl later on.  */
equate_decl_number_to_die (decl, decl_die);
  }
--- gcc/testsuite/c-c++-common/dwarf2/pr113918.c.jj 2024-02-15 
13:26:53.935984554 +0100
+++ gcc/testsuite/c-c++-common/dwarf2/pr113918.c2024-02-15 
13:26:48.773055181 +0100
@@ -0,0 +1,33 @@
+/* PR debug/113918 */
+/* { dg-do compile } */
+/* { dg-options "-gdwarf-5 -dA -fno-merge-debug-strings" } */
+
+struct S {
+  union {
+int i;
+long long j;
+  };
+  struct {
+int k;
+long long l;
+  };
+  union {
+int m;
+long long n;
+  } u;
+  struct {
+int o;
+long long p;
+  } v;
+} s;
+
+int
+main ()
+{
+  s.i = 1;
+  s.k = 2;
+  s.u.m = 3;
+  s.v.o = 4;
+}
+
+/* { dg-final { scan-assembler-times "DW_AT_export_symbols" 4 } } */

Jakub

Re: [RFA/RFC] C++/OpenMP: Supporting (first)private for member variables [PR110347] - or VALUE_EXPR and gimplify

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 04:15:05PM +0100, Tobias Burnus wrote:
> I have no idea whether that would work - nor whether that would be
> the way forward. - Thoughts?

Don't have time to dive through this now in detail, just want to point out
why we ignore DECL_VALUE_EXPR on the magic var during gimplification for
the parallel case - gimplify_var_or_parm_decl has
  /* When within an OMP context, notice uses of variables.  */
  if (gimplify_omp_ctxp && omp_notice_variable (gimplify_omp_ctxp, decl, true))
return GS_ALL_DONE;

  /* If the decl is an alias for another expression, substitute it now.  */
  if (DECL_HAS_VALUE_EXPR_P (decl))
{
  *expr_p = unshare_expr (DECL_VALUE_EXPR (decl));
  return GS_OK;
}

  return GS_ALL_DONE;
and the trick is to make sure omp_notice_variable returns true if it is
undesirable to expand the DECL_VALUE_EXPR of decl at that point.
And whether omp_notice_variable returns true or false depends on
lang_hooks.decls.omp_disregard_value_expr langhook.
And that one has
  if (VAR_P (decl)
  && DECL_HAS_VALUE_EXPR_P (decl)
  && DECL_ARTIFICIAL (decl)
  && DECL_LANG_SPECIFIC (decl)
  && DECL_OMP_PRIVATIZED_MEMBER (decl))
return true;
to deal with this.

Jakub

[RFA/RFC] C++/OpenMP: Supporting (first)private for member variables [PR110347] - or VALUE_EXPR and gimplify

2024-02-16 Thread Tobias Burnus


The following works with PARALLEL but not with TARGET.

OpenMP states the following is supposed to work:

   A = 5;  // == this->A
   B = 6;  // == this->B
   C[44] = 7; // == this->C; assume 'int C[100]'

   #pragma  firstprivate(A,C) private(B)
   {
 A += 5;  // Now: A is 10.
 B = 7;
 C[44] += 7; // Now C is 14
 // It is unspecified what value this->{A,B,C} has
   }
   // {A,B,C[44]} == this->{A,B,C[44]} are still {5,6,7}

* * *

In the C++ FE, that's handled by creating a temporary variable:
  v = create_temporary_var (TREE_TYPE (m));
with
  SET_DECL_VALUE_EXPR (v, m);
  DECL_OMP_PRIVATIZED_MEMBER(v)
where 'm' is, e.g., 'this->A' - and a bunch of
  'if (DECL_OMP_PRIVATIZED_MEMBER(decl))'
in the g++ FE, only.

For PARALLEL, the VALUE_EXPR survives until omp-low.cc, which handles 
this for (first)privatizing.


But for TARGET, in gimplify.cc, after the following call in 
gimplify_omp_workshare


16813  gimple *g = gimplify_and_return_first (OMP_BODY (expr), );

the 'A' in the body will be turned into 'this->A'.

* * *

Thus, while there is after omplower the expected

  #pragma omp target ... firstprivate(A)

and also

   D.3081 = .omp_data_i->A; A= ...;

what actually gets used is

   D.3084 = .omp_data_i->D.3046;
   this = D.3084;
   D.2996 = this->A;

which unsurprisingly breaks.

* * *

This can be "fixed" by using the following patch.

With that patch, the -fdump-tree-omplower looks fine. But it does then 
fail with:


 during RTL pass: expand
 g2.cpp:11:7: internal compiler error: in make_decl_rtl, at varasm.cc:1443

for the 'A' with 'B = A' (where B is a non-member var) and 'A' is still 
as the value expr 'this->A'.


--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -3285,12 +3285,15 @@ gimplify_var_or_parm_decl (tree *expr_p)
   if (gimplify_omp_ctxp && omp_notice_variable (gimplify_omp_ctxp, 
decl, true))

 return GS_ALL_DONE;

+ if (!flag_openmp) // Assume: C++'s DECL_OMP_PRIVATIZED_MEMBER (decl)
+ {
   /* If the decl is an alias for another expression, substitute it. */
   if (DECL_HAS_VALUE_EXPR_P (decl))
 {
   *expr_p = unshare_expr (DECL_VALUE_EXPR (decl));
   return GS_OK;
 }
+ }

   return GS_ALL_DONE;
 }


* * *

Any idea / suggestion how to handle this best?

One way I see would be to add a lang-hook here to check for 
DECL_OMP_PRIVATIZED_MEMBER, similar to the hack above. And

then ensure that the DECL_VALUE_EXPR points to the var decl
in the target region (i.e. some hacking in omp-low.cc).

I have no idea whether that would work - nor whether that would be
the way forward. - Thoughts?

Tobias

#if TEMPL
template 
#else
#define T int
#endif
#if PRIVATE
#define firstprivate private
#endif
struct t {
  T A;
void f()
{
  T B = 49;
  A = 7;
  #pragma omp parallel firstprivate(A) if(0) shared(B) default(none)
  {
if (A != 7) __builtin_printf("ERROR 1b: %d (!= 7) inside omp parallel\n", A);
A = 5;
B = A;
  }
  if (A != 7) __builtin_printf("ERROR 1: %d (!= 7) omp parallel\n", A);
  if (B != 5) __builtin_printf("ERROR 1a: %d\n", B);
  A = 8; B = 49;
  #pragma omp parallel firstprivate(A)if(0) shared(B) default(none)
  {
if (A != 8) __builtin_printf("ERROR 1b: %d (!= 8) inside omp parallel\n", A);
A = 6;
B = A;
  }
  if (A != 8) __builtin_printf("ERROR 2: %d (!= 8) omp parallel\n", A);
  if (B != 6) __builtin_printf("ERROR 2a: %d\n", B);
  A = 8; B = 49;
  #pragma omp target firstprivate(A) map(from:B) defaultmap(none)
  {
if (A != 7) __builtin_printf("ERROR 2b: %d (!= 7) inside omp target\n", A);
A = 7;
B = A;
  }
  if (A != 8) __builtin_printf("ERROR 3: %d (!= 8) omp target\n", A);
  if (B != 7) __builtin_printf("ERROR 3a: %d\n", B);
  A = 9; B = 49;
  #pragma omp target firstprivate(A) map(from:B) defaultmap(none)
  {
if (A != 7) __builtin_printf("ERROR 3b: %d (!= 7) inside omp target\n", A);
A = 8;
B = A;
  }
  if (A != 9) __builtin_printf("ERROR 4: %d (!= 9) omp target\n", A); else __builtin_printf("OK\n");
  if (B != 8) __builtin_printf("ERROR 4a: %d\n", B);
}
};

void bar() {
#if TEMPL
  struct t x;
#else
  struct t x;
#endif
  x.f();
}

int main()
{
  bar();
}

Re: [PATCH] c++: Diagnose this specifier on template parameters [PR113929]

2024-02-16 Thread Jason Merrill


On 2/16/24 04:21, Jakub Jelinek wrote:

Hi!

For template parameters, the optional this specifier is in the grammar
template-parameter-list -> template-parameter -> parameter-declaration,
just [dcl.fct/6] says that it is only valid in parameter-list of certain
functions.  So, unlike the case of decl-specifier-seq used in non-terminals
other than parameter-declaration, I think it is better not to fix this
by
cp_parser_decl_specifier_seq (parser,
-flags | CP_PARSER_FLAGS_PARAMETER,
+flags | (template_parameter_p ? 0
+ : CP_PARSER_FLAGS_PARAMETER),
  _specifiers,
  _class_or_enum);
which would be pretending it isn't in the grammar, but by diagnosing it
separately, which is what the following patch does.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK.


2024-02-16  Jakub Jelinek  

PR c++/113929
* parser.cc (cp_parser_parameter_declaration): Diagnose this specifier
on template parameter declaration.

* g++.dg/parse/pr113929.C: New test.

--- gcc/cp/parser.cc.jj 2024-02-15 17:33:11.641453437 +0100
+++ gcc/cp/parser.cc2024-02-15 17:40:29.592447265 +0100
@@ -25724,8 +25724,15 @@ cp_parser_parameter_declaration (cp_pars
   for a C-style variadic function. */
token = cp_lexer_peek_token (parser->lexer);
  
-  bool const xobj_param_p

+  bool xobj_param_p
  = decl_spec_seq_has_spec_p (&decl_specifiers, ds_this);
+  if (xobj_param_p && template_parm_p)
+{
+  error_at (decl_specifiers.locations[ds_this],
+   "%<this%> specifier in template parameter declaration");
+  xobj_param_p = false;
+  decl_specifiers.locations[ds_this] = 0;
+}
  
bool diag_xobj_parameter_pack = false;

if (xobj_param_p && (declarator && declarator->parameter_pack_p))
--- gcc/testsuite/g++.dg/parse/pr113929.C.jj2024-02-15 17:43:18.500129688 
+0100
+++ gcc/testsuite/g++.dg/parse/pr113929.C   2024-02-15 17:42:54.564458109 
+0100
@@ -0,0 +1,7 @@
+// PR c++/113929
+// { dg-do compile }
+
+template // { dg-error "'this' specifier in template 
parameter declaration" }
+struct S {};
+template  // { dg-error "'this' specifier in template 
parameter declaration" }
+struct T {};

Jakub

Re: [PATCH] libstdc++: atomic: Add missing clear_padding in __atomic_float constructor

2024-02-16 Thread Jonathan Wakely

On Fri, 16 Feb 2024 at 14:10, Jakub Jelinek wrote:
>
> On Fri, Feb 16, 2024 at 01:51:54PM +, Jonathan Wakely wrote:
> > Ah, although __atomic_compare_exchange only takes pointers, the
> > compiler replaces that with a call to __atomic_compare_exchange_n
> > which takes the newval by value, which presumably uses an 80-bit FP
> > register and so the padding bits become indeterminate again.
>
> __atomic_compare_exchange_n only works with integers, so I guess
> it is doing VIEW_CONVERT_EXPR (aka union-style type punning) on the
> argument.
>
> Do you have preprocessed source for the testcase?

Sent offlist.

[RFA/RFC] C++/OpenMP: Supporting (first)private for member variables [PR110347] - or VALUE_EXPR and gimplify

2024-02-16 Thread Tobias Burnus


The following works with PARALLEL but not with TARGET.

OpenMP states the following is supposed to work:

  A = 5;  // == this->A
  B = 6;  // == this->B
  C[44] = 7; // == this->C; assume 'int C[100]'

  #pragma  firstprivate(A,C) private(B)
  {
A += 5;  // Now: A is 10.
B = 7;
C[44] += 7; // Now C is 14
// It is unspecified what value this->{A,B,C} has
  }
  // {A,B,C[44]} == this->{A,B,C[44]} are still {5,6,7}

* * *

In the C++ FE, that's handled by creating a temporary variable:
  v = create_temporary_var (TREE_TYPE (m));
with
  SET_DECL_VALUE_EXPR (v, m);
  DECL_OMP_PRIVATIZED_MEMBER (v)
where 'm' is, e.g., 'this->A' - and a bunch of
'if (DECL_OMP_PRIVATIZED_MEMBER (decl))' in the g++ FE, only.

For PARALLEL, the VALUE_EXPR survives until omp-low.cc, which handles this
for (first)privatizing. But for TARGET, in gimplify.cc, the following call
in gimplify_omp_workshare (line 16813)
  gimple *g = gimplify_and_return_first (OMP_BODY (expr), &body);
will turn the 'A' in the body into 'this->A'.

* * *
Thus, while there is after omplower the expected
  #pragma omp target ... firstprivate(A)
and also
  D.3081 = .omp_data_i->A;
  A = ...;
what actually gets used is
  D.3084 = .omp_data_i->D.3046;
  this = D.3084;
  D.2996 = this->A;
which unsurprisingly breaks.

* * *

This can be "fixed" by using the following patch. With that patch, the
-fdump-tree-omplower looks fine. But it does then fail with:

  during RTL pass: expand
  g2.cpp:11:7: internal compiler error: in make_decl_rtl, at varasm.cc:1443

for the 'A' with 'B = A' (where B is a non-member var) and 'A' is still
as the value expr 'this->A'.

--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -3285,12 +3285,15 @@ gimplify_var_or_parm_decl (tree *expr_p)
   if (gimplify_omp_ctxp && omp_notice_variable (gimplify_omp_ctxp, decl, true))
     return GS_ALL_DONE;
 
+  if (!flag_openmp) // Assume: C++'s DECL_OMP_PRIVATIZED_MEMBER (decl)
+    {
   /* If the decl is an alias for another expression, substitute it now.  */
   if (DECL_HAS_VALUE_EXPR_P (decl))
     {
       *expr_p = unshare_expr (DECL_VALUE_EXPR (decl));
       return GS_OK;
     }
+    }
 
   return GS_ALL_DONE;
 }

* * *

Any idea / suggestion how to handle this best? One way I see would be to add
a lang-hook here to check for DECL_OMP_PRIVATIZED_MEMBER, similar to the hack
above. And then ensure that the DECL_VALUE_EXPR points to the var decl
in the target region (i.e. some hacking in omp-low.cc).

I have no idea whether that would work - nor whether that would be
the way forward. - Thoughts?
Tobias
// Reproducer attached to the PR110347 discussion: (first)private clauses that
// name a non-static data member from inside a member function, on both
// 'omp parallel' and 'omp target'.
//
// Preprocessor knobs:
//   TEMPL   - when nonzero, 't' is a class template (instantiated below as
//             t<int>); otherwise 'T' is simply a macro for 'int'.
//   PRIVATE - when nonzero, every 'firstprivate' clause below is rewritten
//             to 'private' through a macro.
#if TEMPL
template <typename T>
#else
#define T int
#endif
#if PRIVATE
#define firstprivate private
#endif
struct t {
  T A;

  // Runs two 'parallel' regions followed by two 'target' regions.  Each
  // region receives member A through a firstprivate clause, overwrites its
  // copy, and copies that value into the local B; after the region the code
  // checks that this->A still holds the value assigned before the region and
  // that B received the value written inside it.  Every mismatch is reported
  // with __builtin_printf; nothing is returned.
  void f()
  {
    T B = 49;

    // --- parallel region #1 ------------------------------------------------
    A = 7;
    #pragma omp parallel firstprivate(A) if(0) shared(B) default(none)
    {
      if (A != 7) __builtin_printf("ERROR 1b: %d (!= 7) inside omp parallel\n", A);
      A = 5;
      B = A;
    }
    if (A != 7) __builtin_printf("ERROR 1: %d (!= 7) omp parallel\n", A);
    if (B != 5) __builtin_printf("ERROR 1a: %d\n", B);

    // --- parallel region #2 ------------------------------------------------
    A = 8; B = 49;
    #pragma omp parallel firstprivate(A)if(0) shared(B) default(none)
    {
      // NOTE(review): the "1b" label repeats the one used in region #1;
      // left untouched so the output matches the posted reproducer.
      if (A != 8) __builtin_printf("ERROR 1b: %d (!= 8) inside omp parallel\n", A);
      A = 6;
      B = A;
    }
    if (A != 8) __builtin_printf("ERROR 2: %d (!= 8) omp parallel\n", A);
    if (B != 6) __builtin_printf("ERROR 2a: %d\n", B);

    // --- target region #1 --------------------------------------------------
    A = 8; B = 49;
    #pragma omp target firstprivate(A) map(from:B) defaultmap(none)
    {
      // NOTE(review): A was assigned 8 immediately above, yet this compares
      // against 7 - confirm whether the expected value is intentional.
      if (A != 7) __builtin_printf("ERROR 2b: %d (!= 7) inside omp target\n", A);
      A = 7;
      B = A;
    }
    if (A != 8) __builtin_printf("ERROR 3: %d (!= 8) omp target\n", A);
    if (B != 7) __builtin_printf("ERROR 3a: %d\n", B);

    // --- target region #2 --------------------------------------------------
    A = 9; B = 49;
    #pragma omp target firstprivate(A) map(from:B) defaultmap(none)
    {
      // NOTE(review): A was assigned 9 immediately above, yet this compares
      // against 7 - confirm whether the expected value is intentional.
      if (A != 7) __builtin_printf("ERROR 3b: %d (!= 7) inside omp target\n", A);
      A = 8;
      B = A;
    }
    if (A != 9) __builtin_printf("ERROR 4: %d (!= 9) omp target\n", A); else __builtin_printf("OK\n");
    if (B != 8) __builtin_printf("ERROR 4a: %d\n", B);
  }
};

// Creates one 't' object (t<int> in the TEMPL configuration, where a template
// argument list is required) and runs its checks.
void bar() {
#if TEMPL
  struct t<int> x;
#else
  struct t x;
#endif
  x.f();
}

// Entry point: runs the reproducer once.
int main()
{
  bar();
}

Re: [PATCH] c++/modules: optimize tree flag streaming

2024-02-16 Thread Patrick Palka

On Thu, 15 Feb 2024, Patrick Palka wrote:

> Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
> OK for trunk?
> 
> -- >8 --
> 
> One would expect consecutive calls to bytes_in/out::b for streaming
> adjacent bits, as we do for tree flag streaming, to at least be
> optimized by the compiler into individual bit operations using
> statically known bit positions (and ideally merged into larger sized
> reads/writes).
> 
> Unfortunately this doesn't happen because the compiler has trouble
> tracking the values of this->bit_pos and this->bit_val across such
> calls, likely because the compiler doesn't know 'this' and so it's
> treated as global memory.  This means for each consecutive bit stream
> operation, bit_pos and bit_val are loaded from memory, checked if
> buffering is needed, and finally the bit is extracted from bit_val
> according to the (unknown) bit_pos, even though relative to the previous
> operation (if we didn't need to buffer) bit_val is unchanged and bit_pos
> is just 1 larger.  This ends up being quite slow, with tree_node_bools
> taking 10% of time when streaming in parts of the std module.
> 
> This patch optimizes this by making tracking of bit_pos and bit_val
> easier for the compiler.  Rather than bit_pos and bit_val being members
> of the (effectively global) bytes_in/out objects, this patch factors out
> the bit streaming code/state into separate classes bits_in/out that get
> constructed locally as needed for bit streaming.  Since these objects
> are now clearly local, the compiler can more easily track their values.
> 
> And since bit streaming is intended to be batched it's natural for these
> new classes to be RAII-enabled such that the bit stream is flushed upon
> destruction.
> 
> In order to make the most of this improved tracking of bit position,
> this patch changes parts where we conditionally stream a tree flag
> to unconditionally stream (the flag or a dummy value).  That way
> the number of bits streamed and the respective bit positions are as
> statically known as reasonably possible.  In lang_decl_bools and
> lang_type_bools we flush the current bit buffer at the start so that
> subsequent bit positions are statically known.  And in core bools, we
> can add explicit early exits utilizing invariants that the compiler
> can't figure out itself (e.g. a tree code can't have both TS_TYPE_COMMON
> and TS_DECL_COMMON, and if a tree code doesn't have TS_DECL_COMMON then
> it doesn't have TS_DECL_WITH_VIS).  Finally if we're streaming fewer
> than 4 bits, it's more space efficient to stream them as individual
> bytes rather than as packed bits (due to the 32-bit buffer).

Oops, this last sentence is wrong.  Although the size of the bit buffer
is 32 bits, upon flushing we rewind unused bytes within the buffer,
which means streaming 2-8 bits ends up using only one byte not all four.
So v2 below undoes this pessimization.

> This patch also moves the definitions of the relevant streaming classes
> into anonymous namespaces so that the compiler can make more informed
> decisions about inlining their member functions.
> 
> After this patch, compile time for a simple Hello World using the std
> module is reduced by 7% with a release compiler.  The on-disk size of
> the std module increases by 0.7% (presumably due to the extra flushing
> done in lang_decl_bools and lang_type_bools).

The on-disk std module now only grows 0.4% instead of 0.7%.

> 
> The bit stream out performance isn't improved as much as the stream in
> due to the spans/lengths instrumentation performed on stream out (which
> probably should be e.g. removed for release builds?)

-- >8 --

gcc/cp/ChangeLog:

* module.cc: Update comment about classes defined.
(class data): Enclose in an anonymous namespace.
(data::calc_crc): Moved from bytes::calc_crc.
(class bytes): Remove.  Move bit_flush to namespace scope.
(class bytes_in): Enclose in an anonymous namespace.  Inherit
directly from data and adjust accordingly.  Move b and bflush
members to bits_in.
(class bytes_out): As above.  Remove is_set static data member.
(bit_flush): Moved from class bytes.
(struct bits_in): Define.
(struct bits_out): Define.
(bytes_out::bflush): Moved to bits_out/in.
(bytes_in::bflush): Likewise
(bytes_in::bfill): Removed.
(bytes_out::b): Moved to bits_out/in.
(bytes_in::b): Likewise.
(class trees_in): Enclose in an anonymous namespace.
(class trees_out): Enclose in an anonymous namespace.
(trees_out::core_bools): Add bits_out/in parameter and use it.
Unconditionally stream a bit for public_flag.  Add early exits
as appropriate.
(trees_out::core_bools): Likewise.
(trees_out::lang_decl_bools): Add bits_out/in parameter and use
it.  Flush the current bit buffer at the start.  Unconditionally
stream a bit for module_keyed_decls_p.

[PATCH] libiberty: Fix error return value in pex_unix_exec_child [PR113957].

2024-02-16 Thread Iain Sandoe

tested on x86_64-darwin, so far. OK for trunk if regression test is
successful on Linux too?
thanks
Iain

--- 8< ---

r14-5310-g879cf9ff45d940 introduced some new handling for spawning sub
processes.  The return value from the generic exec_child is examined
and needs to be < 0 to signal an error. However, the unix flavour of
this routine is returning the PID value set from the posix_spawn{p}.

This latter value is undefined per the manual pages for both Darwin
and Linux, and it seems Darwin, at least, sets the value to some
usually positive number (presumably the PID that would have been used
if the fork had succeeded).

The fix proposed here is to set the pid = -1 in the relevant error
paths.

PR other/113957

libiberty/ChangeLog:

* pex-unix.c (pex_unix_exec_child): Set pid = -1 in the error
paths, since that is used to signal an erroneous outcome for
the routine.

Signed-off-by: Iain Sandoe 
---
 libiberty/pex-unix.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libiberty/pex-unix.c b/libiberty/pex-unix.c
index af98062a94c..f3a1cc95ada 100644
--- a/libiberty/pex-unix.c
+++ b/libiberty/pex-unix.c
@@ -695,6 +695,7 @@ pex_unix_exec_child (struct pex_obj *obj ATTRIBUTE_UNUSED,
{
  *err = ret;
  *errmsg = "posix_spawnp";
+ pid = -1; /* The value of pid is unspecified on failure.  */
  goto exit;
}
 }
@@ -705,6 +706,7 @@ pex_unix_exec_child (struct pex_obj *obj ATTRIBUTE_UNUSED,
{
  *err = ret;
  *errmsg = "posix_spawn";
+ pid = -1;
  goto exit;
}
 }
-- 
2.39.2 (Apple Git-143)

Re: [PATCH] Turn on LRA on all targets

2024-02-16 Thread Paul Koning

> On Feb 16, 2024, at 6:34 AM, Maciej W. Rozycki  wrote:
> 
> On Thu, 15 Feb 2024, Paul Koning wrote:
> 
>>> On May 15, 2023, at 5:09 PM, Maciej W. Rozycki  wrote:
>>> 
>>> ...
>>> 
>>> I may choose to implement a non-DWARF unwinder instead, as the VAX stack 
>>> frame is always fully described by the hardware and there is never ever a 
>>> need for debug information to be able to decode any VAX stack frame (the 
>>> RET machine instruction uses the stack frame information to restore the 
>>> previous PC, FP, SP, AP and any static registers saved by CALLS).
>> 
>> That would make sense; it's like the heuristic unwinder found in some 
>> other targets (I remember the MIPS one, which worked fairly well and 
>> allowed debugging without useable debug data).
> 
> Not really, in particular because EH unwinding has to be reliable and 
> heuristics inherently is not.

Fair enough, but what I meant is only that it's conceptually similar: unwind 
based on the code and machine state, rather than auxiliary information like 
debug data.  And I think your point was that on VAX this is indeed a reliable 
technique by virtue of the ISA.  In fact, the standard way to do exception 
handling unwinding is part of the original VAX architecture (via the otherwise 
unused first word (I think) of the call frame).

paul

Re: GCN: Conditionalize 'define_expand "reduc__scal_"' on '!TARGET_RDNA2_PLUS' [PR113615]

2024-02-16 Thread Andrew Stubbs


On 16/02/2024 14:34, Thomas Schwinge wrote:

Hi!

On 2024-01-29T11:34:05+0100, Tobias Burnus  wrote:

Andrew wrote off list:
"Vector reductions don't work on RDNA, as is, but they're
 supposed to be disabled by the insn condition"

This patch disables "fold_left_plus_", which is about
vectorization and in the code path shown in the backtrace.
I can also confirm manually that it fixes the ICE I saw and
also the ICE for the testfile that Richard's PR shows at the
end of his backtrace.  (-O3 is needed to trigger the ICE.)


On top of that, OK to push the attached
"GCN: Conditionalize 'define_expand "reduc__scal_"' on 
'!TARGET_RDNA2_PLUS' [PR113615]"?

Which of the 'assert's are worth keeping?

Only tested 'vect.exp' for 'check-gcc-c' so far; full testing to run
later.

Please confirm I'm understanding this correctly:

Andrew's original commit c7ec7bd1c6590cf4eed267feab490288e0b8d691
"amdgcn: add -march=gfx1030 EXPERIMENTAL" did this:

  (define_expand "reduc__scal_"
[(set (match_operand: 0 "register_operand")
 (unspec:
   [(match_operand:V_ALL 1 "register_operand")]
   REDUC_UNSPEC))]
 -  ""
 +  "!TARGET_RDNA2" [later '!TARGET_RDNA2_PLUS']
{
  [...]

This conditional, however, does *not* govern any explicit
'gen_reduc_plus_scal_', and therefore Tobias in
commit 7cc2262ec9a410dc56d1c1c6b950c922e14f621d
"gcn/gcn-valu.md: Disable fold_left_plus for TARGET_RDNA2_PLUS [PR113615]"
had to replicate the '!TARGET_RDNA2_PLUS' condition:


@@ -4274,7 +4274,8 @@ (define_expand "fold_left_plus_"
   [(match_operand: 0 "register_operand")
(match_operand: 1 "gcn_alu_operand")
(match_operand:V_FP 2 "gcn_alu_operand")]
-  "can_create_pseudo_p ()
+  "!TARGET_RDNA2_PLUS
+   && can_create_pseudo_p ()
 && (flag_openacc || flag_openmp
 || flag_associative_math)"
{

|  rtx dest = operands[0];
|  rtx scalar = operands[1];
|  rtx vector = operands[2];
|  rtx tmp = gen_reg_rtx (mode);
|
|  emit_insn (gen_reduc_plus_scal_ (tmp, vector));
|  [...]

..., and I thus now have to do similar for
'gen_reduc__scal_' use in here:

  (define_expand "reduc__scal_"
[(match_operand: 0 "register_operand")
 (fminmaxop:V_FP
   (match_operand:V_FP 1 "register_operand"))]
 -  ""
 +  "!TARGET_RDNA2_PLUS"
{
  /* fmin/fmax are identical to smin/smax.  */
  emit_insn (gen_reduc__scal_ (operands[0], 
operands[1]));
  [...]


OK. I don't mind the asserts. Hopefully they're redundant, but I suppose 
it's better than tracking down an unrecognised instruction in a later pass.


Andrew

GCN: Conditionalize 'define_expand "reduc__scal_"' on '!TARGET_RDNA2_PLUS' [PR113615] (was: [patch] gcn/gcn-valu.md: Disable fold_left_plus for TARGET_RDNA2_PLUS [PR113615])

2024-02-16 Thread Thomas Schwinge

Hi!

On 2024-01-29T11:34:05+0100, Tobias Burnus  wrote:
> Andrew wrote off list:
>"Vector reductions don't work on RDNA, as is, but they're
> supposed to be disabled by the insn condition"
>
> This patch disables "fold_left_plus_", which is about
> vectorization and in the code path shown in the backtrace.
> I can also confirm manually that it fixes the ICE I saw and
> also the ICE for the testfile that Richard's PR shows at the
> end of his backtrace.  (-O3 is needed to trigger the ICE.)

On top of that, OK to push the attached
"GCN: Conditionalize 'define_expand "reduc__scal_"' on 
'!TARGET_RDNA2_PLUS' [PR113615]"?

Which of the 'assert's are worth keeping?

Only tested 'vect.exp' for 'check-gcc-c' so far; full testing to run
later.

Please confirm I'm understanding this correctly:

Andrew's original commit c7ec7bd1c6590cf4eed267feab490288e0b8d691
"amdgcn: add -march=gfx1030 EXPERIMENTAL" did this:

 (define_expand "reduc__scal_"
   [(set (match_operand: 0 "register_operand")
(unspec:
  [(match_operand:V_ALL 1 "register_operand")]
  REDUC_UNSPEC))]
-  ""
+  "!TARGET_RDNA2" [later '!TARGET_RDNA2_PLUS']
   {
 [...]

This conditional, however, does *not* govern any explicit
'gen_reduc_plus_scal_', and therefore Tobias in
commit 7cc2262ec9a410dc56d1c1c6b950c922e14f621d
"gcn/gcn-valu.md: Disable fold_left_plus for TARGET_RDNA2_PLUS [PR113615]"
had to replicate the '!TARGET_RDNA2_PLUS' condition:

> @@ -4274,7 +4274,8 @@ (define_expand "fold_left_plus_"
>   [(match_operand: 0 "register_operand")
>(match_operand: 1 "gcn_alu_operand")
>(match_operand:V_FP 2 "gcn_alu_operand")]
> -  "can_create_pseudo_p ()
> +  "!TARGET_RDNA2_PLUS
> +   && can_create_pseudo_p ()
> && (flag_openacc || flag_openmp
> || flag_associative_math)"
>{
|  rtx dest = operands[0];
|  rtx scalar = operands[1];
|  rtx vector = operands[2];
|  rtx tmp = gen_reg_rtx (mode);
|  
|  emit_insn (gen_reduc_plus_scal_ (tmp, vector));
|  [...]

..., and I thus now have to do similar for
'gen_reduc__scal_' use in here:

 (define_expand "reduc__scal_"
   [(match_operand: 0 "register_operand")
(fminmaxop:V_FP
  (match_operand:V_FP 1 "register_operand"))]
-  ""
+  "!TARGET_RDNA2_PLUS"
   {
 /* fmin/fmax are identical to smin/smax.  */
 emit_insn (gen_reduc__scal_ (operands[0], 
operands[1]));
 [...]


Grüße
 Thomas


>From 1ca37da07f0fd3fa2e87fcbde9f2c2aadbe320dc Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Fri, 16 Feb 2024 13:04:00 +0100
Subject: [PATCH] GCN: Conditionalize 'define_expand
 "reduc__scal_"' on '!TARGET_RDNA2_PLUS' [PR113615]

On top of commit c7ec7bd1c6590cf4eed267feab490288e0b8d691
"amdgcn: add -march=gfx1030 EXPERIMENTAL" conditionalizing
'define_expand "reduc__scal_"' on
'!TARGET_RDNA2' (later: '!TARGET_RDNA2_PLUS'), we then did similar in
commit 7cc2262ec9a410dc56d1c1c6b950c922e14f621d
"gcn/gcn-valu.md: Disable fold_left_plus for TARGET_RDNA2_PLUS [PR113615]"
to conditionalize 'define_expand "fold_left_plus_"' on
'!TARGET_RDNA2_PLUS', but I found we also need to conditionalize the related
'define_expand "reduc__scal_"' on '!TARGET_RDNA2_PLUS', to
avoid ICEs like:

[...]/gcc.dg/vect/pr108608.c: In function 'foo':
[...]/gcc.dg/vect/pr108608.c:9:1: error: unrecognizable insn:
(insn 34 33 35 2 (set (reg:V64DF 723)
(unspec:V64DF [
(reg:V64DF 690 [ vect_m_11.20 ])
(const_int 1 [0x1])
] UNSPEC_MOV_DPP_SHR)) -1
 (nil))
during RTL pass: vregs

Similar for 'gcc.dg/vect/vect-fmax-2.c', 'gcc.dg/vect/vect-fmin-2.c', and
'UNSPEC_SMAX_DPP_SHR' for 'gcc.dg/vect/vect-fmax-1.c', and
'UNSPEC_SMIN_DPP_SHR' for 'gcc.dg/vect/vect-fmin-1.c', when running 'vect.exp'
for 'check-gcc-c'.

	PR target/113615
	gcc/
	* config/gcn/gcn-valu.md (define_expand "reduc__scal_"):
	Conditionalize on '!TARGET_RDNA2_PLUS'.
---
 gcc/config/gcn/gcn-valu.md | 6 +-
 gcc/config/gcn/gcn.cc  | 4 
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 59e27d0aed79..973a72e3fc41 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -4247,6 +4247,8 @@
 	  REDUC_UNSPEC))]
   "!TARGET_RDNA2_PLUS"
   {
+gcc_checking_assert (!TARGET_RDNA2_PLUS);
+
 rtx tmp = gcn_expand_reduc_scalar (mode, operands[1],
    );
 
@@ -4261,8 +4263,10 @@
   [(match_operand: 0 "register_operand")
(fminmaxop:V_FP
  (match_operand:V_FP 1 "register_operand"))]
-  ""
+  "!TARGET_RDNA2_PLUS"
   {
+gcc_checking_assert (!TARGET_RDNA2_PLUS);
+
 /* fmin/fmax are identical to smin/smax.  */
 emit_insn (gen_reduc__scal_ (operands[0], operands[1]));
 DONE;
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index fce2d4d30c9d..8fa445deda53 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@

Re: [PATCH] Turn on LRA on all targets

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 02:23:54PM +, Maciej W. Rozycki wrote:
> On Fri, 16 Feb 2024, Segher Boessenkool wrote:
> 
> > >  Conversely no heuristics is required to unwind VAX frames, because they 
> > > are fixed in layout by hardware, fully self-described, and with the 
> > > hardware frame pointer always available.
> > 
> > The downside of the VAX situation of course is that the compiler has no
> > freedom to optimise the frame and *logue code at all, let alone well.
> > This may not matter so much on narrow ucoded in-order machines, there
> > are different balances there :-)
> 
>  There is no function prologue to optimise in the VAX case, because all 
> the frame setup has already been made by the CALLS instruction itself in 
> the caller.  The first machine instruction of the callee is technically 
> already past the "prologue".  And then RET serves as the whole function 
> "epilogue".

So, what is the problem with DWARF unwinding?  Just make sure to emit
appropriate instructions describing the saving of the corresponding
registers at specific points based on CFA at the start of the function
(so that it appears in CIE instructions) and that should be all that is
needed, no?

Jakub

Re: [PATCH] Turn on LRA on all targets

2024-02-16 Thread Maciej W. Rozycki

On Fri, 16 Feb 2024, Segher Boessenkool wrote:

> >  Conversely no heuristics is required to unwind VAX frames, because they 
> > are fixed in layout by hardware, fully self-described, and with the 
> > hardware frame pointer always available.
> 
> The downside of the VAX situation of course is that the compiler has no
> freedom to optimise the frame and *logue code at all, let alone well.
> This may not matter so much on narrow ucoded in-order machines, there
> are different balances there :-)

 There is no function prologue to optimise in the VAX case, because all 
the frame setup has already been made by the CALLS instruction itself in 
the caller.  The first machine instruction of the callee is technically 
already past the "prologue".  And then RET serves as the whole function 
"epilogue".

 A discussion happened at the VAX/NetBSD mailing list as to a change to 
the calling convention to make it more RISC-like and replace the procedure 
call (CALLS and CALLG; the latter unused in our psABI) and return (RET) 
instructions with the subroutine branch (BSB), jump (JSB) and return (RSB) 
instructions, which are similar in semantics to say x86 CALL (for BSB/JSB) 
and RET (for RSB) instructions.  Should that happen we'd have fine-grained 
control over the function prologues and epilogues.

 It's not clear however what the consequences such a change would have on 
performance, and it would surely increase code size.  And the final VAX 
microarchitecture implementations (NVAX/NVAX+, dating back to 1991) have 
provisions in hardware, unsurprisingly, for optimising the execution speed 
of the CALL* and RET instructions.

  Maciej

Re: [PATCH] libstdc++: atomic: Add missing clear_padding in __atomic_float constructor

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 01:51:54PM +, Jonathan Wakely wrote:
> Ah, although __atomic_compare_exchange only takes pointers, the
> compiler replaces that with a call to __atomic_compare_exchange_n
> which takes the newval by value, which presumably uses an 80-bit FP
> register and so the padding bits become indeterminate again.

__atomic_compare_exchange_n only works with integers, so I guess
it is doing VIEW_CONVERT_EXPR (aka union-style type punning) on the
argument.

Do you have preprocessed source for the testcase?

Jakub

Re: GCN RDNA2+ vs. GCC SLP vectorizer

2024-02-16 Thread Thomas Schwinge

Hi!

On 2024-02-16T12:41:06+, Andrew Stubbs  wrote:
> On 16/02/2024 12:26, Richard Biener wrote:
>> On Fri, 16 Feb 2024, Andrew Stubbs wrote:
>>> On 16/02/2024 10:17, Richard Biener wrote:
 On Fri, 16 Feb 2024, Thomas Schwinge wrote:
> On 2023-10-20T12:51:03+0100, Andrew Stubbs  wrote:
>> I've committed this patch
>
> ... as commit c7ec7bd1c6590cf4eed267feab490288e0b8d691
> "amdgcn: add -march=gfx1030 EXPERIMENTAL", which the later RDNA3/gfx1100
> support builds on top of, and that's what I'm currently working on
> getting proper GCC/GCN target (not offloading) results for.
>
> Now looking at 'gcc.dg/vect/bb-slp-cond-1.c', which is reasonably simple,
> and hopefully representative for other SLP execution test FAILs
> (regressions compared to my earlier non-gfx1100 testing).
>
>   $ build-gcc/gcc/xgcc -Bbuild-gcc/gcc/
>   source-gcc/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c
>   --sysroot=install/amdgcn-amdhsa -ftree-vectorize
>   -fno-tree-loop-distribute-patterns -fno-vect-cost-model -fno-common
>   -O2 -fdump-tree-slp-details -fdump-tree-vect-details -isystem
>   build-gcc/amdgcn-amdhsa/gfx1100/newlib/targ-include -isystem
>   source-gcc/newlib/libc/include
>   -Bbuild-gcc/amdgcn-amdhsa/gfx1100/newlib/
>   -Lbuild-gcc/amdgcn-amdhsa/gfx1100/newlib -wrapper
>   setarch,--addr-no-randomize -fdump-tree-all-all -fdump-ipa-all-all
>   -fdump-rtl-all-all -save-temps -march=gfx1100
>
> The '-march=gfx1030' 'a-bb-slp-cond-1.s' is identical (apart from
> 'TARGET_PACKED_WORK_ITEMS' in 'gcn_target_asm_function_prologue'), so I
> suppose will also exhibit the same failure mode, once again?
>
> Compared to '-march=gfx90a', the differences begin in
> 'a-bb-slp-cond-1.c.266r.expand' (only!), down to 'a-bb-slp-cond-1.s'.
>
> Changed like:
>
>   @@ -38,10 +38,10 @@ int main ()
>#pragma GCC novector
>  for (i = 1; i < N; i++)
>if (a[i] != i%4 + 1)
>   -  abort ();
>   +  __builtin_printf("%d %d != %d\n", i, a[i], i%4 + 1);
>
>  if (a[0] != 5)
>   -abort ();
>   +__builtin_printf("%d %d != %d\n", 0, a[0], 5);
>
> ..., we see:
>
>   $ flock /tmp/gcn.lock build-gcc/gcc/gcn-run a.out
>   40 5 != 1
>   41 6 != 2
>   42 7 != 3
>   43 8 != 4
>   44 5 != 1
>   45 6 != 2
>   46 7 != 3
>   47 8 != 4
>
> '40..47' are the 'i = 10..11' in 'foo', and the expectation is
> 'a[i * stride + 0..3] != 0'.  So, either some earlier iteration has
> scribbled zero values over these (vector lane masking issue, perhaps?),
> or some other code generation issue?

>>> [...], I must be doing something different because vect/bb-slp-cond-1.c
>>> passes for me, on gfx1100.

That's strange.  I've looked at your log file (looks good), and used your
toolchain to compile, and your 'gcn-run' to invoke, and still do get:

$ flock /tmp/gcn.lock ~/gcn-run ~/bb-slp-cond-1.exe
GCN Kernel Aborted
Kernel aborted

Andrew, later on, please try what happens when you put an unconditional
'abort' call into a test case?

>> I didn't try to run it - when doing make check-gcc fails to using
>> gcn-run for test invocation

Note, that for such individual test cases, invoking the compiler and then
'gcn-run' manually would seem easiest?

>> what's the trick to make it do that?

I can tell you've probably not done much "embedded" or simulator testing of
GCC targets?  ;-P

> There's a config file for nvptx here: 
> https://github.com/SourceryTools/nvptx-tools/blob/master/nvptx-none-run.exp

Yes, and I have pending some updates to that one, to be finished once
I've generally got my testing set up again, to a sufficient degree...

> You can probably make the obvious adjustments. I think Thomas has a GCN 
> version with a few more features.

Right.  I'm attaching my current 'amdgcn-amdhsa-run.exp'.

I'm aware that the 'set_board_info gcc,[...] [...]' may be obsolete/wrong
(as Andrew also noted privately) -- likewise, at least in part, for
GCC/nvptx, which is where I copied all that from.  (Will revise later;
not relevant for this discussion, here.)

Similar to what I've recently added to libgomp, there is 'flock'ing here,
so that you may use 'make -j[...] check' for (partial) parallelism, but
still all execution testing runs serialized.  I found this to greatly
help denoise the test results.  (Not ideal, of course, but improving that
is for later, too.)

You may want to disable the 'HSA_STATUS_ERROR_OUT_OF_RESOURCES' thing if
that doesn't work like that in your case.  (I've no idea what
'amdgpu_gpu_recover' would do if the GPU is also used for display.)  But
this, again, greatly helps denoise test results, at least for the one
system I'm currently testing on.

I intend to

Re: [PATCH] libstdc++: atomic: Add missing clear_padding in __atomic_float constructor

2024-02-16 Thread Jonathan Wakely

On Fri, 16 Feb 2024 at 12:38, Jonathan Wakely  wrote:
>
> On Fri, 2 Feb 2024 at 16:52, xndcn  wrote:
> >
> > Thank you for your careful review!
> >
> > > But we don't need a new one if it's going to be used in exactly one test 
> > > and if the new option does the same thing for all targets that run the 
> > > test.
> > Got it, thanks. Now add option "-latomic" directly, but it still rely
> > on the trick "[atomic_link_flags [get_multilibs]]"
> >
> > > No, because the patch is supposed to prevent the infinite loop, and so 
> > > there's no need to stop it looping after 10s. It won't loop at all.
> > Thanks, deleted.
> >
> > > We only need to clear padding for long double, not float and double, 
> > > right?
> > Yes, actually there is a check "if constexpr
> > (__atomic_impl::__maybe_has_padding<_Fp>())".
> > But "__atomic_impl::__clear_padding(_M_fp); " is indeed simply, so fixed 
> > here.
> >
> > > Why can't we run this on all targets?
> > Got it, now target option deleted.
> >
> > > There's no reason to use __builtin_memset here, just include  
> > > and use std::memcpy.
> > Thanks, fixed.
> >
> > > It definitely does have padding, just say "long double has padding bits 
> > > on x86"
> > Thanks, fixed.
> >
> > So here comes the latest patch:
>
>
> Thanks. I've applied the patch to my tree, but the new test fails
> pretty reliably.
>
> The infinite loop in std::atomic::fetch_add is fixed by
> clearing padding in the constructor, but the test fails on the
> compare_exchange_weak or compare_exchange_strong lines here:
>
>
> > +as.fetch_add(t);
> > +long double s = f + t;
> > +t = as.load();
> > +VERIFY(s == t); // padding ignored on float comparing
> > +fill_padding(s);
> > +VERIFY(as.compare_exchange_weak(s, f)); // padding cleared on cmpexchg
> > +fill_padding(f);
> > +VERIFY(as.compare_exchange_strong(f, t)); // padding cleared on 
> > cmpexchg
> >
>
>
> I think the problem is here in __atomic_impl::__compare_exchange:
>
>if (__atomic_compare_exchange(__pval, __pexp, __pi,
>   __is_weak, int(__s), int(__f)))
>  return true;
>
> Even though padding in *__pexp and *__pi has been cleared, the value
> of *__pval after a successful __atomic_compare_exchange has non-zero
> padding. That means that the next compare_exchange will fail, because
> we assume that the stored value always has zeroed padding bits.
>
> Here's a gdb session showing that __atomic_compare_exchange stores a
> value with non-zero padding:
>
> Breakpoint 2, test01 () at compare_exchange_padding.cc:43
> 43long double s2 = s;
> (gdb) n
> 44fill_padding(s2);
> (gdb)
> 45while (!as.compare_exchange_weak(s2, f)) // padding cleared
> on compexchg
> (gdb) p/x as._M_fp
> $11 = 0x40008000
> (gdb) step
> std::__atomic_float::compare_exchange_weak
> (this=0x7fffd8c0, __expected=@0x7fffd8a0: 2, __desired=0.5,
> __order=std::memory_order::seq_cst) at
> /home/jwakely/gcc/14/include/c++/14.0.1/bits/atomic_base.h:1387
> 1387return compare_exchange_weak(__expected, __desired, __order,
> (gdb) step
> std::__atomic_float::compare_exchange_weak
> (this=0x7fffd8c0, __expected=@0x7fffd8a0: 2, __desired=0.5,
> __success=std::memory_order::seq_cst,
> __failure=std::memory_order::seq_cst) at
> /home/jwakely/gcc/14/include/c++/14.0.1/bits/atomic_base.h:1347
> 1347return __atomic_impl::compare_exchange_weak(&_M_fp,
> (gdb) step
> std::__atomic_impl::compare_exchange_weak
> (__check_padding=false, __failure=std::memory_order::seq_cst,
> __success=std::memory_order::seq_cst, __desired=0.5,
> __expected=@0x7fffd8a0: 2, __ptr=0x7fffd8c0)
> at /home/jwakely/gcc/14/include/c++/14.0.1/bits/atomic_base.h:1123
> 1123return __atomic_impl::__compare_exchange<_AtomicRef>(
> (gdb)
> std::__atomic_impl::__compare_exchange
> (__f=std::memory_order::seq_cst, __s=std::memory_order::seq_cst,
> __is_weak=true,
> __i=, __e=@0x7fffd8a0: 2,
> __val=@0x7fffd8c0: 2) at
> /home/jwakely/gcc/14/include/c++/14.0.1/bits/atomic_base.h:994
> 994 __glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
> (gdb) n
> 997 _Tp* const __pval = std::__addressof(__val);
> (gdb)
> 1008_Vp* const __pi = __atomic_impl::__clear_padding(__i);
> (gdb)
> 1010_Vp __exp = __e;
> (gdb)
> 1012_Vp* const __pexp = __atomic_impl::__clear_padding(__exp);
> (gdb)
> 1016if (__atomic_compare_exchange(__pval, __pexp, __pi,
> (gdb) p/x *__pval
> $12 = 0x40008000
> (gdb) p/x *__pexp
> $13 = 0x40008000
> (gdb) p/x *__pi
> $14 = 0x3ffe8000
> (gdb) n
> 1018  return true;
> (gdb) p/x *__pval
> $15 = 0x77bf3ffe8000
> (gdb)
>
> We stored *__pi which has zero padding, but the result in *__pval has
> non-zero padding. This doesn't seem to be gdb being misleading by
> loading

Re: [PATCH] Turn on LRA on all targets

2024-02-16 Thread Segher Boessenkool

On Fri, Feb 16, 2024 at 11:34:55AM +0000, Maciej W. Rozycki wrote:
>  Not really, in particular because EH unwinding has to be reliable and 
> heuristics inherently is not.

Yup.  Which is why I did 0359465c703a for rs6000 six years ago (how time
flies!)  The commit message for that includes

To find out where on-entry register values live at any point in a
program, GDB currently tries to parse the executable code.
This does not work very well, for example it gets confused if some
accesses to the stack use the frame pointer (r31) and some use the
stack pointer (r1).  A symptom is that backtraces can be cut short.

and the patch does

+  /* By default, always emit DWARF-2 unwind info.  This allows debugging
+ without maintaining a stack frame back-chain.  It also allows the
+ debugger to find out where on-entry register values are stored at any
+ point in a function, without having to analyze the executable code (which
+ isn't even possible to do in the general case).  */
+#ifdef OBJECT_FORMAT_ELF
+  opts->x_flag_asynchronous_unwind_tables = 1;
+#endif

We went through very many refinements of the heuristics through the
years, but at some point you just have to give up: heuristics never
can make up for missing information.

>  Consequently the more aggressive the compiler has become to schedule
> function body instructions within a function's prologue the more lost the 
> machine code interpreter has become.  Ultimately it would have to become a 
> full-fledged CPU simulator to do its heuristics.

Yup, exactly.

> In reality it means the 
> unwinder may fail to produce acceptable results, which will happen at any 
> frequency between hardly ever to most often, depending on the exact 
> circumstances.

Yes.  If the compiler optimises the *logue code well, there is no way
heuristics can follow that.

>  Conversely no heuristics is required to unwind VAX frames, because they 
> are fixed in layout by hardware, fully self-described, and with the 
> hardware frame pointer always available.

The downside of the VAX situation of course is that the compiler has no
freedom to optimise the frame and *logue code at all, let alone well.
This may not matter so much on narrow ucoded in-order machines, there
are different balances there :-)

Segher

Re: [PATCH] aarch64, acle header: Cast uint64_t pointers to DIMode.

2024-02-16 Thread Iain Sandoe




> On 15 Feb 2024, at 18:05, Richard Sandiford  wrote:
> 
> Iain Sandoe  writes:
>>> On 5 Feb 2024, at 14:56, Iain Sandoe  wrote:
>>> 
>>> Tested on aarch64-linux,darwin and a cross from aarch64-darwin to linux,
>>> OK for trunk, or some alternative is needed?
>> 
>> Hmm.. apparently, this fails the linaro pre-commit CI for g++ with:
>> error: invalid conversion from 'long int*' to 'long unsigned int*' 
>> [-fpermissive]
>> 
>> So, I guess some alternative is needed, advice welcome,
> 
> The builtins are registered with:
> 
> static void
> aarch64_init_rng_builtins (void)
> {
>  tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
>  ...
> 
> Does it work if you change unsigned_intDI_type_node to
> get_typenode_from_name (UINT64_TYPE)?

Yes, that works fine; tested on aarch64-linux and aarch64-darwin.

revised, as below,
OK for trunk?
Iain


Subject: [PATCH] aarch64: Register rng builtins with uint64_t pointers.

Currently, these are registered as unsigned_intDI_type_node which is not
necessarily the same type definition as uint64_t.  On platforms where these
differ, that causes failures when consuming the arm_acle.h header.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc (aarch64_init_rng_builtins):
Register these builtins with a pointer to uint64_t rather than unsigned
DI mode.
---
 gcc/config/aarch64/aarch64-builtins.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index e211a7271ba..1330558f109 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1759,7 +1759,8 @@ aarch64_init_tme_builtins (void)
 static void
 aarch64_init_rng_builtins (void)
 {
-  tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
+  tree unsigned_ptr_type
+= build_pointer_type (get_typenode_from_name (UINT64_TYPE));
   tree ftype
 = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
   aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
-- 
2.39.2 (Apple Git-143)

Re: [PATCH] libsanitizer: Intercept __makecontext_v2 on Solaris/SPARC [PR113785]

2024-02-16 Thread Rainer Orth

Hi Jakub,

> On Fri, Feb 16, 2024 at 01:32:04PM +0100, Rainer Orth wrote:
>> c-c++-common/asan/swapcontext-test-1.c FAILs on Solaris/SPARC:
>> 
>> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O0  execution test
>> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O1  execution test
>> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O2  execution test
>> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O2 -flto  execution test
>> FAIL: c-c++-common/asan/swapcontext-test-1.c -O2 -flto
>> -flto-partition=none execution test
>> FAIL: c-c++-common/asan/swapcontext-test-1.c -O3 -fomit-frame-pointer
>> -funroll-loops -fpeel-loops -ftracer -finline-functions execution test
>> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O3 -g  execution test
>> FAIL: c-c++-common/asan/swapcontext-test-1.c   -Os  execution test
>> 
>> As detailed in PR sanitizer/113785, this happens because an ABI change
>> in Solaris 10/SPARC caused the external symbol for makecontext to be
>> changed to __makecontext_v2, which isn't intercepted.
>
> Is Solaris 9/SPARC and earlier no longer supported in GCC?

no, Solaris 9 support was removed in GCC 5 already.  The only version
supported by trunk is 11.4; 11.3 isn't but I won't actually remove
support until Solaris 11.4 systems have been added to the cfarm (which
should be soon).

> If so, ok for trunk.

Thanks.

> Otherwise I'd expect some ifdefs or whatever to check if it is
> Solaris 10+ with __makecontext_v2 or Solaris up to 9 with makecontext.

True.  However, it can be difficult to get patches upstream for OS
versions not remotely supported in LLVM (which has been 11.4-only for
years).

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University

Re: [PATCH] libsanitizer: Intercept __makecontext_v2 on Solaris/SPARC [PR113785]

2024-02-16 Thread Jakub Jelinek

On Fri, Feb 16, 2024 at 01:32:04PM +0100, Rainer Orth wrote:
> c-c++-common/asan/swapcontext-test-1.c FAILs on Solaris/SPARC:
> 
> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O0  execution test
> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O1  execution test
> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O2  execution test
> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O2 -flto  execution test
> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O2 -flto -flto-partition=none 
>  execution test
> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O3 -fomit-frame-pointer 
> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
> FAIL: c-c++-common/asan/swapcontext-test-1.c   -O3 -g  execution test
> FAIL: c-c++-common/asan/swapcontext-test-1.c   -Os  execution test
> 
> As detailed in PR sanitizer/113785, this happens because an ABI change
> in Solaris 10/SPARC caused the external symbol for makecontext to be
> changed to __makecontext_v2, which isn't intercepted.

Is Solaris 9/SPARC and earlier no longer supported in GCC?

If so, ok for trunk.

Otherwise I'd expect some ifdefs or whatever to check if it is
Solaris 10+ with __makecontext_v2 or Solaris up to 9 with makecontext.

Jakub

[PATCH][RFC] tree-optimization/113910 - bitmap_hash is weak, improve iterative_hash_*

2024-02-16 Thread Richard Biener

The following addresses the weak bitmap_hash function which results
in points-to analysis taking a long time because of a high collision
rate in one of its bitmap hash tables.  Using a better hash function
like in the bitmap.cc hunk below doesn't help unless one also replaces
the hash function in iterative_hash_* with something faster.

I've taken the implementation from BFD string merging and extracted
a 4 and 8 byte worker to replace iterative_hash_hashval_t and
iterative_hash_host_wide_int.  I didn't yet replace the generic
iterative_hash as its implementation resides in libiberty.

With this hash the testcase shows

   5.15%  9323  cc1plus  cc1plus [.] bitmap_hash

and a compile-time of 44s while using the original hash implementation
this becomes

  10.50% 20405  cc1plus  cc1plus [.] bitmap_hash

and a compile-time of 46s, still faster than using original bitmap_hash
which takes 56s and while having bitmap_hash off the profile shows

  21.56% 49490  cc1plus  cc1plus   [.] bitmap_equal_p

because of collision rates in the 20s.

Bootstrapped / tested on x86_64-unknown-linux-gnu.

OK for stage1?

Should I try to change libiberty's iterative_hash or implement a
generic block variant for GCC's use with a different name, no
longer using libiberty's iterative_hash?

Thanks,
Richard.

PR tree-optimization/113910
* inchash.h (iterative_hash_host_wide_int): Change hash
function.
(iterative_hash_hashval_t): Likewise.
* bitmap.cc (bitmap_hash): Hash index and bits.
---
 gcc/bitmap.cc |  8 +++---
 gcc/inchash.h | 80 +--
 2 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/gcc/bitmap.cc b/gcc/bitmap.cc
index 459e32c1ad1..f502841f385 100644
--- a/gcc/bitmap.cc
+++ b/gcc/bitmap.cc
@@ -2695,18 +2695,18 @@ hashval_t
 bitmap_hash (const_bitmap head)
 {
   const bitmap_element *ptr;
-  BITMAP_WORD hash = 0;
+  hashval_t hash = 0;
   int ix;
 
   gcc_checking_assert (!head->tree_form);
 
   for (ptr = head->first; ptr; ptr = ptr->next)
 {
-  hash ^= ptr->indx;
+  hash = iterative_hash_hashval_t (ptr->indx, hash);
   for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
-   hash ^= ptr->bits[ix];
+   hash = iterative_hash_host_wide_int (ptr->bits[ix], hash);
 }
-  return iterative_hash (&hash, sizeof (hash), 0);
+  return hash;
 }
 
 
diff --git a/gcc/inchash.h b/gcc/inchash.h
index e88f9b5eac1..4cdef1e7fce 100644
--- a/gcc/inchash.h
+++ b/gcc/inchash.h
@@ -28,8 +28,8 @@ along with GCC; see the file COPYING3.  If not see
Currently it just implements the plain old jhash based
incremental hash from gcc's tree.cc.  */
 
-hashval_t iterative_hash_host_wide_int (HOST_WIDE_INT, hashval_t);
-hashval_t iterative_hash_hashval_t (hashval_t, hashval_t);
+hashval_t iterative_hash_host_wide_int (uint64_t, hashval_t);
+hashval_t iterative_hash_hashval_t (uint32_t, hashval_t);
 
 namespace inchash
 {
@@ -157,57 +157,57 @@ class hash
 
 }
 
-/* Borrowed from hashtab.c iterative_hash implementation.  */
-#define mix(a,b,c) \
-{ \
-  a -= b; a -= c; a ^= (c>>13); \
-  b -= c; b -= a; b ^= (a<< 8); \
-  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
-  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
-  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
-  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
-  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
-}
-
 
 /* Produce good hash value combining VAL and VAL2.  */
 inline
 hashval_t
-iterative_hash_hashval_t (hashval_t val, hashval_t val2)
+iterative_hash_hashval_t (uint32_t val, hashval_t val2)
 {
-  /* the golden ratio; an arbitrary value.  */
-  hashval_t a = 0x9e3779b9;
+  static_assert (sizeof (hashval_t) == sizeof (uint32_t), "");
+
+  const uint32_t mul = ((1 << 0) +  (1 << 2) + (1 << 3) + (1 << 5)
+   + (1 << 7) + (1 << 11) + (1 << 13) + (1 << 17)
+   + (0 << 19) + (1 << 23) + (1 << 29) + (1u << 31));
+
+  const unsigned len = 4;
+  hashval_t acc = val2 + len * 0x9e3779b1;
+
+  uint16_t i1 = (uint16_t)val ^ (0x396cfeb8 + len);
+  uint16_t i2 = (uint16_t)(val >> 16) ^ (0xbe4ba423 + len);
+  hashval_t m = (uint32_t)i1 * i2;
 
-  mix (a, val, val2);
-  return val2;
+  acc += m;
+  acc = acc ^ (acc >> 7);
+  uint64_t r = (uint64_t)mul * acc;
+  return (uint32_t)r ^ (uint32_t)(r >> 32);
 }
 
 /* Produce good hash value combining VAL and VAL2.  */
 
 inline
 hashval_t
-iterative_hash_host_wide_int (HOST_WIDE_INT val, hashval_t val2)
+iterative_hash_host_wide_int (uint64_t val, hashval_t val2)
 {
-  if (sizeof (HOST_WIDE_INT) == sizeof (hashval_t))
-return iterative_hash_hashval_t (val, val2);
-  else
-{
-  hashval_t a = (hashval_t) val;
-  /* Avoid warnings about shifting of more than the width of the type on
- hosts that won't execute this path.  */

Re: GCN RDNA2+ vs. GCC SLP vectorizer

2024-02-16 Thread Andrew Stubbs


On 16/02/2024 12:26, Richard Biener wrote:

On Fri, 16 Feb 2024, Andrew Stubbs wrote:


On 16/02/2024 10:17, Richard Biener wrote:

On Fri, 16 Feb 2024, Thomas Schwinge wrote:


Hi!

On 2023-10-20T12:51:03+0100, Andrew Stubbs  wrote:

I've committed this patch


... as commit c7ec7bd1c6590cf4eed267feab490288e0b8d691
"amdgcn: add -march=gfx1030 EXPERIMENTAL", which the later RDNA3/gfx1100
support builds on top of, and that's what I'm currently working on
getting proper GCC/GCN target (not offloading) results for.

Now looking at 'gcc.dg/vect/bb-slp-cond-1.c', which is reasonably simple,
and hopefully representative for other SLP execution test FAILs
(regressions compared to my earlier non-gfx1100 testing).

  $ build-gcc/gcc/xgcc -Bbuild-gcc/gcc/
  source-gcc/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c
  --sysroot=install/amdgcn-amdhsa -ftree-vectorize
  -fno-tree-loop-distribute-patterns -fno-vect-cost-model -fno-common
  -O2 -fdump-tree-slp-details -fdump-tree-vect-details -isystem
  build-gcc/amdgcn-amdhsa/gfx1100/newlib/targ-include -isystem
  source-gcc/newlib/libc/include
  -Bbuild-gcc/amdgcn-amdhsa/gfx1100/newlib/
  -Lbuild-gcc/amdgcn-amdhsa/gfx1100/newlib -wrapper
  setarch,--addr-no-randomize -fdump-tree-all-all -fdump-ipa-all-all
  -fdump-rtl-all-all -save-temps -march=gfx1100

The '-march=gfx1030' 'a-bb-slp-cond-1.s' is identical (apart from
'TARGET_PACKED_WORK_ITEMS' in 'gcn_target_asm_function_prologue'), so I
suppose will also exhibit the same failure mode, once again?

Compared to '-march=gfx90a', the differences begin in
'a-bb-slp-cond-1.c.266r.expand' (only!), down to 'a-bb-slp-cond-1.s'.

Changed like:

  @@ -38,10 +38,10 @@ int main ()
   #pragma GCC novector
 for (i = 1; i < N; i++)
   if (a[i] != i%4 + 1)
  -  abort ();
  +  __builtin_printf("%d %d != %d\n", i, a[i], i%4 + 1);
   
 if (a[0] != 5)

  -abort ();
  +__builtin_printf("%d %d != %d\n", 0, a[0], 5);

..., we see:

  $ flock /tmp/gcn.lock build-gcc/gcc/gcn-run a.out
  40 5 != 1
  41 6 != 2
  42 7 != 3
  43 8 != 4
  44 5 != 1
  45 6 != 2
  46 7 != 3
  47 8 != 4

'40..47' are the 'i = 10..11' in 'foo', and the expectation is
'a[i * stride + 0..3] != 0'.  So, either some earlier iteration has
scribbled zero values over these (vector lane masking issue, perhaps?),
or some other code generation issue?


So we're indeed BB vectorizing this to

_54 = MEM <vector(4) int> [(int *)_14];
vect_iftmp.12_56 = .VCOND (_54, { 0, 0, 0, 0 }, { 1, 2, 3, 4 }, { 5, 6,
7, 8 }, 115);
MEM <vector(4) int> [(int *)_14] = vect_iftmp.12_56;

I don't understand the assembly very well but it might be that
the mask computation for the .VCOND scribbles the mask used
to constrain operation to 4 lanes?

.L3:
  s_mov_b64   exec, 15
  v_add_co_u32v4, s[22:23], s32, v3
  v_mov_b32   v5, s33
  v_add_co_ci_u32 v5, s[22:23], 0, v5, s[22:23]
  flat_load_dword v7, v[4:5] offset:0
  s_waitcnt   0
  flat_load_dword v0, v[10:11] offset:0
  s_waitcnt   0
  flat_load_dword v6, v[8:9] offset:0
  s_waitcnt   0
  v_cmp_ne_u32s[18:19], v7, 0
  v_cndmask_b32   v0, v6, v0, s[18:19]
  flat_store_dwordv[4:5], v0 offset:0
  s_add_i32   s12, s12, 1
  s_add_u32   s32, s32, s28
  s_addc_u32  s33, s33, s29
  s_cmp_lg_u32s12, s13
  s_cbranch_scc1  .L3


This basic block has EXEC set to 15 (4 lanes) throughout. The mask for the
VCOND a.k.a. v_cndmask_b32 is in s[18:19]. Those things seem OK.

I see the testcase avoids vec_extract V64SI to V4SI for gfx1100, even though
it would be a no-op conversion, because the general case requires a permute
instruction and named pattern insns can't have non-constant conditions. Is
vec_extract allowed to FAIL? That might give a better result in this case.


I found that vec_extract is not allowed to FAIL. I guess the only way to 
allow the no-op conversions is to implement manual fall-back code-gen 
for the broken cases.




However, I must be doing something different because vect/bb-slp-cond-1.c
passes for me, on gfx1100.


I didn't try to run it - doing make check-gcc fails to use
gcn-run for test invocation; what's the trick to make it do that?


There's a config file for nvptx here: 
https://github.com/SourceryTools/nvptx-tools/blob/master/nvptx-none-run.exp


You can probably make the obvious adjustments. I think Thomas has a GCN 
version with a few more features.


I usually use the CodeSourcery magic stack of scripts for testing 
installed toolchains on remote devices, so I'm not too familiar with 
using Dejagnu directly.


Andrew

Re: [PATCH] libstdc++: atomic: Add missing clear_padding in __atomic_float constructor

2024-02-16 Thread Jonathan Wakely

On Fri, 2 Feb 2024 at 16:52, xndcn  wrote:
>
> Thank you for your careful review!
>
> > But we don't need a new one if it's going to be used in exactly one test 
> > and if the new option does the same thing for all targets that run the test.
> Got it, thanks. Now add option "-latomic" directly, but it still rely
> on the trick "[atomic_link_flags [get_multilibs]]"
>
> > No, because the patch is supposed to prevent the infinite loop, and so 
> > there's no need to stop it looping after 10s. It won't loop at all.
> Thanks, deleted.
>
> > We only need to clear padding for long double, not float and double, right?
> Yes, actually there is a check "if constexpr
> (__atomic_impl::__maybe_has_padding<_Fp>())".
> But "__atomic_impl::__clear_padding(_M_fp); " is indeed simply, so fixed here.
>
> > Why can't we run this on all targets?
> Got it, now target option deleted.
>
> > There's no reason to use __builtin_memset here, just include <cstring> and 
> > use std::memcpy.
> Thanks, fixed.
>
> > It definitely does have padding, just say "long double has padding bits on 
> > x86"
> Thanks, fixed.
>
> So here comes the latest patch:


Thanks. I've applied the patch to my tree, but the new test fails
pretty reliably.

The infinite loop in std::atomic<long double>::fetch_add is fixed by
clearing padding in the constructor, but the test fails on the
compare_exchange_weak or compare_exchange_strong lines here:


> +as.fetch_add(t);
> +long double s = f + t;
> +t = as.load();
> +VERIFY(s == t); // padding ignored on float comparing
> +fill_padding(s);
> +VERIFY(as.compare_exchange_weak(s, f)); // padding cleared on cmpexchg
> +fill_padding(f);
> +VERIFY(as.compare_exchange_strong(f, t)); // padding cleared on cmpexchg
>


I think the problem is here in __atomic_impl::__compare_exchange:

   if (__atomic_compare_exchange(__pval, __pexp, __pi,
  __is_weak, int(__s), int(__f)))
 return true;

Even though padding in *__pexp and *__pi has been cleared, the value
of *__pval after a successful __atomic_compare_exchange has non-zero
padding. That means that the next compare_exchange will fail, because
we assume that the stored value always has zeroed padding bits.

Here's a gdb session showing that __atomic_compare_exchange stores a
value with non-zero padding:

Breakpoint 2, test01 () at compare_exchange_padding.cc:43
43long double s2 = s;
(gdb) n
44fill_padding(s2);
(gdb)
45while (!as.compare_exchange_weak(s2, f)) // padding cleared
on compexchg
(gdb) p/x as._M_fp
$11 = 0x40008000
(gdb) step
std::__atomic_float::compare_exchange_weak
(this=0x7fffd8c0, __expected=@0x7fffd8a0: 2, __desired=0.5,
__order=std::memory_order::seq_cst) at
/home/jwakely/gcc/14/include/c++/14.0.1/bits/atomic_base.h:1387
1387return compare_exchange_weak(__expected, __desired, __order,
(gdb) step
std::__atomic_float::compare_exchange_weak
(this=0x7fffd8c0, __expected=@0x7fffd8a0: 2, __desired=0.5,
__success=std::memory_order::seq_cst,
__failure=std::memory_order::seq_cst) at
/home/jwakely/gcc/14/include/c++/14.0.1/bits/atomic_base.h:1347
1347return __atomic_impl::compare_exchange_weak(&_M_fp,
(gdb) step
std::__atomic_impl::compare_exchange_weak
(__check_padding=false, __failure=std::memory_order::seq_cst,
__success=std::memory_order::seq_cst, __desired=0.5,
__expected=@0x7fffd8a0: 2, __ptr=0x7fffd8c0)
at /home/jwakely/gcc/14/include/c++/14.0.1/bits/atomic_base.h:1123
1123return __atomic_impl::__compare_exchange<_AtomicRef>(
(gdb)
std::__atomic_impl::__compare_exchange
(__f=std::memory_order::seq_cst, __s=std::memory_order::seq_cst,
__is_weak=true,
__i=<optimized out>, __e=@0x7fffd8a0: 2,
__val=@0x7fffd8c0: 2) at
/home/jwakely/gcc/14/include/c++/14.0.1/bits/atomic_base.h:994
994 __glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
(gdb) n
997 _Tp* const __pval = std::__addressof(__val);
(gdb)
1008_Vp* const __pi = __atomic_impl::__clear_padding(__i);
(gdb)
1010_Vp __exp = __e;
(gdb)
1012_Vp* const __pexp = __atomic_impl::__clear_padding(__exp);
(gdb)
1016if (__atomic_compare_exchange(__pval, __pexp, __pi,
(gdb) p/x *__pval
$12 = 0x40008000
(gdb) p/x *__pexp
$13 = 0x40008000
(gdb) p/x *__pi
$14 = 0x3ffe8000
(gdb) n
1018  return true;
(gdb) p/x *__pval
$15 = 0x77bf3ffe8000
(gdb)

We stored *__pi which has zero padding, but the result in *__pval has
non-zero padding. This doesn't seem to be gdb being misleading by
loading *__pval into a FP register which doesn't preserve the zero
padding, because if I do this then it fails:

  as.fetch_add(t);
  VERIFY(as.load() == s);
  __builtin_clear_padding(&s);
  VERIFY( std::memcmp(&s, &as, sizeof(s)) == 0 );

So the value stored by fetch_add (which uses compare_exchange_weak in
a loop) really doesn't have

[PATCH] libsanitizer: Intercept __makecontext_v2 on Solaris/SPARC [PR113785]

2024-02-16 Thread Rainer Orth

c-c++-common/asan/swapcontext-test-1.c FAILs on Solaris/SPARC:

FAIL: c-c++-common/asan/swapcontext-test-1.c   -O0  execution test
FAIL: c-c++-common/asan/swapcontext-test-1.c   -O1  execution test
FAIL: c-c++-common/asan/swapcontext-test-1.c   -O2  execution test
FAIL: c-c++-common/asan/swapcontext-test-1.c   -O2 -flto  execution test
FAIL: c-c++-common/asan/swapcontext-test-1.c   -O2 -flto -flto-partition=none  
execution test
FAIL: c-c++-common/asan/swapcontext-test-1.c   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
FAIL: c-c++-common/asan/swapcontext-test-1.c   -O3 -g  execution test
FAIL: c-c++-common/asan/swapcontext-test-1.c   -Os  execution test

As detailed in PR sanitizer/113785, this happens because an ABI change
in Solaris 10/SPARC caused the external symbol for makecontext to be
changed to __makecontext_v2, which isn't intercepted.

The following patch, submitted upstream at
https://github.com/llvm/llvm-project/pull/81588, fixes that.

Tested on sparc-sun-solaris2.11 and i386-pc-solaris2.11.

Ok to cherry-pick into trunk?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2024-02-16  Rainer Orth  

libsanitizer:
PR sanitizer/113785
* sanitizer_common/asan/asan_interceptors.cpp: Cherry-pick
llvm-project revision 8c2033719a843a1880427a5e8caa5563248bce78.

# HG changeset patch
# Parent  2fb800df7e0fd2d03a485601ad4683a29f78f2a4
libsanitizer: Intercept __makecontext_v2 on Solaris/SPARC [PR113785]

diff --git a/libsanitizer/asan/asan_interceptors.cpp b/libsanitizer/asan/asan_interceptors.cpp
--- a/libsanitizer/asan/asan_interceptors.cpp
+++ b/libsanitizer/asan/asan_interceptors.cpp
@@ -347,8 +347,16 @@ static void ClearShadowMemoryForContextS
   PoisonShadow(bottom, ssize, 0);
 }
 
+// Since Solaris 10/SPARC, ucp->uc_stack.ss_sp refers to the stack base address
+// as on other targets.  For binary compatibility, the new version uses a
+// different external name, so we intercept that.
+#if SANITIZER_SOLARIS && defined(__sparc__)
+INTERCEPTOR(void, __makecontext_v2, struct ucontext_t *ucp, void (*func)(),
+int argc, ...) {
+#else
 INTERCEPTOR(void, makecontext, struct ucontext_t *ucp, void (*func)(), int argc,
 ...) {
+#endif
   va_list ap;
   uptr args[64];
   // We don't know a better way to forward ... into REAL function. We can
@@ -368,7 +376,11 @@ INTERCEPTOR(void, makecontext, struct uc
   ENUMERATE_ARRAY_16(0), ENUMERATE_ARRAY_16(16), ENUMERATE_ARRAY_16(32), \
   ENUMERATE_ARRAY_16(48)
 
+#if SANITIZER_SOLARIS && defined(__sparc__)
+  REAL(__makecontext_v2)
+#else
   REAL(makecontext)
+#endif
   ((struct ucontext_t *)ucp, func, argc, ENUMERATE_ARRAY_64());
 
 #undef ENUMERATE_ARRAY_4
@@ -783,7 +795,12 @@ void InitializeAsanInterceptors() {
 
 #  if ASAN_INTERCEPT_SWAPCONTEXT
   ASAN_INTERCEPT_FUNC(swapcontext);
+  // See the makecontext interceptor above for an explanation.
+#if SANITIZER_SOLARIS && defined(__sparc__)
+  ASAN_INTERCEPT_FUNC(__makecontext_v2);
+#else
   ASAN_INTERCEPT_FUNC(makecontext);
+#endif
 #  endif
 #  if ASAN_INTERCEPT__LONGJMP
   ASAN_INTERCEPT_FUNC(_longjmp);

Re: GCN RDNA2+ vs. GCC SLP vectorizer

2024-02-16 Thread Richard Biener

On Fri, 16 Feb 2024, Andrew Stubbs wrote:

> On 16/02/2024 10:17, Richard Biener wrote:
> > On Fri, 16 Feb 2024, Thomas Schwinge wrote:
> > 
> >> Hi!
> >>
> >> On 2023-10-20T12:51:03+0100, Andrew Stubbs  wrote:
> >>> I've committed this patch
> >>
> >> ... as commit c7ec7bd1c6590cf4eed267feab490288e0b8d691
> >> "amdgcn: add -march=gfx1030 EXPERIMENTAL", which the later RDNA3/gfx1100
> >> support builds on top of, and that's what I'm currently working on
> >> getting proper GCC/GCN target (not offloading) results for.
> >>
> >> Now looking at 'gcc.dg/vect/bb-slp-cond-1.c', which is reasonably simple,
> >> and hopefully representative for other SLP execution test FAILs
> >> (regressions compared to my earlier non-gfx1100 testing).
> >>
> >>  $ build-gcc/gcc/xgcc -Bbuild-gcc/gcc/
> >>  source-gcc/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c
> >>  --sysroot=install/amdgcn-amdhsa -ftree-vectorize
> >>  -fno-tree-loop-distribute-patterns -fno-vect-cost-model -fno-common
> >>  -O2 -fdump-tree-slp-details -fdump-tree-vect-details -isystem
> >>  build-gcc/amdgcn-amdhsa/gfx1100/newlib/targ-include -isystem
> >>  source-gcc/newlib/libc/include
> >>  -Bbuild-gcc/amdgcn-amdhsa/gfx1100/newlib/
> >>  -Lbuild-gcc/amdgcn-amdhsa/gfx1100/newlib -wrapper
> >>  setarch,--addr-no-randomize -fdump-tree-all-all -fdump-ipa-all-all
> >>  -fdump-rtl-all-all -save-temps -march=gfx1100
> >>
> >> The '-march=gfx1030' 'a-bb-slp-cond-1.s' is identical (apart from
> >> 'TARGET_PACKED_WORK_ITEMS' in 'gcn_target_asm_function_prologue'), so I
> >> suppose will also exhibit the same failure mode, once again?
> >>
> >> Compared to '-march=gfx90a', the differences begin in
> >> 'a-bb-slp-cond-1.c.266r.expand' (only!), down to 'a-bb-slp-cond-1.s'.
> >>
> >> Changed like:
> >>
> >>  @@ -38,10 +38,10 @@ int main ()
> >>   #pragma GCC novector
> >> for (i = 1; i < N; i++)
> >>   if (a[i] != i%4 + 1)
> >>  -  abort ();
> >>  +  __builtin_printf("%d %d != %d\n", i, a[i], i%4 + 1);
> >>   
> >> if (a[0] != 5)
> >>  -abort ();
> >>  +__builtin_printf("%d %d != %d\n", 0, a[0], 5);
> >>
> >> ..., we see:
> >>
> >>  $ flock /tmp/gcn.lock build-gcc/gcc/gcn-run a.out
> >>  40 5 != 1
> >>  41 6 != 2
> >>  42 7 != 3
> >>  43 8 != 4
> >>  44 5 != 1
> >>  45 6 != 2
> >>  46 7 != 3
> >>  47 8 != 4
> >>
> >> '40..47' are the 'i = 10..11' in 'foo', and the expectation is
> >> 'a[i * stride + 0..3] != 0'.  So, either some earlier iteration has
> >> scribbled zero values over these (vector lane masking issue, perhaps?),
> >> or some other code generation issue?
> > 
> > So we're indeed BB vectorizing this to
> > 
> >_54 = MEM  [(int *)_14];
> >vect_iftmp.12_56 = .VCOND (_54, { 0, 0, 0, 0 }, { 1, 2, 3, 4 }, { 5, 6,
> > 7, 8 }, 115);
> >MEM  [(int *)_14] = vect_iftmp.12_56;
> > 
> > I don't understand the assembly very well but it might be that
> > the mask computation for the .VCOND scribbles the mask used
> > to constrain operation to 4 lanes?
> > 
> > .L3:
> >  s_mov_b64   exec, 15
> >  v_add_co_u32v4, s[22:23], s32, v3
> >  v_mov_b32   v5, s33
> >  v_add_co_ci_u32 v5, s[22:23], 0, v5, s[22:23]
> >  flat_load_dword v7, v[4:5] offset:0
> >  s_waitcnt   0
> >  flat_load_dword v0, v[10:11] offset:0
> >  s_waitcnt   0
> >  flat_load_dword v6, v[8:9] offset:0
> >  s_waitcnt   0
> >  v_cmp_ne_u32s[18:19], v7, 0
> >  v_cndmask_b32   v0, v6, v0, s[18:19]
> >  flat_store_dwordv[4:5], v0 offset:0
> >  s_add_i32   s12, s12, 1
> >  s_add_u32   s32, s32, s28
> >  s_addc_u32  s33, s33, s29
> >  s_cmp_lg_u32s12, s13
> >  s_cbranch_scc1  .L3
> 
> This basic block has EXEC set to 15 (4 lanes) throughout. The mask for the
> VCOND a.k.a. v_cndmask_b32 is in s[18:19]. Those things seem OK.
> 
> I see the testcase avoids vec_extract V64SI to V4SI for gfx1100, even though
> it would be a no-op conversion, because the general case requires a permute
> instruction and named pattern insns can't have non-constant conditions. Is
> vec_extract allowed to FAIL? That might give a better result in this case.
> 
> However, I must be doing something different because vect/bb-slp-cond-1.c
> passes for me, on gfx1100.

I didn't try to run it - doing make check-gcc fails to use
gcn-run for test invocation; what's the trick to make it do that?

Richard.

[PATCH v2] c++/modules: Support lambdas attached to more places in modules [PR111710]

2024-02-16 Thread Nathaniel Shead

On Tue, Feb 13, 2024 at 07:52:01PM -0500, Jason Merrill wrote:
> On 2/10/24 17:57, Nathaniel Shead wrote:
> > The fix for PR107398 weakened the restrictions that lambdas must belong
> > to namespace scope. However this was not sufficient: we also need to
> > allow lambdas keyed to FIELD_DECLs or PARM_DECLs.
> 
> I wonder about keying such lambdas to the class and function, respectively,
> rather than specifically to the field or parameter, but I suppose it doesn't
> matter.

I did some more testing and realised my testcase didn't properly
exercise whether I'd properly deduplicated or not, and an improved
testcase proved that actually keying to the field rather than the class
did cause issues. (Parameter vs. function doesn't seem to have mattered
however.)

Here's an updated patch that fixes this, and includes the changes for
lambdas in base classes that I'd had as a separate patch earlier. I've
also added some concepts testcases just in case.

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

-- >8 --

The fix for PR107398 weakened the restrictions that lambdas must belong
to namespace scope. However this was not sufficient: we also need to
allow lambdas attached to FIELD_DECLs, PARM_DECLs, and TYPE_DECLs.

For field decls we key the lambda to its class rather than the field
itself. This avoids some errors with deduplicating fields.

Additionally, by [basic.link] p15.2 a lambda defined anywhere in a
class-specifier should not be TU-local, which includes base-class
declarations, so ensure that lambdas declared there are keyed
appropriately as well.

Because this now requires 'DECL_MODULE_KEYED_DECLS_P' to be checked on a
fairly large number of different kinds of DECLs, and that in general
it's safe to just get 'false' as a result of a check on an unexpected
DECL type, this patch also removes the tree checking from the accessor.

Finally, to handle deduplicating templated lambda fields, we need to
ensure that we can determine that two lambdas from different field decls
match. The modules code does not attempt to deduplicate expression
nodes, which causes issues as the LAMBDA_EXPRs are then considered to be
different. However, rather than checking the LAMBDA_EXPR directly we can
instead check its type: the generated RECORD_TYPE for a LAMBDA_EXPR must
also be unique, and /is/ deduplicated on import, so we can just check
for that instead.

PR c++/111710

gcc/cp/ChangeLog:

* cp-tree.h (DECL_MODULE_KEYED_DECLS_P): Remove tree checking.
(struct lang_decl_base): Update comments and fix whitespace.
* module.cc (trees_out::lang_decl_bools): Always write
module_keyed_decls_p flag...
(trees_in::lang_decl_bools): ...and always read it.
(trees_out::decl_value): Handle all kinds of keyed decls.
(trees_in::decl_value): Likewise.
(maybe_key_decl): Also support lambdas attached to fields,
parameters, and types. Key lambdas attached to fields to their
class.
(trees_out::get_merge_kind): Likewise.
(trees_out::key_mergeable): Likewise.
(trees_in::key_mergeable): Support keyed decls in a TYPE_DECL
container.
* parser.cc (cp_parser_class_head): Start a lambda scope when
parsing base classes.
* tree.cc (cp_tree_equal): Check equality of the types of
LAMBDA_EXPRs instead of the exprs themselves.

gcc/testsuite/ChangeLog:

* g++.dg/modules/lambda-7.h: New test.
* g++.dg/modules/lambda-7_a.H: New test.
* g++.dg/modules/lambda-7_b.C: New test.
* g++.dg/modules/lambda-7_c.C: New test.

Signed-off-by: Nathaniel Shead 
---
 gcc/cp/cp-tree.h  | 26 +++
 gcc/cp/module.cc  | 94 +--
 gcc/cp/parser.cc  | 10 ++-
 gcc/cp/tree.cc|  4 +-
 gcc/testsuite/g++.dg/modules/lambda-7.h   | 42 ++
 gcc/testsuite/g++.dg/modules/lambda-7_a.H |  4 +
 gcc/testsuite/g++.dg/modules/lambda-7_b.C |  5 ++
 gcc/testsuite/g++.dg/modules/lambda-7_c.C | 41 ++
 8 files changed, 169 insertions(+), 57 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/lambda-7.h
 create mode 100644 gcc/testsuite/g++.dg/modules/lambda-7_a.H
 create mode 100644 gcc/testsuite/g++.dg/modules/lambda-7_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/lambda-7_c.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 334c11396c2..04c3aa6cd91 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -1773,9 +1773,8 @@ check_constraint_info (tree t)
   (DECL_LANG_SPECIFIC (DECL_MODULE_CHECK (NODE))->u.base.module_entity_p)

 /* DECL that has attached decls for ODR-relatedness.  */
-#define DECL_MODULE_KEYED_DECLS_P(NODE)\
-  (DECL_LANG_SPECIFIC (TREE_CHECK2(NODE,FUNCTION_DECL,VAR_DECL))\
-   ->u.base.module_keyed_decls_p)
+#define DECL_MODULE_KEYED_DECLS_P(NODE) \
+  (DECL_LANG_SPECIFIC (DECL_MODULE_CHECK

[PATCH] tree-optimization/113895 - consistency check fails in copy_reference_ops_from_ref

2024-02-16 Thread Richard Biener

The following addresses consistency check fails in copy_reference_ops_from_ref
when we are handling out-of-bound array accesses (it's almost impossible
to identically mimic the get_ref_base_and_extent behavior).  It also
addresses the case where an out-of-bound constant offset computes to a
-1 off which is the special value for "unknown".  This patch basically
turns off verification in those cases.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/113895
* tree-ssa-sccvn.cc (copy_reference_ops_from_ref): Disable
consistency checking when there are out-of-bound array
accesses.  Allow -1 off when from an array reference with
constant index.

* gcc.dg/torture/pr113895-2.c: New testcase.
* gcc.dg/torture/pr113895-3.c: Likewise.
* gcc.dg/torture/pr113895-4.c: Likewise.
---
 gcc/testsuite/gcc.dg/torture/pr113895-2.c | 13 ++
 gcc/testsuite/gcc.dg/torture/pr113895-3.c | 10 
 gcc/testsuite/gcc.dg/torture/pr113895-4.c | 14 ++
 gcc/tree-ssa-sccvn.cc | 31 +--
 4 files changed, 66 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr113895-2.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr113895-3.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr113895-4.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr113895-2.c 
b/gcc/testsuite/gcc.dg/torture/pr113895-2.c
new file mode 100644
index 000..a1c20250c99
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr113895-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+
+extern void d(int);
+int a[2][4], b;
+int main() {
+  while (b) {
+int c;
+d(a[b][c]);
+for (c = 0; c < 7; c++)
+  ;
+  }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr113895-3.c 
b/gcc/testsuite/gcc.dg/torture/pr113895-3.c
new file mode 100644
index 000..255975f3353
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr113895-3.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+
+extern void f();
+char a[1][1], b;
+int main() {
+  int c = -1U;
+  if (b)
+f(a[c][b]);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr113895-4.c 
b/gcc/testsuite/gcc.dg/torture/pr113895-4.c
new file mode 100644
index 000..75f71dcb451
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr113895-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+
+long a, b, c;
+int d;
+long e[2][1];
+int f() {
+  if (c == a)
+c = b;
+}
+void g() {
+  int h, i = 0;
+  for (; f() + d + i; i++)
+e[h][i] = 4;
+}
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 707a62e94a3..e737c8c6a69 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -1107,9 +1107,29 @@ copy_reference_ops_from_ref (tree ref, 
vec *result)
 the vn_reference ops differ by adjusting those indexes to
 appropriate constants.  */
   poly_int64 off = 0;
+  bool oob_index = false;
   for (unsigned i = result->length (); i > start; --i)
{
  auto &op = (*result)[i-1];
+ if (flag_checking
+ && op.opcode == ARRAY_REF
+ && TREE_CODE (op.op0) == INTEGER_CST)
+   {
+ /* The verifier below chokes on inconsistencies of handling
+out-of-bound accesses so disable it in that case.  */
+ tree atype = (*result)[i].type;
+ if (TREE_CODE (atype) == ARRAY_TYPE)
+   if (tree dom = TYPE_DOMAIN (atype))
+ if ((TYPE_MIN_VALUE (dom)
+  && TREE_CODE (TYPE_MIN_VALUE (dom)) == INTEGER_CST
+  && (wi::to_widest (op.op0)
+  < wi::to_widest (TYPE_MIN_VALUE (dom
+ || (TYPE_MAX_VALUE (dom)
+ && TREE_CODE (TYPE_MAX_VALUE (dom)) == INTEGER_CST
+ && (wi::to_widest (op.op0)
+ > wi::to_widest (TYPE_MAX_VALUE (dom)
+   oob_index = true;
+   }
  if ((op.opcode == ARRAY_REF
   || op.opcode == ARRAY_RANGE_REF)
  && TREE_CODE (op.op0) == SSA_NAME)
@@ -1162,12 +1182,19 @@ copy_reference_ops_from_ref (tree ref, 
vec *result)
}
  else
{
- gcc_assert (known_ne (op.off, -1));
+ gcc_assert (known_ne (op.off, -1)
+ /* Out-of-bound indices can compute to
+a known -1 offset.  */
+ || ((op.opcode == ARRAY_REF
+  || op.opcode == ARRAY_RANGE_REF)
+ && poly_int_tree_p (op.op0)
+ && poly_int_tree_p (op.op1)
+ && TREE_CODE (op.op2) == INTEGER_CST));
  off += op.off * BITS_PER_UNIT;
}
}
}
-  if (flag_checking)
+  if (flag_checking && !oob_index)

Re: [PATCH] Turn on LRA on all targets

2024-02-16 Thread Maciej W. Rozycki

On Thu, 15 Feb 2024, Paul Koning wrote:

> > On May 15, 2023, at 5:09 PM, Maciej W. Rozycki  wrote:
> > 
> > ...
> > 
> > I may choose to implement a non-DWARF unwinder instead, as the VAX stack 
> > frame is always fully described by the hardware and there is never ever a 
> > need for debug information to be able to decode any VAX stack frame (the 
> > RET machine instruction uses the stack frame information to restore the 
> > previous PC, FP, SP, AP and any static registers saved by CALLS).
> 
> That would make sense; it's like the heuristic unwinder found in some 
> other targets (I remember the MIPS one, which worked fairly well and 
> allowed debugging without useable debug data).

 Not really, in particular because EH unwinding has to be reliable and 
heuristics inherently is not.

 The MIPS heuristic unwinder continues living in GDB; I have extended it
to the microMIPS ISA at one point.  It has a major flaw though: the MIPS 
psABI uses a variable frame layout, with the frame maintained solely by 
software and with no fixed hardware frame pointer, so to analyse it in the 
absence of debug information the instruction sequence of the prologue of the 
function in question has to be decoded to discover the location of individual 
frame slots.

 Consequently the more aggressive the compiler has become to schedule
function body instructions within a function's prologue the more lost the 
machine code interpreter has become.  Ultimately it would have to become a 
full-fledged CPU simulator to do its heuristics.  In reality it means the 
unwinder may fail to produce acceptable results, which will happen with a 
frequency anywhere from hardly ever to most often, depending on the exact 
circumstances.

 A mixed approach by interpreting lightweight PDR (Procedure Description 
Record) information inherited from the ECOFF Mdebug format combined with 
function prologue scanning might be more reliable, because in that case 
frame slot positions are known and the only unknown is the code locations 
they are initialised each at.  So all the prologue scanner has to know 
is the store machine instructions and any that set a frame pointer from 
the stack pointer.  All the other instructions can be simply ignored.  And 
then only in the innermost frame, because any outer frames must have been 
fully set up already.  But I never got to implementing it and remnants of 
the PDR unwinder have long been removed from GDB.

 Conversely no heuristics is required to unwind VAX frames, because they 
are fixed in layout by hardware, fully self-described, and with the 
hardware frame pointer always available.  Therefore to unwind a VAX frame 
steps similar to those made by hardware on a function return (machine RET 
instruction) can simply be recreated from information produced by hardware 
at the function call and recorded in the stack frame and registers.  There 
is room reserved in the stack frame for a pointer to an exception handler 
even ("condition handler" in VAX-speak), preset to zero (a null pointer) 
by hardware at function entry.

 It does seem really attractive (and saves some storage space, which VAX 
hardware users will likely appreciate), but implies dedicated libgcc code 
as opposed to reusing common bits, which may or may not be welcomed by the 
community for such an exotic corner case target.

  Maciej

Re: GCN RDNA2+ vs. GCC SLP vectorizer

2024-02-16 Thread Andrew Stubbs


On 16/02/2024 10:17, Richard Biener wrote:

On Fri, 16 Feb 2024, Thomas Schwinge wrote:


Hi!

On 2023-10-20T12:51:03+0100, Andrew Stubbs  wrote:

I've committed this patch


... as commit c7ec7bd1c6590cf4eed267feab490288e0b8d691
"amdgcn: add -march=gfx1030 EXPERIMENTAL", which the later RDNA3/gfx1100
support builds on top of, and that's what I'm currently working on
getting proper GCC/GCN target (not offloading) results for.

Now looking at 'gcc.dg/vect/bb-slp-cond-1.c', which is reasonably simple,
and hopefully representative for other SLP execution test FAILs
(regressions compared to my earlier non-gfx1100 testing).

 $ build-gcc/gcc/xgcc -Bbuild-gcc/gcc/ 
source-gcc/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 
--sysroot=install/amdgcn-amdhsa -ftree-vectorize 
-fno-tree-loop-distribute-patterns -fno-vect-cost-model -fno-common -O2 
-fdump-tree-slp-details -fdump-tree-vect-details -isystem 
build-gcc/amdgcn-amdhsa/gfx1100/newlib/targ-include -isystem 
source-gcc/newlib/libc/include -Bbuild-gcc/amdgcn-amdhsa/gfx1100/newlib/ 
-Lbuild-gcc/amdgcn-amdhsa/gfx1100/newlib -wrapper setarch,--addr-no-randomize 
-fdump-tree-all-all -fdump-ipa-all-all -fdump-rtl-all-all -save-temps 
-march=gfx1100

The '-march=gfx1030' 'a-bb-slp-cond-1.s' is identical (apart from
'TARGET_PACKED_WORK_ITEMS' in 'gcn_target_asm_function_prologue'), so I
suppose will also exhibit the same failure mode, once again?

Compared to '-march=gfx90a', the differences begin in
'a-bb-slp-cond-1.c.266r.expand' (only!), down to 'a-bb-slp-cond-1.s'.

Changed like:

 @@ -38,10 +38,10 @@ int main ()
  #pragma GCC novector
for (i = 1; i < N; i++)
  if (a[i] != i%4 + 1)
 -  abort ();
 +  __builtin_printf("%d %d != %d\n", i, a[i], i%4 + 1);
  
if (a[0] != 5)

 -abort ();
 +__builtin_printf("%d %d != %d\n", 0, a[0], 5);

..., we see:

 $ flock /tmp/gcn.lock build-gcc/gcc/gcn-run a.out
 40 5 != 1
 41 6 != 2
 42 7 != 3
 43 8 != 4
 44 5 != 1
 45 6 != 2
 46 7 != 3
 47 8 != 4

'40..47' are the 'i = 10..11' in 'foo', and the expectation is
'a[i * stride + 0..3] != 0'.  So, either some earlier iteration has
scribbled zero values over these (vector lane masking issue, perhaps?),
or some other code generation issue?


So we're indeed BB vectorizing this to

   _54 = MEM  [(int *)_14];
   vect_iftmp.12_56 = .VCOND (_54, { 0, 0, 0, 0 }, { 1, 2, 3, 4 }, { 5, 6,
7, 8 }, 115);
   MEM  [(int *)_14] = vect_iftmp.12_56;

I don't understand the assembly very well but it might be that
the mask computation for the .VCOND scribbles the mask used
to constrain operation to 4 lanes?

.L3:
 s_mov_b64   exec, 15
 v_add_co_u32v4, s[22:23], s32, v3
 v_mov_b32   v5, s33
 v_add_co_ci_u32 v5, s[22:23], 0, v5, s[22:23]
 flat_load_dword v7, v[4:5] offset:0
 s_waitcnt   0
 flat_load_dword v0, v[10:11] offset:0
 s_waitcnt   0
 flat_load_dword v6, v[8:9] offset:0
 s_waitcnt   0
 v_cmp_ne_u32s[18:19], v7, 0
 v_cndmask_b32   v0, v6, v0, s[18:19]
 flat_store_dwordv[4:5], v0 offset:0
 s_add_i32   s12, s12, 1
 s_add_u32   s32, s32, s28
 s_addc_u32  s33, s33, s29
 s_cmp_lg_u32s12, s13
 s_cbranch_scc1  .L3


This basic block has EXEC set to 15 (4 lanes) throughout. The mask for 
the VCOND a.k.a. v_cndmask_b32 is in s[18:19]. Those things seem OK.


I see the testcase avoids vec_extract V64SI to V4SI for gfx1100, even 
though it would be a no-op conversion, because the general case requires 
a permute instruction and named pattern insns can't have non-constant 
conditions. Is vec_extract allowed to FAIL? That might give a better 
result in this case.


However, I must be doing something different because 
vect/bb-slp-cond-1.c passes for me, on gfx1100.


Andrew

[committed] libstdc++: Fix FAIL: 26_numerics/random/pr60037-neg.cc [PR113931]

2024-02-16 Thread Jonathan Wakely

Tested x86_64-linux, pushed to trunk.

-- >8 --

PR libstdc++/87744
PR libstdc++/113931

libstdc++-v3/ChangeLog:

* testsuite/26_numerics/random/pr60037-neg.cc: Adjust dg-error
line number.
---
 libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc 
b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
index 4c24e56cea2..9d6925fb416 100644
--- a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
+++ b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
@@ -10,6 +10,6 @@ std::__detail::_Adaptor 
aurng(urng);
 auto x = std::generate_canonical::digits>(urng);
 
-// { dg-error "static assertion failed: template argument must be a floating 
point type" "" { target *-*-* } 271 }
+// { dg-error "static assertion failed: template argument must be a floating 
point type" "" { target *-*-* } 270 }
 
-// { dg-error "static assertion failed: template argument must be a floating 
point type" "" { target *-*-* } 3351 }
+// { dg-error "static assertion failed: template argument must be a floating 
point type" "" { target *-*-* } 3350 }
-- 
2.43.0

[committed] libstdc++: Improve docs for debug mode backtraces

2024-02-16 Thread Jonathan Wakely

Pushed to trunk.

-- >8 --

The configure option is no longer necessary.

libstdc++-v3/ChangeLog:

* doc/xml/manual/debug_mode.xml: Update docs for backtraces.
* doc/html/manual/debug_mode_using.html: Regenerate.
---
 libstdc++-v3/doc/html/manual/debug_mode_using.html | 9 -
 libstdc++-v3/doc/xml/manual/debug_mode.xml | 9 -
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/libstdc++-v3/doc/html/manual/debug_mode_using.html 
b/libstdc++-v3/doc/html/manual/debug_mode_using.html
index e26d06c9179..70d6692f514 100644
--- a/libstdc++-v3/doc/html/manual/debug_mode_using.html
+++ b/libstdc++-v3/doc/html/manual/debug_mode_using.html
@@ -10,11 +10,10 @@
   78 characters.  The environment variable
   GLIBCXX_DEBUG_MESSAGE_LENGTH can be used to 
request a
   different length.Note that libstdc++ is able to produce backtraces on 
error.
-  It requires that you configure libstdc++ build with
-  --enable-libstdcxx-backtrace=yes.
-  Use -D_GLIBCXX_DEBUG_BACKTRACE to activate it.
-  You'll then have to link with libstdc++_libbacktrace static library
-  (-lstdc++_libbacktrace) to build your 
application.Using a Specific Debug 
ContainerWhen it is not feasible to recompile your 
entire application, or
+  To enable these, compile with -D_GLIBCXX_DEBUG_BACKTRACE
+  and then link with -lstdc++exp.
+  These backtraces are not supported on all platforms.
+Using a Specific Debug 
ContainerWhen it is not feasible to recompile your 
entire application, or
   only specific containers need checking, debugging containers are
   available as GNU extensions. These debugging containers are
   functionally equivalent to the standard drop-in containers used in
diff --git a/libstdc++-v3/doc/xml/manual/debug_mode.xml 
b/libstdc++-v3/doc/xml/manual/debug_mode.xml
index ac15ef6f6d0..a12158fa03e 100644
--- a/libstdc++-v3/doc/xml/manual/debug_mode.xml
+++ b/libstdc++-v3/doc/xml/manual/debug_mode.xml
@@ -162,11 +162,10 @@ which always works correctly.
   different length.
 
 Note that libstdc++ is able to produce backtraces on error.
-  It requires that you configure libstdc++ build with
-  --enable-libstdcxx-backtrace=yes.
-  Use -D_GLIBCXX_DEBUG_BACKTRACE to activate it.
-  You'll then have to link against libstdc++exp static library
-  (-lstdc++exp) to build your application.
+  To enable these, compile with -D_GLIBCXX_DEBUG_BACKTRACE
+  and then link with -lstdc++exp.
+  These backtraces are not supported on all platforms.
+
 
 
 Using a Specific Debug Container
-- 
2.43.0

[committed] libstdc++: Fix spelling of elements in manual

2024-02-16 Thread Jonathan Wakely

Pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* doc/xml/manual/test.xml: Fix spelling of  elements.
* doc/html/manual/test.html: Regenerate.
---
 libstdc++-v3/doc/html/manual/test.html | 4 ++--
 libstdc++-v3/doc/xml/manual/test.xml   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/doc/html/manual/test.html 
b/libstdc++-v3/doc/html/manual/test.html
index 4b7f60c9bad..3657997fad4 100644
--- a/libstdc++-v3/doc/html/manual/test.html
+++ b/libstdc++-v3/doc/html/manual/test.html
@@ -356,7 +356,7 @@ cat 27_io/objects/char/3_xin.in | a.outDEJAGNU environment variable will cause every 
test to
   be run three times, using a different -std 
each time.
   Alternatively, a list of standard versions to test with can be specified
-  as a comma-separated list in the envvarGLIBCXX_TESTSUITE_STDS/envvar
+  as a comma-separated list in the GLIBCXX_TESTSUITE_STDS
   environment variable.
 
   To run the libstdc++ test suite under the
@@ -478,7 +478,7 @@ cat 27_io/objects/char/3_xin.in | a.out-std=gnu++11
 or -std=c++11 is explicitly specified, either 
via a
 target board, the v3_std_list dejagnu 
variable,
-or the envvarGLIBCXX_TESTSUITE_STDS/envvar environment 
variable.
+or the GLIBCXX_TESTSUITE_STDS environment 
variable.
 For tests that require a specific standard it is useful to also add a
 dg-options directive:
 // { dg-options "-std=gnu++11" }
diff --git a/libstdc++-v3/doc/xml/manual/test.xml 
b/libstdc++-v3/doc/xml/manual/test.xml
index f2c709bc6bf..40926946fe7 100644
--- a/libstdc++-v3/doc/xml/manual/test.xml
+++ b/libstdc++-v3/doc/xml/manual/test.xml
@@ -604,7 +604,7 @@ cat 27_io/objects/char/3_xin.in | a.out
   DEJAGNU environment variable will cause every test to
   be run three times, using a different -std each time.
   Alternatively, a list of standard versions to test with can be specified
-  as a comma-separated list in the GLIBCXX_TESTSUITE_STDS
+  as a comma-separated list in the GLIBCXX_TESTSUITE_STDS
   environment variable.
 
 
@@ -779,7 +779,7 @@ cat 27_io/objects/char/3_xin.in | a.out
 the test will be skipped by default unless -std=gnu++11
 or -std=c++11 is explicitly specified, either via a
 target board, the v3_std_list dejagnu variable,
-or the GLIBCXX_TESTSUITE_STDS environment variable.
+or the GLIBCXX_TESTSUITE_STDS environment variable.
 For tests that require a specific standard it is useful to also add a
 dg-options directive:
 // { dg-options "-std=gnu++11" }
-- 
2.43.0

Re: [PATCH] Turn on LRA on all targets

2024-02-16 Thread Segher Boessenkool

On Thu, Feb 15, 2024 at 08:41:42PM -0500, Paul Koning wrote:
> > On Feb 15, 2024, at 5:56 PM, Segher Boessenkool 
> >  wrote:
> > 
> > On Thu, Feb 15, 2024 at 07:34:32PM +, Sam James wrote:
> >> I have now started doing this in PR113932.
> > 
> > Thank you!
> > 
> > Segher
> 
> Presumably this isn't for version 14 since it's in a late stage, right?  I 
> have my bits about ready to go in but I'll wait for Stage 1 to open.  Correct?

Absolutely.  It was decided early in stage 1 that this wasn't going to
happen for 14.

It appears most of the anywhere near hard targets have not done anything
though.  I'll just push very hard for 15.  But you will be fine :-)

Segher

Re: GCN RDNA2+ vs. GCC SLP vectorizer (was: [committed] amdgcn: add -march=gfx1030 EXPERIMENTAL)

2024-02-16 Thread Richard Biener

On Fri, 16 Feb 2024, Thomas Schwinge wrote:

> Hi!
> 
> On 2023-10-20T12:51:03+0100, Andrew Stubbs  wrote:
> > I've committed this patch
> 
> ... as commit c7ec7bd1c6590cf4eed267feab490288e0b8d691
> "amdgcn: add -march=gfx1030 EXPERIMENTAL", which the later RDNA3/gfx1100
> support builds on top of, and that's what I'm currently working on
> getting proper GCC/GCN target (not offloading) results for.
> 
> Now looking at 'gcc.dg/vect/bb-slp-cond-1.c', which is reasonably simple,
> and hopefully representative for other SLP execution test FAILs
> (regressions compared to my earlier non-gfx1100 testing).
> 
> $ build-gcc/gcc/xgcc -Bbuild-gcc/gcc/ 
> source-gcc/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 
> --sysroot=install/amdgcn-amdhsa -ftree-vectorize 
> -fno-tree-loop-distribute-patterns -fno-vect-cost-model -fno-common -O2 
> -fdump-tree-slp-details -fdump-tree-vect-details -isystem 
> build-gcc/amdgcn-amdhsa/gfx1100/newlib/targ-include -isystem 
> source-gcc/newlib/libc/include -Bbuild-gcc/amdgcn-amdhsa/gfx1100/newlib/ 
> -Lbuild-gcc/amdgcn-amdhsa/gfx1100/newlib -wrapper setarch,--addr-no-randomize 
> -fdump-tree-all-all -fdump-ipa-all-all -fdump-rtl-all-all -save-temps 
> -march=gfx1100
> 
> The '-march=gfx1030' 'a-bb-slp-cond-1.s' is identical (apart from
> 'TARGET_PACKED_WORK_ITEMS' in 'gcn_target_asm_function_prologue'), so I
> suppose will also exhibit the same failure mode, once again?
> 
> Compared to '-march=gfx90a', the differences begin in
> 'a-bb-slp-cond-1.c.266r.expand' (only!), down to 'a-bb-slp-cond-1.s'.
> 
> Changed like:
> 
> @@ -38,10 +38,10 @@ int main ()
>  #pragma GCC novector
>for (i = 1; i < N; i++)
>  if (a[i] != i%4 + 1)
> -  abort ();
> +  __builtin_printf("%d %d != %d\n", i, a[i], i%4 + 1);
>  
>if (a[0] != 5)
> -abort ();
> +__builtin_printf("%d %d != %d\n", 0, a[0], 5);
> 
> ..., we see:
> 
> $ flock /tmp/gcn.lock build-gcc/gcc/gcn-run a.out
> 40 5 != 1
> 41 6 != 2
> 42 7 != 3
> 43 8 != 4
> 44 5 != 1
> 45 6 != 2
> 46 7 != 3
> 47 8 != 4
> 
> '40..47' are the 'i = 10..11' in 'foo', and the expectation is
> 'a[i * stride + 0..3] != 0'.  So, either some earlier iteration has
> scribbled zero values over these (vector lane masking issue, perhaps?),
> or some other code generation issue?

So we're indeed BB vectorizing this to

  _54 = MEM  [(int *)_14];
  vect_iftmp.12_56 = .VCOND (_54, { 0, 0, 0, 0 }, { 1, 2, 3, 4 }, { 5, 6, 
7, 8 }, 115);
  MEM  [(int *)_14] = vect_iftmp.12_56;

I don't understand the assembly very well but it might be that
the mask computation for the .VCOND scribbles the mask used
to constrain operation to 4 lanes?

.L3:
s_mov_b64   exec, 15
v_add_co_u32v4, s[22:23], s32, v3
v_mov_b32   v5, s33
v_add_co_ci_u32 v5, s[22:23], 0, v5, s[22:23]
flat_load_dword v7, v[4:5] offset:0
s_waitcnt   0
flat_load_dword v0, v[10:11] offset:0
s_waitcnt   0
flat_load_dword v6, v[8:9] offset:0
s_waitcnt   0
v_cmp_ne_u32s[18:19], v7, 0
v_cndmask_b32   v0, v6, v0, s[18:19]
flat_store_dwordv[4:5], v0 offset:0
s_add_i32   s12, s12, 1
s_add_u32   s32, s32, s28
s_addc_u32  s33, s33, s29
s_cmp_lg_u32s12, s13
s_cbranch_scc1  .L3

Richard.

[PATCH] testsuite: Fix up lra effective target

2024-02-16 Thread Jakub Jelinek

Hi!

Given the recent discussions on IRC started with Andrew P. mentioning that
an asm goto outputs test should have { target lra } and the lra effective
target in GCC 11/12 only returning 0 for PA and in 13/14 for PA/AVR, while
we clearly have 14 other targets which don't support LRA and a couple of
further ones which have an -mlra/-mno-lra switch (whatever default they
have), seems to me the effective target is quite broken.

The following patch rewrites it, such that it has a fast path for heavily
used targets which are for years known to use only LRA (just an
optimization) plus determines whether it is a LRA target or reload target
by scanning the -fdump-rtl-reload-details dump on an empty function,
LRA has quite a few always emitted messages in that case while reload has
none of those.

Tested on x86_64-linux and cross to s390x-linux, for the latter with both
make check-gcc RUNTESTFLAGS='--target_board=unix/-mno-lra dg.exp=pr107385.c'
where the test is now UNSUPPORTED and
make check-gcc RUNTESTFLAGS='--target_board=unix/-mlra dg.exp=pr107385.c'
where it fails because I don't have libc around.

Ok for trunk?

There is one special case, NVPTX, which is a TARGET_NO_REGISTER_ALLOCATION
target.  I think claiming for it that it is a lra target is strange (even
though it effectively returns true for targetm.lra_p ()), unsure if it
supports asm goto with outputs or not, if it does and we want to test it,
perhaps we should introduce asm_goto_outputs effective target and use
lra || nvptx-*-* for that?

2024-02-16  Jakub Jelinek  

* lib/target-supports.exp (check_effective_target_lra): Rewrite
to list some heavily used always LRA targets and otherwise check the
-fdump-rtl-reload-details dump for messages specific to LRA.

--- gcc/testsuite/lib/target-supports.exp.jj2024-02-15 09:51:34.591064180 
+0100
+++ gcc/testsuite/lib/target-supports.exp   2024-02-16 10:50:29.986180603 
+0100
@@ -13215,10 +13215,17 @@ proc check_effective_target_powerpc_as_p
 # return 1 if LRA is supported.
 
 proc check_effective_target_lra { } {
-if { [istarget hppa*-*-*] || [istarget avr-*-*] } {
-   return 0
+# Start with heavily used targets which are known to always use LRA.
+if { [istarget i?86-*-*] || [istarget x86_64-*-*]
+|| [istarget aarch64*-*-*] || [istarget arm*-*-*]
+|| [istarget powerpc*-*-*] || [istarget riscv*-*-*] } {
+   return 1
 }
-return 1
+
+# Otherwise check the reload dump for messages emitted solely by LRA.
+return [check_no_messages_and_pattern lra "\\\*{9} Local #1: \\\*{9}" 
rtl-reload {
+void foo (void) {}
+} {-O2 -fdump-rtl-reload-details}] ;# LRA notes requires a detailed dump.
 }
 
 # Test whether optimizations are enabled ('__OPTIMIZE__') per the

Jakub

[committed] RISC-V: Fix *sge_ pattern

2024-02-16 Thread Kito Cheng

*sge_ pattern has referenced operand[2] which is
invalid...it should just use `slti` rather than `slt%i2`.

gcc/ChangeLog:

PR target/106543
* config/riscv/riscv.md (*sge_): Fix asm
pattern.
---
 gcc/config/riscv/riscv.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 39b29795cd6..3f7a023d941 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3107,7 +3107,7 @@
(any_ge:GPR (match_operand:X 1 "register_operand" " r")
(const_int 1)))]
   ""
-  "slt%i2\t%0,zero,%1"
+  "slti\t%0,zero,%1"
   [(set_attr "type" "slt")
(set_attr "mode" "")])
 
-- 
2.34.1

GCN RDNA2+ vs. GCC SLP vectorizer (was: [committed] amdgcn: add -march=gfx1030 EXPERIMENTAL)

2024-02-16 Thread Thomas Schwinge

Hi!

On 2023-10-20T12:51:03+0100, Andrew Stubbs  wrote:
> I've committed this patch

... as commit c7ec7bd1c6590cf4eed267feab490288e0b8d691
"amdgcn: add -march=gfx1030 EXPERIMENTAL", which the later RDNA3/gfx1100
support builds on top of, and that's what I'm currently working on
getting proper GCC/GCN target (not offloading) results for.

Now looking at 'gcc.dg/vect/bb-slp-cond-1.c', which is reasonably simple,
and hopefully representative for other SLP execution test FAILs
(regressions compared to my earlier non-gfx1100 testing).

$ build-gcc/gcc/xgcc -Bbuild-gcc/gcc/ 
source-gcc/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c 
--sysroot=install/amdgcn-amdhsa -ftree-vectorize 
-fno-tree-loop-distribute-patterns -fno-vect-cost-model -fno-common -O2 
-fdump-tree-slp-details -fdump-tree-vect-details -isystem 
build-gcc/amdgcn-amdhsa/gfx1100/newlib/targ-include -isystem 
source-gcc/newlib/libc/include -Bbuild-gcc/amdgcn-amdhsa/gfx1100/newlib/ 
-Lbuild-gcc/amdgcn-amdhsa/gfx1100/newlib -wrapper setarch,--addr-no-randomize 
-fdump-tree-all-all -fdump-ipa-all-all -fdump-rtl-all-all -save-temps 
-march=gfx1100

The '-march=gfx1030' 'a-bb-slp-cond-1.s' is identical (apart from
'TARGET_PACKED_WORK_ITEMS' in 'gcn_target_asm_function_prologue'), so I
suppose will also exhibit the same failure mode, once again?

Compared to '-march=gfx90a', the differences begin in
'a-bb-slp-cond-1.c.266r.expand' (only!), down to 'a-bb-slp-cond-1.s'.

Changed like:

@@ -38,10 +38,10 @@ int main ()
 #pragma GCC novector
   for (i = 1; i < N; i++)
 if (a[i] != i%4 + 1)
-  abort ();
+  __builtin_printf("%d %d != %d\n", i, a[i], i%4 + 1);
 
   if (a[0] != 5)
-abort ();
+__builtin_printf("%d %d != %d\n", 0, a[0], 5);

..., we see:

$ flock /tmp/gcn.lock build-gcc/gcc/gcn-run a.out
40 5 != 1
41 6 != 2
42 7 != 3
43 8 != 4
44 5 != 1
45 6 != 2
46 7 != 3
47 8 != 4

'40..47' are the 'i = 10..11' in 'foo', and the expectation is
'a[i * stride + 0..3] != 0'.  So, either some earlier iteration has
scribbled zero values over these (vector lane masking issue, perhaps?),
or some other code generation issue?


Grüße
 Thomas

Re: [PATCH] c++/modules: optimize tree flag streaming

2024-02-16 Thread Richard Biener

On Thu, Feb 15, 2024 at 7:38 PM Patrick Palka  wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
> OK for trunk?

Btw, there's the "bitpack" streaming support in data-streamer.h also
added for exactly the same reason, it's likely not easily re-usable
but this kind of approach is indeed important for performance.

The whole "data-streamer" was supposed to be re-usable though.

Richard.

> -- >8 --
>
> One would expect consecutive calls to bytes_in/out::b for streaming
> adjacent bits, as we do for tree flag streaming, to at least be
> optimized by the compiler into individual bit operations using
> statically known bit positions (and ideally merged into larger sized
> reads/writes).
>
> Unfortunately this doesn't happen because the compiler has trouble
> tracking the values of this->bit_pos and this->bit_val across such
> calls, likely because the compiler doesn't know 'this' and so it's
> treated as global memory.  This means for each consecutive bit stream
> operation, bit_pos and bit_val are loaded from memory, checked if
> buffering is needed, and finally the bit is extracted from bit_val
> according to the (unknown) bit_pos, even though relative to the previous
> operation (if we didn't need to buffer) bit_val is unchanged and bit_pos
> is just 1 larger.  This ends up being quite slow, with tree_node_bools
> taking 10% of time when streaming in parts of the std module.
>
> This patch optimizes this by making tracking of bit_pos and bit_val
> easier for the compiler.  Rather than bit_pos and bit_val being members
> of the (effectively global) bytes_in/out objects, this patch factors out
> the bit streaming code/state into separate classes bits_in/out that get
> constructed locally as needed for bit streaming.  Since these objects
> are now clearly local, the compiler can more easily track their values.
>
> And since bit streaming is intended to be batched it's natural for these
> new classes to be RAII-enabled such that the bit stream is flushed upon
> destruction.
>
> In order to make the most of this improved tracking of bit position,
> this patch changes parts where we conditionally stream a tree flag
> to unconditionally stream (the flag or a dummy value).  That way
> the number of bits streamed and the respective bit positions are as
> statically known as reasonably possible.  In lang_decl_bools and
> lang_type_bools we flush the current bit buffer at the start so that
> subsequent bit positions are statically known.  And in core bools, we
> can add explicit early exits utilizing invariants that the compiler
> can't figure out itself (e.g. a tree code can't have both TS_TYPE_COMMON
> and TS_DECL_COMMON, and if a tree code doesn't have TS_DECL_COMMON then
> it doesn't have TS_DECL_WITH_VIS).  Finally if we're streaming fewer
> than 4 bits, it's more space efficient to stream them as individual
> bytes rather than as packed bits (due to the 32-bit buffer).
>
> This patch also moves the definitions of the relevant streaming classes
> into anonymous namespaces so that the compiler can make more informed
> decisions about inlining their member functions.
>
> After this patch, compile time for a simple Hello World using the std
> module is reduced by 7% with a release compiler.  The on-disk size of
> the std module increases by 0.7% (presumably due to the extra flushing
> done in lang_decl_bools and lang_type_bools).
>
> The bit stream out performance isn't improved as much as the stream in
> due to the spans/lengths instrumentation performed on stream out (which
> probably should be e.g. removed for release builds?)
>
> gcc/cp/ChangeLog:
>
> * module.cc
> (class data): Enclose in an anonymous namespace.
> (data::calc_crc): Moved from bytes::calc_crc.
> (class bytes): Remove.  Move bit_flush to namespace scope.
> (class bytes_in): Enclose in an anonymous namespace.  Inherit
> directly from data and adjust accordingly.  Move b and bflush
> members to bits_in.
> (class bytes_out): As above.  Remove is_set static data member.
> (bit_flush): Moved from class bytes.
> (struct bits_in): Define.
> (struct bits_out): Define.
> (bytes_out::bflush): Moved to bits_out/in.
> (bytes_in::bflush): Likewise
> (bytes_in::bfill): Removed.
> (bytes_out::b): Moved to bits_out/in.
> (bytes_in::b): Likewise.
> (class trees_in): Enclose in an anonymous namespace.
> (class trees_out): Enclose in an anonymous namespace.
> (trees_out::core_bools): Add bits_out/in parameter and use it.
> Unconditionally stream a bit for public_flag.  Add early exits
> as appropriate.
> (trees_in::core_bools): Likewise.
> (trees_out::lang_decl_bools): Add bits_out/in parameter and use
> it.  Flush the current bit buffer at the start.  Unconditionally
> stream a bit for module_keyed_decls_p.
>

[PATCH] c++: Diagnose this specifier on template parameters [PR113929]

2024-02-16 Thread Jakub Jelinek

Hi!

For template parameters, the optional this specifier is in the grammar
template-parameter-list -> template-parameter -> parameter-declaration,
just [dcl.fct/6] says that it is only valid in parameter-list of certain
functions.  So, unlike the case of decl-specifier-seq used in non-terminals
other than parameter-declaration, I think it is better not to fix this
by
   cp_parser_decl_specifier_seq (parser,
-flags | CP_PARSER_FLAGS_PARAMETER,
+flags | (template_parameter_p ? 0
+ : CP_PARSER_FLAGS_PARAMETER),
 &decl_specifiers,
 &declares_class_or_enum);
which would be pretending it isn't in the grammar, but by diagnosing it
separately, which is what the following patch does.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-16  Jakub Jelinek  

PR c++/113929
* parser.cc (cp_parser_parameter_declaration): Diagnose this specifier
on template parameter declaration.

* g++.dg/parse/pr113929.C: New test.

--- gcc/cp/parser.cc.jj 2024-02-15 17:33:11.641453437 +0100
+++ gcc/cp/parser.cc2024-02-15 17:40:29.592447265 +0100
@@ -25724,8 +25724,15 @@ cp_parser_parameter_declaration (cp_pars
  for a C-style variadic function. */
   token = cp_lexer_peek_token (parser->lexer);
 
-  bool const xobj_param_p
+  bool xobj_param_p
 = decl_spec_seq_has_spec_p (&decl_specifiers, ds_this);
+  if (xobj_param_p && template_parm_p)
+{
+  error_at (decl_specifiers.locations[ds_this],
+   "%<this%> specifier in template parameter declaration");
+  xobj_param_p = false;
+  decl_specifiers.locations[ds_this] = 0;
+}
 
   bool diag_xobj_parameter_pack = false;
   if (xobj_param_p && (declarator && declarator->parameter_pack_p))
--- gcc/testsuite/g++.dg/parse/pr113929.C.jj2024-02-15 17:43:18.500129688 
+0100
+++ gcc/testsuite/g++.dg/parse/pr113929.C   2024-02-15 17:42:54.564458109 
+0100
@@ -0,0 +1,7 @@
+// PR c++/113929
+// { dg-do compile }
+
+template   // { dg-error "'this' specifier in template 
parameter declaration" }
+struct S {};
+template// { dg-error "'this' specifier in template 
parameter declaration" }
+struct T {};

Jakub

[PATCH] dwarf2out: Emit DW_AT_export_symbols on anon unions/structs [PR113918]

2024-02-16 Thread Jakub Jelinek

Hi!

DWARF5 added DW_AT_export_symbols for use both on inline namespaces (where
we emit it) and on anonymous unions/structs (where we didn't emit that
attribute).
The following patch fixes it.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-16  Jakub Jelinek  

PR debug/113918
* dwarf2out.cc (gen_field_die): Emit DW_AT_export_symbols
on anonymous unions or structs for -gdwarf-5 or -gno-strict-dwarf.

* c-c++-common/dwarf2/pr113918.c: New test.

--- gcc/dwarf2out.cc.jj 2024-02-10 11:25:09.836476263 +0100
+++ gcc/dwarf2out.cc2024-02-15 13:38:14.485675460 +0100
@@ -25153,6 +25153,20 @@ gen_field_die (tree decl, struct vlr_con
 
   add_accessibility_attribute (decl_die, decl);
 
+  /* Add DW_AT_export_symbols to anonymous unions or structs.  */
+  if ((dwarf_version >= 5 || !dwarf_strict) && DECL_NAME (decl) == NULL_TREE)
+if (tree type = member_declared_type (decl))
+  {
+   tree type_id = TYPE_IDENTIFIER (TYPE_MAIN_VARIANT (type));
+   if (RECORD_OR_UNION_TYPE_P (type)
+   && (type_id == NULL_TREE || IDENTIFIER_ANON_P (type_id)))
+   {
+ dw_die_ref type_die = lookup_type_die (TYPE_MAIN_VARIANT (type));
+ if (type_die && get_AT (type_die, DW_AT_export_symbols) == NULL)
+   add_AT_flag (type_die, DW_AT_export_symbols, 1);
+   }
+}
+
   /* Equate decl number to die, so that we can look up this decl later on.  */
   equate_decl_number_to_die (decl, decl_die);
 }
--- gcc/testsuite/c-c++-common/dwarf2/pr113918.c.jj 2024-02-15 
13:26:53.935984554 +0100
+++ gcc/testsuite/c-c++-common/dwarf2/pr113918.c2024-02-15 
13:26:48.773055181 +0100
@@ -0,0 +1,33 @@
+/* PR debug/113918 */
+/* { dg-do compile } */
+/* { dg-options "-gdwarf-5 -dA -fno-merge-debug-strings" } */
+
+struct S {
+  union {
+int i;
+long long j;
+  };
+  struct {
+int k;
+long long l;
+  };
+  union {
+int m;
+long long n;
+  } u;
+  struct {
+int o;
+long long p;
+  } v;
+} s;
+
+int
+main ()
+{
+  s.i = 1;
+  s.k = 2;
+  s.u.m = 3;
+  s.v.o = 4;
+}
+
+/* { dg-final { scan-assembler-times "DW_AT_export_symbols" 4 } } */

Jakub

[PATCH] c++: Fix up parameter pack diagnostics on xobj vs. varargs functions [PR113802]

2024-02-16 Thread Jakub Jelinek

Hi!

The simple presence of ellipsis as next token after the parameter
declaration doesn't imply it is a parameter pack, it sometimes is, e.g.
if its type is a pack, but sometimes is not and in that case it acts
the same as if the next tokens were , ... instead of just ...
The "explicit object parameter cannot be a function parameter pack"
diagnostic, though, treats both declarator->parameter_pack_p and
token->type == CPP_ELLIPSIS as sufficient conditions for the error.
The checks for CPP_ELLIPSIS are done a little bit later in the same
function and are complex enough that IMHO they shouldn't be repeated; on
the other hand, for the declarator->parameter_pack_p case we clear that
flag for xobj params for error recovery reasons.
In order to avoid diagnosing this in two spots, one at the current spot
for declarator->parameter_pack_p and one for the ellipsis case after
we decide if it is parameter pack or varargs, the following patch instead
just sets a boolean flag recording whether we should emit this diagnostic, does it
early for declarator->parameter_pack_p case and clears the parameter_pack_p
flag in that case like the older patch did, and for the ellipsis case
sets the flag later, then emits the diagnostics.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-16  Jakub Jelinek  

PR c++/113802
* parser.cc (cp_parser_parameter_declaration): Don't emit
PR113307 diagnostics too early, instead for the
declarator->parameter_pack_p case just set a flag it should be emitted
later.  Set that flag also when consuming following ellipsis as part
of a parameter pack and diagnose either afterwards.  Formatting fix.

* g++.dg/cpp23/explicit-obj-diagnostics3.C (S0, S1, S2, S3, S4): Don't
expect any diagnostics on f and fd member function templates, add
similar templates with ...Selves instead of Selves as k and kd and
expect diagnostics for those.

--- gcc/cp/parser.cc.jj 2024-02-14 14:26:19.0 +0100
+++ gcc/cp/parser.cc2024-02-15 11:58:27.033618967 +0100
@@ -25727,17 +25727,10 @@ cp_parser_parameter_declaration (cp_pars
   bool const xobj_param_p
 = decl_spec_seq_has_spec_p (&decl_specifiers, ds_this);
 
-  if (xobj_param_p
-  && ((declarator && declarator->parameter_pack_p)
- || cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS)))
+  bool diag_xobj_parameter_pack = false;
+  if (xobj_param_p && (declarator && declarator->parameter_pack_p))
 {
-  location_t xobj_param
-   = make_location (decl_specifiers.locations[ds_this],
-decl_spec_token_start->location,
-input_location);
-  error_at (xobj_param,
-   "an explicit object parameter cannot "
-   "be a function parameter pack");
+  diag_xobj_parameter_pack = true;
   /* Suppress errors that occur down the line.  */
   if (declarator)
declarator->parameter_pack_p = false;
@@ -25755,9 +25748,10 @@ cp_parser_parameter_declaration (cp_pars
(INNERMOST_TEMPLATE_PARMS (current_template_parms));
 
   if (latest_template_parm_idx != template_parm_idx)
-   decl_specifiers.type = convert_generic_types_to_packs
- (decl_specifiers.type,
-  template_parm_idx, latest_template_parm_idx);
+   decl_specifiers.type
+ = convert_generic_types_to_packs (decl_specifiers.type,
+   template_parm_idx,
+   latest_template_parm_idx);
 }
 
   if (cp_lexer_next_token_is (parser->lexer, CPP_ELLIPSIS))
@@ -25773,6 +25767,8 @@ cp_parser_parameter_declaration (cp_pars
   || (!type && template_parm_p))
  && declarator_can_be_parameter_pack (declarator))
{
+ if (xobj_param_p)
+   diag_xobj_parameter_pack = true;
  /* Consume the `...'. */
  cp_lexer_consume_token (parser->lexer);
  maybe_warn_variadic_templates ();
@@ -25787,6 +25783,17 @@ cp_parser_parameter_declaration (cp_pars
}
 }
 
+  if (diag_xobj_parameter_pack)
+{
+  location_t xobj_param
+   = make_location (decl_specifiers.locations[ds_this],
+decl_spec_token_start->location,
+input_location);
+  error_at (xobj_param,
+   "an explicit object parameter cannot "
+   "be a function parameter pack");
+}
+
   /* The restriction on defining new types applies only to the type
  of the parameter, not to the default argument.  */
   parser->type_definition_forbidden_message = saved_message;
--- gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C.jj   2024-01-17 
10:34:49.812597960 +0100
+++ gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics3.C  2024-02-15 
12:14:29.994356800 +0100
@@ -24,7 +24,7 @@ void S::f12(this S s = {}) {} // { dg-er
 
 struct S0 {
   template
-  void f(this Selves...) {} // { dg-error "an explicit object parameter cannot 
be a

[COMMITTED] testsuite: Require lto-plugin support in gcc.dg/lto/modref-3 etc. [PR98237]

2024-02-16 Thread Rainer Orth

gcc.dg/lto/modref-3 etc. FAIL on Solaris with the native linker:

FAIL: gcc-dg-lto-modref-3-01.exe scan-wpa-ipa-dump modref "parm 1 flags: 
no_direct_clobber no_direct_escape"
FAIL: gcc-dg-lto-modref-4-01.exe scan-wpa-ipa-dump modref "parm 1 flags: 
no_direct_clobber no_direct_escape"
FAIL: gcc.dg/lto/modref-3 c_lto_modref-3_0.o-c_lto_modref-3_1.o execute -O2 
-flto-partition=max -fdump-ipa-modref -fno-ipa-sra -fno-ipa-cp -flto
FAIL: gcc.dg/lto/modref-4 c_lto_modref-4_0.o-c_lto_modref-4_1.o execute -O2 
-flto-partition=max -fdump-ipa-modref -fno-ipa-sra -flto

The issue is that the tests require the linker plugin, which isn't
available with Solaris ld.  Thus, it also FAILs when gcc is configured
with --disable-lto-plugin.

This patch thus declares the requirement.  As it turns out, there's an
undocumented dg-require-linker-plugin already, but I introduce and use
the corresponding effective-target keyword and document both.

Given that the effective-target form is more flexible, I'm tempted to
remove dg-require-* with an empty arg as already mentioned in
sourcebuild.texi.  That is not this patch, however.

Tested on i386-pc-solaris2.11 with ld and gld.

Committed to trunk.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2024-02-14  Rainer Orth  

gcc/testsuite:
PR ipa/98237
* lib/target-supports.exp (is-effective-target): Handle
linker_plugin.
* gcc.dg/lto/modref-3_0.c: Require linker_plugin support.
* gcc.dg/lto/modref-4_0.c: Likewise.

gcc:
* doc/sourcebuild.texi (Effective-Target Keywords, Other
attributes): Document linker_plugin.
(Require Support): Document dg-require-linker-plugin.

# HG changeset patch
# Parent  c4cd589ec9c0ac709c6678e47799d87d82fbad00
testsuite: Require lto-plugin support in gcc.dg/lto/modref-3 etc. [PR98237]

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -3009,6 +3009,9 @@ Target supports @option{-mpe-aligned-com
 @item pie
 Target supports @option{-pie}, @option{-fpie} and @option{-fPIE}.
 
+@item linker_plugin
+Target supports the linker plugin.
+
 @item rdynamic
 Target supports @option{-rdynamic}.
 
@@ -3337,6 +3340,10 @@ system.  Some tests are incompatible wit
 hosts, which involves copying the source file to the host and compiling
 it with a relative path and "@code{-o a.out}".
 
+@item dg-require-linker-plugin ""
+Skip the test if the target does not support the linker plugin.
+This is equivalent to @code{dg-require-effective-target linker_plugin}.
+
 @item dg-require-mkfifo ""
 Skip the test if the target does not support @code{mkfifo}.
 
diff --git a/gcc/testsuite/gcc.dg/lto/modref-3_0.c b/gcc/testsuite/gcc.dg/lto/modref-3_0.c
--- a/gcc/testsuite/gcc.dg/lto/modref-3_0.c
+++ b/gcc/testsuite/gcc.dg/lto/modref-3_0.c
@@ -1,5 +1,6 @@
 /* { dg-lto-do run } */
 /* { dg-lto-options { {-O2 -flto-partition=max -fdump-ipa-modref -fno-ipa-sra -fno-ipa-cp -flto} } } */
+/* { dg-require-effective-target linker_plugin } */
 extern void copy (int *a, int *b);
 extern void barrier ();
 extern int *ptr;
diff --git a/gcc/testsuite/gcc.dg/lto/modref-4_0.c b/gcc/testsuite/gcc.dg/lto/modref-4_0.c
--- a/gcc/testsuite/gcc.dg/lto/modref-4_0.c
+++ b/gcc/testsuite/gcc.dg/lto/modref-4_0.c
@@ -1,5 +1,6 @@
 /* { dg-lto-do run } */
 /* { dg-lto-options { {-O2 -flto-partition=max -fdump-ipa-modref -fno-ipa-sra -flto} } } */
+/* { dg-require-effective-target linker_plugin } */
 extern void copy (int *a, int *b);
 extern void barrier ();
 extern int *ptr;
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9914,6 +9914,7 @@ proc is-effective-target { arg } {
 	  "htm_hw" { set selected [check_htm_hw_available] }
 	  "named_sections" { set selected [check_named_sections_available] }
 	  "gc_sections"{ set selected [check_gc_sections_available] }
+	  "linker_plugin"  { set selected [check_linker_plugin_available] }
 	  "cxa_atexit" { set selected [check_cxa_atexit_available] }
 	  default  { error "unknown effective target keyword `$arg'" }
 	}

90 matches

Mail list logo