Re: [PATCH v1] C++: Support constexpr strings for asm statements

2024-03-15 Thread Andi Kleen
> > I think keeping it untranslated is fine for now. Any translation
> > if really needed would be a separate feature.
> 
> I mean, unless you make extra effort, it is translated.
> Even in your current version, try constexpr *foo () { return "nop"; }
> and you'll see that it actually results in "\x95\x96\x97" with
> -fexec-charset=EBCDICUS.

Perhaps I don't understand the use case for this option.
Would it ever be used on a non EBCDIC system?

> What is worse, constexpr *bar () { return "%0 %1"; }
> results in "\x6c\xf0\x40\x6c\xf1", so the compiler will not be able to find
> the % special characters in there etc.
> The parsing of the string literal in asm definitions uses translate=false
> to avoid the translations.
> As the static_assert paper says, for static_assert it isn't that big a deal,
> the program is already UB if it diagnoses static assertion failure, worst
> case it prints garbage if one plays with -fexec-charset=.  But for inline
> asm it would fail to compile...
> 
> So, the extension really should be well defined vs. the character set,
> either it should be characters in the execution charset and the FE would
> need to ask libcpp to translate it back, or it would need to be declared
> to be e.g. in UTF-8 regardless of the charset (like u8'x' or u8"abc"
> literals are; but then shouldn't the _M_data in that case return a pointer
> to char8_t instead), something else?

Okay then we can always translate to UTF-8.

-Andi


Re: [PATCH 6/4] libbacktrace: Add loaded dlls after initialize

2024-03-15 Thread Björn Schäpers

Am 10.01.2024 um 13:34 schrieb Eli Zaretskii:

Date: Tue, 9 Jan 2024 21:02:44 +0100
Cc: i...@google.com, gcc-patches@gcc.gnu.org, g...@gcc.gnu.org
From: Björn Schäpers 

Am 07.01.2024 um 18:03 schrieb Eli Zaretskii:

In that case, you an call either GetModuleHandeExA or
GetModuleHandeExW, the difference is minor.


Here an updated version without relying on TEXT or TCHAR, directly calling
GetModuleHandleExW.


Thanks, this LGTM (but I couldn't test it, I just looked at the
sour ce code).


Here an updated version. It is rebased on the combined approach of getting the 
loaded DLLs and two minor changes to suppress warnings.
From 55d0f312c0a9c4e2305d72fa2329b37937a02e44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Sch=C3=A4pers?= 
Date: Sat, 6 Jan 2024 22:53:54 +0100
Subject: [PATCH 2/2] libbacktrace: Add loaded dlls after initialize

libbacktrace/Changelog:

* pecoff.c [HAVE_WINDOWS_H]:
  (dll_notification_data): Added
  (dll_notification_context): Added
  (dll_notification): Added
  (backtrace_initialize): Use LdrRegisterDllNotification to load
  debug information of later loaded
  dlls.
---
 libbacktrace/pecoff.c | 106 ++
 1 file changed, 106 insertions(+)

diff --git a/libbacktrace/pecoff.c b/libbacktrace/pecoff.c
index faa0be0b700..455a5c2191d 100644
--- a/libbacktrace/pecoff.c
+++ b/libbacktrace/pecoff.c
@@ -61,6 +61,34 @@ POSSIBILITY OF SUCH DAMAGE.  */
 #undef Module32Next
 #endif
 #endif
+
+#if defined(_ARM_)
+#define NTAPI
+#else
+#define NTAPI __stdcall
+#endif
+
+/* This is a simplified (but binary compatible) version of what Microsoft
+   defines in their documentation. */
+struct dll_notifcation_data
+{
+  ULONG reserved;
+  /* The name as UNICODE_STRING struct. */
+  PVOID full_dll_name;
+  PVOID base_dll_name;
+  PVOID dll_base;
+  ULONG size_of_image;
+};
+
+#define LDR_DLL_NOTIFICATION_REASON_LOADED 1
+
+typedef LONG NTSTATUS;
+typedef VOID CALLBACK (*LDR_DLL_NOTIFICATION)(ULONG,
+ struct dll_notifcation_data*,
+ PVOID);
+typedef NTSTATUS NTAPI (*LDR_REGISTER_FUNCTION)(ULONG,
+   LDR_DLL_NOTIFICATION, PVOID,
+   PVOID*);
 #endif
 
 /* Coff file header.  */
@@ -911,6 +939,53 @@ coff_add (struct backtrace_state *state, int descriptor,
   return 0;
 }
 
+#ifdef HAVE_WINDOWS_H
+struct dll_notification_context
+{
+  struct backtrace_state *state;
+  backtrace_error_callback error_callback;
+  void *data;
+};
+
+static VOID CALLBACK
+dll_notification (ULONG reason,
+ struct dll_notifcation_data *notification_data,
+ PVOID context)
+{
+  char module_name[MAX_PATH];
+  int descriptor;
+  struct dll_notification_context* dll_context =
+(struct dll_notification_context*) context;
+  struct backtrace_state *state = dll_context->state;
+  void *data = dll_context->data;
+  backtrace_error_callback error_callback = dll_context->data;
+  fileline fileline;
+  int found_sym;
+  int found_dwarf;
+  HMODULE module_handle;
+
+  if (reason != LDR_DLL_NOTIFICATION_REASON_LOADED)
+return;
+
+  if (!GetModuleHandleExW (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
+  | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+  (wchar_t*) notification_data->dll_base,
+  _handle))
+return;
+
+  if (!GetModuleFileNameA ((HMODULE) module_handle, module_name, MAX_PATH - 1))
+return;
+
+  descriptor = backtrace_open (module_name, error_callback, data, NULL);
+
+  if (descriptor < 0)
+return;
+
+  coff_add (state, descriptor, error_callback, data, , _sym,
+   _dwarf, (uintptr_t) module_handle);
+}
+#endif
+
 /* Initialize the backtrace data we need from an ELF executable.  At
the ELF level, all we need to do is find the debug info
sections.  */
@@ -935,6 +1010,8 @@ backtrace_initialize (struct backtrace_state *state,
 #endif
 
 #ifdef HAVE_WINDOWS_H
+  HMODULE nt_dll_handle;
+
   module_handle = (uintptr_t) GetModuleHandle (NULL);
 #endif
 
@@ -981,6 +1058,35 @@ backtrace_initialize (struct backtrace_state *state,
 }
 #endif
 
+#ifdef HAVE_WINDOWS_H
+  nt_dll_handle = GetModuleHandleW (L"ntdll.dll");
+  if (nt_dll_handle)
+{
+  LDR_REGISTER_FUNCTION register_func;
+  const char register_name[] = "LdrRegisterDllNotification";
+  register_func = (void*) GetProcAddress (nt_dll_handle,
+ register_name);
+
+  if (register_func)
+   {
+ PVOID cookie;
+ struct dll_notification_context *context
+   = backtrace_alloc (state,
+  sizeof(struct dll_notification_context),
+  error_callback, data);
+
+ if (context)
+   {
+ 

Re: [PATCH 5/4] libbacktrace: improve getting debug information for loaded dlls

2024-03-15 Thread Björn Schäpers

Am 25.01.2024 um 23:04 schrieb Ian Lance Taylor:

On Thu, Jan 25, 2024 at 11:53 AM Björn Schäpers  wrote:


Am 23.01.2024 um 23:37 schrieb Ian Lance Taylor:

On Thu, Jan 4, 2024 at 2:33 PM Björn Schäpers  wrote:


Am 03.01.2024 um 00:12 schrieb Björn Schäpers:

Am 30.11.2023 um 20:53 schrieb Ian Lance Taylor:

On Fri, Jan 20, 2023 at 2:55 AM Björn Schäpers  wrote:


From: Björn Schäpers 

Fixes https://github.com/ianlancetaylor/libbacktrace/issues/53, except
that libraries loaded after the backtrace_initialize are not handled.
But as far as I can see that's the same for elf.


Thanks, but I don't want a patch that loops using goto statements.
Please rewrite to avoid that.  It may be simpler to call a function.

Also starting with a module count of 1000 seems like a lot.  Do
typical Windows programs load that many modules?

Ian




Rewritten using a function.

If that is commited, could you attribute that commit to me (--author="Björn
Schäpers ")?

Thanks and kind regards,
Björn.


I noticed that under 64 bit libraries loaded with LoadLibrary were missing.
EnumProcessModules stated the correct number of modules, but did not fill the
the HMODULEs, but set them to 0. While trying to investigate I noticed if I do
the very same thing from main (in C++) I even got fewer module HMODULEs.

So I went a different way. This detects all libraries correctly, in 32 and 64
bit. The question is, if it should be a patch on top of the previous, or should
they be merged, or even only this solution and drop the EnumProcessModules 
variant?


Is there any reason to use both patches?  Seems simpler to just use
this one if it works.  Thanks.

Ian


This one needs the tlhelp32 header and its functions, which are (accoridng to
the microsoft documentation) are only available since Windows XP rsp. Windows
Server 2003.

If that's no problem, and in my opinion it shouldn't be, then I can basically
drop patch 4 and rebase this one.


I don't see that as a problem.  It seems like the patch will fall back
cleanly on ancient Windows and simply fail to pick up other loaded
DLLs.  I think that is fine.  I'll look for an updated patch.  Thanks.

Ian


Attached is the combined version of the two patches, only implementing the 
variant with the tlhelp32 API.


Tested on x86 and x86_64 windows.

Kind regards,
Björn.From 33a6380feff66e32ef0f0d7cbad6fb55319f4e2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Sch=C3=A4pers?= 
Date: Sun, 30 Apr 2023 23:54:32 +0200
Subject: [PATCH 1/2] libbacktrace: get debug information for loaded dlls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes https://github.com/ianlancetaylor/libbacktrace/issues/53, except
that libraries loaded after the backtrace_initialize are not handled.
But as far as I can see that's the same for elf.

Tested on x86_64-linux and i686-w64-mingw32.

-- >8 --

libbacktrace/Changelog:

* configure.ac: Checked for tlhelp32.h
* configure: Regenerate.
* config.h.in: Regenerate.
* pecoff.c: Include  if available.
 (backtrace_initialize): Use tlhelp32 api for a snapshot to
 detect loaded modules.
 (coff_add): New argument for the module handle of the file,
 to get the base address.

Signed-off-by: Björn Schäpers 
---
 libbacktrace/config.h.in  |   3 +
 libbacktrace/configure| 185 --
 libbacktrace/configure.ac |   4 +
 libbacktrace/pecoff.c |  74 +--
 4 files changed, 171 insertions(+), 95 deletions(-)

diff --git a/libbacktrace/config.h.in b/libbacktrace/config.h.in
index ee2616335c7..9b8ab88ab63 100644
--- a/libbacktrace/config.h.in
+++ b/libbacktrace/config.h.in
@@ -101,6 +101,9 @@
 /* Define to 1 if you have the  header file. */
 #undef HAVE_SYS_TYPES_H
 
+/* Define to 1 if you have the  header file. */
+#undef HAVE_TLHELP32_H
+
 /* Define to 1 if you have the  header file. */
 #undef HAVE_UNISTD_H
 
diff --git a/libbacktrace/configure b/libbacktrace/configure
index 7ade966b54d..ca52ee3bafb 100755
--- a/libbacktrace/configure
+++ b/libbacktrace/configure
@@ -1866,7 +1866,7 @@ else
 #define $2 innocuous_$2
 
 /* System header to define __stub macros and hopefully few prototypes,
-which can conflict with char $2 (); below.
+which can conflict with char $2 (void); below.
 Prefer  to  if __STDC__ is defined, since
  exists even on freestanding compilers.  */
 
@@ -1884,7 +1884,7 @@ else
 #ifdef __cplusplus
 extern "C"
 #endif
-char $2 ();
+char $2 (void);
 /* The GNU C library defines this for functions which it implements
 to always fail with ENOSYS.  Some functions are actually named
 something starting with __ and the normal name is an alias.  */
@@ -1893,7 +1893,7 @@ choke me
 #endif
 
 int
-main ()
+main (void)
 {
 return $2 ();
   ;
@@ -1932,7 +1932,7 @@ else
 /* end confdefs.h.  */
 $4
 int
-main ()
+main (void)
 {
 if (sizeof ($2))
 return 0;
@@ 

Re: [PATCH V3] rs6000: Don't ICE when compiling the __builtin_vsx_splat_2di built-in [PR113950]

2024-03-15 Thread Peter Bergner
On 3/6/24 3:27 AM, Kewen.Lin wrote:
> on 2024/3/4 02:55, jeevitha wrote:
>> The following patch has been bootstrapped and regtested on powerpc64le-linux.
>>  
>> When we expand the __builtin_vsx_splat_2di function, we were allowing 
>> immediate
>> value for second operand which causes an unrecognizable insn ICE. Even though
>> the immediate value was forced into a register, it wasn't correctly assigned
>> to the second operand. So corrected the assignment of op1 to operands[1].
[snip]
> As the discussions in the thread of the previous versions, I think
> Segher agreed this approach, so OK for trunk with the above nits
> tweaked, thanks!

The bogus vsx_splat_ code goes all the way back to GCC 8, so we
should backport this fix.  Segher and Ke Wen, can we get an approval
to backport this to all the open release branches (GCC 13, 12, 11)?
Thanks.

Jeevitha, once we get approval, please perform the backports.

Peter




[PATCH] arm: [MVE intrinsics] Fix support for loads [PR target/114323]

2024-03-15 Thread Christophe Lyon
The testcase in this PR shows that we would load from an uninitialized
location, because the vld1 instrinsics are reported as "const". This
is because function_instance::reads_global_state_p() does not take
CP_READ_MEMORY into account.  Fixing this gives vld1 the "pure"
attribute instead, and solves the problem.

2024-03-15  Christophe Lyon  

PR target/114323
gcc/
* config/arm/arm-mve-builtins.cc
(function_instance::reads_global_state_p): Take CP_READ_MEMORY
into account.

gcc/testsuite/
* gcc.target/arm/mve/pr114323.c: New.
---
 gcc/config/arm/arm-mve-builtins.cc  |  2 +-
 gcc/testsuite/gcc.target/arm/mve/pr114323.c | 22 +
 2 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/pr114323.c

diff --git a/gcc/config/arm/arm-mve-builtins.cc 
b/gcc/config/arm/arm-mve-builtins.cc
index 2f2c0f4a02a..6a5775c67e5 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -746,7 +746,7 @@ function_instance::reads_global_state_p () const
   if (flags & CP_READ_FPCR)
 return true;
 
-  return false;
+  return flags & CP_READ_MEMORY;
 }
 
 /* Return true if calls to the function could modify some form of
diff --git a/gcc/testsuite/gcc.target/arm/mve/pr114323.c 
b/gcc/testsuite/gcc.target/arm/mve/pr114323.c
new file mode 100644
index 000..bd9127b886a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/pr114323.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+
+#include 
+
+__attribute__((noipa))
+uint32x4_t foo (void) {
+  uint32x4_t V0 = vld1q_u32(((const uint32_t[4]){1, 2, 3, 4}));
+  return V0;
+}
+
+int main(void)
+{
+  uint32_t buf[4];
+ vst1q_u32 (buf, foo());
+
+  for (int i = 0; i < 4; i++)
+if (buf[i] != i+1)
+  __builtin_abort ();
+}
-- 
2.34.1



[PATCH, v2] Fortran: fix for absent array argument passed to optional dummy [PR101135]

2024-03-15 Thread Harald Anlauf

Dear all,

as there has been some good progress in the handling of optional dummy
arguments, I looked again at this PR and a patch for it that I withdrew
as it turned out incomplete.

It turned out that it now needs only a minor adjustment for optional
dummy arguments of procedures with bind(c) attribute so that ubsan
checking does not trigger.

Along this way I extended the previous testcase to exercise to some
extent combinations of bind(c) and non-bind(c) procedures and found
one failure (since at least gcc-9) that is genuine: passing a missing
optional from a bind(c) procedure to an assumed-rank dummy, see
PR114355.  The corresponding test is commented in the testcase.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald


On 2/6/22 22:04, Harald Anlauf wrote:

Hi Mikael,

Am 04.02.22 um 11:45 schrieb Mikael Morin:

Hello,

Le 29/01/2022 à 22:41, Harald Anlauf via Fortran a écrit :

The least invasive change - already pointed out by the reporter - is
to check the presence of the argument before dereferencing the data
pointer after the offset calculation.  This requires adjusting the
checking pattern for gfortran.dg/missing_optional_dummy_6a.f90.

Regtesting reminded me that procedures with bind(c) attribute are doing
their own stuff, which is why they need to be excluded here, otherwise
testcase bind-c-contiguous-4.f90 would regress on the expected output.


only after submitting the patch I figured that the patch is incomplete.

When we have a call chain of procedures with and without bind(c),
there are still cases left where the failure with the sanitizer
is not fixed.  Just add "bind(c)" to subroutine test_wrapper only
in the original PR.

I have added a corresponding comment in the PR.


There is a potential alternative solution which I did not pursue, as I
think it is more invasive, but also that I didn't succeed to implement:
A non-present dummy array argument should not need to get its descriptor
set up.  Pursuing this is probably not the right thing to do during the
current stage of development and could be implemented later.  If 
somebody

believes this is important, feel free to open a PR for this.

I have an other (equally unimportant) concern that it may create an 
unnecessary conditional when passing a subobject of an optional 
argument.  In that case we can assume that the optional is present.

It’s not a correctness issue, so let’s not bother at this stage.


Judging from the dump tree of the cases I looked at I did not see
anything that would pose a problem to the optimizer.


Regtested on x86_64-pc-linux-gnu.  OK for mainline?


OK.


Given my latest observations I'd rather withdraw the current version of
the patch and rethink.  I also did not see an issue with bind(c)
procedures calling alikes.

It would help if one would not only know the properties of the actual
argument, but also of the formal one, which is not available at that
point in the code.  I'll have another look and resubmit.


Thanks.



Thanks for the review!

Harald

From b3079a82a220477704f8156207239e4af4103ea9 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Fri, 15 Mar 2024 20:14:07 +0100
Subject: [PATCH] Fortran: fix for absent array argument passed to optional
 dummy [PR101135]

gcc/fortran/ChangeLog:

	PR fortran/101135
	* trans-array.cc (gfc_get_dataptr_offset): Check for optional
	arguments being present before dereferencing data pointer.

gcc/testsuite/ChangeLog:

	PR fortran/101135
	* gfortran.dg/missing_optional_dummy_6a.f90: Adjust diagnostic pattern.
	* gfortran.dg/ubsan/missing_optional_dummy_8.f90: New test.
---
 gcc/fortran/trans-array.cc|  11 ++
 .../gfortran.dg/missing_optional_dummy_6a.f90 |   2 +-
 .../ubsan/missing_optional_dummy_8.f90| 108 ++
 3 files changed, 120 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/ubsan/missing_optional_dummy_8.f90

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 3673fa40720..a7717a8107e 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -7526,6 +7526,17 @@ gfc_get_dataptr_offset (stmtblock_t *block, tree parm, tree desc, tree offset,
 
   /* Set the target data pointer.  */
   offset = gfc_build_addr_expr (gfc_array_dataptr_type (desc), tmp);
+
+  /* Check for optional dummy argument being present.  Arguments of BIND(C)
+ procedures are excepted here since they are handled differently.  */
+  if (expr->expr_type == EXPR_VARIABLE
+  && expr->symtree->n.sym->attr.dummy
+  && expr->symtree->n.sym->attr.optional
+  && !is_CFI_desc (NULL, expr))
+offset = build3_loc (input_location, COND_EXPR, TREE_TYPE (offset),
+			 gfc_conv_expr_present (expr->symtree->n.sym), offset,
+			 fold_convert (TREE_TYPE (offset), gfc_index_zero_node));
+
   gfc_conv_descriptor_data_set (block, parm, offset);
 }
 
diff --git a/gcc/testsuite/gfortran.dg/missing_optional_dummy_6a.f90 

Re: [PATCH, v2] Fortran: use name of array component in runtime error message [PR30802]

2024-03-15 Thread Mikael Morin

Le 15/03/2024 à 18:26, Harald Anlauf a écrit :

Hi Mikael,

On 3/15/24 17:31, Mikael Morin wrote:

Le 10/03/2024 à 22:31, Harald Anlauf a écrit :

Dear all,

after playing for some time with NAG and Intel, and an off-list
discussion with Jerry, I am getting more and more convinced that
simpler runtime error messages (also simpler to parse by a human)
are superior to awkward solutions.  This is also what Intel does:
use only the name of the array (component) in the message whose
indices are out of bounds.

(NAG's solution appears also inconsistent for nested derived types.)

So no x%z, or x%_data, etc. in runtime error messages any more.


That's a pity.  What about providing the root variable and the failing
component only?

... dimension 1 of array component 'z...%x' above array bound ...

The data reference doesn't look great, but it provides valuable (in my
opinion) information.


OK, that sounds interesting.  To clarify the options:

- for ordinary array x it would stay 'x'

- when z is a DT scalar, and z%x is the array in question, use 'z%x'
   (here z...%x would look strange to me)


Yes, the ellipsis would look strange to me as well.


- when z is a DT array, and x some component further down, 'z...%x'

This case also applies when z is a DT scalar and x is more than one 
level deep.



I would rather not make the error message text vary too much to avoid
to run into issues with translation.  Would it be fine with you to have

... dimension 1 of array 'z...%x' above array bound ...

only?


OK, let's drop "component".


Anything else?


No, I think you covered everything.


Cheers,
Harald


Please give it a spin...

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald


On 1/30/24 11:46, Mikael Morin wrote:

Le 30/01/2024 à 11:38, Mikael Morin a écrit :


Another (easier) way to clarify the data reference would be rephrasing
the message so that the array part is separate from the scalar part,
like so (there are too many 'of', but I lack inspiration):
Index '0' of dimension 1 of component 'zz' of element from 'x1%vv'
below lower bound of 1


This has the same number of 'of' but sounds better maybe:
Out of bounds accessing component 'zz' of element from 'x1%yy': index
'0' of dimension 1 below lower bound of 1










Re: [PATCH] ipa: Avoid duplicate replacements in IPA-SRA transformation phase

2024-03-15 Thread Jakub Jelinek
On Fri, Mar 15, 2024 at 06:57:18PM +0100, Martin Jambor wrote:
> --- a/gcc/ipa-param-manipulation.cc
> +++ b/gcc/ipa-param-manipulation.cc
> @@ -1525,6 +1525,22 @@ ipa_param_body_adjustments::common_initialization 
> (tree old_fndecl,
>replacement with a constant (for split aggregates passed
>by value).  */
>  
> +   if (split[parm_num])
> + {
> +   /* We must be careful not to add a duplicate
> +  replacement. */
> +   sort_replacements ();
> +   ipa_param_body_replacement *pbr =
> + lookup_replacement_1 (m_oparms[parm_num],
> +   av.unit_offset);

Formatting nit, the = should be on the next line before
lookup_replacement_1.
  ipa_param_body_replacement *pbr
= lookup_replacement_1 (m_oparms[parm_num],
av.unit_offset);

Jakub



[PATCH] ipa: Avoid duplicate replacements in IPA-SRA transformation phase

2024-03-15 Thread Martin Jambor
Hi,

when the analysis part of IPA-SRA figures out that it would split out
a scalar part of an aggregate which is known by IPA-CP to contain a
known constant, it skips it knowing that the transformation part looks
at IPA-CP aggregate results too and does the right thing (which can
include doing the propagation in GIMPLE because that is the last
moment the parameter exists).

However, when IPA-SRA wants to split out a smaller non-aggregate out
of an aggregate, which happens to be of the same size as a known
scalar constant at the same offset, the transformation bit fails to
recognize the situation, tries to do both splitting and constant
propagation and in PR 111571 testcase creates a nonsensical call
statement on which the call redirection then ICEs.

Fixed by making sure we don't try to do two replacements of the same
part of the same parameter.

The look-up among replacements requires these are sorted and this
patch just sorts them if they are not already sorted before each new
look-up.  The worst number of sortings that can happen is number of
parameters which are both split and have aggregate constants times
param_ipa_max_agg_items (default 16).  I don't think complicating the
source code to optimize for this unlikely case is worth it but if need
be, it can of course be done.

Bootstrapped and tested on x86_64-linux.  OK for master and eventually
also the gcc-13 branch?

Thanks,

Martin



gcc/ChangeLog:

2024-03-15  Martin Jambor  

PR ipa/111571
* ipa-param-manipulation.cc
(ipa_param_body_adjustments::common_initialization): Avoid creating
duplicate replacement entries.

gcc/testsuite/ChangeLog:

2024-03-15  Martin Jambor  

PR ipa/111571
* gcc.dg/ipa/pr111571.c: New test.
---
 gcc/ipa-param-manipulation.cc   | 16 
 gcc/testsuite/gcc.dg/ipa/pr111571.c | 29 +
 2 files changed, 45 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr111571.c

diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc
index 3e0df6a6f77..4c6337cc563 100644
--- a/gcc/ipa-param-manipulation.cc
+++ b/gcc/ipa-param-manipulation.cc
@@ -1525,6 +1525,22 @@ ipa_param_body_adjustments::common_initialization (tree 
old_fndecl,
 replacement with a constant (for split aggregates passed
 by value).  */
 
+ if (split[parm_num])
+   {
+ /* We must be careful not to add a duplicate
+replacement. */
+ sort_replacements ();
+ ipa_param_body_replacement *pbr =
+   lookup_replacement_1 (m_oparms[parm_num],
+ av.unit_offset);
+ if (pbr)
+   {
+ /* Otherwise IPA-SRA should have bailed out.  */
+ gcc_assert (AGGREGATE_TYPE_P (TREE_TYPE (pbr->repl)));
+ continue;
+   }
+   }
+
  tree repl;
  if (av.by_ref)
repl = av.value;
diff --git a/gcc/testsuite/gcc.dg/ipa/pr111571.c 
b/gcc/testsuite/gcc.dg/ipa/pr111571.c
new file mode 100644
index 000..2a4adc608db
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr111571.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2"  } */
+
+struct a {
+  int b;
+};
+struct c {
+  long d;
+  struct a e;
+  long f;
+};
+int g, h, i;
+int j() {return 0;}
+static void k(struct a l, int p) {
+  if (h)
+g = 0;
+  for (; g; g = j())
+if (l.b)
+  break;
+}
+static void m(struct c l) {
+  k(l.e, l.f);
+  for (;; --i)
+;
+}
+int main() {
+  struct c n = {10, 9};
+  m(n);
+}
-- 
2.44.0



Re: [PATCH v1] C++: Support constexpr strings for asm statements

2024-03-15 Thread Jakub Jelinek
On Fri, Mar 15, 2024 at 10:20:06AM -0700, Andi Kleen wrote:
> > One issue is the character set question.  The strings in inline asm right
> > now are untranslated, i.e. remain in SOURCE_CHARSET, usually UTF-8 (in
> > theory there is also UTF-EBCDIC support, but nobody knows if it actually
> > works), which is presumably what the assembler expects too.
> > Most of the string literals and character literals constexpr deals with
> > are in the execution character set though.  For static_assert we just assume
> > the user knows what he is doing when trying to emit non-ASCII characters in
> > the message when using say -fexec-charset=EBCDICUS , but should that be the
> > case for inline asm too?  Or should we try to translate those strings back
> > from execution character set to source character set?  Or require that it
> > actually constructs UTF-8 string literals and for the UTF-EBCDIC case
> > translate from UTF-8 to UTF-EBCDIC?  So the user constexpr functions then
> > would return u8"insn"; or construct from u8'i' etc. character literals...
> 
> I think keeping it untranslated is fine for now. Any translation
> if really needed would be a separate feature.

I mean, unless you make extra effort, it is translated.
Even in your current version, try constexpr *foo () { return "nop"; }
and you'll see that it actually results in "\x95\x96\x97" with
-fexec-charset=EBCDICUS.
What is worse, constexpr *bar () { return "%0 %1"; }
results in "\x6c\xf0\x40\x6c\xf1", so the compiler will not be able to find
the % special characters in there etc.
The parsing of the string literal in asm definitions uses translate=false
to avoid the translations.
As the static_assert paper says, for static_assert it isn't that big a deal,
the program is already UB if it diagnoses static assertion failure, worst
case it prints garbage if one plays with -fexec-charset=.  But for inline
asm it would fail to compile...

So, the extension really should be well defined vs. the character set,
either it should be characters in the execution charset and the FE would
need to ask libcpp to translate it back, or it would need to be declared
to be e.g. in UTF-8 regardless of the charset (like u8'x' or u8"abc"
literals are; but then shouldn't the _M_data in that case return a pointer
to char8_t instead), something else?

Jakub



Re: [PATCH] c++: explicit inst of template method not generated [PR110323]

2024-03-15 Thread Marek Polacek
On Thu, Mar 14, 2024 at 03:39:04PM -0400, Jason Merrill wrote:
> On 3/8/24 12:02, Marek Polacek wrote:
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> > Consider
> > 
> >constexpr int VAL = 1;
> >struct foo {
> >template 
> >void bar(typename std::conditional::type arg) { }
> >};
> >template void foo::bar<1>(int arg);
> > 
> > where we since r11-291 fail to emit the code for the explicit
> > instantiation.  That's because cp_walk_subtrees/TYPENAME_TYPE now
> > walks TYPE_CONTEXT ('conditional' here) as well, and in a template
> > finds the B==VAL template argument.  VAL is constexpr, which implies const,
> > which in the global scope implies static.  constrain_visibility_for_template
> > then makes "struct conditional<(B == VAL), int, float>" non-TREE_PUBLIC.
> > Then symtab_node::needed_p checks TREE_PUBLIC, sees it's 0, and we don't
> > emit any code.
> > 
> > I thought the fix would be some ODR-esque check to not consider
> > constexpr variables/fns that are used just for their value.  But
> > it turned out to be tricky.  For instance, we can't skip
> > determine_visibility in a template; we can't even skip it for value-dep
> > expressions.  For example, no-linkage-expr1.C has
> > 
> >using P = struct {}*;
> >template 
> >void f(int(*)[((P)0, N)]) {}
> > 
> > where ((P)0, N) is value-dep, but N is not relevant here: we have to
> > ferret out the anonymous type.  When instantiating, it's already gone.
> 
> Hmm, how is that different from the B == VAL case?  In both cases we're
> naming an internal entity that gets folded away.
> 
> I guess the difference is that B == VAL falls under the special allowance in
> https://eel.is/c++draft/basic.def.odr#14.5.1 because it's a constant used as
> a prvalue, and therefore is not odr-used under
> https://eel.is/c++draft/basic.def.odr#5.2
> 
> So I would limit this change to decl_constant_var_p.  Really we should also
> be checking that the lvalue-rvalue conversion is applied, but that's more
> complicated.

Thanks.  My previous version had it, but it didn't handle

  static constexpr int getval () { return 1; }

  template 
  void baz(typename conditional::type arg) { }

I'd say that "getval()" is one of "manifestly constant-evaluated expressions 
that
are not value-dependent", so it should be treated the same as B == VAL.  I
don't know if this is important to handle.  Do you want me to poke further or
should we just go with decl_constant_var_p and leave it at that for now?

Marek



Re: [PATCH, v2] Fortran: use name of array component in runtime error message [PR30802]

2024-03-15 Thread Harald Anlauf

Hi Mikael,

On 3/15/24 17:31, Mikael Morin wrote:

Le 10/03/2024 à 22:31, Harald Anlauf a écrit :

Dear all,

after playing for some time with NAG and Intel, and an off-list
discussion with Jerry, I am getting more and more convinced that
simpler runtime error messages (also simpler to parse by a human)
are superior to awkward solutions.  This is also what Intel does:
use only the name of the array (component) in the message whose
indices are out of bounds.

(NAG's solution appears also inconsistent for nested derived types.)

So no x%z, or x%_data, etc. in runtime error messages any more.

That's a pity.  What about providing the root variable and the failing 
component only?


... dimension 1 of array component 'z...%x' above array bound ...

The data reference doesn't look great, but it provides valuable (in my 
opinion) information.


OK, that sounds interesting.  To clarify the options:

- for ordinary array x it would stay 'x'

- when z is a DT scalar, and z%x is the array in question, use 'z%x'
  (here z...%x would look strange to me)

- when z is a DT array, and x some component further down, 'z...%x'

I would rather not make the error message text vary too much to avoid
to run into issues with translation.  Would it be fine with you to have

... dimension 1 of array 'z...%x' above array bound ...

only?

Anything else?

Cheers,
Harald


Please give it a spin...

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald


On 1/30/24 11:46, Mikael Morin wrote:

Le 30/01/2024 à 11:38, Mikael Morin a écrit :


Another (easier) way to clarify the data reference would be rephrasing
the message so that the array part is separate from the scalar part,
like so (there are too many 'of', but I lack inspiration):
Index '0' of dimension 1 of component 'zz' of element from 'x1%vv'
below lower bound of 1


This has the same number of 'of' but sounds better maybe:
Out of bounds accessing component 'zz' of element from 'x1%yy': index
'0' of dimension 1 below lower bound of 1









Re: [PATCH v1] C++: Support constexpr strings for asm statements

2024-03-15 Thread Andi Kleen
Thanks Jakub.

> > +/* Parse a string literal or constant expression yielding a string.
> > +   The constant expression uses extra parens to avoid ambiguity with "x" 
> > (expr).
> > +
> > +   asm-string-expr:
> > + string-literal
> > + ( constant-expr ) */
> > +
> > +static tree
> > +cp_parser_asm_string_expression (cp_parser *parser)
> > +{
> > +  location_t sloc = cp_lexer_peek_token (parser->lexer)->location;
> > +
> > +  if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN))
> 
> Why should it be wrapped in ()s?

It's true it's only needed for the constraints, but not here.

> Too long line.
> 
> > +   string = TREE_OPERAND (string, 0);
> > +  if (TREE_CODE (string) == VIEW_CONVERT_EXPR)
> > +   string = TREE_OPERAND (string, 0);
> > +  if (TREE_CODE (string) != STRING_CST && string != error_mark_node)
> > +   {
> > + error_at (sloc, "Expected string valued constant expression for 
> > %, not type %qT",
> > +   TREE_TYPE (string));
> 
> Again, too long line, diagnostics should never start with a capital letter,
> but more importantly, this will handle only a small subset of what one can
> construct with constexpr functions, not everything they can return even if
> they return const char * is necessarily a STRING_LITERAL, could be an array
> of chars or something similar, especially if the intent is not just to
> return prepared whole string literals, but construct the template etc. from
> substrings.
> 
> Given the https://wg21.link/P2741R3 C++26 addition, I wonder if it wouldn't

That's a good find.

> be much better to stay compatible with the static_assert extension there and
> use similar thing for inline asm.
> See https://gcc.gnu.org/r14-5771 and r14-5956 follow-up for the actual
> implementation.

Makes sense. I will adapt the code.

> One issue is the character set question.  The strings in inline asm right
> now are untranslated, i.e. remain in SOURCE_CHARSET, usually UTF-8 (in
> theory there is also UTF-EBCDIC support, but nobody knows if it actually
> works), which is presumably what the assembler expects too.
> Most of the string literals and character literals constexpr deals with
> are in the execution character set though.  For static_assert we just assume
> the user knows what he is doing when trying to emit non-ASCII characters in
> the message when using say -fexec-charset=EBCDICUS , but should that be the
> case for inline asm too?  Or should we try to translate those strings back
> from execution character set to source character set?  Or require that it
> actually constructs UTF-8 string literals and for the UTF-EBCDIC case
> translate from UTF-8 to UTF-EBCDIC?  So the user constexpr functions then
> would return u8"insn"; or construct from u8'i' etc. character literals...

I think keeping it untranslated is fine for now. Any translation
if really needed would be a separate feature.


> 
> In any case, as has been said earlier, this isn't stage4 material.

Yes that merge can be deferred of course.

-Andi


Re: [PATCH] libgcc: Fix quotient and/or remainder negation in __divmodbitint4 [PR114327]

2024-03-15 Thread Joseph Myers
On Fri, 15 Mar 2024, Jakub Jelinek wrote:

> Hi!
> 
> While for __mulbitint3 we actually don't negate anything and perform the
> multiplication in unsigned style always, for __divmodbitint4 if the operands
> aren't unsigned and are negative, we negate them first and then try to
> negate them as needed at the end.
> quotient is negated if just one of the operands was negated and the other
> wasn't or vice versa, and remainder is negated if the first operand was
> negated.
> The case which doesn't work correctly is if due to limited range of the
> operands we perform the division/modulo in some smaller number of limbs
> and then extend it to the desired precision of the quotient and/or
> remainder results.  If they aren't negated, the extension is done with
> memset to 0, if they are negated, the extension was done with memset
> to -1.  The problem is that if the quotient or remainder is zero,
> then bitint_negate negates it again to zero (that is ok), but we should
> then extend with memset to 0, not memset to -1.
> 
> The following patch achieves that by letting bitint_negate also check if
> the negated operand is zero and changes the memset argument based on that.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [Patch][RFC] GCN: Define ISA archs in gcn-devices.def and use it

2024-03-15 Thread Andrew Stubbs

On 15/03/2024 13:56, Tobias Burnus wrote:

Hi Andrew,

Andrew Stubbs wrote:
This is more-or-less what I was planning to do myself, but as I want 
to include all the other features that get parametrized in gcn.cc, 
gcn.h, gcn-hsa.h, gcn-opts.h, I hadn't got around to it yet. 
Unfortunately, I think the gcn.opt and config.gcc will always need 
manually updating, but if that's all it'll be an improvement.


Well, for .opt see how nvptx does it – it actually generates an .opt file.


I don't like the idea of including AMDGPU_ISA_UNSUPPORTED;


I concur – I was initially thinking of reporting the device name 
("Unsupported %s") but I then realized that the agent returns a string 
while only for GCC generated files (→ eflag) the hexcode is used. Thus, 
I ended up not using it.


Ultimately, I want to replace many of the conditionals like 
"TARGET_CDNA2_PLUS" from the code and replace them with feature flags 
derived from a def file, or at least a header file. We've acquired too 
many places where there are unsearchable conditionals that need 
finding and fixing every time a new device comes along.
I was thinking of having more flags, but those where the only ones 
required for the two files.
I had imagined that this .def file would exist in gcc/config/gcn, but 
you've placed it in libgomp maybe it makes sense to have multiple 
such files if they contain very different data, but I had imagined one 
file and I'm not sure that the compiler definitions live in libgomp.


There is already:

gcc/config/darwin-c.cc:#include "../../libcpp/internal.h"

gcc/config/gcn/gcn-run.cc:#include 
"../../../libgomp/config/gcn/libgomp-gcn.h"


gcc/fortran/cpp.cc:#include "../../libcpp/internal.h"

gcc/fortran/trigd_fe.inc:#include "../../libgfortran/intrinsics/trigd.inc"

But there is also the reverse:

libcpp/lex.cc:#include "../gcc/config/i386/cpuid.h"

libgfortran/libgfortran.h:#include "../gcc/fortran/libgfortran.h"

lto-plugin/lto-plugin.c:#include "../gcc/lto/common.h"

If you add more items, it is probably better to have it under 
gcc/config/gcn/ - and I really prefer a single file for all.


* * *

Talking about feature sets: This would be a bit like LLVM (see below) 
but I think they have a bit too much indirections. But I do concur that 
we need to consolidate the current support – and hopefully make it 
easier to keep adding more GPU support; we seem to have already covered 
a larger chunk :-)


I also did wonder whether we should support, e.g., running a gfx1100 
code (or a gfx11-generic one) on, e.g., a gfx1103 device. Alternatively, 
we could keep the current check which requires an exact match.


We didn't invent that restriction; the runtime won't let you do it. We 
only have the check because the HSA/ROCr error message is not very 
user-friendly.


BTW: I do note that looking at the feature sets in LLVM that all GFX110x 
GPUs seem to have common silicon bugs: FeatureMSAALoadDstSelBug and 
FeatureMADIntraFwdBug, while 1100 and 1102 additionally have the 
FeatureUserSGPRInit16Bug but 1101 and 1103 don't. — For some reasons, 
FeatureISAVersion11_Generic only consists of two of those bugs (it 
doesn't have FeatureMADIntraFwdBug), which doesn't seem to be that 
consistent. Maybe the workaround has issues elsewhere? If so, a generic 
-march=gfx11 might be not as useful as one might hope for.


* * *

If I look at LLVM's 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPU.td ,


they first define several features – like 'FeatureUnalignedScratchAccess'.

Then they combine them like in:

def FeatureISAVersion11_Common ... [FeatureGFX11, ... 
FeatureAtomicFaddRtnInsts ...


And then they use those to map them to feature sets like:

def FeatureISAVersion11_0_Common ... 
listconcat(FeatureISAVersion11_Common.Features,

     [FeatureMSAALoadDstSelBug ...

And for gfx1103:

def FeatureISAVersion11_0_3 : FeatureSet<
   !listconcat(FeatureISAVersion11_0_Common.Features,
     [])>;

The mapping to gfx... names then happens in 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/GCNProcessors.td such as:


def : ProcessorModel<"gfx1103", GFX11SpeedModel,
   FeatureISAVersion11_0_3.Features
 >;

Or for the generic one, i.e.:

// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151]
def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
   FeatureISAVersion11_Generic.Features

LLVM also has some generic flags like the following in 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/TargetParser/TargetParser.cpp


     {{"gfx1013"},   {"gfx1013"}, GK_GFX1013, 
FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},


I hope that this will give some inspiration – but I assume that at least 
the initial implementation will be much shorter.


Yeah, we can have one macro for each arch, or multiple macros for 
building different tables. First one seems easier but less readable, 
second one will need some thinking about. Probably best to keep it 
simple 

Re: [PATCH, v2] Fortran: use name of array component in runtime error message [PR30802]

2024-03-15 Thread Mikael Morin

Le 10/03/2024 à 22:31, Harald Anlauf a écrit :

Dear all,

after playing for some time with NAG and Intel, and an off-list
discussion with Jerry, I am getting more and more convinced that
simpler runtime error messages (also simpler to parse by a human)
are superior to awkward solutions.  This is also what Intel does:
use only the name of the array (component) in the message whose
indices are out of bounds.

(NAG's solution appears also inconsistent for nested derived types.)

So no x%z, or x%_data, etc. in runtime error messages any more.

That's a pity.  What about providing the root variable and the failing 
component only?


... dimension 1 of array component 'z...%x' above array bound ...

The data reference doesn't look great, but it provides valuable (in my 
opinion) information.



Please give it a spin...

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald


On 1/30/24 11:46, Mikael Morin wrote:

Le 30/01/2024 à 11:38, Mikael Morin a écrit :


Another (easier) way to clarify the data reference would be rephrasing
the message so that the array part is separate from the scalar part,
like so (there are too many 'of', but I lack inspiration):
Index '0' of dimension 1 of component 'zz' of element from 'x1%vv'
below lower bound of 1


This has the same number of 'of' but sounds better maybe:
Out of bounds accessing component 'zz' of element from 'x1%yy': index
'0' of dimension 1 below lower bound of 1





Re: [PATCH] c++: ICE with noexcept and local specialization [PR114114]

2024-03-15 Thread Patrick Palka
On Fri, 15 Mar 2024, Marek Polacek wrote:

> On Fri, Mar 15, 2024 at 10:35:07AM -0400, Patrick Palka wrote:
> > On Tue, 5 Mar 2024, Marek Polacek wrote:
> > 
> > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > 
> > > -- >8 --
> > > Here we ICE because we call register_local_specialization while
> > > local_specializations is null, so
> > > 
> > >   local_specializations->put ();
> > > 
> > > crashes on null this.  It's null since maybe_instantiate_noexcept calls
> > > push_to_top_level which creates a new scope.  Normally, I would have
> > > guessed that we need a new local_specialization_stack.  But here we're
> > > dealing with an operand of a noexcept, which is an unevaluated operand,
> > > and those aren't registered in the hash map.  maybe_instantiate_noexcept
> > > wasn't signalling that it's substituting an unevaluated operand though.
> > 
> > It thought it was noexcept-exprs rather than noexcept-specs that are
> > unevaluated contexts?
> 
> Yes, sigh.  It would have to be noexcept(noexcept(x)).  I was looking at
> cp_parser_unary_expression/RID_NOEXCEPT but that's a noexcept-expr.  So
> what can we do here, set a new local_specialization_stack?  That wasn't
> that straightforward when I tried.  Or maybe just

Maybe we can avoid doing push_to_top_level (which clears
local_specializations) from maybe_instantiate_noexcept if
current_function_decl == fn?

Relatedly I wonder if we can avoid calling regenerate_decl_from_template
for local class member functions since they can't be redeclared?

> 
> --- a/gcc/cp/pt.cc
> +++ b/gcc/cp/pt.cc
> @@ -15649,7 +15649,7 @@ tsubst_decl (tree t, tree args, tsubst_flags_t 
> complain,
>   {
> if (DECL_LANG_SPECIFIC (r))
>   DECL_TEMPLATE_INFO (r) = NULL_TREE;
> -   if (!cp_unevaluated_operand)
> +   if (!cp_unevaluated_operand && local_specializations)
>   register_local_specialization (r, t);
>   }
> 
> ?
> 
> 



[committed] testsuite: Fix pr113431.c FAIL on sparc* [PR113431]

2024-03-15 Thread Jakub Jelinek
Hi!

As mentioned in the PR, the new testcase FAILs on sparc*-* due to
lack of support of misaligned store.

This patch restricts that to vect_hw_misalign targets.

Tested on x86_64-linux -m32/-m64, committed to trunk based on
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113431#c21 comment.

2024-03-15  Jakub Jelinek  

PR tree-optimization/113431
* gcc.dg/vect/pr113431.c: Restrict scan-tree-dump-times to
vect_hw_misalign targets.

--- gcc/testsuite/gcc.dg/vect/pr113431.c.jj 2024-01-18 08:44:33.673916652 
+0100
+++ gcc/testsuite/gcc.dg/vect/pr113431.c2024-03-15 16:46:27.328154091 
+0100
@@ -15,4 +15,4 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "optimized: basic block part vectorized" 
2 "slp1" { target vect_int } } } */
+/* { dg-final { scan-tree-dump-times "optimized: basic block part vectorized" 
2 "slp1" { target { vect_int && vect_hw_misalign } } } } */

Jakub



Re: [PATCH] aarch64: Add +lse128 architectural extension command-line flag

2024-03-15 Thread Christophe Lyon
On Fri, 15 Mar 2024 at 12:15, Victor Do Nascimento
 wrote:
>
> Given how, at present, the choice of using LSE128 atomic instructions
> by the toolchain is delegated to run-time selection in the form of
> Libatomic ifuncs, responsible for querying target support, the
> `+lse128' target architecture compile-time flag is absent from GCC.
>
> This, however, contrasts with the Binutils implementation, which gates
> LSE128 instructions behind the `+lse128' flag.  This can lead to
> problems in GCC for certain use-cases.  One such example is in the use
> of inline assembly, whereby the inability of enabling the feature in
> the command-line prevents the compiler from automatically issuing the
> necessary LSE128 `.arch' directive.
>
> This patch therefore brings GCC into alignment with LLVM and Binutils
> in adding support for the `+lse128' architectural extension flag.
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-option-extensions.def: Add LSE128
> AARCH64_OPT_EXTENSION, adding it as a dependency for the D128
> feature.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/lse128-flag.c: New.
> * gcc.target/aarch64/cpunative/info_23: Likewise.
> * gcc.target/aarch64/cpunative/native_cpu_23.c: Likewise.
> ---
>  gcc/config/aarch64/aarch64-option-extensions.def  |  4 +++-
>  gcc/testsuite/gcc.target/aarch64/cpunative/info_23|  8 
>  .../gcc.target/aarch64/cpunative/native_cpu_23.c  | 11 +++
>  gcc/testsuite/gcc.target/aarch64/lse128-flag.c| 10 ++
>  4 files changed, 32 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/cpunative/info_23
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_23.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/lse128-flag.c
>
> diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
> b/gcc/config/aarch64/aarch64-option-extensions.def
> index 1a3b91c68cf..ac54b899a06 100644
> --- a/gcc/config/aarch64/aarch64-option-extensions.def
> +++ b/gcc/config/aarch64/aarch64-option-extensions.def
> @@ -275,7 +275,9 @@ AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
>
>  AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
>
> -AARCH64_OPT_EXTENSION("d128", D128, (), (), (), "d128")
> +AARCH64_OPT_EXTENSION("lse128", LSE128, (LSE), (), (), "lse128")
> +
> +AARCH64_OPT_EXTENSION("d128", D128, (LSE128), (), (), "d128")
>
FWIW, looks good to me, I noticed that now we'll also have the
dependency of d128 on lse128, although d128 didn't have the (implicit)
dependency on lse before.

Christophe

>  AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the")
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_23 
> b/gcc/testsuite/gcc.target/aarch64/cpunative/info_23
> new file mode 100644
> index 000..d77c25d2f61
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_23
> @@ -0,0 +1,8 @@
> +processor  : 0
> +BogoMIPS   : 100.00
> +Features   : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp atomics 
> lse128
> +CPU implementer: 0xfe
> +CPU architecture: 8
> +CPU variant: 0x0
> +CPU part   : 0xd08
> +CPU revision   : 2
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_23.c 
> b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_23.c
> new file mode 100644
> index 000..8a1e235d8ab
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_23.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { { aarch64*-*-linux*} && native } } } */
> +/* { dg-set-compiler-env-var GCC_CPUINFO 
> "$srcdir/gcc.target/aarch64/cpunative/info_23" } */
> +/* { dg-additional-options "-mcpu=native" } */
> +
> +int main()
> +{
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler {\.arch 
> armv8-a\+dotprod\+crc\+crypto\+lse128} } } */
> +/* Test one where lse128 is available and so should be emitted.  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/lse128-flag.c 
> b/gcc/testsuite/gcc.target/aarch64/lse128-flag.c
> new file mode 100644
> index 000..71339c3af6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/lse128-flag.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile { target { aarch64*-*-*} } } */
> +/* { dg-additional-options "-march=armv9.4-a+lse128" } */
> +
> +int main()
> +{
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler {\.arch armv9\.4-a\+crc\+lse128} } } */
> +/* Test a normal looking procinfo.  */
> --
> 2.34.1
>


Re: [PATCH] c++: ICE with noexcept and local specialization [PR114114]

2024-03-15 Thread Marek Polacek
On Fri, Mar 15, 2024 at 10:35:07AM -0400, Patrick Palka wrote:
> On Tue, 5 Mar 2024, Marek Polacek wrote:
> 
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> > Here we ICE because we call register_local_specialization while
> > local_specializations is null, so
> > 
> >   local_specializations->put ();
> > 
> > crashes on null this.  It's null since maybe_instantiate_noexcept calls
> > push_to_top_level which creates a new scope.  Normally, I would have
> > guessed that we need a new local_specialization_stack.  But here we're
> > dealing with an operand of a noexcept, which is an unevaluated operand,
> > and those aren't registered in the hash map.  maybe_instantiate_noexcept
> > wasn't signalling that it's substituting an unevaluated operand though.
> 
> It thought it was noexcept-exprs rather than noexcept-specs that are
> unevaluated contexts?

Yes, sigh.  It would have to be noexcept(noexcept(x)).  I was looking at
cp_parser_unary_expression/RID_NOEXCEPT but that's a noexcept-expr.  So
what can we do here, set a new local_specialization_stack?  That wasn't
that straightforward when I tried.  Or maybe just

--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -15649,7 +15649,7 @@ tsubst_decl (tree t, tree args, tsubst_flags_t complain,
  {
if (DECL_LANG_SPECIFIC (r))
  DECL_TEMPLATE_INFO (r) = NULL_TREE;
-   if (!cp_unevaluated_operand)
+   if (!cp_unevaluated_operand && local_specializations)
  register_local_specialization (r, t);
  }

?



Re: [PATCH] testsuite: Turn errors back into warnings in arm/acle/cde-mve-error-2.c

2024-03-15 Thread Thiago Jung Bauermann


Hello,

"Richard Earnshaw (lists)"  writes:

> On 13/01/2024 20:46, Thiago Jung Bauermann wrote:
>> diff --git a/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c 
>> b/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c
>> index 5b7774825442..da283a06a54d 100644
>> --- a/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c
>> +++ b/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c
>> @@ -2,6 +2,7 @@
>>
>>  /* { dg-do assemble } */
>>  /* { dg-require-effective-target arm_v8_1m_main_cde_mve_fp_ok } */
>> +/* { dg-options "-fpermissive" } */
>>  /* { dg-add-options arm_v8_1m_main_cde_mve_fp } */
>>
>>  /* The error checking files are split since there are three kinds of
>> @@ -115,73 +116,73 @@ uint8x16_t test_bad_immediates (uint8x16_t n, 
>> uint8x16_t m, int someval,
>>
>>/* `imm' is of wrong type.  */
>>accum += __arm_vcx1q_u8 (0, "");/* { dg-error 
>> {argument 2 to '__builtin_arm_vcx1qv16qi' must be a constant immediate in 
>> range \[0-4095\]} } */
>> -  /* { dg-warning {passing argument 2 of '__builtin_arm_vcx1qv16qi' makes 
>> integer from pointer without a cast \[-Wint-conversion\]} "" { target *-*-* 
>> } 117 } */
>> +  /* { dg-warning {passing argument 2 of '__builtin_arm_vcx1qv16qi' makes 
>> integer from pointer without a cast \[-Wint-conversion\]} "" { target *-*-* 
>> } 118 } */
>
> Absolute line numbers are a pain, but I think we can use '.-1' (without the 
> quotes) in
> these cases to minimize the churn.

That worked, thank you for the tip.

> If that works, ok with that change.

I took the opportunity to request commit access to the GCC repo so that
I can commit the patch myself. Sorry for the delay. I'll commit it as
soon as I get it.

Thank you for the patch review! I'm including below the updated version.

--
Thiago


>From 78e70788da5ed849d7828b0219d3aa8955ad0fea Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann 
Date: Sat, 13 Jan 2024 14:28:07 -0300
Subject: [PATCH v2] testsuite: Turn errors back into warnings in
 arm/acle/cde-mve-error-2.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 2c3db94d9fd ("c: Turn int-conversion warnings into
permerrors") the test fails with errors such as:

  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   (test for errors, line 32)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   (test for errors, line 33)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   (test for errors, line 34)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   (test for errors, line 35)
⋮
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   at line 118 (test for 
warnings, line 117)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   (test for errors, line 
119)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   at line 120 (test for 
warnings, line 119)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   (test for errors, line 
121)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   at line 122 (test for 
warnings, line 121)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   (test for errors, line 
123)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   at line 124 (test for 
warnings, line 123)
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0   (test for errors, line 
125)
⋮
  FAIL: gcc.target/arm/acle/cde-mve-error-2.c   -O0  (test for excess errors)

There's a total of 1016 errors.  Here's a sample of the excess errors:

  Excess errors:
  /path/gcc.git/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c:117:31: 
error: passing argument 2 of '__builtin_arm_vcx1qv16qi' makes integer from 
pointer without a cast [-Wint-conversion]
  /path/gcc.git/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c:119:3: 
error: passing argument 3 of '__builtin_arm_vcx1qav16qi' makes integer from 
pointer without a cast [-Wint-conversion]
  /path/gcc.git/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c:121:3: 
error: passing argument 3 of '__builtin_arm_vcx2qv16qi' makes integer from 
pointer without a cast [-Wint-conversion]
  /path/gcc.git/gcc/testsuite/gcc.target/arm/acle/cde-mve-error-2.c:123:3: 
error: passing argument 3 of '__builtin_arm_vcx2qv16qi' makes integer from 
pointer without a cast [-Wint-conversion]

The test expects these messages to be warnings, not errors.  My first try
was to change it to expect them as errors instead.  This didn't work, IIUC
because the error prevents the compiler from continuing processing the file
and thus other errors which are expected by the test don't get emitted.

Therefore, add -fpermissive so that the test behaves as it did previously.
Because of the additional line in the header, the line numbers of the
expected warnings don't match anymore so replace them with ".-1" as
suggested by Richard Earnshaw.

Tested on armv8l-linux-gnueabihf.

gcc/testsuite/ChangeLog:
* gcc.target/arm/acle/cde-mve-error-2.c: Add -fpermissive.
---
 .../gcc.target/arm/acle/cde-mve-error-2.c | 63 ++-
 1 file 

Re: [PATCH] c++: ICE with noexcept and local specialization [PR114114]

2024-03-15 Thread Patrick Palka
On Tue, 5 Mar 2024, Marek Polacek wrote:

> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> 
> -- >8 --
> Here we ICE because we call register_local_specialization while
> local_specializations is null, so
> 
>   local_specializations->put ();
> 
> crashes on null this.  It's null since maybe_instantiate_noexcept calls
> push_to_top_level which creates a new scope.  Normally, I would have
> guessed that we need a new local_specialization_stack.  But here we're
> dealing with an operand of a noexcept, which is an unevaluated operand,
> and those aren't registered in the hash map.  maybe_instantiate_noexcept
> wasn't signalling that it's substituting an unevaluated operand though.

It thought it was noexcept-exprs rather than noexcept-specs that are
unevaluated contexts?

> 
>   PR c++/114114
> 
> gcc/cp/ChangeLog:
> 
>   * pt.cc (maybe_instantiate_noexcept): Save/restore
>   cp_unevaluated_operand, c_inhibit_evaluation_warnings, and
>   cp_noexcept_operand around the tsubst_expr call.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/cpp0x/noexcept84.C: New test.
> ---
>  gcc/cp/pt.cc|  6 +
>  gcc/testsuite/g++.dg/cpp0x/noexcept84.C | 32 +
>  2 files changed, 38 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept84.C
> 
> diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> index c4bc54a8fdb..11f7d33c766 100644
> --- a/gcc/cp/pt.cc
> +++ b/gcc/cp/pt.cc
> @@ -26869,10 +26869,16 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t 
> complain)
> if (orig_fn)
>   ++processing_template_decl;
>  
> +   ++cp_unevaluated_operand;
> +   ++c_inhibit_evaluation_warnings;
> +   ++cp_noexcept_operand;
> /* Do deferred instantiation of the noexcept-specifier.  */
> noex = tsubst_expr (DEFERRED_NOEXCEPT_PATTERN (noex),
> DEFERRED_NOEXCEPT_ARGS (noex),
> tf_warning_or_error, fn);
> +   --cp_unevaluated_operand;
> +   --c_inhibit_evaluation_warnings;
> +   --cp_noexcept_operand;
>  
> /* Build up the noexcept-specification.  */
> spec = build_noexcept_spec (noex, tf_warning_or_error);
> diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept84.C 
> b/gcc/testsuite/g++.dg/cpp0x/noexcept84.C
> new file mode 100644
> index 000..06f33264f77
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/noexcept84.C
> @@ -0,0 +1,32 @@
> +// PR c++/114114
> +// { dg-do compile { target c++11 } }
> +
> +template
> +constexpr void
> +test ()
> +{
> +  constexpr bool is_yes = B;
> +  struct S {
> +constexpr S() noexcept(is_yes) { }
> +  };
> +  S s;
> +}
> +
> +constexpr bool foo() { return true; }
> +
> +template
> +constexpr void
> +test2 ()
> +{
> +  constexpr T (*pfn)() = 
> +  struct S {
> +constexpr S() noexcept(pfn()) { }
> +  };
> +  S s;
> +}
> +
> +int main()
> +{
> +  test();
> +  test2();
> +}
> 
> base-commit: 8776468d9e57ace5f832c1368243a6dbce9984d5
> -- 
> 2.44.0
> 
> 



Re: [PATCH v6 4/5] Use the .ACCESS_WITH_SIZE in bound sanitizer.

2024-03-15 Thread Qing Zhao


On Mar 13, 2024, at 15:19, Qing Zhao  wrote:



On Mar 11, 2024, at 13:15, Siddhesh Poyarekar  wrote:



On 2024-02-16 14:47, Qing Zhao wrote:
gcc/c-family/ChangeLog:
* c-ubsan.cc (get_bound_from_access_with_size): New function.
(ubsan_instrument_bounds): Handle call to .ACCESS_WITH_SIZE.
gcc/testsuite/ChangeLog:
* gcc.dg/ubsan/flex-array-counted-by-bounds-2.c: New test.
* gcc.dg/ubsan/flex-array-counted-by-bounds-3.c: New test.
* gcc.dg/ubsan/flex-array-counted-by-bounds.c: New test.
---
 gcc/c-family/c-ubsan.cc   | 42 +
 .../ubsan/flex-array-counted-by-bounds-2.c| 45 ++
 .../ubsan/flex-array-counted-by-bounds-3.c| 34 ++
 .../ubsan/flex-array-counted-by-bounds.c  | 46 +++
 4 files changed, 167 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-3.c
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c
diff --git a/gcc/c-family/c-ubsan.cc b/gcc/c-family/c-ubsan.cc
index 940982819ddf..164b29845b3a 100644
--- a/gcc/c-family/c-ubsan.cc
+++ b/gcc/c-family/c-ubsan.cc
@@ -376,6 +376,40 @@ ubsan_instrument_return (location_t loc)
   return build_call_expr_loc (loc, t, 1, build_fold_addr_expr_loc (loc, data));
 }
 +/* Get the tree that represented the number of counted_by, i.e, the maximum
+   number of the elements of the object that the call to .ACCESS_WITH_SIZE
+   points to, this number will be the bound of the corresponding array.  */
+static tree
+get_bound_from_access_with_size (tree call)
+{
+  if (!is_access_with_size_p (call))
+return NULL_TREE;
+
+  tree ref_to_size = CALL_EXPR_ARG (call, 1);
+  unsigned int type_of_size = TREE_INT_CST_LOW (CALL_EXPR_ARG (call, 2));

Again for consistency, this should probably be class_of_size.

Okay, I will update this consistently with the change relate to the 3rd 
argument.

+  tree type = TREE_TYPE (CALL_EXPR_ARG (call, 3));
+  tree size = fold_build2 (MEM_REF, type, unshare_expr (ref_to_size),
+build_int_cst (ptr_type_node, 0));
+  /* If size is negative value, treat it as zero.  */
+  if (!TYPE_UNSIGNED (type))
+  {
+tree cond = fold_build2 (LT_EXPR, boolean_type_node,
+  unshare_expr (size), build_zero_cst (type));
+size = fold_build3 (COND_EXPR, type, cond,
+ build_zero_cst (type), size);
+  }
+
+  /* Only when type_of_size is 1,i.e, the number of the elements of
+ the object type, return the size.  */
+  if (type_of_size != 1)
+return NULL_TREE;
+  else
+size = fold_convert (sizetype, size);
+
+  return size;
+}
+
+
 /* Instrument array bounds for ARRAY_REFs.  We create special builtin,
that gets expanded in the sanopt pass, and make an array dimension
of it.  ARRAY is the array, *INDEX is an index to the array.
@@ -401,6 +435,14 @@ ubsan_instrument_bounds (location_t loc, tree array, tree 
*index,
&& COMPLETE_TYPE_P (type)
&& integer_zerop (TYPE_SIZE (type)))
  bound = build_int_cst (TREE_TYPE (TYPE_MIN_VALUE (domain)), -1);
+  else if (INDIRECT_REF_P (array)
+&& is_access_with_size_p ((TREE_OPERAND (array, 0
+ {
+   bound = get_bound_from_access_with_size ((TREE_OPERAND (array, 0)));
+   bound = fold_build2 (MINUS_EXPR, TREE_TYPE (bound),
+bound,
+build_int_cst (TREE_TYPE (bound), 1));
+ }

This will wrap if bound == 0, maybe that needs to be special-cased.  And maybe 
also add a test for it below.

Will check on this to see whether a new testing is needed.

Checked, the current code can handle the case when bound==0 correctly.
I just add a new testing case for this.

thanks.

Qing

Thanks a lot for the review.

Qing

   else
  return NULL_TREE;
 }
diff --git a/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c 
b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
new file mode 100644
index ..148934975ee5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
@@ -0,0 +1,45 @@
+/* test the attribute counted_by and its usage in
+   bounds sanitizer combined with VLA.  */
+/* { dg-do run } */
+/* { dg-options "-fsanitize=bounds" } */
+/* { dg-output "index 11 out of bounds for type 'int 
\\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 20 out of bounds for type 'int 
\\\[\\\*\\\]\\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 11 out of bounds for type 'int 
\\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 10 out of bounds for type 'int 
\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+
+
+#include 
+
+void __attribute__((__noinline__)) setup_and_test_vla (int n, int m)
+{
+   struct foo {
+   int n;
+   int p[][n] __attribute__((counted_by(n)));
+   } *f;
+
+   f = (struct foo *) malloc (sizeof(struct foo) + m*sizeof(int[n]));
+   f->n = m;
+   f->p[m][n-1]=1;
+   return;
+}
+

Re: [Patch][RFC] GCN: Define ISA archs in gcn-devices.def and use it

2024-03-15 Thread Tobias Burnus

Hi Andrew,

Andrew Stubbs wrote:
This is more-or-less what I was planning to do myself, but as I want 
to include all the other features that get parametrized in gcn.cc, 
gcn.h, gcn-hsa.h, gcn-opts.h, I hadn't got around to it yet. 
Unfortunately, I think the gcn.opt and config.gcc will always need 
manually updating, but if that's all it'll be an improvement.


Well, for .opt see how nvptx does it – it actually generates an .opt file.


I don't like the idea of including AMDGPU_ISA_UNSUPPORTED;


I concur – I was initially thinking of reporting the device name 
("Unsupported %s") but I then realized that the agent returns a string 
while only for GCC generated files (→ eflag) the hexcode is used. Thus, 
I ended up not using it.


Ultimately, I want to replace many of the conditionals like 
"TARGET_CDNA2_PLUS" from the code and replace them with feature flags 
derived from a def file, or at least a header file. We've acquired too 
many places where there are unsearchable conditionals that need 
finding and fixing every time a new device comes along.
I was thinking of having more flags, but those where the only ones 
required for the two files.
I had imagined that this .def file would exist in gcc/config/gcn, but 
you've placed it in libgomp maybe it makes sense to have multiple 
such files if they contain very different data, but I had imagined one 
file and I'm not sure that the compiler definitions live in libgomp.


There is already:

gcc/config/darwin-c.cc:#include "../../libcpp/internal.h"

gcc/config/gcn/gcn-run.cc:#include 
"../../../libgomp/config/gcn/libgomp-gcn.h"


gcc/fortran/cpp.cc:#include "../../libcpp/internal.h"

gcc/fortran/trigd_fe.inc:#include "../../libgfortran/intrinsics/trigd.inc"

But there is also the reverse:

libcpp/lex.cc:#include "../gcc/config/i386/cpuid.h"

libgfortran/libgfortran.h:#include "../gcc/fortran/libgfortran.h"

lto-plugin/lto-plugin.c:#include "../gcc/lto/common.h"

If you add more items, it is probably better to have it under 
gcc/config/gcn/ - and I really prefer a single file for all.


* * *

Talking about feature sets: This would be a bit like LLVM (see below) 
but I think they have a bit too much indirections. But I do concur that 
we need to consolidate the current support – and hopefully make it 
easier to keep adding more GPU support; we seem to have already covered 
a larger chunk :-)


I also did wonder whether we should support, e.g., running a gfx1100 
code (or a gfx11-generic one) on, e.g., a gfx1103 device. Alternatively, 
we could keep the current check which requires an exact match.


BTW: I do note that looking at the feature sets in LLVM that all GFX110x 
GPUs seem to have common silicon bugs: FeatureMSAALoadDstSelBug and 
FeatureMADIntraFwdBug, while 1100 and 1102 additionally have the 
FeatureUserSGPRInit16Bug but 1101 and 1103 don't. — For some reasons, 
FeatureISAVersion11_Generic only consists of two of those bugs (it 
doesn't have FeatureMADIntraFwdBug), which doesn't seem to be that 
consistent. Maybe the workaround has issues elsewhere? If so, a generic 
-march=gfx11 might be not as useful as one might hope for.


* * *

If I look at LLVM's 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPU.td 
,


they first define several features – like 'FeatureUnalignedScratchAccess'.

Then they combine them like in:

def FeatureISAVersion11_Common ... [FeatureGFX11, ... 
FeatureAtomicFaddRtnInsts ...


And then they use those to map them to feature sets like:

def FeatureISAVersion11_0_Common ... 
listconcat(FeatureISAVersion11_Common.Features,

    [FeatureMSAALoadDstSelBug ...

And for gfx1103:

def FeatureISAVersion11_0_3 : FeatureSet<
  !listconcat(FeatureISAVersion11_0_Common.Features,
    [])>;

The mapping to gfx... names then happens in 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/GCNProcessors.td 
such as:


def : ProcessorModel<"gfx1103", GFX11SpeedModel,
  FeatureISAVersion11_0_3.Features
>;

Or for the generic one, i.e.:

// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151]
def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
  FeatureISAVersion11_Generic.Features

LLVM also has some generic flags like the following in 
https://github.com/llvm/llvm-project/blob/main/llvm/lib/TargetParser/TargetParser.cpp


    {{"gfx1013"},   {"gfx1013"}, GK_GFX1013, 
FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},


I hope that this will give some inspiration – but I assume that at least 
the initial implementation will be much shorter.


Tobias



[commit] Regenerate opt.urls

2024-03-15 Thread YunQiang Su
Fixes: acc38ff59976 ("MIPS: Add -m(no-)strict-align option")

gcc/ChangeLog:

* config/riscv/riscv.opt.urls: Regenerated.
* config/rs6000/sysv4.opt.urls: Likewise.
* config/xtensa/xtensa.opt.urls: Likewise.
---
 gcc/config/riscv/riscv.opt.urls   | 2 +-
 gcc/config/rs6000/sysv4.opt.urls  | 2 +-
 gcc/config/xtensa/xtensa.opt.urls | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv.opt.urls b/gcc/config/riscv/riscv.opt.urls
index f40795866cf..da31820e234 100644
--- a/gcc/config/riscv/riscv.opt.urls
+++ b/gcc/config/riscv/riscv.opt.urls
@@ -44,7 +44,7 @@ UrlSuffix(gcc/RISC-V-Options.html#index-mshorten-memrefs)
 ; skipping UrlSuffix for 'mcmodel=' due to finding no URLs
 
 mstrict-align
-UrlSuffix(gcc/RISC-V-Options.html#index-mstrict-align-3)
+UrlSuffix(gcc/RISC-V-Options.html#index-mstrict-align-4)
 
 ; skipping UrlSuffix for 'mexplicit-relocs' due to finding no URLs
 
diff --git a/gcc/config/rs6000/sysv4.opt.urls b/gcc/config/rs6000/sysv4.opt.urls
index f8d58d6602c..c155cddfa36 100644
--- a/gcc/config/rs6000/sysv4.opt.urls
+++ b/gcc/config/rs6000/sysv4.opt.urls
@@ -12,7 +12,7 @@ mbit-align
 UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-mbit-align)
 
 mstrict-align
-UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-mstrict-align-4)
+UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-mstrict-align-5)
 
 mrelocatable
 UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-mrelocatable)
diff --git a/gcc/config/xtensa/xtensa.opt.urls 
b/gcc/config/xtensa/xtensa.opt.urls
index 146db23d1e3..1f193a7da0c 100644
--- a/gcc/config/xtensa/xtensa.opt.urls
+++ b/gcc/config/xtensa/xtensa.opt.urls
@@ -33,5 +33,5 @@ mabi=windowed
 UrlSuffix(gcc/Xtensa-Options.html#index-mabi_003dwindowed)
 
 mstrict-align
-UrlSuffix(gcc/Xtensa-Options.html#index-mstrict-align-5)
+UrlSuffix(gcc/Xtensa-Options.html#index-mstrict-align-6)
 
-- 
2.39.2



Re: Patch ping Re: [PATCH] icf: Reset SSA_NAME_{PTR,RANGE}_INFO in successfully merged functions [PR113907]

2024-03-15 Thread Jan Hubicka
> > +  if (POINTER_TYPE_P (TREE_TYPE (t1)))
> > +{
> > +  if (SSA_NAME_PTR_INFO (t1))
> > +   {
> > + if (!SSA_NAME_PTR_INFO (t2)
> > + || SSA_NAME_PTR_INFO (t1)->align != SSA_NAME_PTR_INFO (t2)->align
> > + || SSA_NAME_PTR_INFO (t1)->misalign != SSA_NAME_PTR_INFO 
> > (t2)->misalign)
> 
> You want to compare SSA_NAME_PTR_INFO ()->pt.zero as well I think since
> we store pointer non-null-ness from VRP there.
> 
> You are not comparing the actual points-to info - but of course I'd
> expect that to differ as soon as local decls are involved.  Still looks
> like a hole to me.

I convinced myself that we don't need to do that since we recompute PTA
after IPA stage: unlike value ranges it is thrown away and recomputed
rather then just refined from prevoius solution.  But indeed if parts
are persistent, we need to compare it (and should stream it to LTO I
guess).

Honza


Re: [Patch][RFC] GCN: Define ISA archs in gcn-devices.def and use it

2024-03-15 Thread Andrew Stubbs

On 15/03/2024 12:21, Tobias Burnus wrote:
Given the large number of AMD GPU ISAs and the number of files which 
have to be adapted, I wonder whether it makes sense to consolidate this 
a bit, especially in the light that we may want to support more in the 
future.


Besides using some macros, I also improved the diagnostic if the object 
code couldn't be recognized (shouldn't happen) or if the GPU is 
unsupported (likely; it now prints the GPU string). I was initially 
thinking of resolving the arch encoded in the eflag to a string, but as 
this is about GCC-generated code, it seemed to be unlikely of much use. 
[It should that rare that we might also go back to the static string 
instead of outputting the hex value of the eflag.]


Note: I only modified mkoffload.cc and plugin-gcn.c, but with some 
tweaks it could also be used for other files in gcc/config/gcn/.


If you add a new ISA, you still need to update plugin-gcn.c's 
max_isa_vgprs and the xnack/sram-ecc handling in mkoffload.c's main, but 
that should be all for those two files.


Thoughts?


This is more-or-less what I was planning to do myself, but as I want to 
include all the other features that get parametrized in gcn.cc, gcn.h, 
gcn-hsa.h, gcn-opts.h, I hadn't got around to it yet.  Unfortunately, I 
think the gcn.opt and config.gcc will always need manually updating, but 
if that's all it'll be an improvement.


I don't like the idea of including AMDGPU_ISA_UNSUPPORTED; that list is 
going to be permanently out of date, and even if we maintain it 
fastidiously last year's release isn't going to have the updated list in 
the wild. I think it's not actually active in this patch in any case.


Instead of AMDGPU_ISA, I think "AMDGPU_ELF" makes more sense. The ISA is 
"CDNA2" or "RDNA3", etc., and the compiler needs to know about that.


Ultimately, I want to replace many of the conditionals like 
"TARGET_CDNA2_PLUS" from the code and replace them with feature flags 
derived from a def file, or at least a header file. We've acquired too 
many places where there are unsearchable conditionals that need finding 
and fixing every time a new device comes along.


I had imagined that this .def file would exist in gcc/config/gcn, but 
you've placed it in libgomp maybe it makes sense to have multiple 
such files if they contain very different data, but I had imagined one 
file and I'm not sure that the compiler definitions live in libgomp.



Tobias

PS: I think the patch is fine and builds, but I have not tested it on an 
AMD GPU machine, yet.


PPS: For using for other files, see also in config/nvptx which uses 
nvptx-sm.def to generate several files.




Andrew


Re: RFC: New mechanism for hard reg operands to inline asm

2024-03-15 Thread Stefan Schulze Frielinghaus
On Fri, Jun 04, 2021 at 06:02:27PM +, Andreas Krebbel via Gcc wrote:
> Hi,
> 
> I wonder if we could replace the register asm construct for
> inline assemblies with something a bit nicer and more obvious.
> E.g. turning this (real world example from IBM Z kernel code):
> 
> int diag8_response(int cmdlen, char *response, int *rlen)
> {
> register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
> register unsigned long reg3 asm ("3") = (addr_t) response;
> register unsigned long reg4 asm ("4") = cmdlen | 0x4000L;
> register unsigned long reg5 asm ("5") = *rlen; /* <-- */
> asm volatile(
> "   diag%2,%0,0x8\n"
> "   brc 8,1f\n"
> "   agr %1,%4\n"
> "1:\n"
> : "+d" (reg4), "+d" (reg5)
> : "d" (reg2), "d" (reg3), "d" (*rlen): "cc");
> *rlen = reg5;
> return reg4;
> }
> 
> into this:
> 
> int diag8_response(int cmdlen, char *response, int *rlen)
> {
> unsigned long len = cmdlen | 0x4000L;
> 
> asm volatile(
> "   diag%2,%0,0x8\n"
> "   brc 8,1f\n"
> "   agr %1,%4\n"
> "1:\n"
> : "+{r4}" (len), "+{r5}" (*rlen)
> : "{r2}" ((addr_t)cpcmd_buf), "{r3}" ((addr_t)response), "d" 
> (*rlen): "cc");
> return len;
> }
> 
> Apart from being much easier to read because the hard regs become part
> of the inline assembly it solves also a couple of other issues:
> 
> - function calls might clobber register asm variables see BZ100908
> - the constraints for the register asm operands are superfluous
> - one register asm variable cannot be used for 2 different inline
>   assemblies if the value is expected in different hard regs
> 
> I've started with a hackish implementation for IBM Z using the
> TARGET_MD_ASM_ADJUST hook and let all the places parsing constraints
> skip over the {} parts.  But perhaps it would be useful to make this a
> generic mechanism for all targets?!
> 
> Andrea

Hi all,

I would like to resurrect this topic
https://gcc.gnu.org/pipermail/gcc/2021-June/236269.html and have been
coming up with a first implementation in order to discuss this further.

Basically, I see two ways to implement this.  First is by letting LRA
assign the registers and the second one by introducing extra moves just
before/after asm statements.  Currently I went for the latter and emit
extra moves during expand into hard regs as specified by the
input/output constraints.

Before going forward I would like to get some feedback whether this approach
makes sense to you at all or whether you see some show stoppers.  I was
wondering whether my current approach is robust enough in the sense that no
other pass could potentially remove the extra moves I introduced before.
In particular I was first worried about code motion.  Initially I thought I
have to make use not only of hard regs but hard regs which are flagged as
register-asms in order to prevent optimizations to fiddly around with those
moves.  However, after some more investigation I tend to conclude that this is
not necessary.  Any thoughts about this approach?

With the current approach I can at least handle cases like:

int __attribute__ ((noipa))
foo (int x) { return x; }

int test (int x)
{
  asm ("foo %0,%1\n" :: "{r3}" (foo (x + 1)), "{r2}" (x));
  return x;
}

Note, this is written with the s390 ABI in mind where the first int argument
and return value are passed in register r2.  The point here is that r2 needs to
be altered and restored multiple times until we reach } of function test().
Luckily, during expand we get all this basically for free.

This brings me to the general question what should be allowed and what not?
Evaluation order of input expressions is probably unspecified similar to
function arguments.  However, what about this one:

int test (int x)
{
  register int y asm ("r5") = x + 1;
  asm ("foo %0,%1\n" : "={r4}" (y) : "{r1}" (y));
  return y;
}

IMHO the input is just fine but the output constraint is misleading and it is
not obvious in which register variable y resides after the asm statement.
With my current implementation, were I don't bail out, it is register r4
contrary to the decl.  Interestingly, the other way around where one register
is "aliased" by multiple variables is accepted by vanilla GCC:

int foo (int x, int y)
{
  register int a asm ("r1") = x;
  register int b asm ("r1") = y;
  return a + b;
}

Though, probably not intentionally.

Cheers,
Stefan


Re: [PATCH] vect: Use xor to invert oversized vector masks

2024-03-15 Thread Richard Biener
On Fri, Mar 15, 2024 at 12:24 PM Andrew Stubbs  wrote:
>
> On 15/03/2024 07:35, Richard Biener wrote:
> > On Fri, Mar 15, 2024 at 4:35 AM Hongtao Liu  wrote:
> >>
> >> On Thu, Mar 14, 2024 at 11:42 PM Andrew Stubbs  wrote:
> >>>
> >>> Don't enable excess lanes when inverting vector bit-masks smaller than the
> >>> integer mode.  This is yet another case of wrong-code due to mishandling
> >>> of oversized bitmasks.
> >>>
> >>> This issue shows up in vect/tsvc/vect-tsvc-s278.c and
> >>> vect/tsvc/vect-tsvc-s279.c if I set the preferred vector size to V32
> >>> (down from V64) on amdgcn.
> >>>
> >>> OK for mainline?
> >>>
> >>> Andrew
> >>>
> >>> gcc/ChangeLog:
> >>>
> >>>  * expr.cc (expand_expr_real_2): Use xor to invert vector masks.
> >>> ---
> >>>   gcc/expr.cc | 11 +++
> >>>   1 file changed, 11 insertions(+)
> >>>
> >>> diff --git a/gcc/expr.cc b/gcc/expr.cc
> >>> index 403eeaa108e4..3540327d879e 100644
> >>> --- a/gcc/expr.cc
> >>> +++ b/gcc/expr.cc
> >>> @@ -10497,6 +10497,17 @@ expand_expr_real_2 (sepops ops, rtx target, 
> >>> machine_mode tmode,
> >>> immed_wide_int_const (mask, int_mode),
> >>> target, 1, OPTAB_LIB_WIDEN);
> >>>  }
> >>> +  /* If it's a vector mask don't enable excess bits.  */
> >>> +  else if (VECTOR_BOOLEAN_TYPE_P (type)
> >>> +  && SCALAR_INT_MODE_P (mode)
> >>> +  && maybe_ne (GET_MODE_PRECISION (mode),
> >>> +   TYPE_VECTOR_SUBPARTS (type).to_constant ()))
> >>> +   {
> >>> + auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> >>> + temp = expand_binop (mode, xor_optab, op0,
> >>> +  GEN_INT ((HOST_WIDE_INT_1U << nunits) - 1),
> >>> +  target, true, OPTAB_WIDEN);
> >>> +   }
> >> Not review, just curious, should the issue be fixed by the commit in 
> >> PR113576.
> >> Also wonder besides cbranch, excess land bits also matter?
> >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113576#c35
> >
> > Yes, you patch BIT_NOT but we decided to patch final compares.  Is it that
> > we need to fixup every mask use in a .COND_* expansion as well?  If so
> > we should do it there.
>
> I thought that the "not" to "xor" change was nice and there was already
> code there for fixing bitfields, but OK, I take your point.
>
> The .COND_* statements are handled as internal function calls that are
> expanded directly via the optab with no special cases for different call
> types. This is because the "expand_cond_*_optab_fn" functions just map
> straight to "expand_direct_optab_fn" would that be the right place
> to insert a special case handler to insert "and" operations?

Yes, I think in expand_fn_using_insn where we handle the "undefined" input
operands we want to handle the vector bool operands as well, masking
rhs_rtx accordingly.

Richard.

> Andrew


[PATCH] doc: Improve punctuation and grammar in -fdiagnostics-format docs

2024-03-15 Thread Jonathan Wakely
OK for trunk?

-- >8 --

The hyphen can be misunderstood to mean "emitted to -" i.e. stdout.
Refer to both forms by name, rather than using "the former" for one and
referring to the other by name.

gcc/ChangeLog:

* doc/invoke.texi (Diagnostic Message Formatting Options):
Replace hyphen with a new sentence. Replace "the former" with
the actual value.
---
 gcc/doc/invoke.texi | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 85c938d4a14..d850b5fcdcc 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -5737,8 +5737,9 @@ named @file{@var{source}.sarif}, respectively.
 
 The @samp{json} format is a synonym for @samp{json-stderr}.
 The @samp{json-stderr} and @samp{json-file} formats are identical, apart from
-where the JSON is emitted to - with the former, the JSON is emitted to stderr,
-whereas with @samp{json-file} it is written to @file{@var{source}.gcc.json}.
+where the JSON is emitted to.  With @samp{json-stderr}, the JSON is emitted
+to stderr, whereas with @samp{json-file} it is written to
+@file{@var{source}.gcc.json}.
 
 The emitted JSON consists of a top-level JSON array containing JSON objects
 representing the diagnostics.
-- 
2.44.0



[Patch][RFC] GCN: Define ISA archs in gcn-devices.def and use it

2024-03-15 Thread Tobias Burnus
Given the large number of AMD GPU ISAs and the number of files which 
have to be adapted, I wonder whether it makes sense to consolidate this 
a bit, especially in the light that we may want to support more in the 
future.


Besides using some macros, I also improved the diagnostic if the object 
code couldn't be recognized (shouldn't happen) or if the GPU is 
unsupported (likely; it now prints the GPU string). I was initially 
thinking of resolving the arch encoded in the eflag to a string, but as 
this is about GCC-generated code, it seemed to be unlikely of much use. 
[It should that rare that we might also go back to the static string 
instead of outputting the hex value of the eflag.]


Note: I only modified mkoffload.cc and plugin-gcn.c, but with some 
tweaks it could also be used for other files in gcc/config/gcn/.


If you add a new ISA, you still need to update plugin-gcn.c's 
max_isa_vgprs and the xnack/sram-ecc handling in mkoffload.c's main, but 
that should be all for those two files.


Thoughts?

Tobias

PS: I think the patch is fine and builds, but I have not tested it on an 
AMD GPU machine, yet.


PPS: For using for other files, see also in config/nvptx which uses 
nvptx-sm.def to generate several files.
GCN: Define ISA archs in gcn-devices.def and use it

Adding new a GCN ISAs requires to update many files, making it more
likely to miss a file; by adding the gcn-devices.def file and using
it in config/gcn/mkoffload.cc and libgomp/plugin/plugin-gcn.c, it
reduces the duplications.

gcc/ChangeLog:

	* config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_...): Replace
	explicit #define by an enum created from gcn-devices.def.
	(main): Use gcn-devices.def definitions for -march=gfx.* string
	parsing.

libgomp/ChangeLog:

	* plugin/gcn-devices.def: New file.
	* plugin/plugin-gcn.c (gcn_..._s): Remove.
	(enum EF_AMDGPU_MACH): Generate EF_AMDGPU_MACH_AMDGCN_...
	using gcn-devices.def.
	(isa_hsa_name, isa_gcc_name, isa_code): Use gcn-devices.def
	to handle the ISAs.
	(max_isa_vgprs): Update used enum name (GFX90a -> GFX90A).
	(isa_matches_agent, GOMP_OFFLOAD_init_device): Be more verbose
	in case of an unsupported ISA.

 gcc/config/gcn/mkoffload.cc|  42 ++-
 libgomp/plugin/gcn-devices.def |  62 ++
 libgomp/plugin/plugin-gcn.c| 118 +++--
 3 files changed, 119 insertions(+), 103 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index fe443abba21..081110d7030 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -47,20 +47,14 @@
 #undef  ELFABIVERSION_AMDGPU_HSA_V4
 #define ELFABIVERSION_AMDGPU_HSA_V4 2
 
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX803
-#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX900
-#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX906
-#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX908
-#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX90a
-#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX1030
-#define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
-#undef  EF_AMDGPU_MACH_AMDGCN_GFX1100
-#define EF_AMDGPU_MACH_AMDGCN_GFX1100 0x41
+/* Use an enum as macros cannot define macros and
+   assume that EF_AMDGPU_MACH_AMDGCN_... is not #defined.  */
+enum {
+#define AMDGPU_ISA(suffix, str, val) \
+ EF_AMDGPU_MACH_AMDGCN_ ## suffix = val,
+#include "../libgomp/plugin/gcn-devices.def"
+#undef AMDGPU_ISA
+};
 
 #define EF_AMDGPU_FEATURE_XNACK_V4	0x300  /* Mask.  */
 #define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4	0x000
@@ -959,18 +953,12 @@ main (int argc, char **argv)
 	dumppfx = argv[++i];
   else if (strcmp (argv[i], "-march=fiji") == 0)
 	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;
-  else if (strcmp (argv[i], "-march=gfx900") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;
-  else if (strcmp (argv[i], "-march=gfx906") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
-  else if (strcmp (argv[i], "-march=gfx908") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
-  else if (strcmp (argv[i], "-march=gfx90a") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
-  else if (strcmp (argv[i], "-march=gfx1030") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1030;
-  else if (strcmp (argv[i], "-march=gfx1100") == 0)
-	elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1100;
+#define AMDGPU_ISA(suffix, str, val) \
+  else if (strcmp (argv[i], "-march=" str) == 0) \
+	elf_arch = EF_AMDGPU_MACH_AMDGCN_ ## suffix;
+#include "../libgomp/plugin/gcn-devices.def"
+#undef AMDGPU_ISA
+
 #define STR "-mstack-size="
   else if (startswith (argv[i], STR))
 	gcn_stack_size = atoi (argv[i] + strlen (STR));
@@ -1029,7 +1017,7 @@ main (int argc, char **argv)
   if (TEST_SRAM_ECC_UNSET (elf_flags))
 	SET_SRAM_ECC_ANY (elf_flags);
   break;
-case EF_AMDGPU_MACH_AMDGCN_GFX90a:
+case EF_AMDGPU_MACH_AMDGCN_GFX90A:
   if (TEST_XNACK_UNSET 

Re: [PATCH] vect: Use xor to invert oversized vector masks

2024-03-15 Thread Andrew Stubbs

On 15/03/2024 07:35, Richard Biener wrote:

On Fri, Mar 15, 2024 at 4:35 AM Hongtao Liu  wrote:


On Thu, Mar 14, 2024 at 11:42 PM Andrew Stubbs  wrote:


Don't enable excess lanes when inverting vector bit-masks smaller than the
integer mode.  This is yet another case of wrong-code due to mishandling
of oversized bitmasks.

This issue shows up in vect/tsvc/vect-tsvc-s278.c and
vect/tsvc/vect-tsvc-s279.c if I set the preferred vector size to V32
(down from V64) on amdgcn.

OK for mainline?

Andrew

gcc/ChangeLog:

 * expr.cc (expand_expr_real_2): Use xor to invert vector masks.
---
  gcc/expr.cc | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 403eeaa108e4..3540327d879e 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -10497,6 +10497,17 @@ expand_expr_real_2 (sepops ops, rtx target, 
machine_mode tmode,
immed_wide_int_const (mask, int_mode),
target, 1, OPTAB_LIB_WIDEN);
 }
+  /* If it's a vector mask don't enable excess bits.  */
+  else if (VECTOR_BOOLEAN_TYPE_P (type)
+  && SCALAR_INT_MODE_P (mode)
+  && maybe_ne (GET_MODE_PRECISION (mode),
+   TYPE_VECTOR_SUBPARTS (type).to_constant ()))
+   {
+ auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+ temp = expand_binop (mode, xor_optab, op0,
+  GEN_INT ((HOST_WIDE_INT_1U << nunits) - 1),
+  target, true, OPTAB_WIDEN);
+   }

Not review, just curious, should the issue be fixed by the commit in PR113576.
Also wonder besides cbranch, excess land bits also matter?
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113576#c35


Yes, you patch BIT_NOT but we decided to patch final compares.  Is it that
we need to fixup every mask use in a .COND_* expansion as well?  If so
we should do it there.


I thought that the "not" to "xor" change was nice and there was already 
code there for fixing bitfields, but OK, I take your point.


The .COND_* statements are handled as internal function calls that are 
expanded directly via the optab with no special cases for different call 
types. This is because the "expand_cond_*_optab_fn" functions just map 
straight to "expand_direct_optab_fn" would that be the right place 
to insert a special case handler to insert "and" operations?


Andrew


Re: [PATCH, OpenACC 2.7, v2] Adjust acc_map_data/acc_unmap_data interaction with reference counters

2024-03-15 Thread Thomas Schwinge
Hi Chung-Lin!

I realized: please add "PR libgomp/92840" to the Git commit log, as your
changes are directly a continuation of my earlier changes.


On 2024-03-05T01:18:28+0900, Chung-Lin Tang  wrote:
> On 2023/10/31 11:06 PM, Thomas Schwinge wrote:
>>> @@ -691,15 +694,27 @@ goacc_exit_datum_1 (struct gomp_device_descr 
>>> *acc_dev, void *h, size_t s,
>>>
>>>if (finalize)
>>>  {
>>> -  if (n->refcount != REFCOUNT_INFINITY)
>>> +  if (n->refcount != REFCOUNT_INFINITY
>>> +   && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>>   n->refcount -= n->dynamic_refcount;
>>> -  n->dynamic_refcount = 0;
>>> +
>>> +  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
>>> + /* Mappings created by acc_map_data are returned to initial
>>> +dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
>>> + n->dynamic_refcount = 1;
>>> +  else
>>> + n->dynamic_refcount = 0;
>>>  }
>>>else if (n->dynamic_refcount)
>>>  {
>>> -  if (n->refcount != REFCOUNT_INFINITY)
>>> +  if (n->refcount != REFCOUNT_INFINITY
>>> +   && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>>   n->refcount--;
>>> -  n->dynamic_refcount--;
>>> +
>>> +  /* When mapping is created by acc_map_data, dynamic_refcount must be
>>> +  maintained at >= 1.  */
>>> +  if (n->refcount != REFCOUNT_ACC_MAP_DATA || n->dynamic_refcount > 1)
>>> + n->dynamic_refcount--;
>>>  }
>> 
>> I'd find those changes more concise to understand if done the following
>> way: restore both 'if (finalize)' and 'else if (n->dynamic_refcount)'
>> branches to their original form (other than excluding 'n->refcount'
>> modification for 'REFCOUNT_ACC_MAP_DATA', as you have), and instead then
>> afterwards (that is, here), do:
>> 
>> /* Mappings created by 'acc_map_data' can only be deleted by 
>> 'acc_unmap_data'.  */
>> if (n->refcount == REFCOUNT_ACC_MAP_DATA
>> && n->dynamic_refcount == 0)
>>   n->dynamic_refcount = 1;
>> 
>> That does have the same semantics, please verify?
>
> This does not have the same semantics, because if the original 
> finalize/n->dynamic_refcount
> cases are left unmodified, they will treat REFCOUNT_ACC_MAP_DATA like a 
> normal refcount and
> decrement n->refcount, and handling n->refcount == REFCOUNT_ACC_MAP_DATA 
> later won't work either.

That's why I said: "restore [...] excluding 'n->refcount' modification
for 'REFCOUNT_ACC_MAP_DATA', as you have [...]".  Sorry if that was
unclear.

> I have however, adjusted the nesting of cases to split the 'n->refcount == 
> REFCOUNT_ACC_MAP_DATA'
> case away. This should be easier to read.

Thanks, that easier to follow indeed.  I had meant (on top of your v2):

--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -686,35 +686,27 @@ goacc_exit_datum_1 (struct gomp_device_descr 
*acc_dev, void *h, size_t s,
   gomp_fatal ("Dynamic reference counting assert fail\n");
 }
 
-  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
+  if (finalize)
 {
-  if (finalize)
-   {
- /* Mappings created by acc_map_data are returned to initial
-dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
- n->dynamic_refcount = 1;
-   }
-  else if (n->dynamic_refcount)
-   {
- /* When mapping is created by acc_map_data, dynamic_refcount must be
-maintained at >= 1.  */
- if (n->dynamic_refcount > 1)
-   n->dynamic_refcount--;
-   }
-}
-  else if (finalize)
-{
-  if (n->refcount != REFCOUNT_INFINITY)
+  if (n->refcount != REFCOUNT_INFINITY
+ && n->refcount != REFCOUNT_ACC_MAP_DATA)
n->refcount -= n->dynamic_refcount;
   n->dynamic_refcount = 0;
 }
   else if (n->dynamic_refcount)
 {
-  if (n->refcount != REFCOUNT_INFINITY)
+  if (n->refcount != REFCOUNT_INFINITY
+ && n->refcount != REFCOUNT_ACC_MAP_DATA)
n->refcount--;
   n->dynamic_refcount--;
 }
 
+  /* Mappings created by 'acc_map_data' may only be deleted by
+ 'acc_unmap_data'.  */
+  if (n->refcount == REFCOUNT_ACC_MAP_DATA
+  && n->dynamic_refcount == 0)
+n->dynamic_refcount = 1;
+
   if (n->refcount == 0)
 {
   bool copyout = (kind == GOMP_MAP_FROM

..., which really should have the same semantics?  No strong opinion on
which of the two variants you now chose.


>>> @@ -480,7 +480,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t 
>>> *refcount_set)
>>>
>>>uintptr_t *refcount_ptr = >refcount;
>>>
>>> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>>> +refcount_ptr = >dynamic_refcount;
>>> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>>  refcount_ptr = >structelem_refcount;
>>>else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>>>  refcount_ptr = 

[committed] lower-subreg, edit-context: Fix comment typos

2024-03-15 Thread Jakub Jelinek
Hi!

When backporting r14-9315 to 13 branch, I've noticed I've missed
one letter in a comment.  And grepping for similar issues I found
one word with too many.

Committed to trunk as obvious.

2024-03-15  Jakub Jelinek  

* lower-subreg.cc (resolve_simple_move): Fix comment typo,
betwee -> between.
* edit-context.cc (class line_event): Fix comment typo,
betweeen -> between.

--- gcc/lower-subreg.cc.jj  2024-03-05 10:32:32.502954822 +0100
+++ gcc/lower-subreg.cc 2024-03-15 12:18:03.189527341 +0100
@@ -933,7 +933,7 @@ resolve_simple_move (rtx set, rtx_insn *
  if (reg_overlap_mentioned_p (XVECEXP (dest, 0, 0),
   XVECEXP (src, 0, 1)))
{
- /* If there is overlap betwee the first half of the
+ /* If there is overlap between the first half of the
 destination and what will be stored to the second one,
 use a temporary pseudo.  See PR114211.  */
  rtx tem = gen_reg_rtx (GET_MODE (XVECEXP (src, 0, 1)));
--- gcc/edit-context.cc.jj  2024-01-03 11:51:33.746700448 +0100
+++ gcc/edit-context.cc 2024-03-15 12:18:21.702277391 +0100
@@ -129,7 +129,7 @@ class added_line
 };
 
 /* Class for representing edit events that have occurred on one line of
-   one file: the replacement of some text betweeen some columns
+   one file: the replacement of some text between some columns
on the line.
 
Subsequent events will need their columns adjusting if they're

Jakub



[PATCH] aarch64: Add +lse128 architectural extension command-line flag

2024-03-15 Thread Victor Do Nascimento
Given how, at present, the choice of using LSE128 atomic instructions
by the toolchain is delegated to run-time selection in the form of
Libatomic ifuncs, responsible for querying target support, the
`+lse128' target architecture compile-time flag is absent from GCC.

This, however, contrasts with the Binutils implementation, which gates
LSE128 instructions behind the `+lse128' flag.  This can lead to
problems in GCC for certain use-cases.  One such example is in the use
of inline assembly, whereby the inability of enabling the feature in
the command-line prevents the compiler from automatically issuing the
necessary LSE128 `.arch' directive.

This patch therefore brings GCC into alignment with LLVM and Binutils
in adding support for the `+lse128' architectural extension flag.

gcc/ChangeLog:

* config/aarch64/aarch64-option-extensions.def: Add LSE128
AARCH64_OPT_EXTENSION, adding it as a dependency for the D128
feature.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/lse128-flag.c: New.
* gcc.target/aarch64/cpunative/info_23: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_23.c: Likewise.
---
 gcc/config/aarch64/aarch64-option-extensions.def  |  4 +++-
 gcc/testsuite/gcc.target/aarch64/cpunative/info_23|  8 
 .../gcc.target/aarch64/cpunative/native_cpu_23.c  | 11 +++
 gcc/testsuite/gcc.target/aarch64/lse128-flag.c| 10 ++
 4 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/cpunative/info_23
 create mode 100644 gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_23.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/lse128-flag.c

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def 
b/gcc/config/aarch64/aarch64-option-extensions.def
index 1a3b91c68cf..ac54b899a06 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -275,7 +275,9 @@ AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
 
 AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
 
-AARCH64_OPT_EXTENSION("d128", D128, (), (), (), "d128")
+AARCH64_OPT_EXTENSION("lse128", LSE128, (LSE), (), (), "lse128")
+
+AARCH64_OPT_EXTENSION("d128", D128, (LSE128), (), (), "d128")
 
 AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the")
 
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_23 
b/gcc/testsuite/gcc.target/aarch64/cpunative/info_23
new file mode 100644
index 000..d77c25d2f61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_23
@@ -0,0 +1,8 @@
+processor  : 0
+BogoMIPS   : 100.00
+Features   : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp atomics 
lse128
+CPU implementer: 0xfe
+CPU architecture: 8
+CPU variant: 0x0
+CPU part   : 0xd08
+CPU revision   : 2
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_23.c 
b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_23.c
new file mode 100644
index 000..8a1e235d8ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_23.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { { aarch64*-*-linux*} && native } } } */
+/* { dg-set-compiler-env-var GCC_CPUINFO 
"$srcdir/gcc.target/aarch64/cpunative/info_23" } */
+/* { dg-additional-options "-mcpu=native" } */
+
+int main()
+{
+  return 0;
+}
+
+/* { dg-final { scan-assembler {\.arch armv8-a\+dotprod\+crc\+crypto\+lse128} 
} } */
+/* Test one where lse128 is available and so should be emitted.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/lse128-flag.c 
b/gcc/testsuite/gcc.target/aarch64/lse128-flag.c
new file mode 100644
index 000..71339c3af6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/lse128-flag.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { aarch64*-*-*} } } */
+/* { dg-additional-options "-march=armv9.4-a+lse128" } */
+
+int main()
+{
+  return 0;
+}
+
+/* { dg-final { scan-assembler {\.arch armv9\.4-a\+crc\+lse128} } } */
+/* Test a normal looking procinfo.  */
-- 
2.34.1



[PING^3] Re: [PATCH] analyzer: deal with -fshort-enums

2024-03-15 Thread Torbjorn SVENSSON

Ping!

Kind regards,
Torbjörn

On 2024-03-08 10:14, Torbjorn SVENSSON wrote:

Ping!

Kind regards,
Torbjörn

On 2024-02-22 09:51, Torbjorn SVENSSON wrote:

Ping!

Kind regards,
Torbjörn

On 2024-02-07 17:21, Torbjorn SVENSSON wrote:

Hi,

Is it okay to backport 3cbab07b08d2f3a3ed34b6ec12e67727c59d285c to 
releases/gcc-13?


Without this backport, I see these failures on arm-none-eabi:

FAIL: gcc.dg/analyzer/switch-enum-1.c  (test for bogus messages, line 
26)
FAIL: gcc.dg/analyzer/switch-enum-1.c  (test for bogus messages, line 
44)
FAIL: gcc.dg/analyzer/switch-enum-2.c  (test for bogus messages, line 
34)
FAIL: gcc.dg/analyzer/switch-enum-2.c  (test for bogus messages, line 
52)
FAIL: gcc.dg/analyzer/torture/switch-enum-pr105273-doom-p_floor.c -O0 
  (test for bogus messages, line 82)
FAIL: gcc.dg/analyzer/torture/switch-enum-pr105273-doom-p_maputl.c 
-O0    (test for bogus messages, line 83)


Kind regards,
Torbjörn


On 2023-12-06 23:22, David Malcolm wrote:

On Wed, 2023-12-06 at 02:31 -0300, Alexandre Oliva wrote:

On Nov 22, 2023, Alexandre Oliva  wrote:


Ah, nice, that's a great idea, I wish I'd thought of that!  Will
do.


Sorry it took me so long, here it is.  I added two tests, so that,
regardless of the defaults, we get both circumstances tested, without
repetition.

Regstrapped on x86_64-linux-gnu.  Also tested on arm-eabi.  Ok to
install?


Thanks for the updated patch.

Looks good to me.

Dave




analyzer: deal with -fshort-enums

On platforms that enable -fshort-enums by default, various switch-
enum
analyzer tests fail, because apply_constraints_for_gswitch doesn't
expect the integral promotion type cast.  I've arranged for the code
to cope with those casts.


for  gcc/analyzer/ChangeLog

 * region-model.cc (has_nondefault_case_for_value_p): Take
 enumerate type as a parameter.
 (region_model::apply_constraints_for_gswitch): Cope with
 integral promotion type casts.

for  gcc/testsuite/ChangeLog

 * gcc.dg/analyzer/switch-short-enum-1.c: New.
 * gcc.dg/analyzer/switch-no-short-enum-1.c: New.
---
  gcc/analyzer/region-model.cc   |   27 +++-
  .../gcc.dg/analyzer/switch-no-short-enum-1.c   |  141

  .../gcc.dg/analyzer/switch-short-enum-1.c  |  140

  3 files changed, 304 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/analyzer/switch-no-short-
enum-1.c
  create mode 100644 gcc/testsuite/gcc.dg/analyzer/switch-short-enum-
1.c

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-
model.cc
index 2157ad2578b85..6a7a8bc9f4884 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -5387,10 +5387,10 @@ has_nondefault_case_for_value_p (const
gswitch *switch_stmt, tree int_cst)
 has nondefault cases handling all values in the enum.  */
  static bool
-has_nondefault_cases_for_all_enum_values_p (const gswitch
*switch_stmt)
+has_nondefault_cases_for_all_enum_values_p (const gswitch
*switch_stmt,
+   tree type)
  {
    gcc_assert (switch_stmt);
-  tree type = TREE_TYPE (gimple_switch_index (switch_stmt));
    gcc_assert (TREE_CODE (type) == ENUMERAL_TYPE);
    for (tree enum_val_iter = TYPE_VALUES (type);
@@ -5426,6 +5426,23 @@ apply_constraints_for_gswitch (const
switch_cfg_superedge ,
  {
    tree index  = gimple_switch_index (switch_stmt);
    const svalue *index_sval = get_rvalue (index, ctxt);
+  bool check_index_type = true;
+
+  /* With -fshort-enum, there may be a type cast.  */
+  if (ctxt && index_sval->get_kind () == SK_UNARYOP
+  && TREE_CODE (index_sval->get_type ()) == INTEGER_TYPE)
+    {
+  const unaryop_svalue *unaryop = as_a 
(index_sval);
+  if (unaryop->get_op () == NOP_EXPR
+ && is_a  (unaryop->get_arg ()))
+   if (const initial_svalue *initvalop = (as_a 
+  (unaryop->get_arg
(
+ if (TREE_CODE (initvalop->get_type ()) == ENUMERAL_TYPE)
+   {
+ index_sval = initvalop;
+ check_index_type = false;
+   }
+    }
    /* If we're switching based on an enum type, assume that the user
is only
   working with values from the enum.  Hence if this is an
@@ -5437,12 +5454,14 @@ apply_constraints_for_gswitch (const
switch_cfg_superedge ,
    ctxt
    /* Must be an enum value.  */
    && index_sval->get_type ()
-  && TREE_CODE (TREE_TYPE (index)) == ENUMERAL_TYPE
+  && (!check_index_type
+ || TREE_CODE (TREE_TYPE (index)) == ENUMERAL_TYPE)
    && TREE_CODE (index_sval->get_type ()) == ENUMERAL_TYPE
    /* If we have a constant, then we can check it directly.  */
    && index_sval->get_kind () != SK_CONSTANT
    && edge.implicitly_created_default_p ()
-  && has_nondefault_cases_for_all_enum_values_p (switch_stmt)
+  && has_nondefault_cases_for_all_enum_values_p (switch_stmt,
+   

[PATCH] handle unwind tables that are embedded within unwinding code, [PR111731]

2024-03-15 Thread Thomas Neumann

Original bug report: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111731
Given that this is a regression, is this okay for gcc 13 and mainline?

The unwinding mechanism registers both the code range and the unwind
table itself within a b-tree lookup structure. That data structure
assumes that is consists of non-overlappping intervals. This
becomes a problem if the unwinding table is embedded within the
code itself, as now the intervals do overlap.

To fix this problem we now keep the unwind tables in a separate
b-tree, which prevents the overlap.

libgcc/ChangeLog:
PR libgcc/111731
* unwind-dw2-fde.c: Split unwind ranges if they contain the
unwind table.
---
 libgcc/unwind-dw2-fde.c | 37 +
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c
index 61a578d097e..9d503545677 100644
--- a/libgcc/unwind-dw2-fde.c
+++ b/libgcc/unwind-dw2-fde.c
@@ -48,6 +48,7 @@ typedef __UINTPTR_TYPE__ uintptr_type;
 #include "unwind-dw2-btree.h"
 
 static struct btree registered_frames;

+static struct btree registered_objects;
 static bool in_shutdown;
 
 static void

@@ -58,6 +59,7 @@ release_registered_frames (void)
   /* Release the b-tree and all frames. Frame releases that happen later are
* silently ignored */
   btree_destroy (_frames);
+  btree_destroy (_objects);
   in_shutdown = true;
 }
 
@@ -103,6 +105,21 @@ static __gthread_mutex_t object_mutex;

 #endif
 #endif
 
+#ifdef ATOMIC_FDE_FAST_PATH

+// Register the pc range for a given object in the lookup structure.
+static void
+register_pc_range_for_object (uintptr_type begin, struct object *ob)
+{
+  // Register the object itself to know the base pointer on deregistration.
+  btree_insert (_objects, begin, 1, ob);
+
+  // Register the frame in the b-tree
+  uintptr_type range[2];
+  get_pc_range (ob, range);
+  btree_insert (_frames, range[0], range[1] - range[0], ob);
+}
+#endif
+
 /* Called from crtbegin.o to register the unwind info for an object.  */
 
 void

@@ -124,13 +141,7 @@ __register_frame_info_bases (const void *begin, struct 
object *ob,
 #endif
 
 #ifdef ATOMIC_FDE_FAST_PATH

-  // Register the object itself to know the base pointer on deregistration.
-  btree_insert (_frames, (uintptr_type) begin, 1, ob);
-
-  // Register the frame in the b-tree
-  uintptr_type range[2];
-  get_pc_range (ob, range);
-  btree_insert (_frames, range[0], range[1] - range[0], ob);
+  register_pc_range_for_object ((uintptr_type) begin, ob);
 #else
   init_object_mutex_once ();
   __gthread_mutex_lock (_mutex);
@@ -178,13 +189,7 @@ __register_frame_info_table_bases (void *begin, struct 
object *ob,
   ob->s.b.encoding = DW_EH_PE_omit;
 
 #ifdef ATOMIC_FDE_FAST_PATH

-  // Register the object itself to know the base pointer on deregistration.
-  btree_insert (_frames, (uintptr_type) begin, 1, ob);
-
-  // Register the frame in the b-tree
-  uintptr_type range[2];
-  get_pc_range (ob, range);
-  btree_insert (_frames, range[0], range[1] - range[0], ob);
+  register_pc_range_for_object ((uintptr_type) begin, ob);
 #else
   init_object_mutex_once ();
   __gthread_mutex_lock (_mutex);
@@ -232,7 +237,7 @@ __deregister_frame_info_bases (const void *begin)
 
 #ifdef ATOMIC_FDE_FAST_PATH

   // Find the originally registered object to get the base pointer.
-  ob = btree_remove (_frames, (uintptr_type) begin);
+  ob = btree_remove (_objects, (uintptr_type) begin);
 
   // Remove the corresponding PC range.

   if (ob)
@@ -240,7 +245,7 @@ __deregister_frame_info_bases (const void *begin)
   uintptr_type range[2];
   get_pc_range (ob, range);
   if (range[0] != range[1])
-btree_remove (_frames, range[0]);
+   btree_remove (_frames, range[0]);
 }
 
   // Deallocate the sort array if any.

--
2.43.0



Re: [PATCH v1] C++: Support constexpr strings for asm statements

2024-03-15 Thread Jakub Jelinek
On Thu, Jan 25, 2024 at 04:18:19AM -0800, Andi Kleen wrote:
> Some programing styles use a lot of inline assembler, and it is common
> to use very complex preprocessor macros to generate the assembler
> strings for the asm statements. In C++ there would be a typesafe alternative
> using templates and constexpr to generate the assembler strings, but
> unfortunately the asm statement requires plain string literals, so this
> doesn't work.
> 
> This patch modifies the C++ parser to accept strings generated by
> constexpr instead of just plain strings. This requires new syntax
> because e.g. asm("..." : "r" (expr)) would be ambigious with a function
> call. I chose () to make it unique. For example now you can write
> 
> constexpr const char *genasm() { return "insn"; }
> constexpr const char *genconstraint() { return "r"; }
> 
>   asm(genasm() :: (genconstraint()) (input));
> 
> The constexpr strings are allowed for the asm template, the
> constraints and the clobbers (every time current asm accepts a string)
> 
> The drawback of this scheme is that the constexpr doesn't have
> full control over the input/output/clobber lists, but that can be
> usually handled with a switch statement.  One could imagine
> more flexible ways to handle that, for example supporting constexpr
> vectors for the clobber list, or similar. But even without
> that it is already useful.
> 
> Bootstrapped and full test on x86_64-linux.
> ---
>  gcc/cp/parser.cc   | 76 ++
>  gcc/doc/extend.texi| 17 +-
>  gcc/testsuite/g++.dg/constexpr-asm-1.C | 30 ++
>  3 files changed, 99 insertions(+), 24 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/constexpr-asm-1.C
> 
> diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
> index 3748ccd49ff3..cc323dc8557a 100644
> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -22654,6 +22654,43 @@ cp_parser_using_directive (cp_parser* parser)
>cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
>  }
>  
> +/* Parse a string literal or constant expression yielding a string.
> +   The constant expression uses extra parens to avoid ambiguity with "x" 
> (expr).
> +
> +   asm-string-expr:
> + string-literal
> + ( constant-expr ) */
> +
> +static tree
> +cp_parser_asm_string_expression (cp_parser *parser)
> +{
> +  location_t sloc = cp_lexer_peek_token (parser->lexer)->location;
> +
> +  if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN))

Why should it be wrapped in ()s?

> +{
> +  matching_parens parens;
> +  parens.consume_open (parser);
> +  tree string = cp_parser_constant_expression (parser);
> +  if (string != error_mark_node)
> + string = cxx_constant_value (string, tf_error);
> +  if (TREE_CODE (string) == NOP_EXPR)
> + string = TREE_OPERAND (string, 0);
> +  if (TREE_CODE (string) == ADDR_EXPR && TREE_CODE (TREE_OPERAND 
> (string, 0)) == STRING_CST)

Too long line.

> + string = TREE_OPERAND (string, 0);
> +  if (TREE_CODE (string) == VIEW_CONVERT_EXPR)
> + string = TREE_OPERAND (string, 0);
> +  if (TREE_CODE (string) != STRING_CST && string != error_mark_node)
> + {
> +   error_at (sloc, "Expected string valued constant expression for 
> %, not type %qT",
> + TREE_TYPE (string));

Again, too long line, diagnostics should never start with a capital letter,
but more importantly, this will handle only a small subset of what one can
construct with constexpr functions, not everything they can return even if
they return const char * is necessarily a STRING_LITERAL, could be an array
of chars or something similar, especially if the intent is not just to
return prepared whole string literals, but construct the template etc. from
substrings.

Given the https://wg21.link/P2741R3 C++26 addition, I wonder if it wouldn't
be much better to stay compatible with the static_assert extension there and
use similar thing for inline asm.
See https://gcc.gnu.org/r14-5771 and r14-5956 follow-up for the actual
implementation.
One issue is the character set question.  The strings in inline asm right
now are untranslated, i.e. remain in SOURCE_CHARSET, usually UTF-8 (in
theory there is also UTF-EBCDIC support, but nobody knows if it actually
works), which is presumably what the assembler expects too.
Most of the string literals and character literals constexpr deals with
are in the execution character set though.  For static_assert we just assume
the user knows what he is doing when trying to emit non-ASCII characters in
the message when using say -fexec-charset=EBCDICUS , but should that be the
case for inline asm too?  Or should we try to translate those strings back
from execution character set to source character set?  Or require that it
actually constructs UTF-8 string literals and for the UTF-EBCDIC case
translate from UTF-8 to UTF-EBCDIC?  So the user constexpr functions then
would return u8"insn"; or construct from u8'i' etc. 

Re: CI for "Option handling: add documentation URLs"

2024-03-15 Thread Mark Wielaard
Hi YunQiang Su,

On Fri, Mar 15, 2024 at 03:33:28PM +0800, YunQiang Su wrote:
> Great work. The CI works well now: it blames me ;)
> https://builder.sourceware.org/buildbot/#/builders/269/builds/3846
> 
> When I add '-mstrict-align' option to MIPS,
> the riscv.opt.urls, sysv4.opt.urls, xtensa.opt.urls are changed also.
> (why they are effected?

They are effected because they also have a '-mstrict-align' option and
each option with the same name gets an unique number.

> So what's the best practice for this cases?
> Should I push a new commit? Or in fact a single commit is preferred?

I don't know if there is a rule for this. But I hope this falls under
the obvious rule. What I would do is simply take that diff the CI
produced.
https://builder.sourceware.org/buildbot/api/v2/logs/7798308/raw

Apply it and commit that with:

Regenerate opt.urls

Fixes: acc38ff59976 ("MIPS: Add -m(no-)strict-align option")

gcc/ChangeLog:

* config/riscv/riscv.opt.urls: Regenerated.
* config/rs6000/sysv4.opt.urls: Likewise.
* config/xtensa/xtensa.opt.urls: Likewise.

Cheers,

Mark



Re: [PATCH] vect: Use xor to invert oversized vector masks

2024-03-15 Thread Andrew Stubbs

On 15/03/2024 03:45, Hongtao Liu wrote:

On Thu, Mar 14, 2024 at 11:42 PM Andrew Stubbs  wrote:


Don't enable excess lanes when inverting vector bit-masks smaller than the
integer mode.  This is yet another case of wrong-code due to mishandling
of oversized bitmasks.

This issue shows up in vect/tsvc/vect-tsvc-s278.c and
vect/tsvc/vect-tsvc-s279.c if I set the preferred vector size to V32
(down from V64) on amdgcn.

OK for mainline?

Andrew

gcc/ChangeLog:

 * expr.cc (expand_expr_real_2): Use xor to invert vector masks.
---
  gcc/expr.cc | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 403eeaa108e4..3540327d879e 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -10497,6 +10497,17 @@ expand_expr_real_2 (sepops ops, rtx target, 
machine_mode tmode,
immed_wide_int_const (mask, int_mode),
target, 1, OPTAB_LIB_WIDEN);
 }
+  /* If it's a vector mask don't enable excess bits.  */
+  else if (VECTOR_BOOLEAN_TYPE_P (type)
+  && SCALAR_INT_MODE_P (mode)
+  && maybe_ne (GET_MODE_PRECISION (mode),
+   TYPE_VECTOR_SUBPARTS (type).to_constant ()))
+   {
+ auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+ temp = expand_binop (mode, xor_optab, op0,
+  GEN_INT ((HOST_WIDE_INT_1U << nunits) - 1),
+  target, true, OPTAB_WIDEN);
+   }

Not review, just curious, should the issue be fixed by the commit in PR113576.
Also wonder besides cbranch, excess land bits also matter?
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113576#c35


It does seem to be another case of the same problem, but those commits 
are long enough ago that I do have them, and still saw a problem.


Andrew



Re: [PATCH] i386: Fix a pasto in ix86_expand_int_sse_cmp [PR114339]

2024-03-15 Thread Uros Bizjak
On Fri, Mar 15, 2024 at 9:50 AM Jakub Jelinek  wrote:
>
> Hi!
>
> In r13-3803-gfa271afb58 I've added an optimization for LE/LEU/GE/GEU
> comparison against CONST_VECTOR.  As the comments say:
>  /* x <= cst can be handled as x < cst + 1 unless there is
> wrap around in cst + 1.  */
> ...
>  /* For LE punt if some element is signed maximum.  */
> ...
>  /* For LEU punt if some element is unsigned maximum.  */
> and
>  /* x >= cst can be handled as x > cst - 1 unless there is
> wrap around in cst - 1.  */
> ...
>  /* For GE punt if some element is signed minimum.  */
> ...
>  /* For GEU punt if some element is zero.  */
> Apparently I wrote the GE/GEU (second case) first and then
> copied/adjusted it for LE/LEU, most of the adjustments look correct, but
> I've left if (code == GE) comparison when testing if it should punt for
> signed maximum.  That condition is never true, because this is in
> switch (code) { ... case LE: case LEU: block and we really meant to
> be what the comment says, for LE punt if some element is signed maximum,
> as then cst + 1 wraps around.
>
> The following patch fixes the pasto.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2024-03-15  Jakub Jelinek  
>
> PR target/114339
> * config/i386/i386-expand.cc (ix86_expand_int_sse_cmp) : Fix
> a pasto, compare code against LE rather than GE.
>
> * gcc.target/i386/pr114339.c: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/i386-expand.cc.jj   2024-03-07 08:34:21.043802912 +0100
> +++ gcc/config/i386/i386-expand.cc  2024-03-14 22:55:57.321842686 +0100
> @@ -4690,7 +4690,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum
>   rtx elt = CONST_VECTOR_ELT (cop1, i);
>   if (!CONST_INT_P (elt))
> break;
> - if (code == GE)
> + if (code == LE)
> {
>   /* For LE punt if some element is signed maximum.  */
>   if ((INTVAL (elt) & (GET_MODE_MASK (eltmode) >> 1))
> --- gcc/testsuite/gcc.target/i386/pr114339.c.jj 2024-03-14 22:58:04.739076025 
> +0100
> +++ gcc/testsuite/gcc.target/i386/pr114339.c2024-03-14 22:38:59.736972124 
> +0100
> @@ -0,0 +1,20 @@
> +/* PR target/114339 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -Wno-psabi" } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +
> +typedef long long V __attribute__((vector_size (16)));
> +
> +__attribute__((noipa)) V
> +foo (V a)
> +{
> +  return a <= (V) {0, __LONG_LONG_MAX__ };
> +}
> +
> +int
> +main ()
> +{
> +  V t = foo ((V) { 0, 0 });
> +  if (t[0] != -1LL || t[1] != -1LL)
> +__builtin_abort ();
> +}
>
> Jakub
>


[committed] testsuite: Fix up pr104601.C for recent libstdc++ changes

2024-03-15 Thread Jakub Jelinek
Hi!

On Thu, Mar 14, 2024 at 04:58:41PM +, Jonathan Wakely wrote:
> Add the [[nodiscard]] attribute to several functions in .

r14-9478 added [[nodiscard]] to various  APIs including find_if
the pr104601.C testcase uses.  As it is an optimization bug fix testcase,
haven't tried to adjust the testcase to use the find_if result, but instead
have added -Wno-unused-result flag to quiet the warning.

The testcase tests side-effects of the lambda used by find_if rather than
its actual result.

Tested on x86_64-linux -m32/-m64, committed to trunk as obvious.

2024-03-15  Jakub Jelinek  

* g++.dg/torture/pr104601.C: Add -Wno-unused-result to dg-options.

--- gcc/testsuite/g++.dg/torture/pr104601.C.jj  2022-05-23 21:44:48.390854093 
+0200
+++ gcc/testsuite/g++.dg/torture/pr104601.C 2024-03-15 09:59:08.581824938 
+0100
@@ -1,6 +1,6 @@
 // PR tree-optimization/104601
 // { dg-do run }
-// { dg-options "-std=c++17" }
+// { dg-options "-std=c++17 -Wno-unused-result" }
 
 #include 
 #include 


Jakub



Re: [PATCH] expand: EXTEND_BITINT CALL_EXPR results [PR114332]

2024-03-15 Thread Richard Biener
On Fri, 15 Mar 2024, Jakub Jelinek wrote:

> Hi!
> 
> The x86-64 and aarch64 psABIs (and the unwritten ia64 psABI part) say that
> the padding bits of _BitInt are undefined, while the expansion internally
> typically assumes that non-mode precision integers are sign/zero extended
> and extends after operations.  We handle that mismatch with EXTEND_BITINT
> done when reading from untrusted sources like function arguments, reading
> _BitInt from memory etc. but otherwise keep relying on stuff being extended
> internally (say in pseudos).
> The return value of a function is an ABI boundary though too and we need
> to extend that too.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> 2024-03-15  Jakub Jelinek  
> 
>   PR middle-end/114332
>   * expr.cc (expand_expr_real_1): EXTEND_BITINT also CALL_EXPR results.
> 
> --- gcc/expr.cc.jj2024-03-04 19:20:27.120200885 +0100
> +++ gcc/expr.cc   2024-03-14 19:22:50.623550538 +0100
> @@ -12350,7 +12350,8 @@ expand_expr_real_1 (tree exp, rtx target
>   return expand_builtin (exp, target, subtarget, tmode, ignore);
> }
>}
> -  return expand_call (exp, target, ignore);
> +  temp = expand_call (exp, target, ignore);
> +  return EXTEND_BITINT (temp);
>  
>  case VIEW_CONVERT_EXPR:
>op0 = NULL_RTX;
> --- gcc/testsuite/gcc.dg/torture/bitint-64.c.jj   2024-03-14 
> 19:50:41.051311949 +0100
> +++ gcc/testsuite/gcc.dg/torture/bitint-64.c  2024-03-14 19:50:05.480806808 
> +0100
> @@ -0,0 +1,22 @@
> +/* PR middle-end/114332 */
> +/* { dg-do run { target bitint } } */
> +/* { dg-options "-std=c23 -fwrapv" } */
> +/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
> +
> +enum E { E22 = 22 } e = E22;
> +
> +_BitInt (5)
> +foo (void)
> +{
> +  _Atomic _BitInt (5) b = 0;
> +  b += e;
> +  return b;
> +}
> +
> +int
> +main ()
> +{
> +  if (foo () != -10)
> +__builtin_abort ();
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


[PATCH] i386: Fix a pasto in ix86_expand_int_sse_cmp [PR114339]

2024-03-15 Thread Jakub Jelinek
Hi!

In r13-3803-gfa271afb58 I've added an optimization for LE/LEU/GE/GEU
comparison against CONST_VECTOR.  As the comments say:
 /* x <= cst can be handled as x < cst + 1 unless there is
wrap around in cst + 1.  */
...
 /* For LE punt if some element is signed maximum.  */
...
 /* For LEU punt if some element is unsigned maximum.  */
and
 /* x >= cst can be handled as x > cst - 1 unless there is
wrap around in cst - 1.  */
...
 /* For GE punt if some element is signed minimum.  */
...
 /* For GEU punt if some element is zero.  */
Apparently I wrote the GE/GEU (second case) first and then
copied/adjusted it for LE/LEU, most of the adjustments look correct, but
I've left if (code == GE) comparison when testing if it should punt for
signed maximum.  That condition is never true, because this is in
switch (code) { ... case LE: case LEU: block and we really meant to
be what the comment says, for LE punt if some element is signed maximum,
as then cst + 1 wraps around.

The following patch fixes the pasto.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-03-15  Jakub Jelinek  

PR target/114339
* config/i386/i386-expand.cc (ix86_expand_int_sse_cmp) : Fix
a pasto, compare code against LE rather than GE.

* gcc.target/i386/pr114339.c: New test.

--- gcc/config/i386/i386-expand.cc.jj   2024-03-07 08:34:21.043802912 +0100
+++ gcc/config/i386/i386-expand.cc  2024-03-14 22:55:57.321842686 +0100
@@ -4690,7 +4690,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum
  rtx elt = CONST_VECTOR_ELT (cop1, i);
  if (!CONST_INT_P (elt))
break;
- if (code == GE)
+ if (code == LE)
{
  /* For LE punt if some element is signed maximum.  */
  if ((INTVAL (elt) & (GET_MODE_MASK (eltmode) >> 1))
--- gcc/testsuite/gcc.target/i386/pr114339.c.jj 2024-03-14 22:58:04.739076025 
+0100
+++ gcc/testsuite/gcc.target/i386/pr114339.c2024-03-14 22:38:59.736972124 
+0100
@@ -0,0 +1,20 @@
+/* PR target/114339 */
+/* { dg-do run } */
+/* { dg-options "-O2 -Wno-psabi" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+typedef long long V __attribute__((vector_size (16)));
+
+__attribute__((noipa)) V
+foo (V a)
+{
+  return a <= (V) {0, __LONG_LONG_MAX__ };
+}
+
+int
+main ()
+{
+  V t = foo ((V) { 0, 0 });
+  if (t[0] != -1LL || t[1] != -1LL)
+__builtin_abort ();
+}

Jakub



[PATCH] expand: EXTEND_BITINT CALL_EXPR results [PR114332]

2024-03-15 Thread Jakub Jelinek
Hi!

The x86-64 and aarch64 psABIs (and the unwritten ia64 psABI part) say that
the padding bits of _BitInt are undefined, while the expansion internally
typically assumes that non-mode precision integers are sign/zero extended
and extends after operations.  We handle that mismatch with EXTEND_BITINT
done when reading from untrusted sources like function arguments, reading
_BitInt from memory etc. but otherwise keep relying on stuff being extended
internally (say in pseudos).
The return value of a function is an ABI boundary though too and we need
to extend that too.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-03-15  Jakub Jelinek  

PR middle-end/114332
* expr.cc (expand_expr_real_1): EXTEND_BITINT also CALL_EXPR results.

--- gcc/expr.cc.jj  2024-03-04 19:20:27.120200885 +0100
+++ gcc/expr.cc 2024-03-14 19:22:50.623550538 +0100
@@ -12350,7 +12350,8 @@ expand_expr_real_1 (tree exp, rtx target
return expand_builtin (exp, target, subtarget, tmode, ignore);
  }
   }
-  return expand_call (exp, target, ignore);
+  temp = expand_call (exp, target, ignore);
+  return EXTEND_BITINT (temp);
 
 case VIEW_CONVERT_EXPR:
   op0 = NULL_RTX;
--- gcc/testsuite/gcc.dg/torture/bitint-64.c.jj 2024-03-14 19:50:41.051311949 
+0100
+++ gcc/testsuite/gcc.dg/torture/bitint-64.c2024-03-14 19:50:05.480806808 
+0100
@@ -0,0 +1,22 @@
+/* PR middle-end/114332 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23 -fwrapv" } */
+/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+enum E { E22 = 22 } e = E22;
+
+_BitInt (5)
+foo (void)
+{
+  _Atomic _BitInt (5) b = 0;
+  b += e;
+  return b;
+}
+
+int
+main ()
+{
+  if (foo () != -10)
+__builtin_abort ();
+}

Jakub



[PATCH] libgcc: Fix quotient and/or remainder negation in __divmodbitint4 [PR114327]

2024-03-15 Thread Jakub Jelinek
Hi!

While for __mulbitint3 we actually don't negate anything and perform the
multiplication in unsigned style always, for __divmodbitint4 if the operands
aren't unsigned and are negative, we negate them first and then try to
negate them as needed at the end.
quotient is negated if just one of the operands was negated and the other
wasn't or vice versa, and remainder is negated if the first operand was
negated.
The case which doesn't work correctly is if due to limited range of the
operands we perform the division/modulo in some smaller number of limbs
and then extend it to the desired precision of the quotient and/or
remainder results.  If they aren't negated, the extension is done with
memset to 0, if they are negated, the extension was done with memset
to -1.  The problem is that if the quotient or remainder is zero,
then bitint_negate negates it again to zero (that is ok), but we should
then extend with memset to 0, not memset to -1.

The following patch achieves that by letting bitint_negate also check if
the negated operand is zero and changes the memset argument based on that.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-03-15  Jakub Jelinek  

PR libgcc/114327
* libgcc2.c (bitint_negate): Return UWtype bitwise or of all the limbs
before negation rather than void.
(__divmodbitint4): Determine whether to fill in the upper limbs after
negation based on whether bitint_negate returned 0 or non-zero, rather
then always filling with -1.

* gcc.dg/torture/bitint-63.c: New test.

--- libgcc/libgcc2.c.jj 2024-02-02 22:14:14.946684874 +0100
+++ libgcc/libgcc2.c2024-03-14 13:37:38.227332706 +0100
@@ -1642,19 +1642,22 @@ __mulbitint3 (UBILtype *ret, SItype retp
 #ifdef L_divmodbitint4
 /* D = -S.  */
 
-static void
+static UWtype
 bitint_negate (UBILtype *d, const UBILtype *s, SItype n)
 {
   UWtype c = 1;
+  UWtype r = 0;
   do
 {
   UWtype sv = *s, lo;
+  r |= sv;
   s += BITINT_INC;
   c = __builtin_add_overflow (~sv, c, );
   *d = lo;
   d += BITINT_INC;
 }
   while (--n);
+  return r;
 }
 
 /* D -= S * L.  */
@@ -1977,10 +1980,10 @@ __divmodbitint4 (UBILtype *q, SItype qpr
n = qn;
  else
n = un - vn + 1;
- bitint_negate (q + BITINT_END (qn - 1, 0),
-q2 + BITINT_END (un - vn, 0), n);
+ SItype c = bitint_negate (q + BITINT_END (qn - 1, 0),
+   q2 + BITINT_END (un - vn, 0), n) ? -1 : 0;
  if (qn > n)
-   __builtin_memset (q + BITINT_END (0, n), -1,
+   __builtin_memset (q + BITINT_END (0, n), c,
  (qn - n) * sizeof (UWtype));
}
   else
@@ -1999,11 +2002,11 @@ __divmodbitint4 (UBILtype *q, SItype qpr
   if (uprec < 0)
{
  /* Negative remainder.  */
- bitint_negate (r + BITINT_END (rn - 1, 0),
-r + BITINT_END (rn - 1, 0),
-rn > vn ? vn : rn);
+ SItype c = bitint_negate (r + BITINT_END (rn - 1, 0),
+   r + BITINT_END (rn - 1, 0),
+   rn > vn ? vn : rn) ? -1 : 0;
  if (rn > vn)
-   __builtin_memset (r + BITINT_END (0, vn), -1,
+   __builtin_memset (r + BITINT_END (0, vn), c,
  (rn - vn) * sizeof (UWtype));
}
   else
--- gcc/testsuite/gcc.dg/torture/bitint-63.c.jj 2024-03-14 13:46:31.591938158 
+0100
+++ gcc/testsuite/gcc.dg/torture/bitint-63.c2024-03-14 13:45:58.306399642 
+0100
@@ -0,0 +1,30 @@
+/* PR libgcc/114327 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23" } */
+/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 256
+_BitInt(256)
+foo (_BitInt(256) b, _BitInt(256) c)
+{
+  return b % c;
+}
+
+_BitInt(256)
+bar (_BitInt(256) b, _BitInt(256) c)
+{
+  return b / c;
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 256
+  if (foo (-0x9e9b9fe60wb, 1wb))
+__builtin_abort ();
+  if (bar (1wb, -0x9e9b9fe60wb))
+__builtin_abort ();
+#endif
+}

Jakub



[committed] testsuite: Added missing } in the dg-bogus comment [PR114343]

2024-03-15 Thread Torbjörn SVENSSON
Committed below as obvious. The } got lost in my copy from the internal system.
Sorry for the inconvinience.

--

gcc/testsuite/ChangeLog:

PR testsuite/114343
* gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c:
Added missing } in the dg-bogus comment.

Signed-off-by: Torbjörn SVENSSON 
---
 .../null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
 
b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
index e8cde7338a0..33cf10c1e29 100644
--- 
a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
+++ 
b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c
@@ -60,7 +60,7 @@ static inline enum obj_type obj_type(const enum obj_type *t)
 }
 static inline struct connection *__objt_conn(enum obj_type *t)
 {
- return ((struct connection *)(((void *)(t)) - ((long)&((struct connection 
*)0)->obj_type))); /* { dg-bogus "may result in an unaligned pointer value" 
"Fixed in r14-6517-gb7e4a4c626e" { xfail short_enums } */
+ return ((struct connection *)(((void *)(t)) - ((long)&((struct connection 
*)0)->obj_type))); /* { dg-bogus "may result in an unaligned pointer value" 
"Fixed in r14-6517-gb7e4a4c626e" { xfail short_enums } } */
 }
 static inline struct connection *objt_conn(enum obj_type *t)
 {
-- 
2.25.1



[PATCH v15 23/26] c++: Implement __is_invocable built-in trait

2024-03-15 Thread Ken Matsui
Added diagnostics for build_invoke.

Ok for 15?

-- >8 --

This patch implements built-in trait for std::is_invocable.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_invocable.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_INVOCABLE.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.
* cp-tree.h (build_invoke): New function.
* method.cc (build_invoke): New function.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_invocable.
* g++.dg/ext/is_invocable1.C: New test.
* g++.dg/ext/is_invocable2.C: New test.
* g++.dg/ext/is_invocable3.C: New test.
* g++.dg/ext/is_invocable4.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |   6 +
 gcc/cp/cp-trait.def  |   1 +
 gcc/cp/cp-tree.h |   2 +
 gcc/cp/method.cc | 156 ++
 gcc/cp/semantics.cc  |   4 +
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |   3 +
 gcc/testsuite/g++.dg/ext/is_invocable1.C | 349 +++
 gcc/testsuite/g++.dg/ext/is_invocable2.C | 139 +
 gcc/testsuite/g++.dg/ext/is_invocable3.C |  51 
 gcc/testsuite/g++.dg/ext/is_invocable4.C |  33 +++
 10 files changed, 744 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable1.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable2.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable3.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable4.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 23ea66d9c12..c87b126fdb1 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3791,6 +3791,12 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_FUNCTION:
   inform (loc, "  %qT is not a function", t1);
   break;
+case CPTK_IS_INVOCABLE:
+  if (!t2)
+inform (loc, "  %qT is not invocable", t1);
+  else
+inform (loc, "  %qT is not invocable by %qE", t1, t2);
+  break;
 case CPTK_IS_LAYOUT_COMPATIBLE:
   inform (loc, "  %qT is not layout compatible with %qT", t1, t2);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 85056c8140b..6cb2b55f4ea 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -75,6 +75,7 @@ DEFTRAIT_EXPR (IS_EMPTY, "__is_empty", 1)
 DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1)
 DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1)
 DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
+DEFTRAIT_EXPR (IS_INVOCABLE, "__is_invocable", -1)
 DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2)
 DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1)
 DEFTRAIT_EXPR (IS_MEMBER_FUNCTION_POINTER, "__is_member_function_pointer", 1)
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 14895bc6585..f76e3ae0389 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7333,6 +7333,8 @@ extern tree get_copy_assign   (tree);
 extern tree get_default_ctor   (tree);
 extern tree get_dtor   (tree, tsubst_flags_t);
 extern tree build_stub_object  (tree);
+extern tree build_invoke   (tree, const_tree,
+tsubst_flags_t);
 extern tree strip_inheriting_ctors (tree);
 extern tree inherited_ctor_binfo   (tree);
 extern bool base_ctor_omit_inherited_parms (tree);
diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc
index 98c10e6a8b5..2282ce71c06 100644
--- a/gcc/cp/method.cc
+++ b/gcc/cp/method.cc
@@ -1928,6 +1928,162 @@ build_trait_object (tree type)
   return build_stub_object (type);
 }
 
+/* [func.require] Build an expression of INVOKE(FN_TYPE, ARG_TYPES...).  If the
+   given is not invocable, returns error_mark_node.  */
+
+tree
+build_invoke (tree fn_type, const_tree arg_types, tsubst_flags_t complain)
+{
+  if (error_operand_p (fn_type) || error_operand_p (arg_types))
+return error_mark_node;
+
+  gcc_assert (TYPE_P (fn_type));
+  gcc_assert (TREE_CODE (arg_types) == TREE_VEC);
+
+  /* Access check is required to determine if the given is invocable.  */
+  deferring_access_check_sentinel acs (dk_no_deferred);
+
+  /* INVOKE is an unevaluated context.  */
+  cp_unevaluated cp_uneval_guard;
+
+  bool is_ptrdatamem;
+  bool is_ptrmemfunc;
+  if (TREE_CODE (fn_type) == REFERENCE_TYPE)
+{
+  tree deref_fn_type = TREE_TYPE (fn_type);
+  is_ptrdatamem = TYPE_PTRDATAMEM_P (deref_fn_type);
+  is_ptrmemfunc = TYPE_PTRMEMFUNC_P (deref_fn_type);
+
+  /* Dereference fn_type if it is a pointer to member.  */
+  if (is_ptrdatamem || is_ptrmemfunc)
+   fn_type = deref_fn_type;
+}
+  else
+{
+  is_ptrdatamem = TYPE_PTRDATAMEM_P (fn_type);
+  is_ptrmemfunc = TYPE_PTRMEMFUNC_P (fn_type);
+}
+
+  if (is_ptrdatamem && TREE_VEC_LENGTH (arg_types) != 1)
+{
+  if (complain & tf_error)
+ 

Re: C/C++ frontend patches ping

2024-03-15 Thread Richard Biener
On Fri, Mar 15, 2024 at 6:20 AM Andi Kleen  wrote:
>
> Andrew Pinski  writes:
>
> > On Thu, Mar 14, 2024 at 9:36 PM Andi Kleen  wrote:
> >>
> >>
> >> musttail support for C/C++
> >>
> >> https://gcc.gnu.org/pipermail/gcc-patches/2024-January/643867.html
> >>
> >>
> >> Support constexpr for asm statements in C++
> >>
> >> https://gcc.gnu.org/pipermail/gcc-patches/2024-January/643933.html
> >
> >
> > Both of these were posted long after the start of stage 3 and close
> > into the beginning of stage 4 and since they are both new features I
> > really doubt they will be reviewed until stage 1 opens up which will
> > be in about a month or so.
>
> I don't buy it.
>
> This mailing list and the git logs are full of approved feature patches
> that are clearly not bug fixes.  If there is really such a rule it is
> extremely selectively and unfairly enforced.

I'm not aware of any new features being proposed and approved after
stage4 started.  Target specific things always have discretion of target
maintainers (but even they should know better).

So yes, I have expected [[musttail]] to be a GCC 15 feature only.

Richard.

> -Andi


Re: [PATCH] vect: Use xor to invert oversized vector masks

2024-03-15 Thread Richard Biener
On Fri, Mar 15, 2024 at 4:35 AM Hongtao Liu  wrote:
>
> On Thu, Mar 14, 2024 at 11:42 PM Andrew Stubbs  wrote:
> >
> > Don't enable excess lanes when inverting vector bit-masks smaller than the
> > integer mode.  This is yet another case of wrong-code due to mishandling
> > of oversized bitmasks.
> >
> > This issue shows up in vect/tsvc/vect-tsvc-s278.c and
> > vect/tsvc/vect-tsvc-s279.c if I set the preferred vector size to V32
> > (down from V64) on amdgcn.
> >
> > OK for mainline?
> >
> > Andrew
> >
> > gcc/ChangeLog:
> >
> > * expr.cc (expand_expr_real_2): Use xor to invert vector masks.
> > ---
> >  gcc/expr.cc | 11 +++
> >  1 file changed, 11 insertions(+)
> >
> > diff --git a/gcc/expr.cc b/gcc/expr.cc
> > index 403eeaa108e4..3540327d879e 100644
> > --- a/gcc/expr.cc
> > +++ b/gcc/expr.cc
> > @@ -10497,6 +10497,17 @@ expand_expr_real_2 (sepops ops, rtx target, 
> > machine_mode tmode,
> >immed_wide_int_const (mask, int_mode),
> >target, 1, OPTAB_LIB_WIDEN);
> > }
> > +  /* If it's a vector mask don't enable excess bits.  */
> > +  else if (VECTOR_BOOLEAN_TYPE_P (type)
> > +  && SCALAR_INT_MODE_P (mode)
> > +  && maybe_ne (GET_MODE_PRECISION (mode),
> > +   TYPE_VECTOR_SUBPARTS (type).to_constant ()))
> > +   {
> > + auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> > + temp = expand_binop (mode, xor_optab, op0,
> > +  GEN_INT ((HOST_WIDE_INT_1U << nunits) - 1),
> > +  target, true, OPTAB_WIDEN);
> > +   }
> Not review, just curious, should the issue be fixed by the commit in PR113576.
> Also wonder besides cbranch, excess land bits also matter?
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113576#c35

Yes, you patch BIT_NOT but we decided to patch final compares.  Is it that
we need to fixup every mask use in a .COND_* expansion as well?  If so
we should do it there.

Richard.

> >else
> > temp = expand_unop (mode, one_cmpl_optab, op0, target, 1);
> >gcc_assert (temp);
> > --
> > 2.41.0
> >
>
>
> --
> BR,
> Hongtao


Re: CI for "Option handling: add documentation URLs"

2024-03-15 Thread YunQiang Su
Great work. The CI works well now: it blames me ;)
https://builder.sourceware.org/buildbot/#/builders/269/builds/3846

When I add '-mstrict-align' option to MIPS,
the riscv.opt.urls, sysv4.opt.urls, xtensa.opt.urls are changed also.
(why they are effected?

So what's the best practice for this cases?
Should I push a new commit? Or in fact a single commit is preferred?

-- 
YunQiang Su


[PATCH v3 1/2] extend.texi: Arrange pre-existing built-in traits alphabetically

2024-03-15 Thread Ken Matsui
Consolidated most patches into one for easier review and added
documentation for all missing built-in traits.

Ok for trunk?

-- >8 --

This patch arranges pre-existing built-in traits alphabetically for
better codebase consistency and easier future integration of changes.

gcc/ChangeLog:

* doc/extend.texi (Type Traits): Arrange pre-existing built-in
traits alphabetically.

Signed-off-by: Ken Matsui 
---
 gcc/doc/extend.texi | 50 ++---
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index df0982fdfda..1c61682b102 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -29500,15 +29500,6 @@ Requires: @var{type} shall be a complete type, 
(possibly cv-qualified)
 @code{void}, or an array of unknown bound.
 @enddefbuiltin
 
-@defbuiltin{bool __has_nothrow_copy (@var{type})}
-If @code{__has_trivial_copy (type)} is @code{true} then the trait is
-@code{true}, else if @var{type} is a cv-qualified class or union type
-with copy constructors that are known not to throw an exception then
-the trait is @code{true}, else it is @code{false}.
-Requires: @var{type} shall be a complete type, (possibly cv-qualified)
-@code{void}, or an array of unknown bound.
-@enddefbuiltin
-
 @defbuiltin{bool __has_nothrow_constructor (@var{type})}
 If @code{__has_trivial_constructor (type)} is @code{true} then the trait
 is @code{true}, else if @var{type} is a cv class or union type (or array
@@ -29518,6 +29509,15 @@ Requires: @var{type} shall be a complete type, 
(possibly cv-qualified)
 @code{void}, or an array of unknown bound.
 @enddefbuiltin
 
+@defbuiltin{bool __has_nothrow_copy (@var{type})}
+If @code{__has_trivial_copy (type)} is @code{true} then the trait is
+@code{true}, else if @var{type} is a cv-qualified class or union type
+with copy constructors that are known not to throw an exception then
+the trait is @code{true}, else it is @code{false}.
+Requires: @var{type} shall be a complete type, (possibly cv-qualified)
+@code{void}, or an array of unknown bound.
+@enddefbuiltin
+
 @defbuiltin{bool __has_trivial_assign (@var{type})}
 If @var{type} is @code{const}- qualified or is a reference type then
 the trait is @code{false}.  Otherwise if @code{__is_trivial (type)} is
@@ -29528,15 +29528,6 @@ Requires: @var{type} shall be a complete type, 
(possibly cv-qualified)
 @code{void}, or an array of unknown bound.
 @enddefbuiltin
 
-@defbuiltin{bool __has_trivial_copy (@var{type})}
-If @code{__is_trivial (type)} is @code{true} or @var{type} is a reference
-type then the trait is @code{true}, else if @var{type} is a cv class
-or union type with a trivial copy constructor ([class.copy]) then the trait
-is @code{true}, else it is @code{false}.  Requires: @var{type} shall be
-a complete type, (possibly cv-qualified) @code{void}, or an array of unknown
-bound.
-@enddefbuiltin
-
 @defbuiltin{bool __has_trivial_constructor (@var{type})}
 If @code{__is_trivial (type)} is @code{true} then the trait is @code{true},
 else if @var{type} is a cv-qualified class or union type (or array thereof)
@@ -29546,6 +29537,15 @@ Requires: @var{type} shall be a complete type, 
(possibly cv-qualified)
 @code{void}, or an array of unknown bound.
 @enddefbuiltin
 
+@defbuiltin{bool __has_trivial_copy (@var{type})}
+If @code{__is_trivial (type)} is @code{true} or @var{type} is a reference
+type then the trait is @code{true}, else if @var{type} is a cv class
+or union type with a trivial copy constructor ([class.copy]) then the trait
+is @code{true}, else it is @code{false}.  Requires: @var{type} shall be
+a complete type, (possibly cv-qualified) @code{void}, or an array of unknown
+bound.
+@enddefbuiltin
+
 @defbuiltin{bool __has_trivial_destructor (@var{type})}
 If @code{__is_trivial (type)} is @code{true} or @var{type} is a reference type
 then the trait is @code{true}, else if @var{type} is a cv class or union
@@ -29561,6 +29561,13 @@ If @var{type} is a class type with a virtual destructor
 Requires: If @var{type} is a non-union class type, it shall be a complete type.
 @enddefbuiltin
 
+@defbuiltin{bool __integer_pack (@var{length})}
+When used as the pattern of a pack expansion within a template
+definition, expands to a template argument pack containing integers
+from @code{0} to @code{@var{length}-1}.  This is provided for
+efficient implementation of @code{std::make_integer_sequence}.
+@enddefbuiltin
+
 @defbuiltin{bool __is_abstract (@var{type})}
 If @var{type} is an abstract class ([class.abstract]) then the trait
 is @code{true}, else it is @code{false}.
@@ -29662,13 +29669,6 @@ The underlying type of @var{type}.
 Requires: @var{type} shall be an enumeration type ([dcl.enum]).
 @enddefbuiltin
 
-@defbuiltin{bool __integer_pack (@var{length})}
-When used as the pattern of a pack expansion within a template
-definition, expands to a template argument pack containing integers
-from @code{0} to @code{@var{length}-1}.  This is provided 

[PATCH v3 2/2] extend.texi: Add documentation for all missing built-in traits [PR87343]

2024-03-15 Thread Ken Matsui
PR c++/87343

gcc/ChangeLog:

* doc/extend.texi (Expression-yielding Type Traits): New
subsection.
(Type-yielding Type Traits): Likewise.
(C++ Concepts): Move __is_same to ...
(Expression-yielding Type Traits): ... here.
(__has_unique_object_representations) New documentation.
(__is_array): Likewise.
(__is_assignable): Likewise.
(__is_bounded_array): Likewise.
(__is_constructible): Likewise.
(__is_convertible): Likewise.
(__is_function): Likewise.
(__is_layout_compatible): Likewise.
(__is_member_function_pointer): Likewise.
(__is_member_object_pointer): Likewise.
(__is_member_pointer): Likewise.
(__is_nothrow_assignable): Likewise.
(__is_nothrow_constructible): Likewise.
(__is_nothrow_convertible): Likewise.
(__is_object): Likewise.
(__is_pointer_interconvertible_base_of): Likewise.
(__is_reference): Likewise.
(__is_scoped_enum): Likewise.
(__is_trivially_assignable): Likewise.
(__is_trivially_constructible): Likewise.
(__is_trivially_copyable): Likewise.
(__reference_constructs_from_temporary): Likewise.
(__reference_converts_from_temporary): Likewise.
(__bases): Likewise.
(__direct_bases): Likewise.
(__remove_cv): Likewise.
(__remove_cvref): Likewise.
(__remove_pointer): Likewise.
(__remove_reference): Likewise.
(__type_pack_element): Likewise.
(__underlying_type): Likewise.
(__is_enum): Add @anchor and @ref to related traits.
(__is_trivial): Likewise.

Signed-off-by: Ken Matsui 
---
 gcc/doc/extend.texi | 279 ++--
 1 file changed, 270 insertions(+), 9 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1c61682b102..2c8199fb1ab 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -29489,6 +29489,12 @@ compile-time determination of
 various characteristics of a type (or of a
 pair of types).
 
+
+@subsection Expression-yielding Type Traits
+
+These built-in traits yield an expression of type @code{bool}
+or @code{size_t}.
+
 @defbuiltin{bool __has_nothrow_assign (@var{type})}
 If @var{type} is @code{const}-qualified or is a reference type then
 the trait is @code{false}.  Otherwise if @code{__has_trivial_assign (type)}
@@ -29555,6 +29561,15 @@ Requires: @var{type} shall be a complete type, 
(possibly cv-qualified)
 @code{void}, or an array of unknown bound.
 @enddefbuiltin
 
+@defbuiltin{bool __has_unique_object_representations (@var{type})}
+If @code{__is_trivially_copyable (type)} is @code{true} and @var{type}
+has no padding bits, meaning that any two objects of @var{type} with
+the same value will have identical object representations, then
+the trait is @code{true}, else it is @code{false}.
+Requires: @var{type} shall be a complete type, (possibly cv-qualified)
+@code{void}, or an array of unknown bound.
+@enddefbuiltin
+
 @defbuiltin{bool __has_virtual_destructor (@var{type})}
 If @var{type} is a class type with a virtual destructor
 ([class.dtor]) then the trait is @code{true}, else it is @code{false}.
@@ -29580,6 +29595,23 @@ If @var{type} is an aggregate type ([dcl.init.aggr]) 
the trait is
 Requires: If @var{type} is a class type, it shall be a complete type.
 @enddefbuiltin
 
+@anchor{__is_array}
+@defbuiltin{bool __is_array (@var{type})}
+If @var{type} is an array type ([dcl.array]) the trait is @code{true},
+else it is @code{false}.
+See also: @ref{__is_bounded_array}.
+@enddefbuiltin
+
+@anchor{__is_assignable}
+@defbuiltin{bool __is_assignable (@var{type1}, @var{type2})}
+If @var{type1} is a class type that has an assignment operator
+accepting @var{type2}, or if @var{type2} can be implicitly converted to
+a type that @var{type1} can accept on assignment, then the trait is
+@code{true}, else it is @code{false}.
+Requires: If @var{type1} is a class type, it shall be a complete type.
+See also: @ref{__is_nothrow_assignable}, @ref{__is_trivially_assignable}.
+@enddefbuiltin
+
 @defbuiltin{bool __is_base_of (@var{base_type}, @var{derived_type})}
 If @var{base_type} is a base class of @var{derived_type}
 ([class.derived]) then the trait is @code{true}, otherwise it is @code{false}.
@@ -29592,11 +29624,37 @@ type (disregarding cv-qualifiers), @var{derived_type} 
shall be a complete
 type.  A diagnostic is produced if this requirement is not met.
 @enddefbuiltin
 
+@anchor{__is_bounded_array}
+@defbuiltin{bool __is_bounded_array (@var{type})}
+If @var{type} is an array type of known bound ([dcl.array])
+the trait is @code{true}, else it is @code{false}.
+See also: @ref{__is_array}.
+@enddefbuiltin
+
 @defbuiltin{bool __is_class (@var{type})}
 If @var{type} is a cv-qualified class type, and not a union type
 ([basic.compound]) the trait is @code{true}, else it is @code{false}.
 @enddefbuiltin
 

Re: Patch ping Re: [PATCH] icf: Reset SSA_NAME_{PTR,RANGE}_INFO in successfully merged functions [PR113907]

2024-03-15 Thread Richard Biener
On Thu, 14 Mar 2024, Jan Hubicka wrote:

> > > We have wrong code with LTO, too.
> > 
> > I know.
> > 
> > > The problem is that IPA passes (and
> > > not only that, loop analysis too) does analysis at compile time (with
> > > value numbers in) and streams the info separately.
> > 
> > And that is desirable, because otherwise it simply couldn't derive any
> > ranges.
> > 
> > >  Removal of value ranges
> > > (either by LTO or by your patch) happens between computing these
> > > summaries and using them, so this can be used to trigger wrong code,
> > > sadly.
> > 
> > Yes.  But with LTO, I don't see how the IPA ICF comparison whether
> > two functions are the same or not could be done with the
> > SSA_NAME_{RANGE,PTR}_INFO in, otherwise it could only ICF merge functions
> > from the same TUs.  So the comparison IMHO (and the assert checks in my
> > patch prove that) is done when the SSA_NAME_{RANGE,PTR}_INFO aren't in
> > anymore.  So, one just needs to compare and punt or union whatever
> > is or could be influenced in the IPA streamed data from the ranges etc.
> > And because one has to do it for LTO, doing it for non-LTO should be
> > sufficient too.
> 
> Sorry, this was bit of a misunderstanding: I tought you still considered
> the original patch to be full fix, while I tought I should look into it
> more and dig out more issues.  This is bit of can of worms.  Overall I
> think the plan is:
> 
> This stage4
> 1) fix VR divergences by either comparing or droping them
> 2) fix jump function differences by comparing them
>(I just constructed a testcase showing that jump functions can
> diverge for other reasons than just VR, so this is deeper problem,
> too)
> 3) try to construct aditional wrong code testcases (finite_p
>divergences, trapping)
> Next stage1
> 4) implement VR and PTR info streaming
> 5) make ICF to compare VRs and punt otherwise
> 6) implement optimize_size feature to ICF that will not punt on
>diverging TBAA or value ranges and do merging instead.
>We need some extra infrastructure for that, being able to keep the
>maps between basic blocks and SSA names from comparsion stage to
>merging stage
> 7) measure how effective ICF becomes in optimize_size mode and implement
>heuristics on how much metadata merging we want to do with -O2/-O3 as
>well.
>Based on older data Martin collected for his thesis, ICF used to be
>must more effective on libreoffice then it is today, so hopefully we
>can recover 10-15% binary size improvmeents here.
> 8) General ICF TLC.  There are many half finished things for a while in
>this pass (such as merging with different BB or stmt orders).
> 
> I am attaching the compare patch which also fixes the original wrong
> code. If you preffer your version, just go ahead to commit it. Otherwise
> I will add your testcase for this patch and commit this one.
> Statistically we almost never merge functions with different value
> ranges (three in testsuite, 0 during bootstrap, 1 during LTO bootstrap
> and probably few in LLVM build - there are 15 cases reported, but some
> are false positives caused by hash function conflicts).
> 
> Honza
> 
> gcc/ChangeLog:
> 
>   * ipa-icf-gimple.cc (func_checker::compare_ssa_name): Compare value 
> ranges.
> 
> diff --git a/gcc/ipa-icf-gimple.cc b/gcc/ipa-icf-gimple.cc
> index 8c2df7a354e..37c416d777d 100644
> --- a/gcc/ipa-icf-gimple.cc
> +++ b/gcc/ipa-icf-gimple.cc
> @@ -39,9 +39,11 @@ along with GCC; see the file COPYING3.  If not see
>  #include "cfgloop.h"
>  #include "attribs.h"
>  #include "gimple-walk.h"
> +#include "value-query.h"
> +#include "value-range-storage.h"
>  
>  #include "tree-ssa-alias-compare.h"
>  #include "ipa-icf-gimple.h"
>  
>  namespace ipa_icf_gimple {
>  
> @@ -109,6 +116,35 @@ func_checker::compare_ssa_name (const_tree t1, 
> const_tree t2)
>else if (m_target_ssa_names[i2] != (int) i1)
>  return false;
>  
> +  if (POINTER_TYPE_P (TREE_TYPE (t1)))
> +{
> +  if (SSA_NAME_PTR_INFO (t1))
> + {
> +   if (!SSA_NAME_PTR_INFO (t2)
> +   || SSA_NAME_PTR_INFO (t1)->align != SSA_NAME_PTR_INFO (t2)->align
> +   || SSA_NAME_PTR_INFO (t1)->misalign != SSA_NAME_PTR_INFO 
> (t2)->misalign)

You want to compare SSA_NAME_PTR_INFO ()->pt.zero as well I think since
we store pointer non-null-ness from VRP there.

You are not comparing the actual points-to info - but of course I'd
expect that to differ as soon as local decls are involved.  Still looks
like a hole to me.

> + return false;
> + }
> +  else if (SSA_NAME_PTR_INFO (t2))
> + return false;
> +}
> +  else
> +{
> +  if (SSA_NAME_RANGE_INFO (t1))
> + {
> +   if (!SSA_NAME_RANGE_INFO (t2))
> + return false;
> +   Value_Range r1 (TREE_TYPE (t1));
> +   Value_Range r2 (TREE_TYPE (t2));
> +   SSA_NAME_RANGE_INFO (t1)->get_vrange (r1, TREE_TYPE (t1));
> +   SSA_NAME_RANGE_INFO (t2)->get_vrange (r2, TREE_TYPE (t2));
> 

[commit] MIPS: Add -m(no-)strict-align option

2024-03-15 Thread YunQiang Su
We support options -m(no-)unaligned-access 2 years ago, while
currently most of other ports prefer -m(no-)strict-align.
Let's support -m(no-)strict-align, and keep -m(no-)unaligned-access
as alias.

gcc
* config/mips/mips.opt: Support -mstrict-align, and use
TARGET_STRICT_ALIGN as the flag; keep -m(no-)unaligned-access
as alias.
* config/mips/mips.h: Use TARGET_STRICT_ALIGN.
* config/mips/mips.opt.urls: Regenerate.
* doc/invoke.texi: Document -m(no-)strict-algin for MIPSr6.
---
 gcc/config/mips/mips.h|  2 +-
 gcc/config/mips/mips.opt  | 12 ++--
 gcc/config/mips/mips.opt.urls |  6 ++
 gcc/doc/invoke.texi   | 18 --
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 7145d23c650..6444a68dfd5 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -251,7 +251,7 @@ struct mips_cpu_info {
 || ISA_HAS_MSA))
 
 /* ISA load/store instructions can handle unaligned address */
-#define ISA_HAS_UNALIGNED_ACCESS (TARGET_UNALIGNED_ACCESS \
+#define ISA_HAS_UNALIGNED_ACCESS (!TARGET_STRICT_ALIGN \
 && (mips_isa_rev >= 6))
 
 /* The ISA compression flags that are currently in effect.  */
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index ce36942aabe..c1abb36212f 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -429,9 +429,17 @@ mtune=
 Target RejectNegative Joined Var(mips_tune_option) ToLower 
Enum(mips_arch_opt_value)
 -mtune=PROCESSOR   Optimize the output for PROCESSOR.
 
+mstrict-align
+Target Var(TARGET_STRICT_ALIGN) Init(0)
+Don't generate code with unaligned load store, only valid for MIPS R6.
+
 munaligned-access
-Target Var(TARGET_UNALIGNED_ACCESS) Init(1)
-Generate code with unaligned load store, valid for MIPS R6.
+Target RejectNegative Alias(mstrict-align) NegativeAlias
+Generate code with unaligned load store for R6 (alias of -mno-strict-align).
+
+mno-unaligned-access
+Target RejectNegative Alias(mstrict-align)
+Don't generate code with unaligned load store for R6 (alias of -mstrict-align).
 
 muninit-const-in-rodata
 Target Var(TARGET_UNINIT_CONST_IN_RODATA)
diff --git a/gcc/config/mips/mips.opt.urls b/gcc/config/mips/mips.opt.urls
index 96aba041026..9d166646d65 100644
--- a/gcc/config/mips/mips.opt.urls
+++ b/gcc/config/mips/mips.opt.urls
@@ -233,9 +233,15 @@ UrlSuffix(gcc/MIPS-Options.html#index-mmadd4)
 mtune=
 UrlSuffix(gcc/MIPS-Options.html#index-mtune-10)
 
+mstrict-align
+UrlSuffix(gcc/MIPS-Options.html#index-mstrict-align-3)
+
 munaligned-access
 UrlSuffix(gcc/MIPS-Options.html#index-munaligned-access-1)
 
+mno-unaligned-access
+UrlSuffix(gcc/MIPS-Options.html#index-mno-unaligned-access-1)
+
 muninit-const-in-rodata
 UrlSuffix(gcc/MIPS-Options.html#index-muninit-const-in-rodata)
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 85c938d4a14..864768fd2f4 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1143,7 +1143,8 @@ Objective-C and Objective-C++ Dialects}.
 -mcheck-zero-division  -mno-check-zero-division
 -mdivide-traps  -mdivide-breaks
 -mload-store-pairs  -mno-load-store-pairs
--munaligned-access  -mno-unaligned-access
+-mstrict-align  -mno-strict-align
+-mno-unaligned-access  -munaligned-access
 -mmemcpy  -mno-memcpy  -mlong-calls  -mno-long-calls
 -mmad  -mno-mad  -mimadd  -mno-imadd  -mfused-madd  -mno-fused-madd  -nocpp
 -mfix-24k  -mno-fix-24k
@@ -28561,14 +28562,19 @@ instructions to enable load/store bonding.  This 
option is enabled by
 default but only takes effect when the selected architecture is known
 to support bonding.
 
+@opindex mstrict-align
+@opindex mno-strict-align
 @opindex munaligned-access
 @opindex mno-unaligned-access
-@item -munaligned-access
+@item -mstrict-align
+@itemx -mno-strict-align
+@itemx -munaligned-access
 @itemx -mno-unaligned-access
-Enable (disable) direct unaligned access for MIPS Release 6.
-MIPSr6 requires load/store unaligned-access support,
-by hardware or trap
-So @option{-mno-unaligned-access} may be needed by kernel.
+Disable (enable) direct unaligned access for MIPS Release 6.
+MIPSr6 requires load/store unaligned-access support, by hardware or
+trap  So @option{-mstrict-align} may be needed by kernel.  The
+options @option{-munaligned-access} and @option{-mno-unaligned-access}
+are obsoleted, and only for backward-compatible.
 
 @opindex mmemcpy
 @opindex mno-memcpy
-- 
2.39.2



Re: [PATCH v14 23/26] c++: Implement __is_invocable built-in trait

2024-03-15 Thread Ken Matsui
On Thu, Mar 14, 2024 at 6:53 PM Ken Matsui  wrote:
>
> On Fri, Mar 8, 2024 at 9:17 AM Patrick Palka  wrote:
> >
> > On Wed, 28 Feb 2024, Ken Matsui wrote:
> >
> > > This patch implements built-in trait for std::is_invocable.
> > >
> > > gcc/cp/ChangeLog:
> > >
> > >   * cp-trait.def: Define __is_invocable.
> > >   * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_INVOCABLE.
> > >   * semantics.cc (trait_expr_value): Likewise.
> > >   (finish_trait_expr): Likewise.
> > >   * cp-tree.h (build_invoke): New function.
> > >   * method.cc (build_invoke): New function.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   * g++.dg/ext/has-builtin-1.C: Test existence of __is_invocable.
> > >   * g++.dg/ext/is_invocable1.C: New test.
> > >   * g++.dg/ext/is_invocable2.C: New test.
> > >   * g++.dg/ext/is_invocable3.C: New test.
> > >   * g++.dg/ext/is_invocable4.C: New test.
> >
> > Thanks, this looks great!  This generic build_invoke function could be
> > used for invoke_result etc as well, and it could also cache the built-up
> > call across __is_invocable and __is_nothrow_invocable checks on the same
> > arguments (which is a common pattern in the standard library).  LGTM
> >
> > >
> > > Signed-off-by: Ken Matsui 
> > > ---
> > >  gcc/cp/constraint.cc |   6 +
> > >  gcc/cp/cp-trait.def  |   1 +
> > >  gcc/cp/cp-tree.h |   2 +
> > >  gcc/cp/method.cc | 132 +
> > >  gcc/cp/semantics.cc  |   4 +
> > >  gcc/testsuite/g++.dg/ext/has-builtin-1.C |   3 +
> > >  gcc/testsuite/g++.dg/ext/is_invocable1.C | 349 +++
> > >  gcc/testsuite/g++.dg/ext/is_invocable2.C | 139 +
> > >  gcc/testsuite/g++.dg/ext/is_invocable3.C |  51 
> > >  gcc/testsuite/g++.dg/ext/is_invocable4.C |  33 +++
> > >  10 files changed, 720 insertions(+)
> > >  create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable1.C
> > >  create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable2.C
> > >  create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable3.C
> > >  create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable4.C
> > >
> > > diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
> > > index 23ea66d9c12..c87b126fdb1 100644
> > > --- a/gcc/cp/constraint.cc
> > > +++ b/gcc/cp/constraint.cc
> > > @@ -3791,6 +3791,12 @@ diagnose_trait_expr (tree expr, tree args)
> > >  case CPTK_IS_FUNCTION:
> > >inform (loc, "  %qT is not a function", t1);
> > >break;
> > > +case CPTK_IS_INVOCABLE:
> > > +  if (!t2)
> > > +inform (loc, "  %qT is not invocable", t1);
> > > +  else
> > > +inform (loc, "  %qT is not invocable by %qE", t1, t2);
> > > +  break;
> > >  case CPTK_IS_LAYOUT_COMPATIBLE:
> > >inform (loc, "  %qT is not layout compatible with %qT", t1, t2);
> > >break;
> > > diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
> > > index 85056c8140b..6cb2b55f4ea 100644
> > > --- a/gcc/cp/cp-trait.def
> > > +++ b/gcc/cp/cp-trait.def
> > > @@ -75,6 +75,7 @@ DEFTRAIT_EXPR (IS_EMPTY, "__is_empty", 1)
> > >  DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1)
> > >  DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1)
> > >  DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
> > > +DEFTRAIT_EXPR (IS_INVOCABLE, "__is_invocable", -1)
> > >  DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2)
> > >  DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1)
> > >  DEFTRAIT_EXPR (IS_MEMBER_FUNCTION_POINTER, 
> > > "__is_member_function_pointer", 1)
> > > diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
> > > index 334c11396c2..261d3a71faa 100644
> > > --- a/gcc/cp/cp-tree.h
> > > +++ b/gcc/cp/cp-tree.h
> > > @@ -7334,6 +7334,8 @@ extern tree get_copy_assign 
> > > (tree);
> > >  extern tree get_default_ctor (tree);
> > >  extern tree get_dtor (tree, tsubst_flags_t);
> > >  extern tree build_stub_object(tree);
> > > +extern tree build_invoke (tree, const_tree,
> > > +  tsubst_flags_t);
> > >  extern tree strip_inheriting_ctors   (tree);
> > >  extern tree inherited_ctor_binfo (tree);
> > >  extern bool base_ctor_omit_inherited_parms   (tree);
> > > diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc
> > > index 98c10e6a8b5..953f1bed6fc 100644
> > > --- a/gcc/cp/method.cc
> > > +++ b/gcc/cp/method.cc
> > > @@ -1928,6 +1928,138 @@ build_trait_object (tree type)
> > >return build_stub_object (type);
> > >  }
> > >
> > > +/* [func.require] Build an expression of INVOKE(FN_TYPE, ARG_TYPES...).  
> > > If the
> > > +   given is not invocable, returns error_mark_node.  */
> > > +
> > > +tree
> > > +build_invoke (tree fn_type, const_tree arg_types, tsubst_flags_t 
> > > complain)
> > > +{
> > > +  if (fn_type == error_mark_node || arg_types == error_mark_node)
> > > +