Re: [PATCH] c++: __has_builtin gives the wrong answer [PR106759]

2022-08-29 Thread Jason Merrill via Gcc-patches

On 8/29/22 17:26, Marek Polacek wrote:

We've supported __is_nothrow_constructible since r11-4386, but
names_builtin_p didn't know about it, so it gave the wrong answer for
  #if __has_builtin(__is_nothrow_constructible)
  ...
  #endif

I've tested all C++-only built-ins and only two were missing.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


PR c++/106759

gcc/cp/ChangeLog:

* cp-objcp-common.cc (names_builtin_p): Handle RID_IS_NOTHROW_ASSIGNABLE
and RID_IS_NOTHROW_CONSTRUCTIBLE.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: New test.
---
  gcc/cp/cp-objcp-common.cc|   2 +
  gcc/testsuite/g++.dg/ext/has-builtin-1.C | 133 +++
  2 files changed, 135 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/ext/has-builtin-1.C

diff --git a/gcc/cp/cp-objcp-common.cc b/gcc/cp/cp-objcp-common.cc
index 4079a4b4aec..1ffac08c32f 100644
--- a/gcc/cp/cp-objcp-common.cc
+++ b/gcc/cp/cp-objcp-common.cc
@@ -460,6 +460,8 @@ names_builtin_p (const char *name)
  case RID_IS_UNION:
  case RID_IS_ASSIGNABLE:
  case RID_IS_CONSTRUCTIBLE:
+case RID_IS_NOTHROW_ASSIGNABLE:
+case RID_IS_NOTHROW_CONSTRUCTIBLE:
  case RID_UNDERLYING_TYPE:
  case RID_REF_CONSTRUCTS_FROM_TEMPORARY:
  case RID_REF_CONVERTS_FROM_TEMPORARY:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
new file mode 100644
index 000..fe25cb2f669
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -0,0 +1,133 @@
+// PR c++/106759
+// { dg-do compile }
+// Verify that __has_builtin gives the correct answer for C++ built-ins.
+
+#if !__has_builtin (__builtin_addressof)
+# error "__has_builtin (__builtin_addressof) failed"
+#endif
+#if !__has_builtin (__builtin_bit_cast)
+# error "__has_builtin (__builtin_bit_cast) failed"
+#endif
+#if !__has_builtin (__builtin_launder)
+# error "__has_builtin (__builtin_launder) failed"
+#endif
+#if !__has_builtin (__has_nothrow_assign)
+# error "__has_builtin (__has_nothrow_assign) failed"
+#endif
+#if !__has_builtin (__has_nothrow_constructor)
+# error "__has_builtin (__has_nothrow_constructor) failed"
+#endif
+#if !__has_builtin (__has_nothrow_copy)
+# error "__has_builtin (__has_nothrow_copy) failed"
+#endif
+#if !__has_builtin (__has_trivial_assign)
+# error "__has_builtin (__has_trivial_assign) failed"
+#endif
+#if !__has_builtin (__has_trivial_constructor)
+# error "__has_builtin (__has_trivial_constructor) failed"
+#endif
+#if !__has_builtin (__has_trivial_copy)
+# error "__has_builtin (__has_trivial_copy) failed"
+#endif
+#if !__has_builtin (__has_trivial_destructor)
+# error "__has_builtin (__has_trivial_destructor) failed"
+#endif
+#if !__has_builtin (__has_unique_object_representations)
+# error "__has_builtin (__has_unique_object_representations) failed"
+#endif
+#if !__has_builtin (__has_virtual_destructor)
+# error "__has_builtin (__has_virtual_destructor) failed"
+#endif
+#if !__has_builtin (__is_abstract)
+# error "__has_builtin (__is_abstract) failed"
+#endif
+#if !__has_builtin (__is_aggregate)
+# error "__has_builtin (__is_aggregate) failed"
+#endif
+#if !__has_builtin (__is_base_of)
+# error "__has_builtin (__is_base_of) failed"
+#endif
+#if !__has_builtin (__is_class)
+# error "__has_builtin (__is_class) failed"
+#endif
+#if !__has_builtin (__is_empty)
+# error "__has_builtin (__is_empty) failed"
+#endif
+#if !__has_builtin (__is_enum)
+# error "__has_builtin (__is_enum) failed"
+#endif
+#if !__has_builtin (__is_final)
+# error "__has_builtin (__is_final) failed"
+#endif
+#if !__has_builtin (__is_layout_compatible)
+# error "__has_builtin (__is_layout_compatible) failed"
+#endif
+#if !__has_builtin (__is_literal_type)
+# error "__has_builtin (__is_literal_type) failed"
+#endif
+#if !__has_builtin (__is_pointer_interconvertible_base_of)
+# error "__has_builtin (__is_pointer_interconvertible_base_of) failed"
+#endif
+#if !__has_builtin (__is_pod)
+# error "__has_builtin (__is_pod) failed"
+#endif
+#if !__has_builtin (__is_polymorphic)
+# error "__has_builtin (__is_polymorphic) failed"
+#endif
+#if !__has_builtin (__is_same)
+# error "__has_builtin (__is_same) failed"
+#endif
+#if !__has_builtin (__is_same_as)
+# error "__has_builtin (__is_same_as) failed"
+#endif
+#if !__has_builtin (__is_standard_layout)
+# error "__has_builtin (__is_standard_layout) failed"
+#endif
+#if !__has_builtin (__is_trivial)
+# error "__has_builtin (__is_trivial) failed"
+#endif
+#if !__has_builtin (__is_trivially_assignable)
+# error "__has_builtin (__is_trivially_assignable) failed"
+#endif
+#if !__has_builtin (__is_trivially_constructible)
+# error "__has_builtin (__is_trivially_constructible) failed"
+#endif
+#if !__has_builtin (__is_trivially_copyable)
+# error "__has_builtin (__is_trivially_copyable) failed"
+#endif
+#if !__has_builtin (__is_union)
+# error "__has_builtin (__is_union) failed"
+#endif
+#if !__has_builtin 

Re: [PATCH] libcpp: Add -Winvalid-utf8 warning [PR106655]

2022-08-29 Thread Jason Merrill via Gcc-patches

On 8/29/22 17:35, Jakub Jelinek wrote:

On Mon, Aug 29, 2022 at 05:15:26PM -0400, Jason Merrill wrote:

On 8/29/22 04:15, Jakub Jelinek wrote:

Hi!

The following patch introduces a new warning - -Winvalid-utf8 similarly
to what clang now has - to diagnose invalid UTF-8 byte sequences in
comments.  In identifiers and in string literals it should be diagnosed
already but comment content hasn't been really verified.

I'm not sure if this is enough to say P2295R6 is implemented or not.

The problem is that in the most common case, people don't use
-finput-charset= option and the sources often are UTF-8, but sometimes
could be some ASCII compatible single byte encoding where non-ASCII
characters only appear in comments.  So having the warning off by default
is IMO desirable.  Now, if people use explicit -finput-charset=UTF-8,
perhaps we could make the warning on by default for C++23 and use pedwarn
instead of warning, because then the user told us explicitly that the source
is UTF-8.  From the paper I understood one of the implementation options
is to claim that the implementation supports 2 encodings, UTF-8 and UTF-8
like encodings where invalid UTF-8 characters in comments are replaced say
by spaces, where the latter could be the default and the former only
used if -finput-charset=UTF-8 -Werror=invalid-utf8 options are used.

Thoughts on this?


That sounds good to me.


The pedwarn on -std=c++23 -finput-charset=UTF-8 or just documenting that
"conforming" UTF-8 is only -finput-charset=UTF-8 -Werror=invalid-utf8 ?


The former.


+static const uchar *
+_cpp_warn_invalid_utf8 (cpp_reader *pfile)
+{
+  cpp_buffer *buffer = pfile->buffer;
+  const uchar *cur = buffer->cur;
+
+  if (cur[0] < utf8_signifier
+  || cur[1] < utf8_continuation || cur[1] >= utf8_signifier)
+{
+  cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+pfile->line_table->highest_line,
+CPP_BUF_COL (buffer),
+"invalid UTF-8 character <%x> in comment",
+cur[0]);
+  return cur + 1;
+}
+  else if (cur[2] < utf8_continuation || cur[2] >= utf8_signifier)


Unicode table 3-7 says that the second byte is sometimes restricted to less
than this range.


That is true and I've tried to include tests for all of those cases in the
testcase and all of them get a warning.  Some of them are through:
   /* Make sure the shortest possible encoding was used.  */

   if (c <=  0x7F && nbytes > 1) return EILSEQ;
   if (c <= 0x7FF && nbytes > 2) return EILSEQ;
   if (c <=0x && nbytes > 3) return EILSEQ;
   if (c <=  0x1F && nbytes > 4) return EILSEQ;
   if (c <= 0x3FF && nbytes > 5) return EILSEQ;
and others are through:
   /* Make sure the character is valid.  */
   if (c > 0x7FFF || (c >= 0xD800 && c <= 0xDFFF)) return EILSEQ;
All I had to do outside of what one_utf8_to_cppchar already implements was:


+ if (_cpp_valid_utf8 (pfile, , buffer->rlimit, 0, NULL, )
+ && s <= 0x0010)


the <= 0x0010 check, because one_utf8_to_cppchar as written happily
supports up to 6 bytes long UTF-8 which can encode up to 7FFF:
-007F   0xxx
0080-07FF   110x 10xx
0800-   1110 10xx 10xx
0001-001F   0xxx 10xx 10xx 10xx
0020-03FF   10xx 10xx 10xx 10xx 10xx
0400-7FFF   110x 10xx 10xx 10xx 10xx 10xx
while 3-7 only talks about encoding 0..D7FF and D800..10 in up to 4
bytes.

I guess I should try what happens with 0x11 and 0x7fff in
identifiers and string literals.

Jakub





[PATCH] RISC-V: Fix annotation

2022-08-29 Thread juzhe . zhong
From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv.h (enum reg_class): Change vype to vtype.

---
 gcc/config/riscv/riscv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 29582f7c545..3ee5a93ce6a 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -462,7 +462,7 @@ enum reg_class
   FP_REGS, /* floating-point registers */
   FRAME_REGS,  /* arg pointer and frame pointer */
   VL_REGS, /* vl register */
-  VTYPE_REGS,  /* vype register */
+  VTYPE_REGS,  /* vtype register */
   VM_REGS, /* v0.t registers */
   VD_REGS, /* vector registers except v0.t */
   V_REGS,  /* vector registers */
-- 
2.36.1



Re: [PATCH] rs6000: Allow conversions of MMA pointer types [PR106017]

2022-08-29 Thread Peter Bergner via Gcc-patches
On 8/27/22 7:47 PM, Peter Bergner via Gcc-patches wrote:
> On 8/27/22 4:37 PM, Segher Boessenkool wrote:
>>> The fix is to just remove the MMA pointer conversion
>>> handling code altogether.
>>
>> Okay for trunk and all backports.  Thanks!
> 
> Ok, pushed to trunk.  I'll backport after some burn-in.  Thanks!

Test results on Bill's autotesters were clean on multiple systems,
so I pushed the backports to GCC 12, 11 and 10, so it's fixed
everywhere.  Thanks!

Peter



[PATCH] RISC-V: Fix riscv_vector_chunks configuration according to TARGET_MIN_VLEN

2022-08-29 Thread juzhe . zhong
From: zhongjuzhe 

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_convert_vector_bits): Change 
configuration according to TARGET_MIN_VLEN.
* config/riscv/riscv.h (UNITS_PER_FP_REG): Fix annotation.

---
 gcc/config/riscv/riscv.cc | 11 ++-
 gcc/config/riscv/riscv.h  |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4d439e15392..ef606f33983 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5219,14 +5219,15 @@ riscv_init_machine_status (void)
 static poly_uint16
 riscv_convert_vector_bits (void)
 {
-  /* The runtime invariant is only meaningful when vector is enabled. */
+  /* The runtime invariant is only meaningful when TARGET_VECTOR is enabled. */
   if (!TARGET_VECTOR)
 return 0;
 
-  if (TARGET_VECTOR_ELEN_64 || TARGET_VECTOR_ELEN_FP_64)
+  if (TARGET_MIN_VLEN > 32)
 {
-  /* When targetting Zve64* (ELEN = 64) extensions, we should use 64-bit
-chunk size. Runtime invariant: The single indeterminate represent the
+  /* When targetting minimum VLEN > 32, we should use 64-bit chunk size.
+ Otherwise we can not include sew = 64bits.
+Runtime invariant: The single indeterminate represent the
 number of 64-bit chunks in a vector beyond minimum length of 64 bits.
 Thus the number of bytes in a vector is 8 + 8 * x1 which is
 riscv_vector_chunks * 8 = poly_int (8, 8). */
@@ -5234,7 +5235,7 @@ riscv_convert_vector_bits (void)
 }
   else
 {
-  /* When targetting Zve32* (ELEN = 32) extensions, we should use 32-bit
+  /* When targetting minimum VLEN = 32, we should use 32-bit
 chunk size. Runtime invariant: The single indeterminate represent the
 number of 32-bit chunks in a vector beyond minimum length of 32 bits.
 Thus the number of bytes in a vector is 4 + 4 * x1 which is
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 1d8139c2c9b..29582f7c545 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -160,7 +160,7 @@ ASM_MISA_SPEC
 
 /* The `Q' extension is not yet supported.  */
 #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
-/* Size per vector register. For zve32*, size = poly (4, 4). Otherwise, size = 
poly (8, 8). */
+/* Size per vector register. For VLEN = 32, size = poly (4, 4). Otherwise, 
size = poly (8, 8). */
 #define UNITS_PER_V_REG (riscv_vector_chunks * riscv_bytes_per_vector_chunk)
 
 /* The largest type that can be passed in floating-point registers.  */
-- 
2.36.1



Re: [PATCH] riscv: elf-multilib: add rv32iafc to defaults

2022-08-29 Thread Palmer Dabbelt

On Mon, 29 Aug 2022 17:38:08 PDT (-0700), gcc-patches@gcc.gnu.org wrote:

Ping; any comments on this?


It looks fine to me, having an extra reuse pattern is pretty much free.




rv32iafc-ilp32 is compatible with rv32iac-ilp32 for library
implementation, so add a reuse rule allowing the default configuration
to support rv32iafc.


Re: [PATCH] riscv: elf-multilib: add rv32iafc to defaults

2022-08-29 Thread Peter Marheine via Gcc-patches
Ping; any comments on this?

> rv32iafc-ilp32 is compatible with rv32iac-ilp32 for library
> implementation, so add a reuse rule allowing the default configuration
> to support rv32iafc.


Re: [COMMITTED] bpf: define __bpf__ as well as __BPF__ as a target macro

2022-08-29 Thread Fangrui Song via Gcc-patches
On Mon, Aug 29, 2022 at 1:16 PM Jose E. Marchesi via Gcc-patches
 wrote:
>
>
> LLVM defines both __bpf__ and __BPF_ as target macros.
> GCC was defining only __BPF__.
>
> This patch defines __bpf__ as a target macro for BPF.
> Tested in bpf-unknown-none.
>
> gcc/ChangeLog:
>
> * config/bpf/bpf.cc (bpf_target_macros): Define __bpf__ as a
> target macro.
> ---
>  gcc/config/bpf/bpf.cc | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index 7e37e080808..9cb56cfb287 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -291,6 +291,7 @@ void
>  bpf_target_macros (cpp_reader *pfile)
>  {
>builtin_define ("__BPF__");
> +  builtin_define ("__bpf__");
>
>if (TARGET_BIG_ENDIAN)
>  builtin_define ("__BPF_BIG_ENDIAN__");
> --
> 2.30.2
>

Having multiple choices in this case seems to just add confusion to
users and making code search slightly more inconvenient.

How much code uses LLVM specific __bpf__? Can it be migrated? Should
LLVM undefine the macro instead?


Re: [PATCH] libcpp: Add -Winvalid-utf8 warning [PR106655]

2022-08-29 Thread Jakub Jelinek via Gcc-patches
On Mon, Aug 29, 2022 at 11:35:44PM +0200, Jakub Jelinek wrote:
> I guess I should try what happens with 0x11 and 0x7fff in
> identifiers and string literals.

It is rejected in identifiers, but happily accepted in string literals:
const char32_t *a = U"";
const char32_t *b = U"��";
int ab = 1;
int c��d = 2;

Jakub



Re: [PATCH] libcpp: Add -Winvalid-utf8 warning [PR106655]

2022-08-29 Thread Jakub Jelinek via Gcc-patches
On Mon, Aug 29, 2022 at 05:15:26PM -0400, Jason Merrill wrote:
> On 8/29/22 04:15, Jakub Jelinek wrote:
> > Hi!
> > 
> > The following patch introduces a new warning - -Winvalid-utf8 similarly
> > to what clang now has - to diagnose invalid UTF-8 byte sequences in
> > comments.  In identifiers and in string literals it should be diagnosed
> > already but comment content hasn't been really verified.
> > 
> > I'm not sure if this is enough to say P2295R6 is implemented or not.
> > 
> > The problem is that in the most common case, people don't use
> > -finput-charset= option and the sources often are UTF-8, but sometimes
> > could be some ASCII compatible single byte encoding where non-ASCII
> > characters only appear in comments.  So having the warning off by default
> > is IMO desirable.  Now, if people use explicit -finput-charset=UTF-8,
> > perhaps we could make the warning on by default for C++23 and use pedwarn
> > instead of warning, because then the user told us explicitly that the source
> > is UTF-8.  From the paper I understood one of the implementation options
> > is to claim that the implementation supports 2 encodings, UTF-8 and UTF-8
> > like encodings where invalid UTF-8 characters in comments are replaced say
> > by spaces, where the latter could be the default and the former only
> > used if -finput-charset=UTF-8 -Werror=invalid-utf8 options are used.
> > 
> > Thoughts on this?
> 
> That sounds good to me.

The pedwarn on -std=c++23 -finput-charset=UTF-8 or just documenting that
"conforming" UTF-8 is only -finput-charset=UTF-8 -Werror=invalid-utf8 ?

> > +static const uchar *
> > +_cpp_warn_invalid_utf8 (cpp_reader *pfile)
> > +{
> > +  cpp_buffer *buffer = pfile->buffer;
> > +  const uchar *cur = buffer->cur;
> > +
> > +  if (cur[0] < utf8_signifier
> > +  || cur[1] < utf8_continuation || cur[1] >= utf8_signifier)
> > +{
> > +  cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
> > +pfile->line_table->highest_line,
> > +CPP_BUF_COL (buffer),
> > +"invalid UTF-8 character <%x> in comment",
> > +cur[0]);
> > +  return cur + 1;
> > +}
> > +  else if (cur[2] < utf8_continuation || cur[2] >= utf8_signifier)
> 
> Unicode table 3-7 says that the second byte is sometimes restricted to less
> than this range.

That is true and I've tried to include tests for all of those cases in the
testcase and all of them get a warning.  Some of them are through:
  /* Make sure the shortest possible encoding was used.  */

  if (c <=  0x7F && nbytes > 1) return EILSEQ;
  if (c <= 0x7FF && nbytes > 2) return EILSEQ;
  if (c <=0x && nbytes > 3) return EILSEQ;
  if (c <=  0x1F && nbytes > 4) return EILSEQ;
  if (c <= 0x3FF && nbytes > 5) return EILSEQ;
and others are through:
  /* Make sure the character is valid.  */
  if (c > 0x7FFF || (c >= 0xD800 && c <= 0xDFFF)) return EILSEQ;
All I had to do outside of what one_utf8_to_cppchar already implements was:

> > + if (_cpp_valid_utf8 (pfile, , buffer->rlimit, 0, NULL, )
> > + && s <= 0x0010)

the <= 0x0010 check, because one_utf8_to_cppchar as written happily
supports up to 6 bytes long UTF-8 which can encode up to 7FFF:
   -007F   0xxx
   0080-07FF   110x 10xx
   0800-   1110 10xx 10xx
   0001-001F   0xxx 10xx 10xx 10xx
   0020-03FF   10xx 10xx 10xx 10xx 10xx
   0400-7FFF   110x 10xx 10xx 10xx 10xx 10xx
while 3-7 only talks about encoding 0..D7FF and D800..10 in up to 4
bytes.

I guess I should try what happens with 0x11 and 0x7fff in
identifiers and string literals.

Jakub



[PATCH] c++: __has_builtin gives the wrong answer [PR106759]

2022-08-29 Thread Marek Polacek via Gcc-patches
We've supported __is_nothrow_constructible since r11-4386, but
names_builtin_p didn't know about it, so it gave the wrong answer for
 #if __has_builtin(__is_nothrow_constructible)
 ...
 #endif

I've tested all C++-only built-ins and only two were missing.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/106759

gcc/cp/ChangeLog:

* cp-objcp-common.cc (names_builtin_p): Handle RID_IS_NOTHROW_ASSIGNABLE
and RID_IS_NOTHROW_CONSTRUCTIBLE.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: New test.
---
 gcc/cp/cp-objcp-common.cc|   2 +
 gcc/testsuite/g++.dg/ext/has-builtin-1.C | 133 +++
 2 files changed, 135 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/has-builtin-1.C

diff --git a/gcc/cp/cp-objcp-common.cc b/gcc/cp/cp-objcp-common.cc
index 4079a4b4aec..1ffac08c32f 100644
--- a/gcc/cp/cp-objcp-common.cc
+++ b/gcc/cp/cp-objcp-common.cc
@@ -460,6 +460,8 @@ names_builtin_p (const char *name)
 case RID_IS_UNION:
 case RID_IS_ASSIGNABLE:
 case RID_IS_CONSTRUCTIBLE:
+case RID_IS_NOTHROW_ASSIGNABLE:
+case RID_IS_NOTHROW_CONSTRUCTIBLE:
 case RID_UNDERLYING_TYPE:
 case RID_REF_CONSTRUCTS_FROM_TEMPORARY:
 case RID_REF_CONVERTS_FROM_TEMPORARY:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
new file mode 100644
index 000..fe25cb2f669
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -0,0 +1,133 @@
+// PR c++/106759
+// { dg-do compile }
+// Verify that __has_builtin gives the correct answer for C++ built-ins.
+
+#if !__has_builtin (__builtin_addressof)
+# error "__has_builtin (__builtin_addressof) failed"
+#endif
+#if !__has_builtin (__builtin_bit_cast)
+# error "__has_builtin (__builtin_bit_cast) failed"
+#endif
+#if !__has_builtin (__builtin_launder)
+# error "__has_builtin (__builtin_launder) failed"
+#endif
+#if !__has_builtin (__has_nothrow_assign)
+# error "__has_builtin (__has_nothrow_assign) failed"
+#endif
+#if !__has_builtin (__has_nothrow_constructor)
+# error "__has_builtin (__has_nothrow_constructor) failed"
+#endif
+#if !__has_builtin (__has_nothrow_copy)
+# error "__has_builtin (__has_nothrow_copy) failed"
+#endif
+#if !__has_builtin (__has_trivial_assign)
+# error "__has_builtin (__has_trivial_assign) failed"
+#endif
+#if !__has_builtin (__has_trivial_constructor)
+# error "__has_builtin (__has_trivial_constructor) failed"
+#endif
+#if !__has_builtin (__has_trivial_copy)
+# error "__has_builtin (__has_trivial_copy) failed"
+#endif
+#if !__has_builtin (__has_trivial_destructor)
+# error "__has_builtin (__has_trivial_destructor) failed"
+#endif
+#if !__has_builtin (__has_unique_object_representations)
+# error "__has_builtin (__has_unique_object_representations) failed"
+#endif
+#if !__has_builtin (__has_virtual_destructor)
+# error "__has_builtin (__has_virtual_destructor) failed"
+#endif
+#if !__has_builtin (__is_abstract)
+# error "__has_builtin (__is_abstract) failed"
+#endif
+#if !__has_builtin (__is_aggregate)
+# error "__has_builtin (__is_aggregate) failed"
+#endif
+#if !__has_builtin (__is_base_of)
+# error "__has_builtin (__is_base_of) failed"
+#endif
+#if !__has_builtin (__is_class)
+# error "__has_builtin (__is_class) failed"
+#endif
+#if !__has_builtin (__is_empty)
+# error "__has_builtin (__is_empty) failed"
+#endif
+#if !__has_builtin (__is_enum)
+# error "__has_builtin (__is_enum) failed"
+#endif
+#if !__has_builtin (__is_final)
+# error "__has_builtin (__is_final) failed"
+#endif
+#if !__has_builtin (__is_layout_compatible)
+# error "__has_builtin (__is_layout_compatible) failed"
+#endif
+#if !__has_builtin (__is_literal_type)
+# error "__has_builtin (__is_literal_type) failed"
+#endif
+#if !__has_builtin (__is_pointer_interconvertible_base_of)
+# error "__has_builtin (__is_pointer_interconvertible_base_of) failed"
+#endif
+#if !__has_builtin (__is_pod)
+# error "__has_builtin (__is_pod) failed"
+#endif
+#if !__has_builtin (__is_polymorphic)
+# error "__has_builtin (__is_polymorphic) failed"
+#endif
+#if !__has_builtin (__is_same)
+# error "__has_builtin (__is_same) failed"
+#endif
+#if !__has_builtin (__is_same_as)
+# error "__has_builtin (__is_same_as) failed"
+#endif
+#if !__has_builtin (__is_standard_layout)
+# error "__has_builtin (__is_standard_layout) failed"
+#endif
+#if !__has_builtin (__is_trivial)
+# error "__has_builtin (__is_trivial) failed"
+#endif
+#if !__has_builtin (__is_trivially_assignable)
+# error "__has_builtin (__is_trivially_assignable) failed"
+#endif
+#if !__has_builtin (__is_trivially_constructible)
+# error "__has_builtin (__is_trivially_constructible) failed"
+#endif
+#if !__has_builtin (__is_trivially_copyable)
+# error "__has_builtin (__is_trivially_copyable) failed"
+#endif
+#if !__has_builtin (__is_union)
+# error "__has_builtin (__is_union) failed"
+#endif
+#if !__has_builtin (__underlying_type)
+# error "__has_builtin (__underlying_type) failed"

Re: [PATCH v2] c++: Fix C++11 attribute propagation [PR106712]

2022-08-29 Thread Jason Merrill via Gcc-patches

On 8/29/22 16:01, Marek Polacek wrote:

On Mon, Aug 29, 2022 at 01:32:29PM -0400, Jason Merrill wrote:

On 8/26/22 19:01, Marek Polacek wrote:

When we have

[[noreturn]] int fn1 [[nodiscard]](), fn2();

"noreturn" should apply to both fn1 and fn2 but "nodiscard" only to fn1:
[dcl.pre]/3: "The attribute-specifier-seq appertains to each of
the entities declared by the declarators of the init-declarator-list."
[dcl.spec.general]: "The attribute-specifier-seq affects the type
only for the declaration it appears in, not other declarations involving
the same type."

As Ed Catmur correctly analyzed, this is because, for the test above,
we call start_decl with prefix_attributes=noreturn, but this line:

attributes = attr_chainon (attributes, prefix_attributes);

results in attributes == prefix_attributes, because chainon sees
that attributes is null so it just returns prefix_attributes.  Then
in grokdeclarator we reach

*attrlist = attr_chainon (*attrlist, declarator->std_attributes);

which modifies prefix_attributes so now it's "noreturn, nodiscard"
and so fn2 is wrongly marked nodiscard as well.  Fixed by copying
prefix_attributes.


How about reversing the order of arguments to the call in grokdeclarator, so
that the prefix attributes can remain shared at the end of the list?


Thanks, that seems like a cheaper solution.  It works because this way
we tack the prefix attributes onto ->std_attributes, avoiding modifying
prefix_attributes.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


-- >8 --
When we have

   [[noreturn]] int fn1 [[nodiscard]](), fn2();

"noreturn" should apply to both fn1 and fn2 but "nodiscard" only to fn1:
[dcl.pre]/3: "The attribute-specifier-seq appertains to each of
the entities declared by the declarators of the init-declarator-list."
[dcl.spec.general]: "The attribute-specifier-seq affects the type
only for the declaration it appears in, not other declarations involving
the same type."

As Ed Catmur correctly analyzed, this is because, for the test above,
we call start_decl with prefix_attributes=noreturn, but this line:

   attributes = attr_chainon (attributes, prefix_attributes);

results in attributes == prefix_attributes, because chainon sees
that attributes is null so it just returns prefix_attributes.  Then
in grokdeclarator we reach

   *attrlist = attr_chainon (*attrlist, declarator->std_attributes);

which modifies prefix_attributes so now it's "noreturn, nodiscard"
and so fn2 is wrongly marked nodiscard as well.  Fixed by reversing
the order of arguments to attr_chainon.  That way, we tack the prefix
attributes onto ->std_attributes, avoiding modifying prefix_attributes.

PR c++/106712

gcc/cp/ChangeLog:

* decl.cc (grokdeclarator): Reverse the order of arguments to
attr_chainon.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/gen-attrs-77.C: New test.
---
  gcc/cp/decl.cc|  2 +-
  gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C | 17 +
  2 files changed, 18 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index d46a347a6c7..b72b2a8456b 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -13474,7 +13474,7 @@ grokdeclarator (const cp_declarator *declarator,
/* [dcl.meaning]/1: The optional attribute-specifier-seq following
 a declarator-id appertains to the entity that is declared.  */
if (declarator->std_attributes != error_mark_node)
-   *attrlist = attr_chainon (*attrlist, declarator->std_attributes);
+   *attrlist = attr_chainon (declarator->std_attributes, *attrlist);
else
/* We should have already diagnosed the issue (c++/78344).  */
gcc_assert (seen_error ());
diff --git a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C 
b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C
new file mode 100644
index 000..2c41c62f33b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C
@@ -0,0 +1,17 @@
+// PR c++/106712
+// { dg-do compile { target c++11 } }
+
+[[noreturn]] int f1 [[nodiscard]](), f2 ();
+[[nodiscard]] int f3 (), f4 ();
+int f5 [[nodiscard]](), f6 ();
+
+int
+main ()
+{
+  f1 (); // { dg-warning "ignoring" }
+  f2 ();
+  f3 (); // { dg-warning "ignoring" }
+  f4 (); // { dg-warning "ignoring" }
+  f5 (); // { dg-warning "ignoring" }
+  f6 ();
+}

base-commit: 60d1d296b42b22b08d4eaa38bea02100c07261ac




Re: [PATCH] libcpp: Add -Winvalid-utf8 warning [PR106655]

2022-08-29 Thread Jason Merrill via Gcc-patches

On 8/29/22 04:15, Jakub Jelinek wrote:

Hi!

The following patch introduces a new warning - -Winvalid-utf8 similarly
to what clang now has - to diagnose invalid UTF-8 byte sequences in
comments.  In identifiers and in string literals it should be diagnosed
already but comment content hasn't been really verified.

I'm not sure if this is enough to say P2295R6 is implemented or not.

The problem is that in the most common case, people don't use
-finput-charset= option and the sources often are UTF-8, but sometimes
could be some ASCII compatible single byte encoding where non-ASCII
characters only appear in comments.  So having the warning off by default
is IMO desirable.  Now, if people use explicit -finput-charset=UTF-8,
perhaps we could make the warning on by default for C++23 and use pedwarn
instead of warning, because then the user told us explicitly that the source
is UTF-8.  From the paper I understood one of the implementation options
is to claim that the implementation supports 2 encodings, UTF-8 and UTF-8
like encodings where invalid UTF-8 characters in comments are replaced say
by spaces, where the latter could be the default and the former only
used if -finput-charset=UTF-8 -Werror=invalid-utf8 options are used.

Thoughts on this?


That sounds good to me.


2022-08-29  Jakub Jelinek  

PR c++/106655
libcpp/
* include/cpplib.h (struct cpp_options): Implement C++23
P2295R6 - Support for UTF-8 as a portable source file encoding.
Add cpp_warn_invalid_utf8 field.
(enum cpp_warning_reason): Add CPP_W_INVALID_UTF8 enumerator.
* init.cc (cpp_create_reader): Initialize cpp_warn_invalid_utf8.
* lex.cc (utf8_continuation): New const variable.
(utf8_signifier): Move earlier in the file.
(_cpp_warn_invalid_utf8): New function.
(_cpp_skip_block_comment): Handle -Winvalid-utf8 warning.
(skip_line_comment): Likewise.
gcc/
* doc/invoke.texi (-Winvalid-utf8): Document it.
gcc/c-family/
* c.opt (-Winvalid-utf8): New warning.
gcc/testsuite/
* c-c++-common/cpp/Winvalid-utf8-1.c: New test.

--- libcpp/include/cpplib.h.jj  2022-08-25 14:25:23.866912426 +0200
+++ libcpp/include/cpplib.h 2022-08-27 12:17:55.185022807 +0200
@@ -560,6 +560,9 @@ struct cpp_options
   cpp_bidirectional_level.  */
unsigned char cpp_warn_bidirectional;
  
+  /* True if libcpp should warn about invalid UTF-8 characters in comments.  */

+  bool cpp_warn_invalid_utf8;
+
/* Dependency generation.  */
struct
{
@@ -666,7 +669,8 @@ enum cpp_warning_reason {
CPP_W_CXX11_COMPAT,
CPP_W_CXX20_COMPAT,
CPP_W_EXPANSION_TO_DEFINED,
-  CPP_W_BIDIRECTIONAL
+  CPP_W_BIDIRECTIONAL,
+  CPP_W_INVALID_UTF8
  };
  
  /* Callback for header lookup for HEADER, which is the name of a

--- libcpp/init.cc.jj   2022-08-24 09:55:44.571876638 +0200
+++ libcpp/init.cc  2022-08-27 12:18:54.559246323 +0200
@@ -227,6 +227,7 @@ cpp_create_reader (enum c_lang lang, cpp
CPP_OPTION (pfile, ext_numeric_literals) = 1;
CPP_OPTION (pfile, warn_date_time) = 0;
CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
+  CPP_OPTION (pfile, cpp_warn_invalid_utf8) = 0;
  
/* Default CPP arithmetic to something sensible for the host for the

   benefit of dumb users like fix-header.  */
--- libcpp/lex.cc.jj2022-08-26 09:24:12.089615949 +0200
+++ libcpp/lex.cc   2022-08-27 13:43:40.560769087 +0200
@@ -1704,6 +1704,59 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
bidi::on_char (kind, ucn_p, loc);
  }
  
+static const cppchar_t utf8_continuation = 0x80;

+static const cppchar_t utf8_signifier = 0xC0; >
+/* Emit -Winvalid-utf8 warning on invalid UTF-8 character starting
+   at PFILE->buffer->cur.  Return a pointer after the diagnosed
+   invalid character.  */
+
+static const uchar *
+_cpp_warn_invalid_utf8 (cpp_reader *pfile)
+{
+  cpp_buffer *buffer = pfile->buffer;
+  const uchar *cur = buffer->cur;
+
+  if (cur[0] < utf8_signifier
+  || cur[1] < utf8_continuation || cur[1] >= utf8_signifier)
+{
+  cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+pfile->line_table->highest_line,
+CPP_BUF_COL (buffer),
+"invalid UTF-8 character <%x> in comment",
+cur[0]);
+  return cur + 1;
+}
+  else if (cur[2] < utf8_continuation || cur[2] >= utf8_signifier)


Unicode table 3-7 says that the second byte is sometimes restricted to 
less than this range.  Hmm, it looks like one_utf8_to_cppchar doesn't 
check that, either.


Did you consider adding error reporting to one_utf8_to_cppchar?  It 
might be better to localize the utf8 logic.



+{
+  cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+pfile->line_table->highest_line,
+CPP_BUF_COL (buffer),
+"invalid UTF-8 character <%x><%x> in comment",

[PATCH] btf: Add support to BTF_KIND_ENUM64 type

2022-08-29 Thread Guillermo E. Martinez via Gcc-patches
Hello GCC team,

The following patch update BTF/CTF backend to support
BTF_KIND_ENUM64 type.

Comments will be welcomed and appreciated!,

Kind regards,
guillermo
--

BTF supports 64-bits enumerators with following encoding:

  struct btf_type:
name_off: 0 or offset to a valid C identifier
info.kind_flag: 0 for unsigned, 1 for signed
info.kind: BTF_KIND_ENUM64
info.vlen: number of enum values
size: 1/2/4/8

The btf_type is followed by info.vlen number of:

struct btf_enum64
{
  uint32_t name_off;   /* Offset in string section of enumerator name.  */
  uint32_t val_lo32;   /* lower 32-bit value for a 64-bit value Enumerator 
*/
  uint32_t val_hi32;   /* high 32-bit value for a 64-bit value Enumerator */
};

So, a new btf_enum64 structure was added to represent BTF_KIND_ENUM64
and a new field in ctf_dtdef to represent specific type's properties, in
the particular case for CTF enums it helps to distinguish when its
enumerators values are signed or unsigned, later that information is
used to encode the BTF enum type.

gcc/ChangeLog:

* btfout.cc (btf_calc_num_vbytes): Compute enumeration size depending of
enumerator type btf_enum{,64}.
(btf_asm_type): Update btf_kflag according to enumerators sign,
using correct BPF type in BTF_KIND_ENUMi{,64}.
(btf_asm_enum_const): New argument to represent the size of
the BTF enum type.
* ctfc.cc (ctf_add_enum): Use and initialization of flag field to
CTF_ENUM_F_NONE.
(ctf_add_enumerator): New argument to represent CTF flags,
updating the comment and flag vaue according to enumerators
sing.
* ctfc.h (ctf_dmdef): Update dmd_value to HOST_WIDE_INT to allow
use 32/64 bits enumerators.
(ctf_dtdef): Add flags to to describe specifyc type's properties.
* dwarf2ctf.cc (gen_ctf_enumeration_type): Update flags field
depending when a signed enumerator value is found.
include/btf.h (btf_enum64): Add new definition and new symbolic
constant to BTF_KIND_ENUM64 and BTF_KF_ENUM_{UN,}SIGNED.

gcc/testsuite/ChangeLog:

gcc.dg/debug/btf/btf-enum-1.c: Update testcase, with correct
info.kflags encoding.
gcc.dg/debug/btf/btf-enum64-1.c: New testcase.
---
 gcc/btfout.cc | 24 ---
 gcc/ctfc.cc   | 14 ---
 gcc/ctfc.h|  9 +++-
 gcc/dwarf2ctf.cc  |  9 +++-
 gcc/testsuite/gcc.dg/debug/btf/btf-enum-1.c   |  2 +-
 gcc/testsuite/gcc.dg/debug/btf/btf-enum64-1.c | 41 +++
 include/btf.h | 19 +++--
 7 files changed, 99 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-enum64-1.c

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index 997a33fa089..4b11c867c23 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -223,7 +223,9 @@ btf_calc_num_vbytes (ctf_dtdef_ref dtd)
   break;
 
 case BTF_KIND_ENUM:
-  vlen_bytes += vlen * sizeof (struct btf_enum);
+  vlen_bytes += (dtd->dtd_data.ctti_size == 0x8)
+   ? vlen * sizeof (struct btf_enum64)
+   : vlen * sizeof (struct btf_enum);
   break;
 
 case BTF_KIND_FUNC_PROTO:
@@ -622,6 +624,15 @@ btf_asm_type (ctf_container_ref ctfc, ctf_dtdef_ref dtd)
   btf_size_type = 0;
 }
 
+ if (btf_kind == BTF_KIND_ENUM)
+   {
+ btf_kflag = (dtd->flags & CTF_ENUM_F_ENUMERATORS_SIGNED)
+   ? BTF_KF_ENUM_SIGNED
+   : BTF_KF_ENUM_UNSIGNED;
+ if (dtd->dtd_data.ctti_size == 0x8)
+   btf_kind = BTF_KIND_ENUM64;
+   }
+
   dw2_asm_output_data (4, dtd->dtd_data.ctti_name, "btt_name");
   dw2_asm_output_data (4, BTF_TYPE_INFO (btf_kind, btf_kflag, btf_vlen),
   "btt_info: kind=%u, kflag=%u, vlen=%u",
@@ -634,6 +645,7 @@ btf_asm_type (ctf_container_ref ctfc, ctf_dtdef_ref dtd)
 case BTF_KIND_UNION:
 case BTF_KIND_ENUM:
 case BTF_KIND_DATASEC:
+case BTF_KIND_ENUM64:
   dw2_asm_output_data (4, dtd->dtd_data.ctti_size, "btt_size: %uB",
   dtd->dtd_data.ctti_size);
   return;
@@ -707,13 +719,13 @@ btf_asm_sou_member (ctf_container_ref ctfc, ctf_dmdef_t * 
dmd)
 }
 }
 
-/* Asm'out an enum constant following a BTF_KIND_ENUM.  */
+/* Asm'out an enum constant following a BTF_KIND_ENUM{,64}.  */
 
 static void
-btf_asm_enum_const (ctf_dmdef_t * dmd)
+btf_asm_enum_const (unsigned int size, ctf_dmdef_t * dmd)
 {
   dw2_asm_output_data (4, dmd->dmd_name_offset, "bte_name");
-  dw2_asm_output_data (4, dmd->dmd_value, "bte_value");
+  dw2_asm_output_data (size, dmd->dmd_value, "bte_value");
 }
 
 /* Asm'out a function parameter description following a BTF_KIND_FUNC_PROTO.  
*/
@@ -871,7 +883,7 @@ output_asm_btf_sou_fields (ctf_container_ref ctfc, 
ctf_dtdef_ref dtd)
   btf_asm_sou_member (ctfc, 

Re: [Patch] OpenMP/Fortran: Permit end-clause on directive

2022-08-29 Thread Harald Anlauf via Gcc-patches

Hi Tobias,

this is not really a review, but:

Am 26.08.22 um 20:21 schrieb Tobias Burnus:

I did run into some issues related to this; those turned out to be
unrelated, but I end ended up implementing this feature.

Side remark: 'omp parallel workshare' seems to actually permit 'nowait'
now, but I guess that's an unintended change due to the
syntax-representation change. Hence, it is now tracked as Spec Issue
3338 and I do not permit it.

OK for mainline?


Regarding testcase nowait-4.f90: it has a part that tests for many
formally correct uses, and a part that tests for many invalid nowait.
Both parts seem to be giving reasonable coverage, so I wonder whether
it would be beneficial to split this one into two subsets.

It makes sense to have fewer but larger testcases in the testsuite,
to keep the time for regtesting at bay, but I'm split here on this
one - and yes, pun intended.

Harald


Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201,
80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer:
Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München;
Registergericht München, HRB 106955




[PATCH] A == 0 ? A : -A same as -A (when A is 0.0)

2022-08-29 Thread Aldy Hernandez via Gcc-patches
The upcoming work for frange triggers a regression in
gcc.dg/tree-ssa/phi-opt-24.c.

For -O2 -fno-signed-zeros, we fail to transform the following into -A:

float f0(float A)
{
  // A == 0? A : -Asame as -A
  if (A == 0)  return A;
  return -A;
}

This is because the abs/negative match.pd pattern here:

/* abs/negative simplifications moved from fold_cond_expr_with_comparison,
   Need to handle (A - B) case as fold_cond_expr_with_comparison does.
   Need to handle UN* comparisons.
   ...
   ...

Sees IL that has the 0.0 propagated.

Instead of:

   [local count: 1073741824]:
  if (A_2(D) == 0.0)
goto ; [34.00%]
  else
goto ; [66.00%]

   [local count: 708669601]:
  _3 = -A_2(D);

   [local count: 1073741824]:
  # _1 = PHI 

It now sees:

   [local count: 1073741824]:
  # _1 = PHI <0.0(2), _3(3)>

which it leaves untouched, causing the if conditional to survive.

Adding a variant to the pattern that for real_zerop fixes the problem.

I am a match.pd weenie, and am avoiding touching more patterns that
may also benefit from handling real constants.  So please use simple
words if what I'm doing isn't correct ;-).

I did not include a testcase, as it's just phi-opt-24.c which will get
triggered when I commit the frange with endpoints work.

Tested on x86-64 & ppc64le Linux.

OK?

gcc/ChangeLog:

* match.pd ((cmp @0 zerop) real_zerop (negate@1 @0)): Add variant
for real zero.
---
 gcc/match.pd | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 1bb936fc401..5bdea300f94 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4803,6 +4803,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(cnd (cmp @0 zerop) @0 (negate@1 @0))
 (if (!HONOR_SIGNED_ZEROS (type))
  @1))
+  (simplify
+   (cnd (cmp @0 zerop) real_zerop (negate@1 @0))
+(if (!HONOR_SIGNED_ZEROS (type))
+ @1))
   (simplify
(cnd (cmp @0 zerop) integer_zerop (negate@1 @0))
 (if (!HONOR_SIGNED_ZEROS (type))
-- 
2.37.1



Re: [PATCH v2] bpf: handle anonymous members in CO-RE reloc [PR106745]

2022-08-29 Thread Jose E. Marchesi via Gcc-patches


> [changes from v1: simplify the new conditional logic as suggested.]
>
> The old method for computing a member index for a CO-RE relocation
> relied on a name comparison, which could SEGV if the member in question
> is itself part of an anonymous inner struct or union.
>
> This patch changes the index computation to not rely on a name, while
> maintaining the ability to account for other sibling fields which may
> not have a representation in BTF.
>
> Tested in bpf-unknown-none, no known regressions.
> OK?

OK, thank you.

> Thanks.
>
> gcc/ChangeLog:
>
>   PR target/106745
>   * config/bpf/coreout.cc (bpf_core_get_sou_member_index): Fix
>   computation of index for anonymous members.
>
> gcc/testsuite/ChangeLog:
>
>   PR target/106745
>   * gcc.target/bpf/core-pr106745.c: New test.
> ---
>  gcc/config/bpf/coreout.cc| 16 +++
>  gcc/testsuite/gcc.target/bpf/core-pr106745.c | 30 
>  2 files changed, 40 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-pr106745.c
>
> diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc
> index cceaaa969cc..8897a045ea1 100644
> --- a/gcc/config/bpf/coreout.cc
> +++ b/gcc/config/bpf/coreout.cc
> @@ -207,7 +207,6 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
> const tree node)
>if (TREE_CODE (node) == FIELD_DECL)
>  {
>const tree container = DECL_CONTEXT (node);
> -  const char * name = IDENTIFIER_POINTER (DECL_NAME (node));
>  
>/* Lookup the CTF type info for the containing type.  */
>dw_die_ref die = lookup_type_die (container);
> @@ -222,16 +221,21 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
> const tree node)
>if (kind != CTF_K_STRUCT && kind != CTF_K_UNION)
>  return -1;
>  
> +  tree field = TYPE_FIELDS (container);
>int i = 0;
>ctf_dmdef_t * dmd;
>for (dmd = dtd->dtd_u.dtu_members;
> dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
>  {
> -  if (get_btf_id (dmd->dmd_type) > BTF_MAX_TYPE)
> -continue;
> -  if (strcmp (dmd->dmd_name, name) == 0)
> -return i;
> -  i++;
> +   bool field_has_btf = get_btf_id (dmd->dmd_type) <= BTF_MAX_TYPE;
> +
> +   if (field == node)
> + return field_has_btf ? i : -1;
> +
> +   if (field_has_btf)
> + i++;
> +
> +   field = DECL_CHAIN (field);
>  }
>  }
>return -1;
> diff --git a/gcc/testsuite/gcc.target/bpf/core-pr106745.c 
> b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
> new file mode 100644
> index 000..9d347006a69
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -gbtf -dA -mco-re" } */
> +
> +struct weird
> +{
> +  struct
> +  {
> +int b;
> +  };
> +
> +  char x;
> +
> +  union
> +  {
> +int a;
> +int c;
> +  };
> +};
> +
> +
> +int test (struct weird *arg) {
> +  int *x = __builtin_preserve_access_index (>b);
> +  int *y = __builtin_preserve_access_index (>c);
> +
> +  return *x + *y;
> +}
> +
> +
> +/* { dg-final { scan-assembler-times "ascii \"0:0:0.0\"\[\t 
> \]+\[^\n\]*btf_aux_string" 1 } } */
> +/* { dg-final { scan-assembler-times "ascii \"0:2:1.0\"\[\t 
> \]+\[^\n\]*btf_aux_string" 1 } } */


[PATCH v2] bpf: handle anonymous members in CO-RE reloc [PR106745]

2022-08-29 Thread David Faust via Gcc-patches
[changes from v1: simplify the new conditional logic as suggested.]

The old method for computing a member index for a CO-RE relocation
relied on a name comparison, which could SEGV if the member in question
is itself part of an anonymous inner struct or union.

This patch changes the index computation to not rely on a name, while
maintaining the ability to account for other sibling fields which may
not have a representation in BTF.

Tested in bpf-unknown-none, no known regressions.
OK?

Thanks.

gcc/ChangeLog:

PR target/106745
* config/bpf/coreout.cc (bpf_core_get_sou_member_index): Fix
computation of index for anonymous members.

gcc/testsuite/ChangeLog:

PR target/106745
* gcc.target/bpf/core-pr106745.c: New test.
---
 gcc/config/bpf/coreout.cc| 16 +++
 gcc/testsuite/gcc.target/bpf/core-pr106745.c | 30 
 2 files changed, 40 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/bpf/core-pr106745.c

diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc
index cceaaa969cc..8897a045ea1 100644
--- a/gcc/config/bpf/coreout.cc
+++ b/gcc/config/bpf/coreout.cc
@@ -207,7 +207,6 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
const tree node)
   if (TREE_CODE (node) == FIELD_DECL)
 {
   const tree container = DECL_CONTEXT (node);
-  const char * name = IDENTIFIER_POINTER (DECL_NAME (node));
 
   /* Lookup the CTF type info for the containing type.  */
   dw_die_ref die = lookup_type_die (container);
@@ -222,16 +221,21 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
const tree node)
   if (kind != CTF_K_STRUCT && kind != CTF_K_UNION)
 return -1;
 
+  tree field = TYPE_FIELDS (container);
   int i = 0;
   ctf_dmdef_t * dmd;
   for (dmd = dtd->dtd_u.dtu_members;
dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
 {
-  if (get_btf_id (dmd->dmd_type) > BTF_MAX_TYPE)
-continue;
-  if (strcmp (dmd->dmd_name, name) == 0)
-return i;
-  i++;
+ bool field_has_btf = get_btf_id (dmd->dmd_type) <= BTF_MAX_TYPE;
+
+ if (field == node)
+   return field_has_btf ? i : -1;
+
+ if (field_has_btf)
+   i++;
+
+ field = DECL_CHAIN (field);
 }
 }
   return -1;
diff --git a/gcc/testsuite/gcc.target/bpf/core-pr106745.c 
b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
new file mode 100644
index 000..9d347006a69
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -gbtf -dA -mco-re" } */
+
+struct weird
+{
+  struct
+  {
+int b;
+  };
+
+  char x;
+
+  union
+  {
+int a;
+int c;
+  };
+};
+
+
+int test (struct weird *arg) {
+  int *x = __builtin_preserve_access_index (>b);
+  int *y = __builtin_preserve_access_index (>c);
+
+  return *x + *y;
+}
+
+
+/* { dg-final { scan-assembler-times "ascii \"0:0:0.0\"\[\t 
\]+\[^\n\]*btf_aux_string" 1 } } */
+/* { dg-final { scan-assembler-times "ascii \"0:2:1.0\"\[\t 
\]+\[^\n\]*btf_aux_string" 1 } } */
-- 
2.36.1



[COMMITTED] bpf: define __bpf__ as well as __BPF__ as a target macro

2022-08-29 Thread Jose E. Marchesi via Gcc-patches


LLVM defines both __bpf__ and __BPF_ as target macros.
GCC was defining only __BPF__.

This patch defines __bpf__ as a target macro for BPF.
Tested in bpf-unknown-none.

gcc/ChangeLog:

* config/bpf/bpf.cc (bpf_target_macros): Define __bpf__ as a
target macro.
---
 gcc/config/bpf/bpf.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 7e37e080808..9cb56cfb287 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -291,6 +291,7 @@ void
 bpf_target_macros (cpp_reader *pfile)
 {
   builtin_define ("__BPF__");
+  builtin_define ("__bpf__");
 
   if (TARGET_BIG_ENDIAN)
 builtin_define ("__BPF_BIG_ENDIAN__");
-- 
2.30.2



Re: [PATCH] bpf: handle anonymous members in CO-RE reloc [PR106745]

2022-08-29 Thread David Faust via Gcc-patches



On 8/29/22 12:57, Jose E. Marchesi wrote:
> 
> Hi David.
> 
>> The old method for computing a member index for a CO-RE relocation
>> relied on a name comparison, which could SEGV if the member in question
>> is itself part of an anonymous inner struct or union.
>>
>> This patch changes the index computation to not rely on a name, while
>> maintaining the ability to account for other sibling fields which may
>> not have a representation in BTF.
>>
>> Tested in bpf-unknown-none, no known regressions.
>> OK?
>>
>> Thanks.
>>
>> gcc/ChangeLog:
>>
>>  PR target/106745
>>  * config/bpf/coreout.cc (bpf_core_get_sou_member_index): Fix
>>  computation of index for anonymous members.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  PR target/106745
>>  * gcc.target/bpf/core-pr106745.c: New test.
>> ---
>>  gcc/config/bpf/coreout.cc| 19 +
>>  gcc/testsuite/gcc.target/bpf/core-pr106745.c | 30 
>>  2 files changed, 44 insertions(+), 5 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-pr106745.c
>>
>> diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc
>> index cceaaa969cc..caad4380fa1 100644
>> --- a/gcc/config/bpf/coreout.cc
>> +++ b/gcc/config/bpf/coreout.cc
>> @@ -207,7 +207,6 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
>> const tree node)
>>if (TREE_CODE (node) == FIELD_DECL)
>>  {
>>const tree container = DECL_CONTEXT (node);
>> -  const char * name = IDENTIFIER_POINTER (DECL_NAME (node));
>>  
>>/* Lookup the CTF type info for the containing type.  */
>>dw_die_ref die = lookup_type_die (container);
>> @@ -222,16 +221,26 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
>> const tree node)
>>if (kind != CTF_K_STRUCT && kind != CTF_K_UNION)
>>  return -1;
>>  
>> +  tree field = TYPE_FIELDS (container);
>>int i = 0;
>>ctf_dmdef_t * dmd;
>>for (dmd = dtd->dtd_u.dtu_members;
>> dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
>>  {
>>if (get_btf_id (dmd->dmd_type) > BTF_MAX_TYPE)
>> -continue;
>> -  if (strcmp (dmd->dmd_name, name) == 0)
>> -return i;
>> -  i++;
>> +{
>> +  /* This field does not have a BTF representation.  */
>> +  if (field == node)
>> +return -1;
>> +}
>> +  else
>> +{
>> +  if (field == node)
>> +return i;
>> +  i++;
>> +}
>> +
>> +  field = DECL_CHAIN (field);
>>  }
> 
> I find the logic of the new conditional a little difficult to follow.
> What about something like this instead:
> 
> for (dmd = dtd->dtd_u.dtu_members;
>  dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
>   {
> bool field_has_btf = get_btf_id (dmd->dmd_type) <= BTF_MAX_TYPE;
> 
> if (field == node)
>   return field_has_btf ? i : -1;
> 
> if (field_has_btf)
>   i++;
> field = DECL_CHAIN (field);
>   }
> 
> WDYT?

Thanks, that is certainly easier to follow.

v2 coming shortly.


> 
>>  }
>>return -1;
>> diff --git a/gcc/testsuite/gcc.target/bpf/core-pr106745.c 
>> b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
>> new file mode 100644
>> index 000..9d347006a69
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
>> @@ -0,0 +1,30 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O0 -gbtf -dA -mco-re" } */
>> +
>> +struct weird
>> +{
>> +  struct
>> +  {
>> +int b;
>> +  };
>> +
>> +  char x;
>> +
>> +  union
>> +  {
>> +int a;
>> +int c;
>> +  };
>> +};
>> +
>> +
>> +int test (struct weird *arg) {
>> +  int *x = __builtin_preserve_access_index (>b);
>> +  int *y = __builtin_preserve_access_index (>c);
>> +
>> +  return *x + *y;
>> +}
>> +
>> +
>> +/* { dg-final { scan-assembler-times "ascii \"0:0:0.0\"\[\t 
>> \]+\[^\n\]*btf_aux_string" 1 } } */
>> +/* { dg-final { scan-assembler-times "ascii \"0:2:1.0\"\[\t 
>> \]+\[^\n\]*btf_aux_string" 1 } } */


Re: [PATCH] Always default to DWARF2_DEBUG if not specified, warn about deprecated STABS

2022-08-29 Thread Jan-Benedict Glaw
Hi Jeff!

On Sun, 2022-08-28 15:32:53 -0600, Jeff Law via Gcc-patches 
 wrote:
> On 8/28/2022 1:50 AM, Jan-Benedict Glaw wrote:
> > On Tue, 2021-09-21 16:25:19 +0200, Richard Biener via Gcc-patches 
> >  wrote:
> > > This makes defaults.h choose DWARF2_DEBUG if PREFERRED_DEBUGGING_TYPE
> > > is not specified by the target and errors out if DWARF DWARF is not 
> > > supported.
> > While I think the pdp11 bits arreved, the rest did not (yet). Just
> > checked my auto-builder logs. When building current HEAD as
> > 
> > ../gcc/configure --prefix=... --enable-werror-always \
> > --enable-languages=all --disable-gcov \
> > --disable-shared --disable-threads --without-headers \
> > --target=...
> > make V=1 all-gcc
> > 
> > ALL of these targets won't build right now:
[...]
> Umm, most of those -elf targets do build.  See:
> 
> http://law-sandy.freeddns.org:8080

Another builder. :)  Randomly picking xtensa-elf, you're configuring
as

+ ../../gcc/configure --disable-analyzer --with-system-libunwind
--with-newlib --without-headers --disable-threads --disable-shared
--enable-languages=c,c++
--prefix=/home/jlaw/jenkins/workspace/xtensa-elf/xtensa-elf-obj/gcc/../../xtensa-elf-installed
--target=xtensa-elf

I guess the main difference that lets my builds fail might be
--enable-languages=all (vs. c,c++ in your case.)

Maybe you'd give that a try? (...and I'll trigger a build with just
c,c++ on my builder.)

MfG, JBG

-- 


signature.asc
Description: PGP signature


[PATCH v2] c++: Fix C++11 attribute propagation [PR106712]

2022-08-29 Thread Marek Polacek via Gcc-patches
On Mon, Aug 29, 2022 at 01:32:29PM -0400, Jason Merrill wrote:
> On 8/26/22 19:01, Marek Polacek wrote:
> > When we have
> > 
> >[[noreturn]] int fn1 [[nodiscard]](), fn2();
> > 
> > "noreturn" should apply to both fn1 and fn2 but "nodiscard" only to fn1:
> > [dcl.pre]/3: "The attribute-specifier-seq appertains to each of
> > the entities declared by the declarators of the init-declarator-list."
> > [dcl.spec.general]: "The attribute-specifier-seq affects the type
> > only for the declaration it appears in, not other declarations involving
> > the same type."
> > 
> > As Ed Catmur correctly analyzed, this is because, for the test above,
> > we call start_decl with prefix_attributes=noreturn, but this line:
> > 
> >attributes = attr_chainon (attributes, prefix_attributes);
> > 
> > results in attributes == prefix_attributes, because chainon sees
> > that attributes is null so it just returns prefix_attributes.  Then
> > in grokdeclarator we reach
> > 
> >*attrlist = attr_chainon (*attrlist, declarator->std_attributes);
> > 
> > which modifies prefix_attributes so now it's "noreturn, nodiscard"
> > and so fn2 is wrongly marked nodiscard as well.  Fixed by copying
> > prefix_attributes.
> 
> How about reversing the order of arguments to the call in grokdeclarator, so
> that the prefix attributes can remain shared at the end of the list?

Thanks, that seems like a cheaper solution.  It works because this way
we tack the prefix attributes onto ->std_attributes, avoiding modifying
prefix_attributes.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
When we have

  [[noreturn]] int fn1 [[nodiscard]](), fn2();

"noreturn" should apply to both fn1 and fn2 but "nodiscard" only to fn1:
[dcl.pre]/3: "The attribute-specifier-seq appertains to each of
the entities declared by the declarators of the init-declarator-list."
[dcl.spec.general]: "The attribute-specifier-seq affects the type
only for the declaration it appears in, not other declarations involving
the same type."

As Ed Catmur correctly analyzed, this is because, for the test above,
we call start_decl with prefix_attributes=noreturn, but this line:

  attributes = attr_chainon (attributes, prefix_attributes);

results in attributes == prefix_attributes, because chainon sees
that attributes is null so it just returns prefix_attributes.  Then
in grokdeclarator we reach

  *attrlist = attr_chainon (*attrlist, declarator->std_attributes);

which modifies prefix_attributes so now it's "noreturn, nodiscard"
and so fn2 is wrongly marked nodiscard as well.  Fixed by reversing
the order of arguments to attr_chainon.  That way, we tack the prefix
attributes onto ->std_attributes, avoiding modifying prefix_attributes.

PR c++/106712

gcc/cp/ChangeLog:

* decl.cc (grokdeclarator): Reverse the order of arguments to
attr_chainon.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/gen-attrs-77.C: New test.
---
 gcc/cp/decl.cc|  2 +-
 gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C | 17 +
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index d46a347a6c7..b72b2a8456b 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -13474,7 +13474,7 @@ grokdeclarator (const cp_declarator *declarator,
   /* [dcl.meaning]/1: The optional attribute-specifier-seq following
 a declarator-id appertains to the entity that is declared.  */
   if (declarator->std_attributes != error_mark_node)
-   *attrlist = attr_chainon (*attrlist, declarator->std_attributes);
+   *attrlist = attr_chainon (declarator->std_attributes, *attrlist);
   else
/* We should have already diagnosed the issue (c++/78344).  */
gcc_assert (seen_error ());
diff --git a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C 
b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C
new file mode 100644
index 000..2c41c62f33b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C
@@ -0,0 +1,17 @@
+// PR c++/106712
+// { dg-do compile { target c++11 } }
+
+[[noreturn]] int f1 [[nodiscard]](), f2 ();
+[[nodiscard]] int f3 (), f4 ();
+int f5 [[nodiscard]](), f6 ();
+
+int
+main ()
+{
+  f1 (); // { dg-warning "ignoring" }
+  f2 ();
+  f3 (); // { dg-warning "ignoring" }
+  f4 (); // { dg-warning "ignoring" }
+  f5 (); // { dg-warning "ignoring" }
+  f6 ();
+}

base-commit: 60d1d296b42b22b08d4eaa38bea02100c07261ac
-- 
2.37.2



Re: [PATCH] bpf: handle anonymous members in CO-RE reloc [PR106745]

2022-08-29 Thread Jose E. Marchesi via Gcc-patches


Hi David.

> The old method for computing a member index for a CO-RE relocation
> relied on a name comparison, which could SEGV if the member in question
> is itself part of an anonymous inner struct or union.
>
> This patch changes the index computation to not rely on a name, while
> maintaining the ability to account for other sibling fields which may
> not have a representation in BTF.
>
> Tested in bpf-unknown-none, no known regressions.
> OK?
>
> Thanks.
>
> gcc/ChangeLog:
>
>   PR target/106745
>   * config/bpf/coreout.cc (bpf_core_get_sou_member_index): Fix
>   computation of index for anonymous members.
>
> gcc/testsuite/ChangeLog:
>
>   PR target/106745
>   * gcc.target/bpf/core-pr106745.c: New test.
> ---
>  gcc/config/bpf/coreout.cc| 19 +
>  gcc/testsuite/gcc.target/bpf/core-pr106745.c | 30 
>  2 files changed, 44 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/core-pr106745.c
>
> diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc
> index cceaaa969cc..caad4380fa1 100644
> --- a/gcc/config/bpf/coreout.cc
> +++ b/gcc/config/bpf/coreout.cc
> @@ -207,7 +207,6 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
> const tree node)
>if (TREE_CODE (node) == FIELD_DECL)
>  {
>const tree container = DECL_CONTEXT (node);
> -  const char * name = IDENTIFIER_POINTER (DECL_NAME (node));
>  
>/* Lookup the CTF type info for the containing type.  */
>dw_die_ref die = lookup_type_die (container);
> @@ -222,16 +221,26 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
> const tree node)
>if (kind != CTF_K_STRUCT && kind != CTF_K_UNION)
>  return -1;
>  
> +  tree field = TYPE_FIELDS (container);
>int i = 0;
>ctf_dmdef_t * dmd;
>for (dmd = dtd->dtd_u.dtu_members;
> dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
>  {
>if (get_btf_id (dmd->dmd_type) > BTF_MAX_TYPE)
> -continue;
> -  if (strcmp (dmd->dmd_name, name) == 0)
> -return i;
> -  i++;
> + {
> +   /* This field does not have a BTF representation.  */
> +   if (field == node)
> + return -1;
> + }
> +   else
> + {
> +   if (field == node)
> + return i;
> +   i++;
> + }
> +
> +   field = DECL_CHAIN (field);
>  }

I find the logic of the new conditional a little difficult to follow.
What about something like this instead:

for (dmd = dtd->dtd_u.dtu_members;
 dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
  {
bool field_has_btf = get_btf_id (dmd->dmd_type) <= BTF_MAX_TYPE;

if (field == node)
  return field_has_btf ? i : -1;

if (field_has_btf)
  i++;
field = DECL_CHAIN (field);
  }

WDYT?

>  }
>return -1;
> diff --git a/gcc/testsuite/gcc.target/bpf/core-pr106745.c 
> b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
> new file mode 100644
> index 000..9d347006a69
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -gbtf -dA -mco-re" } */
> +
> +struct weird
> +{
> +  struct
> +  {
> +int b;
> +  };
> +
> +  char x;
> +
> +  union
> +  {
> +int a;
> +int c;
> +  };
> +};
> +
> +
> +int test (struct weird *arg) {
> +  int *x = __builtin_preserve_access_index (>b);
> +  int *y = __builtin_preserve_access_index (>c);
> +
> +  return *x + *y;
> +}
> +
> +
> +/* { dg-final { scan-assembler-times "ascii \"0:0:0.0\"\[\t 
> \]+\[^\n\]*btf_aux_string" 1 } } */
> +/* { dg-final { scan-assembler-times "ascii \"0:2:1.0\"\[\t 
> \]+\[^\n\]*btf_aux_string" 1 } } */


Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Toon Moene

On 8/29/22 19:07, Jeff Law via Gcc-patches wrote:

One of the more interesting ones is to try to limit the range of the 
input to the trigonometric functions - that way you could use ones 
without any argument reduction phase ...


The difficult part is that most of the trig stuff is in libraries, so we 
don't have visibility into the full range.


What we do sometimes have is knowledge that the special values are 
already handled which allows us to do things like automatically 
transform a division into estimation + NR correction steps (atan2).


I guess we could do specialization based on the input range.  So rather 
than calling "sin" we could call a special one that didn't have the 
reduction step when we know the input value is in a sensible range.


Exactly. It's probably not that hard to have sin/cos/tan with a special 
entry point that foregoes the whole argument reduction step.


In every weather forecast, you have to compute the local solar height 
(to get the effects of solar radiation correct) every time step, in 
every grid point.


You *know* that angle is between 0 and 90 degrees, as are all the angles 
that go into that computation (latitude, longitude (and time [hour of 
the day, day of the year]).


--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands



[PATCH] bpf: handle anonymous members in CO-RE reloc [PR106745]

2022-08-29 Thread David Faust via Gcc-patches
The old method for computing a member index for a CO-RE relocation
relied on a name comparison, which could SEGV if the member in question
is itself part of an anonymous inner struct or union.

This patch changes the index computation to not rely on a name, while
maintaining the ability to account for other sibling fields which may
not have a representation in BTF.

Tested in bpf-unknown-none, no known regressions.
OK?

Thanks.

gcc/ChangeLog:

PR target/106745
* config/bpf/coreout.cc (bpf_core_get_sou_member_index): Fix
computation of index for anonymous members.

gcc/testsuite/ChangeLog:

PR target/106745
* gcc.target/bpf/core-pr106745.c: New test.
---
 gcc/config/bpf/coreout.cc| 19 +
 gcc/testsuite/gcc.target/bpf/core-pr106745.c | 30 
 2 files changed, 44 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/bpf/core-pr106745.c

diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc
index cceaaa969cc..caad4380fa1 100644
--- a/gcc/config/bpf/coreout.cc
+++ b/gcc/config/bpf/coreout.cc
@@ -207,7 +207,6 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
const tree node)
   if (TREE_CODE (node) == FIELD_DECL)
 {
   const tree container = DECL_CONTEXT (node);
-  const char * name = IDENTIFIER_POINTER (DECL_NAME (node));
 
   /* Lookup the CTF type info for the containing type.  */
   dw_die_ref die = lookup_type_die (container);
@@ -222,16 +221,26 @@ bpf_core_get_sou_member_index (ctf_container_ref ctfc, 
const tree node)
   if (kind != CTF_K_STRUCT && kind != CTF_K_UNION)
 return -1;
 
+  tree field = TYPE_FIELDS (container);
   int i = 0;
   ctf_dmdef_t * dmd;
   for (dmd = dtd->dtd_u.dtu_members;
dmd != NULL; dmd = (ctf_dmdef_t *) ctf_dmd_list_next (dmd))
 {
   if (get_btf_id (dmd->dmd_type) > BTF_MAX_TYPE)
-continue;
-  if (strcmp (dmd->dmd_name, name) == 0)
-return i;
-  i++;
+   {
+ /* This field does not have a BTF representation.  */
+ if (field == node)
+   return -1;
+   }
+ else
+   {
+ if (field == node)
+   return i;
+ i++;
+   }
+
+ field = DECL_CHAIN (field);
 }
 }
   return -1;
diff --git a/gcc/testsuite/gcc.target/bpf/core-pr106745.c 
b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
new file mode 100644
index 000..9d347006a69
--- /dev/null
+++ b/gcc/testsuite/gcc.target/bpf/core-pr106745.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -gbtf -dA -mco-re" } */
+
+struct weird
+{
+  struct
+  {
+int b;
+  };
+
+  char x;
+
+  union
+  {
+int a;
+int c;
+  };
+};
+
+
+int test (struct weird *arg) {
+  int *x = __builtin_preserve_access_index (>b);
+  int *y = __builtin_preserve_access_index (>c);
+
+  return *x + *y;
+}
+
+
+/* { dg-final { scan-assembler-times "ascii \"0:0:0.0\"\[\t 
\]+\[^\n\]*btf_aux_string" 1 } } */
+/* { dg-final { scan-assembler-times "ascii \"0:2:1.0\"\[\t 
\]+\[^\n\]*btf_aux_string" 1 } } */
-- 
2.36.1



Re: [Patch][2/3][v2] nvptx: libgomp+mkoffload.cc: Prepare for reverse offload fn lookup

2022-08-29 Thread Tobias Burnus

Slightly revised version, fixing some issues in mkoffload.cc. Otherwise, the 
same applies:

On 25.08.22 19:30, Tobias Burnus wrote:
On 25.08.22 16:54, Tobias Burnus wrote:

The attached patch prepare for reverse-offload device->host
function-address lookup by requesting (if needed) the on-device address.


This patch adds the actual implementation for NVPTX.

Having  array[] = {fn1,fn2};  works with nvptx only since sm_35; hence,
if there is a reverse_offload and sm_30 is used, there will be a compile-time
error.

To avoid incompatibilities, I compile with the same PTX ISA .version and
sm_XX version as the (last) file that contains the reverse offload. While
it should not matter, some newer CUDA might not support, e.g., sm_35 or
do not like a specific ISA version - thus, that seemed to be safer.

This is currently effectively a no op as with [1/3] patch, always NULL
is passed and as GOMP_OFFLOAD_get_num_devices returns <= 0 as soon as
'omp requires reverse_offload' has been specified.

OK for mainline?


Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
nvptx: libgomp+mkoffload.cc: Prepare for reverse offload fn lookup

Add support to nvptx for reverse lookup of function name to prepare for
'omp target device(ancestor:1)'.

gcc/ChangeLog:

	* config/nvptx/mkoffload.cc (struct id_map): Add 'dim' member.
	(record_id): Store func name without quotes, store dim separately.
	(process): For GOMP_REQUIRES_REVERSE_OFFLOAD, check that -march is
	at least sm_35, create '$offload_func_table' global array and init
	with reverse-offload function addresses.
	* config/nvptx/nvptx.cc (write_fn_proto_1, write_fn_proto): New
	force_public attribute to force .visible.
	(nvptx_declare_function_name): For "omp target
	device_ancestor_nohost" attribut, force .visible/TREE_PUBLIC.

libgomp/ChangeLog:

	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_load_image): Read offload
	function address table '$offload_func_table' if rev_fn_table
	is not NULL.

 gcc/config/nvptx/mkoffload.cc | 119 --
 gcc/config/nvptx/nvptx.cc |  20 +---
 libgomp/plugin/plugin-nvptx.c |  19 +++-
 3 files changed, 146 insertions(+), 12 deletions(-)

diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc
index 3eea0a8f138..834b2059aac 100644
--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -47,6 +47,7 @@ struct id_map
 {
   id_map *next;
   char *ptx_name;
+  char *dim;
 };
 
 static id_map *func_ids, **funcs_tail = _ids;
@@ -108,8 +109,11 @@ xputenv (const char *string)
 static void
 record_id (const char *p1, id_map ***where)
 {
-  const char *end = strchr (p1, '\n');
-  if (!end)
+  gcc_assert (p1[0] == '"');
+  p1++;
+  const char *end = strchr (p1, '"');
+  const char *end2 = strchr (p1, '\n');
+  if (!end || !end2 || end >= end2)
 fatal_error (input_location, "malformed ptx file");
 
   id_map *v = XNEW (id_map);
@@ -117,6 +121,16 @@ record_id (const char *p1, id_map ***where)
   v->ptx_name = XNEWVEC (char, len + 1);
   memcpy (v->ptx_name, p1, len);
   v->ptx_name[len] = '\0';
+  p1 = end + 1;
+  if (*end != '\n')
+{
+  len = end2 - p1;
+  v->dim = XNEWVEC (char, len + 1);
+  memcpy (v->dim, p1, len);
+  v->dim[len] = '\0';
+}
+  else
+v->dim = NULL;
   v->next = NULL;
   id_map **tail = *where;
   *tail = v;
@@ -242,6 +256,10 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
   id_map const *id;
   unsigned obj_count = 0;
   unsigned ix;
+  const char *sm_ver = NULL, *version = NULL;
+  const char *sm_ver2 = NULL, *version2 = NULL;
+  size_t file_cnt = 0;
+  size_t *file_idx = XALLOCAVEC (size_t, len);
 
   fprintf (out, "#include \n\n");
 
@@ -250,6 +268,8 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
   for (size_t i = 0; i != len;)
 {
   char c;
+  bool output_fn_ptr = false;
+  file_idx[file_cnt++] = i;
 
   fprintf (out, "static const char ptx_code_%u[] =\n\t\"", obj_count++);
   while ((c = input[i++]))
@@ -261,6 +281,16 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
 	case '\n':
 	  fprintf (out, "\\n\"\n\t\"");
 	  /* Look for mappings on subsequent lines.  */
+	  if (UNLIKELY (startswith (input + i, ".target sm_")))
+		{
+		  sm_ver = input + i + strlen (".target sm_");
+		  continue;
+		}
+	  if (UNLIKELY (startswith (input + i, ".version ")))
+		{
+		  version = input + i + strlen (".version ");
+		  continue;
+		}
 	  while (startswith (input + i, "//:"))
 		{
 		  i += 3;
@@ -268,7 +298,10 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
 		  if (startswith (input + i, "VAR_MAP "))
 		record_id (input + i + 8, _tail);
 		  else if (startswith (input + i, "FUNC_MAP "))
-		record_id (input + i + 9, _tail);
+		{

Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Koning, Paul via Gcc-patches



> On Aug 29, 2022, at 1:07 PM, Jeff Law via Gcc-patches 
>  wrote:
> 
> ...
> I guess we could do specialization based on the input range.  So rather than 
> calling "sin" we could call a special one that didn't have the reduction step 
> when we know the input value is in a sensible range.

There's some precedent for that, though for a somewhat different reason: 
functions like "log1p".  And in fact, it would make sense for the optimizer to 
transform log calls into log1p calls when the range is known to be right for 
doing so.

paul



Re: [PATCH] c++: Fix C++11 attribute propagation [PR106712]

2022-08-29 Thread Jason Merrill via Gcc-patches

On 8/26/22 19:01, Marek Polacek wrote:

When we have

   [[noreturn]] int fn1 [[nodiscard]](), fn2();

"noreturn" should apply to both fn1 and fn2 but "nodiscard" only to fn1:
[dcl.pre]/3: "The attribute-specifier-seq appertains to each of
the entities declared by the declarators of the init-declarator-list."
[dcl.spec.general]: "The attribute-specifier-seq affects the type
only for the declaration it appears in, not other declarations involving
the same type."

As Ed Catmur correctly analyzed, this is because, for the test above,
we call start_decl with prefix_attributes=noreturn, but this line:

   attributes = attr_chainon (attributes, prefix_attributes);

results in attributes == prefix_attributes, because chainon sees
that attributes is null so it just returns prefix_attributes.  Then
in grokdeclarator we reach

   *attrlist = attr_chainon (*attrlist, declarator->std_attributes);

which modifies prefix_attributes so now it's "noreturn, nodiscard"
and so fn2 is wrongly marked nodiscard as well.  Fixed by copying
prefix_attributes.


How about reversing the order of arguments to the call in 
grokdeclarator, so that the prefix attributes can remain shared at the 
end of the list?



Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/106712

gcc/cp/ChangeLog:

* decl.cc (start_decl): Copy prefix_attributes.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/gen-attrs-77.C: New test.
---
  gcc/cp/decl.cc|  3 ++-
  gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C | 17 +
  2 files changed, 19 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index d46a347a6c7..9fa80b926d5 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -5566,7 +5566,8 @@ start_decl (const cp_declarator *declarator,
*pushed_scope_p = NULL_TREE;
  
if (prefix_attributes != error_mark_node)

-attributes = attr_chainon (attributes, prefix_attributes);
+/* Don't let grokdeclarator modify prefix_attributes.  */
+attributes = attr_chainon (attributes, copy_list (prefix_attributes));
  
decl = grokdeclarator (declarator, declspecs, NORMAL, initialized,

 );
diff --git a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C 
b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C
new file mode 100644
index 000..2c41c62f33b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-77.C
@@ -0,0 +1,17 @@
+// PR c++/106712
+// { dg-do compile { target c++11 } }
+
+[[noreturn]] int f1 [[nodiscard]](), f2 ();
+[[nodiscard]] int f3 (), f4 ();
+int f5 [[nodiscard]](), f6 ();
+
+int
+main ()
+{
+  f1 (); // { dg-warning "ignoring" }
+  f2 ();
+  f3 (); // { dg-warning "ignoring" }
+  f4 (); // { dg-warning "ignoring" }
+  f5 (); // { dg-warning "ignoring" }
+  f6 ();
+}

base-commit: 390f94eee1ae694901f896ac45bfb148f8126baa




Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Jeff Law via Gcc-patches




On 8/29/2022 9:13 AM, Toon Moene wrote:

On 8/29/22 17:08, Jeff Law via Gcc-patches wrote:


However, I'm hoping to forget as many floating point details, as fast
as possible, as soon as I can ;-).


Actually FP isn't that bad -- I'd largely avoided it for decades, but 
didn't have a choice earlier this year.  And there's a lot more 
headroom for improvements in the FP space than the integer space IMHO.


One of the more interesting ones is to try to limit the range of the 
input to the trigonometric functions - that way you could use ones 
without any argument reduction phase ...
The difficult part is that most of the trig stuff is in libraries, so we 
don't have visibility into the full range.


What we do sometimes have is knowledge that the special values are 
already handled which allows us to do things like automatically 
transform a division into estimation + NR correction steps (atan2).


I guess we could do specialization based on the input range.  So rather 
than calling "sin" we could call a special one that didn't have the 
reduction step when we know the input value is in a sensible range.


Jeff



Re: [PATCH 2/2] vec: Add array_slice::bsearch

2022-08-29 Thread Martin Jambor
Hello,

On Sat, Aug 27 2022, Richard Biener wrote:
>> Am 26.08.2022 um 23:45 schrieb Martin Jambor :
>>
>> Hi,
>>
>>> On Fri, Aug 26 2022, Richard Sandiford wrote:
>>> Richard Biener  writes:
> Am 26.08.2022 um 18:40 schrieb Martin Jambor :
>
> Hi,
>
> This adds a method to binary search in a sorted array_slice.
>
> The implementation is direct copy of vec:bsearch.  Moreover, to only
> copy it once and not twice, I used const_cast in the non-const
> variants to be able to use the const variants.  I hope that is
> acceptable abuse of const_cast but I'll be happy to change that if
> not.
>
> Bootstrapped and tested along code that actually uses it on
> x86_64-linux.  OK for trunk?

 Can you avoid the copying by using array slice bsearch from the vec<> 
 bsearch?
>>>
>>> IMO it would be better to transition to using  routines
>>> for this kind of thing (for new code).  In this case that would be
>>> std::lower_bound.
>>>
>>> Using std::lower_bound is more convenient because it avoids the void *
>>> thing (you can use lambdas to capture any number of variables instead)
>>> and because it works on subranges, not just whole ranges.
>>>
>>
>> OK, I can use std::lower_bound with simple lambdas too.  The semantics
>> of returning the first matching a criterion actually allows me to use it
>> one more time.
>
> Can you try to compare generated code?

I have had a look and the std::lower_bound is slightly less efficient,
we get 40 instructions or so instead of 28 or so, but it is mostly
because it lacks the early exit bsearch has when its comparator returns
zero and because of the final checks whether the lower bound is what we
were searching for.  But the templates and lambdas themselves do not
seem to lead to any egregious overhead.

As I wrote earlier, in one of the three searches I want to do I actually
want to look for a lower bound (in the example below the first with the
given index) and so I'd be willing to accept the trade-off.

Full story:

The data structure being searched is essentially an array of these
structures, sorted primarily by index and those with the same index by
unit_offset:

  struct GTY(()) ipa_argagg_value
  {
tree value;
unsigned unit_offset;
unsigned index : 16;
unsigned by_ref : 1;
  };


The C-way implementation:

  --
  /* Callback for bsearch and qsort to sort aggregate values.  */

  static int
  compare_agg_values (const void *a, const void *b)
  {
const ipa_argagg_value *agg1 = (const ipa_argagg_value *) a;
const ipa_argagg_value *agg2 = (const ipa_argagg_value *) b;

if (agg1->index < agg2->index)
  return -1;
if (agg1->index > agg2->index)
  return 1;

if (agg1->unit_offset < agg2->unit_offset)
  return -1;
if (agg1->unit_offset > agg2->unit_offset)
  return 1;
return 0;
  }

  const ipa_argagg_value * __attribute__((noinline))
  testfun (const array_slice ,
  int index, unsigned unit_offset)
  {
ipa_argagg_value key;
key.index = index;
key.unit_offset = unit_offset;
return elts.bsearch (, compare_agg_values);
  }
  --

The C++-ish one:

  --
  const ipa_argagg_value * __attribute__((noinline))
  testfun (const array_slice ,
  int index, unsigned unit_offset)
  {
ipa_argagg_value key;
key.index = index;
key.unit_offset = unit_offset;
const ipa_argagg_value *res
  = std::lower_bound (elts.begin (), elts.end (), key,
[] (const ipa_argagg_value ,
const ipa_argagg_value )
{
  if (elt.index < val.index)
return true;
  if (elt.index > val.index)
return false;
  if (elt.unit_offset < val.unit_offset)
return true;
  return false;
});

if (res == elts.end ()
|| res->index != index
|| res->unit_offset != unit_offset)
  res = nullptr;
return res;
  }
  --

Using GCC 12.1, the use of bsearch yields the following optimized dump:

  --
  ;; Function testfun (_Z7testfunRK11array_sliceIK16ipa_argagg_valueEij, 
funcdef_no=3449, decl_uid=129622, cgraph_uid=2433, symbol_order=2603)

  __attribute__((noinline))
  const struct ipa_argagg_value * testfun (const struct array_slice & elts, int 
index, unsigned int unit_offset)
  {
const void * p;
size_t idx;
size_t u;
size_t l;
short unsigned int _1;
const struct ipa_argagg_value * _11;
unsigned int _12;

Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Aldy Hernandez via Gcc-patches
On Mon, Aug 29, 2022 at 5:08 PM Jeff Law  wrote:
>
>
>
> On 8/29/2022 8:26 AM, Aldy Hernandez wrote:
> > On Mon, Aug 29, 2022 at 4:22 PM Jeff Law via Gcc-patches
> >  wrote:
> >>
> >>
> >> On 8/29/2022 7:31 AM, Aldy Hernandez via Gcc-patches wrote:
> >>> On Mon, Aug 29, 2022 at 3:22 PM Jakub Jelinek  wrote:
>  On Mon, Aug 29, 2022 at 03:13:21PM +0200, Aldy Hernandez wrote:
> > It seems to me we can do this optimization regardless, but then treat
> > positive and negative zero the same throughout the frange class.
> > Particularly, in frange::singleton_p().  We should never return TRUE
> > for any version of 0.0.  This will keep VRP from propagating an
> > incorrect 0.0, since all VRP does is propagate when a range is
> > provably a singleton.  Also, frange::zero_p() shall return true for
> > any version of 0.0.
>  Well, I think for HONOR_SIGNED_ZEROS it would be nice if frange was able 
>  to
>  differentiate between 0.0 and -0.0.
>  One reason is e.g. to be able to optimize copysign/signbit - if we can
>  prove that the sign bit on some value will be always cleared or always 
>  set,
>  we can fold those.
>  On the other side, with -fno-signed-zeros it is invalid to use
>  copysign/signbit on values that could be zero (well, nothing guarantees
>  whether the sign bit is set or clear), so for MODE_HAS_SIGNED_ZEROS &&
>  !HONOR_SIGNED_ZEROS it is best to treat contains_p as {-0.0,0.0} being
>  one thing (just not singleton_p) and not bother with details like whether
>  a range ends or starts with -0.0 or 0.0, either of them would work the 
>  same.
>  And for !MODE_HAS_SIGNED_ZEROS, obviously 0.0 can be singleton_p.
> >>> *head explodes*
> >>>
> >>> Ok, I think I can add a zero property we can track (like we do for
> >>> NAN), and set it appropriately at constant creation and upon results
> >>> from copysign/signbit.  However, I am running out of time before
> >>> Cauldron, so I think I'll just treat +-0.0 ambiguously for now, and do
> >>> that as a follow-up.
> >> We definitely want to be able to track +-0.0 and distinguish between
> >> them.  IIRC there's cases where you can start eliminating comparisons
> >> and arithmetic once you start tracking -0.0 state.
> > Absolutely.  That was always the plan.  However, my goal was just to
> > add relop stubs so others could flesh out the rest.  Alas, I'm way
> > over that now :).  We're tracking NANs, endpoints, infinities, etc.
> Well, we'll probably cycle back and forth a bit between the solver and
> figuring out what we need to track.
>
> Related: I had a short email thread with Siddhesh and Carlos WRT the
> idea of putting in some __builtin_unreachables into the math routines.
> They're generally OK with it, though we do need to verify those
> conditionals aren't generating extra code.   So there's a potential path
> forward for that side of the problem as well.
>
>
> >
> > However, I'm hoping to forget as many floating point details, as fast
> > as possible, as soon as I can ;-).
> Actually FP isn't that bad -- I'd largely avoided it for decades, but
> didn't have a choice earlier this year.  And there's a lot more headroom
> for improvements in the FP space than the integer space IMHO.

Absolutely.  Just working with basic relationals I've noticed that we
generate absolute garbage for some simple testcases.  I'm amazed what
makes it all the way to the assembly because we fail to fold simple
things.

Aldy



Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Toon Moene

On 8/29/22 17:08, Jeff Law via Gcc-patches wrote:


However, I'm hoping to forget as many floating point details, as fast
as possible, as soon as I can ;-).


Actually FP isn't that bad -- I'd largely avoided it for decades, but 
didn't have a choice earlier this year.  And there's a lot more headroom 
for improvements in the FP space than the integer space IMHO.


One of the more interesting ones is to try to limit the range of the 
input to the trigonometric functions - that way you could use ones 
without any argument reduction phase ...


--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands



Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Jeff Law via Gcc-patches




On 8/29/2022 8:26 AM, Aldy Hernandez wrote:

On Mon, Aug 29, 2022 at 4:22 PM Jeff Law via Gcc-patches
 wrote:



On 8/29/2022 7:31 AM, Aldy Hernandez via Gcc-patches wrote:

On Mon, Aug 29, 2022 at 3:22 PM Jakub Jelinek  wrote:

On Mon, Aug 29, 2022 at 03:13:21PM +0200, Aldy Hernandez wrote:

It seems to me we can do this optimization regardless, but then treat
positive and negative zero the same throughout the frange class.
Particularly, in frange::singleton_p().  We should never return TRUE
for any version of 0.0.  This will keep VRP from propagating an
incorrect 0.0, since all VRP does is propagate when a range is
provably a singleton.  Also, frange::zero_p() shall return true for
any version of 0.0.

Well, I think for HONOR_SIGNED_ZEROS it would be nice if frange was able to
differentiate between 0.0 and -0.0.
One reason is e.g. to be able to optimize copysign/signbit - if we can
prove that the sign bit on some value will be always cleared or always set,
we can fold those.
On the other side, with -fno-signed-zeros it is invalid to use
copysign/signbit on values that could be zero (well, nothing guarantees
whether the sign bit is set or clear), so for MODE_HAS_SIGNED_ZEROS &&
!HONOR_SIGNED_ZEROS it is best to treat contains_p as {-0.0,0.0} being
one thing (just not singleton_p) and not bother with details like whether
a range ends or starts with -0.0 or 0.0, either of them would work the same.
And for !MODE_HAS_SIGNED_ZEROS, obviously 0.0 can be singleton_p.

*head explodes*

Ok, I think I can add a zero property we can track (like we do for
NAN), and set it appropriately at constant creation and upon results
from copysign/signbit.  However, I am running out of time before
Cauldron, so I think I'll just treat +-0.0 ambiguously for now, and do
that as a follow-up.

We definitely want to be able to track +-0.0 and distinguish between
them.  IIRC there's cases where you can start eliminating comparisons
and arithmetic once you start tracking -0.0 state.

Absolutely.  That was always the plan.  However, my goal was just to
add relop stubs so others could flesh out the rest.  Alas, I'm way
over that now :).  We're tracking NANs, endpoints, infinities, etc.
Well, we'll probably cycle back and forth a bit between the solver and 
figuring out what we need to track.


Related: I had a short email thread with Siddhesh and Carlos WRT the 
idea of putting in some __builtin_unreachables into the math routines.  
They're generally OK with it, though we do need to verify those 
conditionals aren't generating extra code.   So there's a potential path 
forward for that side of the problem as well.





However, I'm hoping to forget as many floating point details, as fast
as possible, as soon as I can ;-).
Actually FP isn't that bad -- I'd largely avoided it for decades, but 
didn't have a choice earlier this year.  And there's a lot more headroom 
for improvements in the FP space than the integer space IMHO.


Jeff


Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Toon Moene

On 8/29/22 16:36, Aldy Hernandez wrote:


On Mon, Aug 29, 2022 at 4:30 PM Toon Moene  wrote:


On 8/29/22 16:15, Aldy Hernandez wrote:


But even with -ffinite-math-only, is there any benefit to propagating
a known NAN?  For example:


The original intent (in 2002) for the option -ffinite-math-only was for
the optimizers to ignore all the various exceptions to common
optimizations because they might not work correctly when presented with
a NaN or an Inf.

I do not know what the effect for floating point range information would
be - offhand.

But in the *spirit* of this option would be to ignore that the range
[5.0, 5.0] would "also" contain NaN, for instance.


Hmm, this is somewhat similar to what Jakub suggested.  Perhaps we
could categorically set !NAN for !HONOR_NANS at frange construction
time?

For reference:
bool
HONOR_NANS (machine_mode m)
{
   return MODE_HAS_NANS (m) && !flag_finite_math_only;
}

Thanks.
Aldy



Yep, I think that would do it.

Thanks,

--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands



[PATCH] tree-optimization/56654 - sort uninit candidates after RPO

2022-08-29 Thread Richard Biener via Gcc-patches
The following sorts the immediate uses of a possibly uninitialized
SSA variable after their RPO order so we prefer warning for an
earlier occuring use rather than issueing the diagnostic for the
first uninitialized immediate use.

The sorting will inevitably be imperfect but it also allows us to
optimize the expensive predicate check for the case where there
are multiple uses in the same basic-block which is a nice side-effect.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

PR tree-optimization/56654
* tree-ssa-uninit.cc (cand_cmp): New.
(find_uninit_use): First process all PHIs and collect candidate
stmts, then sort those after RPO.
(warn_uninitialized_phi): Pass on bb_to_rpo.
(execute_late_warn_uninitialized): Compute and pass on
reverse lookup of RPO number from basic block index.
---
 gcc/tree-ssa-uninit.cc | 92 --
 1 file changed, 61 insertions(+), 31 deletions(-)

diff --git a/gcc/tree-ssa-uninit.cc b/gcc/tree-ssa-uninit.cc
index 3e5816f1ebb..0bd567f237c 100644
--- a/gcc/tree-ssa-uninit.cc
+++ b/gcc/tree-ssa-uninit.cc
@@ -1154,6 +1154,21 @@ uninit_undef_val_t::phi_arg_set (gphi *phi)
   return compute_uninit_opnds_pos (phi);
 }
 
+/* sort helper for find_uninit_use.  */
+
+static int
+cand_cmp (const void *a, const void *b, void *data)
+{
+  int *bb_to_rpo = (int *)data;
+  const gimple *sa = *(const gimple * const *)a;
+  const gimple *sb = *(const gimple * const *)b;
+  if (bb_to_rpo[gimple_bb (sa)->index] < bb_to_rpo[gimple_bb (sb)->index])
+return -1;
+  else if (bb_to_rpo[gimple_bb (sa)->index] > bb_to_rpo[gimple_bb (sb)->index])
+return 1;
+  return 0;
+}
+
 /* Searches through all uses of a potentially
uninitialized variable defined by PHI and returns a use
statement if the use is not properly guarded.  It returns
@@ -1161,7 +1176,7 @@ uninit_undef_val_t::phi_arg_set (gphi *phi)
holding the position(s) of uninit PHI operands.  */
 
 static gimple *
-find_uninit_use (gphi *phi, unsigned uninit_opnds)
+find_uninit_use (gphi *phi, unsigned uninit_opnds, int *bb_to_rpo)
 {
   /* The Boolean predicate guarding the PHI definition.  Initialized
  lazily from PHI in the first call to is_use_guarded() and cached
@@ -1169,54 +1184,70 @@ find_uninit_use (gphi *phi, unsigned uninit_opnds)
   uninit_undef_val_t eval;
   uninit_analysis def_preds (eval);
 
+  /* First process PHIs and record other candidates.  */
+  auto_vec cands;
   use_operand_p use_p;
   imm_use_iterator iter;
   tree phi_result = gimple_phi_result (phi);
-  gimple *uninit_use = NULL;
   FOR_EACH_IMM_USE_FAST (use_p, iter, phi_result)
 {
   gimple *use_stmt = USE_STMT (use_p);
   if (is_gimple_debug (use_stmt))
continue;
 
-  basic_block use_bb;
   if (gphi *use_phi = dyn_cast (use_stmt))
{
- edge e = gimple_phi_arg_edge (use_phi,
-   PHI_ARG_INDEX_FROM_USE (use_p));
- use_bb = e->src;
+ unsigned idx = PHI_ARG_INDEX_FROM_USE (use_p);
+ edge e = gimple_phi_arg_edge (use_phi, idx);
  /* Do not look for uses in the next iteration of a loop, predicate
 analysis will not use the appropriate predicates to prove
 reachability.  */
  if (e->flags & EDGE_DFS_BACK)
continue;
-   }
-  else if (uninit_use)
-   /* Only process the first real uninitialized use, but continue
-  looking for unguarded uses in PHIs.  */
-   continue;
-  else
-   use_bb = gimple_bb (use_stmt);
 
-  if (def_preds.is_use_guarded (use_stmt, use_bb, phi, uninit_opnds))
-   {
- /* For a guarded use in a PHI record the PHI argument as
-initialized.  */
- if (gphi *use_phi = dyn_cast (use_stmt))
+ basic_block use_bb = e->src;
+ if (def_preds.is_use_guarded (use_stmt, use_bb, phi, uninit_opnds))
{
- unsigned idx = PHI_ARG_INDEX_FROM_USE (use_p);
+ /* For a guarded use in a PHI record the PHI argument as
+initialized.  */
  if (idx < uninit_analysis::func_t::max_phi_args)
{
  bool existed_p;
  auto _mask
-   = defined_args->get_or_insert (use_phi, _p);
+ = defined_args->get_or_insert (use_phi, _p);
  if (!existed_p)
def_mask = 0;
  MASK_SET_BIT (def_mask, idx);
}
+ continue;
}
- continue;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+ fprintf (dump_file, "Found unguarded use in bb %u: ",
+  use_bb->index);
+ print_gimple_stmt (dump_file, use_stmt, 0);
+   }
+ /* Found a phi use that is not guarded, mark the phi_result as
+possibly undefined.  */
+ 

Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Aldy Hernandez via Gcc-patches
On Mon, Aug 29, 2022 at 4:30 PM Toon Moene  wrote:
>
> On 8/29/22 16:15, Aldy Hernandez wrote:
>
> > But even with -ffinite-math-only, is there any benefit to propagating
> > a known NAN?  For example:
>
> The original intent (in 2002) for the option -ffinite-math-only was for
> the optimizers to ignore all the various exceptions to common
> optimizations because they might not work correctly when presented with
> a NaN or an Inf.
>
> I do not know what the effect for floating point range information would
> be - offhand.
>
> But in the *spirit* of this option would be to ignore that the range
> [5.0, 5.0] would "also" contain NaN, for instance.

Hmm, this is somewhat similar to what Jakub suggested.  Perhaps we
could categorically set !NAN for !HONOR_NANS at frange construction
time?

For reference:
bool
HONOR_NANS (machine_mode m)
{
  return MODE_HAS_NANS (m) && !flag_finite_math_only;
}

Thanks.
Aldy



Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Aldy Hernandez via Gcc-patches
On Mon, Aug 29, 2022 at 4:27 PM Jakub Jelinek  wrote:
>
> On Mon, Aug 29, 2022 at 04:20:16PM +0200, Aldy Hernandez wrote:
> > Sure, I can add the HONOR_NANS, but can we even "see" a NAN in the IL
> > for -ffinite-math-only?
>
> Sure, you can, e.g. __builtin_nan{,s}{,f,l} etc. would do it.
> It would be UB to use it at runtime in -ffinite-math-only code though.
> Another question is, when making a range VARYING, do you set the NAN
> property or not when !HONOR_NANS && MODE_HAS_NANS?

A range of VARYING sets the NAN property to unknown
(fp_prop::VARYING).  If you prefer we can set the property to
fp_prop::NO for !HONOR_NANS && MODE_HAS_NANS.

??

Aldy



Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Toon Moene

On 8/29/22 16:15, Aldy Hernandez wrote:


But even with -ffinite-math-only, is there any benefit to propagating
a known NAN?  For example:


The original intent (in 2002) for the option -ffinite-math-only was for 
the optimizers to ignore all the various exceptions to common 
optimizations because they might not work correctly when presented with 
a NaN or an Inf.


I do not know what the effect for floating point range information would 
be - offhand.


But in the *spirit* of this option would be to ignore that the range 
[5.0, 5.0] would "also" contain NaN, for instance.


Kind regards,

--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands



Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Jakub Jelinek via Gcc-patches
On Mon, Aug 29, 2022 at 04:20:16PM +0200, Aldy Hernandez wrote:
> Sure, I can add the HONOR_NANS, but can we even "see" a NAN in the IL
> for -ffinite-math-only?

Sure, you can, e.g. __builtin_nan{,s}{,f,l} etc. would do it.
It would be UB to use it at runtime in -ffinite-math-only code though.
Another question is, when making a range VARYING, do you set the NAN
property or not when !HONOR_NANS && MODE_HAS_NANS?

>  I suppose it's cleaner with HONOR_NANS

Jakub



Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Aldy Hernandez via Gcc-patches
On Mon, Aug 29, 2022 at 4:22 PM Jeff Law via Gcc-patches
 wrote:
>
>
>
> On 8/29/2022 7:31 AM, Aldy Hernandez via Gcc-patches wrote:
> > On Mon, Aug 29, 2022 at 3:22 PM Jakub Jelinek  wrote:
> >> On Mon, Aug 29, 2022 at 03:13:21PM +0200, Aldy Hernandez wrote:
> >>> It seems to me we can do this optimization regardless, but then treat
> >>> positive and negative zero the same throughout the frange class.
> >>> Particularly, in frange::singleton_p().  We should never return TRUE
> >>> for any version of 0.0.  This will keep VRP from propagating an
> >>> incorrect 0.0, since all VRP does is propagate when a range is
> >>> provably a singleton.  Also, frange::zero_p() shall return true for
> >>> any version of 0.0.
> >> Well, I think for HONOR_SIGNED_ZEROS it would be nice if frange was able to
> >> differentiate between 0.0 and -0.0.
> >> One reason is e.g. to be able to optimize copysign/signbit - if we can
> >> prove that the sign bit on some value will be always cleared or always set,
> >> we can fold those.
> >> On the other side, with -fno-signed-zeros it is invalid to use
> >> copysign/signbit on values that could be zero (well, nothing guarantees
> >> whether the sign bit is set or clear), so for MODE_HAS_SIGNED_ZEROS &&
> >> !HONOR_SIGNED_ZEROS it is best to treat contains_p as {-0.0,0.0} being
> >> one thing (just not singleton_p) and not bother with details like whether
> >> a range ends or starts with -0.0 or 0.0, either of them would work the 
> >> same.
> >> And for !MODE_HAS_SIGNED_ZEROS, obviously 0.0 can be singleton_p.
> > *head explodes*
> >
> > Ok, I think I can add a zero property we can track (like we do for
> > NAN), and set it appropriately at constant creation and upon results
> > from copysign/signbit.  However, I am running out of time before
> > Cauldron, so I think I'll just treat +-0.0 ambiguously for now, and do
> > that as a follow-up.
> We definitely want to be able to track +-0.0 and distinguish between
> them.  IIRC there's cases where you can start eliminating comparisons
> and arithmetic once you start tracking -0.0 state.

Absolutely.  That was always the plan.  However, my goal was just to
add relop stubs so others could flesh out the rest.  Alas, I'm way
over that now :).  We're tracking NANs, endpoints, infinities, etc.

However, I'm hoping to forget as many floating point details, as fast
as possible, as soon as I can ;-).

Aldy



Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Jeff Law via Gcc-patches




On 8/29/2022 7:31 AM, Aldy Hernandez via Gcc-patches wrote:

On Mon, Aug 29, 2022 at 3:22 PM Jakub Jelinek  wrote:

On Mon, Aug 29, 2022 at 03:13:21PM +0200, Aldy Hernandez wrote:

It seems to me we can do this optimization regardless, but then treat
positive and negative zero the same throughout the frange class.
Particularly, in frange::singleton_p().  We should never return TRUE
for any version of 0.0.  This will keep VRP from propagating an
incorrect 0.0, since all VRP does is propagate when a range is
provably a singleton.  Also, frange::zero_p() shall return true for
any version of 0.0.

Well, I think for HONOR_SIGNED_ZEROS it would be nice if frange was able to
differentiate between 0.0 and -0.0.
One reason is e.g. to be able to optimize copysign/signbit - if we can
prove that the sign bit on some value will be always cleared or always set,
we can fold those.
On the other side, with -fno-signed-zeros it is invalid to use
copysign/signbit on values that could be zero (well, nothing guarantees
whether the sign bit is set or clear), so for MODE_HAS_SIGNED_ZEROS &&
!HONOR_SIGNED_ZEROS it is best to treat contains_p as {-0.0,0.0} being
one thing (just not singleton_p) and not bother with details like whether
a range ends or starts with -0.0 or 0.0, either of them would work the same.
And for !MODE_HAS_SIGNED_ZEROS, obviously 0.0 can be singleton_p.

*head explodes*

Ok, I think I can add a zero property we can track (like we do for
NAN), and set it appropriately at constant creation and upon results
from copysign/signbit.  However, I am running out of time before
Cauldron, so I think I'll just treat +-0.0 ambiguously for now, and do
that as a follow-up.
We definitely want to be able to track +-0.0 and distinguish between 
them.  IIRC there's cases where you can start eliminating comparisons 
and arithmetic once you start tracking -0.0 state.

Jeff


Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Aldy Hernandez via Gcc-patches
On Mon, Aug 29, 2022 at 4:17 PM Jakub Jelinek  wrote:
>
> On Mon, Aug 29, 2022 at 04:08:58PM +0200, Aldy Hernandez wrote:
> > On Mon, Aug 29, 2022 at 3:55 PM Jakub Jelinek  wrote:
> > >
> > > On Mon, Aug 29, 2022 at 03:45:33PM +0200, Aldy Hernandez wrote:
> > > > For convenience, singleton_p() returns false for a NAN.  IMO, it makes
> > > > the implementation cleaner, but I'm not wed to the idea if someone
> > > > objects.
> > >
> > > If singleton_p() is used to decide whether one can just replace a variable
> > > with singleton range with a constant, then certainly.
> > > If MODE_HAS_SIGNED_ZEROS, zero has 2 representations (-0.0 and 0.0) and
> > > NaNs have lots of different representations (the sign bit is ignored
> > > except for stuff like copysign/signbit, there are qNaNs and sNaNs and
> > > except for the single case how Inf is represented, all other values of the
> > > mantissa mean different representations of NaN).  So, unless we track 
> > > which
> > > exact form of NaN can appear, NaN or any [x, x] range with NaN property
> >
> > Ok that was more or less what I was thinking.  And no, we don't keep
> > track of the type of NANs.
> >
> > How does this look?
> >
> > bool
> > frange::singleton_p (tree *result) const
> > {
> >   if (m_kind == VR_RANGE && real_identical (_min, _max))
> > {
> >   // If we're honoring signed zeros, fail because we don't know
> >   // which zero we have.  This avoids propagating the wrong zero.
> >   if (HONOR_SIGNED_ZEROS (m_type) && zero_p ())
> > return false;
> >
> >   // Return false for any singleton that may be a NAN.
> >   if (!get_nan ().no_p ())
> > return false;
>
> Perhaps if (HONOR_NANS (m_type) && !get_nan ().no_p ()) instead?
> Or do you ensure the nan property is never set for -ffinite-math-only?

See followup with Tom downthread.

Sure, I can add the HONOR_NANS, but can we even "see" a NAN in the IL
for -ffinite-math-only?  I suppose it's cleaner with HONOR_NANS

Aldy



Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Jakub Jelinek via Gcc-patches
On Mon, Aug 29, 2022 at 04:08:58PM +0200, Aldy Hernandez wrote:
> On Mon, Aug 29, 2022 at 3:55 PM Jakub Jelinek  wrote:
> >
> > On Mon, Aug 29, 2022 at 03:45:33PM +0200, Aldy Hernandez wrote:
> > > For convenience, singleton_p() returns false for a NAN.  IMO, it makes
> > > the implementation cleaner, but I'm not wed to the idea if someone
> > > objects.
> >
> > If singleton_p() is used to decide whether one can just replace a variable
> > with singleton range with a constant, then certainly.
> > If MODE_HAS_SIGNED_ZEROS, zero has 2 representations (-0.0 and 0.0) and
> > NaNs have lots of different representations (the sign bit is ignored
> > except for stuff like copysign/signbit, there are qNaNs and sNaNs and
> > except for the single case how Inf is represented, all other values of the
> > mantissa mean different representations of NaN).  So, unless we track which
> > exact form of NaN can appear, NaN or any [x, x] range with NaN property
> 
> Ok that was more or less what I was thinking.  And no, we don't keep
> track of the type of NANs.
> 
> How does this look?
> 
> bool
> frange::singleton_p (tree *result) const
> {
>   if (m_kind == VR_RANGE && real_identical (_min, _max))
> {
>   // If we're honoring signed zeros, fail because we don't know
>   // which zero we have.  This avoids propagating the wrong zero.
>   if (HONOR_SIGNED_ZEROS (m_type) && zero_p ())
> return false;
> 
>   // Return false for any singleton that may be a NAN.
>   if (!get_nan ().no_p ())
> return false;

Perhaps if (HONOR_NANS (m_type) && !get_nan ().no_p ()) instead?
Or do you ensure the nan property is never set for -ffinite-math-only?
> 
>   if (result)
> *result = build_real (m_type, m_min);
>   return true;
> }
>   return false;
> }

Otherwise LGTM.

Jakub



Re: [PATCH] tree-object-size: Support strndup and strdup

2022-08-29 Thread Siddhesh Poyarekar

Ping!

On 2022-08-15 15:23, Siddhesh Poyarekar wrote:

Use string length of input to strdup to determine the usable size of the
resulting object.  Avoid doing the same for strndup since there's a
chance that the input may be too large, resulting in an unnecessary
overhead or worse, the input may not be NULL terminated, resulting in a
crash where there would otherwise have been none.

gcc/ChangeLog:

* tree-object-size.cc (get_whole_object): New function.
(addr_object_size): Use it.
(strdup_object_size): New function.
(call_object_size): Use it.
(pass_data_object_sizes, pass_data_early_object_sizes): Set
todo_flags_finish to TODO_update_ssa_no_phi.

gcc/testsuite/ChangeLog:

* gcc.dg/builtin-dynamic-object-size-0.c (test_strdup,
test_strndup, test_strdup_min, test_strndup_min): New tests.
(main): Call them.
* gcc.dg/builtin-dynamic-object-size-1.c: Silence overread
warnings.
* gcc.dg/builtin-dynamic-object-size-2.c: Likewise.
* gcc.dg/builtin-dynamic-object-size-3.c: Likewise.
* gcc.dg/builtin-dynamic-object-size-4.c: Likewise.
* gcc.dg/builtin-object-size-1.c: Silence overread warnings.
Declare free, strdup and strndup.
(test11): New test.
(main): Call it.
* gcc.dg/builtin-object-size-2.c: Silence overread warnings.
Declare free, strdup and strndup.
(test9): New test.
(main): Call it.
* gcc.dg/builtin-object-size-3.c: Silence overread warnings.
Declare free, strdup and strndup.
(test11): New test.
(main): Call it.
* gcc.dg/builtin-object-size-4.c: Silence overread warnings.
Declare free, strdup and strndup.
(test9): New test.
(main): Call it.
---
  .../gcc.dg/builtin-dynamic-object-size-0.c| 43 +++
  .../gcc.dg/builtin-dynamic-object-size-1.c|  2 +-
  .../gcc.dg/builtin-dynamic-object-size-2.c|  2 +-
  .../gcc.dg/builtin-dynamic-object-size-3.c|  2 +-
  .../gcc.dg/builtin-dynamic-object-size-4.c|  2 +-
  gcc/testsuite/gcc.dg/builtin-object-size-1.c  | 64 +++-
  gcc/testsuite/gcc.dg/builtin-object-size-2.c  | 63 ++-
  gcc/testsuite/gcc.dg/builtin-object-size-3.c  | 63 ++-
  gcc/testsuite/gcc.dg/builtin-object-size-4.c  | 63 ++-
  gcc/tree-object-size.cc   | 76 +--
  10 files changed, 366 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
index 01a280b2d7b..7f023708b15 100644
--- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
@@ -479,6 +479,40 @@ test_loop (int *obj, size_t sz, size_t start, size_t end, 
int incr)
return __builtin_dynamic_object_size (ptr, 0);
  }
  
+/* strdup/strndup.  */

+
+size_t
+__attribute__ ((noinline))
+test_strdup (const char *in)
+{
+  char *res = __builtin_strdup (in);
+  return __builtin_dynamic_object_size (res, 0);
+}
+
+size_t
+__attribute__ ((noinline))
+test_strndup (const char *in, size_t bound)
+{
+  char *res = __builtin_strndup (in, bound);
+  return __builtin_dynamic_object_size (res, 0);
+}
+
+size_t
+__attribute__ ((noinline))
+test_strdup_min (const char *in)
+{
+  char *res = __builtin_strdup (in);
+  return __builtin_dynamic_object_size (res, 2);
+}
+
+size_t
+__attribute__ ((noinline))
+test_strndup_min (const char *in, size_t bound)
+{
+  char *res = __builtin_strndup (in, bound);
+  return __builtin_dynamic_object_size (res, 2);
+}
+
  /* Other tests.  */
  
  struct TV4

@@ -651,6 +685,15 @@ main (int argc, char **argv)
int *t = test_pr105736 ();
if (__builtin_dynamic_object_size (t, 0) != -1)
  FAIL ();
+  const char *str = "hello world";
+  if (test_strdup (str) != __builtin_strlen (str) + 1)
+FAIL ();
+  if (test_strndup (str, 4) != 5)
+FAIL ();
+  if (test_strdup_min (str) != __builtin_strlen (str) + 1)
+FAIL ();
+  if (test_strndup_min (str, 4) != 0)
+FAIL ();
  
if (nfails > 0)

  __builtin_abort ();
diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-1.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-1.c
index 7cc8b1c9488..8f17c8edcaf 100644
--- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-1.c
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-1.c
@@ -1,5 +1,5 @@
  /* { dg-do run } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -Wno-stringop-overread" } */
  /* { dg-require-effective-target alloca } */
  
  #define __builtin_object_size __builtin_dynamic_object_size

diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-2.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-2.c
index 267dbf48ca7..3677782ff1c 100644
--- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-2.c
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-2.c
@@ -1,5 +1,5 @@
  /* { dg-do 

Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Aldy Hernandez via Gcc-patches
On Mon, Aug 29, 2022 at 4:08 PM Toon Moene  wrote:
>
> On 8/29/22 15:54, Jakub Jelinek via Gcc-patches wrote:
>
> > On Mon, Aug 29, 2022 at 03:45:33PM +0200, Aldy Hernandez wrote:
>
> >> For convenience, singleton_p() returns false for a NAN.  IMO, it makes
> >> the implementation cleaner, but I'm not wed to the idea if someone
> >> objects.
> >
> > If singleton_p() is used to decide whether one can just replace a variable
> > with singleton range with a constant, then certainly.
> > If MODE_HAS_SIGNED_ZEROS, zero has 2 representations (-0.0 and 0.0) and
> > NaNs have lots of different representations (the sign bit is ignored
> > except for stuff like copysign/signbit, there are qNaNs and sNaNs and
> > except for the single case how Inf is represented, all other values of the
> > mantissa mean different representations of NaN).  So, unless we track which
> > exact form of NaN can appear, NaN or any [x, x] range with NaN property
> > set can't be a singleton.  There could be programs that propagate something
> > important in NaN mantissa and would be upset if frange kills that.
> > Of course, one needs to take into account that when a FPU creates NaN, it
> > will create the canonical qNaN.
> >
> >   Jakub
> >
>
> But the NaNs are irrelevant with -ffinite-math-only, as are the various
> Infs (I do not know offhand which MODE_ that is) ...

But even with -ffinite-math-only, is there any benefit to propagating
a known NAN?  For example:

if (__builtin_isnan (x)) {
  use(x);
}

or perhaps:
if (x != x) {
  use(x);
}

Should we propagate NAN into the use of x?

For that matter, AFAICT we don't even generate the unordered
comparisons needed to distinguish a NAN for -ffinite-math-only.

void foo(float f)
{
  if (__builtin_isnan (f))
bark();
}

$ cat a.c.*original
;; Function foo (null)
;; enabled by -tree-original

{
  if (0)
{
  bark ();
}
}

Cheers.
Aldy



Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Aldy Hernandez via Gcc-patches
On Mon, Aug 29, 2022 at 3:55 PM Jakub Jelinek  wrote:
>
> On Mon, Aug 29, 2022 at 03:45:33PM +0200, Aldy Hernandez wrote:
> > For convenience, singleton_p() returns false for a NAN.  IMO, it makes
> > the implementation cleaner, but I'm not wed to the idea if someone
> > objects.
>
> If singleton_p() is used to decide whether one can just replace a variable
> with singleton range with a constant, then certainly.
> If MODE_HAS_SIGNED_ZEROS, zero has 2 representations (-0.0 and 0.0) and
> NaNs have lots of different representations (the sign bit is ignored
> except for stuff like copysign/signbit, there are qNaNs and sNaNs and
> except for the single case how Inf is represented, all other values of the
> mantissa mean different representations of NaN).  So, unless we track which
> exact form of NaN can appear, NaN or any [x, x] range with NaN property

Ok that was more or less what I was thinking.  And no, we don't keep
track of the type of NANs.

How does this look?

bool
frange::singleton_p (tree *result) const
{
  if (m_kind == VR_RANGE && real_identical (_min, _max))
{
  // If we're honoring signed zeros, fail because we don't know
  // which zero we have.  This avoids propagating the wrong zero.
  if (HONOR_SIGNED_ZEROS (m_type) && zero_p ())
return false;

  // Return false for any singleton that may be a NAN.
  if (!get_nan ().no_p ())
return false;

  if (result)
*result = build_real (m_type, m_min);
  return true;
}
  return false;
}

Thanks.
Aldy

> set can't be a singleton.  There could be programs that propagate something
> important in NaN mantissa and would be upset if frange kills that.
> Of course, one needs to take into account that when a FPU creates NaN, it
> will create the canonical qNaN.
>
> Jakub
>



Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Toon Moene

On 8/29/22 15:54, Jakub Jelinek via Gcc-patches wrote:


On Mon, Aug 29, 2022 at 03:45:33PM +0200, Aldy Hernandez wrote:



For convenience, singleton_p() returns false for a NAN.  IMO, it makes
the implementation cleaner, but I'm not wed to the idea if someone
objects.


If singleton_p() is used to decide whether one can just replace a variable
with singleton range with a constant, then certainly.
If MODE_HAS_SIGNED_ZEROS, zero has 2 representations (-0.0 and 0.0) and
NaNs have lots of different representations (the sign bit is ignored
except for stuff like copysign/signbit, there are qNaNs and sNaNs and
except for the single case how Inf is represented, all other values of the
mantissa mean different representations of NaN).  So, unless we track which
exact form of NaN can appear, NaN or any [x, x] range with NaN property
set can't be a singleton.  There could be programs that propagate something
important in NaN mantissa and would be upset if frange kills that.
Of course, one needs to take into account that when a FPU creates NaN, it
will create the canonical qNaN.

Jakub



But the NaNs are irrelevant with -ffinite-math-only, as are the various 
Infs (I do not know offhand which MODE_ that is) ...


Kind regards,

--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands



Re: [PATCH] 32-bit PA-RISC with HP-UX: remove deprecated ports

2022-08-29 Thread Martin Liška
On 8/28/22 18:34, John David Anglin wrote:
> On 2022-08-26 3:15 a.m., Martin Liška wrote:
>> Removes the deprecated ports. If I'm correct all hpux9,hpux10 should be 
>> removed
>> as they only provide 32-bit targets. On the contrary, hpux11 supports hppa64 
>> that
>> we still do support.
> It is my understanding that the 32-bit hppa hpux targets were deprecated 
> because they don't
> support ELF and the DWARF debug format (.stabs is to be removed).

Yes, .stabs is the motivation for the removal.

> Some of the changes to
> libtool.m4 affect the 32-bit ia64 hpux target.  As far as I know, it supports 
> ELF and the DWARF
> debug format.
> 
> Possibly, the removal of ia64-hpux should be considered but I think it's a 
> separate issue.
> 
>>
>> Ready to be installed?
>> Thanks,
>> Martin
>>
>> ChangeLog:
>>
>> * config.rpath: Delete hpux9 and hpux10.
>> * configure: Regenerate.
>> * configure.ac: Delete hpux9 and hpux10.
>> * libgo/config/libtool.m4: Ignore 32-bit
>> hpux targets.
>> * libgo/configure: Regenerate.
>> * libtool.m4: Delete hpux9 and hpux10.
> The libtool.m4 files are from GNU libtool.  I don't think these files should 
> be changed.


Thanks for the feedback, can you please check the updated version of the patch?

Martin

> 
> Dave
> 
From 42fb9db6c51861bf32ed013173a08f2ff4ae635f Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 25 Aug 2022 14:30:51 +0200
Subject: [PATCH] 32-bit PA-RISC with HP-UX: remove deprecated ports

ChangeLog:

	* configure: Regenerate.
	* configure.ac: Delete hpux9 and hpux10.

config/ChangeLog:

	* mmap.m4: Remove hpux10.
	* mh-pa-hpux10: Removed.

contrib/ChangeLog:

	* config-list.mk: Remove deprecated ports.

contrib/header-tools/ChangeLog:

	* README: Remove deprecated ports.
	* reduce-headers: Likewise.

gcc/ChangeLog:

	* config.build: Remove deprecated ports.
	* config.gcc: Likewise.
	* config.host: Likewise.
	* configure.ac: Likewise.
	* configure: Regenerate.
	* config/pa/pa-hpux10.h: Removed.
	* config/pa/pa-hpux10.opt: Removed.
	* config/pa/t-dce-thr: Removed.

gnattools/ChangeLog:

	* configure.ac: Remove deprecated ports.
	* configure: Regenerate.

libffi/ChangeLog:

	* acinclude.m4: Remove deprecated ports.
	* configure: Regenerate.

libstdc++-v3/ChangeLog:

	* configure: Regenerate.
	* crossconfig.m4: Remove deprecated ports.

gcc/testsuite/ChangeLog:

	* g++.dg/cpp0x/lambda/lambda-conv.C: Remove useless test.
	* gcc.c-torture/execute/ieee/hugeval.x: Likewise.
	* gcc.dg/torture/pr47917.c: Likewise.
	* lib/target-supports.exp: Likewise.

libgcc/ChangeLog:

	* config.host: Remove hppa.

libitm/ChangeLog:

	* configure: Regenerate.

fixincludes/ChangeLog:

	* configure: Regenerate.
---
 config/mh-pa-hpux10   |   4 -
 config/mmap.m4|   2 +-
 configure |  14 --
 configure.ac  |  14 --
 contrib/config-list.mk|   3 +-
 contrib/header-tools/README   |   2 +-
 contrib/header-tools/reduce-headers   |   1 -
 fixincludes/configure |   2 +-
 gcc/config.build  |   5 +-
 gcc/config.gcc|  82 -
 gcc/config.host   |   5 -
 gcc/config/pa/pa-hpux10.h | 157 --
 gcc/config/pa/pa-hpux10.opt   |  22 ---
 gcc/config/pa/t-dce-thr   |   2 -
 gcc/configure |  21 +--
 gcc/configure.ac  |  13 --
 .../g++.dg/cpp0x/lambda/lambda-conv.C |   2 +-
 .../gcc.c-torture/execute/ieee/hugeval.x  |   3 -
 gcc/testsuite/gcc.dg/torture/pr47917.c|   1 -
 gcc/testsuite/lib/target-supports.exp |  15 +-
 gnattools/configure   |   2 -
 gnattools/configure.ac|   2 -
 libffi/acinclude.m4   |   2 +-
 libffi/configure  |   2 +-
 libgcc/config.host|  22 ---
 libitm/configure  |   2 +-
 libstdc++-v3/configure|  14 --
 libstdc++-v3/crossconfig.m4   |   9 -
 28 files changed, 13 insertions(+), 412 deletions(-)
 delete mode 100644 config/mh-pa-hpux10
 delete mode 100644 gcc/config/pa/pa-hpux10.h
 delete mode 100644 gcc/config/pa/pa-hpux10.opt
 delete mode 100644 gcc/config/pa/t-dce-thr

diff --git a/config/mh-pa-hpux10 b/config/mh-pa-hpux10
deleted file mode 100644
index 99a2278f281..000
--- a/config/mh-pa-hpux10
+++ /dev/null
@@ -1,4 +0,0 @@
-# The ada virtual array implementation requires that indexing be disabled on
-# hosts such as hpux that use a segmented memory architecture.  Both the c
-# and ada files need to be compiled with this option for correct operation.
-ADA_CFLAGS = -mdisable-indexing -D_X_HPUX10
diff --git 

Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Jakub Jelinek via Gcc-patches
On Mon, Aug 29, 2022 at 03:45:33PM +0200, Aldy Hernandez wrote:
> For convenience, singleton_p() returns false for a NAN.  IMO, it makes
> the implementation cleaner, but I'm not wed to the idea if someone
> objects.

If singleton_p() is used to decide whether one can just replace a variable
with singleton range with a constant, then certainly.
If MODE_HAS_SIGNED_ZEROS, zero has 2 representations (-0.0 and 0.0) and
NaNs have lots of different representations (the sign bit is ignored
except for stuff like copysign/signbit, there are qNaNs and sNaNs and
except for the single case how Inf is represented, all other values of the
mantissa mean different representations of NaN).  So, unless we track which
exact form of NaN can appear, NaN or any [x, x] range with NaN property
set can't be a singleton.  There could be programs that propagate something
important in NaN mantissa and would be upset if frange kills that.
Of course, one needs to take into account that when a FPU creates NaN, it
will create the canonical qNaN.

Jakub



Re: [PATCH] Add support for floating point endpoints to frange.

2022-08-29 Thread Aldy Hernandez via Gcc-patches
Jakub, et al... here is the latest version of the frange endpoints
patch addressing the signed zero problem (well treating +-0.0
ambiguously), as well as implementing all the relational operators.

[Andrew M: I mostly copied our relop code from irange, while keeping
track NANs, etc.  It should be pleasantly familiar.]

A few notes...

We can now represent things like [5.0, 5.0], which is the singleton
5.0 *or* a NAN.  OTOH, we could also have [5.0, 5.0] !NAN, which is
5.0 without the possibility of a NAN.  This allows us to track
appropriate ranges on both sides of an if, but keep track of which
side (true side) we definitely know we won't have a NAN.

As mentioned, frange::singleton_p([0,0]) returns false for any version
of 0.0 (unless !HONOR_SIGNED_ZEROS).  I'll work on zero tracking at a
later time.  The patch is getting pretty large as is.

For convenience, singleton_p() returns false for a NAN.  IMO, it makes
the implementation cleaner, but I'm not wed to the idea if someone
objects.

This patch passes all tests for all languages, including go and ada,
with the aforementioned exception of gcc.dg/tree-ssa/phi-opt-24.c
which is getting confused because we have propagated (correctly) a 0.0
into the PHI.  Hints?  Takers? ;-).

Aldy

On Fri, Aug 26, 2022 at 9:44 PM Andrew Pinski  wrote:
>
> On Fri, Aug 26, 2022 at 12:16 PM Aldy Hernandez  wrote:
> >
> > On Fri, Aug 26, 2022 at 7:40 PM Andrew Pinski  wrote:
> > >
> > > On Fri, Aug 26, 2022 at 8:55 AM Aldy Hernandez  wrote:
> > > >
> > > > [pinskia: I'm CCing you as the author of the match.pd pattern.]
> > > >
> > > > So, as I wrap up the work here (latest patch attached), I see there's
> > > > another phiopt regression (not spaceship related).  I was hoping
> > > > someone could either give me a hand, or offer some guidance.
> > > >
> > > > The failure is in gcc.dg/tree-ssa/phi-opt-24.c.
> > > >
> > > > We fail to transform the following into -A:
> > > >
> > > > /* { dg-options "-O2 -fno-signed-zeros -fdump-tree-phiopt" } */
> > > >
> > > > float f0(float A)
> > > > {
> > > >   // A == 0? A : -Asame as -A
> > > >   if (A == 0)  return A;
> > > >   return -A;
> > > > }
> > > >
> > > > This is because the abs/negative match.pd pattern here:
> > > >
> > > > /* abs/negative simplifications moved from 
> > > > fold_cond_expr_with_comparison,
> > > >Need to handle (A - B) case as fold_cond_expr_with_comparison does.
> > > >Need to handle UN* comparisons.
> > > >...
> > > >...
> > > >
> > > > Sees IL that has the 0.0 propagated.
> > > >
> > > > Instead of:
> > > >
> > > >[local count: 1073741824]:
> > > >   if (A_2(D) == 0.0)
> > > > goto ; [34.00%]
> > > >   else
> > > > goto ; [66.00%]
> > > >
> > > >[local count: 708669601]:
> > > >   _3 = -A_2(D);
> > > >
> > > >[local count: 1073741824]:
> > > >   # _1 = PHI 
> > > >
> > > > It now sees:
> > > >
> > > >[local count: 1073741824]:
> > > >   # _1 = PHI <0.0(2), _3(3)>
> > > >
> > > > which it leaves untouched, causing the if conditional to survive.
> > > >
> > > > Is this something that can be done by improving the match.pd pattern,
> > > > or should be done elsewhere?
> > >
> > > Oh the pattern which is supposed to catch this does:
> > >   (simplify
> > >(cnd (cmp @0 zerop) integer_zerop (negate@1 @0))
> > > (if (!HONOR_SIGNED_ZEROS (type))
> > >  @1))
> >
> > On trunk without any patches, for the following snippet with -O2
> > -fno-signed-zeros -fdump-tree-phiopt-folding...
> >
> > float f0(float A)
> > {
> >   // A == 0? A : -Asame as -A
> >   if (A == 0)  return A;
> >   return -A;
> > }
> >
> > ...the phiopt2 dump file has:
> >
> > Applying pattern match.pd:4805, gimple-match.cc:69291, which
> > corresponds to the aforementioned pattern.  So it looks like that was
> > the pattern that was matching that isn't any more?
> >
> > Are you saying this pattern should only work with integers?
>
> I am saying the pattern which is right after the one that matches
> (without your patch) currrently works for integer only.
> You could change integer_zerop to zerop in that pattern but I am not
> 100% sure that is valid thing to do.
> Note there are a few other patterns in that for loop that does
> integer_zerop which might need to be zerop too.
>
> Thanks,
> Andrew Pinski
>
> >
> > Aldy
> >
>
From 67e61693dfda7fa6dadb0bddc62b2d70a88d9dce Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Tue, 23 Aug 2022 13:36:33 +0200
Subject: [PATCH] Add support for floating point endpoints to frange.

The current implementation of frange is just a type with some bits to
represent NAN and INF.  We can do better and represent endpoints to
ultimately solve longstanding PRs such as PR24021.  This patch adds
these endpoints.  In follow-up patches I will add support for
a bare bones PLUS_EXPR range-op-float entry to solve the PR.

I have chosen to use REAL_VALUE_TYPEs for the endpoints, since that's
what we use underneath the trees.  This will be somewhat analogous to

Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Aldy Hernandez via Gcc-patches
On Mon, Aug 29, 2022 at 3:22 PM Jakub Jelinek  wrote:
>
> On Mon, Aug 29, 2022 at 03:13:21PM +0200, Aldy Hernandez wrote:
> > It seems to me we can do this optimization regardless, but then treat
> > positive and negative zero the same throughout the frange class.
> > Particularly, in frange::singleton_p().  We should never return TRUE
> > for any version of 0.0.  This will keep VRP from propagating an
> > incorrect 0.0, since all VRP does is propagate when a range is
> > provably a singleton.  Also, frange::zero_p() shall return true for
> > any version of 0.0.
>
> Well, I think for HONOR_SIGNED_ZEROS it would be nice if frange was able to
> differentiate between 0.0 and -0.0.
> One reason is e.g. to be able to optimize copysign/signbit - if we can
> prove that the sign bit on some value will be always cleared or always set,
> we can fold those.
> On the other side, with -fno-signed-zeros it is invalid to use
> copysign/signbit on values that could be zero (well, nothing guarantees
> whether the sign bit is set or clear), so for MODE_HAS_SIGNED_ZEROS &&
> !HONOR_SIGNED_ZEROS it is best to treat contains_p as {-0.0,0.0} being
> one thing (just not singleton_p) and not bother with details like whether
> a range ends or starts with -0.0 or 0.0, either of them would work the same.
> And for !MODE_HAS_SIGNED_ZEROS, obviously 0.0 can be singleton_p.

*head explodes*

Ok, I think I can add a zero property we can track (like we do for
NAN), and set it appropriately at constant creation and upon results
from copysign/signbit.  However, I am running out of time before
Cauldron, so I think I'll just treat +-0.0 ambiguously for now, and do
that as a follow-up.

Aldy



Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Jakub Jelinek via Gcc-patches
On Mon, Aug 29, 2022 at 03:13:21PM +0200, Aldy Hernandez wrote:
> It seems to me we can do this optimization regardless, but then treat
> positive and negative zero the same throughout the frange class.
> Particularly, in frange::singleton_p().  We should never return TRUE
> for any version of 0.0.  This will keep VRP from propagating an
> incorrect 0.0, since all VRP does is propagate when a range is
> provably a singleton.  Also, frange::zero_p() shall return true for
> any version of 0.0.

Well, I think for HONOR_SIGNED_ZEROS it would be nice if frange was able to
differentiate between 0.0 and -0.0.
One reason is e.g. to be able to optimize copysign/signbit - if we can
prove that the sign bit on some value will be always cleared or always set,
we can fold those.
On the other side, with -fno-signed-zeros it is invalid to use
copysign/signbit on values that could be zero (well, nothing guarantees
whether the sign bit is set or clear), so for MODE_HAS_SIGNED_ZEROS &&
!HONOR_SIGNED_ZEROS it is best to treat contains_p as {-0.0,0.0} being
one thing (just not singleton_p) and not bother with details like whether
a range ends or starts with -0.0 or 0.0, either of them would work the same.
And for !MODE_HAS_SIGNED_ZEROS, obviously 0.0 can be singleton_p.

Jakub



Re: [PATCH] [ranger] x == -0.0 does not mean we can replace x with -0.0

2022-08-29 Thread Aldy Hernandez via Gcc-patches
On Fri, Aug 26, 2022 at 6:40 PM Jakub Jelinek  wrote:
>
> On Fri, Aug 26, 2022 at 05:46:06PM +0200, Aldy Hernandez wrote:
> > On the true side of x == -0.0, we can't just blindly value propagate
> > the -0.0 into every use of x because x could be +0.0 (or vice versa).
> >
> > With this change, we only allow the transformation if
> > !HONOR_SIGNED_ZEROS or if the range is known not to contain 0.
> >
> > Will commit after tests complete.
> >
> > gcc/ChangeLog:
> >
> >   * range-op-float.cc (foperator_equal::op1_range): Do not blindly
> >   copy op2 range when honoring signed zeros.
> > ---
> >  gcc/range-op-float.cc | 17 +++--
> >  1 file changed, 15 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc
> > index ad2fae578d2..ff9fe312acf 100644
> > --- a/gcc/range-op-float.cc
> > +++ b/gcc/range-op-float.cc
> > @@ -252,8 +252,21 @@ foperator_equal::op1_range (frange , tree type,
> >switch (get_bool_state (r, lhs, type))
> >  {
> >  case BRS_TRUE:
> > -  // If it's true, the result is the same as OP2.
> > -  r = op2;
> > +  if (HONOR_SIGNED_ZEROS (type)
> > +   && op2.contains_p (build_zero_cst (type)))
>
> What exactly does op2.contains_p for zero?
> Does it use real_compare/real_equal under the hood, so it is
> equivalent to op2 == 0.0 or op2 == -0.0, where both will be
> true whether op2 is -0.0 or 0.0?  Or is it more strict and
> checks whether it is actually a positive zero?

frange::contains_p() uses real_compare(), so both -0.0 and 0.0 will
come out as true:

  return (real_compare (GE_EXPR, TREE_REAL_CST_PTR (cst), _min)
  && real_compare (LE_EXPR, TREE_REAL_CST_PTR (cst), _max));

I thought about this some more, and you're right, dropping to VARYING
is a big hammer.

It seems to me we can do this optimization regardless, but then treat
positive and negative zero the same throughout the frange class.
Particularly, in frange::singleton_p().  We should never return TRUE
for any version of 0.0.  This will keep VRP from propagating an
incorrect 0.0, since all VRP does is propagate when a range is
provably a singleton.  Also, frange::zero_p() shall return true for
any version of 0.0.

I fleshed out all the relational operators (with endpoints) with this
approach, and everything worked out...including go, ada, and fortran,
which had given me headaches.  As a bonus, we can get rid of the
INF/NINF property bits I was keeping around, since now the range will
have actual endpoints.  I will repost the full frange endpoints patch
(and CC you) in the appropriate thread.

Aldy

> In any case, for HONOR_SIGNED_ZEROS, VARYING is unnecessary, all you
> can do is extend the r range to contain both -0.0 and +0.0 if it contains
> at least one of them.
>
> > + {
> > +   // With signed zeros, x == -0.0 does not mean we can replace
> > +   // x with -0.0, because x may be either +0.0 or -0.0.
> > +   r.set_varying (type);
> > + }
> > +  else
> > + {
> > +   // If it's true, the result is the same as OP2.
> > +   //
> > +   // If the range does not actually contain zeros, this should
> > +   // always be OK.
> > +   r = op2;
> > + }
>
> !HONOR_SIGNED_ZEROS doesn't imply that negative zeros won't appear,
> but says that user doesn't care if he gets a positive or negative zero
> (unless !MODE_HAS_SIGNED_ZEROS - in that case -0.0 doesn't exist
> and one doesn't need to bother with it).
>
> Now, if all the code setting franges makes sure that for
> MODE_HAS_SIGNED_ZEROS && !HONOR_SIGNED_ZEROS if +0.0 or -0.0 are inside
> of a range, then both -0.0 and +0.0 are in the range, then yes,
> you can use r = op2;
>
> >// The TRUE side of op1 == op2 implies op1 is !NAN.
> >r.set_nan (fp_prop::NO);
> >break;
>
> Jakub
>



Re: [PATCH 1/2] vec: Add array_slice constructors from non-const and gc vectors

2022-08-29 Thread Richard Biener via Gcc-patches
On Mon, 29 Aug 2022, Martin Jambor wrote:

> Hi,
> 
> On Mon, Aug 29 2022, Richard Biener wrote:
> > On Mon, 29 Aug 2022, Martin Jambor wrote:
> >
> >> Hi again,
> >> 
> >> On Mon, Aug 29 2022, Richard Biener wrote:
> >> > On Fri, 26 Aug 2022, Martin Jambor wrote:
> >> >
> >> >> Hi,
> >> >> 
> >> >> On Fri, Aug 26 2022, Richard Biener wrote:
> >> >> >> Am 26.08.2022 um 18:39 schrieb Martin Jambor :
> >> >> >>
> >> >> >> Hi,
> >> >> >>
> >> >> >> This patch adds constructors of array_slice that are required to
> >> >> >> create them from non-const (heap or auto) vectors or from GC vectors.
> >> >> >>
> >> >> >> The use of non-const array_slices is somewhat limited, as creating 
> >> >> >> one
> >> >> >> from const vec still leads to array_slice >> >> >> some_type>,
> >> >> >> so I eventually also only resorted to having read-only array_slices.
> >> >> >> But I do need the constructor from the gc vector.
> >> >> >>
> >> >> >> Bootstrapped and tested along code that actually uses it on
> >> >> >> x86_64-linux.  OK for trunk?
> >> >> >>
> >> >> >> Thanks,
> >> >> >>
> >> >> >> Martin
> >> >> >>
> >> >> >>
> >> >> >> gcc/ChangeLog:
> >> >> >>
> >> >> >> 2022-08-08  Martin Jambor  
> >> >> >>
> >> >> >>* vec.h (array_slice): Add constructors for non-const reference to
> >> >> >>heap vector and pointers to heap vectors.
> >> >> >> ---
> >> >> >> gcc/vec.h | 12 
> >> >> >> 1 file changed, 12 insertions(+)
> >> >> >>
> >> >> >> diff --git a/gcc/vec.h b/gcc/vec.h
> >> >> >> index eed075addc9..b0477e1044c 100644
> >> >> >> --- a/gcc/vec.h
> >> >> >> +++ b/gcc/vec.h
> >> >> >> @@ -2264,6 +2264,18 @@ public:
> >> >> >>   array_slice (const vec )
> >> >> >> : m_base (v.address ()), m_size (v.length ()) {}
> >> >> >>
> >> >> >> +  template
> >> >> >> +  array_slice (vec )
> >> >> >> +: m_base (v.address ()), m_size (v.length ()) {}
> >> >> >> +
> >> >> >> +  template
> >> >> >> +  array_slice (const vec *v)
> >> >> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length 
> >> >> >> () : 0) {}
> >> >> >> +
> >> >> >> +  template
> >> >> >> +  array_slice (vec *v)
> >> >> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length 
> >> >> >> () : 0) {}
> >> >> >> +
> >> >> >
> >> >> > I don?t quite understand why the generic ctor doesn?t cover the GC 
> >> >> > case.  It looks more like reference vs pointer?
> >> >> >
> >> >> 
> >> >> If you think that this should work:
> >> >> 
> >> >>   vec *heh = cfun->local_decls;
> >> >>   array_slice arr_slice (*heh);
> >> >> 
> >> >> then it does not:
> >> >> 
> >> >>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: error: no matching 
> >> >> function for call to 
> >> >> ?array_slice::array_slice(vec&)?
> >> >>6693 |   array_slice arr_slice (*heh);
> >> >> |^
> >> >>   In file included from /home/mjambor/gcc/mine/src/gcc/hash-table.h:248,
> >> >>from /home/mjambor/gcc/mine/src/gcc/coretypes.h:486,
> >> >>from /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:105:
> >> >>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note: candidate: 
> >> >> ?template array_slice::array_slice(const vec&) 
> >> >> [with T = tree_node*]?
> >> >>2264 |   array_slice (const vec )
> >> >> |   ^~~
> >> >>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note:   template 
> >> >> argument deduction/substitution failed:
> >> >>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: note:   mismatched 
> >> >> types ?va_heap? and ?va_gc?
> >> >>6693 |   array_slice arr_slice (*heh);
> >> >> |^
> >> >> 
> >> >>   [... I trimmed notes about all other candidates...]
> >> >> 
> >> >> Or did you mean something else?
> >> >
> >> > Hmm, so what if you change
> >> >
> >> >   template
> >> >   array_slice (const vec )
> >> > : m_base (v.address ()), m_size (v.length ()) {}
> >> >
> >> > to
> >> >
> >> >   template
> >> >   array_slice (const vec )
> >> > : m_base (v.address ()), m_size (v.length ()) {}
> >> >
> >> > instead?  Thus allow any allocation / placement template arg?
> >> >
> >> 
> >> So being fully awake helps, the issue was of course in how I tested the
> >> code, the above works fine and I can adapt my code to use that.
> >> 
> >> However, is it really preferable?
> >> 
> >> We often use NULL as to mean zero-length vector, which my code handled
> >> gracefully:
> >> 
> >> +  template
> >> +  array_slice (const vec *v)
> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) 
> >> {}
> >> 
> >> whereas using the generic method will mean that users constructing the
> >> vector will have to special case it - and I bet most will end up using
> >> the above sequence and the constructor from explicit pointer and size in
> >> all constructors from gc vectors.
> >> 
> >> So, should I really change the patch and my code?
> >
> > Well, it's also inconsistent with a supposed use like
> 

Re: [PATCH] Make uninit PHI processing more consistent

2022-08-29 Thread Richard Biener via Gcc-patches
On Mon, 29 Aug 2022, Richard Biener wrote:

[...]

> The patch correctly diagnoses an uninitalized use of 'regnum'
> in store_bit_field_1 but also diagnoses an uninitialized use of
> best_match::m_best_candidate_len which I've chosen to silence
> without analyzing it in detail (I'm doing that right now).

To followup myself this is

  cpp_hashnode *best_macro = bmm.get_best_meaningful_candidate ();
  /* If a macro is the closest so far to NAME, use it, creating an 
 identifier tree node for it.  */
  if (best_macro)
{
  const char *id = (const char *)best_macro->ident.str;
  tree macro_as_identifier
= get_identifier_with_length (id, best_macro->ident.len);
  bm.set_best_so_far (macro_as_identifier,
  bmm.get_best_distance (),
  bmm.get_best_candidate_length ());

and

  edit_distance_t get_cutoff (size_t candidate_len) const
  {
return ::get_edit_distance_cutoff (m_goal_len, candidate_len);
  }
   
  candidate_t get_best_meaningful_candidate () const
  {
/* If the edit distance is too high, the suggestion is likely to be
   meaningless.  */
if (m_best_candidate)
  {
edit_distance_t cutoff = get_cutoff (m_best_candidate_len);
if (m_best_distance > cutoff)
  return NULL;
}

where the connection between m_best_candidate_len being initialized
when m_best_candidate is not NULL is not visible.  I think it's
OK to initialize the member together with m_best_candidate here.

I'm reducing a testcase, but not sure where that will go.

Richard.


[PATCH] Make uninit PHI processing more consistent

2022-08-29 Thread Richard Biener via Gcc-patches
Currently the main working of the maybe-uninit pass is to scan over
all PHIs with possibly undefined arguments, diagnosing whether there's
a direct not guarded use.  For not guarded uses in PHIs those are queued for
later processing and to make the uninit analysis PHI def handling work,
mark the PHI def as possibly uninitialized.  But this happens only
for those PHI uses that happen to be seen before a direct not guarded
use and whether all arguments of a PHI node which are defined by a PHI
are properly marked as maybe uninitialized depends on the processing
order.

The following changes the uninit pass to perform an RPO walk over
the function, ensuring that PHI argument defs are visited before
the PHI node (besides backedge uses which we ignore already),
getting rid of the worklist.  It also makes sure to process all
PHI uses, but recording those that are properly guarded so they
are not treated as maybe undefined when processing the PHI use
later.

Overall this should make behavior more consistent, avoid some
false negative because of the previous early out and order issue,
and avoid some false positive because of the missed recording
of guarded PHI uses.

The patch correctly diagnoses an uninitalized use of 'regnum'
in store_bit_field_1 but also diagnoses an uninitialized use of
best_match::m_best_candidate_len which I've chosen to silence
without analyzing it in detail (I'm doing that right now).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

* gimple-predicate-analysis.h (uninit_analysis::operator()):
Remove.
* gimple-predicate-analysis.cc
(uninit_analysis::collect_phi_def_edges): Use phi_arg_set,
simplify a bit.
* tree-ssa-uninit.cc (defined_args): New global.
(compute_uninit_opnds_pos): Mask with the recorded set
of guarded maybe-uninitialized uses.
(uninit_undef_val_t::operator()): Remove.
(find_uninit_use): Process all PHI uses, recording the
guarded ones and marking the PHI result as uninitialized
consistently.
(warn_uninitialized_phi): Adjust.
(execute_late_warn_uninitialized): Get rid of the PHI worklist
and instead walk the function in RPO order.

* expmed.cc (store_bit_field_1): Initialize regnum.
* spellcheck.h (best_match::m_best_candidate_len): Initialize.
---
 gcc/expmed.cc|   2 +-
 gcc/gimple-predicate-analysis.cc |  49 +-
 gcc/gimple-predicate-analysis.h  |   2 -
 gcc/spellcheck.h |   3 +-
 gcc/tree-ssa-uninit.cc   | 149 +--
 5 files changed, 83 insertions(+), 122 deletions(-)

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 8d7418be418..cdc0adb3892 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -794,7 +794,7 @@ store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, 
poly_uint64 bitnum,
 words or to cope with mode punning between equal-sized modes.
 In the latter case, use subreg on the rhs side, not lhs.  */
   rtx sub;
-  HOST_WIDE_INT regnum;
+  HOST_WIDE_INT regnum = 0;
   poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
   if (known_eq (bitnum, 0U)
  && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0
diff --git a/gcc/gimple-predicate-analysis.cc b/gcc/gimple-predicate-analysis.cc
index e388bb37685..3f9b80d9128 100644
--- a/gcc/gimple-predicate-analysis.cc
+++ b/gcc/gimple-predicate-analysis.cc
@@ -543,35 +543,13 @@ uninit_analysis::collect_phi_def_edges (gphi *phi, 
basic_block cd_root,
 return;
 
   unsigned n = gimple_phi_num_args (phi);
+  unsigned opnds_arg_phi = m_eval.phi_arg_set (phi);
   for (unsigned i = 0; i < n; i++)
 {
-  edge opnd_edge = gimple_phi_arg_edge (phi, i);
-  tree opnd = gimple_phi_arg_def (phi, i);
-
-  if (TREE_CODE (opnd) == SSA_NAME)
-   {
- gimple *def = SSA_NAME_DEF_STMT (opnd);
-
- if (!m_eval (opnd))
-   {
- if (dump_file && (dump_flags & TDF_DETAILS))
-   {
- fprintf (dump_file,
-  "\tFound def edge %i -> %i for cd_root %i "
-  "and operand %u of: ",
-  opnd_edge->src->index, opnd_edge->dest->index,
-  cd_root->index, i);
- print_gimple_stmt (dump_file, phi, 0);
-   }
- edges->safe_push (opnd_edge);
-   }
- else if (gimple_code (def) == GIMPLE_PHI
-  && dominated_by_p (CDI_DOMINATORS, gimple_bb (def), cd_root))
-   collect_phi_def_edges (as_a (def), cd_root, edges,
-  visited);
-   }
-  else
+  if (!MASK_TEST_BIT (opnds_arg_phi, i))
{
+ /* Add the edge for a not maybe-undefined edge value.  */
+ edge opnd_edge = gimple_phi_arg_edge (phi, i);
  if (dump_file && (dump_flags & TDF_DETAILS))
{
  fprintf 

Re: [PATCH 1/2] vec: Add array_slice constructors from non-const and gc vectors

2022-08-29 Thread Martin Jambor
Hi,

On Mon, Aug 29 2022, Richard Biener wrote:
> On Mon, 29 Aug 2022, Martin Jambor wrote:
>
>> Hi again,
>> 
>> On Mon, Aug 29 2022, Richard Biener wrote:
>> > On Fri, 26 Aug 2022, Martin Jambor wrote:
>> >
>> >> Hi,
>> >> 
>> >> On Fri, Aug 26 2022, Richard Biener wrote:
>> >> >> Am 26.08.2022 um 18:39 schrieb Martin Jambor :
>> >> >>
>> >> >> Hi,
>> >> >>
>> >> >> This patch adds constructors of array_slice that are required to
>> >> >> create them from non-const (heap or auto) vectors or from GC vectors.
>> >> >>
>> >> >> The use of non-const array_slices is somewhat limited, as creating one
>> >> >> from const vec still leads to array_slice,
>> >> >> so I eventually also only resorted to having read-only array_slices.
>> >> >> But I do need the constructor from the gc vector.
>> >> >>
>> >> >> Bootstrapped and tested along code that actually uses it on
>> >> >> x86_64-linux.  OK for trunk?
>> >> >>
>> >> >> Thanks,
>> >> >>
>> >> >> Martin
>> >> >>
>> >> >>
>> >> >> gcc/ChangeLog:
>> >> >>
>> >> >> 2022-08-08  Martin Jambor  
>> >> >>
>> >> >>* vec.h (array_slice): Add constructors for non-const reference to
>> >> >>heap vector and pointers to heap vectors.
>> >> >> ---
>> >> >> gcc/vec.h | 12 
>> >> >> 1 file changed, 12 insertions(+)
>> >> >>
>> >> >> diff --git a/gcc/vec.h b/gcc/vec.h
>> >> >> index eed075addc9..b0477e1044c 100644
>> >> >> --- a/gcc/vec.h
>> >> >> +++ b/gcc/vec.h
>> >> >> @@ -2264,6 +2264,18 @@ public:
>> >> >>   array_slice (const vec )
>> >> >> : m_base (v.address ()), m_size (v.length ()) {}
>> >> >>
>> >> >> +  template
>> >> >> +  array_slice (vec )
>> >> >> +: m_base (v.address ()), m_size (v.length ()) {}
>> >> >> +
>> >> >> +  template
>> >> >> +  array_slice (const vec *v)
>> >> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () 
>> >> >> : 0) {}
>> >> >> +
>> >> >> +  template
>> >> >> +  array_slice (vec *v)
>> >> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () 
>> >> >> : 0) {}
>> >> >> +
>> >> >
>> >> > I don?t quite understand why the generic ctor doesn?t cover the GC 
>> >> > case.  It looks more like reference vs pointer?
>> >> >
>> >> 
>> >> If you think that this should work:
>> >> 
>> >>   vec *heh = cfun->local_decls;
>> >>   array_slice arr_slice (*heh);
>> >> 
>> >> then it does not:
>> >> 
>> >>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: error: no matching 
>> >> function for call to 
>> >> ?array_slice::array_slice(vec&)?
>> >>6693 |   array_slice arr_slice (*heh);
>> >> |^
>> >>   In file included from /home/mjambor/gcc/mine/src/gcc/hash-table.h:248,
>> >>from /home/mjambor/gcc/mine/src/gcc/coretypes.h:486,
>> >>from /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:105:
>> >>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note: candidate: 
>> >> ?template array_slice::array_slice(const vec&) 
>> >> [with T = tree_node*]?
>> >>2264 |   array_slice (const vec )
>> >> |   ^~~
>> >>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note:   template argument 
>> >> deduction/substitution failed:
>> >>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: note:   mismatched 
>> >> types ?va_heap? and ?va_gc?
>> >>6693 |   array_slice arr_slice (*heh);
>> >> |^
>> >> 
>> >>   [... I trimmed notes about all other candidates...]
>> >> 
>> >> Or did you mean something else?
>> >
>> > Hmm, so what if you change
>> >
>> >   template
>> >   array_slice (const vec )
>> > : m_base (v.address ()), m_size (v.length ()) {}
>> >
>> > to
>> >
>> >   template
>> >   array_slice (const vec )
>> > : m_base (v.address ()), m_size (v.length ()) {}
>> >
>> > instead?  Thus allow any allocation / placement template arg?
>> >
>> 
>> So being fully awake helps, the issue was of course in how I tested the
>> code, the above works fine and I can adapt my code to use that.
>> 
>> However, is it really preferable?
>> 
>> We often use NULL as to mean zero-length vector, which my code handled
>> gracefully:
>> 
>> +  template
>> +  array_slice (const vec *v)
>> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {}
>> 
>> whereas using the generic method will mean that users constructing the
>> vector will have to special case it - and I bet most will end up using
>> the above sequence and the constructor from explicit pointer and size in
>> all constructors from gc vectors.
>> 
>> So, should I really change the patch and my code?
>
> Well, it's also inconsistent with a supposed use like
>
>   vec *v = NULL;
>   auto slice = array_slice (v);
>
> no?  So, if we want to provide a "safe" (as in, handle NULL pointer)
> CTOR, don't we want to handle non-GC allocated vectors the same way?
>

Our safe_* functions usually do no work with normal non-GC vectors
(which are not vl_embed), they do not accept them.  I guess that is
because that is 

Re: [PATCH 1/2] vec: Add array_slice constructors from non-const and gc vectors

2022-08-29 Thread Richard Biener via Gcc-patches
On Mon, 29 Aug 2022, Martin Jambor wrote:

> Hi again,
> 
> On Mon, Aug 29 2022, Richard Biener wrote:
> > On Fri, 26 Aug 2022, Martin Jambor wrote:
> >
> >> Hi,
> >> 
> >> On Fri, Aug 26 2022, Richard Biener wrote:
> >> >> Am 26.08.2022 um 18:39 schrieb Martin Jambor :
> >> >>
> >> >> Hi,
> >> >>
> >> >> This patch adds constructors of array_slice that are required to
> >> >> create them from non-const (heap or auto) vectors or from GC vectors.
> >> >>
> >> >> The use of non-const array_slices is somewhat limited, as creating one
> >> >> from const vec still leads to array_slice,
> >> >> so I eventually also only resorted to having read-only array_slices.
> >> >> But I do need the constructor from the gc vector.
> >> >>
> >> >> Bootstrapped and tested along code that actually uses it on
> >> >> x86_64-linux.  OK for trunk?
> >> >>
> >> >> Thanks,
> >> >>
> >> >> Martin
> >> >>
> >> >>
> >> >> gcc/ChangeLog:
> >> >>
> >> >> 2022-08-08  Martin Jambor  
> >> >>
> >> >>* vec.h (array_slice): Add constructors for non-const reference to
> >> >>heap vector and pointers to heap vectors.
> >> >> ---
> >> >> gcc/vec.h | 12 
> >> >> 1 file changed, 12 insertions(+)
> >> >>
> >> >> diff --git a/gcc/vec.h b/gcc/vec.h
> >> >> index eed075addc9..b0477e1044c 100644
> >> >> --- a/gcc/vec.h
> >> >> +++ b/gcc/vec.h
> >> >> @@ -2264,6 +2264,18 @@ public:
> >> >>   array_slice (const vec )
> >> >> : m_base (v.address ()), m_size (v.length ()) {}
> >> >>
> >> >> +  template
> >> >> +  array_slice (vec )
> >> >> +: m_base (v.address ()), m_size (v.length ()) {}
> >> >> +
> >> >> +  template
> >> >> +  array_slice (const vec *v)
> >> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 
> >> >> 0) {}
> >> >> +
> >> >> +  template
> >> >> +  array_slice (vec *v)
> >> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 
> >> >> 0) {}
> >> >> +
> >> >
> >> > I don?t quite understand why the generic ctor doesn?t cover the GC case. 
> >> >  It looks more like reference vs pointer?
> >> >
> >> 
> >> If you think that this should work:
> >> 
> >>   vec *heh = cfun->local_decls;
> >>   array_slice arr_slice (*heh);
> >> 
> >> then it does not:
> >> 
> >>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: error: no matching 
> >> function for call to ?array_slice::array_slice(vec >> va_gc>&)?
> >>6693 |   array_slice arr_slice (*heh);
> >> |^
> >>   In file included from /home/mjambor/gcc/mine/src/gcc/hash-table.h:248,
> >>from /home/mjambor/gcc/mine/src/gcc/coretypes.h:486,
> >>from /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:105:
> >>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note: candidate: 
> >> ?template array_slice::array_slice(const vec&) 
> >> [with T = tree_node*]?
> >>2264 |   array_slice (const vec )
> >> |   ^~~
> >>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note:   template argument 
> >> deduction/substitution failed:
> >>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: note:   mismatched 
> >> types ?va_heap? and ?va_gc?
> >>6693 |   array_slice arr_slice (*heh);
> >> |^
> >> 
> >>   [... I trimmed notes about all other candidates...]
> >> 
> >> Or did you mean something else?
> >
> > Hmm, so what if you change
> >
> >   template
> >   array_slice (const vec )
> > : m_base (v.address ()), m_size (v.length ()) {}
> >
> > to
> >
> >   template
> >   array_slice (const vec )
> > : m_base (v.address ()), m_size (v.length ()) {}
> >
> > instead?  Thus allow any allocation / placement template arg?
> >
> 
> So being fully awake helps, the issue was of course in how I tested the
> code, the above works fine and I can adapt my code to use that.
> 
> However, is it really preferable?
> 
> We often use NULL as to mean zero-length vector, which my code handled
> gracefully:
> 
> +  template
> +  array_slice (const vec *v)
> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {}
> 
> whereas using the generic method will mean that users constructing the
> vector will have to special case it - and I bet most will end up using
> the above sequence and the constructor from explicit pointer and size in
> all constructors from gc vectors.
> 
> So, should I really change the patch and my code?

Well, it's also inconsistent with a supposed use like

  vec *v = NULL;
  auto slice = array_slice (v);

no?  So, if we want to provide a "safe" (as in, handle NULL pointer)
CTOR, don't we want to handle non-GC allocated vectors the same way?

Btw, we have

  template
  array_slice (T ()[N]) : m_base (array), m_size (N) {}

which would suggest handling NULL isn't desired(?)

Richard.


[Patch] libgomp.texi: Document libmemkind + nvptx/gcn specifics

2022-08-29 Thread Tobias Burnus

I had this patch lying around since about half a year. I did tweak and 
agumented it
a bit today, but finally want to get rid of it (locally - by getting it 
committed) ...

This patch changes -misa to -march for nvptx (the latter is now an alias
for the former), it adds a new section about libmemkind and some information
about interns of our nvptx/gcn implementation. (The latter should be mostly
correct, but I might have missed some fine print or a more recent update.)

OK for mainline?

Tobias


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp.texi: Document libmemkind + nvptx/gcn specifics

libgomp/ChangeLog:

	* libgomp.texi (OpenMP-Implementation Specifics): New; add libmemkind
	section; move OpenMP Context Selectors from ...
	(Offload-Target Specifics): ... here; add 'AMD Radeo (GCN)' and
	'nvptx' sections.

 libgomp/libgomp.texi | 132 ---
 1 file changed, 126 insertions(+), 6 deletions(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 6298de8254c..4c5903b55cc 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -113,6 +113,8 @@ changed to GNU Offloading and Multi Processing Runtime Library.
 * OpenACC Library Interoperability:: OpenACC library interoperability with the
NVIDIA CUBLAS library.
 * OpenACC Profiling Interface::
+* OpenMP-Implementation Specifics:: Notes specifics of this OpenMP
+   implementation
 * Offload-Target Specifics::   Notes on offload-target specific internals
 * The libgomp ABI::Notes on the external ABI presented by libgomp.
 * Reporting Bugs:: How to report bugs in the GNU Offloading and
@@ -4280,16 +4282,15 @@ offloading devices (it's not clear if they should be):
 @end itemize
 
 @c -
-@c Offload-Target Specifics
+@c OpenMP-Implementation Specifics
 @c -
 
-@node Offload-Target Specifics
-@chapter Offload-Target Specifics
-
-The following sections present notes on the offload-target specifics.
+@node OpenMP-Implementation Specifics:
+@chapter OpenMP-Implementation Specifics:
 
 @menu
 * OpenMP Context Selectors::
+* Memory allocation with libmemkind::
 @end menu
 
 @node OpenMP Context Selectors
@@ -4308,9 +4309,128 @@ The following sections present notes on the offload-target specifics.
   @tab See @code{-march=} in ``AMD GCN Options''
 @item @code{nvptx}
   @tab @code{gpu}
-  @tab See @code{-misa=} in ``Nvidia PTX Options''
+  @tab See @code{-march=} in ``Nvidia PTX Options''
 @end multitable
 
+@node Memory allocation with libmemkind
+@section Memory allocation with libmemkind
+
+On Linux systems, where the @uref{https://github.com/memkind/memkind, memkind
+library} (@code{libmemkind.so.0}) is available at runtime, it is used when
+creating memory allocators requesting
+
+@itemize
+@item the memory space @code{omp_high_bw_mem_space}
+@item the memory space @code{omp_large_cap_mem_space}
+@item the partition trait @code{omp_atv_interleaved}
+@end itemize
+
+
+@c -
+@c Offload-Target Specifics
+@c -
+
+@node Offload-Target Specifics
+@chapter Offload-Target Specifics
+
+The following sections present notes on the offload-target specifics
+
+@menu
+* AMD Radeon::
+* nvptx::
+@end menu
+
+@node AMD Radeon
+@section AMD Radeon (GCN)
+
+On the hardware side, there is the hierarchy (fine to coarse):
+@itemize
+@item work item (thread)
+@item wavefront
+@item work group
+@item compute unite (CU)
+@end itemize
+
+All OpenMP and OpenACC levels are used, i.e.
+@itemize
+@item OpenMP's simd and OpenACC's vector map to work items (thread)
+@item OpenMP's threads (``parallel'') and OpenACC's workers map
+  to wavefronts
+@item OpenMP's teams and OpenACC's gang use use a threadpool with the
+  size of the number of teams or gangs, respectively.
+@end itemize
+
+The used sizes are
+@itemize
+@item Number of teams is the specified @code{num_teams} (OpenMP) or
+  @code{num_gangs} (OpenACC) or otherwise the number of CU
+@item Number of wavefronts is 4 for gfx900 and 16 otherwise;
+  @code{num_threads} (OpenMP) and @code{num_workers} (OpenACC)
+  overrides this if smaller.
+@item The wavefront has 102 scalars and 64 vectors
+@item Number of workitems is always 64
+@item The hardware permits maximally 40 workgroups/CU and
+  16 wavefronts/workgroup up to a limit of 40 wavefronts in total per CU.
+@item 80 scalars registers and 24 vector registers in non-kernel functions
+  (the chosen procedure-calling 

Re: [PATCH]middle-end Use subregs to expand COMPLEX_EXPR to set the lowpart.

2022-08-29 Thread Richard Biener via Gcc-patches
On Tue, 5 Jul 2022, Richard Sandiford wrote:

> Tamar Christina  writes:
> >> > so that the multiple_p test is skipped if the structure is undefined.
> >> 
> >> Actually, we should probably skip the constant_multiple_p test as well.
> >> Keeping it would only be meaningful for little-endian.
> >> 
> >> simplify_gen_subreg should alread do the necessary checks to make sure
> >> that the subreg can be formed (via validate_subreg).
> >> 
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> > and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * expmed.cc (store_bit_field):
> > * expmed.cc (store_bit_field_1): Add parameter that indicates if 
> > value is
> > still undefined and if so emit a subreg move instead.
> > (store_integral_bit_field): Likewise.
> > (store_bit_field): Likewise.
> > * expr.h (write_complex_part): Likewise.
> > * expmed.h (store_bit_field): Add new parameter.
> > * builtins.cc (expand_ifn_atomic_compare_exchange_into_call): Use new
> > parameter.
> > (expand_ifn_atomic_compare_exchange): Likewise.
> > * calls.cc (store_unaligned_arguments_into_pseudos): Likewise.
> > * emit-rtl.cc (validate_subreg): Likewise.
> > * expr.cc (emit_group_store): Likewise.
> > (copy_blkmode_from_reg): Likewise.
> > (copy_blkmode_to_reg): Likewise.
> > (clear_storage_hints): Likewise.
> > (write_complex_part):  Likewise.
> > (emit_move_complex_parts): Likewise.
> > (expand_assignment): Likewise.
> > (store_expr): Likewise.
> > (store_field): Likewise.
> > (expand_expr_real_2): Likewise.
> > * ifcvt.cc (noce_emit_move_insn): Likewise.
> > * internal-fn.cc (expand_arith_set_overflow): Likewise.
> > (expand_arith_overflow_result_store): Likewise.
> > (expand_addsub_overflow): Likewise.
> > (expand_neg_overflow): Likewise.
> > (expand_mul_overflow): Likewise.
> > (expand_arith_overflow): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * g++.target/aarch64/complex-init.C: New test.
> >
> > --- inline copy of patch ---
> >
> > [?]
> > diff --git a/gcc/testsuite/g++.target/aarch64/complex-init.C 
> > b/gcc/testsuite/g++.target/aarch64/complex-init.C
> > new file mode 100644
> > index 
> > ..d3fd3e88d04a87bacf1c4ee74ce25282c6ff81e8
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/aarch64/complex-init.C
> > @@ -0,0 +1,40 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> > +
> > +/*
> > +** _Z1fii:
> > +** ...
> > +** bfi x0, x1, 32, 32
> > +** ret
> > +** ...
> 
> Sorry for the test nit, but: it shouldn't be necessary to add ...
> after the ret.  Same for the other tests.
> 
> OK with that change, thanks.

This has

  HOST_WIDE_INT regnum;
...
  else if (((constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, 
)
 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
|| undefined_p)
   && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
{
  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 regnum * regsize);

where regnum is used uninitialized when undefined_p.  An uninit
improvement of mine will diagnose this now.

What's the intended behavior here?  Use regnum = 0?

Please fix.

Thanks,
Richard.

> Richard
> 
> > +*/
> > +_Complex int f(int a, int b) {
> > +_Complex int t = a + b * 1i;
> > +return t;
> > +}
> > +
> > +/*
> > +** _Z2f2ii:
> > +** ...
> > +** bfi x0, x1, 32, 32
> > +** ret
> > +** ...
> > +*/
> > +_Complex int f2(int a, int b) {
> > +_Complex int t = {a, b};
> > +return t;
> > +}
> > +
> > +/* 
> > +** _Z12f_convolutedii:
> > +** ...
> > +** bfi x0, x1, 32, 32
> > +** ret
> > +** ...
> > +*/
> > +_Complex int f_convoluted(int a, int b) {
> > +_Complex int t = (_Complex int)a;
> > +__imag__ t = b;
> > +return t;
> > +}
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


Re: [PATCH 1/2] vec: Add array_slice constructors from non-const and gc vectors

2022-08-29 Thread Martin Jambor
Hi again,

On Mon, Aug 29 2022, Richard Biener wrote:
> On Fri, 26 Aug 2022, Martin Jambor wrote:
>
>> Hi,
>> 
>> On Fri, Aug 26 2022, Richard Biener wrote:
>> >> Am 26.08.2022 um 18:39 schrieb Martin Jambor :
>> >>
>> >> Hi,
>> >>
>> >> This patch adds constructors of array_slice that are required to
>> >> create them from non-const (heap or auto) vectors or from GC vectors.
>> >>
>> >> The use of non-const array_slices is somewhat limited, as creating one
>> >> from const vec still leads to array_slice,
>> >> so I eventually also only resorted to having read-only array_slices.
>> >> But I do need the constructor from the gc vector.
>> >>
>> >> Bootstrapped and tested along code that actually uses it on
>> >> x86_64-linux.  OK for trunk?
>> >>
>> >> Thanks,
>> >>
>> >> Martin
>> >>
>> >>
>> >> gcc/ChangeLog:
>> >>
>> >> 2022-08-08  Martin Jambor  
>> >>
>> >>* vec.h (array_slice): Add constructors for non-const reference to
>> >>heap vector and pointers to heap vectors.
>> >> ---
>> >> gcc/vec.h | 12 
>> >> 1 file changed, 12 insertions(+)
>> >>
>> >> diff --git a/gcc/vec.h b/gcc/vec.h
>> >> index eed075addc9..b0477e1044c 100644
>> >> --- a/gcc/vec.h
>> >> +++ b/gcc/vec.h
>> >> @@ -2264,6 +2264,18 @@ public:
>> >>   array_slice (const vec )
>> >> : m_base (v.address ()), m_size (v.length ()) {}
>> >>
>> >> +  template
>> >> +  array_slice (vec )
>> >> +: m_base (v.address ()), m_size (v.length ()) {}
>> >> +
>> >> +  template
>> >> +  array_slice (const vec *v)
>> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 
>> >> 0) {}
>> >> +
>> >> +  template
>> >> +  array_slice (vec *v)
>> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 
>> >> 0) {}
>> >> +
>> >
>> > I don?t quite understand why the generic ctor doesn?t cover the GC case.  
>> > It looks more like reference vs pointer?
>> >
>> 
>> If you think that this should work:
>> 
>>   vec *heh = cfun->local_decls;
>>   array_slice arr_slice (*heh);
>> 
>> then it does not:
>> 
>>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: error: no matching 
>> function for call to ?array_slice::array_slice(vec> va_gc>&)?
>>6693 |   array_slice arr_slice (*heh);
>> |^
>>   In file included from /home/mjambor/gcc/mine/src/gcc/hash-table.h:248,
>>from /home/mjambor/gcc/mine/src/gcc/coretypes.h:486,
>>from /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:105:
>>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note: candidate: 
>> ?template array_slice::array_slice(const vec&) 
>> [with T = tree_node*]?
>>2264 |   array_slice (const vec )
>> |   ^~~
>>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note:   template argument 
>> deduction/substitution failed:
>>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: note:   mismatched types 
>> ?va_heap? and ?va_gc?
>>6693 |   array_slice arr_slice (*heh);
>> |^
>> 
>>   [... I trimmed notes about all other candidates...]
>> 
>> Or did you mean something else?
>
> Hmm, so what if you change
>
>   template
>   array_slice (const vec )
> : m_base (v.address ()), m_size (v.length ()) {}
>
> to
>
>   template
>   array_slice (const vec )
> : m_base (v.address ()), m_size (v.length ()) {}
>
> instead?  Thus allow any allocation / placement template arg?
>

So being fully awake helps, the issue was of course in how I tested the
code, the above works fine and I can adapt my code to use that.

However, is it really preferable?

We often use NULL as to mean zero-length vector, which my code handled
gracefully:

+  template
+  array_slice (const vec *v)
+: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {}

whereas using the generic method will mean that users constructing the
vector will have to special case it - and I bet most will end up using
the above sequence and the constructor from explicit pointer and size in
all constructors from gc vectors.

So, should I really change the patch and my code?

Thanks,

Martin


[PATCH] gcc: honour -ffile-prefix-map in ASM_MAP [PR93371]

2022-08-29 Thread Rasmus Villemoes
-ffile-prefix-map is supposed to be a superset of -fmacro-prefix-map
and -fdebug-prefix-map. However, when building .S or .s files, gas is
not called with the appropriate --debug-prefix-map option when
-ffile-prefix-map is used.

While the user can specify -fdebug-prefix-map when building assembly
files via gcc, it's more ergonomic to also support -ffile-prefix-map;
especially since for .S files that could contain the __FILE__ macro,
one would then also have to specify -fmacro-prefix-map.

gcc:
PR driver/93371
* gcc.cc (ASM_MAP): Honour -ffile-prefix-map.
---

I've tested that this works as expected, both by looking at how gas is
now invoked, and by running 'strings' on the generated .o file. But I
don't know how to add something to the testsuite for this.

I stumbled on this since it came up on the U-Boot mailing list:
https://lore.kernel.org/u-boot/4ed9f811-5244-54ef-b58e-83dba5151...@prevas.dk/
.

 gcc/gcc.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index b6d562a92f0..44eafc60187 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -878,7 +878,7 @@ proper position among the other output files.  */
 #endif
 
 #ifdef HAVE_AS_DEBUG_PREFIX_MAP
-#define ASM_MAP " %{fdebug-prefix-map=*:--debug-prefix-map %*}"
+#define ASM_MAP " %{ffile-prefix-map=*:--debug-prefix-map %*} 
%{fdebug-prefix-map=*:--debug-prefix-map %*}"
 #else
 #define ASM_MAP ""
 #endif
-- 
2.37.2



[Patch] OpenMP: Document ompx warnings + add Fortran omx warning [PR106670]

2022-08-29 Thread Tobias Burnus

(Patch + RFC.)

OpenMP 5.2 has 'ompx' and (for fixed source form Fortran) 'omx'
as sentinel to provide a defined namespace for vendor extensions.

The behavior when encountering an unknown directive with ompx/omp sentinel
(or an unknown clause with ompx_ prefix) is implementation defined. For unknown
clauses there will be always an error in GCC.
For sentinels, GCC compiles the code and ignores the directive.

From the user perspective, either silently ignoring the directive or

showing a warning or showing an erroring is best - depending on the semantic
of that vendor extension. As the semantic is not known, providing a means
to warn makes most sense.

For warning, we can either show a
- specific message for ompx/omx - or a
- generic message
And we can either use some existing generic flag (unknown pragmas,
attributes, suprising behavior etc.), depending on language and source
language (C, C++, Fortran fixed/free)- or add & use a specific -W flag.

The attached patch just documents the existing warning behavior, which
is quite diverse + adds testcases for them. As fixed-form Fortran had no
warning, a new warning with a specific message was added.
(Cf. testcases in the patch for what's shown as message.)

OK for mainline - or other ideas how to handle this topic best?

Tobias


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP: Document ompx warnings + add Fortran omx warning [PR106670]

omp/ompx sentinels are for vendor extensions; as they might be required for
the correctness of the program, a warning should be printable. This patch
documents in the OpenMP 5.2 table the existing warnings, including the new
warning for for fixed source form Fortran.

	PR fortran/106670

gcc/fortran/ChangeLog:

	* scanner.cc (skip_fixed_omp_sentinel): Add -Wsurprising warning
	for 'omx' sentinels with -fopenmp.
	* invoke.texi (-Wsurprising): Document additional warning case.

libgomp/ChangeLog:

	* libgomp.texi (OpenMP 5.2): Add comment to ompx/omx entry.

gcc/testsuite/ChangeLog:

	* c-c++-common/gomp/ompx-1.c: New test.
	* c-c++-common/gomp/ompx-2.c: New test.
	* g++.dg/gomp/ompx-attrs-1.C: New test.
	* gfortran.dg/gomp/ompx-1.f90: New test.
	* gfortran.dg/gomp/omx-1.f: New test.
	* gfortran.dg/gomp/omx-2.f: New test.

 gcc/fortran/invoke.texi   | 5 +
 gcc/fortran/scanner.cc| 8 ++--
 gcc/testsuite/c-c++-common/gomp/ompx-1.c  | 4 
 gcc/testsuite/c-c++-common/gomp/ompx-2.c  | 5 +
 gcc/testsuite/g++.dg/gomp/ompx-attrs-1.C  | 7 +++
 gcc/testsuite/gfortran.dg/gomp/ompx-1.f90 | 2 ++
 gcc/testsuite/gfortran.dg/gomp/omx-1.f| 7 +++
 gcc/testsuite/gfortran.dg/gomp/omx-2.f| 9 +
 libgomp/libgomp.texi  | 8 +++-
 9 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi
index 4d1e0d6b513..58502d38ac8 100644
--- a/gcc/fortran/invoke.texi
+++ b/gcc/fortran/invoke.texi
@@ -1092,6 +1092,11 @@ The type of a function result is declared more than once with the same type.  If
 
 @item
 A @code{CHARACTER} variable is declared with negative length.
+
+@item
+With @option{-fopenmp}, for fixed-form source code, when an @code{omx}
+vendor-extension sentinel is encountered. (The equivalent @code{ompx},
+used in free-form source code, is diagnosed by default.)
 @end itemize
 
 @item -Wtabs
diff --git a/gcc/fortran/scanner.cc b/gcc/fortran/scanner.cc
index 2dff2514700..fa1d9cba394 100644
--- a/gcc/fortran/scanner.cc
+++ b/gcc/fortran/scanner.cc
@@ -982,8 +982,9 @@ static bool
 skip_fixed_omp_sentinel (locus *start)
 {
   gfc_char_t c;
-  if (((c = next_char ()) == 'm' || c == 'M')
-  && ((c = next_char ()) == 'p' || c == 'P'))
+  if ((c = next_char ()) != 'm' && c != 'M')
+return false;
+  if ((c = next_char ()) == 'p' || c == 'P')
 {
   c = next_char ();
   if (c != '\n'
@@ -1005,6 +1006,9 @@ skip_fixed_omp_sentinel (locus *start)
 	}
 	}
 }
+  else if (UNLIKELY (c == 'x' || c == 'X'))
+gfc_warning_now (OPT_Wsurprising,
+		 "Ignoring '!$omx' vendor-extension sentinel at %C");
   return false;
 }
 
diff --git a/gcc/testsuite/c-c++-common/gomp/ompx-1.c b/gcc/testsuite/c-c++-common/gomp/ompx-1.c
new file mode 100644
index 000..9758d0f0cae
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/ompx-1.c
@@ -0,0 +1,4 @@
+void f(void)
+{
+  #pragma ompx some_vendor_extension
+}
diff --git a/gcc/testsuite/c-c++-common/gomp/ompx-2.c b/gcc/testsuite/c-c++-common/gomp/ompx-2.c
new file mode 100644
index 000..4b66b0e2b1c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/gomp/ompx-2.c
@@ -0,0 +1,5 @@
+/* { dg-additional-options "-Wunknown-pragmas" } */
+void f(void)
+{
+  #pragma ompx some_vendor_extension  /* { dg-warning "-:ignoring '#pragma 

Re: [PATCH 1/2] vec: Add array_slice constructors from non-const and gc vectors

2022-08-29 Thread Martin Jambor
Hi,

On Mon, Aug 29 2022, Richard Biener wrote:
> On Fri, 26 Aug 2022, Martin Jambor wrote:
>
>> Hi,
>> 
>> On Fri, Aug 26 2022, Richard Biener wrote:
>> >> Am 26.08.2022 um 18:39 schrieb Martin Jambor :
>> >>
>> >> Hi,
>> >>
>> >> This patch adds constructors of array_slice that are required to
>> >> create them from non-const (heap or auto) vectors or from GC vectors.
>> >>
>> >> The use of non-const array_slices is somewhat limited, as creating one
>> >> from const vec still leads to array_slice,
>> >> so I eventually also only resorted to having read-only array_slices.
>> >> But I do need the constructor from the gc vector.
>> >>
>> >> Bootstrapped and tested along code that actually uses it on
>> >> x86_64-linux.  OK for trunk?
>> >>
>> >> Thanks,
>> >>
>> >> Martin
>> >>
>> >>
>> >> gcc/ChangeLog:
>> >>
>> >> 2022-08-08  Martin Jambor  
>> >>
>> >>* vec.h (array_slice): Add constructors for non-const reference to
>> >>heap vector and pointers to heap vectors.
>> >> ---
>> >> gcc/vec.h | 12 
>> >> 1 file changed, 12 insertions(+)
>> >>
>> >> diff --git a/gcc/vec.h b/gcc/vec.h
>> >> index eed075addc9..b0477e1044c 100644
>> >> --- a/gcc/vec.h
>> >> +++ b/gcc/vec.h
>> >> @@ -2264,6 +2264,18 @@ public:
>> >>   array_slice (const vec )
>> >> : m_base (v.address ()), m_size (v.length ()) {}
>> >>
>> >> +  template
>> >> +  array_slice (vec )
>> >> +: m_base (v.address ()), m_size (v.length ()) {}
>> >> +
>> >> +  template
>> >> +  array_slice (const vec *v)
>> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 
>> >> 0) {}
>> >> +
>> >> +  template
>> >> +  array_slice (vec *v)
>> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 
>> >> 0) {}
>> >> +
>> >
>> > I don?t quite understand why the generic ctor doesn?t cover the GC case.  
>> > It looks more like reference vs pointer?
>> >
>> 
>> If you think that this should work:
>> 
>>   vec *heh = cfun->local_decls;
>>   array_slice arr_slice (*heh);
>> 
>> then it does not:
>> 
>>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: error: no matching 
>> function for call to ?array_slice::array_slice(vec> va_gc>&)?
>>6693 |   array_slice arr_slice (*heh);
>> |^
>>   In file included from /home/mjambor/gcc/mine/src/gcc/hash-table.h:248,
>>from /home/mjambor/gcc/mine/src/gcc/coretypes.h:486,
>>from /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:105:
>>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note: candidate: 
>> ?template array_slice::array_slice(const vec&) 
>> [with T = tree_node*]?
>>2264 |   array_slice (const vec )
>> |   ^~~
>>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note:   template argument 
>> deduction/substitution failed:
>>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: note:   mismatched types 
>> ?va_heap? and ?va_gc?
>>6693 |   array_slice arr_slice (*heh);
>> |^
>> 
>>   [... I trimmed notes about all other candidates...]
>> 
>> Or did you mean something else?
>
> Hmm, so what if you change
>
>   template
>   array_slice (const vec )
> : m_base (v.address ()), m_size (v.length ()) {}
>
> to
>
>   template
>   array_slice (const vec )
> : m_base (v.address ()), m_size (v.length ()) {}
>
> instead?  Thus allow any allocation / placement template arg?

I tried this on Friday night too (but I was already only half awake) and
it led to some very weird self-test ICEs (and so I went to bed).

(I can try again but debugging such things is not quite what I wanted to
spend my time on :-)

Martin



[PATCH] libcpp: Add -Winvalid-utf8 warning [PR106655]

2022-08-29 Thread Jakub Jelinek via Gcc-patches
Hi!

The following patch introduces a new warning - -Winvalid-utf8 similarly
to what clang now has - to diagnose invalid UTF-8 byte sequences in
comments.  In identifiers and in string literals it should be diagnosed
already but comment content hasn't been really verified.

I'm not sure if this is enough to say P2295R6 is implemented or not.

The problem is that in the most common case, people don't use
-finput-charset= option and the sources often are UTF-8, but sometimes
could be some ASCII compatible single byte encoding where non-ASCII
characters only appear in comments.  So having the warning off by default
is IMO desirable.  Now, if people use explicit -finput-charset=UTF-8,
perhaps we could make the warning on by default for C++23 and use pedwarn
instead of warning, because then the user told us explicitly that the source
is UTF-8.  From the paper I understood one of the implementation options
is to claim that the implementation supports 2 encodings, UTF-8 and UTF-8
like encodings where invalid UTF-8 characters in comments are replaced say
by spaces, where the latter could be the default and the former only
used if -finput-charset=UTF-8 -Werror=invalid-utf8 options are used.

Thoughts on this?

2022-08-29  Jakub Jelinek  

PR c++/106655
libcpp/
* include/cpplib.h (struct cpp_options): Implement C++23
P2295R6 - Support for UTF-8 as a portable source file encoding.
Add cpp_warn_invalid_utf8 field.
(enum cpp_warning_reason): Add CPP_W_INVALID_UTF8 enumerator.
* init.cc (cpp_create_reader): Initialize cpp_warn_invalid_utf8.
* lex.cc (utf8_continuation): New const variable.
(utf8_signifier): Move earlier in the file.
(_cpp_warn_invalid_utf8): New function.
(_cpp_skip_block_comment): Handle -Winvalid-utf8 warning.
(skip_line_comment): Likewise.
gcc/
* doc/invoke.texi (-Winvalid-utf8): Document it.
gcc/c-family/
* c.opt (-Winvalid-utf8): New warning.
gcc/testsuite/
* c-c++-common/cpp/Winvalid-utf8-1.c: New test.

--- libcpp/include/cpplib.h.jj  2022-08-25 14:25:23.866912426 +0200
+++ libcpp/include/cpplib.h 2022-08-27 12:17:55.185022807 +0200
@@ -560,6 +560,9 @@ struct cpp_options
  cpp_bidirectional_level.  */
   unsigned char cpp_warn_bidirectional;
 
+  /* True if libcpp should warn about invalid UTF-8 characters in comments.  */
+  bool cpp_warn_invalid_utf8;
+
   /* Dependency generation.  */
   struct
   {
@@ -666,7 +669,8 @@ enum cpp_warning_reason {
   CPP_W_CXX11_COMPAT,
   CPP_W_CXX20_COMPAT,
   CPP_W_EXPANSION_TO_DEFINED,
-  CPP_W_BIDIRECTIONAL
+  CPP_W_BIDIRECTIONAL,
+  CPP_W_INVALID_UTF8
 };
 
 /* Callback for header lookup for HEADER, which is the name of a
--- libcpp/init.cc.jj   2022-08-24 09:55:44.571876638 +0200
+++ libcpp/init.cc  2022-08-27 12:18:54.559246323 +0200
@@ -227,6 +227,7 @@ cpp_create_reader (enum c_lang lang, cpp
   CPP_OPTION (pfile, ext_numeric_literals) = 1;
   CPP_OPTION (pfile, warn_date_time) = 0;
   CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
+  CPP_OPTION (pfile, cpp_warn_invalid_utf8) = 0;
 
   /* Default CPP arithmetic to something sensible for the host for the
  benefit of dumb users like fix-header.  */
--- libcpp/lex.cc.jj2022-08-26 09:24:12.089615949 +0200
+++ libcpp/lex.cc   2022-08-27 13:43:40.560769087 +0200
@@ -1704,6 +1704,59 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
   bidi::on_char (kind, ucn_p, loc);
 }
 
+static const cppchar_t utf8_continuation = 0x80;
+static const cppchar_t utf8_signifier = 0xC0;
+
+/* Emit -Winvalid-utf8 warning on invalid UTF-8 character starting
+   at PFILE->buffer->cur.  Return a pointer after the diagnosed
+   invalid character.  */
+
+static const uchar *
+_cpp_warn_invalid_utf8 (cpp_reader *pfile)
+{
+  cpp_buffer *buffer = pfile->buffer;
+  const uchar *cur = buffer->cur;
+
+  if (cur[0] < utf8_signifier
+  || cur[1] < utf8_continuation || cur[1] >= utf8_signifier)
+{
+  cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+pfile->line_table->highest_line,
+CPP_BUF_COL (buffer),
+"invalid UTF-8 character <%x> in comment",
+cur[0]);
+  return cur + 1;
+}
+  else if (cur[2] < utf8_continuation || cur[2] >= utf8_signifier)
+{
+  cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+pfile->line_table->highest_line,
+CPP_BUF_COL (buffer),
+"invalid UTF-8 character <%x><%x> in comment",
+cur[0], cur[1]);
+  return cur + 2;
+}
+  else if (cur[3] < utf8_continuation || cur[3] >= utf8_signifier)
+{
+  cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
+pfile->line_table->highest_line,
+CPP_BUF_COL (buffer),
+"invalid UTF-8 character 

Re: [PATCH 1/2] vec: Add array_slice constructors from non-const and gc vectors

2022-08-29 Thread Richard Biener via Gcc-patches
On Fri, 26 Aug 2022, Martin Jambor wrote:

> Hi,
> 
> On Fri, Aug 26 2022, Richard Biener wrote:
> >> Am 26.08.2022 um 18:39 schrieb Martin Jambor :
> >>
> >> Hi,
> >>
> >> This patch adds constructors of array_slice that are required to
> >> create them from non-const (heap or auto) vectors or from GC vectors.
> >>
> >> The use of non-const array_slices is somewhat limited, as creating one
> >> from const vec still leads to array_slice,
> >> so I eventually also only resorted to having read-only array_slices.
> >> But I do need the constructor from the gc vector.
> >>
> >> Bootstrapped and tested along code that actually uses it on
> >> x86_64-linux.  OK for trunk?
> >>
> >> Thanks,
> >>
> >> Martin
> >>
> >>
> >> gcc/ChangeLog:
> >>
> >> 2022-08-08  Martin Jambor  
> >>
> >>* vec.h (array_slice): Add constructors for non-const reference to
> >>heap vector and pointers to heap vectors.
> >> ---
> >> gcc/vec.h | 12 
> >> 1 file changed, 12 insertions(+)
> >>
> >> diff --git a/gcc/vec.h b/gcc/vec.h
> >> index eed075addc9..b0477e1044c 100644
> >> --- a/gcc/vec.h
> >> +++ b/gcc/vec.h
> >> @@ -2264,6 +2264,18 @@ public:
> >>   array_slice (const vec )
> >> : m_base (v.address ()), m_size (v.length ()) {}
> >>
> >> +  template
> >> +  array_slice (vec )
> >> +: m_base (v.address ()), m_size (v.length ()) {}
> >> +
> >> +  template
> >> +  array_slice (const vec *v)
> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) 
> >> {}
> >> +
> >> +  template
> >> +  array_slice (vec *v)
> >> +: m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) 
> >> {}
> >> +
> >
> > I don?t quite understand why the generic ctor doesn?t cover the GC case.  
> > It looks more like reference vs pointer?
> >
> 
> If you think that this should work:
> 
>   vec *heh = cfun->local_decls;
>   array_slice arr_slice (*heh);
> 
> then it does not:
> 
>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: error: no matching 
> function for call to ?array_slice::array_slice(vec va_gc>&)?
>6693 |   array_slice arr_slice (*heh);
> |^
>   In file included from /home/mjambor/gcc/mine/src/gcc/hash-table.h:248,
>from /home/mjambor/gcc/mine/src/gcc/coretypes.h:486,
>from /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:105:
>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note: candidate: 
> ?template array_slice::array_slice(const vec&) [with 
> T = tree_node*]?
>2264 |   array_slice (const vec )
> |   ^~~
>   /home/mjambor/gcc/mine/src/gcc/vec.h:2264:3: note:   template argument 
> deduction/substitution failed:
>   /home/mjambor/gcc/mine/src/gcc/ipa-cp.cc:6693:36: note:   mismatched types 
> ?va_heap? and ?va_gc?
>6693 |   array_slice arr_slice (*heh);
> |^
> 
>   [... I trimmed notes about all other candidates...]
> 
> Or did you mean something else?

Hmm, so what if you change

  template
  array_slice (const vec )
: m_base (v.address ()), m_size (v.length ()) {}

to

  template
  array_slice (const vec )
: m_base (v.address ()), m_size (v.length ()) {}

instead?  Thus allow any allocation / placement template arg?

Richard.

> Thanks,
> 
> Martin
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)


Re: [[GCC13][Patch][V3] 1/2] Add a new option -fstrict-flex-array[=n] and new attribute strict_flex_array

2022-08-29 Thread Richard Biener via Gcc-patches
On Fri, 26 Aug 2022, Qing Zhao wrote:

> 
> 
> > On Aug 26, 2022, at 4:48 AM, Richard Biener  wrote:
> > 
> > On Wed, 17 Aug 2022, Qing Zhao wrote:
> > 
> >> Add the following new option -fstrict-flex-array[=n] and a corresponding
> >> attribute strict_flex_array to GCC:
> >> 
> >> '-fstrict-flex-array'
> >> Treat the trailing array of a structure as a flexible array member
> >> in a stricter way.  The positive form is equivalent to
> >> '-fstrict-flex-array=3', which is the strictest.  A trailing array
> >> is treated as a flexible array member only when it is declared as a
> >> flexible array member per C99 standard onwards.  The negative form
> >> is equivalent to '-fstrict-flex-array=0', which is the least
> >> strict.  All trailing arrays of structures are treated as flexible
> >> array members.
> >> 
> >> '-fstrict-flex-array=LEVEL'
> >> Treat the trailing array of a structure as a flexible array member
> >> in a stricter way.  The value of LEVEL controls the level of
> >> strictness.
> >> 
> >> The possible values of LEVEL are the same as for the
> >> 'strict_flex_array' attribute (*note Variable Attributes::).
> >> 
> >> You can control this behavior for a specific trailing array field
> >> of a structure by using the variable attribute 'strict_flex_array'
> >> attribute (*note Variable Attributes::).
> >> 
> >> This option is only valid when flexible array member is supported in 
> >> the
> >> language. FOR ISO C before C99 and ISO C++, no language support for 
> >> the flexible
> >> array member at all, this option will be invalid and a warning will be 
> >> issued.
> >> When -std=gnu89 is specified or C++ with GNU extension, only 
> >> zero-length array
> >> extension and one-size array are supported, as a result, LEVEL=3 will 
> >> be
> >> invalid and a warning will be issued.
> >> 
> >> 'strict_flex_array (LEVEL)'
> >> The 'strict_flex_array' attribute should be attached to the
> >> trailing array field of a structure.  It specifies the level of
> >> strictness of treating the trailing array field of a structure as a
> >> flexible array member.  LEVEL must be an integer betwen 0 to 3.
> >> 
> >> LEVEL=0 is the least strict level, all trailing arrays of
> >> structures are treated as flexible array members.  LEVEL=3 is the
> >> strictest level, only when the trailing array is declared as a
> >> flexible array member per C99 standard onwards ([]), it is treated
> >> as a flexible array member.
> >> 
> >> There are two more levels in between 0 and 3, which are provided to
> >> support older codes that use GCC zero-length array extension ([0])
> >> or one-size array as flexible array member ([1]): When LEVEL is 1,
> >> the trailing array is treated as a flexible array member when it is
> >> declared as either [], [0], or [1]; When LEVEL is 2, the trailing
> >> array is treated as a flexible array member when it is declared as
> >> either [], or [0].
> >> 
> >> This attribute can be used with or without '-fstrict-flex-array'.
> >> When both the attribute and the option present at the same time,
> >> the level of the strictness for the specific trailing array field
> >> is determined by the attribute.
> >> 
> >> This attribute is only valid when flexible array member is supported 
> >> in the
> >> language. For ISO C before C99 and ISO C++, no language support for 
> >> the flexible
> >> array member at all, this attribute will be invalid and a warning is 
> >> issued.
> >> When -std=gnu89 is specified or C++ with GNU extension, only 
> >> zero-length array
> >> extension and one-size array are supported, as a result, LEVEL=3 will 
> >> be
> >> invalid and a warning is issued.
> >> 
> >> gcc/c-family/ChangeLog:
> >> 
> >>* c-attribs.cc (handle_strict_flex_arrays_attribute): New function.
> >>(c_common_attribute_table): New item for strict_flex_array.
> >>* c-opts.cc (c_common_post_options): Handle the combination of
> >>-fstrict-flex-arrays and -std specially.
> >>* c.opt: (fstrict-flex-array): New option.
> >>(fstrict-flex-array=): New option.
> >> 
> >> gcc/c/ChangeLog:
> >> 
> >>* c-decl.cc (flexible_array_member_type_p): New function.
> >>(one_element_array_type_p): Likewise.
> >>(zero_length_array_type_p): Likewise.
> >>(add_flexible_array_elts_to_size): Call new utility
> >>routine flexible_array_member_type_p.
> >>(is_flexible_array_member_p): New function.
> >>(finish_struct): Set the new DECL_NOT_FLEXARRAY flag.
> >> 
> >> gcc/cp/ChangeLog:
> >> 
> >>* module.cc (trees_out::core_bools): Stream out new bit
> >>decl_not_flexarray.
> >>(trees_in::core_bools): Stream in new bit decl_not_flexarray.
> >> 
> >> gcc/ChangeLog:
> >> 
> >>* doc/extend.texi: Document strict_flex_array attribute.
> >>* doc/invoke.texi: Document 

Re: [PATCH] predict: Adjust optimize_function_for_size_p [PR105818]

2022-08-29 Thread Kewen.Lin via Gcc-patches
on 2022/8/15 16:33, Kewen.Lin via Gcc-patches wrote:
> on 2022/7/11 11:42, Kewen.Lin wrote:
>> on 2022/6/15 14:20, Kewen.Lin wrote:
>>> Hi Honza,
>>>
>>> Thanks for the comments!  Some replies are inlined below.
>>>
>>> on 2022/6/14 19:37, Jan Hubicka wrote:
> Hi,
>
> Function optimize_function_for_size_p returns OPTIMIZE_SIZE_NO
> if func->decl is not null but no cgraph node is available for it.
> As PR105818 shows, this could give unexpected result.  For the
> case in PR105818, when parsing bar decl in function foo, the cfun
> is a function structure for foo, for which there is none cgraph
> node, so it returns OPTIMIZE_SIZE_NO.  But it's incorrect since
> the context is to optimize for size, the flag optimize_size is
> true.
>
> The patch is to make optimize_function_for_size_p to check
> optimize_size as what it does when func->decl is unavailable.
>
> One regression failure got exposed on aarch64-linux-gnu:
>
> PASS->FAIL: gcc.dg/guality/pr54693-2.c   -Os \
>   -DPREVENT_OPTIMIZATION  line 21 x == 10 - i
>
> The difference comes from the macro LOGICAL_OP_NON_SHORT_CIRCUIT
> used in function fold_range_test during c parsing, it uses
> optimize_function_for_speed_p which is equal to the invertion
> of optimize_function_for_size_p.  At that time cfun->decl is valid
> but no cgraph node for it, w/o this patch function
> optimize_function_for_speed_p returns true eventually, while it
> returns false with this patch.  Since the command line option -Os
> is specified, there is no reason to interpret it as "for speed".
> I think this failure is expected and adjust the test case
> accordingly.
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -
>
>   PR target/105818
>
> gcc/ChangeLog:
>
>   * predict.cc (optimize_function_for_size_p): Check optimize_size when
>   func->decl is valid but its cgraph node is unavailable.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/powerpc/pr105818.c: New test.
>   * gcc.dg/guality/pr54693-2.c: Adjust for aarch64.
> ---
>  gcc/predict.cc  | 2 +-
>  gcc/testsuite/gcc.dg/guality/pr54693-2.c| 2 +-
>  gcc/testsuite/gcc.target/powerpc/pr105818.c | 9 +
>  3 files changed, 11 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr105818.c
>
> diff --git a/gcc/predict.cc b/gcc/predict.cc
> index 5734e4c8516..6c60a973236 100644
> --- a/gcc/predict.cc
> +++ b/gcc/predict.cc
> @@ -268,7 +268,7 @@ optimize_function_for_size_p (struct function *fun)
>cgraph_node *n = cgraph_node::get (fun->decl);
>if (n)
>  return n->optimize_for_size_p ();
> -  return OPTIMIZE_SIZE_NO;
> +  return optimize_size ? OPTIMIZE_SIZE_MAX : OPTIMIZE_SIZE_NO;

 We could also do (opt_for_fn (cfun->decl, optimize_size) that is
 probably better since one can change optimize_size with optimization
 attribute.
>>>
>>> Good point, agree!
>>>
 However I think in most cases we check for optimize_size early I think
 we are doing something wrong, since at that time htere is no profile
 available.  Why exactly PR105818 hits the flag change issue?
>>>
>>> For PR105818, the reason why the flag changs is that:
>>>
>>> Firstly, the inconsistent flag is OPTION_MASK_SAVE_TOC_INDIRECT bit
>>> of rs6000_isa_flags_explicit, it's set as below:
>>>
>>> /* If we can shrink-wrap the TOC register save separately, then use
>>>-msave-toc-indirect unless explicitly disabled.  */
>>> if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
>>> && flag_shrink_wrap_separate
>>> && optimize_function_for_speed_p (cfun))
>>>   rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
>>>
>>> Initially, rs6000 initialize target_option_default_node with
>>> OPTION_MASK_SAVE_TOC_INDIRECT unset, at that time cfun is NULL
>>> and optimize_size is true.
>>>
>>> Later, when c parser handling function foo, it builds target
>>> option node as target_option_default_node in function
>>> handle_optimize_attribute, it does global option saving and
>>> verifying there as well, at that time the cfun is NULL, no
>>> issue is found.  And function store_parm_decls allocates
>>> struct_function for foo then, cfun becomes function struct
>>> for foo, when c parser continues to handle the decl bar in
>>> foo, function handle_optimize_attribute works as before,
>>> tries to restore the target options at the end, it calls
>>> targetm.target_option.restore (rs6000_function_specific_restore)
>>> which calls function rs6000_option_override_internal again,
>>> at this time the cfun is not NULL while there is no cgraph
>>> node for its decl, optimize_function_for_speed_p returns true
>>> and gets the OPTION_MASK_SAVE_TOC_INDIRECT bit of flag
>>> rs6000_isa_flags set unexpectedly.  It becomes 

PING^5 [PATCH v3] rs6000: Fix the check of bif argument number [PR104482]

2022-08-29 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595208.html

I think this is a reasonable fix, the behavior is consistent with what we have 
in
the previous built-in framework, I'm going to push this a week later if no 
objections.  :)

BR,
Kewen
 
> Hi,
>
> As PR104482 shown, it's one regression about the handlings when
> the argument number is more than the one of built-in function
> prototype.  The new bif support only catches the case that the
> argument number is less than the one of function prototype, but
> it misses the case that the argument number is more than the one
> of function prototype.  Because it uses "n != expected_args",
> n is updated in
>
>for (n = 0; !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
> fnargs = TREE_CHAIN (fnargs), n++)
>
> , it's restricted to be less than or equal to expected_args with
> the guard !VOID_TYPE_P (TREE_VALUE (fnargs)), so it's wrong.
>
> The fix is to use nargs instead, also move the checking hunk's
> location ahead to avoid useless further scanning when the counts
> mismatch.
>
> Bootstrapped and regtested on powerpc64-linux-gnu P8 and
> powerpc64le-linux-gnu P9 and P10.
>
> v3: Update test case with dg-excess-errors.
>
> v2: Add one test case and refine commit logs.
> https://gcc.gnu.org/pipermail/gcc-patches/2022-April/593155.html
>
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591768.html
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -
>   PR target/104482
>
> gcc/ChangeLog:
>
>   * config/rs6000/rs6000-c.cc (altivec_resolve_overloaded_builtin): Fix
>   the equality check for argument number, and move this hunk ahead.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/powerpc/pr104482.c: New test.
> ---
>  gcc/config/rs6000/rs6000-c.cc   | 60 ++---
>  gcc/testsuite/gcc.target/powerpc/pr104482.c | 16 ++
>  2 files changed, 46 insertions(+), 30 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr104482.c
>
> diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
> index 9c8cbd7a66e..61881f29230 100644
> --- a/gcc/config/rs6000/rs6000-c.cc
> +++ b/gcc/config/rs6000/rs6000-c.cc
> @@ -1756,6 +1756,36 @@ altivec_resolve_overloaded_builtin (location_t 
> loc, tree fndecl,
>vec *arglist = static_cast *> 
> (passed_arglist);
>unsigned int nargs = vec_safe_length (arglist);
>
> +  /* If the number of arguments did not match the prototype, return NULL
> + and the generic code will issue the appropriate error message.  Skip
> + this test for functions where we don't fully describe all the 
> possible
> + overload signatures in rs6000-overload.def (because they aren't 
> relevant
> + to the expansion here).  If we don't, we get confusing error 
> messages.  */
> +  /* As an example, for vec_splats we have:
> +
> +; There are no actual builtins for vec_splats.  There is special 
> handling for
> +; this in altivec_resolve_overloaded_builtin in rs6000-c.cc, where the 
> call
> +; is replaced by a constructor.  The single overload here causes
> +; __builtin_vec_splats to be registered with the front end so that can 
> happen.
> +[VEC_SPLATS, vec_splats, __builtin_vec_splats]
> +  vsi __builtin_vec_splats (vsi);
> +ABS_V4SI SPLATS_FAKERY
> +
> +So even though __builtin_vec_splats accepts all vector types, the
> +infrastructure cheats and just records one prototype.  We end up 
> getting
> +an error message that refers to this specific prototype even when we
> +are handling a different argument type.  That is completely confusing
> +to the user, so it's best to let these cases be handled individually
> +in the resolve_vec_splats, etc., helper functions.  */
> +
> +  if (expected_args != nargs
> +  && !(fcode == RS6000_OVLD_VEC_PROMOTE
> +|| fcode == RS6000_OVLD_VEC_SPLATS
> +|| fcode == RS6000_OVLD_VEC_EXTRACT
> +|| fcode == RS6000_OVLD_VEC_INSERT
> +|| fcode == RS6000_OVLD_VEC_STEP))
> +return NULL;
> +
>for (n = 0;
> !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
> fnargs = TREE_CHAIN (fnargs), n++)
> @@ -1816,36 +1846,6 @@ altivec_resolve_overloaded_builtin (location_t 
> loc, tree fndecl,
>types[n] = type;
>  }
>
> -  /* If the number of arguments did not match the prototype, return NULL
> - and the generic code will issue the appropriate error message.  Skip
> - this test for functions where we don't fully describe all the 
> possible
> - overload signatures in rs6000-overload.def (because 

PING^5 [PATCH] rs6000: Handle unresolved overloaded builtin [PR105485]

2022-08-29 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping https://gcc.gnu.org/pipermail/gcc-patches/2022-May/594699.html

I think this is a reasonable fix, the behavior is consistent with what we have 
in
the previous built-in framework, I'm going to push this a week later if no 
objections.  :)

BR,
Kewen

>>>
 on 2022/5/13 13:29, Kewen.Lin via Gcc-patches wrote:
> Hi,
>
> PR105485 exposes that new builtin function framework doesn't handle
> unresolved overloaded builtin function well.  With new builtin
> function support, we don't have builtin info for any overloaded
> rs6000_gen_builtins enum, since they are expected to be resolved to
> one specific instance.  So when function rs6000_gimple_fold_builtin
> faces one unresolved overloaded builtin, the access for builtin info
> becomes out of bound and gets ICE then.
>
> We should not try to fold one unresolved overloaded builtin there
> and as the previous support we should emit one error message during
> expansion phase like "unresolved overload for builtin ...".
>
> Bootstrapped and regtested on powerpc64-linux-gnu P8 and
> powerpc64le-linux-gnu P9 and P10.
>
> Is it ok for trunk?
>
> BR,
> Kewen
> -
>   PR target/105485
>
> gcc/ChangeLog:
>
>   * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Add
>   the handling for unresolved overloaded builtin function.
>   (rs6000_expand_builtin): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>   * g++.target/powerpc/pr105485.C: New test.
>
> ---
>  gcc/config/rs6000/rs6000-builtin.cc | 13 +
>  gcc/testsuite/g++.target/powerpc/pr105485.C |  9 +
>  2 files changed, 22 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/powerpc/pr105485.C
>
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc 
> b/gcc/config/rs6000/rs6000-builtin.cc
> index e925ba9fad9..e102305c90c 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -1294,6 +1294,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
> *gsi)
>enum tree_code bcode;
>gimple *g;
>
> +  /* For an unresolved overloaded builtin, return early here since there
> + is no builtin info for it and we are unable to fold it.  */
> +  if (fn_code > RS6000_OVLD_NONE)
> +return false;
> +
>size_t uns_fncode = (size_t) fn_code;
>enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
>const char *fn_name1 = rs6000_builtin_info[uns_fncode].bifname;
> @@ -3295,6 +3300,14 @@ rs6000_expand_builtin (tree exp, rtx target, rtx 
> /* subtarget */,
>tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
>enum rs6000_gen_builtins fcode
>  = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
> +
> +  /* Emit error message if it's an unresolved overloaded builtin.  */
> +  if (fcode > RS6000_OVLD_NONE)
> +{
> +  error ("unresolved overload for builtin %qF", fndecl);
> +  return const0_rtx;
> +}
> +
>size_t uns_fcode = (size_t)fcode;
>enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
>
> diff --git a/gcc/testsuite/g++.target/powerpc/pr105485.C 
> b/gcc/testsuite/g++.target/powerpc/pr105485.C
> new file mode 100644
> index 000..a3b8290df8c
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/powerpc/pr105485.C
> @@ -0,0 +1,9 @@
> +/* It's to verify no ICE here, ignore error/warning messages since
> +   they are not test points here.  */
> +/* { dg-excess-errors "pr105485" } */
> +
> +template  void __builtin_vec_vslv();
> +typedef  __attribute__((altivec(vector__))) char T;
> +T b (T c, T d) {
> +return __builtin_vec_vslv(c, d);
> +}



Re: ICE after folding svld1rq to vec_perm_expr duing forwprop

2022-08-29 Thread Prathamesh Kulkarni via Gcc-patches
On Thu, 18 Aug 2022 at 18:20, Prathamesh Kulkarni
 wrote:
>
> On Thu, 18 Aug 2022 at 18:14, Prathamesh Kulkarni
>  wrote:
> >
> > On Wed, 17 Aug 2022 at 17:01, Richard Biener  
> > wrote:
> > >
> > > On Tue, Aug 16, 2022 at 6:30 PM Richard Sandiford
> > >  wrote:
> > > >
> > > > Prathamesh Kulkarni  writes:
> > > > > On Tue, 9 Aug 2022 at 18:42, Richard Biener 
> > > > >  wrote:
> > > > >>
> > > > >> On Tue, Aug 9, 2022 at 12:10 PM Prathamesh Kulkarni
> > > > >>  wrote:
> > > > >> >
> > > > >> > On Mon, 8 Aug 2022 at 14:27, Richard Biener 
> > > > >> >  w>> > >
> > > > >> > >
> > > > >> > >   /* If result vector has greater length than input vector,
> > > > >> > > + then allow permuting two vectors as long as:
> > > > >> > > + a) sel.nelts_per_pattern == 1
> > > > >> > > + b) sel.npatterns == len of input vector.
> > > > >> > > + The intent is to permute input vectors, and
> > > > >> > > + dup the elements in resulting vector to target vector 
> > > > >> > > length.  */
> > > > >> > > +
> > > > >> > > +  if (maybe_gt (TYPE_VECTOR_SUBPARTS (type),
> > > > >> > > +   TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0
> > > > >> > > +{
> > > > >> > > +  nelts = sel.encoding ().npatterns ();
> > > > >> > > +  if (sel.encoding ().nelts_per_pattern () != 1
> > > > >> > > + || (!known_eq (nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE 
> > > > >> > > (arg0)
> > > > >> > > +   return NULL_TREE;
> > > > >> > > +}
> > > > >> > >
> > > > >> > > so the only case you add is non-VLA to VLA and there
> > > > >> > > explicitely only the case of a period that's same as the
> > > > >> > > element count in the input vectors.
> > > > >> > >
> > > > >> > >
> > > > >> > > @@ -2602,6 +2602,9 @@ dump_generic_node (pretty_printer *pp, tree
> > > > >> > > node, int spc, dump_flags_t flags,
> > > > >> > > pp_space (pp);
> > > > >> > >   }
> > > > >> > >   }
> > > > >> > > +   if (VECTOR_TYPE_P (TREE_TYPE (node))
> > > > >> > > +   && !TYPE_VECTOR_SUBPARTS (TREE_TYPE 
> > > > >> > > (node)).is_constant ())
> > > > >> > > + pp_string (pp, ", ... ");
> > > > >> > > pp_right_brace (pp);
> > > > >> > >
> > > > >> > > btw, I do wonder if VLA CONSTRUCTORs are a "thing"?  Are they?
> > > > >> > Well, it got created for the following case after folding:
> > > > >> > svint32_t f2(int a, int b, int c, int d)
> > > > >> > {
> > > > >> >   int32x4_t v = {a, b, c, d};
> > > > >> >   return svld1rq_s32 (svptrue_b8 (), [0]);
> > > > >> > }
> > > > >> >
> > > > >> > The svld1rq_s32 call gets folded to:
> > > > >> > v = {a, b, c, d}
> > > > >> > lhs = VEC_PERM_EXPR
> > > > >> >
> > > > >> > fold_vec_perm then folds the above VEC_PERM_EXPR to
> > > > >> > VLA constructor, since elements in v (in_elts) are not constant, 
> > > > >> > and
> > > > >> > need_ctor is thus true:
> > > > >> > lhs = {a, b, c, d, ...}
> > > > >> > I added "..." to make it more explicit that it's a VLA constructor.
> > > > >>
> > > > >> But I doubt we do anything reasonable with such a beast?  Do we?
> > > > >> I suppose it's like a vec_duplicate if you view it as V1TImode
> > > > >> but do we actually make sure to do this duplication?
> > > > > I am not sure. As mentioned above, the current code-gen for VLA
> > > > > constructor looks pretty bad.
> > > > > Should we avoid folding VLA constructors for now ?
> > > >
> > > > VLA constructors aren't really a thing.  At least, the only VLA vector
> > > > you could represent with current CONSTRUCTOR nodes is a fixed-length
> > > > sequence at the start of an otherwise zero vector.  I'm not sure
> > > > we even use that though (perhaps we do and I've forgotten).
> > > >
> > > > > I guess these are 2 different issues:
> > > > > (a) Resolving ICE with VEC_PERM_EXPR for above aarch64 tests.
> > > > > (b) Extending fold_vec_perm to handle vectors with differing lengths.
> > > > >
> > > > > For (a), I think the issue with using:
> > > > > res_type = gimple_assign_lhs (stmt)
> > > > > in previous patch, was that op2's type will change to match tgt_units,
> > > > > if we go thru
> > > > > (code == VIEW_CONVERT_EXPR || code2 == VIEW_CONVERT_EXPR) branch,
> > > > > and may thus not be same as len(lhs_type) anymore, and hit the assert
> > > > > in fold_vec_perm.
> > > > >
> > > > > IIUC, for lhs = VEC_PERM_EXPR, we now have the
> > > > > following semantics:
> > > > > (1) Element types for lhs, rhs1 and rhs2 should be the same.
> > > > > (2) len(lhs) == len(mask) and len(rhs1) == len(rhs2).
> > > >
> > > > Yeah.
> > > >
> > > > > The attached patch changes res_type from TREE_TYPE (arg0) to 
> > > > > following:
> > > > > res_type = build_vector_type (TREE_TYPE (TREE_TYPE (arg0)),
> > > > > TYPE_VECTOR_SUBPARTS 
> > > > > (op2))
> > > > > so it has same element type as arg0 (and arg1) and len of op2.
> > > > > Does that look reasonable ?
> > > > >
> > > > > If we need a cast from 

PING^2 [PATCH] rs6000: Suggest unroll factor for loop vectorization

2022-08-29 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping: https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598601.html

BR,
Kewen

> 
> on 2022/7/20 17:30, Kewen.Lin via Gcc-patches wrote:
>> Hi,
>>
>> Commit r12-6679-g7ca1582ca60dc8 made vectorizer accept one
>> unroll factor to be applied to vectorization factor when
>> vectorizing the main loop, it would be suggested by target
>> when doing costing.
>>
>> This patch introduces function determine_suggested_unroll_factor
>> for rs6000 port, to make it be able to suggest the unroll factor
>> for a given loop being vectorized.  Referring to aarch64 port
>> and basing on the analysis on SPEC2017 performance evaluation
>> results, it mainly considers these aspects:
>>   1) unroll option and pragma which can disable unrolling for the
>>  given loop;
>>   2) simple hardware resource model with issued non memory access
>>  vector insn per cycle;
>>   3) aggressive heuristics when iteration count is unknown:
>>  - reduction case to break cross iteration dependency;
>>  - emulated gather load;
>>   4) estimated iteration count when iteration count is unknown;
>>
>> With this patch, SPEC2017 performance evaluation results on
>> Power8/9/10 are listed below (speedup pct.):
>>
>>   * Power10
>> - O2: all are neutral (excluding some noises);
>> - Ofast: 510.parest_r +6.67%, the others are neutral
>>  (use ... for the followings);
>> - Ofast + unroll: 510.parest_r +5.91%, ...
>> - Ofast + LTO + PGO: 510.parest_r +3.00%, ...
>> - Ofast + cheap vect cost: 510.parest_r +6.23%, ...
>> - Ofast + very-cheap vect cost: all are neutral;
>>
>>   * Power9
>> - Ofast: 510.parest_r +8.73%, 538.imagick_r +11.18%
>>  (likely noise), 500.perlbench_r +1.84%, ...
>>
>>   * Power8
>> - Ofast: 510.parest_r +5.43%, ...;
>>
>> This patch also introduces one documented parameter
>> rs6000-vect-unroll-limit= similar to what aarch64 proposes,
>> by evaluating on P8/P9/P10, the default value 4 is slightly
>> better than the other choices like 2 and 8.
>>
>> It also parameterizes two other values as undocumented
>> parameters for future tweaking.  One parameter is
>> rs6000-vect-unroll-issue, it's to simply model hardware
>> resource for non memory access vector instructions to avoid
>> excessive unrolling, initially I tried to use the value in
>> the hook rs6000_issue_rate, but the evaluation showed it's
>> bad, so I evaluated different values 2/4/6/8 on P8/P9/P10 at
>> Ofast, the results showed the default value 4 is good enough
>> on these different architectures.  For a record, choice 8
>> could make 510.parest_r's gain become smaller or gone on
>> P8/P9/P10; choice 6 could make 503.bwaves_r degrade by more
>> than 1% on P8/P10; and choice 2 could make 538.imagick_r
>> degrade by 3.8%.  The other parameter is
>> rs6000-vect-unroll-reduc-threshold.  It's mainly inspired by
>> 510.parest_r and tweaked as it, evaluating with different
>> values 0/1/2/3 for the threshold, it showed value 1 is the
>> best choice.  For a record, choice 0 could make 525.x264_r
>> degrade by 2% and 527.cam4_r degrade by 2.95% on P10,
>> 548.exchange2_r degrade by 1.41% and 527.cam4_r degrade by
>> 2.54% on P8; choice 2 and bigger values could make
>> 510.parest_r's gain become smaller.
>>
>> Bootstrapped and regtested on powerpc64-linux-gnu P7 and P8,
>> and powerpc64le-linux-gnu P9.  Bootstrapped on
>> powerpc64le-linux-gnu P10, but one failure was exposed during
>> regression testing there, it's identified as one miss
>> optimization and can be reproduced without this support,
>> PR106365 was opened for further tracking.
>>
>> Is it for trunk?
>>
>> BR,
>> Kewen
>> --
>> gcc/ChangeLog:
>>
>>  * config/rs6000/rs6000.cc (class rs6000_cost_data): Add new members
>>  m_nstores, m_reduc_factor, m_gather_load and member function
>>  determine_suggested_unroll_factor.
>>  (rs6000_cost_data::update_target_cost_per_stmt): Update for m_nstores,
>>  m_reduc_factor and m_gather_load.
>>  (rs6000_cost_data::determine_suggested_unroll_factor): New function.
>>  (rs6000_cost_data::finish_cost): Use determine_suggested_unroll_factor.
>>  * config/rs6000/rs6000.opt (rs6000-vect-unroll-limit): New parameter.
>>  (rs6000-vect-unroll-issue): Likewise.
>>  (rs6000-vect-unroll-reduc-threshold): Likewise.
>>  * doc/invoke.texi (rs6000-vect-unroll-limit): Document new parameter.
>>
>> ---
>>  gcc/config/rs6000/rs6000.cc  | 125 ++-
>>  gcc/config/rs6000/rs6000.opt |  18 +
>>  gcc/doc/invoke.texi  |   7 ++
>>  3 files changed, 147 insertions(+), 3 deletions(-)
>>
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index 3ff16b8ae04..d0f107d70a8 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -5208,16 +5208,23 @@ protected:
>>  vect_cost_model_location, unsigned int);
>>void density_test (loop_vec_info);
>>void 

PING^2 [PATCH v2] rs6000/test: Fix empty TU in some cases of effective targets [PR106345]

2022-08-29 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping this: 
https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598748.html

BR,
Kewen

> on 2022/7/25 14:26, Kewen.Lin via Gcc-patches wrote:
>> Hi,
>>
>> As the failure of test case gcc.target/powerpc/pr92398.p9-.c in
>> PR106345 shows, some test sources for some powerpc effective
>> targets use empty translation unit wrongly.  The test sources
>> could go with options like "-ansi -pedantic-errors", then those
>> effective target checkings will fail unexpectedly with the
>> error messages like:
>>
>>   error: ISO C forbids an empty translation unit [-Wpedantic]
>>
>> This patch is to fix empty TUs with one dummy function definition
>> accordingly.
>>
>> Excepting for the failures on gcc.target/powerpc/pr92398.p9-.c
>> fixed, I can see it helps to bring back some testing coverage like:
>>
>> NA->PASS: gcc.target/powerpc/pr92398.p9+.c
>> NA->PASS: gcc.target/powerpc/pr93453-1.c
>>
>> Tested as before.
>>
>> v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598602.html
>> v2: Use dummy function instead of dummy int as Segher suggested.
>>
>> Segher, does this v2 look good to you?
>>
>> BR,
>> Kewen
>> -
>>  PR testsuite/106345
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * lib/target-supports.exp (check_effective_target_powerpc_sqrt): Add
>>  a function definition to avoid pedwarn about empty translation unit.
>>  (check_effective_target_has_arch_pwr5): Likewise.
>>  (check_effective_target_has_arch_pwr6): Likewise.
>>  (check_effective_target_has_arch_pwr7): Likewise.
>>  (check_effective_target_has_arch_pwr8): Likewise.
>>  (check_effective_target_has_arch_pwr9): Likewise.
>>  (check_effective_target_has_arch_pwr10): Likewise.
>>  (check_effective_target_has_arch_ppc64): Likewise.
>>  (check_effective_target_ppc_float128): Likewise.
>>  (check_effective_target_ppc_float128_insns): Likewise.
>>  (check_effective_target_powerpc_vsx): Likewise.
>> ---
>>  gcc/testsuite/lib/target-supports.exp | 33 +++
>>  1 file changed, 33 insertions(+)
>>
>> diff --git a/gcc/testsuite/lib/target-supports.exp 
>> b/gcc/testsuite/lib/target-supports.exp
>> index 4ed7b25b9a4..06484330178 100644
>> --- a/gcc/testsuite/lib/target-supports.exp
>> +++ b/gcc/testsuite/lib/target-supports.exp
>> @@ -6259,9 +6259,12 @@ proc check_effective_target_powerpc_sqrt { } {
>>  }
>>
>>  return [check_no_compiler_messages powerpc_sqrt object {
>> +void test (void)
>> +{
>>  #ifndef _ARCH_PPCSQ
>>  #error _ARCH_PPCSQ is not defined
>>  #endif
>> +}
>>  } {}]
>>  }
>>
>> @@ -6369,71 +6372,92 @@ proc check_effective_target_powerpc_p9modulo_ok { } {
>>  # as provided by the test.
>>  proc check_effective_target_has_arch_pwr5 { } {
>>  return [check_no_compiler_messages_nocache arch_pwr5 assembly {
>> +void test (void)
>> +{
>>  #ifndef _ARCH_PWR5
>>  #error does not have power5 support.
>>  #else
>>  /* "has power5 support" */
>>  #endif
>> +}
>>  } [current_compiler_flags]]
>>  }
>>
>>  proc check_effective_target_has_arch_pwr6 { } {
>>  return [check_no_compiler_messages_nocache arch_pwr6 assembly {
>> +void test (void)
>> +{
>>  #ifndef _ARCH_PWR6
>>  #error does not have power6 support.
>>  #else
>>  /* "has power6 support" */
>>  #endif
>> +}
>>  } [current_compiler_flags]]
>>  }
>>
>>  proc check_effective_target_has_arch_pwr7 { } {
>>  return [check_no_compiler_messages_nocache arch_pwr7 assembly {
>> +void test (void)
>> +{
>>  #ifndef _ARCH_PWR7
>>  #error does not have power7 support.
>>  #else
>>  /* "has power7 support" */
>>  #endif
>> +}
>>  } [current_compiler_flags]]
>>  }
>>
>>  proc check_effective_target_has_arch_pwr8 { } {
>>  return [check_no_compiler_messages_nocache arch_pwr8 assembly {
>> +void test (void)
>> +{
>>  #ifndef _ARCH_PWR8
>>  #error does not have power8 support.
>>  #else
>>  /* "has power8 support" */
>>  #endif
>> +}
>>  } [current_compiler_flags]]
>>  }
>>
>>  proc check_effective_target_has_arch_pwr9 { } {
>>  return [check_no_compiler_messages_nocache arch_pwr9 assembly {
>> +void test (void)
>> +{
>>  #ifndef _ARCH_PWR9
>>  #error does not have power9 support.
>>  #else
>>  /* "has power9 support" */
>>  #endif
>> +}
>>  } [current_compiler_flags]]
>>  }
>>
>>  proc check_effective_target_has_arch_pwr10 { } {
>>  return [check_no_compiler_messages_nocache arch_pwr10 assembly {
>> +void test (void)
>> +{
>>  #ifndef _ARCH_PWR10
>> 

Re: [PATCH, rs6000] Put dg-options ahead of target selector checks

2022-08-29 Thread Kewen.Lin via Gcc-patches
Hi Haochen,

on 2022/8/26 15:29, HAO CHEN GUI wrote:
> Hi,
>   This patch changes the sequence of test directives for 3 cases. Originally,
> these 3 cases got failed or unsupported on some platforms, as their target
> selector checks depend on compiling options.

Maybe it's good to say more in the commit log to make it more clear, like: those
effective target has_arch_* adopt current_compiler_flags in their checks, 
dg-options
sitting before the dg-require-effective-target will be appended to 
current_compiler_flags,
but won't if it's after.  So adjusting the location of dg-options to make it 
more robust,
or something like that.  :)

> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> Thanks
> Gui Haochen
> 
> ChangeLog
> 2022-08-26  Haochen Gui  
> 
> rs6000: Change the sequence of test directives for some test cases.  Put
> dg-options ahead of target selector checks as the compiling options affect the
> result of these checks.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/pr92398.p9+.c: Put dg-options ahead of target
>   selector check.
>   * gcc.target/powerpc/pr92398.p9-.c: Likewise.
>   * gcc.target/powerpc/pr93453-1.c: Likewise.
> 
> 
> patch.diff
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c 
> b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
> index 72dd1d9a274..4e4fad620e8 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
> @@ -1,6 +1,8 @@
> -/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power9 -mvsx" } */

Previously this only gets compiled if has_arch_pwr9, now it's tested always.
I assumed that this change with "-mdejagnu-cpu=power9" is intentional, but
it's unrelated to dg-options things, so maybe it's good to have one note for
this in commit log.

The others look good to me.

BR,
Kewen

> +/* { dg-require-effective-target has_arch_ppc64 } */
> +/* { dg-require-effective-target int128 } */
>  /* { dg-require-effective-target powerpc_vsx_ok } */
> -/* { dg-options "-O2 -mvsx" } */
> 
>  /* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
>  /* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c 
> b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
> index bd7fa98af51..4e6a8c8cb8e 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
> @@ -1,6 +1,8 @@
> -/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */
> -/* { dg-require-effective-target powerpc_vsx_ok } */
>  /* { dg-options "-O2 -mvsx" } */
> +/* { dg-do compile { target { ! has_arch_pwr9 } } } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> 
>  /* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */
>  /* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! 
> has_arch_pwr9} && has_arch_pwr8 } && be } } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c 
> b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
> index b396458ba12..6f4d899c114 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
> @@ -1,5 +1,6 @@
> -/* { dg-do compile { target has_arch_ppc64 } } */
> +/* { dg-do compile } */
>  /* { dg-options "-mdejagnu-cpu=power6 -O2" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> 
>  unsigned long load_byte_reverse (unsigned long *in)
>  {
> 


Re: Extend fold_vec_perm to fold VEC_PERM_EXPR in VLA manner

2022-08-29 Thread Prathamesh Kulkarni via Gcc-patches
On Wed, 17 Aug 2022 at 18:09, Prathamesh Kulkarni
 wrote:
>
> Hi,
> The attached prototype patch extends fold_vec_perm to fold VEC_PERM_EXPR
> in VLA manner, and currently handles the following cases:
> (a) fixed len arg0, arg1 and fixed len sel.
> (b) fixed len arg0, arg1 and vla sel
> (c) vla arg0, arg1 and vla sel with arg0, arg1 being VECTOR_CST.
>
> It seems to work for the VLA tests written in
> test_vec_perm_vla_folding (), and am working thru the fallout observed in
> regression testing.
>
> Does the approach taken in the patch look in the right direction ?
> I am not sure if I have got the conversion from "sel_index"
> to index of either arg0, or arg1 entirely correct.
> I would be grateful for suggestions on the patch.
ping https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599888.html

Thanks,
Prathamesh
>
> Thanks,
> Prathamesh