Re: _LIBCXX_DEBUG value initialized singular iterators assert failures in std algorithms [PR104316]

2024-03-19 Thread François Dumont

Thanks to you doc:

    libstdc++: [_GLIBCXX_DEBUG] Define __[glibcxx,cpp_lib]_null_iterators

    _GLIBCXX_DEBUG has now fully N3344 compliant iterator checks, we 
can define
    __glibcxx_null_iterators and __cpp_lib_null_iterators macros like 
the normal

    mode.

    libstdc++-v3/ChangeLog:

    * version.def (null_iterators): Remove extra_cond.
    * version.h: Regenerate.

Ok to commit ?

I already noticed that GCC 13 has no version.h file so no backport question.

François

On 19/03/2024 16:41, Jonathan Wakely wrote:

On Tue, 19 Mar 2024 at 09:31, Jonathan Wakely wrote:

On Mon, 18 Mar 2024 at 21:38, François Dumont wrote:

Both committed now.

Just to confirm, those 2 last patches should be backported to gcc-13
branch, right ?

Yes please.


I might have a try to update version.h but if you want to do it before
don't hesitate.

You'll need to have 'autogen' installed to do it (I'm going to update
the docs to describe that today).

I've documented it in a new "Generated files" section at the end of
https://gcc.gnu.org/onlinedocs/libstdc++/manual/appendix_porting.html#build_hacking.generated
diff --git a/libstdc++-v3/include/bits/version.def 
b/libstdc++-v3/include/bits/version.def
index be5af18e818..26e62c6a9b2 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -209,7 +209,6 @@ ftms = {
   values = {
 v = 201304;
 cxxmin = 14;
-extra_cond = "!defined(_GLIBCXX_DEBUG)";
   };
 };
 
diff --git a/libstdc++-v3/include/bits/version.h 
b/libstdc++-v3/include/bits/version.h
index 9107b45a484..23c8c09ab4b 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -214,7 +214,7 @@
 #undef __glibcxx_want_make_reverse_iterator
 
 #if !defined(__cpp_lib_null_iterators)
-# if (__cplusplus >= 201402L) && (!defined(_GLIBCXX_DEBUG))
+# if (__cplusplus >= 201402L)
 #  define __glibcxx_null_iterators 201304L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_null_iterators)
 #   define __cpp_lib_null_iterators 201304L


[PATCH 3/3] Add -mcpu=power11 tests

2024-03-19 Thread Michael Meissner
This patch adds some simple tests for -mcpu=power11 support.  In order to run
these tests, you need an assembler that supports the appropriate option for
supporting the Power11 processor (-mpower11 under Linux or -mpwr11 under AIX).

I have tested this patch on a little endian power10 system and a big endian
power9 system using the latest binutils which includes support for power11.
There were no regressions, and the 3 power11 tests added ran on both systems.
Can I check this patch into GCC 15 when it opens up for general patches?

2024-03-18  Michael Meissner  

gcc/testsuite/

* gcc.target/powerpc/power11-1.c: New test.
* gcc.target/powerpc/power11-2.c: Likewise.
* gcc.target/powerpc/power11-3.c: Likewise.
* lib/target-supports.exp (check_effective_target_power11_ok): Add new
effective target.
---
 gcc/testsuite/gcc.target/powerpc/power11-1.c | 13 +
 gcc/testsuite/gcc.target/powerpc/power11-2.c | 20 
 gcc/testsuite/gcc.target/powerpc/power11-3.c | 10 ++
 gcc/testsuite/lib/target-supports.exp| 17 +
 4 files changed, 60 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/power11-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/power11-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/power11-3.c

diff --git a/gcc/testsuite/gcc.target/powerpc/power11-1.c 
b/gcc/testsuite/gcc.target/powerpc/power11-1.c
new file mode 100644
index 000..6a2e802eedf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/power11-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target power11_ok } */
+/* { dg-options "-mdejagnu-cpu=power11 -O2" } */
+
+/* Basic check to see if the compiler supports -mcpu=power11.  */
+
+#ifndef _ARCH_PWR11
+#error "-mcpu=power11 is not supported"
+#endif
+
+void foo (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/power11-2.c 
b/gcc/testsuite/gcc.target/powerpc/power11-2.c
new file mode 100644
index 000..7b9904c1d29
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/power11-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target power11_ok } */
+/* { dg-options "-O2" } */
+
+/* Check if we can set the power11 target via a target attribute.  */
+
+__attribute__((__target__("cpu=power9")))
+void foo_p9 (void)
+{
+}
+
+__attribute__((__target__("cpu=power10")))
+void foo_p10 (void)
+{
+}
+
+__attribute__((__target__("cpu=power11")))
+void foo_p11 (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/power11-3.c 
b/gcc/testsuite/gcc.target/powerpc/power11-3.c
new file mode 100644
index 000..9b2d643cc0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/power11-3.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target powerpc*-*-* } }  */
+/* { dg-require-effective-target power11_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" }  */
+
+/* Check if we can set the power11 target via a target_clones attribute.  */
+
+__attribute__((__target_clones__("cpu=power11,cpu=power9,default")))
+void foo (void)
+{
+}
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 467b539b20d..be80494be80 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7104,6 +7104,23 @@ proc check_effective_target_power10_ok { } {
 }
 }
 
+# Return 1 if this is a PowerPC target supporting -mcpu=power11.
+
+proc check_effective_target_power11_ok { } {
+if { ([istarget powerpc*-*-*]) } {
+   return [check_no_compiler_messages power11_ok object {
+   int main (void) {
+   #ifndef _ARCH_PWR11
+   #error "-mcpu=power11 is not supported"
+   #endif
+   return 0;
+   }
+   } "-mcpu=power11"]
+} else {
+   return 0
+}
+}
+
 # Return 1 if this is a PowerPC target supporting -mfloat128 via either
 # software emulation on power7/power8 systems or hardware support on power9.
 
-- 
2.44.0


-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


[PATCH 1/3] Add basic support for -mcpu=power11

2024-03-19 Thread Michael Meissner
This patch adds the power11 option to the -mcpu= and -mtune= switches.

This patch treats the power11 like a power10 in terms of costs and reassociation
width.

This patch issues a ".machine power11" to the assembly file if you use
-mcpu=power11.

This patch defines _ARCH_PWR11 if the user uses -mcpu=power11.

This patch allows GCC to be configured with the --with-cpu=power11 and
--with-tune=power11 options.

This patch passes -mpwr11 to the assembler if the user uses -mcpu=power11.

This patch adds support for using "power11" in the __builtin_cpu_is built-in
function.

I have tested this patch with a bootstrap build on a little endian power10
system and a bootstrap build on a big endian power9 system.  There were no
regressions.  Can I apply this patch when GCC 15 opens up for general patches?

2024-03-18  Michael Meissner  

gcc/

* config.gcc (rs6000*-*-*, powerpc*-*-*): Add support for power11.
* config/rs6000/aix71.h (ASM_CPU_SPEC): Add support for -mcpu=power11.
* config/rs6000/aix72.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/aix73.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/driver-rs6000.cc (asm_names): Likewise.
* config/rs6000/ppc-auxv.h (PPC_PLATFORM_POWER11): New define.
* config/rs6000/rs6000-builtin.cc (cpu_is_info): Add power11.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define
_ARCH_PWR11 if -mcpu=power11.
* config/rs6000/rs6000-cpus.def (ISA_POWER11_MASKS_SERVER): New define.
(POWERPC_MASKS): Add power11 isa bit.
(power11 cpu): Add power11 definition.
* config/rs6000/rs6000-opts.h (PROCESSOR_POWER11): Add power11 
processor.
* config/rs6000/rs6000-string.cc (expand_compare_loop): Likewise.
* config/rs6000/rs6000-tables.opt: Regenerate.
* config/rs6000/rs6000.cc (rs6000_option_override_internal): Add power11
support.
(rs6000_machine_from_flags): Likewise.
(rs6000_reassociation_width): Likewise.
(rs6000_adjust_cost): Likewise.
(rs6000_issue_rate): Likewise.
(rs6000_sched_reorder): Likewise.
(rs6000_sched_reorder2): Likewise.
(rs6000_register_move_cost): Likewise.
(rs6000_opt_masks): Likewise.
* config/rs6000/rs6000.h (ASM_CPU_SPEC): Likewise.
* config/rs6000/rs6000.md (cpu attribute): Add power11.
* config/rs6000/rs6000.opt (-mpower11): Add internal power11 ISA flag.
* doc/invoke.texi (RS/6000 and PowerPC Options): Document -mcpu=power11.
---
 gcc/config.gcc  |  6 --
 gcc/config/rs6000/aix71.h   |  1 +
 gcc/config/rs6000/aix72.h   |  1 +
 gcc/config/rs6000/aix73.h   |  1 +
 gcc/config/rs6000/driver-rs6000.cc  |  2 ++
 gcc/config/rs6000/ppc-auxv.h|  3 +--
 gcc/config/rs6000/rs6000-builtin.cc |  1 +
 gcc/config/rs6000/rs6000-c.cc   |  2 ++
 gcc/config/rs6000/rs6000-cpus.def   |  5 +
 gcc/config/rs6000/rs6000-opts.h |  3 ++-
 gcc/config/rs6000/rs6000-string.cc  |  1 +
 gcc/config/rs6000/rs6000-tables.opt |  3 +++
 gcc/config/rs6000/rs6000.cc | 32 +
 gcc/config/rs6000/rs6000.h  |  1 +
 gcc/config/rs6000/rs6000.md |  2 +-
 gcc/config/rs6000/rs6000.opt|  3 +++
 gcc/doc/invoke.texi |  5 +++--
 17 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 040afabd9ec..f8036b6476e 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -531,7 +531,9 @@ powerpc*-*-*)
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
extra_headers="${extra_headers} amo.h"
case x$with_cpu in
-   
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower10|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
+   xpowerpc64 | xdefault64 | x6[23]0 | x970 | xG5 | xpower[3456789] \
+   | xpower1[01] | xpower6x | xrs64a | xcell | xa2 | xe500mc64 \
+   | xe5500 | xe6500)
cpu_is_64bit=yes
;;
esac
@@ -5566,7 +5568,7 @@ case "${target}" in
eval "with_$which=405"
;;
"" | common | native \
-   | power[3456789] | power10 | power5+ | power6x \
+   | power[3456789] | power1[01] | power5+ | power6x \
| powerpc | powerpc64 | powerpc64le \
| rs64 \
| 401 | 403 | 405 | 405fp | 440 | 440fp | 464 | 464fp \
diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h
index 24bc301e37d..41037b3852d 100644
--- a/gcc/config/rs6000/aix71.h
+++ b/gcc/config/rs6000/aix71.h
@@ -79,6 +79,7 @@ do {  
\
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC \
 "%{mcpu=native: %(asm_cpu_native); \
+  mcpu=power11: -mpwr11; \
   mcpu=power10: 

[PATCH 2/3] Add tuning support for -mcpu=power11

2024-03-19 Thread Michael Meissner
This patch makes -mtune=power11 use the same tuning decisions as
-mtune=power10.

I have tested this patch on a little endian power10 system and a big endian
power9 system.  There were no regressions.  Can I check this into GCC 15 when
it is open for general patches?

2024-03-18  Michael Meissner  

gcc/

* config/rs6000/power10.md (all reservations): Add power11 as an
alternative to power10.
---
 gcc/config/rs6000/power10.md | 144 +--
 1 file changed, 72 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md
index fcc2199ab29..90312643858 100644
--- a/gcc/config/rs6000/power10.md
+++ b/gcc/config/rs6000/power10.md
@@ -1,4 +1,4 @@
-;; Scheduling description for the IBM POWER10 processor.
+;; Scheduling description for the IBM POWER10 and POWER11 processors.
 ;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
 ;;
 ;; Contributed by Pat Haugen (pthau...@us.ibm.com).
@@ -97,12 +97,12 @@ (define_insn_reservation "power10-load" 4
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-fused-load" 4
   (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-load" 4
@@ -110,13 +110,13 @@ (define_insn_reservation "power10-prefixed-load" 4
(eq_attr "update" "no")
(eq_attr "size" "!128")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-load-update" 4
   (and (eq_attr "type" "load")
(eq_attr "update" "yes")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-fpload-double" 4
@@ -124,7 +124,7 @@ (define_insn_reservation "power10-fpload-double" 4
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "no")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_any_power10,LU_power10")
 
 (define_insn_reservation "power10-prefixed-fpload-double" 4
@@ -132,14 +132,14 @@ (define_insn_reservation "power10-prefixed-fpload-double" 
4
(eq_attr "update" "no")
(eq_attr "size" "64")
(eq_attr "prefixed" "yes")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-double" 4
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "64")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; SFmode loads are cracked and have additional 3 cycles over DFmode
@@ -148,27 +148,27 @@ (define_insn_reservation "power10-fpload-single" 7
   (and (eq_attr "type" "fpload")
(eq_attr "update" "no")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,LU_power10")
 
 (define_insn_reservation "power10-fpload-update-single" 7
   (and (eq_attr "type" "fpload")
(eq_attr "update" "yes")
(eq_attr "size" "32")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 (define_insn_reservation "power10-vecload" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_any_power10,LU_power10")
 
 ; lxvp
 (define_insn_reservation "power10-vecload-pair" 4
   (and (eq_attr "type" "vecload")
(eq_attr "size" "256")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,LU_power10+SXU_power10")
 
 ; Store Unit
@@ -178,12 +178,12 @@ (define_insn_reservation "power10-store" 0
(eq_attr "prefixed" "no")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_any_power10,STU_power10")
 
 (define_insn_reservation "power10-fused-store" 0
   (and (eq_attr "type" "fused_store_store")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,STU_power10")
 
 (define_insn_reservation "power10-prefixed-store" 0
@@ -191,52 +191,52 @@ (define_insn_reservation "power10-prefixed-store" 0
(eq_attr "prefixed" "yes")
(eq_attr "size" "!128")
(eq_attr "size" "!256")
-   (eq_attr "cpu" "power10"))
+   (eq_attr "cpu" "power10,power11"))
   "DU_even_power10,STU_power10")
 
 ; Update forms 

[PATCH 0/3] Add support for -mcpu=power11

2024-03-19 Thread Michael Meissner
These three patches add support for -mcpu=power11 to the PowerPC GCC compiler.

There are 3 patches in the set.  I would like to check these patches into GCC
15 ASAP, and back port the patches into GCC 14 after GCC 14.1 ships.  I hope to
also back port these patches to other active branches after the code goes into
GCC 15 and then GCC 14.

Patch #1: This patch adds the basic support for power11.

*   This patch adds the -mcpu=power11.
*   This patch adds a power11 processor type.
*   This patch adds a bit to the isa_flags for power11 support.
*   This patch defines _ARCH_PWR11 if -mcpu=power11 is used.
*   This patch uses .machine power11 if -mcpu=power11 is used.
*   This patch passes -mpower11 or -mpwr11 to the assembler.
*   This patch uses the power10 defaults for power11.
*   This patch adds AUXV support for power11.

Patch #2: This patch adds tuning support for power11, treating power11 like
power10 at the current time.

Patch #3: This patch adds tests that are run if the assembler supports either
-mpower11 (under Linux) or -mpwr11 (under AIX).

These patches have been tested with bootstrap builds on a little endian power10
and a big endian power9 system.  When the GCC 15 tree opens up for general
patches, can I apply this patch?

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [pushed][PATCH v2 0/3] LoongArch: Cleanup unused/redundant codes.

2024-03-19 Thread chenglulu

Pushed to r14-9562...r14-9564.

在 2024/3/15 上午9:30, Chenghui Pan 写道:

Changes from v1: Some correction about ChangeLog format.

There's some unused/redundant definitions inside LoongArch target support
codes, these patches make a simple cleanup. Regression test passed.

Chenghui Pan (3):
   LoongArch: Remove unused/useless definitions.
   LoongArch: Change loongarch_expand_vec_cmp()'s return type from bool
 to void.
   LoongArch: Combine UNITS_PER_FP_REG and UNITS_PER_FPREG macros.

  gcc/config/loongarch/lasx.md|  6 ++--
  gcc/config/loongarch/loongarch-protos.h |  7 +
  gcc/config/loongarch/loongarch.cc   | 39 -
  gcc/config/loongarch/loongarch.h|  7 ++---
  gcc/config/loongarch/lsx.md |  6 ++--
  5 files changed, 13 insertions(+), 52 deletions(-)





[PATCH V2] Document -fexcess-precision=16.

2024-03-19 Thread liuhongt
gcc/ChangeLog:

* doc/invoke.texi: Document -fexcess-precision=16.
---
 gcc/doc/invoke.texi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 85c938d4a14..6bc1ebf9721 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14930,6 +14930,9 @@ assignments).  This option is enabled by default for C 
or C++ if a strict
 conformance option such as @option{-std=c99} or @option{-std=c++17} is used.
 @option{-ffast-math} enables @option{-fexcess-precision=fast} by default
 regardless of whether a strict conformance option is used.
+If @option{-fexcess-precision=16} is specified, constants and the
+results of expressions with types @code{_Float16} and @code{__bf16}
+are computed without excess precision.
 
 @opindex mfpmath
 @option{-fexcess-precision=standard} is not implemented for languages
-- 
2.31.1



[pushed] testsuite: fix target for linkage-1.C

2024-03-19 Thread Marek Polacek
Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --
This test fails in C++11 due to:

linkage-1.C:3:8: error: 'f' function uses 'auto' type specifier without 
trailing return type
3 | inline auto f() {
  |^~~~
linkage-1.C:3:8: note: deduced return type only available with '-std=c++14' or 
'-std=gnu++14'

Compile it in C++14 thus.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/linkage-1.C: Use target c++14.
---
 gcc/testsuite/g++.dg/cpp2a/linkage-1.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/cpp2a/linkage-1.C 
b/gcc/testsuite/g++.dg/cpp2a/linkage-1.C
index 888ed6fa5b5..2b83ffe55b7 100644
--- a/gcc/testsuite/g++.dg/cpp2a/linkage-1.C
+++ b/gcc/testsuite/g++.dg/cpp2a/linkage-1.C
@@ -1,4 +1,4 @@
-// { dg-do compile { target c++11 } }
+// { dg-do compile { target c++14 } }
 
 inline auto f() {
   struct A {};

base-commit: 9c91f8a88b2db50c8faf70786d3cef27b39ac9fc
-- 
2.44.0



Re: [gcc-15 2/3] RISC-V: avoid LUI based const mat: keep stack offsets aligned

2024-03-19 Thread Palmer Dabbelt

On Tue, 19 Mar 2024 13:05:54 PDT (-0700), Vineet Gupta wrote:



On 3/19/24 06:10, Jeff Law wrote:

On 3/19/24 12:48 AM, Andrew Waterman wrote:

On Mon, Mar 18, 2024 at 5:28 PM Vineet Gupta  wrote:

On 3/16/24 13:21, Jeff Law wrote:

|   59944:add s0,sp,2047  <
|   59948:mv  a2,a0
|   5994c:mv  a3,a1
|   59950:mv  a0,sp
|   59954:li  a4,1
|   59958:lui a1,0x1
|   5995c:add s0,s0,1 <---
|   59960:jal 59a3c

SP here becomes unaligned, even if transitively which is undesirable as
well as incorrect:
   - ABI requires stack to be 8 byte aligned


It's 16-byte aligned in the default ABI, for Q, but that doesn't really 
matter.



   - asm code looks weird and unexpected
   - to the user it might falsely seem like a compiler bug even when not,
 specially when staring at asm for debugging unrelated issue.
It's not ideal, but I think it's still ABI compliant as-is.  If it
wasn't, then I suspect things like virtual origins in Ada couldn't be
made ABI compliant.

To be clear are u suggesting ADD sp, sp, 2047 is ABI compliant ?
I'd still like to avoid it as I'm sure someone will complain about it.


With the patch, we get following correct code instead:

| ..
| 59944: add s0,sp,2032
| ..
| 5995c: add s0,s0,16

Alternately you could tighten the positive side of the range of the
splitter from patch 1/3 so that you could always use 2032 rather than
2047 on the first addi.   ie instead of allowing 2048..4094, allow
2048..4064.

2033..4064 vs. 2048..4094

Yeah I was a bit split about this as well. Since you are OK with either,
I'll keep them as-is and perhaps add this observation to commitlog.

There's a subset of embedded use cases where an interrupt service
routine continues on the same stack as the interrupted thread,
requiring sp to always have an ABI-compliant value (i.e. 16B aligned,
and with no important data on the stack at an address below sp).

Although not all use cases care about this property, it seems more
straightforward to maintain the invariant everywhere, rather than
selectively enforce it.

Just to be clear, the changes don't misalign the stack pointer at all.
They merely have the potential to create *another* pointer into the
stack which may or may not be aligned.  Which is totally normal, it's no
different than taking the address of a char on the stack.


IIRC the "always"-ness of the stack pointer alignment has come up 
before, and we decided to keep it aligned for these embedded 
interrupt-related reasons that Andrew points out.  That's a bit 
different than most other ABI requirements, where we're just enforcing 
them on function boundaries.


That said, this one sounds like just a terminology issue: it's not SP 
that's misaligned, but intemediate SP-based addressing calculations.  
I'm not sure if there's a word for these SP-based intermediate values, 
during last week's team meeting we came up with "stack anchors".



Right I never saw any sp,sp,2047 getting generated - not even in the
first version of patch which lacked any filtering of stack regs via
riscv_reg_frame_related () and obviously didn't have the stack variant
of splitter. I don't know if that is just being lucky and not enough
testing exposure (I only spot checked buildroot libc, vmlinux) or
something somewhere enforces that.


I guess we could run the tests with something like

   diff --git a/target/riscv/translate.c b/target/riscv/translate.c
   index ab18899122..e87cc83067 100644
   --- a/target/riscv/translate.c
   +++ b/target/riscv/translate.c
   @@ -320,12 +320,18 @@ static void gen_goto_tb(DisasContext *ctx, int n, 
target_long diff)
 */
static TCGv get_gpr(DisasContext *ctx, int reg_num, DisasExtend ext)
{
   -TCGv t;
   +TCGv t, gpr;
   
if (reg_num == 0) {

return ctx->zero;
}
   
   +if (reg_num == 2) {

   +gpr = tcg_gen_temp_new();
   +tcg_gen_andi_tl(gpr, cpu_gpr[reg_num], 0xF);
   +} else
   +gpr = cpu_gpr[reg_num];
   +
switch (get_ol(ctx)) {
case MXL_RV32:
switch (ext) {
   @@ -333,11 +339,11 @@ static TCGv get_gpr(DisasContext *ctx, int reg_num, 
DisasExtend ext)
break;
case EXT_SIGN:
t = tcg_temp_new();
   -tcg_gen_ext32s_tl(t, cpu_gpr[reg_num]);
   +tcg_gen_ext32s_tl(t, gpr);
return t;
case EXT_ZERO:
t = tcg_temp_new();
   -tcg_gen_ext32u_tl(t, cpu_gpr[reg_num]);
   +tcg_gen_ext32u_tl(t, gpr);
return t;
default:
g_assert_not_reached();
   @@ -349,7 +355,7 @@ static TCGv get_gpr(DisasContext *ctx, int reg_num, 
DisasExtend ext)
default:
g_assert_not_reached();
}
   -return cpu_gpr[reg_num];
   +return gpr;
}
   
static TCGv get_gprh(DisasContext *ctx, int reg_num)


and see if anything blows up?  

[pushed][PR99829][LRA]: Fixing LRA ICE on arm

2024-03-19 Thread Vladimir Makarov

The following patch fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99829

The patch was successfully bootstrapped and tested on x86-64, ppc64le, 
and aarch64.


commit 9c91f8a88b2db50c8faf70786d3cef27b39ac9fc
Author: Vladimir N. Makarov 
Date:   Tue Mar 19 16:57:11 2024 -0400

[PR99829][LRA]: Fixing LRA ICE on arm

  LRA removed insn setting equivalence to memory whose output was
reloaded. This resulted in writing an uninitiated value to the memory
which triggered assert in LRA code checking the final generated code.
This patch fixes the problem.  Comment in the patch contains more
details about the problem and its solution.

gcc/ChangeLog:

PR target/99829
* lra-constraints.cc (lra_constraints): Prevent removing insn
with reverse equivalence to memory if the memory was reloaded.

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 0ae81c1ff9c..10e3d4e4097 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -5213,7 +5213,7 @@ lra_constraints (bool first_p)
   bool changed_p;
   int i, hard_regno, new_insns_num;
   unsigned int min_len, new_min_len, uid;
-  rtx set, x, reg, dest_reg;
+  rtx set, x, reg, nosubreg_dest;
   rtx_insn *original_insn;
   basic_block last_bb;
   bitmap_iterator bi;
@@ -5377,14 +5377,14 @@ lra_constraints (bool first_p)
 	{
 	  if ((set = single_set (curr_insn)) != NULL_RTX)
 	{
-	  dest_reg = SET_DEST (set);
+	  nosubreg_dest = SET_DEST (set);
 	  /* The equivalence pseudo could be set up as SUBREG in a
 		 case when it is a call restore insn in a mode
 		 different from the pseudo mode.  */
-	  if (GET_CODE (dest_reg) == SUBREG)
-		dest_reg = SUBREG_REG (dest_reg);
-	  if ((REG_P (dest_reg)
-		   && (x = get_equiv (dest_reg)) != dest_reg
+	  if (GET_CODE (nosubreg_dest) == SUBREG)
+		nosubreg_dest = SUBREG_REG (nosubreg_dest);
+	  if ((REG_P (nosubreg_dest)
+		   && (x = get_equiv (nosubreg_dest)) != nosubreg_dest
 		   /* Remove insns which set up a pseudo whose value
 		  cannot be changed.  Such insns might be not in
 		  init_insns because we don't update equiv data
@@ -5403,11 +5403,21 @@ lra_constraints (bool first_p)
 			  up the equivalence.  */
 		   || in_list_p (curr_insn,
  ira_reg_equiv
- [REGNO (dest_reg)].init_insns)))
+ [REGNO (nosubreg_dest)].init_insns)))
 		  || (((x = get_equiv (SET_SRC (set))) != SET_SRC (set))
 		  && in_list_p (curr_insn,
 ira_reg_equiv
-[REGNO (SET_SRC (set))].init_insns)))
+[REGNO (SET_SRC (set))].init_insns)
+		  /* This is a reverse equivalence to memory (see ira.cc)
+			 in store insn.  We can reload all the destination and
+			 have an output reload which is a store to memory.  If
+			 we just remove the insn, we will have the output
+			 reload storing an undefined value to the memory.
+			 Check that we did not reload the memory to prevent a
+			 wrong code generation.  We could implement using the
+			 equivalence still in such case but doing this is not
+			 worth the efforts as such case is very rare.  */
+		  && MEM_P (nosubreg_dest)))
 		{
 		  /* This is equiv init insn of pseudo which did not get a
 		 hard register -- remove the insn.	*/


Re: [gcc-15 2/3] RISC-V: avoid LUI based const mat: keep stack offsets aligned

2024-03-19 Thread Andrew Waterman
On Tue, Mar 19, 2024 at 1:05 PM Vineet Gupta  wrote:
>
>
>
> On 3/19/24 06:10, Jeff Law wrote:
> > On 3/19/24 12:48 AM, Andrew Waterman wrote:
> >> On Mon, Mar 18, 2024 at 5:28 PM Vineet Gupta  wrote:
> >>> On 3/16/24 13:21, Jeff Law wrote:
>  |   59944:add s0,sp,2047  <
>  |   59948:mv  a2,a0
>  |   5994c:mv  a3,a1
>  |   59950:mv  a0,sp
>  |   59954:li  a4,1
>  |   59958:lui a1,0x1
>  |   5995c:add s0,s0,1 <---
>  |   59960:jal 59a3c
> 
>  SP here becomes unaligned, even if transitively which is undesirable as
>  well as incorrect:
> - ABI requires stack to be 8 byte aligned
> - asm code looks weird and unexpected
> - to the user it might falsely seem like a compiler bug even when not,
>   specially when staring at asm for debugging unrelated issue.
>  It's not ideal, but I think it's still ABI compliant as-is.  If it
>  wasn't, then I suspect things like virtual origins in Ada couldn't be
>  made ABI compliant.
> >>> To be clear are u suggesting ADD sp, sp, 2047 is ABI compliant ?
> >>> I'd still like to avoid it as I'm sure someone will complain about it.
> >>>
> > With the patch, we get following correct code instead:
> >
> > | ..
> > | 59944: add s0,sp,2032
> > | ..
> > | 5995c: add s0,s0,16
>  Alternately you could tighten the positive side of the range of the
>  splitter from patch 1/3 so that you could always use 2032 rather than
>  2047 on the first addi.   ie instead of allowing 2048..4094, allow
>  2048..4064.
> >>> 2033..4064 vs. 2048..4094
> >>>
> >>> Yeah I was a bit split about this as well. Since you are OK with either,
> >>> I'll keep them as-is and perhaps add this observation to commitlog.
> >> There's a subset of embedded use cases where an interrupt service
> >> routine continues on the same stack as the interrupted thread,
> >> requiring sp to always have an ABI-compliant value (i.e. 16B aligned,
> >> and with no important data on the stack at an address below sp).
> >>
> >> Although not all use cases care about this property, it seems more
> >> straightforward to maintain the invariant everywhere, rather than
> >> selectively enforce it.
> > Just to be clear, the changes don't misalign the stack pointer at all.
> > They merely have the potential to create *another* pointer into the
> > stack which may or may not be aligned.  Which is totally normal, it's no
> > different than taking the address of a char on the stack.
>
> Right I never saw any sp,sp,2047 getting generated - not even in the
> first version of patch which lacked any filtering of stack regs via
> riscv_reg_frame_related () and obviously didn't have the stack variant
> of splitter. I don't know if that is just being lucky and not enough
> testing exposure (I only spot checked buildroot libc, vmlinux) or
> something somewhere enforces that.
>
> However given that misaligned pointer off of stack is a non-issue, I
> think we can do the following:
>
> 1. keep just one splitter with 2047 based predicates and constraint (and
> not 2032) for both stack-related and general regs.
> 2. gate the splitter on only operands[0] being not stack related
> (currently it checks for either [0] or [1]) - this allows the prominent
> case where SP is simply a src, and avoids when any potential shenanigans
> to SP itself.

Agreed.  I misread the original code (add s0,sp,2047 looks a lot like
add sp,sp,2047 from a quick glance on a cell phone).

>
> -Vineet


Re: RISC-V: Use convert instructions instead of calling library functions

2024-03-19 Thread Palmer Dabbelt

On Tue, 19 Mar 2024 12:58:41 PDT (-0700), Andrew Waterman wrote:

On Tue, Mar 19, 2024 at 9:23 AM Palmer Dabbelt  wrote:


On Mon, 18 Mar 2024 20:50:14 PDT (-0700), jeffreya...@gmail.com wrote:
>
>
> On 3/18/24 3:09 AM, Jivan Hakobyan wrote:
>> As RV has round instructions it is reasonable to use them instead of
>> calling the library functions.
>>
>> With my patch for the following C code:
>> double foo(double a) {
>>  return ceil(a);
>> }
>>
>> GCC generates the following ASM code (before it was tail call)
>> foo:
>>  fabs.d  fa4,fa0
>>  lui a5,%hi(.LC0)
>>  fld fa3,%lo(.LC0)(a5)
>>  flt.d   a5,fa4,fa3
>>  beq a5,zero,.L3
>>  fcvt.l.d a5,fa0,rup

I'm not sure exactly what context this is in, but my reading of
"according to the current rounding mode" means we'd usually use the
dynamic rounding mode.



ceil doesn't depend on the current rounding mode; rup is correct.  For
rint, you'd be correct.


Ya, right, that's pretty obvious -- I must have just been falling asleep 
this morning.



>>  fcvt.d.lfa4,a5
>>  fsgnj.d fa0,fa4,fa0
>> .L3:
>>  ret
>>
>> .LC0:
>>  .word   0
>>  .word   1127219200 // 0x4330
>>
>>
>> The patch I have evaluated on SPEC2017.
>> Counted dynamic instructions counts and got the following improvements
>>
>> 510.parest_r   262 m  -
>> 511.povray_r  2.1  b0.04%
>> 521.wrt_r269 m   -
>> 526.blender_r3 b 0.1%
>> 527.cam4_r   15 b   0.6%
>> 538.imagick_r365 b 7.6%
>>
>> Overall executed 385 billion fewer instructions which is 0.5%.
> A few more notes.
>
> The sequence Jivan is using is derived from LLVM.  The condition in the
> generated code tests for those values were are supposed to pass through
> unaltered.  The condition in the pattern ensures we do something
> sensible WRT FE_INEXACT and mirrors how other ports handle these insns.

My only worry here is that when we were doing the other patterns we
decided not to do rint.  I can't remember exactly why, but from reading
the docs we probably just skipped it due to the inexact handling and Zfa
having an instruction that just does this.  FP stuff is always a bit of
a time sink, so at that point it probably just fell off the priority
list.

I'm not really an FP guy, so I usually just poke around what the other
ports generate and try to figure out what's going on.  arm64 has the Zfa
instruction and x86 FP is complicated, so I'm not sure exactly who else
to look at for this sort of stuff.  From just looking at the code,
though, I think there's two issues -- I'm not really an FP person,
though, so take this with a grain of salt:

IIUC what we've got here doesn't actually set the inexact flag for the
bounds check failure case



I think the original code was correct.  If you exceed the bounds, then by
construction you know that the input was already an integer, and so not
setting NX is correct.  If you didn't exceed the bounds, then fcvt.l.d will
set NX when appropriate.

, as we're just loading up an exact constant

and doing the bounds check.  We're also not clamping to INT_MAX-type
values, but not sure if we're supposed to.  I think we could fix both of
those by adjusting the expansion to something like

  fabs.d  fa4,fa0
  lui a5,%hi(.LC0)
  fld fa3,%lo(.LC0)(a5)
  flt.d   a5,fa4,fa3
  bne a5,zero,.L3
  mv  fa0, fa3
 .L3:
  fcvt.l.d a5,fa0,rup
  fcvt.d.lfa4,a5
  fsgnj.d fa0,fa4,fa0
  ret

and then adjusting the constant to be an epsilon larger than INT_MAX so
it'll still trigger the clamping but also inexact.



ceil/rint/etc. are supposed to work for the whole range of their
floating-point type; the range of the integral types isn't supposed to
affect the result.  The original code was correct in this regard, too.


Ya, sorry, I think I just misunderstood what was going on here -- it's 
not INT_MAX, it's just the largest FP values for which mantissas might 
produce non-integral values.



There's also a pair of changes to the ISA in 2020 that added the
conversion inexact handling requirement, it was a grey area before.  I
don't remember exactly what happened there, but I did remember it
happening.  I don't think anyone cares all that much about the
performance of systems that target the older ISAs, so maybe we just
restrict the non-libcall expansion to ISAs that contain the new wording?

commit 5890a1a702abf4157d5879717a39d8ecdae0de68 (tag:
draft-20201229-5890a1a)
Author: Andrew Waterman 
Date:   Mon Dec 28 18:19:44 2020 -0800

Clarify when FP conversions raise the Inexact flag

diff --git a/src/f.tex b/src/f.tex
index 8649d75..97c253b 100644
--- a/src/f.tex
+++ b/src/f.tex
@@ -594,9 +594,9 @@ instructions round according to the {\em rm}
field.  A 

New German PO file for 'gcc' (version 14.1-b20240218)

2024-03-19 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the German team of translators.  The file is available at:

https://translationproject.org/latest/gcc/de.po

(This file, 'gcc-14.1-b20240218.de.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

https://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

https://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




[pushed] analyzer: fix ICE due to corrupt MEM_REFs [PR113505]

2024-03-19 Thread David Malcolm
From: Jakub Jelinek 

Jakub wrote this patch for PR analyzer/113505.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Successful run of analyzer integration tests on x86_64-pc-linux-gnu.
Pushed to trunk as r14-9555-gc87f1f3d660f41.

gcc/analyzer/ChangeLog
PR analyzer/113505
* region-model.cc (get_tree_for_byte_offset,
region_model::get_representative_path_var_1,
test_mem_ref, test_POINTER_PLUS_EXPR_then_MEM_REF): Use
char __attribute__((may_alias)) * as type of MEM_REF second argument.

gcc/testsuite/ChangeLog
PR analyzer/113505
* gcc.dg/analyzer/pr113505.c: New test.
---
 gcc/analyzer/region-model.cc | 16 ++--
 gcc/testsuite/gcc.dg/analyzer/pr113505.c | 24 
 2 files changed, 34 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr113505.c

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index f079d1fb37e..8fff5324173 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3967,9 +3967,10 @@ static tree
 get_tree_for_byte_offset (tree ptr_expr, byte_offset_t byte_offset)
 {
   gcc_assert (ptr_expr);
+  tree ptype = build_pointer_type_for_mode (char_type_node, ptr_mode, true);
   return fold_build2 (MEM_REF,
  char_type_node,
- ptr_expr, wide_int_to_tree (size_type_node, byte_offset));
+ ptr_expr, wide_int_to_tree (ptype, byte_offset));
 }
 
 /* Simulate a series of reads of REG until we find a 0 byte
@@ -5360,9 +5361,10 @@ region_model::get_representative_path_var_1 (const 
region *reg,
tree addr_parent = build1 (ADDR_EXPR,
   build_pointer_type (reg->get_type ()),
   parent_pv.m_tree);
-   return path_var (build2 (MEM_REF,
-reg->get_type (),
-addr_parent, offset_pv.m_tree),
+   tree ptype = build_pointer_type_for_mode (char_type_node, ptr_mode,
+ true);
+   return path_var (build2 (MEM_REF, reg->get_type (), addr_parent,
+fold_convert (ptype, offset_pv.m_tree)),
 parent_pv.m_stack_depth);
   }
 
@@ -9024,7 +9026,8 @@ test_mem_ref ()
 
   tree int_17 = build_int_cst (integer_type_node, 17);
   tree addr_of_x = build1 (ADDR_EXPR, int_star, x);
-  tree offset_0 = build_int_cst (integer_type_node, 0);
+  tree ptype = build_pointer_type_for_mode (char_type_node, ptr_mode, true);
+  tree offset_0 = build_int_cst (ptype, 0);
   tree star_p = build2 (MEM_REF, integer_type_node, p, offset_0);
 
   region_model_manager mgr;
@@ -9074,7 +9077,8 @@ test_POINTER_PLUS_EXPR_then_MEM_REF ()
   tree a = build_global_decl ("a", int_star);
   tree offset_12 = build_int_cst (size_type_node, 12);
   tree pointer_plus_expr = build2 (POINTER_PLUS_EXPR, int_star, a, offset_12);
-  tree offset_0 = build_int_cst (integer_type_node, 0);
+  tree ptype = build_pointer_type_for_mode (char_type_node, ptr_mode, true);
+  tree offset_0 = build_int_cst (ptype, 0);
   tree mem_ref = build2 (MEM_REF, integer_type_node,
 pointer_plus_expr, offset_0);
   region_model_manager mgr;
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr113505.c 
b/gcc/testsuite/gcc.dg/analyzer/pr113505.c
new file mode 100644
index 000..58a2b6cd6f4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr113505.c
@@ -0,0 +1,24 @@
+/* PR analyzer/113505 */
+/* { dg-additional-options "-O -fdump-analyzer" } */
+
+enum E **foo () __attribute__((__const__));
+char a[2];
+void bar (char *);
+
+void
+baz (void)
+{
+  char *s, *l;
+  for (;;)
+{
+  bar (a);
+  s = a;
+  while (foo ()[*s])
+   s++;
+  l = s;
+  *l++ = '\0';
+  while (foo ()[*l])
+   l++;
+  bar (s);
+}
+}
-- 
2.26.3



Re: [gcc-15 2/3] RISC-V: avoid LUI based const mat: keep stack offsets aligned

2024-03-19 Thread Vineet Gupta



On 3/19/24 06:10, Jeff Law wrote:
> On 3/19/24 12:48 AM, Andrew Waterman wrote:
>> On Mon, Mar 18, 2024 at 5:28 PM Vineet Gupta  wrote:
>>> On 3/16/24 13:21, Jeff Law wrote:
 |   59944:add s0,sp,2047  <
 |   59948:mv  a2,a0
 |   5994c:mv  a3,a1
 |   59950:mv  a0,sp
 |   59954:li  a4,1
 |   59958:lui a1,0x1
 |   5995c:add s0,s0,1 <---
 |   59960:jal 59a3c

 SP here becomes unaligned, even if transitively which is undesirable as
 well as incorrect:
- ABI requires stack to be 8 byte aligned
- asm code looks weird and unexpected
- to the user it might falsely seem like a compiler bug even when not,
  specially when staring at asm for debugging unrelated issue.
 It's not ideal, but I think it's still ABI compliant as-is.  If it
 wasn't, then I suspect things like virtual origins in Ada couldn't be
 made ABI compliant.
>>> To be clear are u suggesting ADD sp, sp, 2047 is ABI compliant ?
>>> I'd still like to avoid it as I'm sure someone will complain about it.
>>>
> With the patch, we get following correct code instead:
>
> | ..
> | 59944: add s0,sp,2032
> | ..
> | 5995c: add s0,s0,16
 Alternately you could tighten the positive side of the range of the
 splitter from patch 1/3 so that you could always use 2032 rather than
 2047 on the first addi.   ie instead of allowing 2048..4094, allow
 2048..4064.
>>> 2033..4064 vs. 2048..4094
>>>
>>> Yeah I was a bit split about this as well. Since you are OK with either,
>>> I'll keep them as-is and perhaps add this observation to commitlog.
>> There's a subset of embedded use cases where an interrupt service
>> routine continues on the same stack as the interrupted thread,
>> requiring sp to always have an ABI-compliant value (i.e. 16B aligned,
>> and with no important data on the stack at an address below sp).
>>
>> Although not all use cases care about this property, it seems more
>> straightforward to maintain the invariant everywhere, rather than
>> selectively enforce it.
> Just to be clear, the changes don't misalign the stack pointer at all. 
> They merely have the potential to create *another* pointer into the 
> stack which may or may not be aligned.  Which is totally normal, it's no 
> different than taking the address of a char on the stack.

Right I never saw any sp,sp,2047 getting generated - not even in the
first version of patch which lacked any filtering of stack regs via
riscv_reg_frame_related () and obviously didn't have the stack variant
of splitter. I don't know if that is just being lucky and not enough
testing exposure (I only spot checked buildroot libc, vmlinux) or
something somewhere enforces that.

However given that misaligned pointer off of stack is a non-issue, I
think we can do the following:

1. keep just one splitter with 2047 based predicates and constraint (and
not 2032) for both stack-related and general regs.
2. gate the splitter on only operands[0] being not stack related
(currently it checks for either [0] or [1]) - this allows the prominent
case where SP is simply a src, and avoids when any potential shenanigans
to SP itself.

-Vineet


Re: RISC-V: Use convert instructions instead of calling library functions

2024-03-19 Thread Andrew Waterman
On Tue, Mar 19, 2024 at 9:23 AM Palmer Dabbelt  wrote:

> On Mon, 18 Mar 2024 20:50:14 PDT (-0700), jeffreya...@gmail.com wrote:
> >
> >
> > On 3/18/24 3:09 AM, Jivan Hakobyan wrote:
> >> As RV has round instructions it is reasonable to use them instead of
> >> calling the library functions.
> >>
> >> With my patch for the following C code:
> >> double foo(double a) {
> >>  return ceil(a);
> >> }
> >>
> >> GCC generates the following ASM code (before it was tail call)
> >> foo:
> >>  fabs.d  fa4,fa0
> >>  lui a5,%hi(.LC0)
> >>  fld fa3,%lo(.LC0)(a5)
> >>  flt.d   a5,fa4,fa3
> >>  beq a5,zero,.L3
> >>  fcvt.l.d a5,fa0,rup
>
> I'm not sure exactly what context this is in, but my reading of
> "according to the current rounding mode" means we'd usually use the
> dynamic rounding mode.
>

ceil doesn't depend on the current rounding mode; rup is correct.  For
rint, you'd be correct.


>
> >>  fcvt.d.lfa4,a5
> >>  fsgnj.d fa0,fa4,fa0
> >> .L3:
> >>  ret
> >>
> >> .LC0:
> >>  .word   0
> >>  .word   1127219200 // 0x4330
> >>
> >>
> >> The patch I have evaluated on SPEC2017.
> >> Counted dynamic instructions counts and got the following improvements
> >>
> >> 510.parest_r   262 m  -
> >> 511.povray_r  2.1  b0.04%
> >> 521.wrt_r269 m   -
> >> 526.blender_r3 b 0.1%
> >> 527.cam4_r   15 b   0.6%
> >> 538.imagick_r365 b 7.6%
> >>
> >> Overall executed 385 billion fewer instructions which is 0.5%.
> > A few more notes.
> >
> > The sequence Jivan is using is derived from LLVM.  The condition in the
> > generated code tests for those values were are supposed to pass through
> > unaltered.  The condition in the pattern ensures we do something
> > sensible WRT FE_INEXACT and mirrors how other ports handle these insns.
>
> My only worry here is that when we were doing the other patterns we
> decided not to do rint.  I can't remember exactly why, but from reading
> the docs we probably just skipped it due to the inexact handling and Zfa
> having an instruction that just does this.  FP stuff is always a bit of
> a time sink, so at that point it probably just fell off the priority
> list.
>
> I'm not really an FP guy, so I usually just poke around what the other
> ports generate and try to figure out what's going on.  arm64 has the Zfa
> instruction and x86 FP is complicated, so I'm not sure exactly who else
> to look at for this sort of stuff.  From just looking at the code,
> though, I think there's two issues -- I'm not really an FP person,
> though, so take this with a grain of salt:
>
> IIUC what we've got here doesn't actually set the inexact flag for the
> bounds check failure case


I think the original code was correct.  If you exceed the bounds, then by
construction you know that the input was already an integer, and so not
setting NX is correct.  If you didn't exceed the bounds, then fcvt.l.d will
set NX when appropriate.

, as we're just loading up an exact constant
> and doing the bounds check.  We're also not clamping to INT_MAX-type
> values, but not sure if we're supposed to.  I think we could fix both of
> those by adjusting the expansion to something like
>
>   fabs.d  fa4,fa0
>   lui a5,%hi(.LC0)
>   fld fa3,%lo(.LC0)(a5)
>   flt.d   a5,fa4,fa3
>   bne a5,zero,.L3
>   mv  fa0, fa3
>  .L3:
>   fcvt.l.d a5,fa0,rup
>   fcvt.d.lfa4,a5
>   fsgnj.d fa0,fa4,fa0
>   ret
>
> and then adjusting the constant to be an epsilon larger than INT_MAX so
> it'll still trigger the clamping but also inexact.
>

ceil/rint/etc. are supposed to work for the whole range of their
floating-point type; the range of the integral types isn't supposed to
affect the result.  The original code was correct in this regard, too.


>
> There's also a pair of changes to the ISA in 2020 that added the
> conversion inexact handling requirement, it was a grey area before.  I
> don't remember exactly what happened there, but I did remember it
> happening.  I don't think anyone cares all that much about the
> performance of systems that target the older ISAs, so maybe we just
> restrict the non-libcall expansion to ISAs that contain the new wording?
>
> commit 5890a1a702abf4157d5879717a39d8ecdae0de68 (tag:
> draft-20201229-5890a1a)
> Author: Andrew Waterman 
> Date:   Mon Dec 28 18:19:44 2020 -0800
>
> Clarify when FP conversions raise the Inexact flag
>
> diff --git a/src/f.tex b/src/f.tex
> index 8649d75..97c253b 100644
> --- a/src/f.tex
> +++ b/src/f.tex
> @@ -594,9 +594,9 @@ instructions round according to the {\em rm}
> field.  A floating-point register
>  can be initialized to floating-point positive zero using FCVT.S.W
> {\em rd},
>  {\tt x0}, which 

Re: [PATCH] Document -fexcess-precision=16.

2024-03-19 Thread Joseph Myers
On Tue, 19 Mar 2024, Hongtao Liu wrote:

> On Tue, Mar 19, 2024 at 12:16 AM Joseph Myers  wrote:
> >
> > On Mon, 18 Mar 2024, liuhongt wrote:
> >
> > > +If @option{-fexcess-precision=16} is specified, casts and assignments of
> > > +@code{_Float16} and @code{bfloat16_t} cause value to be rounded to their
> > > +semantic types if they're supported by the target.
> >
> > Isn't that option about rounding results of all operations, whether or not
> > a cast or assignment is involved?  That's certainly what the brief mention
> > of this option in extend.texi says, and fits the intent that
> > -fexcess-precision=16 corresponds to FLT_EVAL_METHOD == 16.
> Yes, how about this.
> 
> 
> +If @option{-fexcess-precision=16} is specified, each operation of
> +@code{_Float16} and @code{bfloat16_t} causes value to be rounded to their
> +semantic types if they're supported by the target.

I'm sure sure it's quite right to say the operation causes the value to be 
rounded, considering it's results not arguments that are rounded, and from 
the user perspective the rounding is an implementation detail, it's just 
computing the results in the semantic type.  So maybe more like:

  If @option{-fexcess-precision=16} is specified, constants and the
  results of expressions with types @code{_Float16} and @code{__bf16}
  are computed without excess precision.

(Also, I've suggested there to use the built-in type name __bf16; the name 
__bfloat16 from avx512bf16vlintrin.h would also be a possibility.  As far 
as I know, the bfloat16_t name is only in C++.)

-- 
Joseph S. Myers
josmy...@redhat.com

Re: [PATCH] c++: direct-init of an array of class type [PR59465]

2024-03-19 Thread Marek Polacek
Ping.  Though I reckon it may be better to defer this to 15.

On Fri, Mar 01, 2024 at 07:58:51PM -0500, Marek Polacek wrote:
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?  I don't
> claim that this has to go to 14 though.
> 
> -- >8 --
> ...from another array in a mem-initializer should not be accepted.
> 
> We already reject
> 
>   struct string {} a[1];
>   string x[1](a);
> 
> but
> 
>   struct pair {
> string s[1];
> pair() : s(a) {}
>   };
> 
> is wrongly accepted.
> 
> It started to be accepted with r0-110915-ga034826198b771:
> 
> which was supposed to be a cleanup, not a deliberate change to start
> accepting the code.  The build_vec_init_expr code was added in r165976:
> .
> 
> It appears that we do the magic copy array when we have a defaulted
> constructor and we generate code for its mem-initializer which
> initializes an array.  I also see that we go that path for compound
> literals.  So when initializing an array member, we can limit building
> up a VEC_INIT_EXPR to those special cases.
> 
>   PR c++/59465
> 
> gcc/cp/ChangeLog:
> 
>   * init.cc (can_init_array_with_p): New.
>   (perform_member_init): Check it.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.dg/init/array62.C: New test.
>   * g++.dg/init/array63.C: New test.
>   * g++.dg/init/array64.C: New test.
> ---
>  gcc/cp/init.cc  | 27 ++-
>  gcc/testsuite/g++.dg/init/array62.C | 19 +++
>  gcc/testsuite/g++.dg/init/array63.C | 13 +
>  gcc/testsuite/g++.dg/init/array64.C | 22 ++
>  4 files changed, 80 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/init/array62.C
>  create mode 100644 gcc/testsuite/g++.dg/init/array63.C
>  create mode 100644 gcc/testsuite/g++.dg/init/array64.C
> 
> diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
> index d2586fad86b..fb8c0e521fb 100644
> --- a/gcc/cp/init.cc
> +++ b/gcc/cp/init.cc
> @@ -934,6 +934,31 @@ find_uninit_fields (tree *t, hash_set 
> *uninitialized, tree member)
>  }
>  }
>  
> +/* Return true if it's OK to initialize an array from INIT.  Mere mortals
> +   can't copy arrays, but the compiler can do so with a VEC_INIT_EXPR in
> +   certain cases.  */
> +
> +static bool
> +can_init_array_with_p (tree init)
> +{
> +  if (!init)
> +return true;
> +
> +  /* We're called from synthesize_method, and we're processing the
> + mem-initializers of a constructor.  */
> +  if (DECL_DEFAULTED_FN (current_function_decl))
> +return true;
> +  /* As an extension, we allow copying from a compound literal.  */
> +  else if (TREE_CODE (init) == TARGET_EXPR)
> +{
> +  init = TARGET_EXPR_INITIAL (init);
> +  if (TREE_CODE (init) == CONSTRUCTOR)
> + return CONSTRUCTOR_C99_COMPOUND_LITERAL (init);
> +}
> +
> +  return false;
> +}
> +
>  /* Initialize MEMBER, a FIELD_DECL, with INIT, a TREE_LIST of
> arguments.  If TREE_LIST is void_type_node, an empty initializer
> list was given; if NULL_TREE no initializer was given.  UNINITIALIZED
> @@ -1085,7 +1110,7 @@ perform_member_init (tree member, tree init, 
> hash_set )
>else if (type_build_ctor_call (type)
>  || (init && CLASS_TYPE_P (strip_array_types (type
>  {
> -  if (TREE_CODE (type) == ARRAY_TYPE)
> +  if (TREE_CODE (type) == ARRAY_TYPE && can_init_array_with_p (init))
>   {
> if (init == NULL_TREE
> || same_type_ignoring_top_level_qualifiers_p (type,
> diff --git a/gcc/testsuite/g++.dg/init/array62.C 
> b/gcc/testsuite/g++.dg/init/array62.C
> new file mode 100644
> index 000..6d3935d7a66
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/init/array62.C
> @@ -0,0 +1,19 @@
> +// PR c++/59465
> +// { dg-do compile }
> +
> +struct string {} a[1];
> +struct pair {
> +  string s[1];
> +  pair() : s(a) {} // { dg-error "array must be initialized" }
> +};
> +
> +struct S {
> +  char s[10];
> +  S() : s("aaa") {}
> +};
> +
> +void
> +g ()
> +{
> +  string x[1](a); // { dg-error "array must be initialized" }
> +}
> diff --git a/gcc/testsuite/g++.dg/init/array63.C 
> b/gcc/testsuite/g++.dg/init/array63.C
> new file mode 100644
> index 000..96bc9a64b26
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/init/array63.C
> @@ -0,0 +1,13 @@
> +// PR c++/59465
> +// { dg-do compile }
> +
> +struct I {
> +const bool b;
> +};
> +struct O {
> +I a[2];
> +static I const data[2];
> +O() : a(data){}  // { dg-error "array must be initialized" }
> +};
> +
> +I const O::data[2] = {true, false};
> diff --git a/gcc/testsuite/g++.dg/init/array64.C 
> b/gcc/testsuite/g++.dg/init/array64.C
> new file mode 100644
> index 000..bbdd70c6df8
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/init/array64.C
> @@ -0,0 +1,22 @@
> +// PR c++/59465
> +// { dg-do compile }
> +
> +static const int 

Re: [PATCH v3] c++: ICE with temporary of class type in array DMI [PR109966]

2024-03-19 Thread Marek Polacek
On Thu, Mar 14, 2024 at 05:26:59PM -0400, Marek Polacek wrote:
> @@ -1441,11 +1406,13 @@ static tree
>  replace_placeholders_for_class_temp_r (tree *tp, int *, void *data)
>  {
>tree t = *tp;
> -  tree full_expr = *static_cast(data);
> +  auto pset = static_cast *>(data);
>  
>/* We're looking for a TARGET_EXPR nested in the whole expression.  */
>if (TREE_CODE (t) == TARGET_EXPR
> -  && !potential_prvalue_result_of (t, full_expr))
> +  /* That serves as temporary materialization, not an initializer.  */
> +  && !TARGET_EXPR_ELIDING_P (t)
> +  && !pset->add (t))
>  {
>tree init = TARGET_EXPR_INITIAL (t);
>while (TREE_CODE (init) == COMPOUND_EXPR)
> @@ -1460,6 +1427,16 @@ replace_placeholders_for_class_temp_r (tree *tp, int 
> *, void *data)
> gcc_checking_assert (!find_placeholders (init));
>   }
>  }
> +  /* TARGET_EXPRs initializing function arguments are not marked as eliding,
> + even though gimplify_arg drops them on the floor.  Don't go replacing
> + placeholders in them.  */
> +  else if (TREE_CODE (t) == CALL_EXPR || TREE_CODE (t) == AGGR_INIT_EXPR)
> +for (int i = 0; i < call_expr_nargs (t); ++i)
> +  {
> + tree arg = get_nth_callarg (t, i);
> + if (TREE_CODE (arg) == TARGET_EXPR)

I just realized this could also check !TARGET_EXPR_ELIDING_P; there's no point
to adding an eliding TARGET_EXPR into the pset.

> +   pset->add (arg);
> +  }

Marek



[PATCH v2] c++: explicit inst of template method not generated [PR110323]

2024-03-19 Thread Marek Polacek
On Mon, Mar 18, 2024 at 09:10:27PM -0400, Jason Merrill wrote:
> On 3/15/24 13:48, Marek Polacek wrote:
> > On Thu, Mar 14, 2024 at 03:39:04PM -0400, Jason Merrill wrote:
> > > On 3/8/24 12:02, Marek Polacek wrote:
> > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > > 
> > > > -- >8 --
> > > > Consider
> > > > 
> > > > constexpr int VAL = 1;
> > > > struct foo {
> > > > template 
> > > > void bar(typename std::conditional::type 
> > > > arg) { }
> > > > };
> > > > template void foo::bar<1>(int arg);
> > > > 
> > > > where we since r11-291 fail to emit the code for the explicit
> > > > instantiation.  That's because cp_walk_subtrees/TYPENAME_TYPE now
> > > > walks TYPE_CONTEXT ('conditional' here) as well, and in a template
> > > > finds the B==VAL template argument.  VAL is constexpr, which implies 
> > > > const,
> > > > which in the global scope implies static.  
> > > > constrain_visibility_for_template
> > > > then makes "struct conditional<(B == VAL), int, float>" non-TREE_PUBLIC.
> > > > Then symtab_node::needed_p checks TREE_PUBLIC, sees it's 0, and we don't
> > > > emit any code.
> > > > 
> > > > I thought the fix would be some ODR-esque check to not consider
> > > > constexpr variables/fns that are used just for their value.  But
> > > > it turned out to be tricky.  For instance, we can't skip
> > > > determine_visibility in a template; we can't even skip it for value-dep
> > > > expressions.  For example, no-linkage-expr1.C has
> > > > 
> > > > using P = struct {}*;
> > > > template 
> > > > void f(int(*)[((P)0, N)]) {}
> > > > 
> > > > where ((P)0, N) is value-dep, but N is not relevant here: we have to
> > > > ferret out the anonymous type.  When instantiating, it's already gone.
> > > 
> > > Hmm, how is that different from the B == VAL case?  In both cases we're
> > > naming an internal entity that gets folded away.
> > > 
> > > I guess the difference is that B == VAL falls under the special allowance 
> > > in
> > > https://eel.is/c++draft/basic.def.odr#14.5.1 because it's a constant used 
> > > as
> > > a prvalue, and therefore is not odr-used under
> > > https://eel.is/c++draft/basic.def.odr#5.2
> > > 
> > > So I would limit this change to decl_constant_var_p.  Really we should 
> > > also
> > > be checking that the lvalue-rvalue conversion is applied, but that's more
> > > complicated.
> > 
> > Thanks.  My previous version had it, but it didn't handle
> > 
> >static constexpr int getval () { return 1; }
> > 
> >template 
> >void baz(typename conditional::type arg) { }
> > 
> > I'd say that "getval()" is one of "manifestly constant-evaluated 
> > expressions that
> > are not value-dependent", so it should be treated the same as B == VAL.
> 
> But it doesn't satisfy the 14.5 rule that corresponding names need to refer
> to the same entity; since getval names a function, it doesn't get the
> special exemption from that rule that VAL gets.
> 
> So this should not be treated the same as B == VAL.
 
Thanks for the explanation.

> > I don't know if this is important to handle.  Do you want me to poke 
> > further or
> > should we just go with decl_constant_var_p and leave it at that for now?
> 
> Just decl_constant_var_p.

Here it is:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Consider

  constexpr int VAL = 1;
  struct foo {
  template 
  void bar(typename std::conditional::type arg) { }
  };
  template void foo::bar<1>(int arg);

where we since r11-291 fail to emit the code for the explicit
instantiation.  That's because cp_walk_subtrees/TYPENAME_TYPE now
walks TYPE_CONTEXT ('conditional' here) as well, and in a template
finds the B==VAL template argument.  VAL is constexpr, which implies const,
which in the global scope implies static.  constrain_visibility_for_template
then makes "struct conditional<(B == VAL), int, float>" non-TREE_PUBLIC.
Then symtab_node::needed_p checks TREE_PUBLIC, sees it's 0, and we don't
emit any code.

I thought the fix would be some ODR-esque check to not consider
constexpr variables/fns that are used just for their value.  But
it turned out to be tricky.  For instance, we can't skip
determine_visibility in a template; we can't even skip it for value-dep
expressions.  For example, no-linkage-expr1.C has

  using P = struct {}*;
  template 
  void f(int(*)[((P)0, N)]) {}

where ((P)0, N) is value-dep, but N is not relevant here: we have to
ferret out the anonymous type.  When instantiating, it's already gone.

This patch uses decl_constant_var_p.  This is to implement (an
approximation) [basic.def.odr]#14.5.1 and [basic.def.odr]#5.2.

PR c++/110323

gcc/cp/ChangeLog:

* decl2.cc (min_vis_expr_r) : Do nothing for
decl_constant_var_p VAR_DECLs.

gcc/testsuite/ChangeLog:

* g++.dg/template/explicit-instantiation6.C: New test.
* g++.dg/template/explicit-instantiation7.C: New test.
---
 gcc/cp/decl2.cc 

[pushed] diagnostics: fix corrupt json/SARIF on stderr [PR114348]

2024-03-19 Thread David Malcolm
Various values of -fdiagnostics-format= request machine-readable output
on stderr, using JSON, but in various places we use fnotice to write
free-form text to stderr, such as "compilation terminated", leading to
corrupt JSON.

Fix by having fnotice skip the output for such cases.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Lightly tested manually.
Pushed to trunk as r14-9554-g0bf99b1b7eda2f.

gcc/ChangeLog:
PR middle-end/114348
* diagnostic-format-json.cc
(json_stderr_output_format::machine_readable_stderr_p): New.
(json_file_output_format::machine_readable_stderr_p): New.
* diagnostic-format-sarif.cc
(sarif_stream_output_format::machine_readable_stderr_p): New.
(sarif_file_output_format::machine_readable_stderr_p): New.
* diagnostic.cc (diagnostic_context::action_after_output): Move
"fnotice" to before "finish" call, so that we still have the
diagnostic_context.
(fnotice): Bail out if the user requested one of the
machine-readable diagnostic output formats on stderr.
* diagnostic.h
(diagnostic_output_format::machine_readable_stderr_p): New pure
virtual function.
(diagnostic_text_output_format::machine_readable_stderr_p): New.
(diagnostic_context::get_output_format): New accessor.

Signed-off-by: David Malcolm 
---
 gcc/diagnostic-format-json.cc  |  8 
 gcc/diagnostic-format-sarif.cc |  8 
 gcc/diagnostic.cc  | 12 +++-
 gcc/diagnostic.h   | 10 ++
 4 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/gcc/diagnostic-format-json.cc b/gcc/diagnostic-format-json.cc
index 51e016b6463..0782ae831eb 100644
--- a/gcc/diagnostic-format-json.cc
+++ b/gcc/diagnostic-format-json.cc
@@ -314,6 +314,10 @@ public:
   {
 flush_to_file (stderr);
   }
+  bool machine_readable_stderr_p () const final override
+  {
+return true;
+  }
 };
 
 class json_file_output_format : public json_output_format
@@ -345,6 +349,10 @@ public:
 fclose (outf);
 free (filename);
   }
+  bool machine_readable_stderr_p () const final override
+  {
+return false;
+  }
 
 private:
   char *m_base_file_name;
diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index 307b2f56c28..97c5943cd33 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -1750,6 +1750,10 @@ public:
   {
 m_builder.flush_to_file (m_stream);
   }
+  bool machine_readable_stderr_p () const final override
+  {
+return m_stream == stderr;
+  }
 private:
   FILE *m_stream;
 };
@@ -1782,6 +1786,10 @@ public:
 fclose (outf);
 free (filename);
   }
+  bool machine_readable_stderr_p () const final override
+  {
+return false;
+  }
 
 private:
   char *m_base_file_name;
diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index 1d143fa7498..8e4621f8031 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -802,8 +802,8 @@ diagnostic_context::action_after_output (diagnostic_t 
diag_kind)
 case DK_FATAL:
   if (m_abort_on_error)
real_abort ();
-  finish ();
   fnotice (stderr, "compilation terminated.\n");
+  finish ();
   exit (FATAL_EXIT_CODE);
 
 default:
@@ -2264,6 +2264,16 @@ diagnostic_context::emit_diagram (const 
diagnostic_diagram )
 void
 fnotice (FILE *file, const char *cmsgid, ...)
 {
+  /* If the user requested one of the machine-readable diagnostic output
+ formats on stderr (e.g. -fdiagnostics-format=sarif-stderr), then
+ emitting free-form text on stderr will lead to corrupt output.
+ Skip the message for such cases.  */
+  if (file == stderr && global_dc)
+if (const diagnostic_output_format *output_format
+ = global_dc->get_output_format ())
+  if (output_format->machine_readable_stderr_p ())
+   return;
+
   va_list ap;
 
   va_start (ap, cmsgid);
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 0a7c7e02b37..065ac784e25 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -210,6 +210,7 @@ public:
   virtual void on_end_diagnostic (const diagnostic_info &,
  diagnostic_t orig_diag_kind) = 0;
   virtual void on_diagram (const diagnostic_diagram ) = 0;
+  virtual bool machine_readable_stderr_p () const = 0;
 
 protected:
   diagnostic_output_format (diagnostic_context )
@@ -238,6 +239,10 @@ public:
   void on_end_diagnostic (const diagnostic_info &,
  diagnostic_t orig_diag_kind) override;
   void on_diagram (const diagnostic_diagram ) override;
+  bool machine_readable_stderr_p () const final override
+  {
+return false;
+  }
 };
 
 /* A stack of sets of classifications: each entry in the stack is
@@ -432,6 +437,11 @@ public:
 
   void emit_diagram (const diagnostic_diagram );
 
+  const diagnostic_output_format *get_output_format () const
+  {
+return m_output_format;
+  }
+
   /* Various setters for use by option-handling 

Re: ping: [PATCH] diagnostics: Fix behavior of permerror options after diagnostic pop [PR111918]

2024-03-19 Thread David Malcolm
On Tue, 2024-03-19 at 09:03 -0400, Lewis Hyatt wrote:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html

Sorry about the delay.

The patch looks good for trunk, assuming it's passed the usual
bootstrap and regression testing.

Thanks
Dave

> 
> Thanks!
> 
> On Fri, Feb 16, 2024 at 7:02 PM Lewis Hyatt  wrote:
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> > 
> > On Thu, Jan 25, 2024 at 4:57 PM Lewis Hyatt 
> > wrote:
> > > 
> > > May I please ask again about this one? It's just a couple lines,
> > > and I
> > > think it fixes an important gap in the logic for #pragma GCC
> > > diagnostic. The PR was not reported by me so I think at least one
> > > other person does care about it :). Thanks!
> > > 
> > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> > > 
> > > -Lewis
> > > 
> > > On Mon, Jan 8, 2024 at 6:53 PM Lewis Hyatt 
> > > wrote:
> > > > 
> > > > Can I please ping this one again? It's 3 lines or so to fix the
> > > > PR. Thanks!
> > > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> > > > 
> > > > On Tue, Dec 19, 2023 at 6:20 PM Lewis Hyatt 
> > > > wrote:
> > > > > 
> > > > > Hello-
> > > > > 
> > > > > May I please ping this one? Thanks...
> > > > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> > > > > 
> > > > > -Lewis
> > > > > 
> > > > > On Wed, Nov 29, 2023 at 7:05 PM Lewis Hyatt
> > > > >  wrote:
> > > > > > 
> > > > > > On Thu, Nov 09, 2023 at 04:16:10PM -0500, Lewis Hyatt
> > > > > > wrote:
> > > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111918
> > > > > > > 
> > > > > > > This patch fixes the behavior of `#pragma GCC diagnostic
> > > > > > > pop' for permissive
> > > > > > > error diagnostics such as -Wnarrowing (in C++11). Those
> > > > > > > currently do not
> > > > > > > return to the correct state after the last pop; they
> > > > > > > become effectively
> > > > > > > simple warnings instead. Bootstrap + regtest all
> > > > > > > languages on x86-64, does
> > > > > > > it look OK please? Thanks!
> > > > > > 
> > > > > > Hello-
> > > > > > 
> > > > > > May I please ping this bug fix?
> > > > > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635871.html
> > > > > > 
> > > > > > Please note, it requires a trivial rebase on top of recent
> > > > > > changes to
> > > > > > the class diagnostic_context public interface. I attached
> > > > > > the rebased patch
> > > > > > here as well. Thanks!
> > > > > > 
> > > > > > -Lewis
> 



Re: [PATCH v3 2/2] fortran: Fix specification expression error with dummy procedures [PR111781]

2024-03-19 Thread Paul Richard Thomas
Hi Mikael,

This is very good. I am pleased to see global variables disappear and I
like the new helper functions.

As before, OK for mainline and, if you wish, 13-branch.

Thanks

Paul


On Tue, 19 Mar 2024 at 15:49, Mikael Morin  wrote:

> This fixes a spurious invalid variable in specification expression error.
> The error was caused on the testcase from the PR by two different bugs.
> First, the call to is_parent_of_current_ns was unable to recognize
> correct host association and returned false.  Second, an ad-hoc
> condition coming next was using a global variable previously improperly
> restored to false (instead of restoring it to its initial value).  The
> latter happened on the testcase because one dummy argument was a procedure,
> and checking that argument what causing a check of all its arguments with
> the (improper) reset of the flag at the end, and that preceded the check of
> the next argument.
>
> For the first bug, the wrong result of is_parent_of_current_ns is fixed by
> correcting the namespaces that function deals with, both the one passed
> as argument and the current one tracked in the gfc_current_ns global.  Two
> new functions are introduced to select the right namespace.
>
> Regarding the second bug, the problematic condition is removed, together
> with the formal_arg_flag associated with it.  Indeed, that condition was
> (wrongly) allowing local variables to be used in array bounds of dummy
> arguments.
>
> PR fortran/111781
>
> gcc/fortran/ChangeLog:
>
> * symbol.cc (gfc_get_procedure_ns, gfc_get_spec_ns): New functions.
> * gfortran.h (gfc_get_procedure_ns, gfc_get_spec ns): Declare them.
> (gfc_is_formal_arg): Remove.
> * expr.cc (check_restricted): Remove special case allowing local
> variable in dummy argument bound expressions.  Use gfc_get_spec_ns
> to get the right namespace.
> * resolve.cc (gfc_is_formal_arg, formal_arg_flag): Remove.
> (gfc_resolve_formal_arglist): Set gfc_current_ns.  Quit loop and
> restore gfc_current_ns instead of early returning.
> (resolve_symbol): Factor common array spec resolution code to...
> (resolve_symbol_array_spec): ... this new function.  Additionnally
> set and restore gfc_current_ns.
>
> gcc/testsuite/ChangeLog:
>
> * gfortran.dg/spec_expr_8.f90: New test.
> * gfortran.dg/spec_expr_9.f90: New test.
> ---
>  gcc/fortran/expr.cc   |  8 +--
>  gcc/fortran/gfortran.h|  4 +-
>  gcc/fortran/resolve.cc| 77 +++
>  gcc/fortran/symbol.cc | 58 +
>  gcc/testsuite/gfortran.dg/spec_expr_8.f90 | 24 +++
>  gcc/testsuite/gfortran.dg/spec_expr_9.f90 | 19 ++
>  6 files changed, 140 insertions(+), 50 deletions(-)
>  create mode 100644 gcc/testsuite/gfortran.dg/spec_expr_8.f90
>  create mode 100644 gcc/testsuite/gfortran.dg/spec_expr_9.f90
>
> diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc
> index e4b1e8307e3..9a042cd7040 100644
> --- a/gcc/fortran/expr.cc
> +++ b/gcc/fortran/expr.cc
> @@ -3514,19 +3514,13 @@ check_restricted (gfc_expr *e)
>if (!check_references (e->ref, _restricted))
> break;
>
> -  /* gfc_is_formal_arg broadcasts that a formal argument list is being
> -processed in resolve.cc(resolve_formal_arglist).  This is done so
> -that host associated dummy array indices are accepted (PR23446).
> -This mechanism also does the same for the specification
> expressions
> -of array-valued functions.  */
>if (e->error
> || sym->attr.in_common
> || sym->attr.use_assoc
> || sym->attr.dummy
> || sym->attr.implied_index
> || sym->attr.flavor == FL_PARAMETER
> -   || is_parent_of_current_ns (sym->ns)
> -   || (gfc_is_formal_arg () && (sym->ns == gfc_current_ns)))
> +   || is_parent_of_current_ns (gfc_get_spec_ns (sym)))
> {
>   t = true;
>   break;
> diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
> index c7039730fad..26aa56b3358 100644
> --- a/gcc/fortran/gfortran.h
> +++ b/gcc/fortran/gfortran.h
> @@ -3612,6 +3612,9 @@ bool gfc_is_associate_pointer (gfc_symbol*);
>  gfc_symbol * gfc_find_dt_in_generic (gfc_symbol *);
>  gfc_formal_arglist *gfc_sym_get_dummy_args (gfc_symbol *);
>
> +gfc_namespace * gfc_get_procedure_ns (gfc_symbol *);
> +gfc_namespace * gfc_get_spec_ns (gfc_symbol *);
> +
>  /* intrinsic.cc -- true if working in an init-expr, false otherwise.  */
>  extern bool gfc_init_expr_flag;
>
> @@ -3821,7 +3824,6 @@ bool gfc_resolve_iterator (gfc_iterator *, bool,
> bool);
>  bool find_forall_index (gfc_expr *, gfc_symbol *, int);
>  bool gfc_resolve_index (gfc_expr *, int);
>  bool gfc_resolve_dim_arg (gfc_expr *);
> -bool gfc_is_formal_arg (void);
>  bool gfc_resolve_substring (gfc_ref *, bool *);
>  void 

Re: [PATCH v3 0/2] fortran: Fix specification checks [PR111781]

2024-03-19 Thread Paul Richard Thomas
Hi Mikael,

Sorry, I am replying to these in the order that they appear in my intray :-)

OK for mainline and, if you wish, 13-branch.

Thanks

Paul


On Tue, 19 Mar 2024 at 15:49, Mikael Morin  wrote:

> Hello,
>
> these patches correct diagnostics dealing with variables in specification
> expressions.
> The first patch is a testsuite change, which fixes invalid specification
> expressions that the second patch would diagnose.
> The second patch removes a spurious diagnostic when a dummy procedure is
> involved, and enables more valid ones, as visible in the testcases from the
> first patch.
>
> I haven't tested it again (same code as v2), but I plan to do it before
> the final push.
> Ok for master?
>
> Mikael
>
> v2 -> v3 changes:
>
>   - Correct first name in testcase comment
>   - Clarify and correct log and changelog text from second patch
>   - Target current stage (stage4) instead of next (stage1)
>
> v1 -> v2 changes:
>
>   - Fix condition guarding sym->result access.
>
>
> Mikael Morin (2):
>   testsuite: Declare fortran array bound variables
>   fortran: Fix specification expression error with dummy procedures
> [PR111781]
>
>  gcc/fortran/expr.cc   |  8 +-
>  gcc/fortran/gfortran.h|  4 +-
>  gcc/fortran/resolve.cc| 77 +--
>  gcc/fortran/symbol.cc | 58 ++
>  .../gfortran.dg/graphite/pr107865.f90 |  2 +-
>  gcc/testsuite/gfortran.dg/pr101267.f90|  2 +-
>  gcc/testsuite/gfortran.dg/pr112404.f90|  2 +-
>  gcc/testsuite/gfortran.dg/pr78061.f   |  2 +-
>  gcc/testsuite/gfortran.dg/pr79315.f90 |  6 +-
>  gcc/testsuite/gfortran.dg/spec_expr_8.f90 | 24 ++
>  gcc/testsuite/gfortran.dg/spec_expr_9.f90 | 19 +
>  gcc/testsuite/gfortran.dg/vect/pr90681.f  |  2 +-
>  gcc/testsuite/gfortran.dg/vect/pr97761.f90|  2 +-
>  gcc/testsuite/gfortran.dg/vect/pr99746.f90|  2 +-
>  14 files changed, 152 insertions(+), 58 deletions(-)
>  create mode 100644 gcc/testsuite/gfortran.dg/spec_expr_8.f90
>  create mode 100644 gcc/testsuite/gfortran.dg/spec_expr_9.f90
>
> --
> 2.43.0
>
>


Re: [PATCH] cpp: new built-in __EXP_COUNTER__

2024-03-19 Thread Kaz Kylheku
On 2024-03-18 00:30, Jonathan Wakely wrote:
> I don't have an opinion on the implementation, or the proposal itself,
> except that the implementation seems susprisingly simple, which is
> nice.

Hi Jonathan,

Here is an updated patch.

It rebased cleanly over more than newer 16000 commits, suggesting
that the area in the cpp code is "still waters", which is good.

I made the documentation change not to recommend using #if, but
#ifdef.

I got rid of the ChangeLog changes, and also tried to pay more
attention to the log message format, where the ChangeLog pieces
are specified.

In the first test case, I had to adjust the expected warning text
for two lines.

From 65effbcac172e8bb1a89f0621b3de6cdcb8dbab2 Mon Sep 17 00:00:00 2001
From: Kaz Kylheku 
Date: Wed, 20 Apr 2022 01:15:24 -0700
Subject: [PATCH] cpp: new built-in __EXP_COUNTER__

This change introduces a pair of related macros
__EXP_COUNTER__ and __UEXP_COUNTER__.  These macros access
integer values which enumerate macro expansions.
They can be used for the purposes of obtaining, unique
identifiers (within the scope of a translation unit), as a
replacement for unreliable hacks based on __LINE__.

Outside of macro expansions, these macros epand to 1,
so they are easy to test for in portable code that needs
to fall back on something, like __LINE__.

libcpp/ChangeLog:

	* libcpp/include/cpplib.h (struct cpp_macro): New members of
	type long long: exp_number and uexp_number.  These members are
	used to attach the current exp_counter value, and the parent
	context's copy of it to a new macro expansion.  The special
	macros then just access these.
	(enum cpp_builtin_type): New enumeration members,
	BT_EXP_COUNTER and BT_UEXP_COUNTER.

	* libcpp/macro.cc (exp_counter): New static variable for
	counting expansions.  There is an existing one,
	num_expanded_macros_counter, but that has its own purpose and
	is incremented in a specific place.  Plus it uses a narrower
	integer type.
	(_cpp_builtin_number_text): Change the local variable "number"
	from linenum_type to unsigned long long, so it holds at least
	64 bit values.  Handle the BT_EXP_COUNTER and BT_UEXP_COUNTER
	cases.  These just have to see if there is a current macro, and
	retrieve the values from it, otherwise do nothing so that the
	default 1 is produced.  In the case of BT_UEXP_COUNTER, if the
	value is zero, we don't use it, so 1 emerges.  The sprintf of
	the number is adjusted to use the right conversion specifier
	for the wider type.  Space is already being reserved for
	a 64 bit decimal.
	(enter_macro_context): After processing the macro arguments,
	if any, we increment exp_counter and attach its new value to
	the macro's context structure in the exp_number member.  We
	also calculate th emacro's uexp_number: the parent context's
	exp_cnumber.  This is tricky: we have to chase the previous
	macro context.  This works if the macro is object-like, or has
	no parameters.  If it has parameters, there is a parameter
	context, and so we have to climb one more flight of stairs to
	get to the real context.

gcc/ChangeLog:

	* gcc/doc/cpp.texi (__EXP_COUNTER__, __UEXP_COUNTER__):
	Special built-in macros documented.

gcc/testsuite/ChangeLog:

	* gcc.dg/cpp/expcounter1.c: New test.
	* gcc.dg/cpp/expcounter2.c: New test.
	* gcc.dg/cpp/expcounter3.c: New test.
	* gcc.dg/cpp/expcounter4.c: New test.
	* gcc.dg/cpp/expcounter5.c: New test.

Signed-off-by: Kaz Kylheku 
---
 gcc/doc/cpp.texi   | 81 ++
 gcc/testsuite/gcc.dg/cpp/expcounter1.c | 16 +
 gcc/testsuite/gcc.dg/cpp/expcounter2.c | 21 +++
 gcc/testsuite/gcc.dg/cpp/expcounter3.c | 22 +++
 gcc/testsuite/gcc.dg/cpp/expcounter4.c | 22 +++
 gcc/testsuite/gcc.dg/cpp/expcounter5.c | 28 +
 libcpp/include/cpplib.h|  8 +++
 libcpp/init.cc |  2 +
 libcpp/macro.cc| 44 +-
 9 files changed, 242 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/cpp/expcounter1.c
 create mode 100644 gcc/testsuite/gcc.dg/cpp/expcounter2.c
 create mode 100644 gcc/testsuite/gcc.dg/cpp/expcounter3.c
 create mode 100644 gcc/testsuite/gcc.dg/cpp/expcounter4.c
 create mode 100644 gcc/testsuite/gcc.dg/cpp/expcounter5.c

diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index 3de6e7aa737..a21e8b44be1 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -1942,6 +1942,87 @@ generate unique identifiers.  Care must be taken to ensure that
 @code{__COUNTER__} is not expanded prior to inclusion of precompiled headers
 which use it.  Otherwise, the precompiled headers will not be used.
 
+@item __EXP_COUNTER__
+This macro's name means "(macro) expansion counter".
+Outside of macro replacement sequences, it expands to the integer
+token @code{1}.  This make it possible to easily test for the presence
+of this feature using conditional directives such as
+@code{#ifdef __EXP_COUNTER__}.
+When @code{__EXP_COUNTER__} occurs in the replacement 

Re: [PATCH v3 1/2] testsuite: Declare fortran array bound variables

2024-03-19 Thread Paul Richard Thomas
Hi Mikael,

This looks completely "obvious" to me. OK for mainline and, I would
suggest, 13-branch.

Thanks

Paul



On Tue, 19 Mar 2024 at 15:49, Mikael Morin  wrote:

> This fixes invalid undeclared fortran array bound variables
> in the testsuite.
>
> gcc/testsuite/ChangeLog:
>
> * gfortran.dg/graphite/pr107865.f90: Declare array bound
> variable(s)
> as dummy argument(s).
> * gfortran.dg/pr101267.f90: Likewise.
> * gfortran.dg/pr112404.f90: Likewise.
> * gfortran.dg/pr78061.f: Likewise.
> * gfortran.dg/pr79315.f90: Likewise.
> * gfortran.dg/vect/pr90681.f: Likewise.
> * gfortran.dg/vect/pr97761.f90: Likewise.
> * gfortran.dg/vect/pr99746.f90: Likewise.
> ---
>  gcc/testsuite/gfortran.dg/graphite/pr107865.f90 | 2 +-
>  gcc/testsuite/gfortran.dg/pr101267.f90  | 2 +-
>  gcc/testsuite/gfortran.dg/pr112404.f90  | 2 +-
>  gcc/testsuite/gfortran.dg/pr78061.f | 2 +-
>  gcc/testsuite/gfortran.dg/pr79315.f90   | 6 +-
>  gcc/testsuite/gfortran.dg/vect/pr90681.f| 2 +-
>  gcc/testsuite/gfortran.dg/vect/pr97761.f90  | 2 +-
>  gcc/testsuite/gfortran.dg/vect/pr99746.f90  | 2 +-
>  8 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/testsuite/gfortran.dg/graphite/pr107865.f90
> b/gcc/testsuite/gfortran.dg/graphite/pr107865.f90
> index 6bddb17a1be..323d8092ad2 100644
> --- a/gcc/testsuite/gfortran.dg/graphite/pr107865.f90
> +++ b/gcc/testsuite/gfortran.dg/graphite/pr107865.f90
> @@ -1,7 +1,7 @@
>  ! { dg-do compile }
>  ! { dg-options "-O1 -floop-parallelize-all -ftree-parallelize-loops=2" }
>
> -  SUBROUTINE FNC (F)
> +  SUBROUTINE FNC (F,N)
>
>IMPLICIT REAL (A-H)
>DIMENSION F(N)
> diff --git a/gcc/testsuite/gfortran.dg/pr101267.f90
> b/gcc/testsuite/gfortran.dg/pr101267.f90
> index 12723cf9c22..99a6dcfa342 100644
> --- a/gcc/testsuite/gfortran.dg/pr101267.f90
> +++ b/gcc/testsuite/gfortran.dg/pr101267.f90
> @@ -1,7 +1,7 @@
>  ! { dg-do compile }
>  ! { dg-options "-Ofast" }
>  ! { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } }
> -   SUBROUTINE sfddagd( regime, znt,ite ,jte )
> +   SUBROUTINE sfddagd( regime, znt,ite ,jte, ime, IN )
> REAL, DIMENSION( ime, IN) :: regime, znt
> REAL, DIMENSION( ite, jte) :: wndcor_u
> LOGICAL wrf_dm_on_monitor
> diff --git a/gcc/testsuite/gfortran.dg/pr112404.f90
> b/gcc/testsuite/gfortran.dg/pr112404.f90
> index 573fa28164a..4508bbc8738 100644
> --- a/gcc/testsuite/gfortran.dg/pr112404.f90
> +++ b/gcc/testsuite/gfortran.dg/pr112404.f90
> @@ -1,7 +1,7 @@
>  ! { dg-do compile }
>  ! { dg-options "-Ofast" }
>  ! { dg-additional-options "-mavx2" { target avx2 } }
> -   SUBROUTINE sfddagd( regime, znt, ite, jte )
> +   SUBROUTINE sfddagd( regime, znt, ite, jte, ime, IN )
> REAL, DIMENSION( ime, IN) :: regime, znt
> REAL, DIMENSION( ite, jte) :: wndcor_u
> LOGICAL wrf_dm_on_monitor
> diff --git a/gcc/testsuite/gfortran.dg/pr78061.f
> b/gcc/testsuite/gfortran.dg/pr78061.f
> index 7e4dd3de8b5..9061dea74da 100644
> --- a/gcc/testsuite/gfortran.dg/pr78061.f
> +++ b/gcc/testsuite/gfortran.dg/pr78061.f
> @@ -1,6 +1,6 @@
>  ! { dg-do compile }
>  ! { dg-options "-O3 -fsplit-loops" }
> -  SUBROUTINE SSYMM(C)
> +  SUBROUTINE SSYMM(C,LDC)
>REAL C(LDC,*)
>LOGICAL LSAME
>LOGICAL UPPER
> diff --git a/gcc/testsuite/gfortran.dg/pr79315.f90
> b/gcc/testsuite/gfortran.dg/pr79315.f90
> index 8cd89691ce9..b754a2b3274 100644
> --- a/gcc/testsuite/gfortran.dg/pr79315.f90
> +++ b/gcc/testsuite/gfortran.dg/pr79315.f90
> @@ -10,7 +10,11 @@ SUBROUTINE wsm32D(t, &
>   its,&
> ite, &
> kts, &
> -   kte  &
> +   kte, &
> +   ims, &
> +   ime, &
> +   kms, &
> +   kme  &
>)
>REAL, DIMENSION( its:ite , kts:kte ),   &
>  INTENT(INOUT) ::  &
> diff --git a/gcc/testsuite/gfortran.dg/vect/pr90681.f
> b/gcc/testsuite/gfortran.dg/vect/pr90681.f
> index 03d3987b146..49f1d50ab8f 100644
> --- a/gcc/testsuite/gfortran.dg/vect/pr90681.f
> +++ b/gcc/testsuite/gfortran.dg/vect/pr90681.f
> @@ -1,6 +1,6 @@
>  C { dg-do compile }
>  C { dg-additional-options "-march=armv8.2-a+sve" { target { aarch64*-*-*
> } } }
> -  SUBROUTINE HMU (H1)
> +  SUBROUTINE HMU (H1,NORBS)
>COMMON DD(107)
>DIMENSION H1(NORBS,*)
>  DO 70 J1 = IA,I1
> diff --git a/gcc/testsuite/gfortran.dg/vect/pr97761.f90
> b/gcc/testsuite/gfortran.dg/vect/pr97761.f90
> index 250e2bf016e..401ef06e422 100644
> --- a/gcc/testsuite/gfortran.dg/vect/pr97761.f90
> +++ b/gcc/testsuite/gfortran.dg/vect/pr97761.f90
> @@ -1,7 +1,7 @@
>  ! { dg-do compile }
>  ! { dg-additional-options "-O1" }
>
> -subroutine ni (ps)
> +subroutine ni (ps, inout)
>  type vector
> real  x, y
>  end type
> diff --git a/gcc/testsuite/gfortran.dg/vect/pr99746.f90
> 

Re: [PATCH 3/3] bpf: Corrected index computation when present with unnamed struct fields

2024-03-19 Thread David Faust



On 3/13/24 07:24, Cupertino Miranda wrote:
> Any unnamed structure field if not a member of the BTF_KIND_STRUCT.
typo: if -> is

I'd suggest to clarify that "any unnamed structure field" is really
any unnamed non-struct-or-union field, since anonymous inner structs
and unions certainly are present in BTF (and you handle them here).

> For that reason, CO-RE access strings indexes should take that in
> consideration. This patch adds a condition to the incrementer that
> computes the index for the field access.

Otherwise, OK.
Thanks.

> 
> gcc/ChangeLog:
>   * config/bpf/core-builtins.cc (bpf_core_get_index): Check if
>   field contains a DECL_NAME.
> 
> gcc/testsuite/ChangeLog:
>   * gcc.target/bpf/core-builtin-fieldinfo-offset-1.c: Add
>   testcase for unnamed fields.
> ---
>  gcc/config/bpf/core-builtins.cc|  6 +-
>  .../gcc.target/bpf/core-builtin-fieldinfo-offset-1.c   | 10 --
>  2 files changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/config/bpf/core-builtins.cc b/gcc/config/bpf/core-builtins.cc
> index 70b14e48e6e5..8333ad81d0e0 100644
> --- a/gcc/config/bpf/core-builtins.cc
> +++ b/gcc/config/bpf/core-builtins.cc
> @@ -553,7 +553,11 @@ bpf_core_get_index (const tree node, bool *valid)
>   {
> if (l == node)
>   return i;
> -   i++;
> +   /* Skip unnamed padding, not represented by BTF.  */
> +   if (DECL_NAME(l) != NULL_TREE
> +   || TREE_CODE (TREE_TYPE (l)) == UNION_TYPE
> +   || TREE_CODE (TREE_TYPE (l)) == RECORD_TYPE)
> + i++;
>   }
>  }
>else if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF)
> diff --git a/gcc/testsuite/gcc.target/bpf/core-builtin-fieldinfo-offset-1.c 
> b/gcc/testsuite/gcc.target/bpf/core-builtin-fieldinfo-offset-1.c
> index 27654205287d..8b1d8b012a2a 100644
> --- a/gcc/testsuite/gcc.target/bpf/core-builtin-fieldinfo-offset-1.c
> +++ b/gcc/testsuite/gcc.target/bpf/core-builtin-fieldinfo-offset-1.c
> @@ -14,6 +14,9 @@ struct T {
>struct S s[2];
>char c;
>char d;
> +  int a: 1;
> +  int:31;
> +  int f;
>  };
>  
>  enum {
> @@ -38,7 +41,9 @@ unsigned int foo (struct T *t)
>unsigned e1 = __builtin_preserve_field_info (bar()->d, FIELD_BYTE_OFFSET);
>unsigned e2 = __builtin_preserve_field_info (bar()->s[1].a4, 
> FIELD_BYTE_OFFSET);
>  
> -  return s0a1 + s0a4 + s0x + s1a1 + s1a4 + s1x + c + d + e1 + e2;
> +  unsigned f1 = __builtin_preserve_field_info (t->f, FIELD_BYTE_OFFSET);
> +
> +  return s0a1 + s0a4 + s0x + s1a1 + s1a4 + s1x + c + d + e1 + e2 + f1;
>  }
>  
>  /* { dg-final { scan-assembler-times "\[\t \]mov\[\t \]%r\[0-9\],4" 2 } } */
> @@ -65,5 +70,6 @@ unsigned int foo (struct T *t)
>  /* { dg-final { scan-assembler-times "bpfcr_astr_off \\(\"0:1:1:4\"\\)" 1 } 
> } */
>  /* { dg-final { scan-assembler-times "bpfcr_astr_off \\(\"0:2\"\\)" 1 } } */
>  /* { dg-final { scan-assembler-times "bpfcr_astr_off \\(\"0:3\"\\)" 2 } } */
> +/* { dg-final { scan-assembler-times "bpfcr_astr_off \\(\"0:5\"\\)" 1 } } */
>  
> -/* { dg-final { scan-assembler-times "0\[\t \]+\[^\n\]*bpfcr_kind" 10 } } */
> +/* { dg-final { scan-assembler-times "0\[\t \]+\[^\n\]*bpfcr_kind" 11 } } */


Re: [Committed] RISC-V: Update test expectancies with recent scheduler change

2024-03-19 Thread Edwin Lu



On 3/18/2024 8:14 PM, Jeff Law wrote:



On 3/12/24 3:56 PM, Edwin Lu wrote:

Given the recent change with adding the scheduler pipeline descriptions,
many scan-dump failures emerged. Relax the expected assembler output
conditions on the affected tests to reduce noise.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Disable 
scheduling

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-1.c: Update test expectancies
* gcc.target/riscv/rvv/base/pr108185-2.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-3.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-4.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-5.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-6.c: Ditto
* gcc.target/riscv/rvv/base/pr108185-7.c: Ditto
* gcc.target/riscv/rvv/base/vcreate.c: Disable scheduling and update
test expectancies
* gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-30.c: Disable 
scheduling

* gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-31.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_single_block-17.c: Update test
expectancies
* gcc.target/riscv/rvv/vsetvl/vlmax_single_block-18.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-11.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-12.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-4.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-5.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-6.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-7.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-8.c: Ditto
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-9.c: Ditto
As we discussed last week.  This should go forward as it brings a 
better degree of stability to these tests.  Looking forward to cleaner 
testresults as my tester has been complaining about this stuff for a 
month now :(



And a note for the future.  Let's take this one:

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-1.c

index 4c6e88e7eed..46d3b5e98d4 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-1.c
@@ -60,11 +60,11 @@ test_vbool1_then_vbool64(int8_t * restrict in, 
int8_t * restrict out) {

  }
    /* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m8,\s*ta,\s*ma} 6 } } */
-/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */
-/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */
-/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
-/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 1 } } */
-/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
-/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m4,\s*ta,\s*ma} 2 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m2,\s*ta,\s*ma} 2 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*m1,\s*ta,\s*ma} 2 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma} 2 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 2 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 2 } } */
  /* { dg-final { scan-assembler-times 
{vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
  /* { dg-final { scan-assembler-times 
{vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */


This shows an example of how uarch information such as instruction 
latency will affect vset counts.  If someone wanted to test that 
pr108185-1 can drive the counts of each of those vsets to since 
instance, they can certainly #include pr108185-1 and provide suitable 
dg directives to set a specific uarch tuning and appropriate dg-final 
directives to ensure just a single instance of each vset occurs.


I'm not expecting you do to this.  Just making a note if someone 
really wants to use those tests to verify a specific set of vsets on a 
particular uarch.



Jeff


Committed!

Edwin



Re: [Committed] RISC-V: Fix C23 (...) functions returning large aggregates [PR114175]

2024-03-19 Thread Edwin Lu



On 3/18/2024 8:07 PM, Jeff Law wrote:



On 3/18/24 12:54 PM, Edwin Lu wrote:
We assume that TYPE_NO_NAMED_ARGS_STDARG_P don't have any named 
arguments and
there is nothing to advance, but that is not the case for (...) 
functions

returning by hidden reference which have one such artificial argument.
This causes gcc.dg/c23-stdarg-[68].c to fail

Fix the issue by checking if arg.type is NULL as r14-9503-g218d1749612
explains

Tested on linux rv64gcv.

gcc/ChangeLog:

PR target/114175
* config/riscv/riscv.cc (riscv_setup_incoming_varargs): Only skip
riscv_funciton_arg_advance for TYPE_NO_NAMED_ARGS_STDARG_P functions
if arg.type is NULL

OK.  Thanks for taking care of this.

Jeff


Committed!

Edwin



Re: [PATCH 2/3] bpf: Fix access string default for CO-RE type based relocations

2024-03-19 Thread David Faust



On 3/13/24 07:24, Cupertino Miranda wrote:
> Although part of all CO-RE relocation data, type based relocations do
> not require an access string.
> Initial implementation defined it as an empty string.
> On the other hand, libbpf when parsing the CO-RE relocations verifies
> that those strings would contain "0", otherwise reports an error.
> This patch makes GCC compliant with libbpf expectations.

OK, thanks.

> 
> gcc/Changelog:
>   * config/bpf/btfext-out.cc (cpf_core_reloc_add): Correct for new code.
>   Add assert to validate the string is set.
>   * config/bpf/core-builtins.cc (cr_final): Make string struct
>   field as const.
>   (process_enum_value): Correct for field type change.
>   (process_type): Set access string to "0".
> 
> gcc/testsuite/ChangeLog:
>   * gcc.target/bpf/core-builtin-type-based.c: Correct.
>   * gcc.target/bpf/core-builtin-type-id.c: Correct.
> ---
>  gcc/config/bpf/btfext-out.cc   |  5 +++--
>  gcc/config/bpf/core-builtins.cc| 10 ++
>  gcc/testsuite/gcc.target/bpf/core-builtin-type-based.c |  1 +
>  gcc/testsuite/gcc.target/bpf/core-builtin-type-id.c|  1 +
>  4 files changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/config/bpf/btfext-out.cc b/gcc/config/bpf/btfext-out.cc
> index 57c0dc323812..ff1fd0739f1e 100644
> --- a/gcc/config/bpf/btfext-out.cc
> +++ b/gcc/config/bpf/btfext-out.cc
> @@ -299,8 +299,9 @@ bpf_core_reloc_add (const tree type, const char * 
> section_name,
>  
>/* Buffer the access string in the auxiliary strtab.  */
>bpfcr->bpfcr_astr_off = 0;
> -  if (accessor != NULL)
> -bpfcr->bpfcr_astr_off = btf_ext_add_string (accessor);
> +  gcc_assert (accessor != NULL);
> +  bpfcr->bpfcr_astr_off = btf_ext_add_string (accessor);
> +
>bpfcr->bpfcr_type = get_btf_id (ctf_lookup_tree_type (ctfc, type));
>bpfcr->bpfcr_insn_label = label;
>bpfcr->bpfcr_kind = kind;
> diff --git a/gcc/config/bpf/core-builtins.cc b/gcc/config/bpf/core-builtins.cc
> index 4256fea15e49..70b14e48e6e5 100644
> --- a/gcc/config/bpf/core-builtins.cc
> +++ b/gcc/config/bpf/core-builtins.cc
> @@ -205,7 +205,7 @@ struct cr_local
>  /* Core Relocation Final data */
>  struct cr_final
>  {
> -  char *str;
> +  const char *str;
>tree type;
>enum btf_core_reloc_kind kind;
>  };
> @@ -868,8 +868,10 @@ process_enum_value (struct cr_builtins *data)
>   {
> if (TREE_VALUE (l) == expr)
>   {
> -   ret.str = (char *) ggc_alloc_atomic ((index / 10) + 1);
> -   sprintf (ret.str, "%d", index);
> +   char *tmp = (char *) ggc_alloc_atomic ((index / 10) + 1);
> +   sprintf (tmp, "%d", index);
> +   ret.str = (const char *) tmp;
> +
> break;
>   }
> index++;
> @@ -987,7 +989,7 @@ process_type (struct cr_builtins *data)
> || data->kind == BPF_RELO_TYPE_MATCHES);
>  
>struct cr_final ret;
> -  ret.str = NULL;
> +  ret.str = ggc_strdup ("0");
>ret.type = data->type;
>ret.kind = data->kind;
>  
> diff --git a/gcc/testsuite/gcc.target/bpf/core-builtin-type-based.c 
> b/gcc/testsuite/gcc.target/bpf/core-builtin-type-based.c
> index 74a8d5a14d9d..9d818133c084 100644
> --- a/gcc/testsuite/gcc.target/bpf/core-builtin-type-based.c
> +++ b/gcc/testsuite/gcc.target/bpf/core-builtin-type-based.c
> @@ -56,3 +56,4 @@ int foo(void *data)
>  /* { dg-final { scan-assembler-times "0x8\[\t \]+\[^\n\]*bpfcr_kind" 13 } } 
> BPF_TYPE_EXISTS */
>  /* { dg-final { scan-assembler-times "0x9\[\t \]+\[^\n\]*bpfcr_kind" 11 } } 
> BPF_TYPE_SIZE */
>  /* { dg-final { scan-assembler-times "0xc\[\t \]+\[^\n\]*bpfcr_kind" 13 } } 
> BPF_TYPE_MATCHES */
> +/* { dg-final { scan-assembler-times "bpfcr_astr_off \[(\"\]+0\[(\"\]+" 37 } 
> } */
> diff --git a/gcc/testsuite/gcc.target/bpf/core-builtin-type-id.c 
> b/gcc/testsuite/gcc.target/bpf/core-builtin-type-id.c
> index 4b23288eac08..9576b91bc940 100644
> --- a/gcc/testsuite/gcc.target/bpf/core-builtin-type-id.c
> +++ b/gcc/testsuite/gcc.target/bpf/core-builtin-type-id.c
> @@ -38,3 +38,4 @@ int foo(void *data)
>  /* { dg-final { scan-assembler-times "0\[\t \]+\[^\n\]*bpfcr_type" 0  { 
> xfail *-*-* } } } */
>  /* { dg-final { scan-assembler-times "0x6\[\t \]+\[^\n\]*bpfcr_kind" 13 } } 
> BPF_TYPE_ID_LOCAL */
>  /* { dg-final { scan-assembler-times "0x7\[\t \]+\[^\n\]*bpfcr_kind" 7 } } 
> BPF_TYPE_ID_TARGET */
> +/* { dg-final { scan-assembler-times "bpfcr_astr_off \[(\"\]+0\[(\"\]+" 20 } 
> } */


Re: [PATCH] Fortran: add two small F2023 features

2024-03-19 Thread Steve Kargl
On Tue, Mar 19, 2024 at 04:17:32PM +0100, FX Coudert wrote:
> 
> These two (independent) patches add two tiny Fortran 2023 features: new 
> ISO_FORTRAN_ENV named constants and SELECTED_LOGICAL_KIND intrinsic.
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu.
> Please review, and let me know if it’s okay to push once we’re back in stage 
> 1.
> (I know it’s not particularly soon, but I don’t want to forget these so I’m 
> posting them for review)
> 

Both patches look good to me.  Thanks.

-- 
Steve


Re: [PATCH] analyzer: Bail out on function pointer for -Wanalyzer-allocation-size

2024-03-19 Thread David Malcolm
On Tue, 2024-03-19 at 16:10 +0100, Stefan Schulze Frielinghaus wrote:
> On s390 pr94688.c is failing due to excess error
> 
> pr94688.c:6:5: warning: allocated buffer size is not a multiple of
> the pointee's size [CWE-131] [-Wanalyzer-allocation-size]
> 
> This is because on s390 functions are by default aligned to an 8-byte
> boundary and during function type construction size is set to
> function
> boundary.  Thus, for the assignment
> 
> a.0_1 = (void (*) ()) 
> 
> we have that the right-hand side is pointing to a 4-byte memory
> region
> whereas the size of the function pointer is 8 byte and a warning is
> emitted.

FWIW the test case in question is a regression test for an ICE seen in
the GCC 10 implementation of the analyzer, which was fixed by the big
rewrite in r11-2694-g808f4dfeb3a95f.

So the code in the test doesn't make a great deal of sense.

> 
> I could follow and skip this test as done in PR112705, or we could
> bail
> out early in the analyzer for function pointers.  My intuition so far
> is that -Wanalyzer-allocation-size shouldn't care about function
> pointer.  Therefore, I went for bailing out early.  If you believe
> this
> is wrong I can still go by skipping this test on s390.  Any thoughts?

I tried imagining a situation where we're analyzing a function
generated at run-time, but it strikes me that the buffer allocated for
such a function can be of arbitrary size.  So -Wanalyzer-allocation-
size is meaningless for functions.

There's probably a case for checking for mismatches between pointers to
code vs pointers to data (e.g. alignments, Harvard architecture
machines, etc), but -Wanalyzer-allocation-size doesn't do that.

So I think your patch is correct.

OK to push it if it passes bootstrap testing.

Thanks
Dave

> ---
>  gcc/analyzer/region-model.cc | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-
> model.cc
> index f079d1fb37e..1b43443d168 100644
> --- a/gcc/analyzer/region-model.cc
> +++ b/gcc/analyzer/region-model.cc
> @@ -3514,6 +3514,10 @@ region_model::check_region_size (const region
> *lhs_reg, const svalue *rhs_sval,
>    || TYPE_SIZE_UNIT (pointee_type) == NULL_TREE)
>  return;
>  
> +  /* Bail out early on function pointers.  */
> +  if (TREE_CODE (pointee_type) == FUNCTION_TYPE)
> +    return;
> +
>    /* Bail out early on pointers to structs where we can
>   not deduce whether the buffer size is compatible.  */
>    bool is_struct = RECORD_OR_UNION_TYPE_P (pointee_type);



Re: RISC-V: Use convert instructions instead of calling library functions

2024-03-19 Thread Palmer Dabbelt

On Mon, 18 Mar 2024 20:50:14 PDT (-0700), jeffreya...@gmail.com wrote:



On 3/18/24 3:09 AM, Jivan Hakobyan wrote:

As RV has round instructions it is reasonable to use them instead of
calling the library functions.

With my patch for the following C code:
double foo(double a) {
     return ceil(a);
}

GCC generates the following ASM code (before it was tail call)
foo:
         fabs.d  fa4,fa0
         lui     a5,%hi(.LC0)
         fld     fa3,%lo(.LC0)(a5)
         flt.d   a5,fa4,fa3
         beq     a5,zero,.L3
         fcvt.l.d a5,fa0,rup


I'm not sure exactly what context this is in, but my reading of 
"according to the current rounding mode" means we'd usually use the 
dynamic rounding mode.



         fcvt.d.l        fa4,a5
         fsgnj.d fa0,fa4,fa0
.L3:
         ret

.LC0:
         .word   0
         .word   1127219200     // 0x4330


The patch I have evaluated on SPEC2017.
Counted dynamic instructions counts and got the following improvements

510.parest_r       262 m      -
511.povray_r      2.1  b        0.04%
521.wrt_r            269 m       -
526.blender_r    3 b             0.1%
527.cam4_r       15 b           0.6%
538.imagick_r    365 b         7.6%

Overall executed 385 billion fewer instructions which is 0.5%.

A few more notes.

The sequence Jivan is using is derived from LLVM.  The condition in the
generated code tests for those values were are supposed to pass through
unaltered.  The condition in the pattern ensures we do something
sensible WRT FE_INEXACT and mirrors how other ports handle these insns.


My only worry here is that when we were doing the other patterns we 
decided not to do rint.  I can't remember exactly why, but from reading 
the docs we probably just skipped it due to the inexact handling and Zfa 
having an instruction that just does this.  FP stuff is always a bit of 
a time sink, so at that point it probably just fell off the priority 
list.


I'm not really an FP guy, so I usually just poke around what the other 
ports generate and try to figure out what's going on.  arm64 has the Zfa 
instruction and x86 FP is complicated, so I'm not sure exactly who else 
to look at for this sort of stuff.  From just looking at the code, 
though, I think there's two issues -- I'm not really an FP person, 
though, so take this with a grain of salt:


IIUC what we've got here doesn't actually set the inexact flag for the 
bounds check failure case, as we're just loading up an exact constant 
and doing the bounds check.  We're also not clamping to INT_MAX-type 
values, but not sure if we're supposed to.  I think we could fix both of 
those by adjusting the expansion to something like


         fabs.d  fa4,fa0
         lui     a5,%hi(.LC0)
         fld     fa3,%lo(.LC0)(a5)
         flt.d   a5,fa4,fa3
         bne     a5,zero,.L3
 mv  fa0, fa3
.L3:
         fcvt.l.d a5,fa0,rup
         fcvt.d.l        fa4,a5
         fsgnj.d fa0,fa4,fa0
         ret

and then adjusting the constant to be an epsilon larger than INT_MAX so 
it'll still trigger the clamping but also inexact.


There's also a pair of changes to the ISA in 2020 that added the 
conversion inexact handling requirement, it was a grey area before.  I 
don't remember exactly what happened there, but I did remember it 
happening.  I don't think anyone cares all that much about the 
performance of systems that target the older ISAs, so maybe we just 
restrict the non-libcall expansion to ISAs that contain the new wording?


   commit 5890a1a702abf4157d5879717a39d8ecdae0de68 (tag: draft-20201229-5890a1a)
   Author: Andrew Waterman 
   Date:   Mon Dec 28 18:19:44 2020 -0800
   
   Clarify when FP conversions raise the Inexact flag
   
   diff --git a/src/f.tex b/src/f.tex

   index 8649d75..97c253b 100644
   --- a/src/f.tex
   +++ b/src/f.tex
   @@ -594,9 +594,9 @@ instructions round according to the {\em rm} field.  A 
floating-point register
can be initialized to floating-point positive zero using FCVT.S.W {\em rd},
{\tt x0}, which will never set any exception flags.
   
   -All floating-point conversion instructions set the Inexact exception flag if the

   -result differs from its operand value, yet is representable in the 
destination
   -format.
   +All floating-point conversion instructions set the Inexact exception flag if
   +the rounded result differs from the operand value and the Invalid exception
   +flag is not set.
   
\vspace{-0.2in}

\begin{center}
   
   commit 27b40fbc798357fcb4b1deaba4553646fe677576 (tag: draft-20200229-27b40fb)

   Author: Andrew Waterman 
   Date:   Fri Feb 28 18:12:47 2020 -0800
   
   Clarify that FCVT instructions signal inexact
   
   diff --git a/src/f.tex b/src/f.tex

   index 7680347..a9022c4 100644
   --- a/src/f.tex
   +++ b/src/f.tex
   @@ -582,6 +582,10 @@ instructions round according to the {\em rm} field.  A 
floating-point register
can be initialized to floating-point positive 

[committed] libstdc++: Fix infinite loop in std::binomial_distribution [PR114359]

2024-03-19 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk.

Not a regression, but worth backporting.

-- >8 --

The multiplication (4 * _M_t * __1p) can wraparound to zero if _M_t is
unsigned and 4 * _M_t wraps to zero. The third operand has type double,
so do the second multiplication first, so that we aren't multiplying
integers.

libstdc++-v3/ChangeLog:

PR libstdc++/114359
* include/bits/random.tcc (binomial_distribution::param_type):
Ensure arithmetic is done as type double.
* testsuite/26_numerics/random/binomial_distribution/114359.cc: New 
test.
---
 libstdc++-v3/include/bits/random.tcc |  2 +-
 .../random/binomial_distribution/114359.cc   | 12 
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 
libstdc++-v3/testsuite/26_numerics/random/binomial_distribution/114359.cc

diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index ade416390b3..8216883c448 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -1503,7 +1503,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  // sqrt(pi / 2)
  const double __spi_2 = 1.2533141373155002512078826424055226L;
  _M_s1 = std::sqrt(__np * __1p) * (1 + _M_d1 / (4 * __np));
- _M_s2 = std::sqrt(__np * __1p) * (1 + _M_d2 / (4 * _M_t * __1p));
+ _M_s2 = std::sqrt(__np * __1p) * (1 + _M_d2 / (4 * (_M_t * __1p)));
  _M_c = 2 * _M_d1 / __np;
  _M_a1 = std::exp(_M_c) * _M_s1 * __spi_2;
  const double __a12 = _M_a1 + _M_s2 * __spi_2;
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/binomial_distribution/114359.cc 
b/libstdc++-v3/testsuite/26_numerics/random/binomial_distribution/114359.cc
new file mode 100644
index 000..c1e4c380bf9
--- /dev/null
+++ b/libstdc++-v3/testsuite/26_numerics/random/binomial_distribution/114359.cc
@@ -0,0 +1,12 @@
+// { dg-do run { target c++11 } }
+
+// Bug 114359 - std::binomial_distribution hangs in infinite loop
+
+#include 
+
+int main()
+{
+  std::default_random_engine g{};
+  std::binomial_distribution b(1U << 30);
+  b(g);  // hangs forever
+}
-- 
2.44.0



[PATCH] libstdc++: Constrain std::vector default constructor [PR113841]

2024-03-19 Thread Jonathan Wakely
This fixes the problem in the PR, which is revealed by the new
concept-based constraints on std::pair constructors in C++20 mode. That
makes this a C++20 regression, as the PR's example compiles with C++17.

We need something similar for std::basic_string too, which I'll do
later.

Any comments?

Tested aarch64-linux.

-- >8 --

This is needed to avoid errors outside the immediate context when
evaluating is_default_constructible_v> when A is not
default constructible.

To avoid diagnostic regressions for 23_containers/vector/48101_neg.cc we
need to make the std::allocator partial specializations default
constructible, which they probably should have been anyway.

libstdc++-v3/ChangeLog:

PR libstdc++/113841
* include/bits/allocator.h (allocator): Add default
constructor to partial specializations for cv-qualified types.
* include/bits/stl_vector.h (_Vector_impl::_Vector_impl()):
Constrain so that it's only present if the allocator is default
constructible.
* include/bits/stl_bvector.h (_Bvector_impl::_Bvector_impl()):
Likewise.
* testsuite/23_containers/vector/cons/113841.cc: New test.
---
 libstdc++-v3/include/bits/allocator.h |  3 ++
 libstdc++-v3/include/bits/stl_bvector.h   |  3 ++
 libstdc++-v3/include/bits/stl_vector.h|  3 ++
 .../23_containers/vector/cons/113841.cc   | 34 +++
 4 files changed, 43 insertions(+)
 create mode 100644 libstdc++-v3/testsuite/23_containers/vector/cons/113841.cc

diff --git a/libstdc++-v3/include/bits/allocator.h 
b/libstdc++-v3/include/bits/allocator.h
index ff4f5b9137b..9e75b37fce7 100644
--- a/libstdc++-v3/include/bits/allocator.h
+++ b/libstdc++-v3/include/bits/allocator.h
@@ -254,6 +254,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 {
 public:
   typedef _Tp value_type;
+  allocator() { }
   template allocator(const allocator<_Up>&) { }
 };
 
@@ -262,6 +263,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 {
 public:
   typedef _Tp value_type;
+  allocator() { }
   template allocator(const allocator<_Up>&) { }
 };
 
@@ -270,6 +272,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 {
 public:
   typedef _Tp value_type;
+  allocator() { }
   template allocator(const allocator<_Up>&) { }
 };
   /// @endcond
diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index a3343d95b36..d567e26f4e4 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -593,6 +593,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
_GLIBCXX20_CONSTEXPR
_Bvector_impl() _GLIBCXX_NOEXCEPT_IF(
  is_nothrow_default_constructible<_Bit_alloc_type>::value)
+#if __cpp_concepts
+   requires is_default_constructible_v<_Bit_alloc_type>
+#endif
: _Bit_alloc_type()
{ }
 
diff --git a/libstdc++-v3/include/bits/stl_vector.h 
b/libstdc++-v3/include/bits/stl_vector.h
index a8d387f40a1..31169711a48 100644
--- a/libstdc++-v3/include/bits/stl_vector.h
+++ b/libstdc++-v3/include/bits/stl_vector.h
@@ -135,6 +135,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
_GLIBCXX20_CONSTEXPR
_Vector_impl() _GLIBCXX_NOEXCEPT_IF(
is_nothrow_default_constructible<_Tp_alloc_type>::value)
+#if __cpp_lib_concepts
+   requires is_default_constructible_v<_Tp_alloc_type>
+#endif
: _Tp_alloc_type()
{ }
 
diff --git a/libstdc++-v3/testsuite/23_containers/vector/cons/113841.cc 
b/libstdc++-v3/testsuite/23_containers/vector/cons/113841.cc
new file mode 100644
index 000..a7721d27f79
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/vector/cons/113841.cc
@@ -0,0 +1,34 @@
+// { dg-do compile { target c++20 } }
+
+#include 
+
+template
+struct Alloc
+{
+  using value_type = T;
+
+  Alloc(int) { } // not default constructible
+
+  template Alloc(const Alloc&) { }
+
+  T* allocate(std::size_t n) { return std::allocator().allocate(n); }
+  void deallocate(T* p, std::size_t n) { std::allocator().deallocate(p, n); 
}
+};
+
+template struct wrap { T t; };
+
+template void do_adl(T&) { }
+
+void test_pr113841()
+{
+  using test_type = std::vector>;
+  std::pair>* h = nullptr;
+  do_adl(h);
+}
+
+void test_pr113841_bool()
+{
+  using test_type = std::vector>;
+  std::pair>* h = nullptr;
+  do_adl(h);
+}
-- 
2.44.0



[PATCH] libstdc++: Use feature test macros in

2024-03-19 Thread Jonathan Wakely
Does anybody see any issues with making this change?

The __cpp_constexpr_dynamic_alloc macro is defined with -std=c++2a
since Clang 10.0.0 so this won't result in std::construct_at
disappearing by anybody using libstdc++ with Clang.

Tested x86_64-linux and aarch64-linux. Basic smoke tests checked with
Clang 16 too.

-- >8 --

The preprocessor checks for __cplusplus in  should
use the appropriate feature test macros instead of __cplusplus, namely
__glibcxx_raw_memory_algorithms and __cpp_constexpr_dynamic_alloc.

For the latter, we want to check the compiler macro not the library's
__cpp_lib_constexpr_dynamic_alloc, because the latter is not defined for
freestanding but std::construct_at needs to be.

libstdc++-v3/ChangeLog:

* include/bits/stl_construct.h (destroy_at, construct_at): Guard
with feature test macros instead of just __cplusplus.
---
 libstdc++-v3/include/bits/stl_construct.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_construct.h 
b/libstdc++-v3/include/bits/stl_construct.h
index 7c394072b50..dc08fb7ea33 100644
--- a/libstdc++-v3/include/bits/stl_construct.h
+++ b/libstdc++-v3/include/bits/stl_construct.h
@@ -74,7 +74,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-#if __cplusplus >= 201703L
+#if __glibcxx_raw_memory_algorithms // >= C++17
   template 
 _GLIBCXX20_CONSTEXPR inline void
 destroy_at(_Tp* __location)
@@ -88,7 +88,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__location->~_Tp();
 }
 
-#if __cplusplus >= 202002L
+#if __cpp_constexpr_dynamic_alloc // >= C++20
   template
 constexpr auto
 construct_at(_Tp* __location, _Args&&... __args)
@@ -108,7 +108,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 inline void
 _Construct(_Tp* __p, _Args&&... __args)
 {
-#if __cplusplus >= 202002L
+#if __cpp_constexpr_dynamic_alloc // >= C++20
   if (std::__is_constant_evaluated())
{
  // Allow std::_Construct to be used in constant expressions.
@@ -145,7 +145,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _GLIBCXX14_CONSTEXPR inline void
 _Destroy(_Tp* __pointer)
 {
-#if __cplusplus > 201703L
+#if __cpp_constexpr_dynamic_alloc // >= C++20
   std::destroy_at(__pointer);
 #else
   __pointer->~_Tp();
@@ -188,7 +188,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   static_assert(is_destructible<_Value_type>::value,
"value type is destructible");
 #endif
-#if __cplusplus >= 202002L
+#if __cpp_constexpr_dynamic_alloc // >= C++20
   if (std::__is_constant_evaluated())
return std::_Destroy_aux::__destroy(__first, __last);
 #endif
@@ -237,7 +237,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   static_assert(is_destructible<_Value_type>::value,
"value type is destructible");
 #endif
-#if __cplusplus >= 202002L
+#if __cpp_constexpr_dynamic_alloc // >= C++20
   if (std::__is_constant_evaluated())
return std::_Destroy_n_aux::__destroy_n(__first, __count);
 #endif
@@ -245,7 +245,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__destroy_n(__first, __count);
 }
 
-#if __cplusplus >= 201703L
+#if __glibcxx_raw_memory_algorithms // >= C++17
   template 
 _GLIBCXX20_CONSTEXPR inline void
 destroy(_ForwardIterator __first, _ForwardIterator __last)
-- 
2.44.0



[PATCH v3 2/2] fortran: Fix specification expression error with dummy procedures [PR111781]

2024-03-19 Thread Mikael Morin
This fixes a spurious invalid variable in specification expression error.
The error was caused on the testcase from the PR by two different bugs.
First, the call to is_parent_of_current_ns was unable to recognize
correct host association and returned false.  Second, an ad-hoc
condition coming next was using a global variable previously improperly
restored to false (instead of restoring it to its initial value).  The
latter happened on the testcase because one dummy argument was a procedure,
and checking that argument what causing a check of all its arguments with
the (improper) reset of the flag at the end, and that preceded the check of
the next argument.

For the first bug, the wrong result of is_parent_of_current_ns is fixed by
correcting the namespaces that function deals with, both the one passed
as argument and the current one tracked in the gfc_current_ns global.  Two
new functions are introduced to select the right namespace.

Regarding the second bug, the problematic condition is removed, together
with the formal_arg_flag associated with it.  Indeed, that condition was
(wrongly) allowing local variables to be used in array bounds of dummy
arguments.

PR fortran/111781

gcc/fortran/ChangeLog:

* symbol.cc (gfc_get_procedure_ns, gfc_get_spec_ns): New functions.
* gfortran.h (gfc_get_procedure_ns, gfc_get_spec ns): Declare them.
(gfc_is_formal_arg): Remove.
* expr.cc (check_restricted): Remove special case allowing local
variable in dummy argument bound expressions.  Use gfc_get_spec_ns
to get the right namespace.
* resolve.cc (gfc_is_formal_arg, formal_arg_flag): Remove.
(gfc_resolve_formal_arglist): Set gfc_current_ns.  Quit loop and
restore gfc_current_ns instead of early returning.
(resolve_symbol): Factor common array spec resolution code to...
(resolve_symbol_array_spec): ... this new function.  Additionnally
set and restore gfc_current_ns.

gcc/testsuite/ChangeLog:

* gfortran.dg/spec_expr_8.f90: New test.
* gfortran.dg/spec_expr_9.f90: New test.
---
 gcc/fortran/expr.cc   |  8 +--
 gcc/fortran/gfortran.h|  4 +-
 gcc/fortran/resolve.cc| 77 +++
 gcc/fortran/symbol.cc | 58 +
 gcc/testsuite/gfortran.dg/spec_expr_8.f90 | 24 +++
 gcc/testsuite/gfortran.dg/spec_expr_9.f90 | 19 ++
 6 files changed, 140 insertions(+), 50 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/spec_expr_8.f90
 create mode 100644 gcc/testsuite/gfortran.dg/spec_expr_9.f90

diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc
index e4b1e8307e3..9a042cd7040 100644
--- a/gcc/fortran/expr.cc
+++ b/gcc/fortran/expr.cc
@@ -3514,19 +3514,13 @@ check_restricted (gfc_expr *e)
   if (!check_references (e->ref, _restricted))
break;
 
-  /* gfc_is_formal_arg broadcasts that a formal argument list is being
-processed in resolve.cc(resolve_formal_arglist).  This is done so
-that host associated dummy array indices are accepted (PR23446).
-This mechanism also does the same for the specification expressions
-of array-valued functions.  */
   if (e->error
|| sym->attr.in_common
|| sym->attr.use_assoc
|| sym->attr.dummy
|| sym->attr.implied_index
|| sym->attr.flavor == FL_PARAMETER
-   || is_parent_of_current_ns (sym->ns)
-   || (gfc_is_formal_arg () && (sym->ns == gfc_current_ns)))
+   || is_parent_of_current_ns (gfc_get_spec_ns (sym)))
{
  t = true;
  break;
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index c7039730fad..26aa56b3358 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3612,6 +3612,9 @@ bool gfc_is_associate_pointer (gfc_symbol*);
 gfc_symbol * gfc_find_dt_in_generic (gfc_symbol *);
 gfc_formal_arglist *gfc_sym_get_dummy_args (gfc_symbol *);
 
+gfc_namespace * gfc_get_procedure_ns (gfc_symbol *);
+gfc_namespace * gfc_get_spec_ns (gfc_symbol *);
+
 /* intrinsic.cc -- true if working in an init-expr, false otherwise.  */
 extern bool gfc_init_expr_flag;
 
@@ -3821,7 +3824,6 @@ bool gfc_resolve_iterator (gfc_iterator *, bool, bool);
 bool find_forall_index (gfc_expr *, gfc_symbol *, int);
 bool gfc_resolve_index (gfc_expr *, int);
 bool gfc_resolve_dim_arg (gfc_expr *);
-bool gfc_is_formal_arg (void);
 bool gfc_resolve_substring (gfc_ref *, bool *);
 void gfc_resolve_substring_charlen (gfc_expr *);
 gfc_expr *gfc_expr_to_initialize (gfc_expr *);
diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index c5ae826bd6e..50d51b06c92 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -72,9 +72,6 @@ static bool first_actual_arg = false;
 
 static int omp_workshare_flag;
 
-/* True if we are processing a formal arglist. The corresponding function
-   resets the flag each 

[PATCH v3 1/2] testsuite: Declare fortran array bound variables

2024-03-19 Thread Mikael Morin
This fixes invalid undeclared fortran array bound variables
in the testsuite.

gcc/testsuite/ChangeLog:

* gfortran.dg/graphite/pr107865.f90: Declare array bound variable(s)
as dummy argument(s).
* gfortran.dg/pr101267.f90: Likewise.
* gfortran.dg/pr112404.f90: Likewise.
* gfortran.dg/pr78061.f: Likewise.
* gfortran.dg/pr79315.f90: Likewise.
* gfortran.dg/vect/pr90681.f: Likewise.
* gfortran.dg/vect/pr97761.f90: Likewise.
* gfortran.dg/vect/pr99746.f90: Likewise.
---
 gcc/testsuite/gfortran.dg/graphite/pr107865.f90 | 2 +-
 gcc/testsuite/gfortran.dg/pr101267.f90  | 2 +-
 gcc/testsuite/gfortran.dg/pr112404.f90  | 2 +-
 gcc/testsuite/gfortran.dg/pr78061.f | 2 +-
 gcc/testsuite/gfortran.dg/pr79315.f90   | 6 +-
 gcc/testsuite/gfortran.dg/vect/pr90681.f| 2 +-
 gcc/testsuite/gfortran.dg/vect/pr97761.f90  | 2 +-
 gcc/testsuite/gfortran.dg/vect/pr99746.f90  | 2 +-
 8 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/graphite/pr107865.f90 
b/gcc/testsuite/gfortran.dg/graphite/pr107865.f90
index 6bddb17a1be..323d8092ad2 100644
--- a/gcc/testsuite/gfortran.dg/graphite/pr107865.f90
+++ b/gcc/testsuite/gfortran.dg/graphite/pr107865.f90
@@ -1,7 +1,7 @@
 ! { dg-do compile }
 ! { dg-options "-O1 -floop-parallelize-all -ftree-parallelize-loops=2" }
 
-  SUBROUTINE FNC (F)
+  SUBROUTINE FNC (F,N)
 
   IMPLICIT REAL (A-H)
   DIMENSION F(N)
diff --git a/gcc/testsuite/gfortran.dg/pr101267.f90 
b/gcc/testsuite/gfortran.dg/pr101267.f90
index 12723cf9c22..99a6dcfa342 100644
--- a/gcc/testsuite/gfortran.dg/pr101267.f90
+++ b/gcc/testsuite/gfortran.dg/pr101267.f90
@@ -1,7 +1,7 @@
 ! { dg-do compile }
 ! { dg-options "-Ofast" }
 ! { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } }
-   SUBROUTINE sfddagd( regime, znt,ite ,jte )
+   SUBROUTINE sfddagd( regime, znt,ite ,jte, ime, IN )
REAL, DIMENSION( ime, IN) :: regime, znt
REAL, DIMENSION( ite, jte) :: wndcor_u 
LOGICAL wrf_dm_on_monitor
diff --git a/gcc/testsuite/gfortran.dg/pr112404.f90 
b/gcc/testsuite/gfortran.dg/pr112404.f90
index 573fa28164a..4508bbc8738 100644
--- a/gcc/testsuite/gfortran.dg/pr112404.f90
+++ b/gcc/testsuite/gfortran.dg/pr112404.f90
@@ -1,7 +1,7 @@
 ! { dg-do compile }
 ! { dg-options "-Ofast" }
 ! { dg-additional-options "-mavx2" { target avx2 } }
-   SUBROUTINE sfddagd( regime, znt, ite, jte )
+   SUBROUTINE sfddagd( regime, znt, ite, jte, ime, IN )
REAL, DIMENSION( ime, IN) :: regime, znt
REAL, DIMENSION( ite, jte) :: wndcor_u 
LOGICAL wrf_dm_on_monitor
diff --git a/gcc/testsuite/gfortran.dg/pr78061.f 
b/gcc/testsuite/gfortran.dg/pr78061.f
index 7e4dd3de8b5..9061dea74da 100644
--- a/gcc/testsuite/gfortran.dg/pr78061.f
+++ b/gcc/testsuite/gfortran.dg/pr78061.f
@@ -1,6 +1,6 @@
 ! { dg-do compile }
 ! { dg-options "-O3 -fsplit-loops" }
-  SUBROUTINE SSYMM(C)
+  SUBROUTINE SSYMM(C,LDC)
   REAL C(LDC,*)
   LOGICAL LSAME
   LOGICAL UPPER
diff --git a/gcc/testsuite/gfortran.dg/pr79315.f90 
b/gcc/testsuite/gfortran.dg/pr79315.f90
index 8cd89691ce9..b754a2b3274 100644
--- a/gcc/testsuite/gfortran.dg/pr79315.f90
+++ b/gcc/testsuite/gfortran.dg/pr79315.f90
@@ -10,7 +10,11 @@ SUBROUTINE wsm32D(t, &
  its,&
ite, &
kts, &
-   kte  &
+   kte, &
+   ims, &
+   ime, &
+   kms, &
+   kme  &
   )
   REAL, DIMENSION( its:ite , kts:kte ),   &
 INTENT(INOUT) ::  &
diff --git a/gcc/testsuite/gfortran.dg/vect/pr90681.f 
b/gcc/testsuite/gfortran.dg/vect/pr90681.f
index 03d3987b146..49f1d50ab8f 100644
--- a/gcc/testsuite/gfortran.dg/vect/pr90681.f
+++ b/gcc/testsuite/gfortran.dg/vect/pr90681.f
@@ -1,6 +1,6 @@
 C { dg-do compile }
 C { dg-additional-options "-march=armv8.2-a+sve" { target { aarch64*-*-* } } }
-  SUBROUTINE HMU (H1)
+  SUBROUTINE HMU (H1,NORBS)
   COMMON DD(107)
   DIMENSION H1(NORBS,*)
 DO 70 J1 = IA,I1
diff --git a/gcc/testsuite/gfortran.dg/vect/pr97761.f90 
b/gcc/testsuite/gfortran.dg/vect/pr97761.f90
index 250e2bf016e..401ef06e422 100644
--- a/gcc/testsuite/gfortran.dg/vect/pr97761.f90
+++ b/gcc/testsuite/gfortran.dg/vect/pr97761.f90
@@ -1,7 +1,7 @@
 ! { dg-do compile }
 ! { dg-additional-options "-O1" }
 
-subroutine ni (ps)
+subroutine ni (ps, inout)
 type vector
real  x, y
 end type 
diff --git a/gcc/testsuite/gfortran.dg/vect/pr99746.f90 
b/gcc/testsuite/gfortran.dg/vect/pr99746.f90
index fe947ae7ccf..121d67d564d 100644
--- a/gcc/testsuite/gfortran.dg/vect/pr99746.f90
+++ b/gcc/testsuite/gfortran.dg/vect/pr99746.f90
@@ -1,6 +1,6 @@
 ! { dg-do compile }
 ! { dg-additional-options "-march=armv8.3-a" { target aarch64-*-* } }
-SUBROUTINE CLAREF(A, WANTZ, Z, ICOL1, ITMP1, ITMP2, T1, T2, V2)
+SUBROUTINE CLAREF(A, WANTZ, Z, ICOL1, ITMP1, ITMP2, T1, T2, V2, LDA)
 

[PATCH v3 0/2] fortran: Fix specification checks [PR111781]

2024-03-19 Thread Mikael Morin
Hello,

these patches correct diagnostics dealing with variables in specification
expressions.
The first patch is a testsuite change, which fixes invalid specification
expressions that the second patch would diagnose.
The second patch removes a spurious diagnostic when a dummy procedure is
involved, and enables more valid ones, as visible in the testcases from the
first patch.

I haven't tested it again (same code as v2), but I plan to do it before the 
final push.
Ok for master?

Mikael

v2 -> v3 changes:

  - Correct first name in testcase comment
  - Clarify and correct log and changelog text from second patch
  - Target current stage (stage4) instead of next (stage1)

v1 -> v2 changes:

  - Fix condition guarding sym->result access.

 
Mikael Morin (2):
  testsuite: Declare fortran array bound variables
  fortran: Fix specification expression error with dummy procedures
[PR111781]

 gcc/fortran/expr.cc   |  8 +-
 gcc/fortran/gfortran.h|  4 +-
 gcc/fortran/resolve.cc| 77 +--
 gcc/fortran/symbol.cc | 58 ++
 .../gfortran.dg/graphite/pr107865.f90 |  2 +-
 gcc/testsuite/gfortran.dg/pr101267.f90|  2 +-
 gcc/testsuite/gfortran.dg/pr112404.f90|  2 +-
 gcc/testsuite/gfortran.dg/pr78061.f   |  2 +-
 gcc/testsuite/gfortran.dg/pr79315.f90 |  6 +-
 gcc/testsuite/gfortran.dg/spec_expr_8.f90 | 24 ++
 gcc/testsuite/gfortran.dg/spec_expr_9.f90 | 19 +
 gcc/testsuite/gfortran.dg/vect/pr90681.f  |  2 +-
 gcc/testsuite/gfortran.dg/vect/pr97761.f90|  2 +-
 gcc/testsuite/gfortran.dg/vect/pr99746.f90|  2 +-
 14 files changed, 152 insertions(+), 58 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/spec_expr_8.f90
 create mode 100644 gcc/testsuite/gfortran.dg/spec_expr_9.f90

-- 
2.43.0



Re: _LIBCXX_DEBUG value initialized singular iterators assert failures in std algorithms [PR104316]

2024-03-19 Thread Jonathan Wakely
On Tue, 19 Mar 2024 at 09:31, Jonathan Wakely wrote:
>
> On Mon, 18 Mar 2024 at 21:38, François Dumont wrote:
> >
> > Both committed now.
> >
> > Just to confirm, those 2 last patches should be backported to gcc-13
> > branch, right ?
>
> Yes please.
>
> >
> > I might have a try to update version.h but if you want to do it before
> > don't hesitate.
>
> You'll need to have 'autogen' installed to do it (I'm going to update
> the docs to describe that today).

I've documented it in a new "Generated files" section at the end of
https://gcc.gnu.org/onlinedocs/libstdc++/manual/appendix_porting.html#build_hacking.generated



[committed] libstdc++: Regenerate in maintainer mode

2024-03-19 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk.

I've taken the liberty of assuming that the change to
gcc/doc/install.texi is sufficiently "obviously true" to not require
separate approval.

-- >8 --

This is a generated header but regenerating it requires the manual step
of running 'make -C include update-version' in the libstdc++ build dir.
Make it regenerate automatically when --enable-maintainer-mode is used.

libstdc++-v3/ChangeLog:

* include/Makefile.am [MAINTAINER_MODE]: Add target to
automatically update .
* include/Makefile.in: Regenerate.

gcc/ChangeLog:

* doc/install.texi (Prerequisites): Document use of autogen for
libstdc++.
---
 gcc/doc/install.texi | 2 ++
 libstdc++-v3/include/Makefile.am | 6 ++
 libstdc++-v3/include/Makefile.in | 4 
 3 files changed, 12 insertions(+)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index e3650e0c4f4..014ca25aa62 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -545,6 +545,8 @@ Necessary to run @samp{make check} for @file{fixinc}.
 Necessary to regenerate the top level @file{Makefile.in} file from
 @file{Makefile.tpl} and @file{Makefile.def}.
 
+Necessary to regenerate the @file{bits/version.h} header for libstdc++.
+
 @item Flex version 2.5.4 (or later)
 
 Necessary when modifying @file{*.l} files.
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 64152351ed0..cb902de36ae 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -1480,6 +1480,12 @@ update-version:
cd ${bits_srcdir} && \
autogen version.def
 
+if MAINTAINER_MODE
+# Regenerate it automatically in maintainer mode.
+${bits_srcdir}/version.h: ${bits_srcdir}/version.def ${bits_srcdir}/version.tpl
+   $(MAKE) update-version
+endif
+
 # The real deal.
 install-data-local: install-headers
 install-headers:



[committed] libstdc++: Update docs on build process and generated files

2024-03-19 Thread Jonathan Wakely
Pushed to trunk.

-- >8 --

There are several more sub-directories below 'src' now, with lots more
conveience libraries. Document them all as of GCC 14.

Also document how to regenerate the generated headers under include/bits
and how to update the tzdata.zi file.

libstdc++-v3/ChangeLog:

* doc/xml/manual/build_hacking.xml: Document generated files.
Update list of convenience libraries and sub-directories under
the src directory.
* doc/html/*: Regenerate.
---
 libstdc++-v3/doc/html/index.html  |   2 +-
 libstdc++-v3/doc/html/manual/appendix.html|   2 +-
 .../doc/html/manual/appendix_porting.html | 119 +++--
 libstdc++-v3/doc/html/manual/index.html   |   2 +-
 libstdc++-v3/doc/xml/manual/build_hacking.xml | 164 --
 5 files changed, 262 insertions(+), 27 deletions(-)

diff --git a/libstdc++-v3/doc/xml/manual/build_hacking.xml 
b/libstdc++-v3/doc/xml/manual/build_hacking.xml
index 36f659cea4b..077c0632a79 100644
--- a/libstdc++-v3/doc/xml/manual/build_hacking.xml
+++ b/libstdc++-v3/doc/xml/manual/build_hacking.xml
@@ -554,9 +554,9 @@ baseline file.
  make src


- Generates two convenience libraries, one for C++98 and one for
- C++11, various compatibility files for shared and static
- libraries, and then collects all the generated bits and creates
+ Generates several convenience libraries,
+ various compatibility files for shared and static libraries,
+ and then collects all the generated bits and creates
  the final libstdc++ libraries.
   
 
@@ -566,8 +566,8 @@ baseline file.


  Generates a libtool convenience library,
- libc++98convenience with language-support
- routines. Uses the -std=gnu++98 dialect.
+ libc++98convenience with the library components
+ defined by C++98. Uses the -std=gnu++98 dialect.

  
  
@@ -576,8 +576,84 @@ baseline file.


  Generates a libtool convenience library,
- libc++11convenience with language-support
- routines. Uses the -std=gnu++11 dialect.
+ libc++11convenience with the library components
+ that were added or changed in C++11.
+ Uses the -std=gnu++11 dialect.
+   
+ 
+ 
+   
+ make src/c++17
+   
+   
+ Generates a libtool convenience library,
+ libc++17convenience with the library components
+ that were added or changed in C++17.
+ Uses the -std=gnu++17 dialect.
+   
+ 
+ 
+   
+ make src/c++20
+   
+   
+ Generates a libtool convenience library,
+ libc++20convenience with the library components
+ that were added or changed in C++20.
+ Uses the -std=gnu++20 dialect.
+   
+ 
+ 
+   
+ make src/c++23
+   
+   
+ Generates a libtool convenience library,
+ libc++23convenience with the library components
+ that were added or changed in C++23.
+ At the time of writing (GCC 14) this convenience library is included
+ in libstdc++exp.a and not in the final
+ libstdc++ libraries.
+ Uses the -std=gnu++23 dialect.
+   
+ 
+ 
+   
+ make src/filesystem
+   
+   
+ Generates a libtool convenience library,
+ libstdc++fsconvenience,
+ and a standalone static library,
+ libstdc++fs.a.
+ These contain definitions of the Filesystem TS extensions.
+ Uses the -std=gnu++17 dialect.
+   
+ 
+ 
+   
+ make src/libbacktrace
+   
+   
+ Generates a libtool convenience library,
+ libstdc++_libbacktrace,
+ containing the libbacktrace definitions used by the C++23
+ std::stacktrace feature.
+   
+ 
+ 
+   
+ make src/experimental
+   
+   
+ Generates a standalone static library,
+ libstdc++exp.a, containing the symbol definitions
+ for experimental features and extensions. This collects the convenience
+ libraries libstdc++fsconvenience,
+ libstdc++_libbacktrace, and
+ (at the time of writing) libc++23convenience
+ and combines them into one.
+ Uses the -std=gnu++17 dialect.

  
  
@@ -591,10 +667,11 @@ baseline file.

 

- Then, collects all the generated convenience libraries, adds in
- any required compatibility objects, and creates the final shared
- and static libraries: libstdc++.so and
- libstdc++.a.
+ Then, collects all the generated convenience libraries that weren't
+ added to libstdc++exp.a,
+ adds in any required compatibility objects,
+ and creates the final shared and static libraries:
+ libstdc++.so and libstdc++.a.

 
  
@@ -604,4 +681,69 @@ baseline file.
 
  
 
+Generated 
files
+
+
+  Some files in the libstdc++ source tree are auto-generated from other files.
+  In general, these are not regenerated automatically, so it must be done
+  manually when the files they depend on are updated.
+
+
+
+ 
+   
+   The header file
+   include/bits/version.h
+   is generated from version.def and
+   version.tpl in the same directory.
+   After editing those files, either run autogen version.def
+   in 

[committed] libstdc++: Fix Python scripts to output the correct filename

2024-03-19 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk.

-- >8 --

These scripts both print "generated by $file, do not edit" header but
one of them prints the wrong filename. Use the built-in __file__
attribute to ensure it's correct.

contrib/ChangeLog:

* unicode/gen_libstdcxx_unicode_data.py: Fix header of generated
file to name the correct script.

libstdc++-v3/ChangeLog:

* include/bits/text_encoding-data.h: Regenerate.
* include/bits/unicode-data.h: Regenerate.
* scripts/gen_text_encoding_data.py: Fix header of generated
file to name the correct script.
---
 contrib/unicode/gen_libstdcxx_unicode_data.py  | 6 --
 libstdc++-v3/include/bits/text_encoding-data.h | 3 ++-
 libstdc++-v3/include/bits/unicode-data.h   | 2 +-
 libstdc++-v3/scripts/gen_text_encoding_data.py | 5 -
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/contrib/unicode/gen_libstdcxx_unicode_data.py 
b/contrib/unicode/gen_libstdcxx_unicode_data.py
index 2341a442f6a..da2f6ee66bf 100755
--- a/contrib/unicode/gen_libstdcxx_unicode_data.py
+++ b/contrib/unicode/gen_libstdcxx_unicode_data.py
@@ -29,9 +29,11 @@
 import sys
 import re
 import math
+import os
 
-print("""// Generated by contrib/unicode/gen_std_format_width.py, do not edit.
-
+self = os.path.basename(__file__)
+print("// Generated by contrib/unicode/{}, do not edit.".format(self))
+print("""
 // Copyright The GNU Toolchain Authors.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
diff --git a/libstdc++-v3/include/bits/text_encoding-data.h 
b/libstdc++-v3/include/bits/text_encoding-data.h
index 81bd94e6c3a..d6c34f895f5 100644
--- a/libstdc++-v3/include/bits/text_encoding-data.h
+++ b/libstdc++-v3/include/bits/text_encoding-data.h
@@ -1,4 +1,5 @@
-// Generated by gen_text_encoding_data.py, do not edit.
+// Generated by scripts/gen_text_encoding_data.py, do not edit.
+
 
 // Copyright The GNU Toolchain Authors.
 //
diff --git a/libstdc++-v3/include/bits/unicode-data.h 
b/libstdc++-v3/include/bits/unicode-data.h
index 69b8f2926c3..e39a6c45f6c 100644
--- a/libstdc++-v3/include/bits/unicode-data.h
+++ b/libstdc++-v3/include/bits/unicode-data.h
@@ -1,4 +1,4 @@
-// Generated by contrib/unicode/gen_std_format_width.py, do not edit.
+// Generated by contrib/unicode/gen_libstdcxx_unicode_data.py, do not edit.
 
 // Copyright The GNU Toolchain Authors.
 //
diff --git a/libstdc++-v3/scripts/gen_text_encoding_data.py 
b/libstdc++-v3/scripts/gen_text_encoding_data.py
index 13792b5f5e7..e11b26e69fc 100755
--- a/libstdc++-v3/scripts/gen_text_encoding_data.py
+++ b/libstdc++-v3/scripts/gen_text_encoding_data.py
@@ -26,12 +26,15 @@
 
 import sys
 import csv
+import os
 
 if len(sys.argv) != 2:
 print("Usage: %s " % sys.argv[0], file=sys.stderr)
 sys.exit(1)
 
-print("""// Generated by gen_text_encoding_data.py, do not edit.
+self = os.path.basename(__file__)
+print("// Generated by scripts/{}, do not edit.".format(self))
+print("""
 
 // Copyright The GNU Toolchain Authors.
 //
-- 
2.44.0



[committed] libstdc++: Fix typos in MemoryChecker assertion messages in PSTL tests

2024-03-19 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk.

-- >8 --

This has been reported upstream.

libstdc++-v3/ChangeLog:

* testsuite/util/pstl/test_utils.h: Fix typos in comments.
---
 libstdc++-v3/testsuite/util/pstl/test_utils.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/testsuite/util/pstl/test_utils.h 
b/libstdc++-v3/testsuite/util/pstl/test_utils.h
index e35084eabb2..55b510098a0 100644
--- a/libstdc++-v3/testsuite/util/pstl/test_utils.h
+++ b/libstdc++-v3/testsuite/util/pstl/test_utils.h
@@ -252,7 +252,7 @@ struct MemoryChecker {
 MemoryChecker(MemoryChecker&& other) : _value(other.value()) {
 // check for EXPECT_TRUE(state() != alive_state, ...) has not been 
done since
 // compiler can optimize out the move ctor call that results in false 
positive failure
-EXPECT_TRUE(other.state() == alive_state, "wrong effect from 
MemoryChecker(MemoryChecker&&): attemp to construct an object from non-existing 
object");
+EXPECT_TRUE(other.state() == alive_state, "wrong effect from 
MemoryChecker(MemoryChecker&&): attempt to construct an object from 
non-existing object");
 // set constructed state and increment counter for living object
 inc_alive_objects();
 _state = alive_state;
@@ -260,15 +260,15 @@ struct MemoryChecker {
 MemoryChecker(const MemoryChecker& other) : _value(other.value()) {
 // check for EXPECT_TRUE(state() != alive_state, ...) has not been 
done since
 // compiler can optimize out the copy ctor call that results in false 
positive failure
-EXPECT_TRUE(other.state() == alive_state, "wrong effect from 
MemoryChecker(const MemoryChecker&): attemp to construct an object from 
non-existing object");
+EXPECT_TRUE(other.state() == alive_state, "wrong effect from 
MemoryChecker(const MemoryChecker&): attempt to construct an object from 
non-existing object");
 // set constructed state and increment counter for living object
 inc_alive_objects();
 _state = alive_state;
 }
 MemoryChecker& operator=(MemoryChecker&& other) {
 // check if we do not assign over uninitialized memory
-EXPECT_TRUE(state() == alive_state, "wrong effect from 
MemoryChecker::operator=(MemoryChecker&& other): attemp to assign to 
non-existing object");
-EXPECT_TRUE(other.state() == alive_state, "wrong effect from 
MemoryChecker::operator=(MemoryChecker&& other): attemp to assign from 
non-existing object");
+EXPECT_TRUE(state() == alive_state, "wrong effect from 
MemoryChecker::operator=(MemoryChecker&& other): attempt to assign to 
non-existing object");
+EXPECT_TRUE(other.state() == alive_state, "wrong effect from 
MemoryChecker::operator=(MemoryChecker&& other): attempt to assign from 
non-existing object");
 // just assign new value, counter is the same, state is the same
 _value = other.value();
 
@@ -276,8 +276,8 @@ struct MemoryChecker {
 }
 MemoryChecker& operator=(const MemoryChecker& other) {
 // check if we do not assign over uninitialized memory
-EXPECT_TRUE(state() == alive_state, "wrong effect from 
MemoryChecker::operator=(const MemoryChecker& other): attemp to assign to 
non-existing object");
-EXPECT_TRUE(other.state() == alive_state, "wrong effect from 
MemoryChecker::operator=(const MemoryChecker& other): attemp to assign from 
non-existing object");
+EXPECT_TRUE(state() == alive_state, "wrong effect from 
MemoryChecker::operator=(const MemoryChecker& other): attempt to assign to 
non-existing object");
+EXPECT_TRUE(other.state() == alive_state, "wrong effect from 
MemoryChecker::operator=(const MemoryChecker& other): attempt to assign from 
non-existing object");
 // just assign new value, counter is the same, state is the same
 _value = other.value();
 
@@ -285,7 +285,7 @@ struct MemoryChecker {
 }
 ~MemoryChecker() {
 // check if we do not double destruct the object
-EXPECT_TRUE(state() == alive_state, "wrong effect from 
~MemoryChecker(): attemp to destroy non-existing object");
+EXPECT_TRUE(state() == alive_state, "wrong effect from 
~MemoryChecker(): attempt to destroy non-existing object");
 // set destructed state and decrement counter for living object
 static_cast(_state) = dead_state;
 dec_alive_objects();
-- 
2.44.0



[PATCH] Fortran: add two small F2023 features

2024-03-19 Thread FX Coudert
Hi,

These two (independent) patches add two tiny Fortran 2023 features: new 
ISO_FORTRAN_ENV named constants and SELECTED_LOGICAL_KIND intrinsic.

Bootstrapped and regtested on x86_64-pc-linux-gnu.
Please review, and let me know if it’s okay to push once we’re back in stage 1.
(I know it’s not particularly soon, but I don’t want to forget these so I’m 
posting them for review)

FX





0001-Fortran-add-F2023-ISO_FORTRAN_ENV-named-constants.patch
Description: Binary data


0002-Fortran-add-SELECTED_LOGICAL_KIND.patch
Description: Binary data


[committed] libstdc++: Begin lifetime of storage in std::vector [PR114367]

2024-03-19 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk. Backport to gcc-13 to follow.

-- >8 --

This doesn't cause a problem with GCC, but Clang correctly diagnoses a
bug in the code. The objects in the allocated storage need to begin
their lifetime before we start using them.

This change uses the allocator's construct function instead of using
std::construct_at directly, in order to support fancy pointers.

libstdc++-v3/ChangeLog:

PR libstdc++/114367
* include/bits/stl_bvector.h (_M_allocate): Use allocator's
construct function to begin lifetime of words.
---
 libstdc++-v3/include/bits/stl_bvector.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index 2c8b892b07a..a3343d95b36 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -674,13 +674,13 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   _M_allocate(size_t __n)
   {
_Bit_pointer __p = _Bit_alloc_traits::allocate(_M_impl, _S_nword(__n));
-#if __cpp_lib_is_constant_evaluated
+#if __cpp_lib_is_constant_evaluated && __cpp_constexpr_dynamic_alloc
if (std::is_constant_evaluated())
-   {
- __n = _S_nword(__n);
- for (size_t __i = 0; __i < __n; ++__i)
-   __p[__i] = 0ul;
-   }
+ {
+   __n = _S_nword(__n);
+   for (size_t __i = 0; __i < __n; ++__i)
+ std::construct_at(std::to_address(__p) + __i);
+ }
 #endif
return __p;
   }
-- 
2.44.0



[PATCH] analyzer: Bail out on function pointer for -Wanalyzer-allocation-size

2024-03-19 Thread Stefan Schulze Frielinghaus
On s390 pr94688.c is failing due to excess error

pr94688.c:6:5: warning: allocated buffer size is not a multiple of the 
pointee's size [CWE-131] [-Wanalyzer-allocation-size]

This is because on s390 functions are by default aligned to an 8-byte
boundary and during function type construction size is set to function
boundary.  Thus, for the assignment

a.0_1 = (void (*) ()) 

we have that the right-hand side is pointing to a 4-byte memory region
whereas the size of the function pointer is 8 byte and a warning is
emitted.

I could follow and skip this test as done in PR112705, or we could bail
out early in the analyzer for function pointers.  My intuition so far
is that -Wanalyzer-allocation-size shouldn't care about function
pointer.  Therefore, I went for bailing out early.  If you believe this
is wrong I can still go by skipping this test on s390.  Any thoughts?
---
 gcc/analyzer/region-model.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index f079d1fb37e..1b43443d168 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3514,6 +3514,10 @@ region_model::check_region_size (const region *lhs_reg, 
const svalue *rhs_sval,
   || TYPE_SIZE_UNIT (pointee_type) == NULL_TREE)
 return;
 
+  /* Bail out early on function pointers.  */
+  if (TREE_CODE (pointee_type) == FUNCTION_TYPE)
+return;
+
   /* Bail out early on pointers to structs where we can
  not deduce whether the buffer size is compatible.  */
   bool is_struct = RECORD_OR_UNION_TYPE_P (pointee_type);
-- 
2.43.0



Re: [PATCH] middle-end/113396 - int128 array index and value-ranges

2024-03-19 Thread Jakub Jelinek
On Tue, Mar 19, 2024 at 03:47:37PM +0100, Richard Biener wrote:
> The following fixes bogus truncation of a value-range for an int128
> array index when computing the maximum extent for a variable array
> reference.  Instead of possibly slowing things down by using
> widest_int the following makes sure the range bounds fit within
> the constraints offset_int were designed for.

Perhaps you could use wide_int/poly_wide_int with precision
of offset_int if it is at most 64-bit precision and twice that precision
otherwise.
I think large BITINT_TYPEs shouldn't be a problem since r14-7200,
so another fix might to truncate at gimplification time
ARRAY_REF indexes wider than sizetype to sizetype.  Maybe GCC 15-ish
material though.

Anyway, guess your patch is ok as is too.

>   PR middle-end/113396
>   * tree-dfa.cc (get_ref_base_and_extent): Use index range
>   bounds only if they fit within the address-range constraints
>   of offset_int.
> 
>   * gcc.dg/torture/pr113396.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/torture/pr113396.c | 19 +++
>  gcc/tree-dfa.cc |  6 --
>  2 files changed, 23 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr113396.c
> 
> diff --git a/gcc/testsuite/gcc.dg/torture/pr113396.c 
> b/gcc/testsuite/gcc.dg/torture/pr113396.c
> new file mode 100644
> index 000..585f717bdda
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr113396.c
> @@ -0,0 +1,19 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target int128 } */
> +
> +unsigned char m[] = {5, 79, 79, 79, 79};
> +__int128 p;
> +int main()
> +{
> +  int g1 = 0;
> +  p = 0;
> +  for (int aj = 0; aj < 256; aj++)
> +   {
> +  m[0] = -4;
> +  for (; p >= 0; p -= 1) {
> +g1 = m[p];
> +  }
> +  }
> +  if (g1 != 0xfc)
> +__builtin_abort();
> +}
> diff --git a/gcc/tree-dfa.cc b/gcc/tree-dfa.cc
> index cbd3774b21f..93e53b29a6d 100644
> --- a/gcc/tree-dfa.cc
> +++ b/gcc/tree-dfa.cc
> @@ -549,7 +549,8 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
>   /* Try to constrain maxsize with range information.  */
>   offset_int omax
> = offset_int::from (max, TYPE_SIGN (TREE_TYPE (index)));
> - if (known_lt (lbound, omax))
> + if (wi::get_precision (max) <= ADDR_MAX_BITSIZE
> + && known_lt (lbound, omax))
> {
>   poly_offset_int rmaxsize;
>   rmaxsize = (omax - lbound + 1)
> @@ -567,7 +568,8 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
>   /* Try to adjust bit_offset with range information.  */
>   offset_int omin
> = offset_int::from (min, TYPE_SIGN (TREE_TYPE (index)));
> - if (known_le (lbound, omin))
> + if (wi::get_precision (min) <= ADDR_MAX_BITSIZE
> + && known_le (lbound, omin))
> {
>   poly_offset_int woffset
> = wi::sext (omin - lbound,
> -- 
> 2.35.3

Jakub



[PATCH] middle-end/113396 - int128 array index and value-ranges

2024-03-19 Thread Richard Biener
The following fixes bogus truncation of a value-range for an int128
array index when computing the maximum extent for a variable array
reference.  Instead of possibly slowing things down by using
widest_int the following makes sure the range bounds fit within
the constraints offset_int were designed for.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

PR middle-end/113396
* tree-dfa.cc (get_ref_base_and_extent): Use index range
bounds only if they fit within the address-range constraints
of offset_int.

* gcc.dg/torture/pr113396.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr113396.c | 19 +++
 gcc/tree-dfa.cc |  6 --
 2 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr113396.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr113396.c 
b/gcc/testsuite/gcc.dg/torture/pr113396.c
new file mode 100644
index 000..585f717bdda
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr113396.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int128 } */
+
+unsigned char m[] = {5, 79, 79, 79, 79};
+__int128 p;
+int main()
+{
+  int g1 = 0;
+  p = 0;
+  for (int aj = 0; aj < 256; aj++)
+   {
+  m[0] = -4;
+  for (; p >= 0; p -= 1) {
+g1 = m[p];
+  }
+  }
+  if (g1 != 0xfc)
+__builtin_abort();
+}
diff --git a/gcc/tree-dfa.cc b/gcc/tree-dfa.cc
index cbd3774b21f..93e53b29a6d 100644
--- a/gcc/tree-dfa.cc
+++ b/gcc/tree-dfa.cc
@@ -549,7 +549,8 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
/* Try to constrain maxsize with range information.  */
offset_int omax
  = offset_int::from (max, TYPE_SIGN (TREE_TYPE (index)));
-   if (known_lt (lbound, omax))
+   if (wi::get_precision (max) <= ADDR_MAX_BITSIZE
+   && known_lt (lbound, omax))
  {
poly_offset_int rmaxsize;
rmaxsize = (omax - lbound + 1)
@@ -567,7 +568,8 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
/* Try to adjust bit_offset with range information.  */
offset_int omin
  = offset_int::from (min, TYPE_SIGN (TREE_TYPE (index)));
-   if (known_le (lbound, omin))
+   if (wi::get_precision (min) <= ADDR_MAX_BITSIZE
+   && known_le (lbound, omin))
  {
poly_offset_int woffset
  = wi::sext (omin - lbound,
-- 
2.35.3


Re: [PATCH][tree-object-size]Pass OST_DYNAMIC information to early_object_size phase

2024-03-19 Thread Qing Zhao


On Mar 19, 2024, at 09:46, Jakub Jelinek  wrote:

On Tue, Mar 19, 2024 at 01:14:51PM +, Qing Zhao wrote:
Currently, the OST_DYNAMIC information is not passed to
early_object_sizes phase. Pass this information to it, and adjust the code
and testing case accordingly.

Can you explain why do you think this is desirable?
Having both passes emit the dynamic instrumentation is IMHO undesirable,
the first pass exists just to catch subobject properties which are later
lost.

Okay, thanks for the comments. This makes good sense to me. So, the dynamic 
information
was intended to be ignored in the early pass.

I will try to fix the original issue (for the counted-by patches) in the other 
direction.


In any case, if this isn't a regression fix, it isn't suitable for
stage4, seems quite risky.

Agreed.

thanks.

Qing



* tree-object-size.cc (early_object_sizes_execute_one): Add one more
argument is_dynamic.
(object_sizes_execute): Call early_object_sizes_execute_one with one
more argument.

gcc/testsuite/ChangeLog:

* gcc.dg/builtin-dynamic-object-size-10.c: Update testing case.
---
gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c |  4 ++--
gcc/tree-object-size.cc   | 11 ---
2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c
index 3a2d9821a44e..3c5430b51358 100644
--- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c
@@ -7,5 +7,5 @@

/* early_objsz should resolve __builtin_dynamic_object_size like
   __builtin_object_size.  */
-/* { dg-final { scan-tree-dump "maximum object size 21" "early_objsz" } } */
-/* { dg-final { scan-tree-dump "maximum subobject size 16" "early_objsz" } } */
+/* { dg-final { scan-tree-dump "maximum dynamic object size 21" "early_objsz" 
} } */
+/* { dg-final { scan-tree-dump "maximum dynamic subobject size 16" 
"early_objsz" } } */
diff --git a/gcc/tree-object-size.cc b/gcc/tree-object-size.cc
index 018fbc30cbb6..57739eed3abf 100644
--- a/gcc/tree-object-size.cc
+++ b/gcc/tree-object-size.cc
@@ -2050,7 +2050,8 @@ do_valueize (tree t)
   since we're only looking for constant bounds.  */

static void
-early_object_sizes_execute_one (gimple_stmt_iterator *i, gimple *call)
+early_object_sizes_execute_one (gimple_stmt_iterator *i, gimple *call,
+ bool is_dynamic)
{
  tree ost = gimple_call_arg (call, 1);
  tree lhs = gimple_call_lhs (call);
@@ -2060,9 +2061,12 @@ early_object_sizes_execute_one (gimple_stmt_iterator *i, 
gimple *call)
return;

  unsigned HOST_WIDE_INT object_size_type = tree_to_uhwi (ost);
+  if (is_dynamic)
+object_size_type |= OST_DYNAMIC;
+
  tree ptr = gimple_call_arg (call, 0);

-  if (object_size_type != 1 && object_size_type != 3)
+  if ((object_size_type & OST_SUBOBJECT) == 0)
return;

  if (TREE_CODE (ptr) != ADDR_EXPR && TREE_CODE (ptr) != SSA_NAME)
@@ -2071,6 +2075,7 @@ early_object_sizes_execute_one (gimple_stmt_iterator *i, 
gimple *call)
  tree type = TREE_TYPE (lhs);
  tree bytes;
  if (!compute_builtin_object_size (ptr, object_size_type, )
+  || (TREE_CODE (bytes) != INTEGER_CST)
  || !int_fits_type_p (bytes, type))
return;

@@ -2153,7 +2158,7 @@ object_sizes_execute (function *fun, bool early)
 __builtin_dynamic_object_size too.  */
  if (early)
{
-   early_object_sizes_execute_one (, call);
+   early_object_sizes_execute_one (, call, dynamic);
  continue;
}

--
2.31.1

Jakub



[PATCH] tree-optimization/113727 - bogus SRA with BIT_FIELD_REF

2024-03-19 Thread Richard Biener
When SRA analyzes BIT_FIELD_REFs it handles writes and not byte
aligned reads differently from byte aligned reads.  Instead of
trying to create replacements for the loaded portion the former
cases try to replace the base object while keeping the wrapping
BIT_FIELD_REFs.  This breaks when we have both kinds operating
on the same base object if there's no appearant overlap conflict
as the conflict that then nevertheless exists isn't handled with.
The fix is to enforce what I think is part of the design handling
the former case - that only the full base object gets replaced
and no further sub-objects are created within as otherwise
keeping the wrapping BIT_FIELD_REF cannot work.  The patch
enforces this within analyze_access_subtree.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

PR tree-optimization/113727
* tree-sra.cc (analyze_access_subtree): Do not allow
replacements in subtrees when grp_partial_lhs.

* gcc.dg/torture/pr113727.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr113727.c | 26 +
 gcc/tree-sra.cc |  3 ++-
 2 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr113727.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr113727.c 
b/gcc/testsuite/gcc.dg/torture/pr113727.c
new file mode 100644
index 000..f92ddad5c8e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr113727.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-require-effective-target int32plus } */
+
+struct f {
+  unsigned au : 5;
+  unsigned f3 : 21;
+} g_994;
+
+int main()
+{
+  struct f aq1 = {};
+{
+  struct f aq = {9, 5};
+  struct f as = aq;
+  for (int y = 0 ; y <= 4; y += 1)
+   if (as.au)
+ {
+   struct f aa[5] = {{2, 154}, {2, 154}, {2, 154}, {2, 154}, {2, 154}};
+   as = aa[0];
+ }
+  aq1 = as;
+}
+  if (aq1.f3 != 0x9a)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc
index f8e71ec48b9..dbfae5e7fdd 100644
--- a/gcc/tree-sra.cc
+++ b/gcc/tree-sra.cc
@@ -2735,7 +2735,8 @@ analyze_access_subtree (struct access *root, struct 
access *parent,
 {
   hole |= covered_to < child->offset;
   sth_created |= analyze_access_subtree (child, root,
-allow_replacements && !scalar,
+allow_replacements && !scalar
+&& !root->grp_partial_lhs,
 totally);
 
   root->grp_unscalarized_data |= child->grp_unscalarized_data;
-- 
2.35.3


Re: [PATCH][tree-object-size]Pass OST_DYNAMIC information to early_object_size phase

2024-03-19 Thread Jakub Jelinek
On Tue, Mar 19, 2024 at 01:14:51PM +, Qing Zhao wrote:
>  Currently, the OST_DYNAMIC information is not passed to
>  early_object_sizes phase. Pass this information to it, and adjust the code
>  and testing case accordingly.

Can you explain why do you think this is desirable?
Having both passes emit the dynamic instrumentation is IMHO undesirable,
the first pass exists just to catch subobject properties which are later
lost.

In any case, if this isn't a regression fix, it isn't suitable for
stage4, seems quite risky.

>   * tree-object-size.cc (early_object_sizes_execute_one): Add one more
>   argument is_dynamic.
>   (object_sizes_execute): Call early_object_sizes_execute_one with one
>   more argument.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/builtin-dynamic-object-size-10.c: Update testing case.
> ---
>  gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c |  4 ++--
>  gcc/tree-object-size.cc   | 11 ---
>  2 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c 
> b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c
> index 3a2d9821a44e..3c5430b51358 100644
> --- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c
> +++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c
> @@ -7,5 +7,5 @@
>  
>  /* early_objsz should resolve __builtin_dynamic_object_size like
> __builtin_object_size.  */
> -/* { dg-final { scan-tree-dump "maximum object size 21" "early_objsz" } } */
> -/* { dg-final { scan-tree-dump "maximum subobject size 16" "early_objsz" } } 
> */
> +/* { dg-final { scan-tree-dump "maximum dynamic object size 21" 
> "early_objsz" } } */
> +/* { dg-final { scan-tree-dump "maximum dynamic subobject size 16" 
> "early_objsz" } } */
> diff --git a/gcc/tree-object-size.cc b/gcc/tree-object-size.cc
> index 018fbc30cbb6..57739eed3abf 100644
> --- a/gcc/tree-object-size.cc
> +++ b/gcc/tree-object-size.cc
> @@ -2050,7 +2050,8 @@ do_valueize (tree t)
> since we're only looking for constant bounds.  */
>  
>  static void
> -early_object_sizes_execute_one (gimple_stmt_iterator *i, gimple *call)
> +early_object_sizes_execute_one (gimple_stmt_iterator *i, gimple *call,
> + bool is_dynamic)
>  {
>tree ost = gimple_call_arg (call, 1);
>tree lhs = gimple_call_lhs (call);
> @@ -2060,9 +2061,12 @@ early_object_sizes_execute_one (gimple_stmt_iterator 
> *i, gimple *call)
>  return;
>  
>unsigned HOST_WIDE_INT object_size_type = tree_to_uhwi (ost);
> +  if (is_dynamic)
> +object_size_type |= OST_DYNAMIC;
> +
>tree ptr = gimple_call_arg (call, 0);
>  
> -  if (object_size_type != 1 && object_size_type != 3)
> +  if ((object_size_type & OST_SUBOBJECT) == 0)
>  return;
>  
>if (TREE_CODE (ptr) != ADDR_EXPR && TREE_CODE (ptr) != SSA_NAME)
> @@ -2071,6 +2075,7 @@ early_object_sizes_execute_one (gimple_stmt_iterator 
> *i, gimple *call)
>tree type = TREE_TYPE (lhs);
>tree bytes;
>if (!compute_builtin_object_size (ptr, object_size_type, )
> +  || (TREE_CODE (bytes) != INTEGER_CST)
>|| !int_fits_type_p (bytes, type))
>  return;
>  
> @@ -2153,7 +2158,7 @@ object_sizes_execute (function *fun, bool early)
>__builtin_dynamic_object_size too.  */
> if (early)
>   {
> -   early_object_sizes_execute_one (, call);
> +   early_object_sizes_execute_one (, call, dynamic);
> continue;
>   }
>  
> -- 
> 2.31.1

Jakub



Re: [PATCH v2 08/13] aarch64: Add Cygwin and MinGW environments for AArch64

2024-03-19 Thread Christophe Lyon
On Mon, 18 Mar 2024 at 22:35, Evgeny Karpov  wrote:
>
> Monday, March 18, 2024 2:27 PM
> Christophe Lyon wrote:
>
> > > +/* Disable SEH and declare the required SEH-related macros that are
> > > +still needed for compilation.  */ #undef TARGET_SEH #define
> > > +TARGET_SEH 0
> > > +
> > > +#define SSE_REGNO_P(N) 0
> > > +#define GENERAL_REGNO_P(N) 0
> > I think you forgot to add a comment to explain the above two lines.
> > (it was requested during v1 review)
> >
> > Thanks,
> >
> > Christophe
>
> Hi Christophe,
>
> Thank you for the review!
> The comment regarding SEH and SEH-related macros has been added two lines 
> above.
> It may not be obvious, but these macros are needed to emit SEH data in 
> mingw/winnt.cc.
> This group is separated by an empty line; however, it still relates to 
> SEH-related macros.
>
Thanks for the clarification, I thought that comment only applied to
the two lines about TARGET_SEH.

Christophe

> Regards,
> Evgeny


[PATCH][tree-object-size]Pass OST_DYNAMIC information to early_object_size phase

2024-03-19 Thread Qing Zhao
 Currently, the OST_DYNAMIC information is not passed to
 early_object_sizes phase. Pass this information to it, and adjust the code
 and testing case accordingly.

bootstrapped and regress tested on both x86 and aarch64. no issue.

Okay for trunk?

thanks.

Qing

gcc/ChangeLog:

* tree-object-size.cc (early_object_sizes_execute_one): Add one more
argument is_dynamic.
(object_sizes_execute): Call early_object_sizes_execute_one with one
more argument.

gcc/testsuite/ChangeLog:

* gcc.dg/builtin-dynamic-object-size-10.c: Update testing case.
---
 gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c |  4 ++--
 gcc/tree-object-size.cc   | 11 ---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c
index 3a2d9821a44e..3c5430b51358 100644
--- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-10.c
@@ -7,5 +7,5 @@
 
 /* early_objsz should resolve __builtin_dynamic_object_size like
__builtin_object_size.  */
-/* { dg-final { scan-tree-dump "maximum object size 21" "early_objsz" } } */
-/* { dg-final { scan-tree-dump "maximum subobject size 16" "early_objsz" } } */
+/* { dg-final { scan-tree-dump "maximum dynamic object size 21" "early_objsz" 
} } */
+/* { dg-final { scan-tree-dump "maximum dynamic subobject size 16" 
"early_objsz" } } */
diff --git a/gcc/tree-object-size.cc b/gcc/tree-object-size.cc
index 018fbc30cbb6..57739eed3abf 100644
--- a/gcc/tree-object-size.cc
+++ b/gcc/tree-object-size.cc
@@ -2050,7 +2050,8 @@ do_valueize (tree t)
since we're only looking for constant bounds.  */
 
 static void
-early_object_sizes_execute_one (gimple_stmt_iterator *i, gimple *call)
+early_object_sizes_execute_one (gimple_stmt_iterator *i, gimple *call,
+   bool is_dynamic)
 {
   tree ost = gimple_call_arg (call, 1);
   tree lhs = gimple_call_lhs (call);
@@ -2060,9 +2061,12 @@ early_object_sizes_execute_one (gimple_stmt_iterator *i, 
gimple *call)
 return;
 
   unsigned HOST_WIDE_INT object_size_type = tree_to_uhwi (ost);
+  if (is_dynamic)
+object_size_type |= OST_DYNAMIC;
+
   tree ptr = gimple_call_arg (call, 0);
 
-  if (object_size_type != 1 && object_size_type != 3)
+  if ((object_size_type & OST_SUBOBJECT) == 0)
 return;
 
   if (TREE_CODE (ptr) != ADDR_EXPR && TREE_CODE (ptr) != SSA_NAME)
@@ -2071,6 +2075,7 @@ early_object_sizes_execute_one (gimple_stmt_iterator *i, 
gimple *call)
   tree type = TREE_TYPE (lhs);
   tree bytes;
   if (!compute_builtin_object_size (ptr, object_size_type, )
+  || (TREE_CODE (bytes) != INTEGER_CST)
   || !int_fits_type_p (bytes, type))
 return;
 
@@ -2153,7 +2158,7 @@ object_sizes_execute (function *fun, bool early)
 __builtin_dynamic_object_size too.  */
  if (early)
{
- early_object_sizes_execute_one (, call);
+ early_object_sizes_execute_one (, call, dynamic);
  continue;
}
 
-- 
2.31.1



Re: [gcc-15 2/3] RISC-V: avoid LUI based const mat: keep stack offsets aligned

2024-03-19 Thread Jeff Law




On 3/19/24 12:48 AM, Andrew Waterman wrote:

On Mon, Mar 18, 2024 at 5:28 PM Vineet Gupta  wrote:




On 3/16/24 13:21, Jeff Law wrote:

|   59944:add s0,sp,2047  <
|   59948:mv  a2,a0
|   5994c:mv  a3,a1
|   59950:mv  a0,sp
|   59954:li  a4,1
|   59958:lui a1,0x1
|   5995c:add s0,s0,1 <---
|   59960:jal 59a3c

SP here becomes unaligned, even if transitively which is undesirable as
well as incorrect:
   - ABI requires stack to be 8 byte aligned
   - asm code looks weird and unexpected
   - to the user it might falsely seem like a compiler bug even when not,
 specially when staring at asm for debugging unrelated issue.
It's not ideal, but I think it's still ABI compliant as-is.  If it
wasn't, then I suspect things like virtual origins in Ada couldn't be
made ABI compliant.


To be clear are u suggesting ADD sp, sp, 2047 is ABI compliant ?
I'd still like to avoid it as I'm sure someone will complain about it.


With the patch, we get following correct code instead:

| ..
| 59944: add s0,sp,2032
| ..
| 5995c: add s0,s0,16

Alternately you could tighten the positive side of the range of the
splitter from patch 1/3 so that you could always use 2032 rather than
2047 on the first addi.   ie instead of allowing 2048..4094, allow
2048..4064.


2033..4064 vs. 2048..4094

Yeah I was a bit split about this as well. Since you are OK with either,
I'll keep them as-is and perhaps add this observation to commitlog.


There's a subset of embedded use cases where an interrupt service
routine continues on the same stack as the interrupted thread,
requiring sp to always have an ABI-compliant value (i.e. 16B aligned,
and with no important data on the stack at an address below sp).

Although not all use cases care about this property, it seems more
straightforward to maintain the invariant everywhere, rather than
selectively enforce it.
Just to be clear, the changes don't misalign the stack pointer at all. 
They merely have the potential to create *another* pointer into the 
stack which may or may not be aligned.  Which is totally normal, it's no 
different than taking the address of a char on the stack.


jeff



[pushed] analyzer: fixes to __atomic_{exchange, load, store} [PR114286]

2024-03-19 Thread David Malcolm
In r14-1497-gef768035ae8090 I added some support to the analyzer for
__atomic_ builtins (enough to fix false positives I was seeing in
my integration tests).

Unfortunately I messed up the implementation of
__atomic_{exchange,load,store}, leading to ICEs seen in
PR analyzer/114286.

Fixed thusly, fixing the ICEs.  Given that we're in stage 4, the patch
doesn't add support for any of the various __atomic_compare_exchange
builtins, so that these continue to fall back to the analyzer's
"anything could happen" handling of unknown functions.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Successful run of analyzer integration tests on x86_64-pc-linux-gnu.
Pushed to trunk as r14-9544-gc7a774edbf802d.

gcc/analyzer/ChangeLog:
PR analyzer/114286
* kf.cc (class kf_atomic_exchange): Reimplement based on signature
seen in gimple, rather than user-facing signature.
(class kf_atomic_load): Likewise.
(class kf_atomic_store): New.
(register_atomic_builtins): Register kf_atomic_store.

gcc/testsuite/ChangeLog:
PR analyzer/114286
* c-c++-common/analyzer/atomic-builtins-pr114286.c: New test.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/kf.cc| 135 +-
 .../analyzer/atomic-builtins-pr114286.c   |  48 +++
 2 files changed, 150 insertions(+), 33 deletions(-)
 create mode 100644 
gcc/testsuite/c-c++-common/analyzer/atomic-builtins-pr114286.c

diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index ed48ffbcba2..d197ccbd0f0 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -116,39 +116,54 @@ kf_alloca::impl_call_pre (const call_details ) const
   cd.maybe_set_lhs (ptr_sval);
 }
 
-/* Handler for:
-   void __atomic_exchange (type *ptr, type *val, type *ret, int memorder).  */
+/* Handler for __atomic_exchange.
+   Although the user-facing documentation specifies it as having this
+   signature:
+ void __atomic_exchange (type *ptr, type *val, type *ret, int memorder)
+
+   by the time the C/C++ frontends have acted on it, any calls that
+   can't be mapped to a _N variation end up with this signature:
+
+ void
+ __atomic_exchange (size_t sz, void *ptr, void *val, void *ret,
+   int memorder)
+
+   as seen in the gimple seen by the analyzer, and as specified
+   in sync-builtins.def.  */
 
 class kf_atomic_exchange : public internal_known_function
 {
 public:
   /* This is effectively:
-   *RET = *PTR;
-   *PTR = *VAL;
+   tmpA = *PTR;
+   tmpB = *VAL;
+   *PTR = tmpB;
+   *RET = tmpA;
   */
   void impl_call_pre (const call_details ) const final override
   {
-const svalue *ptr_ptr_sval = cd.get_arg_svalue (0);
-tree ptr_ptr_tree = cd.get_arg_tree (0);
-const svalue *val_ptr_sval = cd.get_arg_svalue (1);
-tree val_ptr_tree = cd.get_arg_tree (1);
-const svalue *ret_ptr_sval = cd.get_arg_svalue (2);
-tree ret_ptr_tree = cd.get_arg_tree (2);
+const svalue *num_bytes_sval = cd.get_arg_svalue (0);
+const svalue *ptr_sval = cd.get_arg_svalue (1);
+tree ptr_tree = cd.get_arg_tree (1);
+const svalue *val_sval = cd.get_arg_svalue (2);
+tree val_tree = cd.get_arg_tree (2);
+const svalue *ret_sval = cd.get_arg_svalue (3);
+tree ret_tree = cd.get_arg_tree (3);
 /* Ignore the memorder param.  */
 
 region_model *model = cd.get_model ();
 region_model_context *ctxt = cd.get_ctxt ();
 
-const region *val_region
-  = model->deref_rvalue (val_ptr_sval, val_ptr_tree, ctxt);
-const svalue *star_val_sval = model->get_store_value (val_region, ctxt);
-const region *ptr_region
-  = model->deref_rvalue (ptr_ptr_sval, ptr_ptr_tree, ctxt);
-const svalue *star_ptr_sval = model->get_store_value (ptr_region, ctxt);
-const region *ret_region
-  = model->deref_rvalue (ret_ptr_sval, ret_ptr_tree, ctxt);
-model->set_value (ptr_region, star_val_sval, ctxt);
-model->set_value (ret_region, star_ptr_sval, ctxt);
+const region *ptr_reg = model->deref_rvalue (ptr_sval, ptr_tree, ctxt);
+const region *val_reg = model->deref_rvalue (val_sval, val_tree, ctxt);
+const region *ret_reg = model->deref_rvalue (ret_sval, ret_tree, ctxt);
+
+const svalue *tmp_a_sval
+  = model->read_bytes (ptr_reg, ptr_tree, num_bytes_sval, ctxt);
+const svalue *tmp_b_sval
+  = model->read_bytes (val_reg, val_tree, num_bytes_sval, ctxt);
+model->write_bytes (ptr_reg, num_bytes_sval, tmp_b_sval, ctxt);
+model->write_bytes (ret_reg, num_bytes_sval, tmp_a_sval, ctxt);
   }
 };
 
@@ -265,32 +280,85 @@ private:
   enum tree_code m_op;
 };
 
-/* Handler for:
-   void __atomic_load (type *ptr, type *ret, int memorder).  */
+/* Handler for __atomic_load.
+   Although the user-facing documentation specifies it as having this
+   signature:
+
+  void __atomic_load (type *ptr, type *ret, int memorder)
+
+   by the time the C/C++ frontends have acted on it, any 

[pushed] testsuite, Darwin: Use the IOKit framework in framework-1.c [PR114049].

2024-03-19 Thread Iain Sandoe
Tested on x86_64-darwin21, a cross to powerpc-darwin9 and on
x86_64-linux-gnu, pushed to trunk (will backport to branches soon).
thanks,
Iain

--- 8< ---

The intent of the test is to show that we find a framework that
is installed in /System/Library/Frameworks when the user has added
a '-F' option.  The trick is to choose some header that is present
for all the Darwin versions we support and that does not contain any
content we cannot parse.  We had been using the Kernel framework for
this, but recent SDK versions have revealed that this is not suitable.

Replacing with a use of IOKit.

PR target/114049

gcc/testsuite/ChangeLog:

* gcc.dg/framework-1.c: Use an IOKit header instead of a
Kernel one.

Signed-off-by: Iain Sandoe 
---
 gcc/testsuite/gcc.dg/framework-1.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/framework-1.c 
b/gcc/testsuite/gcc.dg/framework-1.c
index de4adc39868..fdec129a8fb 100644
--- a/gcc/testsuite/gcc.dg/framework-1.c
+++ b/gcc/testsuite/gcc.dg/framework-1.c
@@ -1,4 +1,10 @@
 /* { dg-do compile { target *-*-darwin* } } */
 /* { dg-options "-F." } */
 
-#include 
+/* The intent of the test is to show that we find a framework that
+   is installed in /System/Library/Frameworks when the user has added
+   a '-F' option.  The trick is to choose some header that is present
+   for all the Darwin versions we support and that does not contain any
+   content we cannot parse.  */
+
+#include 
-- 
2.39.2 (Apple Git-143)



ping: [PATCH] diagnostics: Fix behavior of permerror options after diagnostic pop [PR111918]

2024-03-19 Thread Lewis Hyatt
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html

Thanks!

On Fri, Feb 16, 2024 at 7:02 PM Lewis Hyatt  wrote:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
>
> On Thu, Jan 25, 2024 at 4:57 PM Lewis Hyatt  wrote:
> >
> > May I please ask again about this one? It's just a couple lines, and I
> > think it fixes an important gap in the logic for #pragma GCC
> > diagnostic. The PR was not reported by me so I think at least one
> > other person does care about it :). Thanks!
> >
> > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> >
> > -Lewis
> >
> > On Mon, Jan 8, 2024 at 6:53 PM Lewis Hyatt  wrote:
> > >
> > > Can I please ping this one again? It's 3 lines or so to fix the PR. 
> > > Thanks!
> > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> > >
> > > On Tue, Dec 19, 2023 at 6:20 PM Lewis Hyatt  wrote:
> > > >
> > > > Hello-
> > > >
> > > > May I please ping this one? Thanks...
> > > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638692.html
> > > >
> > > > -Lewis
> > > >
> > > > On Wed, Nov 29, 2023 at 7:05 PM Lewis Hyatt  wrote:
> > > > >
> > > > > On Thu, Nov 09, 2023 at 04:16:10PM -0500, Lewis Hyatt wrote:
> > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111918
> > > > > >
> > > > > > This patch fixes the behavior of `#pragma GCC diagnostic pop' for 
> > > > > > permissive
> > > > > > error diagnostics such as -Wnarrowing (in C++11). Those currently 
> > > > > > do not
> > > > > > return to the correct state after the last pop; they become 
> > > > > > effectively
> > > > > > simple warnings instead. Bootstrap + regtest all languages on 
> > > > > > x86-64, does
> > > > > > it look OK please? Thanks!
> > > > >
> > > > > Hello-
> > > > >
> > > > > May I please ping this bug fix?
> > > > > https://gcc.gnu.org/pipermail/gcc-patches/2023-November/635871.html
> > > > >
> > > > > Please note, it requires a trivial rebase on top of recent changes to
> > > > > the class diagnostic_context public interface. I attached the rebased 
> > > > > patch
> > > > > here as well. Thanks!
> > > > >
> > > > > -Lewis


[pushed] libstdc++: Sync the atomic_link_flags implementation with GCC.

2024-03-19 Thread Iain Sandoe
This was pre-approved on irc by Jonathan, tested on x86_64-linux,Darwin,
pushed to trunk, thanks,
Iain

--- 8< ---

For Darwin, in order to allow uninstalled testing, we need to provide
a '-B' option pointing to each path containing an uninstalled library
that we are using (these get appended to the embedded runpaths).

This updates the version of the atomic_link_flags proc in the libstdc++
testsuite to do the same as the one in the GCC testsuite.

libstdc++-v3/ChangeLog:

* testsuite/lib/dg-options.exp (atomic_link_flags): Emit a -B
option for the path to the uninstalled libatomic.

Signed-off-by: Iain Sandoe 
---
 libstdc++-v3/testsuite/lib/dg-options.exp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/lib/dg-options.exp 
b/libstdc++-v3/testsuite/lib/dg-options.exp
index bc387d17ed7..00ca678a53a 100644
--- a/libstdc++-v3/testsuite/lib/dg-options.exp
+++ b/libstdc++-v3/testsuite/lib/dg-options.exp
@@ -314,7 +314,7 @@ proc atomic_link_flags { paths } {
   if { [file exists "${gccpath}/libatomic/.libs/libatomic.a"]
|| [file exists 
"${gccpath}/libatomic/.libs/libatomic.${shlib_ext}"] } {
   append flags " -B${gccpath}/libatomic/ "
-  append flags " -L${gccpath}/libatomic/.libs"
+  append flags " -B${gccpath}/libatomic/.libs"
   append ld_library_path ":${gccpath}/libatomic/.libs"
   }
 } else {
-- 
2.39.2 (Apple Git-143)



Re: Re: [PATCH] RISC-V: Add XiangShan Nanhu microarchitecture.

2024-03-19 Thread jiawei



 -原始邮件-
 发件人: "Jeff Law" 
 发送时间: 2024-03-19 10:54:09 (星期二)
 收件人: Jiawei , gcc-patches@gcc.gnu.org
 抄送: kito.ch...@sifive.com, pal...@dabbelt.com, 
christoph.muell...@vrull.eu, wuwei2...@iscas.ac.cn, shi...@iscas.ac.cn, 
shiyul...@iscas.ac.cn, chenyix...@iscas.ac.cn
 主题: Re: [PATCH] RISC-V: Add XiangShan Nanhu microarchitecture.
 
 
 
 On 2/27/24 1:52 AM, Jiawei wrote:
  From: Chen Jiawei 
  
  Co-Authored by: Lin Jiawei 
  
  This patch add XiangShan Nanhu cpu microarchitecture,
  Nanhu is a 6-issue, superscalar, out-of-order processor.
  More details see: 
https://xiangshan-doc.readthedocs.io/zh-cn/latest/arch
  
  gcc/ChangeLog:
  
   * config/riscv/riscv-cores.def (RISCV_TUNE): New def.
   (RISCV_CORE): Ditto.
   * config/riscv/riscv-opts.h (enum
   * riscv_microarchitecture_type): New option.
   * config/riscv/riscv.cc: New def.
   * config/riscv/riscv.md: New include.
   * config/riscv/xiangshan.md: New file.
  
  gcc/testsuite/ChangeLog:
  
   * gcc.target/riscv/mcpu-xiangshan-nanhu.c: New test.
 As was discussed last Tuesday, this should be safe, even at this late 
 stage in the gcc-14 cycle.
 

  +/* Costs to use when optimizing for xiangshan nanhu.  */
  +static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
  +  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},/* fp_add */
  +  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},/* fp_mul */
  +  {COSTS_N_INSNS (10), COSTS_N_INSNS (20)},  /* fp_div */
  +  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)},/* int_mul */
  +  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},/* int_div */
  +  6, /* issue_rate */
  +  3, /* branch_cost */
  +  3, /* memory_cost */
  +  3, /* fmv_cost */
  +  true,  /* 
slow_unaligned_access */
  +  false, /* use_divmod_expansion 
*/
  +  RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,  /* fusible_ops */
  +  NULL,  /* vector cost 
*/
 Is your integer division really that fast?  The table above essentially 
 says that your cpu can do integer division in 6 cycles.
 
  +
  +(define_insn_reservation "xiangshan_mul" 3
  +  (and (eq_attr "tune" "xiangshan")
  +   (eq_attr "type" "imul"))
  +  "xs_mdu_rs")
  +
  +(define_insn_reservation "xiangshan_div" 21
  +  (and (eq_attr "tune" "xiangshan")
  +   (eq_attr "type" "idiv"))
  +  "xs_mdu_rs")
 Whereas your pipeline description says it's 21c.
 
 I strongly suspect you want to increase the cost of the int_div in the 
 tuning table.  And with a the higher cost you probably want to turn on 
 use_divmod_expansion.
 
 I'll also note that your scheduler description also indicates your 
 division is fully pipelined.  Is that correct?  if not, you'll want to 
 adjust that reservation.
 
 
 
  +
  +(define_insn_reservation "xiangshan_sfdiv" 11
  +  (and (eq_attr "tune" "xiangshan")
  +   (eq_attr "type" "fdiv")
  +   (eq_attr "mode" "SF"))
  +  "xs_fmisc_rs")
  +
  +(define_insn_reservation "xiangshan_sfsqrt" 17
  +  (and (eq_attr "tune" "xiangshan")
  +   (eq_attr "type" "fsqrt")
  +   (eq_attr "mode" "SF"))
  +  "xs_fmisc_rs")
  +
  +(define_insn_reservation "xiangshan_dfdiv" 21
  +  (and (eq_attr "tune" "xiangshan")
  +   (eq_attr "type" "fdiv")
  +   (eq_attr "mode" "DF"))
  +  "xs_fmisc_rs")
  +
  +(define_insn_reservation "xiangshan_dfsqrt" 37
  +  (and (eq_attr "tune" "xiangshan")
  +   (eq_attr "type" "fsqrt")
  +   (eq_attr "mode" "DF"))
  +  "xs_fmisc_rs")
 Similarly these say your fpdiv and fpsqrt are fully pipelined.  It's 
 certainly possible, but I suspect it's really just an oversight.  Given 
 these values you may also want to adjust the cost of an fp division in 
 the cost table.
 
 
 Finally with such high values for for the div/sqrt units, we find that 
 the DFA "blows up" causing genattrtab to run for a very long time. We'll 
 have to keep an eye on that.
 
 And just to be clear, I think these can be done as a followup patch. I'm 
 going to push this patch as-is rather than make any adjustments -- you 
 almost certainly know the processor's capabilities better than myself or 
 anyone else on this list :-)
 
 
 Jeff

Thank you for the comment, some pipeline processing costs may still need to
 be confirmed, and I will correct them in next patch.

BR,
Jiawei

Re: [PATCH] tree-optimization/114151 - revert PR114074 fix

2024-03-19 Thread Jakub Jelinek
On Tue, Mar 19, 2024 at 01:06:50PM +0100, Richard Biener wrote:
> The following reverts the chrec_fold_multiply fix and only keeps
> handling of constant overflow which keeps the original testcase
> fixed.  A better solution might involve ranger improvements or
> tracking of assumptions during SCEV analysis similar to what niter
> analysis does.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> OK for trunk?  Or do you prefer to not fix the INTEGER_CST case
> either?
> 
> Thanks,
> Richard.
> 
>   PR tree-optimization/114151
>   PR tree-optimization/114269
>   PR tree-optimization/114322
>   PR tree-optimization/114074
>   * tree-chrec.cc (chrec_fold_multiply): Restrict the use of
>   unsigned arithmetic when actual overflow on constant operands
>   is observed.
> 
>   * gcc.dg/pr68317.c: Revert last change.

LGTM.

Jakub



[PATCH] tree-optimization/114151 - revert PR114074 fix

2024-03-19 Thread Richard Biener
The following reverts the chrec_fold_multiply fix and only keeps
handling of constant overflow which keeps the original testcase
fixed.  A better solution might involve ranger improvements or
tracking of assumptions during SCEV analysis similar to what niter
analysis does.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK for trunk?  Or do you prefer to not fix the INTEGER_CST case
either?

Thanks,
Richard.

PR tree-optimization/114151
PR tree-optimization/114269
PR tree-optimization/114322
PR tree-optimization/114074
* tree-chrec.cc (chrec_fold_multiply): Restrict the use of
unsigned arithmetic when actual overflow on constant operands
is observed.

* gcc.dg/pr68317.c: Revert last change.
---
 gcc/testsuite/gcc.dg/pr68317.c |  4 +--
 gcc/tree-chrec.cc  | 63 +++---
 2 files changed, 30 insertions(+), 37 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr68317.c b/gcc/testsuite/gcc.dg/pr68317.c
index 06cd2e1da9c..bd053a7522b 100644
--- a/gcc/testsuite/gcc.dg/pr68317.c
+++ b/gcc/testsuite/gcc.dg/pr68317.c
@@ -12,8 +12,8 @@ foo ()
 {
  int32_t index = 0;
 
- for (index; index <= 10; index--) /* { dg-warning "iteration \[0-9\]+ invokes 
undefined behavior" } */
+ for (index; index <= 10; index--) // expected warning here
/* Result of the following multiply will overflow
   when converted to signed int32_t.  */
-   bar ((0xcafe + index) * 0xdead);
+   bar ((0xcafe + index) * 0xdead);  /* { dg-warning "iteration \[0-9\]+ 
invokes undefined behavior" } */
 }
diff --git a/gcc/tree-chrec.cc b/gcc/tree-chrec.cc
index 1b2ed753551..8b7982a2dbe 100644
--- a/gcc/tree-chrec.cc
+++ b/gcc/tree-chrec.cc
@@ -38,8 +38,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple.h"
 #include "tree-ssa-loop.h"
 #include "dumpfile.h"
-#include "value-range.h"
-#include "value-query.h"
 #include "tree-scalar-evolution.h"
 
 /* Extended folder for chrecs.  */
@@ -475,41 +473,36 @@ chrec_fold_multiply (tree type,
 
  /* When overflow is undefined and CHREC_LEFT/RIGHT do not have the
 same sign or CHREC_LEFT is zero then folding the multiply into
-the addition does not have the same behavior on overflow.  Use
-unsigned arithmetic in that case.  */
- value_range rl, rr;
- if (!ANY_INTEGRAL_TYPE_P (type)
- || TYPE_OVERFLOW_WRAPS (type)
- || integer_zerop (CHREC_LEFT (op0))
- || (TREE_CODE (CHREC_LEFT (op0)) == INTEGER_CST
- && TREE_CODE (CHREC_RIGHT (op0)) == INTEGER_CST
- && (tree_int_cst_sgn (CHREC_LEFT (op0))
- == tree_int_cst_sgn (CHREC_RIGHT (op0
- || (get_range_query (cfun)->range_of_expr (rl, CHREC_LEFT (op0))
- && !rl.undefined_p ()
- && (rl.nonpositive_p () || rl.nonnegative_p ())
- && get_range_query (cfun)->range_of_expr (rr,
-   CHREC_RIGHT (op0))
- && !rr.undefined_p ()
- && ((rl.nonpositive_p () && rr.nonpositive_p ())
- || (rl.nonnegative_p () && rr.nonnegative_p ()
-   {
- tree left = chrec_fold_multiply (type, CHREC_LEFT (op0), op1);
- tree right = chrec_fold_multiply (type, CHREC_RIGHT (op0), op1);
- return build_polynomial_chrec (CHREC_VARIABLE (op0), left, right);
-   }
- else
+the addition does not have the same behavior on overflow.
+Using unsigned arithmetic in that case causes too many performance
+regressions, but catch the constant case where the multiplication
+of the step overflows.  */
+ if (INTEGRAL_TYPE_P (type)
+ && TYPE_OVERFLOW_UNDEFINED (type)
+ && !integer_zerop (CHREC_LEFT (op0))
+ && TREE_CODE (op1) == INTEGER_CST
+ && TREE_CODE (CHREC_RIGHT (op0)) == INTEGER_CST)
{
- tree utype = unsigned_type_for (type);
- tree uop1 = chrec_convert_rhs (utype, op1);
- tree uleft0 = chrec_convert_rhs (utype, CHREC_LEFT (op0));
- tree uright0 = chrec_convert_rhs (utype, CHREC_RIGHT (op0));
- tree left = chrec_fold_multiply (utype, uleft0, uop1);
- tree right = chrec_fold_multiply (utype, uright0, uop1);
- tree tem = build_polynomial_chrec (CHREC_VARIABLE (op0),
-left, right);
- return chrec_convert_rhs (type, tem);
+ wi::overflow_type ovf = wi::OVF_NONE;
+ wide_int res
+   = wi::mul (wi::to_wide (CHREC_RIGHT (op0)),
+  wi::to_wide (op1), TYPE_SIGN (type), );
+ if (ovf != wi::OVF_NONE)
+   {
+ tree utype = unsigned_type_for (type);
+ tree 

Re: [PATCH] system.h: rename vec_step to workaround powerpc/clang bug [PR114369]

2024-03-19 Thread Richard Biener
On Tue, 19 Mar 2024, Jakub Jelinek wrote:

> On Tue, Mar 19, 2024 at 12:54:47PM +0100, Richard Biener wrote:
> > Works for me, but would
> > 
> > #undef vec_step
> >
> > work or is it really a keyword in the clang side?
> 
> No, it is really keyword.
> #undef vec_step
> int
> main ()
> {
>   int vec_step = 0;
>   return vec_step;
> }
> clang --target=powerpc64le-linux -o /tmp/vecstep{,.c}
> /tmp/vecstep.c:5:7: error: expected identifier or '('
>   int vec_step = 0;
>   ^
> /tmp/vecstep.c:6:18: error: expected expression
>   return vec_step;
>  ^
> 2 errors generated.

Ah, OK - thanks for clarifying.

Richard.


Re: [PATCH] system.h: rename vec_step to workaround powerpc/clang bug [PR114369]

2024-03-19 Thread Jakub Jelinek
On Tue, Mar 19, 2024 at 12:54:47PM +0100, Richard Biener wrote:
> Works for me, but would
> 
> #undef vec_step
>
> work or is it really a keyword in the clang side?

No, it is really keyword.
#undef vec_step
int
main ()
{
  int vec_step = 0;
  return vec_step;
}
clang --target=powerpc64le-linux -o /tmp/vecstep{,.c}
/tmp/vecstep.c:5:7: error: expected identifier or '('
  int vec_step = 0;
  ^
/tmp/vecstep.c:6:18: error: expected expression
  return vec_step;
 ^
2 errors generated.

Jakub



Re: [PATCH] system.h: rename vec_step to workaround powerpc/clang bug [PR114369]

2024-03-19 Thread Richard Biener
On Tue, 19 Mar 2024, Jakub Jelinek wrote:

> On Sat, Jul 20, 2019 at 05:26:57PM +0100, Richard Sandiford wrote:
> > Gerald Pfeifer  writes:
> > > I have seen an increasing number of reports of GCC failing to
> > > build with clang on powerpc (on FreeBSD, though that's probably
> > > immaterial).
> > >
> > > Turns out that clang has vec_step as a reserved word on powerpc
> > > with AltiVec.
> > >
> > > We OTOH use vec_step s as a variable name in gcc/tree-vect-loop.c.
> > >
> > >
> > > The best approach I can see is to rename vec_step.  Before I prepare
> > > a patch: what alternate name/spelling would you prefer?
> > 
> > Would it work to #define vec_step to vec_step_ or something on affected
> > hosts, say in system.h?
> > 
> > I'd prefer that to renmaing since "vec_step" does seem the most natural
> > name for the variable.  The equivalent scalar variable is "step" and
> > other vector values in the surrounding code also use the "vec_" prefix.
> 
> So like this?

Works for me, but would

#undef vec_step

work or is it really a keyword in the clang side?

Thanks,
Richard.

> If/when clang finally fixes https://github.com/llvm/llvm-project/issues/85579
> on their side, we can then limit it to clang versions which still have the
> bug.
> 
> I've git grepped for vec_set and appart from altivec.h it is just used in
> tree-vect-loop.cc, some Ada files which aren't preprocessed, ChangeLogs,
> rs6000-vecdefines.h (but that header is only included from altivec.h and
> vec_step is then redefined to the function-like macro) and in 
> rs6000-overload.def
> but that file is processed with a generator, not included in C/C++ sources.
> 
> 2024-03-19  Jakub Jelinek  
> 
>   PR bootstrap/114369
>   * system.h (vec_step): Define to vec_step_ when compiling
>   with clang on PowerPC.
> 
> --- gcc/system.h.jj   2024-03-08 09:07:29.484624793 +0100
> +++ gcc/system.h  2024-03-19 11:39:18.122700551 +0100
> @@ -1302,6 +1302,12 @@ void gcc_stablesort_r (void *, size_t, s
>  #define NULL nullptr
>  #endif
>  
> +/* Workaround clang on PowerPC which has vec_step as reserved keyword
> +   rather than function-like macro defined in .  See PR114369.  */
> +#if defined(__clang__) && defined(__powerpc__)
> +#define vec_step vec_step_
> +#endif
> +
>  /* Return true if STR string starts with PREFIX.  */
>  
>  inline bool
> 
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: Frontend access to target features (was Re: [PATCH] libgccjit: Add ability to get CPU features)

2024-03-19 Thread Arthur Cohen

Hi,

On 3/5/24 16:09, David Malcolm wrote:

On Thu, 2023-11-09 at 19:33 -0500, Antoni Boucher wrote:

Hi.
See answers below.

On Thu, 2023-11-09 at 18:04 -0500, David Malcolm wrote:

On Thu, 2023-11-09 at 17:27 -0500, Antoni Boucher wrote:

Hi.
This patch adds support for getting the CPU features in libgccjit
(bug
112466)

There's a TODO in the test:
I'm not sure how to test that gcc_jit_target_info_arch returns
the
correct value since it is dependant on the CPU.
Any idea on how to improve this?

Also, I created a CStringHash to be able to have a
std::unordered_set. Is there any built-in way of
doing
this?


Thanks for the patch.

Some high-level questions:

Is this specifically about detecting capabilities of the host that
libgccjit is currently running on? or how the target was configured
when libgccjit was built?


I'm less sure about this part. I'll need to do more tests.



One of the benefits of libgccjit is that, in theory, we support all
of
the targets that GCC already supports.  Does this patch change
that,
or
is this more about giving client code the ability to determine
capabilities of the specific host being compiled for?


This should not change that. If it does, this is a bug.



I'm nervous about having per-target jit code.  Presumably there's a
reason that we can't reuse existing target logic here - can you
please
describe what the problem is.  I see that the ChangeLog has:


 * config/i386/i386-jit.cc: New file.


where i386-jit.cc has almost 200 lines of nontrivial code.  Where
did
this come from?  Did you base it on existing code in our source
tree,
making modifications to fit the new internal API, or did you write
it
from scratch?  In either case, how onerous would this be for other
targets?


This was mostly copied from the same code done for the Rust and D
frontends.
See this commit and the following:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b1c06fd9723453dd2b2ec306684cb806dc2b4fbb
The equivalent to i386-jit.cc is there:
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=22e3557e2d52f129f2bbfdc98688b945dba28dc9


[CCing Iain and Arthur re those patches; for reference, the patch being
discussed is attached to :
https://gcc.gnu.org/pipermail/jit/2024q1/001792.html ]

One of my concerns about this patch is that we seem to be gaining code
that's per-(frontend x config) which seems to be copied and pasted with
a search and replace, which could lead to an M*N explosion.


I think this is definitely already the case, and it would be worth 
investigating if C/C++/Rust/jit can reuse a similar set of target files, 
or how to factor them together. I imagine that all of these components 
share similar needs for the targets they support.




Is there any real difference between the per-config code for the
different frontends, or should there be a general "enumerate all
features of the target" hook that's independent of the frontend? (but
perhaps calls into it).

Am I right in thinking that (rustc with default LLVM backend) has some
set of feature strings that both (rustc with rustc_codegen_gcc) and
gccrs are trying to emulate?  If so, is it presumably a goal that
libgccjit gives identical results to gccrs?  If so, would it be crazy
for libgccjit to consume e.g. config/i386/i386-rust.cc ?


I think this would definitely make sense, and it could probably be 
extended to other frontends. For the time being I think it makes sense 
to try it out for gccrs and jit. But finding a fitting name will be hard :)


Best,

Arthur



Dave





I'm not at expert at target hooks (or at the i386 backend), so if
we
do
go with this approach I'd want someone else to review those parts
of
the patch.

Have you verified that GCC builds with this patch with jit *not*
enabled in the enabled languages?


I will do.



[...snip...]

A nitpick:


+.. function:: const char * \
+  gcc_jit_target_info_arch (gcc_jit_target_info
*info)
+
+   Get the architecture of the currently running CPU.


What does this string look like?
How long does the pointer remain valid?


It's the march string, like "znver2", for instance.
It remains valid until we free the gcc_jit_target_info object.



Thanks again; hope the above makes sense
Dave







OpenPGP_0x1B3465B044AD9C65.asc
Description: OpenPGP public key


OpenPGP_signature.asc
Description: OpenPGP digital signature


Re: [PATCH] libstdc++, Darwin: Do not use dev/null as the file for executables.

2024-03-19 Thread Jonathan Wakely
On Tue, 19 Mar 2024 at 10:55, Iain Sandoe wrote:
>
> Note that Windows-based platforms do something quite similar, but always
> use the same (x.exe) filename.  I wonder, in passing, if that makes a
> vulnerability in parallel testing (I chose to avoid the possibility for
> Darwin).

Parallel testing should use a separate directory for each runtest
process, so each x.exe is in a separate directory. That's a
predictable name that could be used by an attacker, but I don't think
it's a problem specific to parallel testing. A single runtest process
has the same behaviour.

> Tested on x86_64-darwin21, x86_64-linux-gnu,
> OK for trunk?
> back-ports where applicable (I did not yet check)?
> thanks
> Iain
>
> --- 8< ---
>
> Darwin has a separate debug linker, which is invoked when the command
> line contains source files and debug is enabled.
>
> Using /dev/null as the executable name does not, therefore, work when
> debug is enabled, since the debug linker does not accept /dev/null as
> a valid executable name.
>
> The leads to incorrectly UNSUPPORTED testcases because of the unintended
> error result from the test compilation.
>
> The solution here is to use a temporary file that is deleted at the
> end of the test (which is the mechanism used elsewhere)
>
> libstdc++-v3/ChangeLog:
>
> * testsuite/lib/libstdc++.exp (v3_target_compile): Instead of
> /dev/null, use a temporary file for test executables on Darwin.
>
> Signed-off-by: Iain Sandoe 
> ---
>  libstdc++-v3/testsuite/lib/libstdc++.exp | 20 +++-
>  1 file changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/libstdc++-v3/testsuite/lib/libstdc++.exp 
> b/libstdc++-v3/testsuite/lib/libstdc++.exp
> index 58804ecab26..4ae4cecb169 100644
> --- a/libstdc++-v3/testsuite/lib/libstdc++.exp
> +++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
> @@ -615,11 +615,15 @@ proc v3_target_compile { source dest type options } {
> }
>  }
>
> +# For Windows and Darwin we might want to create a temporary file.
> +# note that it needs deleteting.

Capital N for Note, and "deleting".

OK for trunk and branches with that change.


> +set file_to_delete ""
>  # Small adjustment for Windows hosts.
>  if { $dest == "/dev/null"
>   && [info exists ::env(OS)] && [string match "Windows*" $::env(OS)] 
> } {
> if { $type == "executable" } {
> set dest "x.exe"
> +   set file_to_delete ${dest}
> } else {
> # Windows uses special file named "nul" as a substitute for
> # /dev/null
> @@ -627,6 +631,15 @@ proc v3_target_compile { source dest type options } {
> }
>  }
>
> +# Using /dev/null as the executable name does not work on Darwin when
> +# debug is enabled, since the debug linker does not accept /dev/null as
> +# a valid executable name.
> +if { $dest == "/dev/null" && [istarget *-*-darwin*]
> + && $type == "executable" } {
> +   set dest dev-null-[pid].exe
> +   set file_to_delete ${dest}
> +}
> +
>  lappend options "compiler=$cxx_final"
>  lappend options "timeout=[timeout_value]"
>
> @@ -637,7 +650,12 @@ proc v3_target_compile { source dest type options } {
>  }
>
>  set comp_output [target_compile $source $dest $type $options]
> -
> +if { $type == "executable" && $file_to_delete != "" } {
> +   file delete $file_to_delete
> +   if { [istarget *-*-darwin*] && [file exists $file_to_delete.dSYM] } {
> +   file delete -force $file_to_delete.dSYM
> +   }
> +}
>  return $comp_output
>  }
>
> --
> 2.39.2 (Apple Git-143)
>



[PATCH] system.h: rename vec_step to workaround powerpc/clang bug [PR114369]

2024-03-19 Thread Jakub Jelinek
On Sat, Jul 20, 2019 at 05:26:57PM +0100, Richard Sandiford wrote:
> Gerald Pfeifer  writes:
> > I have seen an increasing number of reports of GCC failing to
> > build with clang on powerpc (on FreeBSD, though that's probably
> > immaterial).
> >
> > Turns out that clang has vec_step as a reserved word on powerpc
> > with AltiVec.
> >
> > We OTOH use vec_step s as a variable name in gcc/tree-vect-loop.c.
> >
> >
> > The best approach I can see is to rename vec_step.  Before I prepare
> > a patch: what alternate name/spelling would you prefer?
> 
> Would it work to #define vec_step to vec_step_ or something on affected
> hosts, say in system.h?
> 
> I'd prefer that to renmaing since "vec_step" does seem the most natural
> name for the variable.  The equivalent scalar variable is "step" and
> other vector values in the surrounding code also use the "vec_" prefix.

So like this?

If/when clang finally fixes https://github.com/llvm/llvm-project/issues/85579
on their side, we can then limit it to clang versions which still have the
bug.

I've git grepped for vec_set and appart from altivec.h it is just used in
tree-vect-loop.cc, some Ada files which aren't preprocessed, ChangeLogs,
rs6000-vecdefines.h (but that header is only included from altivec.h and
vec_step is then redefined to the function-like macro) and in 
rs6000-overload.def
but that file is processed with a generator, not included in C/C++ sources.

2024-03-19  Jakub Jelinek  

PR bootstrap/114369
* system.h (vec_step): Define to vec_step_ when compiling
with clang on PowerPC.

--- gcc/system.h.jj 2024-03-08 09:07:29.484624793 +0100
+++ gcc/system.h2024-03-19 11:39:18.122700551 +0100
@@ -1302,6 +1302,12 @@ void gcc_stablesort_r (void *, size_t, s
 #define NULL nullptr
 #endif
 
+/* Workaround clang on PowerPC which has vec_step as reserved keyword
+   rather than function-like macro defined in .  See PR114369.  */
+#if defined(__clang__) && defined(__powerpc__)
+#define vec_step vec_step_
+#endif
+
 /* Return true if STR string starts with PREFIX.  */
 
 inline bool


Jakub



[PATCH] libstdc++, Darwin: Do not use dev/null as the file for executables.

2024-03-19 Thread Iain Sandoe
Note that Windows-based platforms do something quite similar, but always
use the same (x.exe) filename.  I wonder, in passing, if that makes a
vulnerability in parallel testing (I chose to avoid the possibility for
Darwin).

Tested on x86_64-darwin21, x86_64-linux-gnu,
OK for trunk?
back-ports where applicable (I did not yet check)?
thanks
Iain

--- 8< ---

Darwin has a separate debug linker, which is invoked when the command
line contains source files and debug is enabled.

Using /dev/null as the executable name does not, therefore, work when
debug is enabled, since the debug linker does not accept /dev/null as
a valid executable name.

The leads to incorrectly UNSUPPORTED testcases because of the unintended
error result from the test compilation.

The solution here is to use a temporary file that is deleted at the
end of the test (which is the mechanism used elsewhere)

libstdc++-v3/ChangeLog:

* testsuite/lib/libstdc++.exp (v3_target_compile): Instead of
/dev/null, use a temporary file for test executables on Darwin.

Signed-off-by: Iain Sandoe 
---
 libstdc++-v3/testsuite/lib/libstdc++.exp | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/lib/libstdc++.exp 
b/libstdc++-v3/testsuite/lib/libstdc++.exp
index 58804ecab26..4ae4cecb169 100644
--- a/libstdc++-v3/testsuite/lib/libstdc++.exp
+++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
@@ -615,11 +615,15 @@ proc v3_target_compile { source dest type options } {
}
 }
 
+# For Windows and Darwin we might want to create a temporary file.
+# note that it needs deleteting.
+set file_to_delete ""
 # Small adjustment for Windows hosts.
 if { $dest == "/dev/null"
  && [info exists ::env(OS)] && [string match "Windows*" $::env(OS)] } {
if { $type == "executable" } {
set dest "x.exe"
+   set file_to_delete ${dest}
} else {
# Windows uses special file named "nul" as a substitute for
# /dev/null
@@ -627,6 +631,15 @@ proc v3_target_compile { source dest type options } {
}
 }
 
+# Using /dev/null as the executable name does not work on Darwin when
+# debug is enabled, since the debug linker does not accept /dev/null as
+# a valid executable name.
+if { $dest == "/dev/null" && [istarget *-*-darwin*]
+ && $type == "executable" } {
+   set dest dev-null-[pid].exe
+   set file_to_delete ${dest}
+}
+
 lappend options "compiler=$cxx_final"
 lappend options "timeout=[timeout_value]"
 
@@ -637,7 +650,12 @@ proc v3_target_compile { source dest type options } {
 }
 
 set comp_output [target_compile $source $dest $type $options]
-
+if { $type == "executable" && $file_to_delete != "" } {
+   file delete $file_to_delete
+   if { [istarget *-*-darwin*] && [file exists $file_to_delete.dSYM] } {
+   file delete -force $file_to_delete.dSYM
+   }
+}
 return $comp_output
 }
 
-- 
2.39.2 (Apple Git-143)



Re: _LIBCXX_DEBUG value initialized singular iterators assert failures in std algorithms [PR104316]

2024-03-19 Thread Jonathan Wakely
On Mon, 18 Mar 2024 at 21:38, François Dumont wrote:
>
> Both committed now.
>
> Just to confirm, those 2 last patches should be backported to gcc-13
> branch, right ?

Yes please.

>
> I might have a try to update version.h but if you want to do it before
> don't hesitate.

You'll need to have 'autogen' installed to do it (I'm going to update
the docs to describe that today).



[committed] arc: Fix up arc_setup_incoming_varargs [PR114175]

2024-03-19 Thread Jakub Jelinek
Hi!

Like for x86-64, alpha or rs6000, arc seems to be affected too.

Just visually checked differences in c23-stdarg-9.c assembly in a cross
without/with the patch, committed to trunk.

2024-03-19  Jakub Jelinek  

PR target/114175
* config/arc/arc.cc (arc_setup_incoming_varargs): Only skip
arc_function_arg_advance for TYPE_NO_NAMED_ARGS_STDARG_P functions
if arg.type is NULL.

--- gcc/config/arc/arc.cc.jj2024-01-09 13:47:23.156686787 +0100
+++ gcc/config/arc/arc.cc   2024-03-19 09:28:04.107112113 +0100
@@ -2352,7 +2352,8 @@ arc_setup_incoming_varargs (cumulative_a
   /* We must treat `__builtin_va_alist' as an anonymous arg.  */
 
   next_cum = *get_cumulative_args (args_so_far);
-  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
+  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
+  || arg.type != NULL_TREE)
 arc_function_arg_advance (pack_cumulative_args (_cum), arg);
   first_anon_arg = next_cum;
 

Jakub



Pushed: [PATCH v2] LoongArch: Fix C23 (...) functions returning large aggregates [PR114175]

2024-03-19 Thread Xi Ruoyao
On Tue, 2024-03-19 at 11:19 +0800, chenglulu wrote:
> 
> 在 2024/3/18 下午5:34, Xi Ruoyao 写道:
> > We were assuming TYPE_NO_NAMED_ARGS_STDARG_P don't have any named
> > arguments and there is nothing to advance, but that is not the case
> > for (...) functions returning by hidden reference which have one
> > such
> > artificial argument.  This is causing gcc.dg/c23-stdarg-6.c and
> > gcc.dg/c23-stdarg-8.c to fail.
> > 
> > Fix the issue by checking if arg.type is NULL, as r14-9503 explains.
> > 
> > gcc/ChangeLog:
> > 
> > PR target/114175
> > * config/loongarch/loongarch.cc
> > (loongarch_setup_incoming_varargs): Only skip
> > loongarch_function_arg_advance for
> > TYPE_NO_NAMED_ARGS_STDARG_P
> > functions if arg.type is NULL.
> > ---
> > 
> > Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?
> > 
> >   gcc/config/loongarch/loongarch.cc | 3 ++-
> >   1 file changed, 2 insertions(+), 1 deletion(-)
> > 
> > diff --git a/gcc/config/loongarch/loongarch.cc
> > b/gcc/config/loongarch/loongarch.cc
> > index 70e31bb831c..57de8ef7d20 100644
> > --- a/gcc/config/loongarch/loongarch.cc
> > +++ b/gcc/config/loongarch/loongarch.cc
> > @@ -767,7 +767,8 @@ loongarch_setup_incoming_varargs
> > (cumulative_args_t cum,
> >    argument.  Advance a local copy of CUM past the last "real"
> > named
> >    argument, to find out how many registers are left over.  */
> >     local_cum = *get_cumulative_args (cum);
> I think it's important to add annotation information here:
>  /* where there is no hidden return argument passed, arg.type
> 
>   is always NULL.  */
> 
> Others LTGM.
> 
> Thanks!

Pushed v2 with a comment added as attached.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University
From c1fd4589c2bf9fd8409d51b94df219cb75107762 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao 
Date: Mon, 18 Mar 2024 17:18:34 +0800
Subject: [PATCH v2] LoongArch: Fix C23 (...) functions returning large
 aggregates [PR114175]

We were assuming TYPE_NO_NAMED_ARGS_STDARG_P don't have any named
arguments and there is nothing to advance, but that is not the case
for (...) functions returning by hidden reference which have one such
artificial argument.  This is causing gcc.dg/c23-stdarg-6.c and
gcc.dg/c23-stdarg-8.c to fail.

Fix the issue by checking if arg.type is NULL, as r14-9503 explains.

gcc/ChangeLog:

	PR target/114175
	* config/loongarch/loongarch.cc
	(loongarch_setup_incoming_varargs): Only skip
	loongarch_function_arg_advance for TYPE_NO_NAMED_ARGS_STDARG_P
	functions if arg.type is NULL.
---
 gcc/config/loongarch/loongarch.cc | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 70e31bb831c..5344f2a6987 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -767,7 +767,13 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum,
  argument.  Advance a local copy of CUM past the last "real" named
  argument, to find out how many registers are left over.  */
   local_cum = *get_cumulative_args (cum);
-  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
+
+  /* For a C23 variadic function w/o any named argument, and w/o an
+ artifical argument for large return value, skip advancing args.
+ There is such an artifical argument iff. arg.type is non-NULL
+ (PR 114175).  */
+  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
+  || arg.type != NULL_TREE)
 loongarch_function_arg_advance (pack_cumulative_args (_cum), arg);
 
   /* Found out how many registers we need to save.  */
-- 
2.44.0



[committed] openmp: Make c_omp_check_loop_binding_exprs diagnostics translatable [PR114364]

2024-03-19 Thread Jakub Jelinek
Hi!

c_omp_check_loop_binding_exprs with check_loop_binding_expr was composing
diagnostics from a format string with %s that provided additional words (but not
keywords).  That is a big no no for translations, both because the translator
can't choose a different word order and because the %s part wasn't translated
at all (would need to use _("...") to get translated), so this patch rewrites it
such that the whole messages are in the format strings.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2024-03-19  Jakub Jelinek  

PR c/114364
* c-omp.cc (enum check_loop_binding_expr_ctx): New type.
(check_loop_binding_expr): Remove context argument, add ctx
argument with check_loop_binding_expr_ctx type at the end.  Don't
create diagnostic message from multiple pieces.
(c_omp_check_loop_binding_exprs): Adjust callers.

--- gcc/c-family/c-omp.cc.jj2024-01-09 13:47:23.132687124 +0100
+++ gcc/c-family/c-omp.cc   2024-03-16 16:51:54.158702954 +0100
@@ -1793,22 +1793,46 @@ check_loop_binding_expr_r (tree *tp, int
 #define LOCATION_OR(loc1, loc2) \
   ((loc1) != UNKNOWN_LOCATION ? (loc1) : (loc2))
 
+enum check_loop_binding_expr_ctx {
+  CHECK_LOOP_BINDING_EXPR_CTX_LOOP_VAR,
+  CHECK_LOOP_BINDING_EXPR_CTX_IN_INIT,
+  CHECK_LOOP_BINDING_EXPR_CTX_END_TEST,
+  CHECK_LOOP_BINDING_EXPR_CTX_INCR
+};
+
 /* Check a single expression EXPR for references to variables bound in
intervening code in BODY.  Return true if ok, otherwise give an error
referencing CONTEXT and return false.  Use LOC for the error message
if EXPR doesn't have one.  */
 static bool
-check_loop_binding_expr (tree expr, tree body, const char *context,
-location_t loc)
+check_loop_binding_expr (tree expr, tree body, location_t loc,
+check_loop_binding_expr_ctx ctx)
 {
   tree bad = walk_tree (, check_loop_binding_expr_r, (void *), NULL);
 
   if (bad)
 {
   location_t eloc = EXPR_LOCATION (expr);
-  error_at (LOCATION_OR (eloc, loc),
-   "variable %qD used %s is bound "
-   "in intervening code", bad, context);
+  eloc = LOCATION_OR (eloc, loc);
+  switch (ctx)
+   {
+   case CHECK_LOOP_BINDING_EXPR_CTX_LOOP_VAR:
+ error_at (eloc, "variable %qD used as loop variable is bound "
+   "in intervening code", bad);
+ break;
+   case CHECK_LOOP_BINDING_EXPR_CTX_IN_INIT:
+ error_at (eloc, "variable %qD used in initializer is bound "
+   "in intervening code", bad);
+ break;
+   case CHECK_LOOP_BINDING_EXPR_CTX_END_TEST:
+ error_at (eloc, "variable %qD used in end test is bound "
+   "in intervening code", bad);
+ break;
+   case CHECK_LOOP_BINDING_EXPR_CTX_INCR:
+ error_at (eloc, "variable %qD used in increment expression is bound "
+   "in intervening code", bad);
+ break;
+   }
   return false;
 }
   return true;
@@ -1839,13 +1863,15 @@ c_omp_check_loop_binding_exprs (tree stm
 
   e = TREE_OPERAND (init, 1);
   eloc = LOCATION_OR (EXPR_LOCATION (init), loc);
-  if (!check_loop_binding_expr (decl, body, "as loop variable", eloc))
+  if (!check_loop_binding_expr (decl, body, eloc,
+   CHECK_LOOP_BINDING_EXPR_CTX_LOOP_VAR))
ok = false;
-  if (!check_loop_binding_expr (e, body, "in initializer", eloc))
+  if (!check_loop_binding_expr (e, body, eloc,
+   CHECK_LOOP_BINDING_EXPR_CTX_IN_INIT))
ok = false;
   if (orig_init
- && !check_loop_binding_expr (orig_init, body,
-  "in initializer", eloc))
+ && !check_loop_binding_expr (orig_init, body, eloc,
+  CHECK_LOOP_BINDING_EXPR_CTX_IN_INIT))
ok = false;
 
   /* INCR and/or COND may be null if this is a template with a
@@ -1859,7 +1885,8 @@ c_omp_check_loop_binding_exprs (tree stm
e = TREE_OPERAND (cond, 0);
  else
e = cond;
- if (!check_loop_binding_expr (e, body, "in end test", eloc))
+ if (!check_loop_binding_expr (e, body, eloc,
+   CHECK_LOOP_BINDING_EXPR_CTX_END_TEST))
ok = false;
}
 
@@ -1870,8 +1897,8 @@ c_omp_check_loop_binding_exprs (tree stm
 increment/decrement.  We don't have to check the latter
 since there are no operands besides the iteration variable.  */
  if (TREE_CODE (incr) == MODIFY_EXPR
- && !check_loop_binding_expr (TREE_OPERAND (incr, 1), body,
-  "in increment expression", eloc))
+ && !check_loop_binding_expr (TREE_OPERAND (incr, 1), body, eloc,
+  CHECK_LOOP_BINDING_EXPR_CTX_INCR))
ok = false;
}

Re: [PATCH v2 00/13] Add aarch64-w64-mingw32 target

2024-03-19 Thread Martin Storsjö

On Mon, 18 Mar 2024, Fangrui Song wrote:


LLVM has had an aarch64 mingw ABI support for a long time. Does this
patch series introduce a different ABI?
If yes, do you have a summary?


This patchset in itself does not reach ABI compatibility with the 
preexisting aarch64 mingw ecosystem - but this is also just the first 
patchset to lay out the groundwork for a new mingw target within GCC.


As far as I've understood, the divergence is not intended, and they are 
working on converging towards compatibility - but such bits are to be 
handled in later patchsets.


Off the top of my head, the major missing pieces wrt compatbility are the 
variadic calling convention, and using 64 bit doubles for "long double" 
(just like for Darwin).



Does the patch need any adaptation on the LLVM side, or should a
different target triple be picked?


No, I don't think a different target triple should be used - it is 
intended to be the same, but compatibility is a work in progress.



I have always been wondering what "32" in "x86_x64-w64-mingw32" means.


(Nit, I presume you meant "x86_64-w64-mingw32".)

As Andrew replied, w32 stands for win32, which isn't so much about the 
bitness as "the current windows API as of the last 30 years, as opposed to 
win16". So it's more of a name than something indicating any form of 
bitness.


Likewise, "w64" just indicates which vendor/fork is involved, so we also 
have i686-w64-mingw32 and armv7-w64-mingw32, for the 32 bit ABIs with an 
SDK from the same vendor.



https://github.com/llvm/llvm-project/pull/78908 even introduced the
first use of the triple "arm64ec-w64-mingw32" into llvm-project.


ARM64EC is a totally different thing though; that's an entirely separate 
ABI on almost every single level.


// Martin



Re: [PATCH] rs6000: Fix up setup_incoming_varargs [PR114175]

2024-03-19 Thread Kewen.Lin
Hi Jakub,

on 2024/3/19 01:21, Jakub Jelinek wrote:
> Hi!
> 
> The c23-stdarg-8.c test (as well as the new test below added to cover even
> more cases) FAIL on powerpc64le-linux and presumably other powerpc* targets
> as well.
> Like in the r14-9503-g218d174961 change on x86-64 we need to advance
> next_cum after the hidden return pointer argument even in case where
> there are no user arguments before ... in C23.
> The following patch does that.
> 
> There is another TYPE_NO_NAMED_ARGS_STDARG_P use later on:
>   if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
>   && targetm.calls.must_pass_in_stack (arg))
> first_reg_offset += rs6000_arg_size (TYPE_MODE (arg.type), arg.type);
> but I believe it was added there in r13-3549-g4fe34cdc unnecessarily,
> when there is no hidden return pointer argument, arg.type is NULL and
> must_pass_in_stack_var_size as well as must_pass_in_stack_var_size_or_pad
> return false in that case, and for the TYPE_NO_NAMED_ARGS_STDARG_P
> case with hidden return pointer argument that argument should have pointer
> type and it is the first argument, so must_pass_in_stack shouldn't be true
> for it either.
> 
> Bootstrapped/regtested on powerpc64le-linux, bootstrap/regtest on
> powerpc64-linux running, ok for trunk?

Okay for trunk (I guess all testings should go well), thanks for taking
care of this!

FWIW, I also tested c23-stdarg-* test cases on aix with this patch, all
of them worked well.

BR,
Kewen

> 
> 2024-03-18  Jakub Jelinek  
> 
>   PR target/114175
>   * config/rs6000/rs6000-call.cc (setup_incoming_varargs): Only skip
>   rs6000_function_arg_advance_1 for TYPE_NO_NAMED_ARGS_STDARG_P functions
>   if arg.type is NULL.
> 
>   * gcc.dg/c23-stdarg-9.c: New test.
> 
> --- gcc/config/rs6000/rs6000-call.cc.jj   2024-01-03 12:01:19.645532834 
> +0100
> +++ gcc/config/rs6000/rs6000-call.cc  2024-03-18 11:36:02.376846802 +0100
> @@ -2253,7 +2253,8 @@ setup_incoming_varargs (cumulative_args_
>  
>/* Skip the last named argument.  */
>next_cum = *get_cumulative_args (cum);
> -  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
> +  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
> +  || arg.type != NULL_TREE)
>  rs6000_function_arg_advance_1 (_cum, arg.mode, arg.type, arg.named,
>  0);
>  
> --- gcc/testsuite/gcc.dg/c23-stdarg-9.c.jj2024-03-18 11:46:17.281200214 
> +0100
> +++ gcc/testsuite/gcc.dg/c23-stdarg-9.c   2024-03-18 11:46:26.826065998 
> +0100
> @@ -0,0 +1,284 @@
> +/* Test C23 variadic functions with no named parameters, or last named
> +   parameter with a declaration not allowed in C17.  Execution tests.  */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -std=c23 -pedantic-errors" } */
> +
> +#include 
> +
> +struct S { int a[1024]; };
> +
> +int
> +f1 (...)
> +{
> +  int r = 0;
> +  va_list ap;
> +  va_start (ap);
> +  r += va_arg (ap, int);
> +  va_end (ap);
> +  return r;
> +}
> +
> +int
> +f2 (...)
> +{
> +  int r = 0;
> +  va_list ap;
> +  va_start (ap);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  va_end (ap);
> +  return r;
> +}
> +
> +int
> +f3 (...)
> +{
> +  int r = 0;
> +  va_list ap;
> +  va_start (ap);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  va_end (ap);
> +  return r;
> +}
> +
> +int
> +f4 (...)
> +{
> +  int r = 0;
> +  va_list ap;
> +  va_start (ap);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  va_end (ap);
> +  return r;
> +}
> +
> +int
> +f5 (...)
> +{
> +  int r = 0;
> +  va_list ap;
> +  va_start (ap);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  va_end (ap);
> +  return r;
> +}
> +
> +int
> +f6 (...)
> +{
> +  int r = 0;
> +  va_list ap;
> +  va_start (ap);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  va_end (ap);
> +  return r;
> +}
> +
> +int
> +f7 (...)
> +{
> +  int r = 0;
> +  va_list ap;
> +  va_start (ap);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  va_end (ap);
> +  return r;
> +}
> +
> +int
> +f8 (...)
> +{
> +  int r = 0;
> +  va_list ap;
> +  va_start (ap);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  r += va_arg (ap, int);
> +  va_end (ap);
> +  return r;
> +}
> +
> +struct S
> +s1 (...)
> +{
> +  int r = 0;
> +  va_list ap;
> +  va_start (ap);
> +  r += va_arg (ap, int);
> +  va_end (ap);
> +  struct S s = {};
> + 

Re: [gcc-15 2/3] RISC-V: avoid LUI based const mat: keep stack offsets aligned

2024-03-19 Thread Andrew Waterman
On Mon, Mar 18, 2024 at 5:28 PM Vineet Gupta  wrote:
>
>
>
> On 3/16/24 13:21, Jeff Law wrote:
> > |   59944:add s0,sp,2047  <
> > |   59948:mv  a2,a0
> > |   5994c:mv  a3,a1
> > |   59950:mv  a0,sp
> > |   59954:li  a4,1
> > |   59958:lui a1,0x1
> > |   5995c:add s0,s0,1 <---
> > |   59960:jal 59a3c
> >
> > SP here becomes unaligned, even if transitively which is undesirable as
> > well as incorrect:
> >   - ABI requires stack to be 8 byte aligned
> >   - asm code looks weird and unexpected
> >   - to the user it might falsely seem like a compiler bug even when not,
> > specially when staring at asm for debugging unrelated issue.
> > It's not ideal, but I think it's still ABI compliant as-is.  If it
> > wasn't, then I suspect things like virtual origins in Ada couldn't be
> > made ABI compliant.
>
> To be clear are u suggesting ADD sp, sp, 2047 is ABI compliant ?
> I'd still like to avoid it as I'm sure someone will complain about it.
>
> >> With the patch, we get following correct code instead:
> >>
> >> | ..
> >> | 59944: add s0,sp,2032
> >> | ..
> >> | 5995c: add s0,s0,16
> > Alternately you could tighten the positive side of the range of the
> > splitter from patch 1/3 so that you could always use 2032 rather than
> > 2047 on the first addi.   ie instead of allowing 2048..4094, allow
> > 2048..4064.
>
> 2033..4064 vs. 2048..4094
>
> Yeah I was a bit split about this as well. Since you are OK with either,
> I'll keep them as-is and perhaps add this observation to commitlog.

There's a subset of embedded use cases where an interrupt service
routine continues on the same stack as the interrupted thread,
requiring sp to always have an ABI-compliant value (i.e. 16B aligned,
and with no important data on the stack at an address below sp).

Although not all use cases care about this property, it seems more
straightforward to maintain the invariant everywhere, rather than
selectively enforce it.

>
> Thx,
> -Vineet