Re: [PATCH v2] LoongArch: Split loongarch_option_override_internal into smaller procedures

2024-02-21 Thread chenglulu

Hi,yujie:

When using this patch to compile test cases, an ICE is reported.


 test.c

 float
foo(float a, float b)
{
  return a / b;
}

# ./gcc/cc1 test.c -o - -O2 -ffast-math -mrecip

recip.c: 在函数‘foo’中:
recip.c:5:1: 错误:无法识别的指令:
    5 | }
  | ^
(insn 9 8 10 2 (set (reg:SF 84)
    (unspec:SF [
    (reg/v:SF 82 [ b ])
    ] UNSPEC_RECIPE)) "recip.c":4:12 -1
 (nil))
during RTL pass: vregs
recip.c:5:1: 编译器内部错误:在 extract_insn 中,于 recog.cc:2812
0x135d1d4 _fatal_insn(char const*, rtx_def const*, char const*, int, 
char const*)

/home/chenglulu/work/loongisa-toolchain/gcc-upstream/gcc/rtl-error.cc:108
0x135d215 _fatal_insn_not_found(rtx_def const*, char const*, int, char 
const*)

/home/chenglulu/work/loongisa-toolchain/gcc-upstream/gcc/rtl-error.cc:116
0x13111b6 extract_insn(rtx_insn*)
/home/chenglulu/work/loongisa-toolchain/gcc-upstream/gcc/recog.cc:2812
0xf84e72 instantiate_virtual_regs_in_insn
/home/chenglulu/work/loongisa-toolchain/gcc-upstream/gcc/function.cc:1611
0xf85e90 instantiate_virtual_regs
/home/chenglulu/work/loongisa-toolchain/gcc-upstream/gcc/function.cc:1994
0xf85f56 execute
/home/chenglulu/work/loongisa-toolchain/gcc-upstream/gcc/function.cc:2041
Please submit a full bug report, with preprocessed source (by using 
-freport-bug).

Please include the complete backtrace with any bug report.
参阅  以获取指示。


在 2024/2/21 上午11:36, Yang Yujie 写道:

gcc/ChangeLog:

* config/loongarch/genopts/loongarch.opt.in: Mark -m[no-]recip as
aliases to -mrecip={all,none}.
* config/loongarch/loongarch.opt: Same.
* config/loongarch/loongarch-def.h: Modify ABI condition macros for
convenience.
* config/loongarch/loongarch-opts.cc: Define option-handling
procedures split from the original loongarch_option_override_internal.
* config/loongarch/loongarch-opts.h: Same.
* config/loongarch/loongarch.cc: Clean up
loongarch_option_override_internal.
---
  gcc/config/loongarch/genopts/loongarch.opt.in |   8 +-
  gcc/config/loongarch/loongarch-def.h  |  11 +-
  gcc/config/loongarch/loongarch-opts.cc| 248 +
  gcc/config/loongarch/loongarch-opts.h |  27 +-
  gcc/config/loongarch/loongarch.cc | 253 +++---
  gcc/config/loongarch/loongarch.opt|   8 +-
  6 files changed, 325 insertions(+), 230 deletions(-)

diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 02f918053f5..a77893d31d9 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -197,14 +197,14 @@ mexplicit-relocs
  Target Alias(mexplicit-relocs=, always, none)
  Use %reloc() assembly operators (for backward compatibility).
  
-mrecip

-Target RejectNegative Var(la_recip) Save
-Generate approximate reciprocal divide and square root for better throughput.
-
  mrecip=
  Target RejectNegative Joined Var(la_recip_name) Save
  Control generation of reciprocal estimates.
  
+mrecip

+Target Alias(mrecip=, all, none)
+Generate approximate reciprocal divide and square root for better throughput.
+
  ; The code model option names for -mcmodel.
  Enum
  Name(cmodel) Type(int)
diff --git a/gcc/config/loongarch/loongarch-def.h 
b/gcc/config/loongarch/loongarch-def.h
index 2dbf006d013..0cbf9476690 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -90,11 +90,16 @@ extern loongarch_def_array
  
  #define TO_LP64_ABI_BASE(C) (C)
  
-#define ABI_FPU_64(abi_base) \

+#define ABI_LP64_P(abi_base) \
+  (abi_base == ABI_BASE_LP64D \
+   || abi_base == ABI_BASE_LP64F \
+   || abi_base == ABI_BASE_LP64S)
+
+#define ABI_FPU64_P(abi_base) \
(abi_base == ABI_BASE_LP64D)
-#define ABI_FPU_32(abi_base) \
+#define ABI_FPU32_P(abi_base) \
(abi_base == ABI_BASE_LP64F)
-#define ABI_FPU_NONE(abi_base) \
+#define ABI_NOFPU_P(abi_base) \
(abi_base == ABI_BASE_LP64S)
  
  
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc

index 7eeac43ed2f..380208f38bf 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "coretypes.h"
  #include "tm.h"
  #include "obstack.h"
+#include "opts.h"
  #include "diagnostic-core.h"
  
  #include "loongarch-cpu.h"

@@ -32,8 +33,12 @@ along with GCC; see the file COPYING3.  If not see
  #include "loongarch-str.h"
  #include "loongarch-def.h"
  
+/* Target configuration */

  struct loongarch_target la_target;
  
+/* RTL cost information */

+const struct loongarch_rtx_cost_data *loongarch_cost;
+
  /* ABI-related configuration.  */
  #define ABI_COUNT (sizeof(abi_priority_list)/sizeof(struct loongarch_abi))
  static const struct loongarch_abi
@@ -795,3 +800,246 @@ loongarch_update_gcc_opt_status (struct loongarch_target 
*target,
/* 

[PATCH][_GLIBCXX_INLINE_VERSION] Adapt dg-error messages

2024-02-21 Thread François Dumont
For those using my patch to build in gnu-versioned-namespace mode it 
would be preferable not to have any failures when running testsuite.


    libstdc++: [_GLIBCXX_INLINE_VERSION] Adapt dg-error message

    libstdc++-v3/ChangeLog:

    * testsuite/20_util/function_objects/bind_back/111327.cc: 
Adapt dg-error message

    for __8 namespace.
    * testsuite/20_util/function_objects/bind_front/111327.cc: 
Likewise.


Ok to commit ?

François
diff --git 
a/libstdc++-v3/testsuite/20_util/function_objects/bind_back/111327.cc 
b/libstdc++-v3/testsuite/20_util/function_objects/bind_back/111327.cc
index d634db9dc1d..f8a65127ccf 100644
--- a/libstdc++-v3/testsuite/20_util/function_objects/bind_back/111327.cc
+++ b/libstdc++-v3/testsuite/20_util/function_objects/bind_back/111327.cc
@@ -39,4 +39,4 @@ int main() {
   std::move(std::as_const(g1))();
 }
 
-// { dg-error "no type named 'type' in 'struct std::invoke_result" "" { target 
c++23 } 0 }
+// { dg-error "no type named 'type' in 'struct std::(__8::)?invoke_result" "" 
{ target c++23 } 0 }
diff --git 
a/libstdc++-v3/testsuite/20_util/function_objects/bind_front/111327.cc 
b/libstdc++-v3/testsuite/20_util/function_objects/bind_front/111327.cc
index 5fe0a83baec..896492b3d74 100644
--- a/libstdc++-v3/testsuite/20_util/function_objects/bind_front/111327.cc
+++ b/libstdc++-v3/testsuite/20_util/function_objects/bind_front/111327.cc
@@ -39,4 +39,4 @@ int main() {
   std::move(std::as_const(g1))();
 }
 
-// { dg-error "no type named 'type' in 'struct std::invoke_result" "" { target 
c++23 } 0 }
+// { dg-error "no type named 'type' in 'struct std::(__8::)?invoke_result" "" 
{ target c++23 } 0 }


[commit] invoke.texi: Fix some skipping UrlSuffix problem for MIPS

2024-02-21 Thread YunQiang Su
The problem is that, there are these lines in mips.opt.urls:
  ; skipping UrlSuffix for 'mabi=' due to finding no URLs
  ; skipping UrlSuffix for 'mno-flush-func' due to finding no URLs
  ; skipping UrlSuffix for 'mexplicit-relocs' due to finding no URLs

These lines are not fixed by this patch because we don't
document these options:
  ; skipping UrlSuffix for 'mlra' due to finding no URLs
  ; skipping UrlSuffix for 'mdebug' due to finding no URLs
  ; skipping UrlSuffix for 'meb' due to finding no URLs
  ; skipping UrlSuffix for 'mel' due to finding no URLs

gcc
* doc/invoke.texi(MIPS Options): Fix skipping UrlSuffix
problem of mabi=, mno-flush-func, mexplicit-relocs;
add missing leading - of mbranch-cost option.
* config/mips/mips.opt.urls: Regenerate.
---
 gcc/config/mips/mips.opt.urls | 12 ++--
 gcc/doc/invoke.texi   | 14 +-
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/gcc/config/mips/mips.opt.urls b/gcc/config/mips/mips.opt.urls
index ff2f0aee0e3..96aba041026 100644
--- a/gcc/config/mips/mips.opt.urls
+++ b/gcc/config/mips/mips.opt.urls
@@ -6,7 +6,8 @@ UrlSuffix(gcc/MIPS-Options.html#index-EB-2)
 EL
 UrlSuffix(gcc/MIPS-Options.html#index-EL-2)
 
-; skipping UrlSuffix for 'mabi=' due to finding no URLs
+mabi=
+UrlSuffix(gcc/MIPS-Options.html#index-mabi-3)
 
 mabicalls
 UrlSuffix(gcc/MIPS-Options.html#index-mabicalls)
@@ -65,9 +66,15 @@ UrlSuffix(gcc/MIPS-Options.html#index-membedded-data)
 meva
 UrlSuffix(gcc/MIPS-Options.html#index-meva)
 
+mexplicit-relocs=
+UrlSuffix(gcc/MIPS-Options.html#index-mexplicit-relocs-2)
+
 mexplicit-relocs
 UrlSuffix(gcc/MIPS-Options.html#index-mexplicit-relocs-2)
 
+mno-explicit-relocs
+UrlSuffix(gcc/MIPS-Options.html#index-mno-explicit-relocs-2)
+
 mextern-sdata
 UrlSuffix(gcc/MIPS-Options.html#index-mextern-sdata)
 
@@ -173,7 +180,8 @@ UrlSuffix(gcc/MIPS-Options.html#index-mno-float)
 mmcu
 UrlSuffix(gcc/MIPS-Options.html#index-mmcu-1)
 
-; skipping UrlSuffix for 'mno-flush-func' due to finding no URLs
+mno-flush-func
+UrlSuffix(gcc/MIPS-Options.html#index-mno-flush-func-1)
 
 mno-mdmx
 UrlSuffix(gcc/MIPS-Options.html#index-mno-mdmx)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8219a6a5947..58527e1ea3c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -27995,11 +27995,7 @@ Aliases of @option{-minterlink-compressed} and
 @option{-mno-interlink-compressed}.  These options predate the microMIPS ASE
 and are retained for backwards compatibility.
 
-@opindex mabi=32
-@opindex mabi=o64
-@opindex mabi=n32
-@opindex mabi=64
-@opindex mabi=eabi
+@opindex mabi
 @item -mabi=32
 @itemx -mabi=o64
 @itemx -mabi=n32
@@ -28486,9 +28482,8 @@ Enable (disable) use of the @code{%hi()} and 
@code{%lo()} assembler
 relocation operators.  This option has been superseded by
 @option{-mexplicit-relocs} but is retained for backwards compatibility.
 
-@opindex mexplicit-relocs=none
-@opindex mexplicit-relocs=base
-@opindex mexplicit-relocs=pcrel
+@opindex mexplicit-relocs
+@opindex mno-explicit-relocs
 @item -mexplicit-relocs=none
 @itemx -mexplicit-relocs=base
 @itemx -mexplicit-relocs=pcrel
@@ -28767,6 +28762,7 @@ Disable the insertion of cache barriers.  This is the 
default setting.
 @end table
 
 @opindex mflush-func
+@opindex mno-flush-func
 @item -mflush-func=@var{func}
 @itemx -mno-flush-func
 Specifies the function to call to flush the I and D caches, or to not
@@ -28778,7 +28774,7 @@ depends on the target GCC was configured for, but 
commonly is either
 @code{_flush_func} or @code{__cpu_flush}.
 
 @opindex mbranch-cost
-@item mbranch-cost=@var{num}
+@item -mbranch-cost=@var{num}
 Set the cost of branches to roughly @var{num} ``simple'' instructions.
 This cost is only a heuristic and is not guaranteed to produce
 consistent results across releases.  A zero cost redundantly selects
-- 
2.39.2



Re: CI for "Option handling: add documentation URLs"

2024-02-21 Thread YunQiang Su
Mark Wielaard  于2024年2月19日周一 06:58写道:
>
> Hi David,
>
> On Thu, Jan 04, 2024 at 09:57:09AM -0500, David Malcolm wrote:
> > I've pushed the .opt.urls patch kit to gcc trunk [1], so hopefully the
> > CI check you wrote can go live now.
>
> And then I was on vacation myself and forgot. I am sorry.
>
> So, I did try the regenerate-opt-urls locally, and it did generate the
> attached diff. Which seems to show we really need this automated.
>
> Going over the diff. The -Winfinite-recursion in rust does indeed seem
> new.  As do the -mapx-inline-asm-use-gpr32 and mevex512 for i386.  And
> the avr options -mskip-bug, -mflmap and mrodata-in-ram.  The change in
> common.opt.urls for -Wuse-after-free comes from it being moved from
> c++ to the c-family. The changes in mips.opt.urls seem to come from
> commit 46df1369 "doc/invoke: Remove duplicate explicit-relocs entry of
> MIPS".
>

For MIPS, it's due to malformed patches to invoke.texi.
I will fix them.

> The changes in c.opt.urls seem mostly reordering. The sorting makes
> more sense after the diff imho. And must have come from commit
> 4666cbde5 "Sort warning options in c-family/c.opt".
>
> Also the documentation for -Warray-parameter was fixed.
>
> So I think the regenerate-opt-urls check does work as intended. So
> lets automate it, because it looks like nobody regenerated the
> url.opts after updating the documentation.
>
> But we should first apply this diff. Could you double check it is
> sane/correct?
>
> Thanks,
>
> Mark



-- 
YunQiang Su


Re:[pushed] [PATCH v1 0/4] Fix a series of problems caused by

2024-02-21 Thread chenglulu

Pushed to r13-8349...r13-8352.

在 2024/2/21 上午11:04, Lulu Cheng 写道:

Because binutils2.42 corrects the implementation of
".align [abs-expr,[abs-expr[,abs-expr]]]".
The macro ASM_OUTPUT_ALIGN_WITH_NOP in GCC uses this assembler directive,
and an error occurs. See link below for detailed description.
https://gcc.gnu.org/pipermail/gcc-patches/2024-February/645067.html

In order to solve the above problems, do the following operations:

1. Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP. (cherry pick r14-4674)
2. Check whether binutils supports the relax function. (cherry pick r14-4160)
3. Disable relaxation if the assembler doesn't support
   conditional branch relaxation. (cherry pick r14-5434)

PR112299 is also fixed here.

Lulu Cheng (2):
   LoongArch: Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP.
   LoongArch: Check whether binutils supports the relax function. If
 supported, explicit relocs are turned off by default.

Xi Ruoyao (2):
   LoongArch: Disable relaxation if the assembler don't support
 conditional branch relaxation [PR112330]
   LoongArch: Define HAVE_AS_TLS to 0 if it's undefined [PR112299]

  gcc/config.in | 12 
  gcc/config/loongarch/genopts/loongarch.opt.in | 11 +++-
  gcc/config/loongarch/gnu-user.h   |  3 +-
  gcc/config/loongarch/loongarch-opts.h | 12 
  gcc/config/loongarch/loongarch.h  | 22 +--
  gcc/config/loongarch/loongarch.opt| 11 +++-
  gcc/configure | 66 +++
  gcc/configure.ac  | 14 
  gcc/doc/invoke.texi   | 24 ++-
  9 files changed, 165 insertions(+), 10 deletions(-)





Re: [pushed][PATCH v1 0/4] Fix a series of problems caused by ASM_OUTPUT_ALIGN_WITH_NOP (release/gcc-12).

2024-02-21 Thread chenglulu

Pushed to r12-10169...r12-10172.

在 2024/2/21 上午11:10, Lulu Cheng 写道:

Because binutils2.42 corrects the implementation of
".align [abs-expr,[abs-expr[,abs-expr]]]".
The macro ASM_OUTPUT_ALIGN_WITH_NOP in GCC uses this assembler directive,
and an error occurs. See link below for detailed description.
https://gcc.gnu.org/pipermail/gcc-patches/2024-February/645067.html

In order to solve the above problems, do the following operations:

1. Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP. (cherry pick r14-4674)
2. Check whether binutils supports the relax function. (cherry pick r14-4160)
3. Disable relaxation if the assembler doesn't support
   conditional branch relaxation. (cherry pick r14-5434)

PR112299 is also fixed here.

Lulu Cheng (2):
   LoongArch: Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP.
   LoongArch: Check whether binutils supports the relax function. If
 supported, explicit relocs are turned off by default.

Xi Ruoyao (2):
   LoongArch: Disable relaxation if the assembler don't support
 conditional branch relaxation [PR112330]
   LoongArch: Define HAVE_AS_TLS to 0 if it's undefined [PR112299]

  gcc/config.in | 18 +
  gcc/config/loongarch/genopts/loongarch.opt.in |  9 +++
  gcc/config/loongarch/gnu-user.h   |  4 +-
  gcc/config/loongarch/loongarch-opts.h | 12 
  gcc/config/loongarch/loongarch.h  | 22 +--
  gcc/config/loongarch/loongarch.opt|  9 +++
  gcc/configure | 66 +++
  gcc/configure.ac  | 14 
  gcc/doc/invoke.texi   | 24 ++-
  9 files changed, 169 insertions(+), 9 deletions(-)





RE: [PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12

2024-02-21 Thread Li, Pan2
Committed, thanks Kito.

Pan

From: Kito Cheng 
Sent: Thursday, February 22, 2024 7:35 AM
To: Li, Pan2 
Cc: GCC Patches ; 钟居哲 ; Wang, 
Yanzhang 
Subject: Re: [PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12

LGTM for the patch
Li, Pan2 mailto:pan2...@intel.com>> 於 2024年2月21日 週三 12:31 寫道:
Hi kito and juzhe.

There may be 2 items for double-confirm. Thanks a lot.

1. Not very sure if we need to upgrade the version for __riscv_th_v_intrinsic.

Yes, since 0.11 and 0.12 are not really compatible


2. Do we need to upgrade to an even newer version (like 1.0) for the GCC 14
release, or can we do it later?

Yeah, Ideal case is we can update that before release made :p




Pan

-Original Message-
From: Li, Pan2 mailto:pan2...@intel.com>>
Sent: Wednesday, February 21, 2024 12:27 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; Li, Pan2 
mailto:pan2...@intel.com>>; Wang, Yanzhang 
mailto:yanzhang.w...@intel.com>>; 
kito.ch...@gmail.com
Subject: [PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12

From: Pan Li mailto:pan2...@intel.com>>

Upgrade the version of RVV intrinsic from 0.11 to 0.12.

PR target/114017

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Upgrade
the version to 0.12.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/predef-__riscv_v_intrinsic.c: Update the
version to 0.12.
* gcc.target/riscv/rvv/base/pr114017-1.c: New test.

Signed-off-by: Pan Li mailto:pan2...@intel.com>>
---
 gcc/config/riscv/riscv-c.cc   |  2 +-
 .../riscv/predef-__riscv_v_intrinsic.c|  2 +-
 .../gcc.target/riscv/rvv/base/pr114017-1.c| 19 +++
 3 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 3ef06dcfd2d..3755ec0b8ef 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -139,7 +139,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
 {
   builtin_define ("__riscv_vector");
   builtin_define_with_int_value ("__riscv_v_intrinsic",
-riscv_ext_version_value (0, 11));
+riscv_ext_version_value (0, 12));
 }

if (TARGET_XTHEADVECTOR)
diff --git a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c 
b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
index dbbedf54f87..07f1f159a8f 100644
--- a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
+++ b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
@@ -3,7 +3,7 @@

 int main () {

-#if __riscv_v_intrinsic != 11000
+#if __riscv_v_intrinsic != 12000
 #error "__riscv_v_intrinsic"
 #endif

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
new file mode 100644
index 000..8eee7c68f71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+vuint8mf2_t
+test (vuint16m1_t val, size_t shift, size_t vl)
+{
+#if __riscv_v_intrinsic == 11000
+  #warning "RVV Intrinsics v0.11"
+  return __riscv_vnclipu (val, shift, vl);
+#endif
+
+#if __riscv_v_intrinsic == 12000
+  #warning "RVV Intrinsics v0.12" /* { dg-warning "RVV Intrinsics v0.12" } */
+  return __riscv_vnclipu (val, shift, 0, vl);
+#endif
+}
+
--
2.34.1


Re: [PATCH] RISC-V: Add tests for constraints "i" and "s"

2024-02-21 Thread Fangrui Song
On Wed, Feb 21, 2024 at 4:07 PM Kito Cheng  wrote:
>
> LGTM, but I am OoO today, will commit that once I have laptop :p

Thanks! Dropped the gcc/doc/md.texi change and pushed as commit
9ca4c1bf082a4691482ca9f4814fea68f04e2cb3
(I have write-after-approval now:) )

> Fangrui Song  於 2024年2月22日 週四 05:54 寫道:
>>
>> On Tue, Feb 13, 2024 at 10:36 PM Fangrui Song  wrote:
>> >
>> > The constraints "i" and "s" can be used with a symbol that binds
>> > externally, e.g.
>> > ```
>> > namespace ns { extern int var, a[4]; }
>> > void foo() {
>> >   asm(".pushsection .xxx,\"aw\"; .dc.a %0; .popsection" :: "s"(::var));
>> >   asm(".reloc ., BFD_RELOC_NONE, %0" :: "s"(::a[3]));
>> > }
>> > ```
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> > * gcc.target/riscv/asm-raw-symbol.c: New test.
>> > ---
>> >  gcc/doc/md.texi |  2 +-
>> >  gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c | 14 ++
>> >  2 files changed, 15 insertions(+), 1 deletion(-)
>> >  create mode 100644 gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
>> >
>> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
>> > index b0c61925120..c75e5bf259d 100644
>> > --- a/gcc/doc/md.texi
>> > +++ b/gcc/doc/md.texi
>> > @@ -1947,7 +1947,7 @@ Integer constant that is valid as an immediate 
>> > operand in a 64-bit @code{MOV}
>> >  pseudo instruction
>> >
>> >  @item S
>> > -An absolute symbolic address or a label reference
>> > +A symbolic reference or label reference.
>> >
>> >  @item Y
>> >  Floating point constant zero
>> > diff --git a/gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c 
>> > b/gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
>> > new file mode 100644
>> > index 000..28305a8b1f0
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
>> > @@ -0,0 +1,14 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-fpic" } */
>> > +
>> > +extern int var, arr[2][2];
>> > +
>> > +void
>> > +test (void)
>> > +{
>> > +  __asm__ ("@ %0" : : "i"());
>> > +  __asm__ ("@ %0 %1 %2" : : "s"(), "s"([1][1]), "s"(test));
>> > +}
>> > +
>> > +/* { dg-final { scan-assembler "@ var arr\\+12 test" } } */
>> > +/* { dg-final { scan-assembler "@ var" } } */
>> > --
>> > 2.43.0.687.g38aa6559b0-goog
>> >
>>
>> Ping:)
>>
>>
>> --
>> 宋方睿



-- 
宋方睿


Re: [PATCH] Fix fortran/PR114024

2024-02-21 Thread Steve Kargl
On Wed, Feb 21, 2024 at 01:42:32PM -0800, Steve Kargl wrote:
> On Wed, Feb 21, 2024 at 10:20:43PM +0100, Harald Anlauf wrote:
> > On 2/21/24 22:00, Steve Kargl wrote:
> > > memleak vs ICE.  I think I'll take one over the other.
> > > Probably need to free code->expr3 before the copy.
> > 
> > Yep.
> > 
> > > I tried gfc_replace_expr in an earlier patch.  It did not
> > > work.


I tried freeing code->expr3 before assigning the new expression.
That leads to

% gfcx -c ~/gcc/gccx/gcc/testsuite/gfortran.dg/allocate_with_source_28.f90 
pid 69473 comm f951 has trashed its stack, killing
gfortran: internal compiler error: Illegal instruction signal terminated 
program f951

If I don't free code->expr3 but simply assign the new
expression from gfc_get_parentheses(), your example
now compiles and executes as expected.  It is now
allocate_with_source_28.f90.  Caveat:  I don't know
how to test the CLASS uu.

> > > > - it still fails on the following code, because the traversal
> > > >of the refs is incomplete / wrong:
> > > > 
> > > > program foo
> > > > implicit none
> > > > complex   :: cmp(3)
> > > > real, pointer :: pp(:)
> > > > class(*), allocatable :: uu(:)
> > > > type t
> > > >real :: re
> > > >real :: im
> > > > end type t
> > > > type u
> > > >type(t) :: tt(3)
> > > > end type u
> > > > type(u) :: cc
> > > > 
> > > > cmp = (3.45,6.78)
> > > > cc% tt% re = cmp% re
> > > > cc% tt% im = cmp% im
> > > > allocate (pp, source = cc% tt% im)   ! ICE
> > > 
> > > cc%tt%im isn't a complex-part-ref, so this seems to
> > > be a different (maybe related) issue.  Does the code
> > > compile with 'source = (cc%tt%im)'?  If so, perhaps,
> > > detecting a component reference and doing the simply
> > > wrapping with parentheses can be done.
> > 
> > Yes, that's why I tried to make up the above example.
> > I think %re and %im are not too special, they work
> > here pretty much like component refs elsewhere.
> > 
> 
> I see.  The %re and %im complex-part-ref correspond to 
> ref->u.i == INQUIRY_RE and INQUIRY_IM, respectively.
> A part-ref for a user-defined type doesn't have an
> INQUIRY_xxx, so we'll need to see if there is a way to
> easily identify, e.g., cc%tt%re from your testcase.  

The attached patch uses ref->type == REF_COMPONENT to deal 
with the above code.

-- 
Steve
diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc
index 5247d3d39d7..414248fe2e5 100644
--- a/gcc/fortran/trans-stmt.cc
+++ b/gcc/fortran/trans-stmt.cc
@@ -6354,9 +6354,35 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate)
 	   al = al->next)
 	vtab_needed = (al->expr->ts.type == BT_CLASS);
 
+  /* When expr3 is a variable, i.e., a very simple expression, then
+	 convert it once here.  */
+ 
   gfc_init_se (, NULL);
-  /* When expr3 is a variable, i.e., a very simple expression,
-	 then convert it once here.  */
+
+  /* If one has source = z%re or z%im with z a complex array or 
+	 source = a%b%c where a or b is an array of a derived type, then
+	 things can go sideways with the complex-part-refi or part-ref, so
+	 wrap the entity in parentheses to force evaluation of an expression.
+	 That is, the else-branch in the following if-else-stmt is entered.  */
+
+  if (code->expr3->expr_type == EXPR_VARIABLE
+	  && code->expr3->ts.type == BT_REAL
+	  && code->expr3->ref)
+	{
+	  gfc_ref *ref = code->expr3->ref;
+
+	  while (ref->next)
+	ref = ref->next;
+
+	  if (ref->u.i == INQUIRY_IM || ref->u.i == INQUIRY_RE
+	  || ref->type == REF_COMPONENT)
+	{
+	  gfc_expr *etmp = gfc_get_parentheses (code->expr3);
+	  code->expr3 = gfc_copy_expr (etmp);
+	  gfc_free_expr (etmp);
+	}
+	}
+
   if (code->expr3->expr_type == EXPR_VARIABLE
 	  || code->expr3->expr_type == EXPR_ARRAY
 	  || code->expr3->expr_type == EXPR_CONSTANT)
diff --git a/gcc/testsuite/gfortran.dg/allocate_with_source_27.f90 b/gcc/testsuite/gfortran.dg/allocate_with_source_27.f90
new file mode 100644
index 000..d0f0f3c4a84
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/allocate_with_source_27.f90
@@ -0,0 +1,20 @@
+!
+! { dg-do run }
+!
+! fortran/PR114024
+! https://github.com/fujitsu/compiler-test-suite
+! Modified from Fortran/0093/0093_0130.f90
+!
+program foo
+   implicit none
+   complex :: cmp(3)
+   real, allocatable :: xx(:), yy(:), zz(:)
+   cmp = (3., 6.78)
+   allocate(xx, source = cmp%re)  ! This caused an ICE.
+   allocate(yy, source = cmp(1:3)%re) ! This caused an ICE.
+   allocate(zz, source = (cmp%re))
+   if (any(xx /= [3., 3., 3.])) stop 1
+   if (any(yy /= [3., 3., 3.])) stop 2
+   if (any(zz /= [3., 3., 3.])) stop 3
+end program foo
+
diff --git a/gcc/testsuite/gfortran.dg/allocate_with_source_28.f90 b/gcc/testsuite/gfortran.dg/allocate_with_source_28.f90
new file mode 100644
index 000..5b167cd
--- /dev/null
+++ 

Re: [PATCH] libgccjit: Make new_array_type take unsigned long

2024-02-21 Thread Antoni Boucher
Thanks for the review.

Here's the updated patch.

On Thu, 2023-12-07 at 20:04 -0500, David Malcolm wrote:
> On Thu, 2023-12-07 at 17:29 -0500, Antoni Boucher wrote:
> > Hi.
> > This patch updates gcc_jit_context_new_array_type to take the size
> > as
> > an unsigned long instead of an int, to allow creating bigger array
> > types.
> > 
> > I haven't written the ChangeLog yet because I wasn't sure it's
> > allowed
> > to change the type of a function like that.
> > If it isn't, what would you suggest?
> 
> We've kept ABI compatibility all the way back to the version in GCC
> 5,
> so it seems a shame to break ABI.
> 
> How about a new API entrypoint:
>   gcc_jit_context_new_array_type_unsigned_long
> whilst keeping the old one.
> 
> Then everything internally can use "unsigned long"; we just keep the
> old entrypoint accepting int (which internally promotes the arg to
> unsigned long, if positive, sharing all the implementation).
> 
> Alternatively, I think there may be a way to do this with symbol
> versioning:
>   https://gcc.gnu.org/wiki/SymbolVersioning
> see e.g. Section 3.7 of Ulrich Drepper's "How To Write Shared
> Libraries", but I'm a bit wary of cross-platform compatibility with
> that.
> 
> Dave
> 
> 

From 4886f05909b0770a673f220a8957d0104d5014d3 Mon Sep 17 00:00:00 2001
From: Antoni Boucher 
Date: Sat, 4 Mar 2023 00:44:49 -0500
Subject: [PATCH] libgccjit: Make new_array_type take unsigned long

gcc/jit/ChangeLog:

	* jit-playback.cc (new_array_type): Change num_elements type to
	unsigned long.
	* jit-playback.h (new_array_type): Change num_elements type to
	unsigned long.
	* jit-recording.cc (recording::context::new_array_type): Change
	num_elements type to unsigned long.
	(recording::array_type::make_debug_string): Use unsigned long
	format.
	(recording::array_type::write_reproducer): Switch to
	gcc_jit_context_new_array_type_unsigned_long.
	* jit-recording.h (class array_type): Change num_elements type
	to unsigned long.
	(new_array_type): Change num_elements type to unsigned long.
	(num_elements): Change return type to unsigned long.
	* libgccjit.cc (gcc_jit_context_new_array_type_unsigned_long):
	New function.
	* libgccjit.h (gcc_jit_context_new_array_type_unsigned_long):
	New function.
	* libgccjit.map: New function.

gcc/testsuite/ChangeLog:

	* jit.dg/all-non-failing-tests.h: Add test-arrays-unsigned-long.c.
	* jit.dg/test-arrays-unsigned-long.c: New test.
---
 gcc/jit/jit-playback.cc   |   2 +-
 gcc/jit/jit-playback.h|   2 +-
 gcc/jit/jit-recording.cc  |  12 +-
 gcc/jit/jit-recording.h   |   8 +-
 gcc/jit/libgccjit.cc  |  12 +-
 gcc/jit/libgccjit.h   |   7 +
 gcc/jit/libgccjit.map |   5 +
 gcc/testsuite/jit.dg/all-non-failing-tests.h  |  10 ++
 .../jit.dg/test-arrays-unsigned-long.c| 165 ++
 9 files changed, 210 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/jit.dg/test-arrays-unsigned-long.c

diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc
index 6baa838af10..b3775a18a83 100644
--- a/gcc/jit/jit-playback.cc
+++ b/gcc/jit/jit-playback.cc
@@ -327,7 +327,7 @@ playback::type *
 playback::context::
 new_array_type (playback::location *loc,
 		playback::type *element_type,
-		int num_elements)
+		unsigned long num_elements)
 {
   gcc_assert (element_type);
 
diff --git a/gcc/jit/jit-playback.h b/gcc/jit/jit-playback.h
index aa6a086613c..7cbb2d1f8d8 100644
--- a/gcc/jit/jit-playback.h
+++ b/gcc/jit/jit-playback.h
@@ -80,7 +80,7 @@ public:
   type *
   new_array_type (location *loc,
 		  type *element_type,
-		  int num_elements);
+		  unsigned long num_elements);
 
   field *
   new_field (location *loc,
diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index 68a2e860c1f..3a05e91c140 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -841,7 +841,7 @@ recording::context::get_int_type (int num_bytes, int is_signed)
 recording::type *
 recording::context::new_array_type (recording::location *loc,
 recording::type *element_type,
-int num_elements)
+unsigned long num_elements)
 {
   if (struct_ *s = element_type->dyn_cast_struct ())
 if (!s->get_fields ())
@@ -3129,7 +3129,7 @@ recording::string *
 recording::array_type::make_debug_string ()
 {
   return string::from_printf (m_ctxt,
-			  "%s[%d]",
+			  "%s[%ld]",
 			  m_element_type->get_debug_string (),
 			  m_num_elements);
 }
@@ -3142,10 +3142,10 @@ recording::array_type::write_reproducer (reproducer &r)
 {
   const char *id = r.make_identifier (this, "array_type");
   r.write ("  gcc_jit_type *%s =\n"
-	   "gcc_jit_context_new_array_type (%s,\n"
-	   "%s, /* gcc_jit_location *loc */\n"
-	   "%s, /* gcc_jit_type *element_type */\n"
-	   "%i); /* 

[PATCH v2] c++: implement [[gnu::non_owning]] [PR110358]

2024-02-21 Thread Marek Polacek
On Fri, Jan 26, 2024 at 04:04:35PM -0500, Jason Merrill wrote:
> On 1/25/24 20:37, Marek Polacek wrote:
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> > Since -Wdangling-reference has false positives that can't be
> > prevented, we should offer an easy way to suppress the warning.
> > Currently, that is only possible by using a #pragma, either around the
> > enclosing class or around the call site.  But #pragma GCC diagnostic tend
> > to be onerous.  A better solution would be to have an attribute.  Such
> > an attribute should not be tied to this particular warning though.  [*]
> > 
> > The warning bogusly triggers for classes that are like std::span,
> > std::reference_wrapper, and std::ranges::ref_view.  The common property
> > seems to be that these classes are only wrappers around some data.  So
> > I chose the name non_owning, but I'm not attached to it.  I hope that
> > in the future the attribute can be used for something other than this
> > diagnostic.
> 
> You decided not to pursue Barry's request for a bool argument to the
> attribute?

At first I thought it'd be an unnecessary complication but it was actually
pretty easy.  Better to accept the optional argument from the get-go
otherwise people would have to add > GCC 14 checks.
 
> Might it be more useful for the attribute to make reference_like_class_p
> return true, so that we still warn about a temporary of another type passing
> through it?

Good point.  Fixed.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Since -Wdangling-reference has false positives that can't be
prevented, we should offer an easy way to suppress the warning.
Currently, that is only possible by using a #pragma, either around the
enclosing class or around the call site.  But #pragma GCC diagnostic tends
to be onerous.  A better solution would be to have an attribute.  Such
an attribute should not be tied to this particular warning though.

The warning bogusly triggers for classes that are like std::span,
std::reference_wrapper, and std::ranges::ref_view.  The common property
seems to be that these classes are only wrappers around some data.  So
I chose the name non_owning, but I'm not attached to it.  I hope that
in the future the attribute can be used for something other than this
diagnostic.

This attribute takes an optional bool argument to support cases like:

  template <typename T>
  struct [[gnu::non_owning(std::is_reference_v<T>)]] S {
 // ...
  };

PR c++/110358
PR c++/109642

gcc/cp/ChangeLog:

* call.cc (non_owning_p): New.
(reference_like_class_p): Use it.
(do_warn_dangling_reference): Use it.  Don't warn when the function
or its enclosing class has attribute gnu::non_owning.
* tree.cc (cxx_gnu_attributes): Add gnu::non_owning.
(handle_non_owning_attribute): New.

gcc/ChangeLog:

* doc/extend.texi: Document gnu::non_owning.
* doc/invoke.texi: Mention that gnu::non_owning disables
-Wdangling-reference.

gcc/testsuite/ChangeLog:

* g++.dg/ext/attr-non-owning1.C: New test.
* g++.dg/ext/attr-non-owning2.C: New test.
* g++.dg/ext/attr-non-owning3.C: New test.
* g++.dg/ext/attr-non-owning4.C: New test.
* g++.dg/ext/attr-non-owning5.C: New test.
* g++.dg/ext/attr-non-owning6.C: New test.
* g++.dg/ext/attr-non-owning7.C: New test.
* g++.dg/ext/attr-non-owning8.C: New test.
* g++.dg/ext/attr-non-owning9.C: New test.
---
 gcc/cp/call.cc  | 38 ++--
 gcc/cp/tree.cc  | 26 +
 gcc/doc/extend.texi | 25 
 gcc/doc/invoke.texi | 21 +++
 gcc/testsuite/g++.dg/ext/attr-non-owning1.C | 38 
 gcc/testsuite/g++.dg/ext/attr-non-owning2.C | 29 +
 gcc/testsuite/g++.dg/ext/attr-non-owning3.C | 24 
 gcc/testsuite/g++.dg/ext/attr-non-owning4.C | 14 +
 gcc/testsuite/g++.dg/ext/attr-non-owning5.C | 31 ++
 gcc/testsuite/g++.dg/ext/attr-non-owning6.C | 65 +
 gcc/testsuite/g++.dg/ext/attr-non-owning7.C | 31 ++
 gcc/testsuite/g++.dg/ext/attr-non-owning8.C | 30 ++
 gcc/testsuite/g++.dg/ext/attr-non-owning9.C | 25 
 13 files changed, 391 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning1.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning2.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning3.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning4.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning5.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning6.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning7.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning8.C
 create mode 100644 gcc/testsuite/g++.dg/ext/attr-non-owning9.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc

Re: [PATCH] RISC-V: Add tests for constraints "i" and "s"

2024-02-21 Thread Kito Cheng
LGTM, but I am OoO today, will commit that once I have laptop :p

Fangrui Song  於 2024年2月22日 週四 05:54 寫道:

> On Tue, Feb 13, 2024 at 10:36 PM Fangrui Song  wrote:
> >
> > The constraints "i" and "s" can be used with a symbol that binds
> > externally, e.g.
> > ```
> > namespace ns { extern int var, a[4]; }
> > void foo() {
> >   asm(".pushsection .xxx,\"aw\"; .dc.a %0; .popsection" ::
> "s"(::var));
> >   asm(".reloc ., BFD_RELOC_NONE, %0" :: "s"(::a[3]));
> > }
> > ```
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/riscv/asm-raw-symbol.c: New test.
> > ---
> >  gcc/doc/md.texi |  2 +-
> >  gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c | 14 ++
> >  2 files changed, 15 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
> >
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > index b0c61925120..c75e5bf259d 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -1947,7 +1947,7 @@ Integer constant that is valid as an immediate
> operand in a 64-bit @code{MOV}
> >  pseudo instruction
> >
> >  @item S
> > -An absolute symbolic address or a label reference
> > +A symbolic reference or label reference.
> >
> >  @item Y
> >  Floating point constant zero
> > diff --git a/gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
> b/gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
> > new file mode 100644
> > index 000..28305a8b1f0
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-fpic" } */
> > +
> > +extern int var, arr[2][2];
> > +
> > +void
> > +test (void)
> > +{
> > +  __asm__ ("@ %0" : : "i"(&var));
> > +  __asm__ ("@ %0 %1 %2" : : "s"(&var), "s"(&arr[1][1]), "s"(test));
> > +}
> > +
> > +/* { dg-final { scan-assembler "@ var arr\\+12 test" } } */
> > +/* { dg-final { scan-assembler "@ var" } } */
> > --
> > 2.43.0.687.g38aa6559b0-goog
> >
>
> Ping:)
>
>
> --
> 宋方睿
>


Re: [PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12

2024-02-21 Thread Kito Cheng
Palmer Dabbelt  於 2024年2月22日 週四 07:42 寫道:

> On Wed, 21 Feb 2024 15:34:32 PST (-0800), Kito Cheng wrote:
> > LGTM for the patch
> >
> > Li, Pan2  於 2024年2月21日 週三 12:31 寫道:
> >
> >> Hi kito and juzhe.
> >>
> >> There may be 2 items for double-confirm. Thanks a lot.
> >>
> >> 1. Not very sure if we need to upgrade the version for
> >> __riscv_th_v_intrinsic.
> >>
> >
> > Yes since 0.11 and 0.12 is not really compatible
>
> Where are the incompatibilities?  The whole reason we accepted the
> intrinsics in the first place is because the RVI folks said they
> wouldn't break compatibility, if that's changed then just dropping the
> old version is going to break users.
>

0.12 has an interface for segment load/store and new fixed-point intrinsics
compared to 0.11.  The first item is not an incompatible change, since it is
newly added and GCC 13 did not implement the legacy one; the latter was
kind of broken on both LLVM and GCC, which made it not really useful in
practice.

Other than that, everything is the same.  It's not 100% compatible, so I do not
intend to cheat myself by saying it's compatible, but we do think it's a
necessary evil, since the fixed-point stuff did not have the right design and
implementation.

Anyway, it has now entered frozen mode: 1.0 rc0 has been tagged, and no API will
be changed or removed.


> > 2. Do we need to upgrade the even a newer version (like 1.0) for the GCC
> 14
> >> release, or we can do it later.
> >>
> >
> > Yeah, Ideal case is we can update that before release made :p
> >
> >
> >
> >
> >> Pan
> >>
> >> -Original Message-
> >> From: Li, Pan2 
> >> Sent: Wednesday, February 21, 2024 12:27 PM
> >> To: gcc-patches@gcc.gnu.org
> >> Cc: juzhe.zh...@rivai.ai; Li, Pan2 ; Wang, Yanzhang
> <
> >> yanzhang.w...@intel.com>; kito.ch...@gmail.com
> >> Subject: [PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12
> >>
> >> From: Pan Li 
> >>
> >> Upgrade the version of RVV intrinsic from 0.11 to 0.12.
> >>
> >> PR target/114017
> >>
> >> gcc/ChangeLog:
> >>
> >> * config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Upgrade
> >> the version to 0.12.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >> * gcc.target/riscv/predef-__riscv_v_intrinsic.c: Update the
> >> version to 0.12.
> >> * gcc.target/riscv/rvv/base/pr114017-1.c: New test.
> >>
> >> Signed-off-by: Pan Li 
> >> ---
> >>  gcc/config/riscv/riscv-c.cc   |  2 +-
> >>  .../riscv/predef-__riscv_v_intrinsic.c|  2 +-
> >>  .../gcc.target/riscv/rvv/base/pr114017-1.c| 19 +++
> >>  3 files changed, 21 insertions(+), 2 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
> >>
> >> diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
> >> index 3ef06dcfd2d..3755ec0b8ef 100644
> >> --- a/gcc/config/riscv/riscv-c.cc
> >> +++ b/gcc/config/riscv/riscv-c.cc
> >> @@ -139,7 +139,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
> >>  {
> >>builtin_define ("__riscv_vector");
> >>builtin_define_with_int_value ("__riscv_v_intrinsic",
> >> -riscv_ext_version_value (0, 11));
> >> +riscv_ext_version_value (0, 12));
> >>  }
> >>
> >> if (TARGET_XTHEADVECTOR)
> >> diff --git a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
> >> b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
> >> index dbbedf54f87..07f1f159a8f 100644
> >> --- a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
> >> +++ b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
> >> @@ -3,7 +3,7 @@
> >>
> >>  int main () {
> >>
> >> -#if __riscv_v_intrinsic != 11000
> >> +#if __riscv_v_intrinsic != 12000
> >>  #error "__riscv_v_intrinsic"
> >>  #endif
> >>
> >> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
> >> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
> >> new file mode 100644
> >> index 000..8eee7c68f71
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
> >> @@ -0,0 +1,19 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> >> +
> >> +#include "riscv_vector.h"
> >> +
> >> +vuint8mf2_t
> >> +test (vuint16m1_t val, size_t shift, size_t vl)
> >> +{
> >> +#if __riscv_v_intrinsic == 11000
> >> +  #warning "RVV Intrinsics v0.11"
> >> +  return __riscv_vnclipu (val, shift, vl);
> >> +#endif
> >> +
> >> +#if __riscv_v_intrinsic == 12000
> >> +  #warning "RVV Intrinsics v0.12" /* { dg-warning "RVV Intrinsics
> v0.12"
> >> } */
> >> +  return __riscv_vnclipu (val, shift, 0, vl);
> >> +#endif
> >> +}
> >> +
> >> --
> >> 2.34.1
> >>
> >>
>


Re: [PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12

2024-02-21 Thread Palmer Dabbelt

On Wed, 21 Feb 2024 15:34:32 PST (-0800), Kito Cheng wrote:

LGTM for the patch

Li, Pan2  於 2024年2月21日 週三 12:31 寫道:


Hi kito and juzhe.

There may be 2 items for double-confirm. Thanks a lot.

1. Not very sure if we need to upgrade the version for
__riscv_th_v_intrinsic.



Yes since 0.11 and 0.12 is not really compatible


Where are the incompatibilities?  The whole reason we accepted the 
intrinsics in the first place is because the RVI folks said they 
wouldn't break compatibility, if that's changed then just dropping the 
old version is going to break users.



2. Do we need to upgrade the even a newer version (like 1.0) for the GCC 14

release, or we can do it later.



Yeah, Ideal case is we can update that before release made :p





Pan

-Original Message-
From: Li, Pan2 
Sent: Wednesday, February 21, 2024 12:27 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; Li, Pan2 ; Wang, Yanzhang <
yanzhang.w...@intel.com>; kito.ch...@gmail.com
Subject: [PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12

From: Pan Li 

Upgrade the version of RVV intrinsic from 0.11 to 0.12.

PR target/114017

gcc/ChangeLog:

* config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Upgrade
the version to 0.12.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/predef-__riscv_v_intrinsic.c: Update the
version to 0.12.
* gcc.target/riscv/rvv/base/pr114017-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-c.cc   |  2 +-
 .../riscv/predef-__riscv_v_intrinsic.c|  2 +-
 .../gcc.target/riscv/rvv/base/pr114017-1.c| 19 +++
 3 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c

diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 3ef06dcfd2d..3755ec0b8ef 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -139,7 +139,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
 {
   builtin_define ("__riscv_vector");
   builtin_define_with_int_value ("__riscv_v_intrinsic",
-riscv_ext_version_value (0, 11));
+riscv_ext_version_value (0, 12));
 }

if (TARGET_XTHEADVECTOR)
diff --git a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
index dbbedf54f87..07f1f159a8f 100644
--- a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
+++ b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
@@ -3,7 +3,7 @@

 int main () {

-#if __riscv_v_intrinsic != 11000
+#if __riscv_v_intrinsic != 12000
 #error "__riscv_v_intrinsic"
 #endif

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
new file mode 100644
index 000..8eee7c68f71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+vuint8mf2_t
+test (vuint16m1_t val, size_t shift, size_t vl)
+{
+#if __riscv_v_intrinsic == 11000
+  #warning "RVV Intrinsics v0.11"
+  return __riscv_vnclipu (val, shift, vl);
+#endif
+
+#if __riscv_v_intrinsic == 12000
+  #warning "RVV Intrinsics v0.12" /* { dg-warning "RVV Intrinsics v0.12"
} */
+  return __riscv_vnclipu (val, shift, 0, vl);
+#endif
+}
+
--
2.34.1




Re: [PATCH RFA] build: drop target libs from LD_LIBRARY_PATH [PR105688]

2024-02-21 Thread Iain Sandoe



> On 21 Feb 2024, at 23:06, Jason Merrill  wrote:
> 
> On 2/20/24 00:45, Alexandre Oliva wrote:
>> On Feb 16, 2024, Jason Merrill  wrote:
>>> So, for stage2+, let's add just prev- libgcc.
>> I'm pretty sure this will break bootstrap-lean where libgcc_s isn't a
>> system library, and we're building post-bootstrap host tools :-(
>> We need the current stage lib after the prev stage is removed.
> 
> That's a good point, we should make sure it doesn't break.  It looks to me 
> like stage3-bubble removes stage1 after we're done building stage3, which 
> should be fine, but compare removes the stage2 libgcc that we might still 
> need to run stage3.  So indeed I guess we still want both prev and current 
> libgcc directories in RPATH to handle the case where we've removed the 
> previous stage, as below.

I’ll try that on darwin and aarch64 linux (I quite often need to use 
bootstrap-lean on the latter because of low disk space).

>> I also doubt that TARGET_LIB_PATH was defined and used for no reason.
>> My hunch is that bootstrap options and/or targets that don't have these
>> libraries as system libraries will break in some obscure way without it.
>> But I don't have the bandwidth to track down the history behind their
>> inclusion.
> 
> That has not seemed to be the case in Iain's testing on a system without 
> these libraries as system libraries.

Unless we change to (or add) a bootstrap where we use shared libstdc++ in the 
compiler, I think that is the case.

As I mentioned in an earlier post, unfortunately we do not yet have a way to 
distinguish module builds for host from module builds for target (when a 
library is used for both - which is the case for libstdc++, libbacktrace and 
libgrust at least).  This means that either the target library has to be built 
without a shared version (libbacktrace does this), or the host versions get 
built with a shared library which is not used (libstdc++) .. AFAICT the only 
reason we build libgomp and libatomic in bootstrap phase 1 and 2 is because 
they are dependents of the unused shared libstdc++.

Ideally, we’d fix Makefile.{tpl,def} to allow the same module to have different 
recipes for host and target builds, but that’s also not a 5 minute hack….

> I can't think of why we would need to depend on the current stage target 
> libraries, and we already weren't depending on the previous stage target 
> libraries.  I believe the only target code we run is tests, and if the tests 
> need the target libraries in RPATH that should happen in the testsuite.

Which could also be improved (we do not in Dejagnu really distinguish runpaths 
needed by the compiler from those needed by the built executables)

> It's arguable that we should pass TARGET_LIB_PATH down to make it easier for 
> the testsuites to find them, in case they are currently relying on them being 
> part of RPATH.  

> My impression from Iain's testing is that this isn't actually needed.

there’s actually a fair amount of specific code to locate dependent libs in 
places (some of which I just cleaned up a bit since it was now causing fails 
with Darwin’s new linker complaining about duplicated libs and so on).  So we 
are not currently expecting this information to be passed down.

>  I wouldn't mind keeping TARGET_LIB_PATH unused, but I'm not sure why that 
> would be better than bringing it back if we turn out to need it.

+1
> 
> 
>> I insist that the entire approach of choosing the same set of target
>> library directories regardless of the freshness relationship between
>> e.g. a system libstdc++ and the one we're building can't possibly be an
>> overall improvement, it's only trading problems in some scenarios (where
>> we're building an older libstdc++) for problems in other scenarios
>> (where we're building a newer libstdc++).  The latter is unfortunately
>> far more likely, which is reason enough for the current arrangement, but
>> libstdc++ problems will likely only hit if the gap between system and
>> being-built libraries is large enough (say, new symbols in the newer
>> libstdc++ used by the compiler, but not available in the system
>> library).
> 
> If bootstrap doesn't actually need the target libraries, as seems to be the 
> case, then I think removing them from RPATH trades the former problem for no 
> problem.
> 
>> I'm really uncomfortable with this change, especially at this stage.
>> I'd much rather have a relatively obscure workaround for this relatively
>> obscure problem, while keeping the defaults that have accumulated lots
>> of testing on lots of configurations.
> 
> I'm happy to defer this change to GCC 15 stage 1.
> 
>> An idea that occurred to me is to have some configure option or just a
>> make variable that would be prepended to RPATH_ENVVAR, so that it would
>> preempt TARGET_LIB_PATH.  That would be a far more conservative change,
>> that I think we could make even at this stage.  WDYT?
> 
> I'm hoping for a fix that doesn't require individual users to know about 

Re: [PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12

2024-02-21 Thread Kito Cheng
LGTM for the patch

Li, Pan2  於 2024年2月21日 週三 12:31 寫道:

> Hi kito and juzhe.
>
> There may be 2 items for double-confirm. Thanks a lot.
>
> 1. Not very sure if we need to upgrade the version for
> __riscv_th_v_intrinsic.
>

Yes since 0.11 and 0.12 is not really compatible


2. Do we need to upgrade the even a newer version (like 1.0) for the GCC 14
> release, or we can do it later.
>

Yeah, Ideal case is we can update that before release made :p




> Pan
>
> -Original Message-
> From: Li, Pan2 
> Sent: Wednesday, February 21, 2024 12:27 PM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zh...@rivai.ai; Li, Pan2 ; Wang, Yanzhang <
> yanzhang.w...@intel.com>; kito.ch...@gmail.com
> Subject: [PATCH v1] RISC-V: Upgrade RVV intrinsic version to 0.12
>
> From: Pan Li 
>
> Upgrade the version of RVV intrinsic from 0.11 to 0.12.
>
> PR target/114017
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Upgrade
> the version to 0.12.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/predef-__riscv_v_intrinsic.c: Update the
> version to 0.12.
> * gcc.target/riscv/rvv/base/pr114017-1.c: New test.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/config/riscv/riscv-c.cc   |  2 +-
>  .../riscv/predef-__riscv_v_intrinsic.c|  2 +-
>  .../gcc.target/riscv/rvv/base/pr114017-1.c| 19 +++
>  3 files changed, 21 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
>
> diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
> index 3ef06dcfd2d..3755ec0b8ef 100644
> --- a/gcc/config/riscv/riscv-c.cc
> +++ b/gcc/config/riscv/riscv-c.cc
> @@ -139,7 +139,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
>  {
>builtin_define ("__riscv_vector");
>builtin_define_with_int_value ("__riscv_v_intrinsic",
> -riscv_ext_version_value (0, 11));
> +riscv_ext_version_value (0, 12));
>  }
>
> if (TARGET_XTHEADVECTOR)
> diff --git a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
> b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
> index dbbedf54f87..07f1f159a8f 100644
> --- a/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
> +++ b/gcc/testsuite/gcc.target/riscv/predef-__riscv_v_intrinsic.c
> @@ -3,7 +3,7 @@
>
>  int main () {
>
> -#if __riscv_v_intrinsic != 11000
> +#if __riscv_v_intrinsic != 12000
>  #error "__riscv_v_intrinsic"
>  #endif
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
> new file mode 100644
> index 000..8eee7c68f71
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114017-1.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8mf2_t
> +test (vuint16m1_t val, size_t shift, size_t vl)
> +{
> +#if __riscv_v_intrinsic == 11000
> +  #warning "RVV Intrinsics v0.11"
> +  return __riscv_vnclipu (val, shift, vl);
> +#endif
> +
> +#if __riscv_v_intrinsic == 12000
> +  #warning "RVV Intrinsics v0.12" /* { dg-warning "RVV Intrinsics v0.12"
> } */
> +  return __riscv_vnclipu (val, shift, 0, vl);
> +#endif
> +}
> +
> --
> 2.34.1
>
>


Re: [PATCH RFA] build: drop target libs from LD_LIBRARY_PATH [PR105688]

2024-02-21 Thread Jason Merrill

On 2/20/24 00:45, Alexandre Oliva wrote:

On Feb 16, 2024, Jason Merrill  wrote:


So, for stage2+, let's add just prev- libgcc.


I'm pretty sure this will break bootstrap-lean where libgcc_s isn't a
system library, and we're building post-bootstrap host tools :-(
We need the current stage lib after the prev stage is removed.


That's a good point, we should make sure it doesn't break.  It looks to 
me like stage3-bubble removes stage1 after we're done building stage3, 
which should be fine, but compare removes the stage2 libgcc that we 
might still need to run stage3.  So indeed I guess we still want both 
prev and current libgcc directories in RPATH to handle the case where 
we've removed the previous stage, as below.



I also doubt that TARGET_LIB_PATH was defined and used for no reason.
My hunch is that bootstrap options and/or targets that don't have these
libraries as system libraries will break in some obscure way without it.
But I don't have the bandwidth to track down the history behind their
inclusion.


That has not seemed to be the case in Iain's testing on a system without 
these libraries as system libraries.


I can't think of why we would need to depend on the current stage target 
libraries, and we already weren't depending on the previous stage target 
libraries.  I believe the only target code we run is tests, and if the 
tests need the target libraries in RPATH that should happen in the 
testsuite.


It's arguable that we should pass TARGET_LIB_PATH down to make it easier 
for the testsuites to find them, in case they are currently relying on 
them being part of RPATH.  My impression from Iain's testing is that 
this isn't actually needed.  I wouldn't mind keeping TARGET_LIB_PATH 
unused, but I'm not sure why that would be better than bringing it back 
if we turn out to need it.



I insist that the entire approach of choosing the same set of target
library directories regardless of the freshness relationship between
e.g. a system libstdc++ and the one we're building can't possibly be an
overall improvement, it's only trading problems in some scenarios (where
we're building an older libstdc++) for problems in other scenarios
(where we're building a newer libstdc++).  The latter is unfortunately
far more likely, which is reason enough for the current arrangement, but
libstdc++ problems will likely only hit if the gap between system and
being-built libraries is large enough (say, new symbols in the newer
libstdc++ used by the compiler, but not available in the system
library).


If bootstrap doesn't actually need the target libraries, as seems to be 
the case, then I think removing them from RPATH trades the former 
problem for no problem.



I'm really uncomfortable with this change, especially at this stage.
I'd much rather have a relatively obscure workaround for this relatively
obscure problem, while keeping the defaults that have accumulated lots
of testing on lots of configurations.


I'm happy to defer this change to GCC 15 stage 1.


An idea that occurred to me is to have some configure option or just a
make variable that would be prepended to RPATH_ENVVAR, so that it would
preempt TARGET_LIB_PATH.  That would be a far more conservative change,
that I think we could make even at this stage.  WDYT?


I'm hoping for a fix that doesn't require individual users to know about 
a workaround.


Jason

From 5312d534f1724c106feabebc9fdfa99e3717f859 Mon Sep 17 00:00:00 2001
From: Jason Merrill 
Date: Wed, 21 Feb 2024 23:03:36 +
Subject: [PATCH] lean
To: gcc-patches@gcc.gnu.org

ChangeLog:

	* Makefile.tpl:
---
 Makefile.tpl | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Makefile.tpl b/Makefile.tpl
index e3de2a7ba3b..674a6d7c711 100644
--- a/Makefile.tpl
+++ b/Makefile.tpl
@@ -650,10 +650,12 @@ HOST_LIB_PATH = [+ FOR host_modules +][+
   IF lib_path +]$(HOST_LIB_PATH_[+module+])[+ ENDIF lib_path +][+
   ENDFOR host_modules +]$(HOST_LIB_PATH_gcc)
 
-# libgcc is a target library in the current stage and a host library
-# in the previous stage (when bootstrapping).
+# When bootstrapping, the previous stage libgcc is a host library.  But with
+# bootstrap-lean we might remove the previous libgcc and still want to run the
+# current gcc that depends on it, so let's also include the current stage
+# libgcc to accommodate that situation.
 @if gcc-bootstrap
-HOST_LIB_PATH_gcc = $$r/$(HOST_SUBDIR)/prev-gcc$(GCC_SHLIB_SUBDIR):
+HOST_LIB_PATH_gcc = $$r/$(HOST_SUBDIR)/prev-gcc$(GCC_SHLIB_SUBDIR):$$r/$(HOST_SUBDIR)/gcc$(GCC_SHLIB_SUBDIR):
 @endif gcc-bootstrap
 
 [+ FOR host_modules +][+ IF lib_path +]
-- 
2.43.0



New German PO file for 'cpplib' (version 14.1-b20240218)

2024-02-21 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'cpplib' has been submitted
by the German team of translators.  The file is available at:

https://translationproject.org/latest/cpplib/de.po

(This file, 'cpplib-14.1-b20240218.de.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

https://translationproject.org/latest/cpplib/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

https://translationproject.org/domain/cpplib.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




Contents of PO file 'cpplib-14.1-b20240218.de.po'

2024-02-21 Thread Translation Project Robot


cpplib-14.1-b20240218.de.po.gz
Description: Binary data
The Translation Project robot, in the
name of your translation coordinator.



Re: [Committed V4 4/5] RISC-V: Quick and simple fixes to testcases that break due to reordering

2024-02-21 Thread Edwin Lu

On 2/21/2024 10:57 AM, Robin Dapp wrote:



For calling-convention-*.c, LGTM but one nit about the change log. Using just
**Update** here may make it hard for others to learn what you changed in
the file. You can say something similar to "Rearrange and adjust the
asm-checker times" or likewise. Of course, you can refine the
changelog when commit.

* gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c: update




Yes, agreed,  changes LGTM but please refine the commit message
slightly.  The first letter should also be capitalized I believe.

The rest of the series is already ACK'ed so I believe it's good to go now.
I didn't pay a lot of attention to the other commit messages.
In case they need refining you can do that still.



Thanks! I updated the changelogs and committed.

Edwin



Re: [PATCH] RISC-V: Add tests for constraints "i" and "s"

2024-02-21 Thread Fangrui Song
On Tue, Feb 13, 2024 at 10:36 PM Fangrui Song  wrote:
>
> The constraints "i" and "s" can be used with a symbol that binds
> externally, e.g.
> ```
> namespace ns { extern int var, a[4]; }
> void foo() {
>   asm(".pushsection .xxx,\"aw\"; .dc.a %0; .popsection" :: "s"(&ns::var));
>   asm(".reloc ., BFD_RELOC_NONE, %0" :: "s"(&ns::a[3]));
> }
> ```
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/asm-raw-symbol.c: New test.
> ---
>  gcc/doc/md.texi |  2 +-
>  gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c | 14 ++
>  2 files changed, 15 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index b0c61925120..c75e5bf259d 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -1947,7 +1947,7 @@ Integer constant that is valid as an immediate operand 
> in a 64-bit @code{MOV}
>  pseudo instruction
>
>  @item S
> -An absolute symbolic address or a label reference
> +A symbolic reference or label reference.
>
>  @item Y
>  Floating point constant zero
> diff --git a/gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c 
> b/gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
> new file mode 100644
> index 000..28305a8b1f0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/asm-raw-symbol.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-fpic" } */
> +
> +extern int var, arr[2][2];
> +
> +void
> +test (void)
> +{
> +  __asm__ ("@ %0" : : "i"(&var));
> +  __asm__ ("@ %0 %1 %2" : : "s"(&var), "s"(&arr[1][1]), "s"(test));
> +}
> +
> +/* { dg-final { scan-assembler "@ var arr\\+12 test" } } */
> +/* { dg-final { scan-assembler "@ var" } } */
> --
> 2.43.0.687.g38aa6559b0-goog
>

Ping:)


-- 
宋方睿


Re: [PATCH] Fix fortran/PR114024

2024-02-21 Thread Steve Kargl
On Wed, Feb 21, 2024 at 10:20:43PM +0100, Harald Anlauf wrote:
> On 2/21/24 22:00, Steve Kargl wrote:
> > Unfortunately, valgrind does not work on AMD FX-8350 cpu.
> 
> Do you mean valgrind does not work at all?
> For gcc, you need to configure --enable-valgrind-annotations
> to not get bogus warnings.

It does not work at all unless one tracks down an obscure
patch for valgrind.  The FX-8350 has a tbm instruction.
Everything on the system would need be compiled with -mno-tbm
(or -fno-tbm).

%  valgrind ./z
...
==88861== Process terminating with default action of signal 4 (SIGILL):
 dumping core
==88861==  Illegal opcode at address 0x4D30D87
==88861==at 0x4D30D87: ??? (in /lib/libc.so.7)
==88861==by 0x4D213DE: ??? (in /lib/libc.so.7)
==88861==by 0x4D2B935: ??? (in /lib/libc.so.7)
==88861==by 0x4D2C34E: ??? (in /lib/libc.so.7)
==88861==by 0x400AB8C: ??? (in /libexec/ld-elf.so.1)
==88861==by 0x4009828: ??? (in /libexec/ld-elf.so.1)
==88861==by 0x4006AE8: ??? (in /libexec/ld-elf.so.1)

 

> > memleak vs ICE.  I think I'll take one over the other.
> > Probably need to free code->expr3 before the copy.
> 
> Yep.
> 
> > I tried gfc_replace_expr in an earlier patch.  It did not
> > work.
> 
> 
> 
> > > - it still fails on the following code, because the traversal
> > >of the refs is incomplete / wrong:
> > > 
> > > program foo
> > > implicit none
> > > complex   :: cmp(3)
> > > real, pointer :: pp(:)
> > > class(*), allocatable :: uu(:)
> > > type t
> > >real :: re
> > >real :: im
> > > end type t
> > > type u
> > >type(t) :: tt(3)
> > > end type u
> > > type(u) :: cc
> > > 
> > > cmp = (3.45,6.78)
> > > cc% tt% re = cmp% re
> > > cc% tt% im = cmp% im
> > > allocate (pp, source = cc% tt% im)   ! ICE
> > 
> > cc%tt%im isn't a complex-part-ref, so this seems to
> > be a different (maybe related) issue.  Does the code
> > compile with 'source = (cc%tt%im)'?  If so, perhaps,
> > detecting a component reference and simply
> > wrapping it with parentheses can be done.
> 
> Yes, that's why I tried to make up the above example.
> I think %re and %im are not too special, they work
> here pretty much like component refs elsewhere.
> 

I see.  The %re and %im complex-part-ref correspond to 
ref->u.i == INQUIRY_RE and INQUIRY_IM, respectively.
A part-ref for a user-defined type doesn't have an
INQUIRY_xxx, so we'll need to see if there is a way to
easily identify, e.g., cc%tt%re from your testcase.  

-- 
Steve


Re: [PATCH v1 05/13] Reuse MinGW from i386 for AArch64

2024-02-21 Thread rep . dot . nop
On 21 February 2024 19:34:43 CET, Evgeny Karpov  
wrote:
>

Please use git send-email. Your mail ends up as empty as here, otherwise.

The ChangeLog has to be expressed in present tense, as mandated by the 
standard; s/Moved/Move/g etc.

In any sane world ( and in gcc ) to fold, respectively a folder, is something 
else compared to a directory ( which you probably mean when moving a file from 
one directory to another directory as you seem to do ).

Most of the free world has left COFF behind since several decades, so I won't 
comment on that. YMMV.

HTH


Re: [PATCH] Fix fortran/PR114024

2024-02-21 Thread Harald Anlauf

On 2/21/24 22:00, Steve Kargl wrote:

Unfortunately, valgrind does not work on AMD FX-8350 cpu.


Do you mean valgrind does not work at all?
For gcc, you need to configure --enable-valgrind-annotations
to not get bogus warnings.


memleak vs ICE.  I think I'll take one over the other.
Probably need to free code->expr3 before the copy.


Yep.


I tried gfc_replace_expr in an earlier patch.  It did not
work.





- it still fails on the following code, because the traversal
   of the refs is incomplete / wrong:

program foo
implicit none
complex   :: cmp(3)
real, pointer :: pp(:)
class(*), allocatable :: uu(:)
type t
   real :: re
   real :: im
end type t
type u
   type(t) :: tt(3)
end type u
type(u) :: cc

cmp = (3.45,6.78)
cc% tt% re = cmp% re
cc% tt% im = cmp% im
allocate (pp, source = cc% tt% im)   ! ICE


cc%tt%im isn't a complex-part-ref, so this seems to
be a different (maybe related) issue.  Does the code
compile with 'source = (cc%tt%im)'?  If so, perhaps,
detecting a component reference and simply
wrapping it with parentheses can be done.


Yes, that's why I tried to make up the above example.
I think %re and %im are not too special, they work
here pretty much like component refs elsewhere.




print *, pp
allocate (uu, source = cc% tt% im)   ! ICE


Ditto.  Not to mention I know nothing about the implementation
of CLASS in gfortran.



You can ignore this one for now.  It works if one places
parens around the source expr as for the other cases.

Harald




Re: [PATCH] Fix fortran/PR114024

2024-02-21 Thread Steve Kargl
On Wed, Feb 21, 2024 at 09:28:16PM +0100, Harald Anlauf wrote:
> On 2/21/24 20:41, Jerry D wrote:
> > On 2/21/24 10:30 AM, Steve Kargl wrote:
> > > I have attached a patch to PR114024, see
> > > 
> > > https://gcc.gnu.org/pipermail/gcc-bugs/2024-February/854651.html
> > > 
> > > The patch contains a new testcase and passes regression
> > > testing on x86_64-*-freebsd.  Could someone cast an eye
> > > over the patch and commit it?
> > > 
> > 
> > Hi Steve,
> > 
> > I looked it over and it looks reasonable.  I will try to apply it in the next few
> > days and test here. If OK, I will commit.
> > 
> > Jerry
> > 
> 
> Actually the patch has two issues:
> 
> - a minor one: a new front-end memleak which can be avoided by
>   using either gfc_replace_expr (see its other uses)
>   Hint: try valgrind on f951

Unfortunately, valgrind does not work on AMD FX-8350 cpu.
memleak vs ICE.  I think I'll take one over the other.
Probably need to free code->expr3 before the copy.
I tried gfc_replace_expr in an earlier patch.  It did not
work. 

> - it still fails on the following code, because the traversal
>   of the refs is incomplete / wrong:
> 
> program foo
>implicit none
>complex   :: cmp(3)
>real, pointer :: pp(:)
>class(*), allocatable :: uu(:)
>type t
>   real :: re
>   real :: im
>end type t
>type u
>   type(t) :: tt(3)
>end type u
>type(u) :: cc
> 
>cmp = (3.45,6.78)
>cc% tt% re = cmp% re
>cc% tt% im = cmp% im
>allocate (pp, source = cc% tt% im)   ! ICE

cc%tt%im isn't a complex-part-ref, so this seems to
be a different (maybe related) issue.  Does the code
compile with 'source = (cc%tt%im)'?  If so, perhaps,
detecting a component reference and simply
wrapping it with parentheses can be done.

>print *, pp
>allocate (uu, source = cc% tt% im)   ! ICE

Ditto.  Not to mention I know nothing about the implementation
of CLASS in gfortran.

-- 
Steve


Re: [PATCH 5/5] bpf: renamed coreout.* files to btfext-out.*.

2024-02-21 Thread rep . dot . nop
On 21 February 2024 18:16:30 CET, David Faust  wrote:
>
>
>On 2/20/24 02:24, Cupertino Miranda wrote:
>> gcc/ChangeLog:
>>  * config.gcc (target_gtfiles): changed coreout to btfext-out.
>>  (extra_objs): changed coreout to btfext-out.
>
>I think these entries should start with a capital letter ("Changed...").

Present tense in ChangeLog,  as mandated by the standard: /ed/s/ed//g

thanks,

>
>>  * config/bpf/coreout.cc: Renamed to btfext-out.cc
>>  * config/bpf/btfext-out.cc: Added
>>  * config/bpf/coreout.h: Renamed to btfext-out.h
>>  * config/bpf/btfext-out.h: Added
>>  * config/bpf/core-builtins.cc: Changed include
>>  * config/bpf/core-builtins.h: Changed include
>>  * config/bpf/t-bpf: Renamed file.
>



Re: [PATCH] Fix fortran/PR114024

2024-02-21 Thread Jerry D

On 2/21/24 12:28 PM, Harald Anlauf wrote:

On 2/21/24 20:41, Jerry D wrote:

On 2/21/24 10:30 AM, Steve Kargl wrote:

I have attached a patch to PR114024, see

https://gcc.gnu.org/pipermail/gcc-bugs/2024-February/854651.html

The patch contains a new testcase and passes regression
testing on x86_64-*-freebsd.  Could someone cast an eye
over the patch and commit it?



Hi Steve,

I looked it over and it looks reasonable.  I will try to apply it in the next few
days and test here. If OK, I will commit.

Jerry



Actually the patch has two issues:

- a minor one: a new front-end memleak which can be avoided by
   using either gfc_replace_expr (see its other uses)
   Hint: try valgrind on f951


Yes, I am learning to do that.



- it still fails on the following code, because the traversal
   of the refs is incomplete / wrong:

program foo
    implicit none
    complex   :: cmp(3)
    real, pointer :: pp(:)
    class(*), allocatable :: uu(:)
    type t
   real :: re
   real :: im
    end type t
    type u
   type(t) :: tt(3)
    end type u
    type(u) :: cc

    cmp = (3.45,6.78)
    cc% tt% re = cmp% re
    cc% tt% im = cmp% im
    allocate (pp, source = cc% tt% im)   ! ICE
    print *, pp
    allocate (uu, source = cc% tt% im)   ! ICE
end

This still crashes for me for the indicated cases.

Harald



Good catch.  I will hold off until that is figured out.

Jerry


Re: [PATCH] Fix fortran/PR114024

2024-02-21 Thread Harald Anlauf

On 2/21/24 20:41, Jerry D wrote:

On 2/21/24 10:30 AM, Steve Kargl wrote:

I have attached a patch to PR114024, see

https://gcc.gnu.org/pipermail/gcc-bugs/2024-February/854651.html

The patch contains a new testcase and passes regression
testing on x86_64-*-freebsd.  Could someone cast an eye
over the patch and commit it?



Hi Steve,

I looked it over and it looks reasonable.  I will try to apply it in the next few
days and test here. If OK, I will commit.


Jerry



Actually the patch has two issues:

- a minor one: a new front-end memleak which can be avoided by
  using either gfc_replace_expr (see its other uses)
  Hint: try valgrind on f951

- it still fails on the following code, because the traversal
  of the refs is incomplete / wrong:

program foo
   implicit none
   complex   :: cmp(3)
   real, pointer :: pp(:)
   class(*), allocatable :: uu(:)
   type t
  real :: re
  real :: im
   end type t
   type u
  type(t) :: tt(3)
   end type u
   type(u) :: cc

   cmp = (3.45,6.78)
   cc% tt% re = cmp% re
   cc% tt% im = cmp% im
   allocate (pp, source = cc% tt% im)   ! ICE
   print *, pp
   allocate (uu, source = cc% tt% im)   ! ICE
end

This still crashes for me for the indicated cases.

Harald




[committed] doc: Fix typos in -Wmismatched-dealloc docs

2024-02-21 Thread Jonathan Wakely
Pushed to trunk as obvious.

-- >8 --

gcc/ChangeLog:

* doc/invoke.texi (Warning Options): Fix typos.
---
 gcc/doc/invoke.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d75b28484bb..b4e4ee9fb81 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7262,7 +7262,7 @@ when non-existent profile data is justified.
 @item -Wmismatched-dealloc
 
 Warn for calls to deallocation functions with pointer arguments returned
-from from allocations functions for which the former isn't a suitable
+from allocation functions for which the former isn't a suitable
 deallocator.  A pair of functions can be associated as matching allocators
 and deallocators by use of attribute @code{malloc}.  Unless disabled by
 the @option{-fno-builtin} option the standard functions @code{calloc},
-- 
2.43.0



Re: [PATCH] Fix fortran/PR114024

2024-02-21 Thread Jerry D

On 2/21/24 10:30 AM, Steve Kargl wrote:

I have attached a patch to PR114024, see

https://gcc.gnu.org/pipermail/gcc-bugs/2024-February/854651.html

The patch contains a new testcase and passes regression
testing on x86_64-*-freebsd.  Could someone cast an eye
over the patch and commit it?



Hi Steve,

I looked it over and it looks reasonable.  I will try to apply it in the next few
days and test here. If OK, I will commit.


Jerry


Re: [PATCH] libgccjit: Fix get_size of size_t

2024-02-21 Thread Antoni Boucher
On Thu, 2023-12-07 at 19:57 -0500, David Malcolm wrote:
> On Thu, 2023-12-07 at 17:26 -0500, Antoni Boucher wrote:
> > Hi.
> > This patch fixes getting the size of size_t (bug 112910).
> > 
> > There's one issue with this patch: like every other feature that
> > checks
> > for target-specific stuff, it requires a compilation before
> > actually
> > fetching the size of the type.
> > Which means that getting the size before a compilation might be
> > wrong
> > (and I actually believe is wrong on x86-64).
> > 
> > I was wondering if we should always implicitly do the first
> > compilation to gather the correct info: this would fix this issue
> > and
> > all the others that we have due to that.
> > I'm not sure what would be the performance implication.
> 
> Maybe introduce a new class target_info which contains all the
> information we might want to find via a compilation, and have the
> top-
> level recording::context have a pointer to it, which starts as
> nullptr,
> but can be populated on-demand the first time something needs it?

That would mean that we'll need to populate it for every top-level
context, right? Would the idea be that we should then use child
contexts to have the proper information filled?
If so, how is this different than just compiling two contexts like what
I currently do?
This would also mean that we'll do an implicit compilation whenever we
use an API that needs this info, right? Wouldn't that be unexpected?

Thanks for the idea.

> 
> > 
> > Another solution that I have been thinking about for a while now
> > would
> > be to have another frontend libgccaot (I don't like that name),
> > which
> > is like libgccjit but removes the JIT part so that we get access to
> > the
> > target stuff directly and would remove the need for having a
> > separation
> > between recording and playback as far as I understand.
> > That's a long-term solution, but I wanted to share the idea now and
> > gather your thoughts on that.
> 
> FWIW the initial version of libgccjit didn't have a split between
> recording and playback; instead the client code had to pass in a
> callback to call into the various API functions (creating tree
> nodes).
> See:
> https://gcc.gnu.org/legacy-ml/gcc-patches/2013-10/msg00228.html
> 
> Dave
> 



Re: [PATCH V4 4/5] RISC-V: Quick and simple fixes to testcases that break due to reordering

2024-02-21 Thread Robin Dapp


> For calling-convention-*.c, LGTM but one nit about the change log. Using just
> **Update** here may make it hard for others to learn what you changed in
> the file. You can say something similar to "Rearrange and adjust the
> asm-checker times" or likewise. Of course, you can refine the
> changelog when commit.
>> * gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c: update
> 

Yes, agreed,  changes LGTM but please refine the commit message
slightly.  The first letter should also be capitalized I believe.

The rest of the series is already ACK'ed so I believe it's good to go now.
I didn't pay a lot of attention to the other commit messages.
In case they need refining you can do that still.  

Regards
 Robin



Re: [PATCH V4 1/5] RISC-V: Add non-vector types to dfa pipelines

2024-02-21 Thread Robin Dapp
OK.

Regards
 Robin



Re: [PATCH v1 05/13] Reuse MinGW from i386 for AArch64

2024-02-21 Thread Andrew Pinski
On Wed, Feb 21, 2024 at 10:38 AM Evgeny Karpov
 wrote:
>
>

In config/i386/winnt.cc there are many x86_64 comments and many
function names that use i386 in them. When moving the file, it seems better
to rename them and remove the references to x86.
I don't see any changes that rename the functions or comments there.

Thanks,
Andrew


[PATCH v1 13/13] Add aarch64-w64-mingw32 target to libgcc

2024-02-21 Thread Evgeny Karpov

From fa02812c6fb39fd409a27cd09664530f2fabd9e3 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Mon, 12 Feb 2024 15:22:47 +0100
Subject: [PATCH v1 13/13] Add aarch64-w64-mingw32 target to libgcc

Reuse MinGW definitions from i386 for libgcc. Move reused files to
libgcc/config/mingw folder.

libgcc/ChangeLog:

* config.host: Add aarch64-w64-mingw32 target. Adjust targets
after moving MinGW files.
* config/i386/t-gthr-win32: Moved to...
* config/mingw/t-gthr-win32: ...here.
* config/i386/t-mingw-pthread: Moved to...
* config/mingw/t-mingw-pthread: ...here.
* config/aarch64/t-no-eh: New file. EH is not yet implemented for
the target, and the default definition should be disabled.
---
 libgcc/config.host| 23 +++
 libgcc/config/aarch64/t-no-eh |  2 ++
 libgcc/config/{i386 => mingw}/t-gthr-win32|  0
 libgcc/config/{i386 => mingw}/t-mingw-pthread |  0
 4 files changed, 21 insertions(+), 4 deletions(-)
 create mode 100644 libgcc/config/aarch64/t-no-eh
 rename libgcc/config/{i386 => mingw}/t-gthr-win32 (100%)
 rename libgcc/config/{i386 => mingw}/t-mingw-pthread (100%)

diff --git a/libgcc/config.host b/libgcc/config.host
index 59a42d3a01f..3396a84893f 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -456,6 +456,21 @@ aarch64*-*-vxworks7*)
tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
tmake_file="${tmake_file} t-dfprules"
;;
+aarch64-*-mingw*)
+   case ${target_thread_file} in
+ win32)
+   tmake_thr_file="mingw/t-gthr-win32"
+   ;;
+ posix)
+   tmake_thr_file="mingw/t-mingw-pthread"
+   ;;
+   esac
+   tmake_file="${tmake_file} ${cpu_type}/t-no-eh ${tmake_thr_file}"
+   tmake_file="${tmake_file} t-dfprules"
+   tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+   tmake_file="${tmake_file} ${cpu_type}/t-lse"
+   tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
+   ;;
 alpha*-*-linux*)
tmake_file="${tmake_file} alpha/t-alpha alpha/t-ieee t-crtfm 
alpha/t-linux"
extra_parts="$extra_parts crtfastmath.o"
@@ -874,10 +889,10 @@ i[34567]86-*-mingw*)
fi
case ${target_thread_file} in
  win32)
-   tmake_thr_file="i386/t-gthr-win32"
+   tmake_thr_file="mingw/t-gthr-win32"
;;
  posix)
-   tmake_thr_file="i386/t-mingw-pthread"
+   tmake_thr_file="mingw/t-mingw-pthread"
;;
  mcf)
tmake_thr_file="i386/t-mingw-mcfgthread"
@@ -901,10 +916,10 @@ i[34567]86-*-mingw*)
 x86_64-*-mingw*)
case ${target_thread_file} in
  win32)
-   tmake_thr_file="i386/t-gthr-win32"
+   tmake_thr_file="mingw/t-gthr-win32"
;;
  posix)
-   tmake_thr_file="i386/t-mingw-pthread"
+   tmake_thr_file="mingw/t-mingw-pthread"
;;
  mcf)
tmake_thr_file="i386/t-mingw-mcfgthread"
diff --git a/libgcc/config/aarch64/t-no-eh b/libgcc/config/aarch64/t-no-eh
new file mode 100644
index 000..1802339a583
--- /dev/null
+++ b/libgcc/config/aarch64/t-no-eh
@@ -0,0 +1,2 @@
+# Not using EH
+LIB2ADDEH =
diff --git a/libgcc/config/i386/t-gthr-win32 b/libgcc/config/mingw/t-gthr-win32
similarity index 100%
rename from libgcc/config/i386/t-gthr-win32
rename to libgcc/config/mingw/t-gthr-win32
diff --git a/libgcc/config/i386/t-mingw-pthread 
b/libgcc/config/mingw/t-mingw-pthread
similarity index 100%
rename from libgcc/config/i386/t-mingw-pthread
rename to libgcc/config/mingw/t-mingw-pthread
-- 
2.25.1



[PATCH v1 12/13] aarch64: Add aarch64-w64-mingw32 target to libatomic

2024-02-21 Thread Evgeny Karpov

From d6de6d367f109e5e0b3b0a228e4f3411b53d8e7f Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Mon, 8 Jan 2024 22:19:45 +0100
Subject: [PATCH v1 12/13] aarch64: Add aarch64-w64-mingw32 target to libatomic

libatomic/ChangeLog:

* configure.tgt: Add aarch64-w64-mingw32 target.
---
 libatomic/configure.tgt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
index 4237f283fe4..eea4c25e061 100644
--- a/libatomic/configure.tgt
+++ b/libatomic/configure.tgt
@@ -44,7 +44,7 @@ case "${target_cpu}" in
   aarch64*)
ARCH=aarch64
case "${target}" in
-   aarch64*-*-linux*)
+   aarch64*-*-linux* | aarch64*-*-mingw*)
if test -n "$enable_aarch64_lse"; then
try_ifunc=yes
fi
-- 
2.25.1



[PATCH v1 11/13] aarch64: Build and add objects for Cygwin and MinGW for AArch64

2024-02-21 Thread Evgeny Karpov

From c00d64291446f722dd2268aaca6ca156e9888fe7 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 20 Feb 2024 13:55:51 +0100
Subject: [PATCH v1 11/13] aarch64: Build and add objects for Cygwin and MinGW
 for AArch64

gcc/ChangeLog:

* config.gcc: Build and add objects for Cygwin and MinGW. Add Cygwin
and MinGW options to the target.
---
 gcc/config.gcc | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 96e3508eb30..3e839eee369 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1270,6 +1270,11 @@ aarch64*-*-mingw*)
tm_file="${tm_file} mingw/mingw-stdint.h"
tmake_file="${tmake_file} aarch64/t-aarch64"
target_gtfiles="$target_gtfiles \$(srcdir)/config/mingw/winnt.cc"
+   extra_options="${extra_options} mingw/cygming.opt mingw/mingw.opt"
+   extra_objs="${extra_objs} winnt.o"
+   c_target_objs="${c_target_objs} msformat-c.o"
+   d_target_objs="${d_target_objs} winnt-d.o"
+   tmake_file="${tmake_file} mingw/t-cygming"
case ${enable_threads} in
  "" | yes | win32)
thread_file='win32'
-- 
2.25.1



[PATCH v1 10/13] Rename "x86 Windows Options" to "Cygwin and MinGW Options"

2024-02-21 Thread Evgeny Karpov

From 7d27cda54a56e93ba63c5cf4d47e5c865bdae525 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 20 Feb 2024 15:45:53 +0100
Subject: [PATCH v1 10/13] Rename "x86 Windows Options" to "Cygwin and MinGW
 Options"

Rename "x86 Windows Options" to "Cygwin and MinGW Options".
It will be used also for AArch64.

gcc/ChangeLog:

* config/i386/mingw-w64.opt.urls: Rename options' name and
regenerate option URLs.
* config/lynx.opt.urls: Likewise.
* config/mingw/cygming.opt.urls: Likewise.
* config/mingw/mingw.opt.urls: Likewise.
* doc/invoke.texi: Likewise.
---
 gcc/config/i386/mingw-w64.opt.urls |  2 +-
 gcc/config/lynx.opt.urls   |  2 +-
 gcc/config/mingw/cygming.opt.urls  | 18 +-
 gcc/config/mingw/mingw.opt.urls|  2 +-
 gcc/doc/invoke.texi| 12 ++--
 5 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/gcc/config/i386/mingw-w64.opt.urls 
b/gcc/config/i386/mingw-w64.opt.urls
index 6bb53ef29b2..5cceba1d1a1 100644
--- a/gcc/config/i386/mingw-w64.opt.urls
+++ b/gcc/config/i386/mingw-w64.opt.urls
@@ -1,5 +1,5 @@
 ; Autogenerated by regenerate-opt-urls.py from gcc/config/i386/mingw-w64.opt 
and generated HTML
 
 municode
-UrlSuffix(gcc/x86-Windows-Options.html#index-municode)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-municode)
 
diff --git a/gcc/config/lynx.opt.urls b/gcc/config/lynx.opt.urls
index 63e7b9c4b33..b547138f7ff 100644
--- a/gcc/config/lynx.opt.urls
+++ b/gcc/config/lynx.opt.urls
@@ -1,5 +1,5 @@
 ; Autogenerated by regenerate-opt-urls.py from gcc/config/lynx.opt and 
generated HTML
 
 mthreads
-UrlSuffix(gcc/x86-Windows-Options.html#index-mthreads-1)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-mthreads-1)
 
diff --git a/gcc/config/mingw/cygming.opt.urls 
b/gcc/config/mingw/cygming.opt.urls
index 87799befe3c..c624e22e442 100644
--- a/gcc/config/mingw/cygming.opt.urls
+++ b/gcc/config/mingw/cygming.opt.urls
@@ -1,30 +1,30 @@
 ; Autogenerated by regenerate-opt-urls.py from gcc/config/i386/cygming.opt and 
generated HTML
 
 mconsole
-UrlSuffix(gcc/x86-Windows-Options.html#index-mconsole)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-mconsole)
 
 mdll
-UrlSuffix(gcc/x86-Windows-Options.html#index-mdll)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-mdll)
 
 mnop-fun-dllimport
-UrlSuffix(gcc/x86-Windows-Options.html#index-mnop-fun-dllimport)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-mnop-fun-dllimport)
 
 ; skipping UrlSuffix for 'mthreads' due to multiple URLs:
+;   duplicate: 'gcc/Cygwin-and-MinGW-Options.html#index-mthreads-1'
 ;   duplicate: 'gcc/x86-Options.html#index-mthreads'
-;   duplicate: 'gcc/x86-Windows-Options.html#index-mthreads-1'
 
 mwin32
-UrlSuffix(gcc/x86-Windows-Options.html#index-mwin32)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-mwin32)
 
 mwindows
-UrlSuffix(gcc/x86-Windows-Options.html#index-mwindows)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-mwindows)
 
 mpe-aligned-commons
-UrlSuffix(gcc/x86-Windows-Options.html#index-mpe-aligned-commons)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-mpe-aligned-commons)
 
 fset-stack-executable
-UrlSuffix(gcc/x86-Windows-Options.html#index-fno-set-stack-executable)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-fno-set-stack-executable)
 
 fwritable-relocated-rdata
-UrlSuffix(gcc/x86-Windows-Options.html#index-fno-writable-relocated-rdata)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-fno-writable-relocated-rdata)
 
diff --git a/gcc/config/mingw/mingw.opt.urls b/gcc/config/mingw/mingw.opt.urls
index 2cbbaadf310..f8ee5be6a53 100644
--- a/gcc/config/mingw/mingw.opt.urls
+++ b/gcc/config/mingw/mingw.opt.urls
@@ -1,7 +1,7 @@
 ; Autogenerated by regenerate-opt-urls.py from gcc/config/i386/mingw.opt and 
generated HTML
 
 mcrtdll=
-UrlSuffix(gcc/x86-Windows-Options.html#index-mcrtdll)
+UrlSuffix(gcc/Cygwin-and-MinGW-Options.html#index-mcrtdll)
 
 ; skipping UrlSuffix for 'pthread' due to multiple URLs:
 ;   duplicate: 'gcc/Link-Options.html#index-pthread-1'
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e18886e0ac7..eaa1e3dc6d2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1492,7 +1492,7 @@ See RS/6000 and PowerPC Options.
 -mindirect-branch-cs-prefix -mneeded -mno-direct-extern-access
 -munroll-only-small-loops -mlam=@var{choice}}
 
-@emph{x86 Windows Options}
+@emph{Cygwin and MinGW Options}
 @gccoptlist{-mconsole  -mcrtdll=@var{library}  -mdll
 -mnop-fun-dllimport  -mthread
 -municode  -mwin32  -mwindows  -fno-set-stack-executable}
@@ -20970,6 +20970,7 @@ platform.
 * C6X Options::
 * CRIS Options::
 * C-SKY Options::
+* Cygwin and MinGW Options::
 * Darwin Options::
 * DEC Alpha Options::
 * eBPF Options::
@@ -21014,7 +21015,6 @@ platform.
 * VMS Options::
 * VxWorks Options::
 * x86 Options::
-* x86 Windows Options::
 * Xstormy16 Options::
 * Xtensa Options::
 * zSeries Options::
@@ -36094,10 +36094,10 @@ positions 62:48 can be used for metadata; 

[PATCH v1 08/13] aarch64: Add Cygwin and MinGW environments for AArch64

2024-02-21 Thread Evgeny Karpov

From 1ea6efa6f88d131884ecef21c4b5d2ecbab14ea7 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 20 Feb 2024 18:06:36 +0100
Subject: [PATCH v1 08/13] aarch64: Add Cygwin and MinGW environments for
 AArch64

Define Cygwin and MinGW environment such as types, SEH definitions,
shared libraries, etc.

gcc/ChangeLog:

* config.gcc: Add Cygwin and MinGW definitions.
* config/aarch64/aarch64-protos.h
(mingw_pe_maybe_record_exported_symbol): Declare functions
which are used in Cygwin and MinGW environment.
(mingw_pe_section_type_flags): Likewise.
(mingw_pe_unique_section): Likewise.
(mingw_pe_encode_section_info): Likewise.
* config/aarch64/cygming.h: New file.
---
 gcc/config.gcc  |   1 +
 gcc/config/aarch64/aarch64-protos.h |   5 +
 gcc/config/aarch64/cygming.h| 178 
 3 files changed, 184 insertions(+)
 create mode 100644 gcc/config/aarch64/cygming.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 498ee702607..96e3508eb30 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1265,6 +1265,7 @@ aarch64*-*-linux*)
;;
 aarch64*-*-mingw*)
tm_file="${tm_file} aarch64/aarch64-coff.h"
+   tm_file="${tm_file} aarch64/cygming.h"
tm_file="${tm_file} mingw/mingw32.h"
tm_file="${tm_file} mingw/mingw-stdint.h"
tmake_file="${tmake_file} aarch64/t-aarch64"
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index a0b142e0b94..6d85452b0f6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1109,6 +1109,11 @@ extern void aarch64_output_patchable_area (unsigned int, 
bool);
 
 extern void aarch64_adjust_reg_alloc_order ();
 
+extern void mingw_pe_maybe_record_exported_symbol (tree, const char *, int);
+extern unsigned int mingw_pe_section_type_flags (tree, const char *, int);
+extern void mingw_pe_unique_section (tree, int);
+extern void mingw_pe_encode_section_info (tree, rtx, int);
+
 bool aarch64_optimize_mode_switching (aarch64_mode_entity);
 void aarch64_restore_za (rtx);
 
diff --git a/gcc/config/aarch64/cygming.h b/gcc/config/aarch64/cygming.h
new file mode 100644
index 000..cf47184eb66
--- /dev/null
+++ b/gcc/config/aarch64/cygming.h
@@ -0,0 +1,178 @@
+/* Operating system specific defines to be used when targeting GCC for
+   hosting on Windows32, using a Unix style C library and tools.
+   Copyright (C) 1995-2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#ifndef GCC_AARCH64_CYGMING_H
+#define GCC_AARCH64_CYGMING_H
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DINFO_TYPE_NONE
+
+#define FASTCALL_PREFIX '@'
+
+#define print_reg(rtx, code, file)
+
+#define SYMBOL_FLAG_DLLIMPORT 0
+#define SYMBOL_FLAG_DLLEXPORT 0
+
+#define SYMBOL_REF_DLLEXPORT_P(X) \
+   ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLEXPORT) != 0)
+
+#undef TARGET_SEH
+#define TARGET_SEH 0
+
+#define SSE_REGNO_P(N) 0
+#define GENERAL_REGNO_P(N) 0
+#define SEH_MAX_FRAME_SIZE 0
+
+#undef DEFAULT_ABI
+#define DEFAULT_ABI MS_ABI
+
+#undef TARGET_PECOFF
+#define TARGET_PECOFF 1
+
+#include 
+#ifdef __MINGW32__
+#include 
+#endif
+
+extern void mingw_pe_asm_named_section (const char *, unsigned int, tree);
+extern void mingw_pe_declare_function_type (FILE *file, const char *name,
+   int pub);
+
+#define TARGET_ASM_NAMED_SECTION  mingw_pe_asm_named_section
+
+/* Select attributes for named sections.  */
+#define TARGET_SECTION_TYPE_FLAGS  mingw_pe_section_type_flags
+
+#define TARGET_ASM_UNIQUE_SECTION mingw_pe_unique_section
+#define TARGET_ENCODE_SECTION_INFO  mingw_pe_encode_section_info
+
+/* Declare the type properly for any external libcall.  */
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
+  mingw_pe_declare_function_type (FILE, XSTR (FUN, 0), 1)
+
+#define TARGET_OS_CPP_BUILTINS()   \
+  do   \
+{  \
+  builtin_define ("__MSVCRT__");   \
+  builtin_define ("__MINGW32__");  \
+  builtin_define ("_WIN32");   \
+  builtin_define_std ("WIN32");

[PATCH v1 09/13] aarch64: Add SEH to machine_function

2024-02-21 Thread Evgeny Karpov

From e82f58dc1ef9405a86f6e77c29fe2359128a22aa Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 20 Feb 2024 18:10:08 +0100
Subject: [PATCH v1 09/13] aarch64: Add SEH to machine_function

SEH is not enabled for the aarch64-w64-mingw32 target yet. However, it
needs to be declared in machine_function so that winnt.cc can be reused.

gcc/ChangeLog:

* config/aarch64/aarch64.h (struct seh_frame_state): Declare SEH
structure in machine_function.
(GTY): Add SEH field.
---
 gcc/config/aarch64/aarch64.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 36916e7a97d..7f025e74a66 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1055,6 +1055,9 @@ struct GTY (()) aarch64_frame
   bool is_scs_enabled;
 };
 
+/* Private to winnt.cc.  */
+struct seh_frame_state;
+
 #ifdef hash_set_h
 typedef struct GTY (()) machine_function
 {
@@ -1095,6 +1098,9 @@ typedef struct GTY (()) machine_function
  still exists and still fulfils its original purpose. the same register
  can be reused by other code.  */
   rtx_insn *advsimd_zero_insn;
+
+  /* During SEH output, this is non-null.  */
+  struct seh_frame_state * GTY ((skip (""))) seh;
 } machine_function;
 #endif
 #endif
-- 
2.25.1



[PATCH v1 05/13] Reuse MinGW from i386 for AArch64

2024-02-21 Thread Evgeny Karpov

From 408ca45f7c5c6e58208a7b8004f764dc0c1afdd3 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 20 Feb 2024 18:15:27 +0100
Subject: [PATCH v1 05/13] Reuse MinGW from i386 for AArch64

This patch creates a new config/mingw directory to share MinGW
related definitions, and moves there the corresponding existing files
from config/i386. It also makes use of them on the new
aarch64-w64-mingw32 target.

gcc/ChangeLog:

* config.gcc: Adjust targets after moving MinGW related files
from i386 to mingw folder.
* config/i386/cygming.opt: Moved to...
* config/mingw/cygming.opt: ...here.
* config/i386/cygming.opt.urls: Moved to...
* config/mingw/cygming.opt.urls: ...here.
* config/i386/cygwin-d.cc: Moved to...
* config/mingw/cygwin-d.cc: ...here.
* config/i386/mingw-stdint.h: Moved to...
* config/mingw/mingw-stdint.h: ...here.
* config/i386/mingw.opt: Moved to...
* config/mingw/mingw.opt: ...here.
* config/i386/mingw.opt.urls: Moved to...
* config/mingw/mingw.opt.urls: ...here.
* config/i386/mingw32.h: Moved to...
* config/mingw/mingw32.h: ...here.
* config/i386/msformat-c.cc: Moved to...
* config/mingw/msformat-c.cc: ...here.
* config/i386/t-cygming: Moved to...
* config/mingw/t-cygming: ...here and updated.
* config/i386/winnt-cxx.cc: Moved to...
* config/mingw/winnt-cxx.cc: ...here.
* config/i386/winnt-d.cc: Moved to...
* config/mingw/winnt-d.cc: ...here.
* config/i386/winnt-stubs.cc: Moved to...
* config/mingw/winnt-stubs.cc: ...here.
* config/i386/winnt.cc: Moved to...
* config/mingw/winnt.cc: ...here.
---
 gcc/config.gcc  | 25 -
 gcc/config/{i386 => mingw}/cygming.opt  |  0
 gcc/config/{i386 => mingw}/cygming.opt.urls |  0
 gcc/config/{i386 => mingw}/cygwin-d.cc  |  0
 gcc/config/{i386 => mingw}/mingw-stdint.h   |  0
 gcc/config/{i386 => mingw}/mingw.opt|  0
 gcc/config/{i386 => mingw}/mingw.opt.urls   |  0
 gcc/config/{i386 => mingw}/mingw32.h|  0
 gcc/config/{i386 => mingw}/msformat-c.cc|  0
 gcc/config/{i386 => mingw}/t-cygming| 23 ++-
 gcc/config/{i386 => mingw}/winnt-cxx.cc |  0
 gcc/config/{i386 => mingw}/winnt-d.cc   |  0
 gcc/config/{i386 => mingw}/winnt-stubs.cc   |  0
 gcc/config/{i386 => mingw}/winnt.cc |  0
 14 files changed, 27 insertions(+), 21 deletions(-)
 rename gcc/config/{i386 => mingw}/cygming.opt (100%)
 rename gcc/config/{i386 => mingw}/cygming.opt.urls (100%)
 rename gcc/config/{i386 => mingw}/cygwin-d.cc (100%)
 rename gcc/config/{i386 => mingw}/mingw-stdint.h (100%)
 rename gcc/config/{i386 => mingw}/mingw.opt (100%)
 rename gcc/config/{i386 => mingw}/mingw.opt.urls (100%)
 rename gcc/config/{i386 => mingw}/mingw32.h (100%)
 rename gcc/config/{i386 => mingw}/msformat-c.cc (100%)
 rename gcc/config/{i386 => mingw}/t-cygming (73%)
 rename gcc/config/{i386 => mingw}/winnt-cxx.cc (100%)
 rename gcc/config/{i386 => mingw}/winnt-d.cc (100%)
 rename gcc/config/{i386 => mingw}/winnt-stubs.cc (100%)
 rename gcc/config/{i386 => mingw}/winnt.cc (100%)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 34c7be72fb6..498ee702607 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1265,7 +1265,10 @@ aarch64*-*-linux*)
;;
 aarch64*-*-mingw*)
tm_file="${tm_file} aarch64/aarch64-coff.h"
+   tm_file="${tm_file} mingw/mingw32.h"
+   tm_file="${tm_file} mingw/mingw-stdint.h"
tmake_file="${tmake_file} aarch64/t-aarch64"
+   target_gtfiles="$target_gtfiles \$(srcdir)/config/mingw/winnt.cc"
case ${enable_threads} in
  "" | yes | win32)
thread_file='win32'
@@ -2161,9 +2164,9 @@ i[4567]86-wrs-vxworks*|x86_64-wrs-vxworks7*)
 i[34567]86-*-cygwin*)
tm_file="${tm_file} i386/unix.h i386/bsd.h i386/gas.h i386/cygming.h 
i386/cygwin.h i386/cygwin-stdint.h"
xm_file=i386/xm-cygwin.h
-   tmake_file="${tmake_file} i386/t-cygming t-slibgcc"
-   target_gtfiles="$target_gtfiles \$(srcdir)/config/i386/winnt.cc"
-   extra_options="${extra_options} i386/cygming.opt i386/cygwin.opt"
+   tmake_file="${tmake_file} mingw/t-cygming t-slibgcc"
+   target_gtfiles="$target_gtfiles \$(srcdir)/config/mingw/winnt.cc"
+   extra_options="${extra_options} mingw/cygming.opt i386/cygwin.opt"
extra_objs="${extra_objs} winnt.o winnt-stubs.o"
c_target_objs="${c_target_objs} msformat-c.o"
cxx_target_objs="${cxx_target_objs} winnt-cxx.o msformat-c.o"
@@ -2179,9 +2182,9 @@ x86_64-*-cygwin*)
need_64bit_isa=yes
tm_file="${tm_file} i386/unix.h i386/bsd.h i386/gas.h i386/cygming.h 
i386/cygwin.h i386/cygwin-w64.h i386/cygwin-stdint.h"
xm_file=i386/xm-cygwin.h
-   tmake_file="${tmake_file} i386/t-cygming t-slibgcc"
-   target_gtfiles="$target_gtfiles 

[PATCH v1 07/13] Exclude i386 functionality from aarch64 build

2024-02-21 Thread Evgeny Karpov

From 1b7451707709d6745efb52b54b17e798d7375aac Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 20 Feb 2024 17:31:58 +0100
Subject: [PATCH v1 07/13] Exclude i386 functionality from aarch64 build

ix86_get_callcvt is used for i386 build and needs to be excluded to
reuse winnt.cc in aarch64.

gcc/ChangeLog:

* config/mingw/winnt.cc (defined): Use TARGET_ARM64_MS_ABI to
exclude ix86_get_callcvt.
(i386_pe_maybe_mangle_decl_assembler_name): Likewise.
(i386_pe_mangle_decl_assembler_name): Likewise.
---
 gcc/config/mingw/winnt.cc | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc
index 1ed383155d0..bd86cfb1156 100644
--- a/gcc/config/mingw/winnt.cc
+++ b/gcc/config/mingw/winnt.cc
@@ -224,6 +224,8 @@ gen_stdcall_or_fastcall_suffix (tree decl, tree id, bool 
fastcall)
   return get_identifier (new_str);
 }
 
+#if !defined (TARGET_ARM64_MS_ABI)
+
 /* Maybe decorate and get a new identifier for the DECL of a stdcall or
fastcall function. The original identifier is supplied in ID. */
 
@@ -250,6 +252,8 @@ i386_pe_maybe_mangle_decl_assembler_name (tree decl, tree 
id)
   return new_id;
 }
 
+#endif
+
 /* Emit an assembler directive to set symbol for DECL visibility to
the visibility type VIS, which must not be VISIBILITY_DEFAULT.
As for PE there is no hidden support in gas, we just warn for
@@ -266,6 +270,8 @@ i386_pe_assemble_visibility (tree decl, int)
  "in this configuration; ignored");
 }
 
+#if !defined (TARGET_ARM64_MS_ABI)
+
 /* This is used as a target hook to modify the DECL_ASSEMBLER_NAME
in the language-independent default hook
langhooks,c:lhd_set_decl_assembler_name ()
@@ -278,6 +284,8 @@ i386_pe_mangle_decl_assembler_name (tree decl, tree id)
   return (new_id ? new_id : id);
 }
 
+#endif
+
 /* This hook behaves the same as varasm.cc/assemble_name(), but
generates the name into memory rather than outputting it to
a file stream.  */
-- 
2.25.1



[PATCH v1 06/13] Rename section and encoding functions from i386 which will be used in aarch64

2024-02-21 Thread Evgeny Karpov

From 69561787206b968b5a86c48ca9c9f9073761ed78 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 20 Feb 2024 17:22:31 +0100
Subject: [PATCH v1 06/13] Rename section and encoding functions from i386
 which will be used in aarch64

gcc/ChangeLog:

* config/i386/cygming.h (SUBTARGET_ENCODE_SECTION_INFO):
Rename functions in mingw folder which will be reused for
aarch64.
(TARGET_ASM_UNIQUE_SECTION): Likewise.
(TARGET_ASM_NAMED_SECTION): Likewise.
(TARGET_SECTION_TYPE_FLAGS): Likewise.
(ASM_DECLARE_COLD_FUNCTION_NAME): Likewise.
(ASM_OUTPUT_EXTERNAL_LIBCALL): Likewise.
* config/i386/i386-protos.h (i386_pe_unique_section):
Rename into ...
(mingw_pe_unique_section): ... this.
(i386_pe_declare_function_type): Rename into ...
(mingw_pe_declare_function_type): ... this.
(i386_pe_encode_section_info): Rename into ...
(mingw_pe_encode_section_info): ... this.
(i386_pe_maybe_record_exported_symbol): Rename into ...
(mingw_pe_maybe_record_exported_symbol): ... this.
(i386_pe_section_type_flags): Rename into ...
(mingw_pe_section_type_flags): ... this.
(i386_pe_asm_named_section): Rename into ...
(mingw_pe_asm_named_section): ... this.
* config/mingw/winnt.cc (i386_pe_encode_section_info):
Rename into ...
(mingw_pe_encode_section_info): ... this.
(i386_pe_unique_section): Rename into ...
(mingw_pe_unique_section): ... this.
(i386_pe_section_type_flags): Rename into ...
(mingw_pe_section_type_flags): ... this.
(i386_pe_asm_named_section): Rename into ...
(mingw_pe_asm_named_section): ... this.
(i386_pe_asm_output_aligned_decl_common): Likewise.
(i386_pe_declare_function_type): Rename into ...
(mingw_pe_declare_function_type): ... this.
(i386_pe_maybe_record_exported_symbol): Rename into ...
(mingw_pe_maybe_record_exported_symbol): ... this.
(i386_pe_start_function): Likewise.
* varasm.cc (switch_to_comdat_section): Likewise.
---
 gcc/config/i386/cygming.h | 18 +-
 gcc/config/i386/i386-protos.h | 12 ++--
 gcc/config/mingw/winnt.cc | 22 +++---
 gcc/varasm.cc |  2 +-
 4 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
index 1af5bc380a5..beedf7c398a 100644
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -219,7 +219,7 @@ do {
\
section and we need to set DECL_SECTION_NAME so we do that here.
Note that we can be called twice on the same decl.  */
 
-#define SUBTARGET_ENCODE_SECTION_INFO  i386_pe_encode_section_info
+#define SUBTARGET_ENCODE_SECTION_INFO  mingw_pe_encode_section_info
 
 /* Local and global relocs can be placed always into readonly memory
for PE-COFF targets.  */
@@ -235,7 +235,7 @@ do {
\
 #undef ASM_DECLARE_OBJECT_NAME
 #define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL)\
 do {   \
-  i386_pe_maybe_record_exported_symbol (DECL, NAME, 1);\
+  mingw_pe_maybe_record_exported_symbol (DECL, NAME, 1);   \
   ASM_OUTPUT_LABEL ((STREAM), (NAME)); \
 } while (0)
 
@@ -283,16 +283,16 @@ do {  \
 /* Windows uses explicit import from shared libraries.  */
 #define MULTIPLE_SYMBOL_SPACES 1
 
-#define TARGET_ASM_UNIQUE_SECTION i386_pe_unique_section
+#define TARGET_ASM_UNIQUE_SECTION mingw_pe_unique_section
 #define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
 
 #define SUPPORTS_ONE_ONLY 1
 
 /* Switch into a generic section.  */
-#define TARGET_ASM_NAMED_SECTION  i386_pe_asm_named_section
+#define TARGET_ASM_NAMED_SECTION  mingw_pe_asm_named_section
 
 /* Select attributes for named sections.  */
-#define TARGET_SECTION_TYPE_FLAGS  i386_pe_section_type_flags
+#define TARGET_SECTION_TYPE_FLAGS  mingw_pe_section_type_flags
 
 /* Write the extra assembler code needed to declare a function
properly.  */
@@ -307,7 +307,7 @@ do {\
 #define ASM_DECLARE_COLD_FUNCTION_NAME(FILE, NAME, DECL)   \
   do   \
 {  \
-  i386_pe_declare_function_type (FILE, NAME, 0);   \
+  mingw_pe_declare_function_type (FILE, NAME, 0);  \
   i386_pe_seh_cold_init (FILE, NAME);  \
   ASM_OUTPUT_LABEL (FILE, NAME);   \
 }  \
@@ -333,7 +333,7 @@ do {\
 
 /* Declare 

[PATCH v1 04/13] aarch64: Add aarch64-w64-mingw32 COFF

2024-02-21 Thread Evgeny Karpov

From 55fd2a63afa9abb3543d714b6f5925efd2682e08 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Wed, 21 Feb 2024 12:20:46 +0100
Subject: [PATCH v1 04/13] aarch64: Add aarch64-w64-mingw32 COFF

Define ASM specifics for the COFF format on AArch64.

gcc/ChangeLog:

* config.gcc: Add COFF format support definitions.
* config/aarch64/aarch64-coff.h: New file.
---
 gcc/config.gcc|  1 +
 gcc/config/aarch64/aarch64-coff.h | 92 +++
 2 files changed, 93 insertions(+)
 create mode 100644 gcc/config/aarch64/aarch64-coff.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 2a9e4c44f50..34c7be72fb6 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1264,6 +1264,7 @@ aarch64*-*-linux*)
TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'`
;;
 aarch64*-*-mingw*)
+   tm_file="${tm_file} aarch64/aarch64-coff.h"
tmake_file="${tmake_file} aarch64/t-aarch64"
case ${enable_threads} in
  "" | yes | win32)
diff --git a/gcc/config/aarch64/aarch64-coff.h 
b/gcc/config/aarch64/aarch64-coff.h
new file mode 100644
index 000..d91bc36b67b
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-coff.h
@@ -0,0 +1,92 @@
+/* Machine description for AArch64 architecture.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   .  */
+
+#ifndef GCC_AARCH64_COFF_H
+#define GCC_AARCH64_COFF_H
+
+#include "aarch64.h"
+
+#ifndef LOCAL_LABEL_PREFIX
+# define LOCAL_LABEL_PREFIX""
+#endif
+
+/* Using long long breaks -ansi and -std=c90, so these will need to be
+   made conditional for an LLP64 ABI.  */
+#undef SIZE_TYPE
+#define SIZE_TYPE  "long long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE   "long long int"
+
+#define TARGET_64BIT 1
+#undef LONG_TYPE_SIZE
+#define LONG_TYPE_SIZE 32
+
+#ifndef ASM_GENERATE_INTERNAL_LABEL
+# define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM)  \
+  sprintf (STRING, "*%s%s%u", LOCAL_LABEL_PREFIX, PREFIX, (unsigned int)(NUM))
+#endif
+
+#define ASM_OUTPUT_ALIGN(STREAM, POWER)\
+  fprintf (STREAM, "\t.align\t%d\n", (int)POWER)
+
+/* Output a common block.  */
+#ifndef ASM_OUTPUT_COMMON
+# define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED)\
+{  \
+  fprintf (STREAM, "\t.comm\t");   \
+  assemble_name (STREAM, NAME);\
+  asm_fprintf (STREAM, ", %d, %d\n",   \
+  (int)(ROUNDED), (int)(SIZE));\
+}
+#endif
+
+/* Output a local common block.  /bin/as can't do this, so hack a
+   `.space' into the bss segment.  Note that this is *bad* practice,
+   which is guaranteed NOT to work since it doesn't define STATIC
+   COMMON space but merely STATIC BSS space.  */
+#ifndef ASM_OUTPUT_ALIGNED_LOCAL
+# define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN)   \
+{  \
+  switch_to_section (bss_section); \
+  ASM_OUTPUT_ALIGN (STREAM, floor_log2 (ALIGN / BITS_PER_UNIT));   \
+  ASM_OUTPUT_LABEL (STREAM, NAME); \
+  fprintf (STREAM, "\t.space\t%d\n", (int)(SIZE)); \
+}
+#endif
+
+#define ASM_OUTPUT_SKIP(STREAM, NBYTES)\
+  fprintf (STREAM, "\t.space\t%d  // skip\n", (int) (NBYTES))
+
+#define ASM_OUTPUT_TYPE_DIRECTIVE(STREAM, NAME, TYPE)
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL)
+
+#define TEXT_SECTION_ASM_OP"\t.text"
+#define DATA_SECTION_ASM_OP"\t.data"
+#define BSS_SECTION_ASM_OP "\t.bss"
+
+#define CTORS_SECTION_ASM_OP   "\t.section\t.ctors, \"aw\""
+#define DTORS_SECTION_ASM_OP   "\t.section\t.dtors, \"aw\""
+
+#define GLOBAL_ASM_OP "\t.global\t"
+
+#undef SUPPORTS_INIT_PRIORITY
+#define SUPPORTS_INIT_PRIORITY 0
+
+#endif
-- 
2.25.1



[PATCH] Fix fortran/PR114024

2024-02-21 Thread Steve Kargl
I have attached a patch to PR114024, see

https://gcc.gnu.org/pipermail/gcc-bugs/2024-February/854651.html

The patch contains a new testcase and passes regression
testing on x86_64-*-freebsd.  Could someone cast an eye
over the patch and commit it?

-- 
Steve


[PATCH v1 03/13] aarch64: Mark x18 register as a fixed register for MS ABI

2024-02-21 Thread Evgeny Karpov

From 72ca3f49e3eef9b18946b8d4e77019c1441e1a97 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 20 Feb 2024 15:30:33 +0100
Subject: [PATCH v1 03/13] aarch64: Mark x18 register as a fixed register for
 MS ABI

Define the MS ABI for aarch64-w64-mingw32.
Adjust FIXED_REGISTERS and STATIC_CHAIN_REGNUM for different ABIs.
The X18 register is reserved on Windows for the TEB.

gcc/ChangeLog:

* config.gcc: Define TARGET_ARM64_MS_ABI when Arm64 MS ABI is
used.
* config/aarch64/aarch64.h (FIXED_X18): Define if X18
register is fixed.
(CALL_USED_X18): Define if X18 register is call used.
(FIXED_REGISTERS): Adjust FIXED_REGISTERS for different ABIs.
(STATIC_CHAIN_REGNUM): Define STATIC_CHAIN_REGNUM according to
ABI.
---
 gcc/config.gcc   |  1 +
 gcc/config/aarch64/aarch64.h | 19 ---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 092a091595d..2a9e4c44f50 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1276,6 +1276,7 @@ aarch64*-*-mingw*)
default_use_cxa_atexit=yes
need_64bit_isa=yes
user_headers_inc_next_post="${user_headers_inc_next_post} float.h"
+   tm_defines="${tm_defines} TARGET_ARM64_MS_ABI=1"
;;
 aarch64*-wrs-vxworks*)
 tm_file="${tm_file} elfos.h aarch64/aarch64-elf.h"
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 45e901cda64..36916e7a97d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -536,11 +536,20 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = 
AARCH64_FL_SM_OFF;
register.  GCC internally uses the poly_int variable aarch64_sve_vg
instead.  */
 
+/* X18 reserved for the TEB on Windows.  */
+#ifdef TARGET_ARM64_MS_ABI
+# define FIXED_X18 1
+# define CALL_USED_X18 0
+#else
+# define FIXED_X18 0
+# define CALL_USED_X18 1
+#endif
+
 #define FIXED_REGISTERS\
   {\
 0, 0, 0, 0,   0, 0, 0, 0,  /* R0 - R7 */   \
 0, 0, 0, 0,   0, 0, 0, 0,  /* R8 - R15 */  \
-0, 0, 0, 0,   0, 0, 0, 0,  /* R16 - R23 */ \
+0, 0, FIXED_X18, 0,   0, 0, 0, 0,  /* R16 - R23.  */   \
 0, 0, 0, 0,   0, 1, 0, 1,  /* R24 - R30, SP */ \
 0, 0, 0, 0,   0, 0, 0, 0,   /* V0 - V7 */   \
 0, 0, 0, 0,   0, 0, 0, 0,   /* V8 - V15 */ \
@@ -564,7 +573,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = 
AARCH64_FL_SM_OFF;
   {\
 1, 1, 1, 1,   1, 1, 1, 1,  /* R0 - R7 */   \
 1, 1, 1, 1,   1, 1, 1, 1,  /* R8 - R15 */  \
-1, 1, 1, 0,   0, 0, 0, 0,  /* R16 - R23 */ \
+1, 1, CALL_USED_X18, 0, 0,   0, 0, 0, /* R16 - R23.  */   \
 0, 0, 0, 0,   0, 1, 1, 1,  /* R24 - R30, SP */ \
 1, 1, 1, 1,   1, 1, 1, 1,  /* V0 - V7 */   \
 0, 0, 0, 0,   0, 0, 0, 0,  /* V8 - V15 */  \
@@ -642,7 +651,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = 
AARCH64_FL_SM_OFF;
uses alloca.  */
 #define EXIT_IGNORE_STACK  (cfun->calls_alloca)
 
-#define STATIC_CHAIN_REGNUMR18_REGNUM
+#ifdef TARGET_ARM64_MS_ABI
+# define STATIC_CHAIN_REGNUM   R17_REGNUM
+#else
+# define STATIC_CHAIN_REGNUM   R18_REGNUM
+#endif
 #define HARD_FRAME_POINTER_REGNUM  R29_REGNUM
 #define FRAME_POINTER_REGNUM   SFP_REGNUM
 #define STACK_POINTER_REGNUM   SP_REGNUM
-- 
2.25.1



Re: [PATCH v1 00/13] Add aarch64-w64-mingw32 target

2024-02-21 Thread Maxim Kuvyrkov
Hi Evgeny,

Great job!

For reference, here is a test build of this patch series using Linaro Toolchain 
CI: 
https://ci.linaro.org/view/tcwg-build/job/tcwg_gnu_mingw_build--master-woa64-build/9/artifact/artifacts/
 

--
Maxim Kuvyrkov
https://www.linaro.org

> On Feb 21, 2024, at 21:47, Evgeny Karpov  wrote:
> 
> Hello,
> 
> We would like to draw your attention to the review of changes for the
> new GCC target, aarch64-w64-mingw32. The new target will be
> supported, tested, added to CI, and maintained by Linaro. This marks
> the first of three planned patch series contributing to the GCC C
> compiler's support for Windows Arm64.
> 
> 1. Minimal aarch64-w64-mingw32 C implementation to cross-compile
> hello-world with libgcc for Windows Arm64 using MinGW.
> 2. Extension of the aarch64-w64-mingw32 C implementation to
> cross-compile OpenSSL, OpenBLAS, FFmpeg, and libjpeg-turbo. All
> packages successfully pass tests.
> 3. Addition of call stack support for debugging, resolution of
> optimization issues in the C compiler, and DLL export/import for the
> aarch64-w64-mingw32 target.
> 
> This patch series introduces the 1st point, which involves building
> hello-world for the aarch64-w64-mingw32 target. The patch depends on
> the binutils changes for the aarch64-w64-mingw32 target that have
> already been merged.
> 
> The binutils should include recent relocation fixes.
> f87eaf8ff3995a5888c6dc4996a20c770e6bcd36
> aarch64: Add new relocations and limit COFF AArch64 relocation offsets
> 
> The series is structured in a way to trivially show that it should not
> affect any other targets.
> 
> In this patch, several changes have been made to support the
> aarch64-w64-mingw32 target for GCC. The modifications include the
> definition of the MS ABI for aarch64, adjustments to FIXED_REGISTERS
> and STATIC_CHAIN_REGNUM for different ABIs, and specific definitions
> for COFF format on AArch64. Additionally, the patch reuses MinGW
>  types and definitions from i386, relocating them to a new
> mingw folder for shared usage between both targets.
> 
> MinGW-specific options have been introduced for AArch64, along with
> override options for aarch64-w64-mingw32. Builtin stack probing for
> AArch64 has been enabled as an alternative for chkstk. Symbol name
> encoding and section information handling for aarch64-w64-mingw32 have
> been incorporated, and the MinGW environment has been added, which
> will also be utilized for defining the Cygwin environment in the
> future.
> 
> The patch includes renaming "x86 Windows Options" to "Cygwin and MinGW
> Options," which now encompasses AArch64 as well. AArch64-specific
> Cygwin and MinGW Options have been introduced for the unique
> requirements of the AArch64 architecture.
> 
> Function type declaration and named sections support have been added.
> The necessary objects for Cygwin and MinGW have been built for the
> aarch64-w64-mingw32 target, and relevant files such as msformat-c.cc
> and winnt-d.cc have been moved to the mingw folder for reuse in
> AArch64.
> 
> Furthermore, the aarch64-w64-mingw32 target has been included in both
> libatomic and libgcc, ensuring support for the AArch64 architecture
> within these libraries. These changes collectively enhance the
> capabilities of GCC for the specified target.
> 
> Coauthors: Zac Walker ,
> Mark Harmstone   and
> Ron Riddle 
> 
> Refactored, prepared, and validated by 
> Radek Barton  and 
> Evgeny Karpov 
> 
> Special thanks to the Linaro GNU toolchain team for internal review
> and assistance in preparing the patch series!
> 
> Regards,
> Evgeny
> 
> 
> Zac Walker (13):
>  Introduce aarch64-w64-mingw32 target
>  aarch64: The aarch64-w64-mingw32 target implements the MS ABI
>  aarch64: Mark x18 register as a fixed register for MS ABI
>  aarch64: Add aarch64-w64-mingw32 COFF
>  Reuse MinGW from i386 for AArch64
>  Rename section and encoding functions from i386 which will be used in
>aarch64
>  Exclude i386 functionality from aarch64 build
>  aarch64: Add Cygwin and MinGW environments for AArch64
>  aarch64: Add SEH to machine_function
>  Rename "x86 Windows Options" to "Cygwin and MinGW Options"
>  aarch64: Build and add objects for Cygwin and MinGW for AArch64
>  aarch64: Add aarch64-w64-mingw32 target to libatomic
>  Add aarch64-w64-mingw32 target to libgcc
> 
> fixincludes/mkfixinc.sh   |   3 +-
> gcc/config.gcc|  47 +++--
> gcc/config/aarch64/aarch64-coff.h |  92 +
> gcc/config/aarch64/aarch64-opts.h |   7 +
> gcc/config/aarch64/aarch64-protos.h   |   5 +
> gcc/config/aarch64/aarch64.h  |  25 ++-
> gcc/config/aarch64/cygming.h  | 178 ++
> gcc/config/i386/cygming.h |  18 +-
> gcc/config/i386/cygming.opt.urls  |  30 ---
> gcc/config/i386/i386-protos.h |  12 +-
> 

[PATCH v1 02/13] aarch64: The aarch64-w64-mingw32 target implements

2024-02-21 Thread Evgeny Karpov

From 5cab07f01f66ba162b7d542e1a61c96f49942331 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 20 Feb 2024 15:32:08 +0100
Subject: [PATCH v1 02/13] aarch64: The aarch64-w64-mingw32 target implements
 the MS ABI

Two ABIs for aarch64 have been defined for different platforms.

gcc/ChangeLog:

* config/aarch64/aarch64-opts.h (enum calling_abi): Define
two ABIs.
---
 gcc/config/aarch64/aarch64-opts.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-opts.h 
b/gcc/config/aarch64/aarch64-opts.h
index a05c0d3ded1..77e3eae9595 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -131,4 +131,11 @@ enum aarch64_early_ra_scope {
   AARCH64_EARLY_RA_NONE
 };
 
+/* Available call ABIs.  */
+enum calling_abi
+{
+  AARCH64_EABI = 0,
+  MS_ABI = 1
+};
+
 #endif
-- 
2.25.1



Re: [PATCH v1 01/13] Introduce aarch64-w64-mingw32 target

2024-02-21 Thread Andrew Pinski
On Wed, Feb 21, 2024 at 10:17 AM Evgeny Karpov
 wrote:
>
>

> need_64bit_isa=yes

This is not needed in the patch as it is only used for x86_64 targets.

Should you make sure nobody specifies the big-endian target:
aarch64_be-w64-mingw32  ?

Thanks,
Andrew Pinski


[PATCH v1 01/13] Introduce aarch64-w64-mingw32 target

2024-02-21 Thread Evgeny Karpov

From c5fec28077184119bc81d927e6062704c1796446 Mon Sep 17 00:00:00 2001
From: Zac Walker 
Date: Tue, 30 Jan 2024 06:42:46 +0100
Subject: [PATCH v1 01/13] Introduce aarch64-w64-mingw32 target

Add the initial aarch64-w64-mingw32 target for gcc.

fixincludes/ChangeLog:

* mkfixinc.sh: Extend for *-mingw32* targets.

gcc/ChangeLog:

* config.gcc: Add aarch64-w64-mingw32 target.
---
 fixincludes/mkfixinc.sh |  3 +--
 gcc/config.gcc  | 14 ++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/fixincludes/mkfixinc.sh b/fixincludes/mkfixinc.sh
index df90720b716..7112f4dcd64 100755
--- a/fixincludes/mkfixinc.sh
+++ b/fixincludes/mkfixinc.sh
@@ -12,8 +12,7 @@ target=fixinc.sh
 # Check for special fix rules for particular targets
 case $machine in
 i?86-*-cygwin* | \
-i?86-*-mingw32* | \
-x86_64-*-mingw32* | \
+*-mingw32* | \
 powerpc-*-eabisim* | \
 powerpc-*-eabi*| \
 powerpc-*-rtems*   | \
diff --git a/gcc/config.gcc b/gcc/config.gcc
index a0f9c672308..092a091595d 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1263,6 +1263,20 @@ aarch64*-*-linux*)
done
TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'`
;;
+aarch64*-*-mingw*)
+   tmake_file="${tmake_file} aarch64/t-aarch64"
+   case ${enable_threads} in
+ "" | yes | win32)
+   thread_file='win32'
+   ;;
+ posix)
+   thread_file='posix'
+   ;;
+   esac
+   default_use_cxa_atexit=yes
+   need_64bit_isa=yes
+   user_headers_inc_next_post="${user_headers_inc_next_post} float.h"
+   ;;
 aarch64*-wrs-vxworks*)
 tm_file="${tm_file} elfos.h aarch64/aarch64-elf.h"
 tm_file="${tm_file} vx-common.h vxworks.h aarch64/aarch64-vxworks.h"
-- 
2.25.1



[PATCH v1 00/13] Add aarch64-w64-mingw32 target

2024-02-21 Thread Evgeny Karpov
Hello,

We would like to draw your attention to the review of changes for the
new GCC target, aarch64-w64-mingw32. The new target will be
supported, tested, added to CI, and maintained by Linaro. This marks
the first of three planned patch series contributing to the GCC C
compiler's support for Windows Arm64.

1. Minimal aarch64-w64-mingw32 C implementation to cross-compile
hello-world with libgcc for Windows Arm64 using MinGW.
2. Extension of the aarch64-w64-mingw32 C implementation to
cross-compile OpenSSL, OpenBLAS, FFmpeg, and libjpeg-turbo. All
packages successfully pass tests.
3. Addition of call stack support for debugging, resolution of
optimization issues in the C compiler, and DLL export/import for the
aarch64-w64-mingw32 target.

This patch series introduces the 1st point, which involves building
hello-world for the aarch64-w64-mingw32 target. The patch depends on
the binutils changes for the aarch64-w64-mingw32 target that have
already been merged.

The binutils should include recent relocation fixes.
f87eaf8ff3995a5888c6dc4996a20c770e6bcd36
aarch64: Add new relocations and limit COFF AArch64 relocation offsets

The series is structured in a way to trivially show that it should not
affect any other targets.

In this patch, several changes have been made to support the
aarch64-w64-mingw32 target for GCC. The modifications include the
definition of the MS ABI for aarch64, adjustments to FIXED_REGISTERS
and STATIC_CHAIN_REGNUM for different ABIs, and specific definitions
for COFF format on AArch64. Additionally, the patch reuses MinGW
<stdint> types and definitions from i386, relocating them to a new
mingw folder for shared usage between both targets.

MinGW-specific options have been introduced for AArch64, along with
override options for aarch64-w64-mingw32. Builtin stack probing for
AArch64 has been enabled as an alternative for chkstk. Symbol name
encoding and section information handling for aarch64-w64-mingw32 have
been incorporated, and the MinGW environment has been added, which
will also be utilized for defining the Cygwin environment in the
future.

The patch includes renaming "x86 Windows Options" to "Cygwin and MinGW
Options," which now encompasses AArch64 as well. AArch64-specific
Cygwin and MinGW Options have been introduced for the unique
requirements of the AArch64 architecture.

Function type declaration and named sections support have been added.
The necessary objects for Cygwin and MinGW have been built for the
aarch64-w64-mingw32 target, and relevant files such as msformat-c.cc
and winnt-d.cc have been moved to the mingw folder for reuse in
AArch64.

Furthermore, the aarch64-w64-mingw32 target has been included in both
libatomic and libgcc, ensuring support for the AArch64 architecture
within these libraries. These changes collectively enhance the
capabilities of GCC for the specified target.

Coauthors: Zac Walker, Mark Harmstone and Ron Riddle

Refactored, prepared, and validated by Radek Barton and Evgeny Karpov

Special thanks to the Linaro GNU toolchain team for internal review
and assistance in preparing the patch series!

Regards,
Evgeny


Zac Walker (13):
  Introduce aarch64-w64-mingw32 target
  aarch64: The aarch64-w64-mingw32 target implements the MS ABI
  aarch64: Mark x18 register as a fixed register for MS ABI
  aarch64: Add aarch64-w64-mingw32 COFF
  Reuse MinGW from i386 for AArch64
  Rename section and encoding functions from i386 which will be used in
aarch64
  Exclude i386 functionality from aarch64 build
  aarch64: Add Cygwin and MinGW environments for AArch64
  aarch64: Add SEH to machine_function
  Rename "x86 Windows Options" to "Cygwin and MinGW Options"
  aarch64: Build and add objects for Cygwin and MinGW for AArch64
  aarch64: Add aarch64-w64-mingw32 target to libatomic
  Add aarch64-w64-mingw32 target to libgcc

 fixincludes/mkfixinc.sh   |   3 +-
 gcc/config.gcc|  47 +++--
 gcc/config/aarch64/aarch64-coff.h |  92 +
 gcc/config/aarch64/aarch64-opts.h |   7 +
 gcc/config/aarch64/aarch64-protos.h   |   5 +
 gcc/config/aarch64/aarch64.h  |  25 ++-
 gcc/config/aarch64/cygming.h  | 178 ++
 gcc/config/i386/cygming.h |  18 +-
 gcc/config/i386/cygming.opt.urls  |  30 ---
 gcc/config/i386/i386-protos.h |  12 +-
 gcc/config/i386/mingw-w64.opt.urls|   2 +-
 gcc/config/lynx.opt.urls  |   2 +-
 gcc/config/{i386 => mingw}/cygming.opt|   0
 gcc/config/mingw/cygming.opt.urls |  30 +++
 gcc/config/{i386 => mingw}/cygwin-d.cc|   0
 gcc/config/{i386 => mingw}/mingw-stdint.h |   0
 gcc/config/{i386 => mingw}/mingw.opt  |   0
 gcc/config/{i386 => mingw}/mingw.opt.urls |   2 +-
 gcc/config/{i386 => mingw}/mingw32.h  |   0
 

[PATCH v4 5/5] arm: Add support for MVE Tail-Predicated Low Overhead Loops

2024-02-21 Thread Andre Vieira

This patch adds support for MVE Tail-Predicated Low Overhead Loops by using the
doloop functionality added to support predicated vectorized hardware loops.

gcc/ChangeLog:

* config/arm/arm-protos.h (arm_target_bb_ok_for_lob): Change
declaration to pass basic_block.
(arm_attempt_dlstp_transform): New declaration.
* config/arm/arm.cc (TARGET_LOOP_UNROLL_ADJUST): Define targethook.
(TARGET_PREDICT_DOLOOP_P): Likewise.
(arm_target_bb_ok_for_lob): Adapt condition.
(arm_mve_get_vctp_lanes): New function.
(arm_dl_usage_type): New internal enum.
(arm_get_required_vpr_reg): New function.
(arm_get_required_vpr_reg_param): New function.
(arm_get_required_vpr_reg_ret_val): New function.
(arm_mve_get_loop_vctp): New function.
(arm_mve_insn_predicated_by): New function.
(arm_mve_across_lane_insn_p): New function.
(arm_mve_load_store_insn_p): New function.
(arm_mve_impl_pred_on_outputs_p): New function.
(arm_mve_impl_pred_on_inputs_p): New function.
(arm_last_vect_def_insn): New function.
(arm_mve_impl_predicated_p): New function.
(arm_mve_check_reg_origin_is_num_elems): New function.
(arm_mve_dlstp_check_inc_counter): New function.
(arm_mve_dlstp_check_dec_counter): New function.
(arm_mve_loop_valid_for_dlstp): New function.
(arm_predict_doloop_p): New function.
(arm_loop_unroll_adjust): New function.
(arm_emit_mve_unpredicated_insn_to_seq): New function.
(arm_mve_get_vctp_vec_form): New function.
(arm_attempt_dlstp_transform): New function.
* config/arm/arm.opt (mdlstp): New option.
* config/arm/iterators.md (dlstp_elemsize, letp_num_lanes,
letp_num_lanes_neg, letp_num_lanes_minus_1): New attributes.
(DLSTP, LETP): New iterators.
(predicated_doloop_end_internal): New pattern.
(dlstp_insn): New pattern.
* config/arm/thumb2.md (doloop_end): Adapt to support tail-predicated
loops.
(doloop_begin): Likewise.
* config/arm/types.md (mve_misc): New mve type to represent
predicated_loop_end insn sequences.
* config/arm/unspecs.md:
(DLSTP8, DLSTP16, DLSTP32, DLSTP64,
LETP8, LETP16, LETP32, LETP64): New unspecs for DLSTP and LETP.

gcc/testsuite/ChangeLog:

* gcc.target/arm/lob.h: Add new helpers.
* gcc.target/arm/lob1.c: Use new helpers.
* gcc.target/arm/lob6.c: Likewise.
* gcc.target/arm/dlstp-compile-asm-1.c: New test.
* gcc.target/arm/dlstp-compile-asm-2.c: New test.
* gcc.target/arm/dlstp-compile-asm-3.c: New test.
* gcc.target/arm/dlstp-int8x16.c: New test.
* gcc.target/arm/dlstp-int8x16-run.c: New test.
* gcc.target/arm/dlstp-int16x8.c: New test.
* gcc.target/arm/dlstp-int16x8-run.c: New test.
* gcc.target/arm/dlstp-int32x4.c: New test.
* gcc.target/arm/dlstp-int32x4-run.c: New test.
* gcc.target/arm/dlstp-int64x2.c: New test.
* gcc.target/arm/dlstp-int64x2-run.c: New test.
* gcc.target/arm/dlstp-invalid-asm.c: New test.

Co-authored-by: Stam Markianos-Wright 

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 2cd560c9925..34d6be76e94 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -65,8 +65,8 @@ extern void arm_emit_speculation_barrier_function (void);
 extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *);
 extern bool arm_q_bit_access (void);
 extern bool arm_ge_bits_access (void);
-extern bool arm_target_insn_ok_for_lob (rtx);
-
+extern bool arm_target_bb_ok_for_lob (basic_block);
+extern int arm_attempt_dlstp_transform (rtx);
 #ifdef RTX_CODE
 enum reg_class
 arm_mode_base_reg_class (machine_mode);
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index e5a944486d7..4fdcf5ed82a 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -668,6 +668,12 @@ static const scoped_attribute_specs *const arm_attribute_table[] =
 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
 
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST arm_loop_unroll_adjust
+
+#undef TARGET_PREDICT_DOLOOP_P
+#define TARGET_PREDICT_DOLOOP_P arm_predict_doloop_p
+
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
 
@@ -34483,19 +34489,1279 @@ arm_invalid_within_doloop (const rtx_insn *insn)
 }
 
 bool
-arm_target_insn_ok_for_lob (rtx insn)
-{
-  basic_block bb = BLOCK_FOR_INSN (insn);
-  /* Make sure the basic block of the target insn is a simple latch
- having as single predecessor and successor the body of the loop
- itself.  Only simple loops with a single basic block as body are
- supported for 'low over head loop' making sure that LE target is
- above LE 

[PATCH v4 4/5] arm: Fix a wrong attribute use and remove unused unspecs and iterators

2024-02-21 Thread Andre Vieira

This patch fixes the erroneous use of a mode attribute without a mode iterator
in the pattern and removes unused unspecs and iterators.

gcc/ChangeLog:

* config/arm/iterators.md (supf): Remove VMLALDAVXQ_U, VMLALDAVXQ_P_U,
VMLALDAVAXQ_U cases.
(VMLALDAVXQ): Remove iterator.
(VMLALDAVXQ_P): Likewise.
(VMLALDAVAXQ): Likewise.
* config/arm/mve.md (mve_vstrwq_p_fv4sf): Replace use of 
mode iterator attribute with V4BI mode.
* config/arm/unspecs.md (VMLALDAVXQ_U, VMLALDAVXQ_P_U,
VMLALDAVAXQ_U): Remove unused unspecs.

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 22b3ddf5637..3206bcab4cf 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -2370,7 +2370,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		   (VSUBQ_S "s") (VSUBQ_U "u") (VADDVAQ_S "s")
 		   (VADDVAQ_U "u") (VADDLVAQ_S "s") (VADDLVAQ_U "u")
 		   (VBICQ_N_S "s") (VBICQ_N_U "u") (VMLALDAVQ_U "u")
-		   (VMLALDAVQ_S "s") (VMLALDAVXQ_U "u") (VMLALDAVXQ_S "s")
+		   (VMLALDAVQ_S "s") (VMLALDAVXQ_S "s")
 		   (VMOVNBQ_U "u") (VMOVNBQ_S "s") (VMOVNTQ_U "u")
 		   (VMOVNTQ_S "s") (VORRQ_N_S "s") (VORRQ_N_U "u")
 		   (VQMOVNBQ_U "u") (VQMOVNBQ_S "s") (VQMOVNTQ_S "s")
@@ -2412,8 +2412,8 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		   (VREV16Q_M_S "s") (VREV16Q_M_U "u")
 		   (VQRSHRNTQ_N_U "u") (VMOVNTQ_M_U "u") (VMOVLBQ_M_U "u")
 		   (VMLALDAVAQ_U "u") (VQSHRNBQ_N_U "u") (VSHRNBQ_N_U "u")
-		   (VRSHRNBQ_N_U "u") (VMLALDAVXQ_P_U "u")
-		   (VMVNQ_M_N_U "u") (VQSHRNTQ_N_U "u") (VMLALDAVAXQ_U "u")
+		   (VRSHRNBQ_N_U "u")
+		   (VMVNQ_M_N_U "u") (VQSHRNTQ_N_U "u")
 		   (VQMOVNTQ_M_U "u") (VSHRNTQ_N_U "u") (VCVTMQ_M_S "s")
 		   (VCVTMQ_M_U "u") (VCVTNQ_M_S "s") (VCVTNQ_M_U "u")
 		   (VCVTPQ_M_S "s") (VCVTPQ_M_U "u") (VADDLVAQ_P_S "s")
@@ -2762,7 +2762,6 @@ (define_int_iterator VSUBQ_N [VSUBQ_N_S VSUBQ_N_U])
 (define_int_iterator VADDLVAQ [VADDLVAQ_S VADDLVAQ_U])
 (define_int_iterator VBICQ_N [VBICQ_N_S VBICQ_N_U])
 (define_int_iterator VMLALDAVQ [VMLALDAVQ_U VMLALDAVQ_S])
-(define_int_iterator VMLALDAVXQ [VMLALDAVXQ_U VMLALDAVXQ_S])
 (define_int_iterator VMOVNBQ [VMOVNBQ_U VMOVNBQ_S])
 (define_int_iterator VMOVNTQ [VMOVNTQ_S VMOVNTQ_U])
 (define_int_iterator VORRQ_N [VORRQ_N_U VORRQ_N_S])
@@ -2817,11 +2816,9 @@ (define_int_iterator VMLALDAVAQ [VMLALDAVAQ_S VMLALDAVAQ_U])
 (define_int_iterator VQSHRNBQ_N [VQSHRNBQ_N_U VQSHRNBQ_N_S])
 (define_int_iterator VSHRNBQ_N [VSHRNBQ_N_U VSHRNBQ_N_S])
 (define_int_iterator VRSHRNBQ_N [VRSHRNBQ_N_S VRSHRNBQ_N_U])
-(define_int_iterator VMLALDAVXQ_P [VMLALDAVXQ_P_U VMLALDAVXQ_P_S])
 (define_int_iterator VQMOVNTQ_M [VQMOVNTQ_M_U VQMOVNTQ_M_S])
 (define_int_iterator VMVNQ_M_N [VMVNQ_M_N_U VMVNQ_M_N_S])
 (define_int_iterator VQSHRNTQ_N [VQSHRNTQ_N_U VQSHRNTQ_N_S])
-(define_int_iterator VMLALDAVAXQ [VMLALDAVAXQ_S VMLALDAVAXQ_U])
 (define_int_iterator VSHRNTQ_N [VSHRNTQ_N_S VSHRNTQ_N_U])
 (define_int_iterator VCVTMQ_M [VCVTMQ_M_S VCVTMQ_M_U])
 (define_int_iterator VCVTNQ_M [VCVTNQ_M_S VCVTNQ_M_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index d7bdcd862f8..9fe51298cdc 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -4605,7 +4605,7 @@ (define_insn "mve_vstrwq_p_fv4sf"
   [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
 	(unspec:V4SI
 	 [(match_operand:V4SF 1 "s_register_operand" "w")
-	  (match_operand: 2 "vpr_register_operand" "Up")
+	  (match_operand:V4BI 2 "vpr_register_operand" "Up")
 	  (match_dup 0)]
 	 VSTRWQ_F))]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index b9db306c067..46ac8b37157 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -717,7 +717,6 @@ (define_c_enum "unspec" [
   VCVTBQ_F16_F32
   VCVTTQ_F16_F32
   VMLALDAVQ_U
-  VMLALDAVXQ_U
   VMLALDAVXQ_S
   VMLALDAVQ_S
   VMLSLDAVQ_S
@@ -934,7 +933,6 @@ (define_c_enum "unspec" [
   VSHRNBQ_N_S
   VRSHRNBQ_N_S
   VRSHRNBQ_N_U
-  VMLALDAVXQ_P_U
   VMLALDAVXQ_P_S
   VQMOVNTQ_M_U
   VQMOVNTQ_M_S
@@ -943,7 +941,6 @@ (define_c_enum "unspec" [
   VQSHRNTQ_N_U
   VQSHRNTQ_N_S
   VMLALDAVAXQ_S
-  VMLALDAVAXQ_U
   VSHRNTQ_N_S
   VSHRNTQ_N_U
   VCVTBQ_M_F16_F32


Re: [Committed V2] RISC-V: Specify mtune and march for PR113742

2024-02-21 Thread Edwin Lu

Committed

Edwin

On 2/20/2024 5:36 PM, Kito Cheng wrote:

LGTM, thanks for fixing that issue :)

On Wed, Feb 21, 2024 at 6:03 AM Edwin Lu  wrote:

The testcase pr113742.c is failing for 32 bit targets due to the following cc1
error:
cc1: error: ABI requries '-march=rv64'

Specify '-march=rv64gc' with '-mtune=sifive-p600-series'

V1: https://gcc.gnu.org/pipermail/gcc-patches/2024-February/645609.html

 PR target/113742

gcc/testsuite/ChangeLog:

 * gcc.target/riscv/pr113742.c: change mcpu to mtune and add march

Signed-off-by: Edwin Lu 
---
V1: use require-effective-target
V2: switch to specifying march and mtune
---
  gcc/testsuite/gcc.target/riscv/pr113742.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/pr113742.c 
b/gcc/testsuite/gcc.target/riscv/pr113742.c
index ab8934c2a8a..573afd6f0ad 100644
--- a/gcc/testsuite/gcc.target/riscv/pr113742.c
+++ b/gcc/testsuite/gcc.target/riscv/pr113742.c
@@ -1,4 +1,4 @@
-//* { dg-do compile } */
-/* { dg-options "-O2 -finstrument-functions -mabi=lp64d -mcpu=sifive-p670" } */
+/* { dg-do compile } */
+/* { dg-options "-O2 -finstrument-functions -march=rv64gc -mabi=lp64d 
-mtune=sifive-p600-series" } */

  void foo(void) {}
--
2.34.1



[PATCH v4 3/5] arm: Annotate instructions with mve_safe_imp_xlane_pred

2024-02-21 Thread Andre Vieira

This patch annotates some MVE across lane instructions with a new attribute.
We use this attribute to let the compiler know that these instructions can be
safely implicitly predicated when tail predicating if their operands are
guaranteed to have zeroed tail predicated lanes.  These instructions were
selected because having the value 0 in those lanes or 'tail-predicating' those
lanes has the same effect.

gcc/ChangeLog:

* config/arm/arm.md (mve_safe_imp_xlane_pred): New attribute.
* config/arm/iterators.md (mve_vmaxmin_safe_imp): New iterator
attribute.
* config/arm/mve.md (vaddvq_s, vaddvq_u, vaddlvq_s, vaddlvq_u,
vaddvaq_s, vaddvaq_u, vmaxavq_s, vmaxvq_u, vmladavq_s, vmladavq_u,
vmladavxq_s, vmlsdavq_s, vmlsdavxq_s, vaddlvaq_s, vaddlvaq_u,
vmlaldavq_u, vmlaldavq_s, vmlaldavq_u, vmlaldavxq_s, vmlsldavq_s,
vmlsldavxq_s, vrmlaldavhq_u, vrmlaldavhq_s, vrmlaldavhxq_s,
vrmlsldavhq_s, vrmlsldavhxq_s, vrmlaldavhaq_s, vrmlaldavhaq_u,
vrmlaldavhaxq_s, vrmlsldavhaq_s, vrmlsldavhaxq_s, vabavq_s, vabavq_u,
vmladavaq_u, vmladavaq_s, vmladavaxq_s, vmlsdavaq_s, vmlsdavaxq_s,
vmlaldavaq_s, vmlaldavaq_u, vmlaldavaxq_s, vmlsldavaq_s,
vmlsldavaxq_s): Added mve_safe_imp_xlane_pred.

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 3f2863adf44..58619393858 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -130,6 +130,12 @@ (define_attr "predicated" "yes,no" (const_string "no"))
 ; encode that it is a predicable instruction.
 (define_attr "mve_unpredicated_insn" "" (symbol_ref "CODE_FOR_nothing"))
 
+; An attribute used by the loop-doloop pass when determining whether it is
+; safe to predicate a MVE instruction, that operates across lanes, and was
+; previously not predicated.  The pass will still check whether all inputs
+; are predicated by the VCTP predication mask.
+(define_attr "mve_safe_imp_xlane_pred" "yes,no" (const_string "no"))
+
 ; LENGTH of an instruction (in bytes)
 (define_attr "length" ""
   (const_int 4))
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 7600bf62531..22b3ddf5637 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -869,6 +869,14 @@ (define_code_attr mve_addsubmul [
 		 (plus "vadd")
 		 ])
 
+(define_int_attr mve_vmaxmin_safe_imp [
+		 (VMAXVQ_U "yes")
+		 (VMAXVQ_S "no")
+		 (VMAXAVQ_S "yes")
+		 (VMINVQ_U "no")
+		 (VMINVQ_S "no")
+		 (VMINAVQ_S "no")])
+
 (define_int_attr mve_cmp_op1 [
 		 (VCMPCSQ_M_U "cs")
 		 (VCMPEQQ_M_S "eq") (VCMPEQQ_M_U "eq")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 8aa0bded7f0..d7bdcd862f8 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -393,6 +393,7 @@ (define_insn "@mve_q_"
   "TARGET_HAVE_MVE"
   ".%#\t%0, %q1"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_q_"))
+  (set_attr "mve_safe_imp_xlane_pred" "yes")
   (set_attr "type" "mve_move")
 ])
 
@@ -529,6 +530,7 @@ (define_insn "@mve_q_v4si"
   "TARGET_HAVE_MVE"
   ".32\t%Q0, %R0, %q1"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_q_v4si"))
+  (set_attr "mve_safe_imp_xlane_pred" "yes")
   (set_attr "type" "mve_move")
 ])
 
@@ -802,6 +804,7 @@ (define_insn "@mve_q_"
   "TARGET_HAVE_MVE"
   ".%#\t%0, %q2"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_q_"))
+  (set_attr "mve_safe_imp_xlane_pred" "yes")
   (set_attr "type" "mve_move")
 ])
 
@@ -1014,6 +1017,7 @@ (define_insn "@mve_q_"
   "TARGET_HAVE_MVE"
   ".%#\t%0, %q2"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_q_"))
+  (set_attr "mve_safe_imp_xlane_pred" "<mve_vmaxmin_safe_imp>")
   (set_attr "type" "mve_move")
 ])
 
@@ -1033,6 +1037,7 @@ (define_insn "@mve_q_"
   "TARGET_HAVE_MVE"
   ".%#\t%0, %q1, %q2"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_q_"))
+  (set_attr "mve_safe_imp_xlane_pred" "yes")
   (set_attr "type" "mve_move")
 ])
 
@@ -1219,6 +1224,7 @@ (define_insn "@mve_q_v4si"
   "TARGET_HAVE_MVE"
   ".32\t%Q0, %R0, %q2"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_q_v4si"))
+  (set_attr "mve_safe_imp_xlane_pred" "yes")
   (set_attr "type" "mve_move")
 ])
 
@@ -1450,6 +1456,7 @@ (define_insn "@mve_q_"
   "TARGET_HAVE_MVE"
   ".%#\t%Q0, %R0, %q1, %q2"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_q_"))
+  (set_attr "mve_safe_imp_xlane_pred" "yes")
   (set_attr "type" "mve_move")
 ])
 
@@ -1588,6 +1595,7 @@ (define_insn "@mve_q_v4si"
   "TARGET_HAVE_MVE"
   ".32\t%Q0, %R0, %q1, %q2"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_q_v4si"))
+  (set_attr "mve_safe_imp_xlane_pred" "yes")
   (set_attr "type" "mve_move")
 ])
 
@@ -1725,6 +1733,7 @@ (define_insn "@mve_q_v4si"
   "TARGET_HAVE_MVE"
   ".32\t%Q0, %R0, %q2, %q3"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_q_v4si"))
+  (set_attr "mve_safe_imp_xlane_pred" "yes")
   (set_attr "type" "mve_move")
 ])
 
@@ -1742,6 +1751,7 @@ 

[PATCH v4 1/5] arm: Add define_attr to create a mapping between MVE predicated and unpredicated insns

2024-02-21 Thread Andre Vieira

This patch adds an attribute to the mve md patterns to be able to identify
predicable MVE instructions and what their predicated and unpredicated variants
are.  This attribute is used to encode the icode of the unpredicated variant of
an instruction in its predicated variant.

This will make it possible for us to transform VPT-predicated insns in
the insn chain into their unpredicated equivalents when transforming the loop
into a MVE Tail-Predicated Low Overhead Loop. For example:
`mve_vldrbq_z_ -> mve_vldrbq_`.

gcc/ChangeLog:

* config/arm/arm.md (mve_unpredicated_insn): New attribute.
* config/arm/arm.h (MVE_VPT_PREDICATED_INSN_P): New define.
(MVE_VPT_UNPREDICATED_INSN_P): Likewise.
(MVE_VPT_PREDICABLE_INSN_P): Likewise.
* config/arm/vec-common.md (mve_vshlq_): Add attribute.
* config/arm/mve.md (arm_vcx1q_p_v16qi): Add attribute.
(arm_vcx1qv16qi): Likewise.
(arm_vcx1qav16qi): Likewise.
(arm_vcx1qv16qi): Likewise.
(arm_vcx2q_p_v16qi): Likewise.
(arm_vcx2qv16qi): Likewise.
(arm_vcx2qav16qi): Likewise.
(arm_vcx2qv16qi): Likewise.
(arm_vcx3q_p_v16qi): Likewise.
(arm_vcx3qv16qi): Likewise.
(arm_vcx3qav16qi): Likewise.
(arm_vcx3qv16qi): Likewise.
(@mve_q_): Likewise.
(@mve_q_int_): Likewise.
(@mve_q_v4si): Likewise.
(@mve_q_n_): Likewise.
(@mve_q_r_): Likewise.
(@mve_q_f): Likewise.
(@mve_q_m_): Likewise.
(@mve_q_m_n_): Likewise.
(@mve_q_m_r_): Likewise.
(@mve_q_m_f): Likewise.
(@mve_q_int_m_): Likewise.
(@mve_q_p_v4si): Likewise.
(@mve_q_p_): Likewise.
(@mve_q_): Likewise.
(@mve_q_f): Likewise.
(@mve_q_m_): Likewise.
(@mve_q_m_f): Likewise.
(mve_vq_f): Likewise.
(mve_q): Likewise.
(mve_q_f): Likewise.
(mve_vadciq_v4si): Likewise.
(mve_vadciq_m_v4si): Likewise.
(mve_vadcq_v4si): Likewise.
(mve_vadcq_m_v4si): Likewise.
(mve_vandq_): Likewise.
(mve_vandq_f): Likewise.
(mve_vandq_m_): Likewise.
(mve_vandq_m_f): Likewise.
(mve_vandq_s): Likewise.
(mve_vandq_u): Likewise.
(mve_vbicq_): Likewise.
(mve_vbicq_f): Likewise.
(mve_vbicq_m_): Likewise.
(mve_vbicq_m_f): Likewise.
(mve_vbicq_m_n_): Likewise.
(mve_vbicq_n_): Likewise.
(mve_vbicq_s): Likewise.
(mve_vbicq_u): Likewise.
(@mve_vclzq_s): Likewise.
(mve_vclzq_u): Likewise.
(@mve_vcmp_q_): Likewise.
(@mve_vcmp_q_n_): Likewise.
(@mve_vcmp_q_f): Likewise.
(@mve_vcmp_q_n_f): Likewise.
(@mve_vcmp_q_m_f): Likewise.
(@mve_vcmp_q_m_n_): Likewise.
(@mve_vcmp_q_m_): Likewise.
(@mve_vcmp_q_m_n_f): Likewise.
(mve_vctpq): Likewise.
(mve_vctpq_m): Likewise.
(mve_vcvtaq_): Likewise.
(mve_vcvtaq_m_): Likewise.
(mve_vcvtbq_f16_f32v8hf): Likewise.
(mve_vcvtbq_f32_f16v4sf): Likewise.
(mve_vcvtbq_m_f16_f32v8hf): Likewise.
(mve_vcvtbq_m_f32_f16v4sf): Likewise.
(mve_vcvtmq_): Likewise.
(mve_vcvtmq_m_): Likewise.
(mve_vcvtnq_): Likewise.
(mve_vcvtnq_m_): Likewise.
(mve_vcvtpq_): Likewise.
(mve_vcvtpq_m_): Likewise.
(mve_vcvtq_from_f_): Likewise.
(mve_vcvtq_m_from_f_): Likewise.
(mve_vcvtq_m_n_from_f_): Likewise.
(mve_vcvtq_m_n_to_f_): Likewise.
(mve_vcvtq_m_to_f_): Likewise.
(mve_vcvtq_n_from_f_): Likewise.
(mve_vcvtq_n_to_f_): Likewise.
(mve_vcvtq_to_f_): Likewise.
(mve_vcvttq_f16_f32v8hf): Likewise.
(mve_vcvttq_f32_f16v4sf): Likewise.
(mve_vcvttq_m_f16_f32v8hf): Likewise.
(mve_vcvttq_m_f32_f16v4sf): Likewise.
(mve_vdwdupq_m_wb_u_insn): Likewise.
(mve_vdwdupq_wb_u_insn): Likewise.
(mve_veorq_s>): Likewise.
(mve_veorq_u>): Likewise.
(mve_veorq_f): Likewise.
(mve_vidupq_m_wb_u_insn): Likewise.
(mve_vidupq_u_insn): Likewise.
(mve_viwdupq_m_wb_u_insn): Likewise.
(mve_viwdupq_wb_u_insn): Likewise.
(mve_vldrbq_): Likewise.
(mve_vldrbq_gather_offset_): Likewise.
(mve_vldrbq_gather_offset_z_): Likewise.
(mve_vldrbq_z_): Likewise.
(mve_vldrdq_gather_base_v2di): Likewise.
(mve_vldrdq_gather_base_wb_v2di_insn): Likewise.
(mve_vldrdq_gather_base_wb_z_v2di_insn): Likewise.
(mve_vldrdq_gather_base_z_v2di): Likewise.
(mve_vldrdq_gather_offset_v2di): Likewise.
(mve_vldrdq_gather_offset_z_v2di): Likewise.
(mve_vldrdq_gather_shifted_offset_v2di): Likewise.
(mve_vldrdq_gather_shifted_offset_z_v2di): Likewise.
(mve_vldrhq_): Likewise.
(mve_vldrhq_fv8hf): Likewise.
(mve_vldrhq_gather_offset_): 

[PATCH v4 2/5] doloop: Add support for predicated vectorized loops

2024-02-21 Thread Andre Vieira

This patch adds support in the target agnostic doloop pass for the detection of
predicated vectorized hardware loops.  Arm is currently the only target that
will make use of this feature.

The doloop_condition_get function is used to validate that the 'transformed'
jump instruction is one of the conditions that would be correct for the
canonical loop format where the loop counter is decreased by 1 each time.
The way Arm models predicated vectorized hardware loops transforms the loop
counter to 'step' using the element count for each iteration rather than a step
of one.  This means we had to change the condition test in the jump instruction,
meaning we had to add a different condition we accept in doloop_condition_get.

gcc/ChangeLog:

* df-core.cc (df_bb_regno_only_def_find): New helper function.
* df.h (df_bb_regno_only_def_find): Declare new function.
* loop-doloop.cc (doloop_condition_get): Accept conditions generated by
predicated vectorized hardware loops.
(doloop_modify): Add support for GTU condition checks.
(doloop_optimize): Update costing computation to support alterations to
desc->niter_expr by the backend.

Co-authored-by: Stam Markianos-Wright 

diff --git a/gcc/df-core.cc b/gcc/df-core.cc
index f0eb4c93957..b0e8a88d433 100644
--- a/gcc/df-core.cc
+++ b/gcc/df-core.cc
@@ -1964,6 +1964,21 @@ df_bb_regno_last_def_find (basic_block bb, unsigned int regno)
   return NULL;
 }
 
+/* Return the one and only def of REGNO within BB.  If there is no def or
+   there are multiple defs, return NULL.  */
+
+df_ref
+df_bb_regno_only_def_find (basic_block bb, unsigned int regno)
+{
+  df_ref temp = df_bb_regno_first_def_find (bb, regno);
+  if (!temp)
+return NULL;
+  else if (temp == df_bb_regno_last_def_find (bb, regno))
+return temp;
+  else
+return NULL;
+}
+
 /* Finds the reference corresponding to the definition of REG in INSN.
DF is the dataflow object.  */
 
diff --git a/gcc/df.h b/gcc/df.h
index 84e5aa8b524..c4e690b40cf 100644
--- a/gcc/df.h
+++ b/gcc/df.h
@@ -987,6 +987,7 @@ extern void df_check_cfg_clean (void);
 #endif
 extern df_ref df_bb_regno_first_def_find (basic_block, unsigned int);
 extern df_ref df_bb_regno_last_def_find (basic_block, unsigned int);
+extern df_ref df_bb_regno_only_def_find (basic_block, unsigned int);
 extern df_ref df_find_def (rtx_insn *, rtx);
 extern bool df_reg_defined (rtx_insn *, rtx);
 extern df_ref df_find_use (rtx_insn *, rtx);
diff --git a/gcc/loop-doloop.cc b/gcc/loop-doloop.cc
index 529e810e530..8953e1de960 100644
--- a/gcc/loop-doloop.cc
+++ b/gcc/loop-doloop.cc
@@ -85,10 +85,10 @@ doloop_condition_get (rtx_insn *doloop_pat)
  forms:
 
  1)  (parallel [(set (pc) (if_then_else (condition)
-	  			(label_ref (label))
-(pc)))
-	 (set (reg) (plus (reg) (const_int -1)))
-	 (additional clobbers and uses)])
+	(label_ref (label))
+	(pc)))
+		 (set (reg) (plus (reg) (const_int -1)))
+		 (additional clobbers and uses)])
 
  The branch must be the first entry of the parallel (also required
  by jump.cc), and the second entry of the parallel must be a set of
@@ -96,19 +96,33 @@ doloop_condition_get (rtx_insn *doloop_pat)
  the loop counter in an if_then_else too.
 
  2)  (set (reg) (plus (reg) (const_int -1))
- (set (pc) (if_then_else (reg != 0)
-	 (label_ref (label))
-			 (pc))).  
+	 (set (pc) (if_then_else (reg != 0)
+ (label_ref (label))
+ (pc))).
 
- Some targets (ARM) do the comparison before the branch, as in the
+ 3) Some targets (Arm) do the comparison before the branch, as in the
  following form:
 
- 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
-   (set (reg) (plus (reg) (const_int -1)))])
-(set (pc) (if_then_else (cc == NE)
-(label_ref (label))
-(pc))) */
-
+ (parallel [(set (cc) (compare (plus (reg) (const_int -1)) 0))
+		(set (reg) (plus (reg) (const_int -1)))])
+ (set (pc) (if_then_else (cc == NE)
+			 (label_ref (label))
+			 (pc)))
+
+  4) This form supports a construct that is used to represent a vectorized
+  do loop with predication, however we do not need to care about the
+  details of the predication here.
+  Arm uses this construct to support MVE tail predication.
+
+  (parallel
+   [(set (pc)
+	 (if_then_else (gtu (plus (reg) (const_int -n))
+(const_int n-1))
+			   (label_ref)
+			   (pc)))
+	(set (reg) (plus (reg) (const_int -n)))
+	(additional clobbers and uses)])
+ */
   pattern = PATTERN (doloop_pat);
 
   if (GET_CODE (pattern) != PARALLEL)
@@ -173,15 +187,17 @@ doloop_condition_get (rtx_insn *doloop_pat)
   if (! REG_P (reg))
 return 0;
 
-  /* Check if something = (plus (reg) (const_int -1)).
+  /* Check if something = (plus (reg) (const_int 

[PATCH v4 0/5] arm: Add support for MVE Tail-Predicated Low Overhead Loops

2024-02-21 Thread Andre Vieira
Hi,

This is a reworked version of the previous (v3) patch series.  The main
differences are a further split of patches, where:
[1/5] is arm specific and has been approved before,
[2/5] is target agnostic, has had no substantial changes from v3.
[3/5] new arm specific patch that is split from the original last patch and
annotates across lane instructions that are safe for tail predication if their
tail predicated operands are zeroed.
[4/5] new arm specific patch that could be committed independently of the series
to fix an obvious issue and remove unused unspecs & iterators.
[5/5] reworked last patch refactoring the implicit predication and some other
validity checks.

Original cover letter:
This patch adds support for Arm's MVE Tail Predicated Low Overhead Loop
feature.

The M-class Arm-ARM:
https://developer.arm.com/documentation/ddi0553/bu/?lang=en
Section B5.5.1 "Loop tail predication" describes the feature
we are adding support for with this patch (although
we only add codegen for DLSTP/LETP instruction loops).

Previously with commit d2ed233cb94 we'd added support for
non-MVE DLS/LE loops through the loop-doloop pass, which, given
a standard MVE loop like:

```
void  __attribute__ ((noinline)) test (int16_t *a, int16_t *b, int16_t *c, int 
n)
{
  while (n > 0)
{
  mve_pred16_t p = vctp16q (n);
  int16x8_t va = vldrhq_z_s16 (a, p);
  int16x8_t vb = vldrhq_z_s16 (b, p);
  int16x8_t vc = vaddq_x_s16 (va, vb, p);
  vstrhq_p_s16 (c, vc, p);
  c+=8;
  a+=8;
  b+=8;
  n-=8;
}
}
```
.. would output:

```

dls lr, lr
.L3:
vctp.16 r3
vmrs ip, P0  @ movhi
sxth ip, ip
vmsr P0, ip @ movhi
mov r4, r0
vpst
vldrht.16   q2, [r4]
mov r4, r1
vmov q3, q0
vpst
vldrht.16   q1, [r4]
mov r4, r2
vpst
vaddt.i16   q3, q2, q1
subs r3, r3, #8
vpst
vstrht.16   q3, [r4]
adds r0, r0, #16
adds r1, r1, #16
adds r2, r2, #16
le  lr, .L3
```

where the LE instruction will decrement LR by 1, compare and
branch if needed.

(there are also other inefficiencies with the above code, like the
pointless vmrs/sxth/vmsr on the VPR and the adds not being merged
into the vldrht/vstrht as a #16 offsets and some random movs!
But those are different problems...)

The MVE version is similar, except that:
* Instead of DLS/LE the instructions are DLSTP/LETP.
* Instead of pre-calculating the number of iterations of the
  loop, we place the number of elements to be processed by the
  loop into LR.
* Instead of decrementing the LR by one, LETP will decrement it
  by FPSCR.LTPSIZE, which is the number of elements being
  processed in each iteration: 16 for 8-bit elements, 8 for 16-bit
  elements, etc.
* On the final iteration, automatic Loop Tail Predication is
  performed, as if the instructions within the loop had been VPT
  predicated with a VCTP generating the VPR predicate in every
  loop iteration.

The dlstp/letp loop now looks like:

```

dlstp.16 lr, r3
.L14:
mov r3, r0
vldrh.16 q3, [r3]
mov r3, r1
vldrh.16 q2, [r3]
mov r3, r2
vadd.i16  q3, q3, q2
adds r0, r0, #16
vstrh.16 q3, [r3]
adds r1, r1, #16
adds r2, r2, #16
letp lr, .L14

```

Since the loop tail predication is automatic, we have eliminated
the VCTP that had been specified by the user in the intrinsic
and converted the VPT-predicated instructions into their
unpredicated equivalents (which also saves us from VPST insns).

The LETP instruction here decrements LR by 8 in each iteration.

Stam Markianos-Wright (1):
  arm: Add define_attr to create a mapping between MVE predicated and
unpredicated insns

Andre Vieira (4):
  doloop: Add support for predicated vectorized loops
  arm: Annotate instructions with mve_safe_imp_xlane_pred
  arm: Fix a wrong attribute use and remove unused unspecs and iterators
  arm: Add support for MVE Tail-Predicated Low Overhead Loops


-- 
2.17.1


Re: [PATCH 5/5] bpf: renamed coreout.* files to btfext-out.*.

2024-02-21 Thread David Faust



On 2/20/24 02:24, Cupertino Miranda wrote:
> gcc/ChangeLog:
>   * config.gcc (target_gtfiles): changed coreout to btfext-out.
>   (extra_objs): changed coreout to btfext-out.

I think these entries should start with a capital letter ("Changed...").

>   * config/bpf/coreout.cc: Renamed to btfext-out.cc
>   * config/bpf/btfext-out.cc: Added
>   * config/bpf/coreout.h: Renamed to btfext-out.h
>   * config/bpf/btfext-out.h: Added
>   * config/bpf/core-builtins.cc: Changed include
>   * config/bpf/core-builtins.h: Changed include
>   * config/bpf/t-bpf: Renamed file.

But, otherwise OK. Thanks.

> ---
>  gcc/config.gcc   | 4 ++--
>  gcc/config/bpf/{coreout.cc => btfext-out.cc} | 4 ++--
>  gcc/config/bpf/{coreout.h => btfext-out.h}   | 2 +-
>  gcc/config/bpf/core-builtins.cc  | 2 +-
>  gcc/config/bpf/core-builtins.h   | 2 +-
>  gcc/config/bpf/t-bpf | 4 ++--
>  6 files changed, 9 insertions(+), 9 deletions(-)
>  rename gcc/config/bpf/{coreout.cc => btfext-out.cc} (99%)
>  rename gcc/config/bpf/{coreout.h => btfext-out.h} (98%)
> 
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index a0f9c6723083..1ca033d75b66 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -1653,8 +1653,8 @@ bpf-*-*)
>  tmake_file="${tmake_file} bpf/t-bpf"
>  use_collect2=no
>  use_gcc_stdint=provide
> -extra_objs="coreout.o core-builtins.o"
> -target_gtfiles="$target_gtfiles \$(srcdir)/config/bpf/coreout.cc 
> \$(srcdir)/config/bpf/core-builtins.cc"
> +extra_objs="btfext-out.o core-builtins.o"
> +target_gtfiles="$target_gtfiles \$(srcdir)/config/bpf/btfext-out.cc 
> \$(srcdir)/config/bpf/core-builtins.cc"
>  ;;
>  cris-*-elf | cris-*-none)
>   tm_file="elfos.h newlib-stdint.h ${tm_file}"
> diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/btfext-out.cc
> similarity index 99%
> rename from gcc/config/bpf/coreout.cc
> rename to gcc/config/bpf/btfext-out.cc
> index 31b2abc3151b..4281cca83e13 100644
> --- a/gcc/config/bpf/coreout.cc
> +++ b/gcc/config/bpf/btfext-out.cc
> @@ -33,7 +33,7 @@
>  #include "tree-pretty-print.h"
>  #include "cgraph.h"
>  
> -#include "coreout.h"
> +#include "btfext-out.h"
>  
>  /* This file contains data structures and routines for construction and 
> output
> of BPF Compile Once - Run Everywhere (BPF CO-RE) information.
> @@ -618,4 +618,4 @@ btf_ext_output (void)
>dw2_asm_output_data (4, 0, "Required padding by libbpf structs");
>  }
>  
> -#include "gt-coreout.h"
> +#include "gt-btfext-out.h"
> diff --git a/gcc/config/bpf/coreout.h b/gcc/config/bpf/btfext-out.h
> similarity index 98%
> rename from gcc/config/bpf/coreout.h
> rename to gcc/config/bpf/btfext-out.h
> index 1c26b9274739..b36309475c97 100644
> --- a/gcc/config/bpf/coreout.h
> +++ b/gcc/config/bpf/btfext-out.h
> @@ -1,4 +1,4 @@
> -/* coreout.h - Declarations and definitions related to
> +/* btfext-out.h - Declarations and definitions related to
> BPF Compile Once - Run Everywhere (CO-RE) support.
> Copyright (C) 2021-2024 Free Software Foundation, Inc.
>  
> diff --git a/gcc/config/bpf/core-builtins.cc b/gcc/config/bpf/core-builtins.cc
> index aa75fd68cae6..8d8c54c1fb3d 100644
> --- a/gcc/config/bpf/core-builtins.cc
> +++ b/gcc/config/bpf/core-builtins.cc
> @@ -45,7 +45,7 @@ along with GCC; see the file COPYING3.  If not see
>  
>  #include "ctfc.h"
>  #include "btf.h"
> -#include "coreout.h"
> +#include "btfext-out.h"
>  #include "core-builtins.h"
>  
>  /* BPF CO-RE builtins definition.
> diff --git a/gcc/config/bpf/core-builtins.h b/gcc/config/bpf/core-builtins.h
> index c54f6ddac812..e56b55b94e0c 100644
> --- a/gcc/config/bpf/core-builtins.h
> +++ b/gcc/config/bpf/core-builtins.h
> @@ -1,7 +1,7 @@
>  #ifndef BPF_CORE_BUILTINS_H
>  #define BPF_CORE_BUILTINS_H
>  
> -#include "coreout.h"
> +#include "btfext-out.h"
>  
>  enum bpf_builtins
>  {
> diff --git a/gcc/config/bpf/t-bpf b/gcc/config/bpf/t-bpf
> index 18f1fa67794d..dc50332350c4 100644
> --- a/gcc/config/bpf/t-bpf
> +++ b/gcc/config/bpf/t-bpf
> @@ -1,7 +1,7 @@
>  
> -TM_H += $(srcdir)/config/bpf/coreout.h $(srcdir)/config/bpf/core-builtins.h
> +TM_H += $(srcdir)/config/bpf/btfext-out.h 
> $(srcdir)/config/bpf/core-builtins.h
>  
> -coreout.o: $(srcdir)/config/bpf/coreout.cc
> +btfext-out.o: $(srcdir)/config/bpf/btfext-out.cc
>   $(COMPILE) $<
>   $(POSTCOMPILE)
>  


Re: [PATCH 2/5] btf: added KIND_FUNC traversal function.

2024-02-21 Thread David Faust


On 2/20/24 02:24, Cupertino Miranda wrote:
> Added a traversal function to traverse all BTF_KIND_FUNC nodes with a
> callback function. Used for .BTF.ext section content creation.
> 
> gcc/ChangeLog
> 
>   * btfout.cc (output_btf_func_types): use FOR_EACH_VEC_ELT.
>   (traverse_btf_func_types): Defined function.
>   * ctfc.h (funcs_traverse_callback): typedef for function
>   prototype.
>   (traverse_btf_func_types): Added prototype.

OK, thanks.

> ---
>  gcc/btfout.cc | 22 --
>  gcc/ctfc.h|  3 +++
>  2 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/btfout.cc b/gcc/btfout.cc
> index 7e114e224449..7aabd99f3e7c 100644
> --- a/gcc/btfout.cc
> +++ b/gcc/btfout.cc
> @@ -1276,8 +1276,10 @@ output_btf_types (ctf_container_ref ctfc)
>  static void
>  output_btf_func_types (ctf_container_ref ctfc)
>  {
> -  for (size_t i = 0; i < vec_safe_length (funcs); i++)
> -btf_asm_func_type (ctfc, (*funcs)[i], i);
> +  ctf_dtdef_ref ref;
> +  unsigned i;
> +  FOR_EACH_VEC_ELT (*funcs, i, ref)
> +btf_asm_func_type (ctfc, ref, i);
>  }
>  
>  /* Output all BTF_KIND_DATASEC records.  */
> @@ -1452,4 +1454,20 @@ btf_finalize (void)
>tu_ctfc = NULL;
>  }
>  
> +/* Traversal function for all BTF_KIND_FUNC type records.  */
> +
> +bool
> +traverse_btf_func_types (funcs_traverse_callback callback, void *data)
> +{
> +  ctf_dtdef_ref ref;
> +  unsigned i;
> +  FOR_EACH_VEC_ELT (*funcs, i, ref)
> +{
> +  bool stop = callback (ref, data);
> +  if (stop == true)
> + return true;
> +}
> +  return false;
> +}
> +
>  #include "gt-btfout.h"
> diff --git a/gcc/ctfc.h b/gcc/ctfc.h
> index 7aac57edac55..fa188bf2f5a4 100644
> --- a/gcc/ctfc.h
> +++ b/gcc/ctfc.h
> @@ -441,6 +441,9 @@ extern int ctf_add_variable (ctf_container_ref, const 
> char *, ctf_id_t,
>  extern ctf_id_t ctf_lookup_tree_type (ctf_container_ref, const tree);
>  extern ctf_id_t get_btf_id (ctf_id_t);
>  
> +typedef bool (*funcs_traverse_callback) (ctf_dtdef_ref, void *);
> +bool traverse_btf_func_types (funcs_traverse_callback, void *);
> +
>  /* CTF section does not emit location information; at this time, location
> information is needed for BTF CO-RE use-cases.  */
>  


Re: Stabilizing flaky libgomp GCN target/offloading testing (was: libgomp GCN gfx1030/gfx1100 offloading status)

2024-02-21 Thread Richard Biener



> Am 21.02.2024 um 13:34 schrieb Thomas Schwinge :
> 
> Hi!
> 
>> On 2024-02-01T15:49:02+0100, Richard Biener  wrote:
>>> On Thu, 1 Feb 2024, Thomas Schwinge wrote:
>>> On 2024-01-26T10:45:10+0100, Richard Biener  wrote:
 On Fri, 26 Jan 2024, Richard Biener wrote:
> On Wed, 24 Jan 2024, Andrew Stubbs wrote:
>> [...] is enough to get gfx1100 working for most purposes, on top of the
>> patch that Tobias committed a week or so ago; there are still some test
>> failures to investigate, and probably some tuning to do.
>> 
>> It might also get gfx1030 working too. @Richi, could you test it,
>> please?
> 
> I can report partial success here.  [...]
>>> 
> I'll followup with a test summary once the (serial) run of libgomp
> testing finished.
>>> 
>>> (Why serial, by the way?)
>> 
>> Just out of caution ... (I'm using the GPU for the desktop at the
>> same time and dmesg gets spammed with some not-so reassuring
>> "errors" during the offloading)
> 
> Yeah, indeed 'dmesg' is full of "notes"...
> 
> However, note that per my work on 
> "libgomp make check time is excessive", all execution testing in libgomp
> is serialized in 'libgomp/testsuite/lib/libgomp.exp:libgomp_load'.  So,
> no problem/difference in that regard, to run parallel
> 'check-target-libgomp'.  (... with the caveat that execution tests for
> effective-targets are *not* governed by that, as I've found yesterday.
> I have a WIP hack for that, too.)
> 
> 
>>> [...] what I
>>> got with '-march=gfx1100' for AMD Radeon RX 7900 XTX.  [...]
> 
>>> [...] execution test FAILs.  Not all FAILs appear all the time [...]
> 
> What disturbs the testing a lot is, that the GPU may get into a bad
> state, upon which any use either fails with a
> 'HSA_STATUS_ERROR_OUT_OF_RESOURCES' error -- or by just hanging, deep in
> 'libhsa-runtime64.so.1'...
> 
> I've now tried to debug the latter case (hang).  When the GPU gets into
> this bad state (whatever exactly that is),
> 'hsa_executable_load_code_object' still returns 'HSA_STATUS_SUCCESS', but
> then GCN target execution ('gcn-run') hangs in 'hsa_executable_freeze'
> vs. GCN offloading execution ('libgomp-plugin-gcn.so.1') hangs right
> before 'hsa_executable_freeze', in the GCN heap setup 'hsa_memory_copy'.
> There it hangs until killed (for example, until DejaGnu's timeout
> mechanism kills the process -- just that the next GPU-using execution
> test then runs into the same thing again...).
> 
> In this state (and also the 'HSA_STATUS_ERROR_OUT_OF_RESOURCES' state),
> we're able to recover via:
> 
>$ flock /tmp/gpu.lock sudo cat /sys/kernel/debug/dri/0/amdgpu_gpu_recover
>0
> 
> This is, obviously, a hack, probably needs a serial lock to not disturb
> other things, has hard-coded 'dri/0', and as I said in
> 
> "GCN RDNA2+ vs. GCC SLP vectorizer":
> 
> | I've no idea what
> | 'amdgpu_gpu_recover' would do if the GPU is also used for display.

It ends up terminating your X session… (there’s some automatic driver recovery 
that’s also sometimes triggered which sounds like the same thing).  I need to 
try using the integrated graphics for X11 to see if that avoids the issue.

Guess AMD needs to improve the driver/runtime (or we - it’s open source at 
least up to the firmware).

Richard 

> However, it's very useful in my testing.  :-|
> 
> The question is, how to detect the "hang" state without first running
> into a timeout (and disambiguating such a timeout from a user code
> timeout)?  Add a watchdog: call 'alarm([a few seconds])' before device
> initialization, and before the actual GPU kernel launch cancel it with
> 'alarm(0)'?  (..., and add a handler for 'SIGALRM' to print a distinct
> error message that we can then react on, like for
> 'HSA_STATUS_ERROR_OUT_OF_RESOURCES'.)  Probably 'alarm'/'SIGALRM' is a
> no-go in libgomp -- instead, use a helper thread to similarly implement a
> watchdog?  ('libgomp/plugin/plugin-gcn.c' already is using pthreads for
> other purposes.)  Any other clever ideas?  What's a suitable value for
> "a few seconds"?
> 
> 
> Grüße
> Thomas


Re: [PATCH] ARM: Fix conditional execution [PR113915]

2024-02-21 Thread Richard Earnshaw (lists)
On 21/02/2024 14:34, Wilco Dijkstra wrote:
> 
> By default most patterns can be conditionalized on Arm targets.  However
> Thumb-2 predication requires the "predicable" attribute be explicitly
> set to "yes".  Most patterns are shared between Arm and Thumb(-2) and are
> marked with "predicable".  Given this sharing, it does not make sense to
> use a different default for Arm.  So only consider conditional execution
> of instructions that have the predicable attribute set to yes.  This ensures
> that patterns not explicitly marked as such are never accidentally 
> conditionally executed like in the PR.
> 
> GLIBC codesize was ~0.014% worse due to atomic operations now being
> unconditional and a small number of patterns not setting "predicable".
> 
> Passes regress and bootstrap, OK for commit?
> 
> gcc/ChangeLog:
> PR target/113915
> * config/arm/arm.md (NOCOND): Improve comment.
> * config/arm/arm.cc (arm_final_prescan_insn): Add check for
> PREDICABLE_YES.
> 
> gcc/testsuite/ChangeLog:
> PR target/113915
> * gcc.target/arm/builtin-bswap-1.c: Fix test.
> 
> ---
> 
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index 
> c44047c377a802d0c1dc1406df1b88a6b079607b..29771d284831a995adcf9adbb525396fbabb1ea2
>  100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -25610,11 +25610,12 @@ arm_final_prescan_insn (rtx_insn *insn)
> break;
>  
>   case INSN:
> -   /* Instructions using or affecting the condition codes make it
> -  fail.  */
> +   /* Check the instruction is explicitly marked as predicable.
> +  Instructions using or affecting the condition codes are not.  
> */
> scanbody = PATTERN (this_insn);
> if (!(GET_CODE (scanbody) == SET
>   || GET_CODE (scanbody) == PARALLEL)
> +   || get_attr_predicable (this_insn) != PREDICABLE_YES
> || get_attr_conds (this_insn) != CONDS_NOCOND)
>   fail = TRUE;
> break;
> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index 
> 5816409f86f1106b410c5e21d77e599b485f85f2..671f093862259c2c0df93a986fc22fa56a8ea6c7
>  100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -307,6 +307,8 @@
>  ;
>  ; NOCOND means that the instruction does not use or alter the condition
>  ;   codes but can be converted into a conditionally exectuted instruction.
> +;   Given that NOCOND is the default for most instructions if omitted,
> +;   the attribute predicable must be set to yes as well.
>  
>  (define_attr "conds" "use,set,clob,unconditional,nocond"
>   (if_then_else

While this is ok, 

> diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c 
> b/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
> index 
> c1e7740d14d3ca4e93a71e38b12f82c19791a204..3de7cea81c1128c2fe5a9e1216e6b027d26bcab9
>  100644
> --- a/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
> +++ b/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
> @@ -5,14 +5,8 @@
> of the instructions.  Add an -mtune option known to facilitate that.  */
>  /* { dg-additional-options "-O2 -mtune=cortex-a53" } */
>  /* { dg-final { scan-assembler-not "orr\[ \t\]" } } */
> -/* { dg-final { scan-assembler-times "revsh\\t" 1 { target { arm_nothumb } } 
> } }  */
> -/* { dg-final { scan-assembler-times "revshne\\t" 1 { target { arm_nothumb } 
> } } }  */
> -/* { dg-final { scan-assembler-times "revsh\\t" 2 { target { ! arm_nothumb } 
> } } }  */
> -/* { dg-final { scan-assembler-times "rev16\\t" 1 { target { arm_nothumb } } 
> } }  */
> -/* { dg-final { scan-assembler-times "rev16ne\\t" 1 { target { arm_nothumb } 
> } } }  */
> -/* { dg-final { scan-assembler-times "rev16\\t" 2 { target { ! arm_nothumb } 
> } } }  */
> -/* { dg-final { scan-assembler-times "rev\\t" 2 { target { arm_nothumb } } } 
> }  */
> -/* { dg-final { scan-assembler-times "revne\\t" 2 { target { arm_nothumb } } 
> } }  */
> -/* { dg-final { scan-assembler-times "rev\\t" 4 { target { ! arm_nothumb } } 
> } }  */
> +/* { dg-final { scan-assembler-times "revsh\\t" 2 } }  */
> +/* { dg-final { scan-assembler-times "rev16\\t" 2 } }  */
> +/* { dg-final { scan-assembler-times "rev\\t" 4 } }  */
>  
>  #include "builtin-bswap.x"

This bit isn't.  The correct fix here is to fix the pattern(s) concerned to add 
the missing predicate.

Note that builtin-bswap.x explicitly mentions predicated mnemonics in the 
comments.

R.


Re: [PATCH] libgccjit: Add support for the type bfloat16

2024-02-21 Thread Antoni Boucher
Thanks for the review.
Here's the updated patch.

On Fri, 2023-12-01 at 12:45 -0500, David Malcolm wrote:
> On Thu, 2023-11-16 at 17:20 -0500, Antoni Boucher wrote:
> > I forgot to attach the patch.
> > 
> > On Thu, 2023-11-16 at 17:19 -0500, Antoni Boucher wrote:
> > > Hi.
> > > This patch adds the support for the type bfloat16 (bug 112574).
> > > 
> > > This was asked to be split from another patch sent here:
> > > https://gcc.gnu.org/pipermail/jit/2023q1/001607.html
> > > 
> > > Thanks for the review.
> > 
> 
> Thanks for the patch.
> 
> > diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc
> > index 18cc4da25b8..7e1c97a4638 100644
> > --- a/gcc/jit/jit-playback.cc
> > +++ b/gcc/jit/jit-playback.cc
> > @@ -280,6 +280,8 @@ get_tree_node_for_type (enum gcc_jit_types
> > type_)
> >  
> >  case GCC_JIT_TYPE_FLOAT:
> >    return float_type_node;
> > +    case GCC_JIT_TYPE_BFLOAT16:
> > +  return bfloat16_type_node;
> 
> The code to create bfloat16_type_node (in build_common_tree_nodes) is
> guarded by #ifdef HAVE_BFmode, so we should probably have a test for
> this in case GCC_JIT_TYPE_BFLOAT16 to at least add an error message
> when it's NULL_TREE, rather than silently returning NULL_TREE and
> crashing.
> 
> [...]
> 
> > diff --git a/gcc/testsuite/jit.dg/test-bfloat16.c
> > b/gcc/testsuite/jit.dg/test-bfloat16.c
> > new file mode 100644
> > index 000..6aed3920351
> > --- /dev/null
> > +++ b/gcc/testsuite/jit.dg/test-bfloat16.c
> > @@ -0,0 +1,37 @@
> > +/* { dg-do compile { target x86_64-*-* } } */
> > +
> > +#include 
> > +#include 
> > +
> > +#include "libgccjit.h"
> > +
> > +/* We don't want set_options() in harness.h to set -O3 so our
> > little local
> > +   is optimized away. */
> > +#define TEST_ESCHEWS_SET_OPTIONS
> > +static void set_options (gcc_jit_context *ctxt, const char *argv0)
> > +{
> > +}
> 
> 
> Please add a comment to all-non-failing-tests.h noting the exclusion
> of
> this test case from the array.
> 
> [...]
> 
> > diff --git a/gcc/testsuite/jit.dg/test-types.c
> > b/gcc/testsuite/jit.dg/test-types.c
> > index a01944e35fa..9e7c4f3e046 100644
> > --- a/gcc/testsuite/jit.dg/test-types.c
> > +++ b/gcc/testsuite/jit.dg/test-types.c
> > @@ -1,3 +1,4 @@
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -492,4 +493,5 @@ verify_code (gcc_jit_context *ctxt,
> > gcc_jit_result *result)
> >  
> >    CHECK_VALUE (gcc_jit_type_get_size (gcc_jit_context_get_type
> > (ctxt, GCC_JIT_TYPE_FLOAT)), sizeof (float));
> >    CHECK_VALUE (gcc_jit_type_get_size (gcc_jit_context_get_type
> > (ctxt, GCC_JIT_TYPE_DOUBLE)), sizeof (double));
> > +  CHECK_VALUE (gcc_jit_type_get_size (gcc_jit_context_get_type
> > (ctxt, GCC_JIT_TYPE_BFLOAT16)), sizeof (__bfloat16));
> 
> 
> This is only going to work on targets which #ifdef HAVE_BFmode, so
> this
> CHECK_VALUE needs to be conditionalized somehow, to avoid having
> this,
> test-combination, and test-threads from bailing out early on targets
> without BFmode.
> 
> Dave
> 

From 3b31bca3a1144a5fa4dc34e402ad3287ccca84dd Mon Sep 17 00:00:00 2001
From: Antoni Boucher 
Date: Thu, 16 Nov 2023 10:59:22 -0500
Subject: [PATCH] libgccjit: Add support for the type bfloat16

gcc/jit/ChangeLog:

	PR jit/112574
	* docs/topics/types.rst: Document GCC_JIT_TYPE_BFLOAT16.
	* jit-common.h: Update NUM_GCC_JIT_TYPES.
	* jit-playback.cc (get_tree_node_for_type): Support bfloat16.
	* jit-recording.cc (recording::memento_of_get_type::get_size,
	recording::memento_of_get_type::dereference,
	recording::memento_of_get_type::is_int,
	recording::memento_of_get_type::is_signed,
	recording::memento_of_get_type::is_float,
	recording::memento_of_get_type::is_bool): Support bfloat16.
	* libgccjit.h (enum gcc_jit_types): Add GCC_JIT_TYPE_BFLOAT16.

gcc/testsuite/ChangeLog:

	PR jit/112574
	* jit.dg/all-non-failing-tests.h: New test test-bfloat16.c.
	* jit.dg/test-types.c: Test GCC_JIT_TYPE_BFLOAT16.
	* jit.dg/test-bfloat16.c: New test.
---
 gcc/jit/docs/topics/types.rst|  2 ++
 gcc/jit/jit-common.h |  2 +-
 gcc/jit/jit-playback.cc  |  6 
 gcc/jit/jit-recording.cc | 11 ++
 gcc/jit/libgccjit.h  |  4 ++-
 gcc/testsuite/jit.dg/all-non-failing-tests.h |  3 ++
 gcc/testsuite/jit.dg/test-bfloat16.c | 37 
 gcc/testsuite/jit.dg/test-types.c|  4 +++
 8 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/jit.dg/test-bfloat16.c

diff --git a/gcc/jit/docs/topics/types.rst b/gcc/jit/docs/topics/types.rst
index bb51f037b7e..6a7a35280ed 100644
--- a/gcc/jit/docs/topics/types.rst
+++ b/gcc/jit/docs/topics/types.rst
@@ -113,6 +113,8 @@ Standard types
- C99's ``__int128_t``
  * - :c:data:`GCC_JIT_TYPE_FLOAT`
-
+ * - :c:data:`GCC_JIT_TYPE_BFLOAT16`
+   - C's ``__bfloat16``
  * - :c:data:`GCC_JIT_TYPE_DOUBLE`
-
  * - 

[PATCH] ipa: Create indirect call edges also for lambdas

2024-02-21 Thread Martin Jambor
Hi,

Even though we have had code to handle creation of indirect call graph
edges (so that these calls can then be made direct as part of IPA-CP
and inlining and eventually also inlined) for C++ member pointers for
many years, this code expects the member pointers to be structures
passed by value.  In PR 108802 it turned out that for lambdas these
are passed by reference.  This patch adjusts the code for that so that
small lambdas are also inlineable without depending on early inlining.

Bootstrapped and LTO bootstrapped on x86_64-linux.  This is technically
a regression against GCC 10.  OK for master even now?

Thanks,

Martin


gcc/ChangeLog:

2024-02-20  Martin Jambor  

PR ipa/108802
* ipa-prop.cc (ipa_get_stmt_member_ptr_load_param): Also recognize
loads from a pointer parameter.
(ipa_analyze_indirect_call_uses): Likewise.

gcc/testsuite/ChangeLog:

2024-02-20  Martin Jambor  

PR ipa/108802
* g++.dg/ipa/pr108802.C: New test.
---
 gcc/ipa-prop.cc | 56 +
 gcc/testsuite/g++.dg/ipa/pr108802.C | 14 
 2 files changed, 55 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ipa/pr108802.C

diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index bec0ebd210c..25d252fd57c 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -2514,14 +2514,26 @@ ipa_get_stmt_member_ptr_load_param (gimple *stmt, bool 
use_delta,
   if (TREE_CODE (rhs) != MEM_REF)
 return NULL_TREE;
   rec = TREE_OPERAND (rhs, 0);
-  if (TREE_CODE (rec) != ADDR_EXPR)
-return NULL_TREE;
-  rec = TREE_OPERAND (rec, 0);
-  if (TREE_CODE (rec) != PARM_DECL
-  || !type_like_member_ptr_p (TREE_TYPE (rec), _field, _field))
+  if (TREE_CODE (rec) == ADDR_EXPR)
+{
+  rec = TREE_OPERAND (rec, 0);
+  if (TREE_CODE (rec) != PARM_DECL
+ || !type_like_member_ptr_p (TREE_TYPE (rec), _field,
+ _field))
+   return NULL_TREE;
+}
+  else if (TREE_CODE (rec) == SSA_NAME
+  && SSA_NAME_IS_DEFAULT_DEF (rec))
+{
+  if (TREE_CODE (SSA_NAME_VAR (rec)) != PARM_DECL
+ || !type_like_member_ptr_p (TREE_TYPE (TREE_TYPE (rec)), _field,
+ _field))
+   return NULL_TREE;
+}
+  else
 return NULL_TREE;
-  ref_offset = TREE_OPERAND (rhs, 1);
 
+  ref_offset = TREE_OPERAND (rhs, 1);
   if (use_delta)
 fld = delta_field;
   else
@@ -2757,17 +2769,31 @@ ipa_analyze_indirect_call_uses (struct 
ipa_func_body_info *fbi, gcall *call,
   if (rec != rec2)
 return;
 
-  index = ipa_get_param_decl_index (info, rec);
-  if (index >= 0
-  && parm_preserved_before_stmt_p (fbi, index, call, rec))
+  if (TREE_CODE (rec) == SSA_NAME)
 {
-  struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index,
-   call, false);
-  cs->indirect_info->offset = offset;
-  cs->indirect_info->agg_contents = 1;
-  cs->indirect_info->member_ptr = 1;
-  cs->indirect_info->guaranteed_unmodified = 1;
+  index = ipa_get_param_decl_index (info, SSA_NAME_VAR (rec));
+  if (index < 0
+ || !parm_ref_data_preserved_p (fbi, index, call,
+gimple_assign_rhs1 (def)))
+   return;
+  by_ref = true;
 }
+  else
+{
+  index = ipa_get_param_decl_index (info, rec);
+  if (index < 0
+ || !parm_preserved_before_stmt_p (fbi, index, call, rec))
+   return;
+  by_ref = false;
+}
+
+  struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index,
+   call, false);
+  cs->indirect_info->offset = offset;
+  cs->indirect_info->agg_contents = 1;
+  cs->indirect_info->member_ptr = 1;
+  cs->indirect_info->by_ref = by_ref;
+  cs->indirect_info->guaranteed_unmodified = 1;
 
   return;
 }
diff --git a/gcc/testsuite/g++.dg/ipa/pr108802.C 
b/gcc/testsuite/g++.dg/ipa/pr108802.C
new file mode 100644
index 000..2e2b6c66b64
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/pr108802.C
@@ -0,0 +1,14 @@
+/* { dg-do compile  } */
+/* { dg-options "-O2 -std=c++14 -fdump-ipa-inline -fno-early-inlining"  } */
+/* { dg-add-options bind_pic_locally } */
+
+struct A {
+int interesting(int x) { return 2 * x; }
+};
+
+int f1() {
+A a;
+return [&](auto&& f) { return (a.*f)(42); } (::interesting);
+}
+
+/* { dg-final { scan-ipa-dump "A::interesting\[^\\n\]*inline copy in int f1"  
"inline"  } } */
-- 
2.43.0



[PATCH] ARM: Fix conditional execution [PR113915]

2024-02-21 Thread Wilco Dijkstra

By default most patterns can be conditionalized on Arm targets.  However
Thumb-2 predication requires the "predicable" attribute be explicitly
set to "yes".  Most patterns are shared between Arm and Thumb(-2) and are
marked with "predicable".  Given this sharing, it does not make sense to
use a different default for Arm.  So only consider conditional execution
of instructions that have the predicable attribute set to yes.  This ensures
that patterns not explicitly marked as such are never accidentally 
conditionally executed like in the PR.

GLIBC codesize was ~0.014% worse due to atomic operations now being
unconditional and a small number of patterns not setting "predicable".

Passes regress and bootstrap, OK for commit?

gcc/ChangeLog:
PR target/113915
* config/arm/arm.md (NOCOND): Improve comment.
* config/arm/arm.cc (arm_final_prescan_insn): Add check for
PREDICABLE_YES.

gcc/testsuite/ChangeLog:
PR target/113915
* gcc.target/arm/builtin-bswap-1.c: Fix test.

---

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 
c44047c377a802d0c1dc1406df1b88a6b079607b..29771d284831a995adcf9adbb525396fbabb1ea2
 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -25610,11 +25610,12 @@ arm_final_prescan_insn (rtx_insn *insn)
  break;
 
case INSN:
- /* Instructions using or affecting the condition codes make it
-fail.  */
+ /* Check the instruction is explicitly marked as predicable.
+Instructions using or affecting the condition codes are not.  
*/
  scanbody = PATTERN (this_insn);
  if (!(GET_CODE (scanbody) == SET
|| GET_CODE (scanbody) == PARALLEL)
+ || get_attr_predicable (this_insn) != PREDICABLE_YES
  || get_attr_conds (this_insn) != CONDS_NOCOND)
fail = TRUE;
  break;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 
5816409f86f1106b410c5e21d77e599b485f85f2..671f093862259c2c0df93a986fc22fa56a8ea6c7
 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -307,6 +307,8 @@
 ;
 ; NOCOND means that the instruction does not use or alter the condition
 ;   codes but can be converted into a conditionally exectuted instruction.
+;   Given that NOCOND is the default for most instructions if omitted,
+;   the attribute predicable must be set to yes as well.
 
 (define_attr "conds" "use,set,clob,unconditional,nocond"
(if_then_else
diff --git a/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c 
b/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
index 
c1e7740d14d3ca4e93a71e38b12f82c19791a204..3de7cea81c1128c2fe5a9e1216e6b027d26bcab9
 100644
--- a/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
+++ b/gcc/testsuite/gcc.target/arm/builtin-bswap-1.c
@@ -5,14 +5,8 @@
of the instructions.  Add an -mtune option known to facilitate that.  */
 /* { dg-additional-options "-O2 -mtune=cortex-a53" } */
 /* { dg-final { scan-assembler-not "orr\[ \t\]" } } */
-/* { dg-final { scan-assembler-times "revsh\\t" 1 { target { arm_nothumb } } } 
}  */
-/* { dg-final { scan-assembler-times "revshne\\t" 1 { target { arm_nothumb } } 
} }  */
-/* { dg-final { scan-assembler-times "revsh\\t" 2 { target { ! arm_nothumb } } 
} }  */
-/* { dg-final { scan-assembler-times "rev16\\t" 1 { target { arm_nothumb } } } 
}  */
-/* { dg-final { scan-assembler-times "rev16ne\\t" 1 { target { arm_nothumb } } 
} }  */
-/* { dg-final { scan-assembler-times "rev16\\t" 2 { target { ! arm_nothumb } } 
} }  */
-/* { dg-final { scan-assembler-times "rev\\t" 2 { target { arm_nothumb } } } } 
 */
-/* { dg-final { scan-assembler-times "revne\\t" 2 { target { arm_nothumb } } } 
}  */
-/* { dg-final { scan-assembler-times "rev\\t" 4 { target { ! arm_nothumb } } } 
}  */
+/* { dg-final { scan-assembler-times "revsh\\t" 2 } }  */
+/* { dg-final { scan-assembler-times "rev16\\t" 2 } }  */
+/* { dg-final { scan-assembler-times "rev\\t" 4 } }  */
 
 #include "builtin-bswap.x"



[COMMITTED 4/9] gccrs: expand: Fix formatting for "macro not found" error

2024-02-21 Thread arthur . cohen
From: Arthur Cohen 

gcc/rust/ChangeLog:

* expand/rust-macro-expand.h (struct MacroExpander): Nitpick: fix
formatting of emitted error.
---
 gcc/rust/expand/rust-macro-expand.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/rust/expand/rust-macro-expand.h 
b/gcc/rust/expand/rust-macro-expand.h
index f18e8e24a1d..896cdc6dcc8 100644
--- a/gcc/rust/expand/rust-macro-expand.h
+++ b/gcc/rust/expand/rust-macro-expand.h
@@ -414,7 +414,7 @@ struct MacroExpander
   = mappings->lookup_derive_proc_macro_invocation (path);
 if (!macro.has_value ())
   {
-   rust_error_at (path.get_locus (), "Macro not found");
+   rust_error_at (path.get_locus (), "macro not found");
return AST::Fragment::create_error ();
   }
 
@@ -437,7 +437,7 @@ struct MacroExpander
   = mappings->lookup_bang_proc_macro_invocation (invocation);
 if (!macro.has_value ())
   {
-   rust_error_at (invocation.get_locus (), "Macro not found");
+   rust_error_at (invocation.get_locus (), "macro not found");
return AST::Fragment::create_error ();
   }
 
@@ -459,7 +459,7 @@ struct MacroExpander
   = mappings->lookup_attribute_proc_macro_invocation (path);
 if (!macro.has_value ())
   {
-   rust_error_at (path.get_locus (), "Macro not found");
+   rust_error_at (path.get_locus (), "macro not found");
return AST::Fragment::create_error ();
   }
 
-- 
2.42.1



[COMMITTED 9/9] Update copyright years.

2024-02-21 Thread arthur . cohen
From: Sahil Yeole 

Signed-off-by: Sahil Yeole 
---
 gcc/rust/ast/rust-ast-builder.cc  | 2 +-
 gcc/rust/ast/rust-ast-builder.h   | 2 +-
 gcc/rust/ast/rust-ast-collector.cc| 2 +-
 gcc/rust/ast/rust-ast-collector.h | 2 +-
 gcc/rust/ast/rust-ast-formatting.cc   | 2 +-
 gcc/rust/ast/rust-ast-formatting.h| 2 +-
 gcc/rust/ast/rust-ast-visitor.cc  | 2 +-
 gcc/rust/ast/rust-path.cc | 2 +-
 gcc/rust/ast/rust-pattern.cc  | 2 +-
 gcc/rust/backend/rust-mangle-legacy.cc| 2 +-
 gcc/rust/backend/rust-mangle-v0.cc| 2 +-
 gcc/rust/backend/rust-mangle.cc   | 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir-builder-expr-stmt.cc | 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir-builder-expr-stmt.h  | 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir-builder-internal.h   | 2 +-
 .../checks/errors/borrowck/rust-bir-builder-lazyboolexpr.h| 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir-builder-pattern.h| 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir-builder-struct.h | 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir-builder.h| 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir-dump.h   | 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir-place.h  | 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir-visitor.h| 2 +-
 gcc/rust/checks/errors/borrowck/rust-bir.h| 2 +-
 gcc/rust/checks/errors/borrowck/rust-borrow-checker.cc| 2 +-
 gcc/rust/checks/errors/borrowck/rust-borrow-checker.h | 2 +-
 gcc/rust/checks/errors/borrowck/rust-function-collector.h | 2 +-
 gcc/rust/checks/errors/rust-ast-validation.cc | 2 +-
 gcc/rust/checks/errors/rust-ast-validation.h  | 2 +-
 gcc/rust/checks/errors/rust-readonly-check.cc | 2 +-
 gcc/rust/checks/errors/rust-readonly-check.h  | 2 +-
 gcc/rust/expand/rust-derive-clone.cc  | 2 +-
 gcc/rust/expand/rust-derive-clone.h   | 2 +-
 gcc/rust/expand/rust-derive-copy.cc   | 2 +-
 gcc/rust/expand/rust-derive-copy.h| 2 +-
 gcc/rust/expand/rust-derive.cc| 2 +-
 gcc/rust/expand/rust-derive.h | 2 +-
 gcc/rust/expand/rust-expand-visitor.cc| 2 +-
 gcc/rust/expand/rust-expand-visitor.h | 2 +-
 gcc/rust/expand/rust-macro-invoc-lexer.cc | 2 +-
 gcc/rust/expand/rust-macro-substitute-ctx.cc  | 2 +-
 gcc/rust/expand/rust-proc-macro-invoc-lexer.cc| 2 +-
 gcc/rust/expand/rust-proc-macro-invoc-lexer.h | 2 +-
 gcc/rust/hir/rust-ast-lower-implitem.cc   | 2 +-
 gcc/rust/hir/rust-ast-lower-stmt.cc   | 2 +-
 gcc/rust/lex/rust-input-source.h  | 2 +-
 gcc/rust/resolve/rust-default-resolver.cc | 2 +-
 gcc/rust/resolve/rust-default-resolver.h  | 2 +-
 gcc/rust/resolve/rust-early-name-resolver-2.0.cc  | 2 +-
 gcc/rust/resolve/rust-early-name-resolver-2.0.h   | 2 +-
 gcc/rust/resolve/rust-forever-stack.h | 2 +-
 gcc/rust/resolve/rust-forever-stack.hxx   | 2 +-
 gcc/rust/resolve/rust-late-name-resolver-2.0.cc   | 2 +-
 gcc/rust/resolve/rust-late-name-resolver-2.0.h| 2 +-
 gcc/rust/resolve/rust-name-resolution-context.cc  | 2 +-
 gcc/rust/resolve/rust-name-resolution-context.h   | 2 +-
 gcc/rust/resolve/rust-rib.cc  | 2 +-
 gcc/rust/resolve/rust-rib.h   | 2 +-
 gcc/rust/resolve/rust-toplevel-name-resolver-2.0.cc   | 2 +-
 gcc/rust/resolve/rust-toplevel-name-resolver-2.0.h| 2 +-
 gcc/rust/rust-attribs.cc  | 2 +-
 gcc/rust/rust-error-codes.def | 2 +-
 gcc/rust/util/bi-map.h| 2 +-
 gcc/rust/util/make-rust-unicode.py| 4 ++--
 gcc/rust/util/rust-attribute-values.h | 2 +-
 gcc/rust/util/rust-dir-owner.cc   | 2 +-
 gcc/rust/util/rust-dir-owner.h| 2 +-
 gcc/rust/util/rust-dump.h | 2 +-
 gcc/rust/util/rust-keyword-values.cc  | 2 +-
 gcc/rust/util/rust-keyword-values.h   | 2 +-
 gcc/rust/util/rust-punycode.cc| 2 +-
 gcc/rust/util/rust-punycode.h   

[COMMITTED 5/9] gccrs: Add testcase for #[rustc_const_stable]

2024-02-21 Thread arthur . cohen
From: Arthur Cohen 

To ensure we don't introduce regressions back to issue #2314

gcc/testsuite/ChangeLog:

* rust/compile/rustc_const_stable.rs: New test.
---
 gcc/testsuite/rust/compile/rustc_const_stable.rs | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 gcc/testsuite/rust/compile/rustc_const_stable.rs

diff --git a/gcc/testsuite/rust/compile/rustc_const_stable.rs 
b/gcc/testsuite/rust/compile/rustc_const_stable.rs
new file mode 100644
index 000..9208b1ab3b6
--- /dev/null
+++ b/gcc/testsuite/rust/compile/rustc_const_stable.rs
@@ -0,0 +1,2 @@
+#[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = 
"1.47.0")]
+pub fn foo() {} // { dg-error "macro not found" "" { target *-*-* } .-1 }
-- 
2.42.1



[COMMITTED 7/9] gccrs: Fix lookup of TuplePattern sub-pattern types

2024-02-21 Thread arthur . cohen
From: Owen Avery 

gcc/rust/ChangeLog:

* backend/rust-compile-pattern.cc
(CompilePatternLet::visit):
Lookup type of sub-pattern, not tuple pattern itself.

gcc/testsuite/ChangeLog:

* rust/compile/issue-2847-b.rs: New test.

Signed-off-by: Owen Avery 
---
 gcc/rust/backend/rust-compile-pattern.cc   | 12 ++--
 gcc/testsuite/rust/compile/issue-2847-b.rs |  4 
 2 files changed, 10 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/rust/compile/issue-2847-b.rs

diff --git a/gcc/rust/backend/rust-compile-pattern.cc 
b/gcc/rust/backend/rust-compile-pattern.cc
index 4fa611f5383..1a32f02c3ea 100644
--- a/gcc/rust/backend/rust-compile-pattern.cc
+++ b/gcc/rust/backend/rust-compile-pattern.cc
@@ -678,8 +678,8 @@ CompilePatternLet::visit (HIR::TuplePattern )
for (auto  : items_lower)
  {
TyTy::BaseType *ty_sub = nullptr;
-   HirId pattern_id = pattern.get_mappings ().get_hirid ();
-   bool ok = ctx->get_tyctx ()->lookup_type (pattern_id, _sub);
+   HirId sub_id = sub->get_mappings ().get_hirid ();
+   bool ok = ctx->get_tyctx ()->lookup_type (sub_id, _sub);
rust_assert (ok);
 
tree sub_init
@@ -697,8 +697,8 @@ CompilePatternLet::visit (HIR::TuplePattern )
for (auto  : items_upper)
  {
TyTy::BaseType *ty_sub = nullptr;
-   HirId pattern_id = pattern.get_mappings ().get_hirid ();
-   bool ok = ctx->get_tyctx ()->lookup_type (pattern_id, _sub);
+   HirId sub_id = sub->get_mappings ().get_hirid ();
+   bool ok = ctx->get_tyctx ()->lookup_type (sub_id, _sub);
rust_assert (ok);
 
tree sub_init
@@ -719,8 +719,8 @@ CompilePatternLet::visit (HIR::TuplePattern )
for (auto  : items.get_patterns ())
  {
TyTy::BaseType *ty_sub = nullptr;
-   HirId pattern_id = pattern.get_mappings ().get_hirid ();
-   bool ok = ctx->get_tyctx ()->lookup_type (pattern_id, _sub);
+   HirId sub_id = sub->get_mappings ().get_hirid ();
+   bool ok = ctx->get_tyctx ()->lookup_type (sub_id, _sub);
rust_assert (ok);
 
tree sub_init
diff --git a/gcc/testsuite/rust/compile/issue-2847-b.rs 
b/gcc/testsuite/rust/compile/issue-2847-b.rs
new file mode 100644
index 000..ab2614210fc
--- /dev/null
+++ b/gcc/testsuite/rust/compile/issue-2847-b.rs
@@ -0,0 +1,4 @@
+pub fn test() -> i32 {
+let (a, _) = (1, 2);
+a
+}
-- 
2.42.1



[COMMITTED 1/9] gccrs: Fix typo in RegionConstraints instance

2024-02-21 Thread arthur . cohen
From: Arthur Cohen 

gcc/rust/ChangeLog:

* typecheck/rust-hir-type-check-implitem.h: Fix typo in field
(region_costraints -> region_constraints).
---
 gcc/rust/typecheck/rust-hir-type-check-implitem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/rust/typecheck/rust-hir-type-check-implitem.h 
b/gcc/rust/typecheck/rust-hir-type-check-implitem.h
index 4d178440775..541b7280c1b 100644
--- a/gcc/rust/typecheck/rust-hir-type-check-implitem.h
+++ b/gcc/rust/typecheck/rust-hir-type-check-implitem.h
@@ -97,7 +97,7 @@ private:
   HIR::ImplBlock *parent;
   TyTy::BaseType *self;
   std::vector substitutions;
-  TyTy::RegionConstraints region_costraints;
+  TyTy::RegionConstraints region_constraints;
 };
 
 } // namespace Resolver
-- 
2.42.1



[COMMITTED 8/9] gccrs: Add variadic check on function params

2024-02-21 Thread arthur . cohen
From: 0xn4utilus 

gcc/rust/ChangeLog:

* checks/errors/rust-ast-validation.cc (ASTValidation::visit):
Add variadic check on all parameters.

gcc/testsuite/ChangeLog:

* rust/compile/issue-2850.rs: New test.

Signed-off-by: 0xn4utilus 
---
 gcc/rust/checks/errors/rust-ast-validation.cc | 12 
 gcc/testsuite/rust/compile/issue-2850.rs  | 17 +
 2 files changed, 25 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/rust/compile/issue-2850.rs

diff --git a/gcc/rust/checks/errors/rust-ast-validation.cc 
b/gcc/rust/checks/errors/rust-ast-validation.cc
index d57b7cb70fe..fcba57d0a92 100644
--- a/gcc/rust/checks/errors/rust-ast-validation.cc
+++ b/gcc/rust/checks/errors/rust-ast-validation.cc
@@ -132,10 +132,14 @@ ASTValidation::visit (AST::Function )
rust_error_at (function.get_locus (), "free function without a body");
 }
 
-  if (function.is_variadic ())
-rust_error_at (
-  function.get_function_params ().back ()->get_locus (),
-  "only foreign or % functions may be C-variadic");
+  auto _params = function.get_function_params ();
+  for (auto it = function_params.begin (); it != function_params.end (); it++)
+{
+  if (it->get ()->is_variadic ())
+   rust_error_at (it->get ()->get_locus (),
+  "only foreign or % functions may "
+  "be C-variadic");
+}
 
   AST::ContextualASTVisitor::visit (function);
 }
diff --git a/gcc/testsuite/rust/compile/issue-2850.rs 
b/gcc/testsuite/rust/compile/issue-2850.rs
new file mode 100644
index 000..62cbe0f6524
--- /dev/null
+++ b/gcc/testsuite/rust/compile/issue-2850.rs
@@ -0,0 +1,17 @@
+fn myfun0(...,_:i32) {}
+// { dg-error "only foreign or .unsafe extern \"C\". functions may be 
C-variadic" "" { target *-*-* } .-1 }
+
+fn myfun1(a:i32,...,_:i32) {}
+// { dg-error "only foreign or .unsafe extern \"C\". functions may be 
C-variadic" "" { target *-*-* } .-1 }
+
+struct z {
+x: f64,
+y: f64,
+}
+
+impl z {
+fn new(x: f64, ..., y: f64) -> z {
+// { dg-error "only foreign or .unsafe extern \"C\". functions may be 
C-variadic" "" { target *-*-* } .-1 }
+z { x: x, y: y }
+}
+}
\ No newline at end of file
-- 
2.42.1



[COMMITTED 6/9] gccrs: add powi intrinsics

2024-02-21 Thread arthur . cohen
From: Marc Poulhiès 

gcc/rust/ChangeLog:

* backend/rust-builtins.cc
(BuiltinsContext::register_rust_mappings): Add powi and reformat.
* backend/rust-builtins.h: Add missing copyright header.

gcc/testsuite/ChangeLog:

* rust/compile/torture/intrinsics-math.rs: Adjust pow test, add
test for powi.

Signed-off-by: Marc Poulhiès 
---
 gcc/rust/backend/rust-builtins.cc | 30 +--
 gcc/rust/backend/rust-builtins.h  |  2 ++
 .../rust/compile/torture/intrinsics-math.rs   | 12 ++--
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/gcc/rust/backend/rust-builtins.cc 
b/gcc/rust/backend/rust-builtins.cc
index 1a87f869206..282ecba70cb 100644
--- a/gcc/rust/backend/rust-builtins.cc
+++ b/gcc/rust/backend/rust-builtins.cc
@@ -285,46 +285,70 @@ void
 BuiltinsContext::register_rust_mappings ()
 {
   rust_intrinsic_to_gcc_builtin = {
-{"sinf32", "__builtin_sinf"},
-{"sqrtf32", "__builtin_sqrtf"},
-{"sqrtf64", "__builtin_sqrt"},
 {"unreachable", "__builtin_unreachable"},
 {"abort", "__builtin_abort"},
+
+// Math intrinsics
+{"sqrtf32", "__builtin_sqrtf"},
+{"sqrtf64", "__builtin_sqrt"},
+
+{"sinf32", "__builtin_sinf"},
 {"sinf64", "__builtin_sin"},
+
 {"cosf32", "__builtin_cosf"},
 {"cosf64", "__builtin_cos"},
+
 {"powf32", "__builtin_powf"},
 {"powf64", "__builtin_pow"},
+
+{"powif32", "__builtin_powif"},
+{"powif64", "__builtin_powi"},
+
 {"expf32", "__builtin_expf"},
 {"expf64", "__builtin_exp"},
+
 {"exp2f32", "__builtin_exp2f"},
 {"exp2f64", "__builtin_exp2"},
+
 {"logf32", "__builtin_logf"},
 {"logf64", "__builtin_log"},
+
 {"log10f32", "__builtin_log10f"},
 {"log10f64", "__builtin_log10"},
+
 {"log2f32", "__builtin_log2f"},
 {"log2f64", "__builtin_log2"},
+
 {"fmaf32", "__builtin_fmaf"},
 {"fmaf64", "__builtin_fma"},
+
 {"fabsf32", "__builtin_fabsf"},
 {"fabsf64", "__builtin_fabs"},
+
 {"minnumf32", "__builtin_fminf"},
 {"minnumf64", "__builtin_fmin"},
+
 {"maxnumf32", "__builtin_fmaxf"},
 {"maxnumf64", "__builtin_fmax"},
+
 {"copysignf32", "__builtin_copysignf"},
 {"copysignf64", "__builtin_copysign"},
+
 {"floorf32", "__builtin_floorf"},
 {"floorf64", "__builtin_floor"},
+
 {"ceilf32", "__builtin_ceilf"},
 {"ceilf64", "__builtin_ceil"},
+
 {"truncf32", "__builtin_truncf"},
 {"truncf64", "__builtin_trunc"},
+
 {"rintf32", "__builtin_rintf"},
 {"rintf64", "__builtin_rint"},
+
 {"nearbyintf32", "__builtin_nearbyintf"},
 {"nearbyintf64", "__builtin_nearbyint"},
+
 {"roundf32", "__builtin_roundf"},
 {"roundf64", "__builtin_round"},
   };
diff --git a/gcc/rust/backend/rust-builtins.h b/gcc/rust/backend/rust-builtins.h
index 5052edad51e..65dd850e3c6 100644
--- a/gcc/rust/backend/rust-builtins.h
+++ b/gcc/rust/backend/rust-builtins.h
@@ -1,3 +1,5 @@
+// Copyright (C) 2020-2024 Free Software Foundation, Inc.
+
 // This file is part of GCC.
 
 // GCC is free software; you can redistribute it and/or modify it under
diff --git a/gcc/testsuite/rust/compile/torture/intrinsics-math.rs 
b/gcc/testsuite/rust/compile/torture/intrinsics-math.rs
index 42acdde1494..e0d15f99d48 100644
--- a/gcc/testsuite/rust/compile/torture/intrinsics-math.rs
+++ b/gcc/testsuite/rust/compile/torture/intrinsics-math.rs
@@ -14,6 +14,9 @@ extern "rust-intrinsic" {
 pub fn powf32(a: f32, x: f32) -> f32;
 pub fn powf64(a: f64, x: f64) -> f64;
 
+pub fn powif32(a: f32, x: i32) -> f32;
+pub fn powif64(a: f64, x: i32) -> f64;
+
 pub fn expf32(x: f32) -> f32;
 pub fn expf64(x: f64) -> f64;
 
@@ -84,9 +87,14 @@ fn main() {
 // { dg-final { scan-tree-dump-times {(?n)f64 .* __builtin_cos.? 
\(.*6\.0e\+0\);$} 1 original } }
 
 f32 = powf32(7f32, 8f32);
-// { dg-final { scan-tree-dump-times {(?n)f32 .* __builtin_pow. 
\(.*7\.0e\+0, .*8\.0e\+0\);$} 1 original } }
+// { dg-final { scan-tree-dump-times {(?n)f32 .* __builtin_pow[^i] 
\(.*7\.0e\+0, .*8\.0e\+0\);$} 1 original } }
 f64 = powf64(9f64, 10f64);
-// { dg-final { scan-tree-dump-times {(?n)f64 .* __builtin_pow.? 
\(.*9\.0e\+0, .*1\.0e\+1\);$} 1 original } }
+// { dg-final { scan-tree-dump-times {(?n)f64 .* __builtin_pow[^i]? 
\(.*9\.0e\+0, .*1\.0e\+1\);$} 1 original } }
+
+f32 = powif32(7f32, 8i32);
+// { dg-final { scan-tree-dump-times {(?n)f32 .* __builtin_powi. 
\(.*7\.0e\+0, .*8\);$} 1 original } }
+f64 = powif64(9f64, 10i32);
+// { dg-final { scan-tree-dump-times {(?n)f64 .* __builtin_powi.? 
\(.*9\.0e\+0, .*10\);$} 1 original } }
 
 f32 = expf32(11f32);
 // { dg-final { scan-tree-dump-times {(?n)f32 .* __builtin_exp. 
\(.*1\.1e\+1\);$} 1 original } }
-- 
2.42.1



[COMMITTED 3/9] gccrs: Fix rebinding imports

2024-02-21 Thread arthur . cohen
From: Owen Avery 

gcc/rust/ChangeLog:

* resolve/rust-ast-resolve-item.cc
(flatten_glob): Use Import class.
(flatten_rebind): Likewise.
(flatten_list): Likewise.
(flatten): Likewise.
(flatten_use_dec_to_paths): Likewise.
(flatten_use_dec_to_imports): Likewise.
(ResolveItem::visit): Likewise.
(Import::add_prefix): New.
(rust_flatten_nested_glob): Adjust test.
(rust_flatten_glob): Likewise.
(rust_flatten_rebind_none): Likewise.
(rust_flatten_rebind): Likewise.
(rust_flatten_rebind_nested): Likewise.
(rust_flatten_list): Likewise.
* resolve/rust-ast-resolve-item.h
(class Import): New.

gcc/testsuite/ChangeLog:

* rust/compile/use_2.rs: New test.

Signed-off-by: Owen Avery 
---
 gcc/rust/resolve/rust-ast-resolve-item.cc | 179 +++---
 gcc/rust/resolve/rust-ast-resolve-item.h  |  23 +++
 gcc/testsuite/rust/compile/use_2.rs   |   7 +
 3 files changed, 116 insertions(+), 93 deletions(-)
 create mode 100644 gcc/testsuite/rust/compile/use_2.rs

diff --git a/gcc/rust/resolve/rust-ast-resolve-item.cc 
b/gcc/rust/resolve/rust-ast-resolve-item.cc
index e69b945407c..743657bc421 100644
--- a/gcc/rust/resolve/rust-ast-resolve-item.cc
+++ b/gcc/rust/resolve/rust-ast-resolve-item.cc
@@ -824,33 +824,30 @@ ResolveItem::resolve_extern_item (AST::ExternalItem *item)
 }
 
 static void
-flatten_glob (const AST::UseTreeGlob ,
- std::vector );
+flatten_glob (const AST::UseTreeGlob , std::vector );
 static void
-flatten_rebind (const AST::UseTreeRebind ,
-   std::vector );
+flatten_rebind (const AST::UseTreeRebind , std::vector );
 static void
-flatten_list (const AST::UseTreeList ,
- std::vector );
+flatten_list (const AST::UseTreeList , std::vector );
 
 static void
-flatten (const AST::UseTree *tree, std::vector )
+flatten (const AST::UseTree *tree, std::vector )
 {
   switch (tree->get_kind ())
 {
   case AST::UseTree::Glob: {
auto glob = static_cast (tree);
-   flatten_glob (*glob, paths);
+   flatten_glob (*glob, imports);
break;
   }
   case AST::UseTree::Rebind: {
auto rebind = static_cast (tree);
-   flatten_rebind (*rebind, paths);
+   flatten_rebind (*rebind, imports);
break;
   }
   case AST::UseTree::List: {
auto list = static_cast (tree);
-   flatten_list (*list, paths);
+   flatten_list (*list, imports);
break;
   }
   break;
@@ -858,36 +855,28 @@ flatten (const AST::UseTree *tree, 
std::vector )
 }
 
 static void
-flatten_glob (const AST::UseTreeGlob , std::vector 
)
+flatten_glob (const AST::UseTreeGlob , std::vector )
 {
   if (glob.has_path ())
-paths.emplace_back (glob.get_path ());
+imports.emplace_back (glob.get_path (), true, std::string ());
 }
 
 static void
-flatten_rebind (const AST::UseTreeRebind ,
-   std::vector )
+flatten_rebind (const AST::UseTreeRebind , std::vector )
 {
   auto path = rebind.get_path ();
-  if (rebind.has_path ())
-paths.emplace_back (path);
 
-  // FIXME: Do we want to emplace the rebind here as well?
+  std::string label;
   if (rebind.has_identifier ())
-{
-  auto rebind_path = path;
-  auto new_seg = rebind.get_identifier ();
-
-  // Add the identifier as a new path
-  rebind_path.get_segments ().back ()
-   = AST::SimplePathSegment (new_seg.as_string (), UNDEF_LOCATION);
+label = rebind.get_identifier ().as_string ();
+  else
+label = path.get_final_segment ().as_string ();
 
-  paths.emplace_back (rebind_path);
-}
+  imports.emplace_back (path, false, label);
 }
 
 static void
-flatten_list (const AST::UseTreeList , std::vector 
)
+flatten_list (const AST::UseTreeList , std::vector )
 {
   auto prefix = AST::SimplePath::create_empty ();
   if (list.has_path ())
@@ -895,21 +884,25 @@ flatten_list (const AST::UseTreeList , 
std::vector )
 
   for (const auto  : list.get_trees ())
 {
-  auto sub_paths = std::vector ();
-  flatten (tree.get (), sub_paths);
+  // append imports to the main list, then modify them in-place
+  auto start_idx = imports.size ();
+  flatten (tree.get (), imports);
 
-  for (auto _path : sub_paths)
-   {
- auto new_path = prefix;
- std::copy (sub_path.get_segments ().begin (),
-sub_path.get_segments ().end (),
-std::back_inserter (new_path.get_segments ()));
-
- paths.emplace_back (new_path);
-   }
+  for (auto import = imports.begin () + start_idx; import != imports.end 
();
+  import++)
+   import->add_prefix (prefix);
 }
 }
 
+void
+Import::add_prefix (AST::SimplePath prefix)
+{
+  AST::SimplePath old_path (std::move (path));
+  path = std::move (prefix);
+  std::move (old_path.get_segments ().begin (), old_path.get_segments ().end 
(),
+std::back_inserter 

[COMMITTED 2/9] gccrs: Add testcase for matches!() macro

2024-02-21 Thread arthur . cohen
From: Arthur Cohen 

This adds a testcase for issue #2129.

gcc/testsuite/ChangeLog:

* rust/execute/torture/matches_macro.rs: New test.
---
 .../rust/execute/torture/matches_macro.rs | 30 +++
 1 file changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/rust/execute/torture/matches_macro.rs

diff --git a/gcc/testsuite/rust/execute/torture/matches_macro.rs 
b/gcc/testsuite/rust/execute/torture/matches_macro.rs
new file mode 100644
index 000..7b61570727d
--- /dev/null
+++ b/gcc/testsuite/rust/execute/torture/matches_macro.rs
@@ -0,0 +1,30 @@
+macro_rules! matches {
+($expression:expr, $($pattern:pat)|+ $( if $guard:expr ),*) => {
+match $expression {
+$($pattern)|+ => true,
+_ => false,
+}
+}
+}
+
+pub fn should_match() -> bool {
+matches!(1, 1)
+}
+
+pub fn shouldnt() -> bool {
+matches!(1, 2)
+}
+
+fn main() -> i32 {
+let mut retval = 2;
+
+if should_match() {
+retval -= 1;
+}
+
+if !shouldnt() {
+retval -= 1;
+}
+
+retval
+}
-- 
2.42.1



[PATCHSET] Update Rust frontend 21/02/2024

2024-02-21 Thread arthur . cohen
Hey everyone,

Here is the latest patchset of the Rust frontend, taken directly from our 
development branch.

The patches have been built, tested and bootstrapped on x86-64.

[COMMITTED 1/9] gccrs: Fix typo in RegionConstraints instance
[COMMITTED 2/9] gccrs: Add testcase for matches!() macro
[COMMITTED 3/9] gccrs: Fix rebinding imports
[COMMITTED 4/9] gccrs: expand: Fix formatting for "macro not found"
[COMMITTED 5/9] gccrs: Add testcase for #[rustc_const_stable]
[COMMITTED 6/9] gccrs: add powi intrinsics
[COMMITTED 7/9] gccrs: Fix lookup of TuplePattern sub-pattern types
[COMMITTED 8/9] gccrs: Add variadic check on function params
[COMMITTED 9/9] Update copyright years.

Best,

Arthur



Stabilizing flaky libgomp GCN target/offloading testing (was: libgomp GCN gfx1030/gfx1100 offloading status)

2024-02-21 Thread Thomas Schwinge
Hi!

On 2024-02-01T15:49:02+0100, Richard Biener  wrote:
> On Thu, 1 Feb 2024, Thomas Schwinge wrote:
>> On 2024-01-26T10:45:10+0100, Richard Biener  wrote:
>> > On Fri, 26 Jan 2024, Richard Biener wrote:
>> >> On Wed, 24 Jan 2024, Andrew Stubbs wrote:
>> >> > [...] is enough to get gfx1100 working for most purposes, on top of the
>> >> > patch that Tobias committed a week or so ago; there are still some test
>> >> > failures to investigate, and probably some tuning to do.
>> >> > 
>> >> > It might also get gfx1030 working too. @Richi, could you test it,
>> >> > please?
>> >> 
>> >> I can report partial success here.  [...]
>> 
>> >> I'll followup with a test summary once the (serial) run of libgomp
>> >> testing finished.
>> 
>> (Why serial, by the way?)
>
> Just out of caution ... (I'm using the GPU for the desktop at the
> same time and dmesg gets spammed with some not-so reassuring
> "errors" during the offloading)

Yeah, indeed 'dmesg' is full of "notes"...

However, note that per my work on 
"libgomp make check time is excessive", all execution testing in libgomp
is serialized in 'libgomp/testsuite/lib/libgomp.exp:libgomp_load'.  So,
no problem/difference in that regard, to run parallel
'check-target-libgomp'.  (... with the caveat that execution tests for
effective-targets are *not* governed by that, as I've found yesterday.
I have a WIP hack for that, too.)


>> [...] what I
>> got with '-march=gfx1100' for AMD Radeon RX 7900 XTX.  [...]

>> [...] execution test FAILs.  Not all FAILs appear all the time [...]

What disturbs the testing a lot is that the GPU may get into a bad
state, upon which any use either fails with a
'HSA_STATUS_ERROR_OUT_OF_RESOURCES' error -- or just hangs, deep in
'libhsa-runtime64.so.1'...

I've now tried to debug the latter case (hang).  When the GPU gets into
this bad state (whatever exactly that is),
'hsa_executable_load_code_object' still returns 'HSA_STATUS_SUCCESS', but
then GCN target execution ('gcn-run') hangs in 'hsa_executable_freeze'
vs. GCN offloading execution ('libgomp-plugin-gcn.so.1') hangs right
before 'hsa_executable_freeze', in the GCN heap setup 'hsa_memory_copy'.
There it hangs until killed (for example, until DejaGnu's timeout
mechanism kills the process -- just that the next GPU-using execution
test then runs into the same thing again...).

In this state (and also the 'HSA_STATUS_ERROR_OUT_OF_RESOURCES' state),
we're able to recover via:

$ flock /tmp/gpu.lock sudo cat /sys/kernel/debug/dri/0/amdgpu_gpu_recover
0

This is, obviously, a hack, probably needs a serial lock to not disturb
other things, has hard-coded 'dri/0', and as I said in

"GCN RDNA2+ vs. GCC SLP vectorizer":

| I've no idea what
| 'amdgpu_gpu_recover' would do if the GPU is also used for display.

However, it's very useful in my testing.  :-|

The question is, how to detect the "hang" state without first running
into a timeout (and disambiguating such a timeout from a user code
timeout)?  Add a watchdog: call 'alarm([a few seconds])' before device
initialization, and before the actual GPU kernel launch cancel it with
'alarm(0)'?  (..., and add a handler for 'SIGALRM' to print a distinct
error message that we can then react on, like for
'HSA_STATUS_ERROR_OUT_OF_RESOURCES'.)  Probably 'alarm'/'SIGALRM' is a
no-go in libgomp -- instead, use a helper thread to similarly implement a
watchdog?  ('libgomp/plugin/plugin-gcn.c' already is using pthreads for
other purposes.)  Any other clever ideas?  What's a suitable value for
"a few seconds"?


Grüße
 Thomas


[pushed] aarch64: Remove duplicated call

2024-02-21 Thread Richard Sandiford
I noticed while working on another patch that we had a duplicated
call to aarch64_process_target_attr.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
* config/aarch64/aarch64.cc (aarch64_option_valid_attribute_p):
Remove duplicated call.
---
 gcc/config/aarch64/aarch64.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f9cedd31bc1..8a3aa98a32e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -19542,7 +19542,6 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, 
tree args, int)
 cl_target_option_restore (_options, _options_set,
  TREE_TARGET_OPTION (target_option_current_node));
 
-  ret = aarch64_process_target_attr (args);
   ret = aarch64_process_target_attr (args);
   if (ret)
 {
-- 
2.25.1



[pushed] aarch64: More SME vs -mtrack-speculation

2024-02-21 Thread Richard Sandiford
The sequence to commit a lazy save includes a branch based on
whether TPIDR2_EL0 is zero.  The code assumed that CBZ could
be used for this, but that instruction is forbidden when
-mtrack-speculation is being used.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
* config/aarch64/aarch64.cc (aarch64_mode_emit_local_sme_state):
Use aarch64_gen_compare_zero_and_branch rather than emitting
a CBZ directly.

gcc/testsuite/
* gcc.target/aarch64/sme/locally_streaming_1_ts.c: New test.
* gcc.target/aarch64/sme/sibcall_7_ts.c: Likewise.
---
 gcc/config/aarch64/aarch64.cc | 3 ++-
 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_ts.c | 3 +++
 gcc/testsuite/gcc.target/aarch64/sme/sibcall_7_ts.c   | 3 +++
 3 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_ts.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_7_ts.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 8a3aa98a32e..28d1555dada 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29344,7 +29344,8 @@ aarch64_mode_emit_local_sme_state 
(aarch64_local_sme_state mode,
   auto tmp_reg = gen_reg_rtx (DImode);
   emit_insn (gen_aarch64_read_tpidr2 (tmp_reg));
   auto label = gen_label_rtx ();
-  auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (tmp_reg, label));
+  rtx branch = aarch64_gen_compare_zero_and_branch (EQ, tmp_reg, label);
+  auto jump = emit_jump_insn (branch);
   JUMP_LABEL (jump) = label;
   emit_insn (gen_aarch64_tpidr2_save ());
   emit_insn (gen_aarch64_clear_tpidr2 ());
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_ts.c 
b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_ts.c
new file mode 100644
index 000..25ac345a6dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_ts.c
@@ -0,0 +1,3 @@
+// { dg-options "-O -fomit-frame-pointer -mtrack-speculation" }
+
+#include "locally_streaming_1.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7_ts.c 
b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7_ts.c
new file mode 100644
index 000..4b47678bf00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7_ts.c
@@ -0,0 +1,3 @@
+// { dg-options "-O2 -mtrack-speculation" }
+
+#include "sibcall_7.c"
-- 
2.25.1



[pushed] aarch64: Fix sibcalls involving shared-ZT0 functions

2024-02-21 Thread Richard Sandiford
In:

  void bar() __arm_inout("za");
  void foo() __arm_inout("za", "zt0") { bar(); }

foo cannot tail-call bar because foo needs to restore ZT0 after
the call.  I'd forgotten to update the ok_for_sibcall rules
to handle this when adding SME2.

Thanks to Sander de Smalen for the spot.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
* config/aarch64/aarch64.cc (aarch64_function_ok_for_sibcall):
Check that each individual piece of state is shared in the same
way, rather than using an aggregate check for PSTATE.ZA.

gcc/testsuite/
* gcc.target/aarch64/sme/sibcall_9.c: New test.
---
 gcc/config/aarch64/aarch64.cc |  6 ++-
 .../gcc.target/aarch64/sme/sibcall_9.c| 51 +++
 2 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_9.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index de746e28ca5..f9cedd31bc1 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -6334,8 +6334,10 @@ aarch64_function_ok_for_sibcall (tree, tree exp)
   tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
   if (aarch64_fntype_pstate_sm (fntype) & ~aarch64_cfun_incoming_pstate_sm ())
 return false;
-  if (aarch64_fntype_pstate_za (fntype) != aarch64_cfun_incoming_pstate_za ())
-return false;
+  for (auto state : { "za", "zt0" })
+if (bool (aarch64_cfun_shared_flags (state))
+   != bool (aarch64_fntype_shared_flags (fntype, state)))
+  return false;
   return true;
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_9.c 
b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_9.c
new file mode 100644
index 000..2e133c881a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_9.c
@@ -0,0 +1,51 @@
+// { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" }
+// { dg-final { check-function-bodies "**" "" } }
+
+#pragma GCC target "+sme2"
+
+void gen_zt0() __arm_preserves("za") __arm_out("zt0");
+void callee() __arm_inout("za");
+
+/*
+** caller_inout:
+** ...
+** str zt0, \[[^\n]+\]
+** bl  callee
+** ldr zt0, \[[^\n]+\]
+** ...
+** ret
+*/
+void caller_inout() __arm_inout("za", "zt0") { callee(); }
+
+/*
+** caller_in:
+** ...
+** str zt0, \[[^\n]+\]
+** bl  callee
+** ldr zt0, \[[^\n]+\]
+** ...
+** ret
+*/
+void caller_in() __arm_inout("za") __arm_in("zt0") { callee(); }
+
+/*
+** caller_out:
+** ...
+** str zt0, \[[^\n]+\]
+** bl  callee
+** ldr zt0, \[[^\n]+\]
+** ...
+** ret
+*/
+void caller_out() __arm_inout("za") __arm_in("zt0") { gen_zt0(); callee(); }
+
+/*
+** caller_preserves:
+** ...
+** str zt0, \[[^\n]+\]
+** bl  callee
+** ldr zt0, \[[^\n]+\]
+** ...
+** ret
+*/
+void caller_preserves() __arm_inout("za") __arm_preserves("zt0") { callee(); }
-- 
2.25.1



[PATCH] aarch64: Ensure ZT0 is zeroed in a new-ZT0 function

2024-02-21 Thread Richard Sandiford
ACLE guarantees that a function like:

  __arm_new("zt0") foo() { ... }

will start with ZT0 equal to zero.  I'd forgotten to enforce that
after committing a lazy save.  After such a save, we should zero
ZA iff the function has ZA state and zero ZT0 iff the function
has ZT0 state.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
* config/aarch64/aarch64.cc (aarch64_mode_emit_local_sme_state):
In the code that commits a lazy save, only zero ZA if the function
has ZA state.  Similarly zero ZT0 if the function has ZT0 state.

gcc/testsuite/
* gcc.target/aarch64/sme/zt0_state_5.c (test3): Expect ZT0 rather
than ZA to be zeroed.
(test5): Remove zeroing of ZA.
---
 gcc/config/aarch64/aarch64.cc  | 8 +++-
 gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c | 3 +--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index ed7fbca512b..de746e28ca5 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29338,6 +29338,7 @@ aarch64_mode_emit_local_sme_state 
(aarch64_local_sme_state mode,
 bl __arm_tpidr2_save
 msr tpidr2_el0, xzr
 zero { za }   // Only if ZA is live
+zero { zt0 }  // Only if ZT0 is live
 no_save:  */
   auto tmp_reg = gen_reg_rtx (DImode);
   emit_insn (gen_aarch64_read_tpidr2 (tmp_reg));
@@ -29348,7 +29349,12 @@ aarch64_mode_emit_local_sme_state 
(aarch64_local_sme_state mode,
   emit_insn (gen_aarch64_clear_tpidr2 ());
   if (mode == aarch64_local_sme_state::ACTIVE_LIVE
  || mode == aarch64_local_sme_state::ACTIVE_DEAD)
-   emit_insn (gen_aarch64_initial_zero_za ());
+   {
+ if (aarch64_cfun_has_state ("za"))
+   emit_insn (gen_aarch64_initial_zero_za ());
+ if (aarch64_cfun_has_state ("zt0"))
+   emit_insn (gen_aarch64_sme_zero_zt0 ());
+   }
   emit_label (label);
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c 
b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
index 0fba21868ed..2e008463aec 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
@@ -54,7 +54,7 @@ __arm_new("zt0") int test3()
 ** cbz x0, [^\n]+
 ** bl  __arm_tpidr2_save
 ** msr tpidr2_el0, xzr
-** zero{ za }
+** zero{ zt0 }
 ** smstart za
 ** bl  in_zt0
 ** smstop  za
@@ -102,7 +102,6 @@ __arm_new("zt0") void test5()
 ** cbz x0, [^\n]+
 ** bl  __arm_tpidr2_save
 ** msr tpidr2_el0, xzr
-** zero{ za }
 ** smstart za
 ** bl  out_zt0
 ** ...
-- 
2.25.1



[pushed] aarch64: Remove the aarch64_commit_lazy_save pattern

2024-02-21 Thread Richard Sandiford
The main purpose of the aarch64_commit_lazy_save pattern
was to defer insertion of a half-diamond until splitting,
since splitting knew how to create the associated basic blocks.

However, the fix for PR113220 means that mode-switching also
knows how to do that.  This patch therefore removes the pattern
and emits the subinstructions directly.

On its own, this is actually a slight regression, since it
means we keep an unnecessary zero { za }.  But the cases
where that happens are wrong for a different reason, and this
patch is a prerequisite to fixing it.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
* config/aarch64/aarch64-sme.md (aarch64_commit_lazy_save): Remove,
directly inserting the associated sequence
* config/aarch64/aarch64.cc (aarch64_mode_emit_local_sme_state):
...here instead.

gcc/testsuite/
* gcc.target/aarch64/sme/zt0_state_5.c (test3, test5): Expect
zero { za }s.
---
 gcc/config/aarch64/aarch64-sme.md | 45 ---
 gcc/config/aarch64/aarch64.cc | 13 --
 .../gcc.target/aarch64/sme/zt0_state_5.c  |  2 +
 3 files changed, 11 insertions(+), 49 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sme.md 
b/gcc/config/aarch64/aarch64-sme.md
index 81d941871ac..c95d4aa696c 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -455,51 +455,6 @@ (define_insn "aarch64_end_private_za_call"
   [(set_attr "type" "no_insn")]
 )
 
-;; This pseudo-instruction is emitted before a private-ZA function uses
-;; PSTATE.ZA state for the first time.  The instruction checks whether
-;; ZA currently contains data belonging to a caller and commits the
-;; lazy save if so.
-;;
-;; Operand 0 is the incoming value of TPIDR2_EL0.  Operand 1 is nonzero
-;; if ZA is live, and should therefore be zeroed after committing a save.
-;;
-;; The instruction is generated by the mode-switching pass.  It is a
-;; define_insn_and_split rather than a define_expand because of the
-;; internal control flow.
-(define_insn_and_split "aarch64_commit_lazy_save"
-  [(set (reg:DI ZA_FREE_REGNUM)
-   (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
-   (match_operand 1 "const_int_operand")
-   (reg:DI SME_STATE_REGNUM)
-   (reg:DI TPIDR2_SETUP_REGNUM)
-   (reg:VNx16QI ZA_REGNUM)] UNSPEC_COMMIT_LAZY_SAVE))
-   (set (reg:DI ZA_REGNUM)
-   (unspec:DI [(reg:DI SME_STATE_REGNUM)
-   (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))
-   (clobber (reg:DI R14_REGNUM))
-   (clobber (reg:DI R15_REGNUM))
-   (clobber (reg:DI R16_REGNUM))
-   (clobber (reg:DI R17_REGNUM))
-   (clobber (reg:DI R18_REGNUM))
-   (clobber (reg:DI R30_REGNUM))
-   (clobber (reg:CC CC_REGNUM))]
-  ""
-  "#"
-  "true"
-  [(const_int 0)]
-  {
-auto label = gen_label_rtx ();
-auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (operands[0], label));
-JUMP_LABEL (jump) = label;
-emit_insn (gen_aarch64_tpidr2_save ());
-emit_insn (gen_aarch64_clear_tpidr2 ());
-if (INTVAL (operands[1]) != 0)
-  emit_insn (gen_aarch64_initial_zero_za ());
-emit_label (label);
-DONE;
-  }
-)
-
 ;; =
 ;; == Loads, stores and moves
 ;; =
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 6a39ed8eddf..ed7fbca512b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29339,12 +29339,17 @@ aarch64_mode_emit_local_sme_state 
(aarch64_local_sme_state mode,
 msr tpidr2_el0, xzr
 zero { za }   // Only if ZA is live
 no_save:  */
-  bool is_active = (mode == aarch64_local_sme_state::ACTIVE_LIVE
-   || mode == aarch64_local_sme_state::ACTIVE_DEAD);
   auto tmp_reg = gen_reg_rtx (DImode);
-  auto active_flag = gen_int_mode (is_active, DImode);
   emit_insn (gen_aarch64_read_tpidr2 (tmp_reg));
-  emit_insn (gen_aarch64_commit_lazy_save (tmp_reg, active_flag));
+  auto label = gen_label_rtx ();
+  auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (tmp_reg, label));
+  JUMP_LABEL (jump) = label;
+  emit_insn (gen_aarch64_tpidr2_save ());
+  emit_insn (gen_aarch64_clear_tpidr2 ());
+  if (mode == aarch64_local_sme_state::ACTIVE_LIVE
+ || mode == aarch64_local_sme_state::ACTIVE_DEAD)
+   emit_insn (gen_aarch64_initial_zero_za ());
+  emit_label (label);
 }
 
   if (mode == aarch64_local_sme_state::ACTIVE_LIVE
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c 
b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
index e18b395476c..0fba21868ed 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/zt0_state_5.c
@@ -54,6 +54,7 @@ __arm_new("zt0") int test3()
 ** cbz

[pushed] aarch64: Stack-clash prologues and VG saves [PR113995]

2024-02-21 Thread Richard Sandiford
This patch fixes an ICE for a combination of:

- -fstack-clash-protection
- a frame that has SVE save slots
- a frame that has no GPR save slots
- a frame that has a VG save slot

The allocation code was folding the SVE save slot allocation into
the initial frame allocation, so that we had one allocation of
size (SVE save area size) + 16.  But the VG save code itself
expected the allocations to remain separate, since it wants to
store at a constant offset from SP or FP.

The VG save isn't shrink-wrapped and so acts as a probe of the
initial allocations.  It should therefore be safe to keep separate
allocations in this case.

The scans in locally_streaming_1.c expect no stack clash protection,
so the patch forces that and adds a separate compile-only test for
when protection is enabled.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
PR target/113995
* config/aarch64/aarch64.cc (aarch64_expand_prologue): Don't
fold the SVE allocation into the initial allocation if the
initial allocation includes a VG save.

gcc/testsuite/
PR target/113995
* gcc.target/aarch64/sme/locally_streaming_1.c: Require
-fno-stack-clash-protection.
* gcc.target/aarch64/sme/locally_streaming_1_scp.c: New test.
---
 gcc/config/aarch64/aarch64.cc| 9 +++--
 .../gcc.target/aarch64/sme/locally_streaming_1.c | 2 +-
 .../gcc.target/aarch64/sme/locally_streaming_1_scp.c | 3 +++
 3 files changed, 11 insertions(+), 3 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_scp.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 104f7e1831e..6a39ed8eddf 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9523,7 +9523,9 @@ aarch64_expand_prologue (void)
   if (aarch64_cfun_enables_pstate_sm ())
 force_isa_mode = AARCH64_FL_SM_ON;
 
-  if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
+  if (flag_stack_clash_protection
+  && known_eq (callee_adjust, 0)
+  && known_lt (frame.reg_offset[VG_REGNUM], 0))
 {
   /* Fold the SVE allocation into the initial allocation.
 We don't do this in aarch64_layout_arg to avoid pessimizing
@@ -9651,7 +9653,10 @@ aarch64_expand_prologue (void)
   if (maybe_ne (sve_callee_adjust, 0))
 {
   gcc_assert (!flag_stack_clash_protection
- || known_eq (initial_adjust, 0));
+ || known_eq (initial_adjust, 0)
+ /* The VG save isn't shrink-wrapped and so serves as
+a probe of the initial allocation.  */
+ || known_eq (frame.reg_offset[VG_REGNUM], bytes_below_sp));
   aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
  sve_callee_adjust,
  force_isa_mode,
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c 
b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c
index 4bb637f4781..cb235f5c832 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c
@@ -1,4 +1,4 @@
-// { dg-options "-O -fomit-frame-pointer" }
+// { dg-options "-O -fomit-frame-pointer -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 void consume_za () [[arm::streaming, arm::inout("za")]];
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_scp.c 
b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_scp.c
new file mode 100644
index 000..6b7f47dce7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1_scp.c
@@ -0,0 +1,3 @@
+// { dg-options "-O -fomit-frame-pointer -fstack-clash-protection" }
+
+#include "locally_streaming_1.c"
-- 
2.25.1



Re: [PATCH] Allow mode-switching to introduce internal loops [PR113220]

2024-02-21 Thread Jakub Jelinek
On Wed, Feb 21, 2024 at 10:03:17AM +0000, Richard Sandiford wrote:
> In this PR, the SME mode-switching code needs to insert a stack-probe
> loop for an alloca.  This patch allows the target to do that.
> 
> There are two parts to it: allowing loops for insertions in blocks,
> and allowing them for insertions on edges.  The former can be handled
> entirely within mode-switching itself, by recording which blocks have
> had new branches inserted.  The latter requires an extension to
> commit_one_edge_insertion.
> 
> I think the extension to commit_one_edge_insertion makes logical sense,
> since it already explicitly allows internal loops during RTL expansion.
> The single-block find_sub_basic_blocks is a relatively recent addition,
> so wouldn't have been available when the code was originally written.
> 
> The patch also has a small and obvious fix to make the aarch64 emit
> hook cope with labels.
> 
> I've added specific -fstack-clash-protection versions of all
> aarch64-sme.exp tests that previously failed because of this bug.
> I've also added -fno-stack-clash-protection to the original versions
> of these tests if they contain scans that assume no protection.
> 
> Tested on aarch64-linux-gnu.  OK to install?
> 
> Richard
> 
> 
> gcc/
>   PR target/113220
>   * cfgrtl.cc (commit_one_edge_insertion): Handle sequences that
>   contain jumps even if called after initial RTL expansion.
>   * mode-switching.cc: Include cfgbuild.h.
>   (optimize_mode_switching): Allow the sequence returned by the
>   emit hook to contain internal jumps.  Record which blocks
>   contain such jumps and split the blocks at the end.
>   * config/aarch64/aarch64.cc (aarch64_mode_emit): Check for
>   non-debug insns when scanning the sequence.

LGTM.

Jakub



[PATCH] c++: Revert deferring emission of inline variables [PR114013]

2024-02-21 Thread Nathaniel Shead
My earlier patch appears to have caused some regressions. I've taken a
quick look to see if there are obvious workarounds, but given the time
frame and the fact that I still don't really understand all the details
of how and when symbols get emitted, I felt it was safer to revert the
non-modules parts of this change instead.

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

-- >8 --

This is a (partial) reversion of r14-8987-gdd9d14f7d53 to return to
eagerly emitting inline variables to the middle-end when they are
declared. 'import_export_decl' will still continue to accept them, as
allowing this is a pure extension and doesn't seem to cause issues with
modules, but otherwise deferring the emission of inline variables
appears to cause issues on some targets and prevents some code using
inline variable templates from correctly linking.

There might be a more targeted way to fix this, but due to the
complexity of handling linkage I'd prefer to wait till GCC 15 to explore
our options.

PR c++/113970
PR c++/114013

gcc/cp/ChangeLog:

* decl.cc (make_rtl_for_nonlocal_decl): Don't defer inline
variables.
* decl2.cc (import_export_decl): Only support inline variables
imported from a module.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/inline-var10.C: New test.
---
 gcc/cp/decl.cc|  4 ---
 gcc/cp/decl2.cc   |  6 +++--
 gcc/testsuite/g++.dg/cpp1z/inline-var10.C | 33 +++
 3 files changed, 37 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1z/inline-var10.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index e47f694e4e5..d19d09adde4 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -7954,10 +7954,6 @@ make_rtl_for_nonlocal_decl (tree decl, tree init, const 
char* asmspec)
   && DECL_IMPLICIT_INSTANTIATION (decl))
 defer_p = 1;
 
-  /* Defer vague-linkage variables.  */
-  if (DECL_INLINE_VAR_P (decl))
-defer_p = 1;
-
   /* If we're not deferring, go ahead and assemble the variable.  */
   if (!defer_p)
 rest_of_decl_compilation (decl, toplev, at_eof);
diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 1dddbaab38b..24a9332ccb1 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -3362,7 +3362,7 @@ import_export_decl (tree decl)
 
  * inline functions
 
- * inline variables
+ * inline variables (from modules)
 
  * implicit instantiations of static data members of class
templates
@@ -3385,7 +3385,9 @@ import_export_decl (tree decl)
|| DECL_DECLARED_INLINE_P (decl));
   else
 gcc_assert (DECL_IMPLICIT_INSTANTIATION (decl)
-   || DECL_INLINE_VAR_P (decl)
+   || (DECL_INLINE_VAR_P (decl)
+   && DECL_LANG_SPECIFIC (decl)
+   && DECL_MODULE_IMPORT_P (decl))
|| DECL_VTABLE_OR_VTT_P (decl)
|| DECL_TINFO_P (decl));
   /* Check that a definition of DECL is available in this translation
diff --git a/gcc/testsuite/g++.dg/cpp1z/inline-var10.C 
b/gcc/testsuite/g++.dg/cpp1z/inline-var10.C
new file mode 100644
index 000..8a198556778
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/inline-var10.C
@@ -0,0 +1,33 @@
+// PR c++/114013
+// { dg-do link { target c++17 } }
+
+struct S { int a, b; };
+
+template 
+constexpr struct S var[8] = {};
+
+template <>
+constexpr inline struct S var<6>[8] = {
+  { 1, 1 }, { 2, 0 }, { 3, 1 }, { 4, 0 },
+  { 5, 1 }, { 6, 0 }, { 7, 1 }, { 8, 0 }
+};
+
+[[gnu::noipa]] void
+foo (S)
+{
+}
+
+template 
+void
+bar (int x)
+{
+  foo (var[x]);
+}
+
+volatile int x;
+
+int
+main ()
+{
+  bar <6> (x);
+}
-- 
2.43.0



[PATCH] Allow mode-switching to introduce internal loops [PR113220]

2024-02-21 Thread Richard Sandiford
In this PR, the SME mode-switching code needs to insert a stack-probe
loop for an alloca.  This patch allows the target to do that.

There are two parts to it: allowing loops for insertions in blocks,
and allowing them for insertions on edges.  The former can be handled
entirely within mode-switching itself, by recording which blocks have
had new branches inserted.  The latter requires an extension to
commit_one_edge_insertion.

I think the extension to commit_one_edge_insertion makes logical sense,
since it already explicitly allows internal loops during RTL expansion.
The single-block find_sub_basic_blocks is a relatively recent addition,
so wouldn't have been available when the code was originally written.

The patch also has a small and obvious fix to make the aarch64 emit
hook cope with labels.

I've added specific -fstack-clash-protection versions of all
aarch64-sme.exp tests that previously failed because of this bug.
I've also added -fno-stack-clash-protection to the original versions
of these tests if they contain scans that assume no protection.

Tested on aarch64-linux-gnu.  OK to install?

Richard


gcc/
PR target/113220
* cfgrtl.cc (commit_one_edge_insertion): Handle sequences that
contain jumps even if called after initial RTL expansion.
* mode-switching.cc: Include cfgbuild.h.
(optimize_mode_switching): Allow the sequence returned by the
emit hook to contain internal jumps.  Record which blocks
contain such jumps and split the blocks at the end.
* config/aarch64/aarch64.cc (aarch64_mode_emit): Check for
non-debug insns when scanning the sequence.

gcc/testsuite/
PR target/113220
* gcc.target/aarch64/sme/call_sm_switch_5.c: Add
-fno-stack-clash-protection.
* gcc.target/aarch64/sme/call_sm_switch_5_scp.c: New test.
* gcc.target/aarch64/sme/sibcall_6_scp.c: New test.
* gcc.target/aarch64/sme/za_state_4.c: Add
-fno-stack-clash-protection.
* gcc.target/aarch64/sme/za_state_4_scp.c: New test.
* gcc.target/aarch64/sme/za_state_5.c: Add
-fno-stack-clash-protection.
* gcc.target/aarch64/sme/za_state_5_scp.c: New test.
---
 gcc/cfgrtl.cc | 27 ++-
 gcc/config/aarch64/aarch64.cc |  2 ++
 gcc/mode-switching.cc | 15 +++
 .../gcc.target/aarch64/sme/call_sm_switch_5.c |  2 +-
 .../aarch64/sme/call_sm_switch_5_scp.c|  3 +++
 .../gcc.target/aarch64/sme/sibcall_6_scp.c|  3 +++
 .../gcc.target/aarch64/sme/za_state_4.c   |  2 +-
 .../gcc.target/aarch64/sme/za_state_4_scp.c   |  3 +++
 .../gcc.target/aarch64/sme/za_state_5.c   |  2 +-
 .../gcc.target/aarch64/sme/za_state_5_scp.c   |  3 +++
 10 files changed, 53 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5_scp.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_6_scp.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_4_scp.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_5_scp.c

diff --git a/gcc/cfgrtl.cc b/gcc/cfgrtl.cc
index 15259c5e984..304c429c99b 100644
--- a/gcc/cfgrtl.cc
+++ b/gcc/cfgrtl.cc
@@ -2018,6 +2018,21 @@ commit_one_edge_insertion (edge e)
   insns = e->insns.r;
   e->insns.r = NULL;
 
+  /* Allow the sequence to contain internal jumps, such as a memcpy loop
+ or an allocation loop.  If such a sequence is emitted during RTL
+ expansion, we'll create the appropriate basic blocks later,
+ at the end of the pass.  But if such a sequence is emitted after
+ initial expansion, we'll need to find the subblocks ourselves.  */
+  bool contains_jump = false;
+  if (!currently_expanding_to_rtl)
+for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
+  if (JUMP_P (insn))
+   {
+ rebuild_jump_labels_chain (insns);
+ contains_jump = true;
+ break;
+   }
+
   /* Figure out where to put these insns.  If the destination has
  one predecessor, insert there.  Except for the exit block.  */
   if (single_pred_p (e->dest) && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
@@ -2112,13 +2127,13 @@ commit_one_edge_insertion (edge e)
delete_insn (before);
 }
   else
-/* Some builtin expanders, such as those for memset and memcpy,
-   may generate loops and conditionals, and those may get emitted
-   into edges.  That's ok while expanding to rtl, basic block
-   boundaries will be identified and split afterwards.  ???  Need
-   we check whether the destination labels of any inserted jumps
-   are also part of the inserted sequence?  */
+/* Sequences inserted after RTL expansion are expected to be SESE,
+   with only internal branches allowed.  If the sequence jumps outside
+   itself then we do not know how to add the associated edges here.  */
 gcc_assert (!JUMP_P (last) || 

Re: [PATCH v3] bpf: add inline memmove and memcpy expansion

2024-02-21 Thread Jose E. Marchesi


Hi David.

This is OK.
Thank you, very nice stuff!

> [Changes from v2: 
>  - Fix incorrectly passing a location instead of OPT_W* for warning ().
>  - Reword warning/error message and test accordingly.  ]
>  
> [Changes from v1: Jose's review comments, all of which I agree with.
>  - Fix 'implments' typo in commit message.
>  - Change check that alignment is CONST_INT to gcc_assert ().
>  - Change default case in alignment switch to gcc_unreachable ().
>  - Reword error message for non-constant size memmove/memcpy, and
>update test for the error accordingly.
>  - Delete CPYMEM_EXPAND_ERR macro, since it was now only used in
>one place.  ]
>
> BPF programs are not typically linked, which means we cannot fall back
> on library calls to implement __builtin_{memmove,memcpy} and should
> always expand them inline if possible.
>
> GCC already successfully expands these builtins inline in many cases,
> but failed to do so for a few simple cases involving overlapping
> memmove in the kernel BPF selftests and was instead emitting a libcall.
>
> This patch implements a simple inline expansion of memcpy and memmove in
> the BPF backend in a verifier-friendly way, with the caveat that the
> size must be an integer constant, which is also required by clang.
>
> Tested for bpf-unknown-none on x86_64-linux-gnu host.
>
> Also tested against the BPF verifier by compiling and loading a test
> program with overlapping memmove (essentially the memmove-1.c test)
> which failed before due to a libcall, and now successfully loads and
> passes the verifier.
>
> gcc/
>
>   * config/bpf/bpf-protos.h (bpf_expand_cpymem): New.
>   * config/bpf/bpf.cc: (emit_move_loop, bpf_expand_cpymem): New.
>   * config/bpf/bpf.md: (cpymemdi, movmemdi): New define_expands.
>
> gcc/testsuite/
>
>   * gcc.target/bpf/memcpy-1.c: New test.
>   * gcc.target/bpf/memmove-1.c: New test.
>   * gcc.target/bpf/memmove-2.c: New test.
> ---
>  gcc/config/bpf/bpf-protos.h  |   2 +
>  gcc/config/bpf/bpf.cc| 115 +++
>  gcc/config/bpf/bpf.md|  36 +++
>  gcc/testsuite/gcc.target/bpf/memcpy-1.c  |  26 +
>  gcc/testsuite/gcc.target/bpf/memmove-1.c |  46 +
>  gcc/testsuite/gcc.target/bpf/memmove-2.c |  23 +
>  6 files changed, 248 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/bpf/memcpy-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/memmove-1.c
>  create mode 100644 gcc/testsuite/gcc.target/bpf/memmove-2.c
>
> diff --git a/gcc/config/bpf/bpf-protos.h b/gcc/config/bpf/bpf-protos.h
> index 46d950bd990..366acb87ae4 100644
> --- a/gcc/config/bpf/bpf-protos.h
> +++ b/gcc/config/bpf/bpf-protos.h
> @@ -35,4 +35,6 @@ const char *bpf_add_core_reloc (rtx *operands, const char 
> *templ);
>  class gimple_opt_pass;
>  gimple_opt_pass *make_pass_lower_bpf_core (gcc::context *ctxt);
>  
> +bool bpf_expand_cpymem (rtx *, bool);
> +
>  #endif /* ! GCC_BPF_PROTOS_H */
> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
> index d6ca47eeecb..f9ac263613a 100644
> --- a/gcc/config/bpf/bpf.cc
> +++ b/gcc/config/bpf/bpf.cc
> @@ -1184,6 +1184,121 @@ bpf_use_by_pieces_infrastructure_p (unsigned 
> HOST_WIDE_INT size,
>  #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
>bpf_use_by_pieces_infrastructure_p
>  
> +/* Helper for bpf_expand_cpymem.  Emit an unrolled loop moving the bytes
> +   from SRC to DST.  */
> +
> +static void
> +emit_move_loop (rtx src, rtx dst, machine_mode mode, int offset, int inc,
> + unsigned iters, unsigned remainder)
> +{
> +  rtx reg = gen_reg_rtx (mode);
> +
> +  /* First copy in chunks as large as alignment permits.  */
> +  for (unsigned int i = 0; i < iters; i++)
> +{
> +  emit_move_insn (reg, adjust_address (src, mode, offset));
> +  emit_move_insn (adjust_address (dst, mode, offset), reg);
> +  offset += inc;
> +}
> +
> +  /* Handle remaining bytes which might be smaller than the chunks
> + used above.  */
> +  if (remainder & 4)
> +{
> +  emit_move_insn (reg, adjust_address (src, SImode, offset));
> +  emit_move_insn (adjust_address (dst, SImode, offset), reg);
> +  offset += (inc < 0 ? -4 : 4);
> +  remainder -= 4;
> +}
> +  if (remainder & 2)
> +{
> +  emit_move_insn (reg, adjust_address (src, HImode, offset));
> +  emit_move_insn (adjust_address (dst, HImode, offset), reg);
> +  offset += (inc < 0 ? -2 : 2);
> +  remainder -= 2;
> +}
> +  if (remainder & 1)
> +{
> +  emit_move_insn (reg, adjust_address (src, QImode, offset));
> +  emit_move_insn (adjust_address (dst, QImode, offset), reg);
> +}
> +}
> +
> +/* Expand cpymem/movmem, as from __builtin_memcpy/memmove.
> +   OPERANDS are the same as the cpymem/movmem patterns.
> +   IS_MOVE is true if this is a memmove, false for memcpy.
> +   Return true if we successfully expanded, or false if we cannot
> +   and must punt to a libcall.  */
> +
> 

[PATCH v13 11/26] c++: Implement __remove_extent built-in trait

2024-02-21 Thread Ken Matsui
This patch implements a built-in trait for std::remove_extent.

gcc/cp/ChangeLog:

* cp-trait.def: Define __remove_extent.
* semantics.cc (finish_trait_type): Handle CPTK_REMOVE_EXTENT.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __remove_extent.
* g++.dg/ext/remove_extent.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  5 +
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/remove_extent.C | 16 
 4 files changed, 25 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/remove_extent.C

diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 63f879287ce..577c96d579b 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -100,6 +100,7 @@ DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_tempo
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
 DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
+DEFTRAIT_TYPE (REMOVE_EXTENT, "__remove_extent", 1)
 DEFTRAIT_TYPE (REMOVE_POINTER, "__remove_pointer", 1)
 DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1)
 DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 635441a7a90..58696225fc4 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12793,6 +12793,11 @@ finish_trait_type (cp_trait_kind kind, tree type1, 
tree type2,
type1 = TREE_TYPE (type1);
   return cv_unqualified (type1);
 
+case CPTK_REMOVE_EXTENT:
+  if (TREE_CODE (type1) == ARRAY_TYPE)
+   type1 = TREE_TYPE (type1);
+  return type1;
+
 case CPTK_REMOVE_POINTER:
   if (TYPE_PTR_P (type1))
type1 = TREE_TYPE (type1);
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index 9d861398bae..5d5cbe3b019 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -182,6 +182,9 @@
 #if !__has_builtin (__remove_cvref)
 # error "__has_builtin (__remove_cvref) failed"
 #endif
+#if !__has_builtin (__remove_extent)
+# error "__has_builtin (__remove_extent) failed"
+#endif
 #if !__has_builtin (__remove_pointer)
 # error "__has_builtin (__remove_pointer) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/remove_extent.C 
b/gcc/testsuite/g++.dg/ext/remove_extent.C
new file mode 100644
index 000..6183aca5a48
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/remove_extent.C
@@ -0,0 +1,16 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+class ClassType { };
+
+SA(__is_same(__remove_extent(int), int));
+SA(__is_same(__remove_extent(int[2]), int));
+SA(__is_same(__remove_extent(int[2][3]), int[3]));
+SA(__is_same(__remove_extent(int[][3]), int[3]));
+SA(__is_same(__remove_extent(const int[2]), const int));
+SA(__is_same(__remove_extent(ClassType), ClassType));
+SA(__is_same(__remove_extent(ClassType[2]), ClassType));
+SA(__is_same(__remove_extent(ClassType[2][3]), ClassType[3]));
+SA(__is_same(__remove_extent(ClassType[][3]), ClassType[3]));
+SA(__is_same(__remove_extent(const ClassType[2]), const ClassType));
-- 
2.43.2



[PATCH v13 20/26] libstdc++: Optimize std::decay compilation performance

2024-02-21 Thread Ken Matsui
This patch optimizes the compilation performance of std::decay
by dispatching to the new __decay built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (decay): Use __decay built-in trait.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 18a5e4de2d3..2f4c8dd3b21 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -2316,6 +2316,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   /// @cond undocumented
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__decay)
+  template
+struct decay
+{ using type = __decay(_Tp); };
+#else
   // Decay trait for arrays and functions, used for perfect forwarding
   // in make_pair, make_tuple, etc.
   template
@@ -2347,6 +2352,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 struct decay<_Tp&&>
 { using type = typename __decay_selector<_Tp>::type; };
+#endif
 
   /// @cond undocumented
 
-- 
2.43.2



[PATCH v13 03/26] c++: Implement __is_volatile built-in trait

2024-02-21 Thread Ken Matsui
This patch implements a built-in trait for std::is_volatile.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_volatile.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_VOLATILE.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_volatile.
* g++.dg/ext/is_volatile.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/is_volatile.C   | 20 
 5 files changed, 31 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_volatile.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index f32a1c78d63..9a7a12629e7 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3861,6 +3861,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_UNION:
   inform (loc, "  %qT is not a union", t1);
   break;
+case CPTK_IS_VOLATILE:
+  inform (loc, "  %qT is not a volatile type", t1);
+  break;
 case CPTK_REF_CONSTRUCTS_FROM_TEMPORARY:
   inform (loc, "  %qT is not a reference that binds to a temporary "
  "object of type %qT (direct-initialization)", t1, t2);
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 36faed9c0b3..e9347453829 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -92,6 +92,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, 
"__is_trivially_assignable", 2)
 DEFTRAIT_EXPR (IS_TRIVIALLY_CONSTRUCTIBLE, "__is_trivially_constructible", -1)
 DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
+DEFTRAIT_EXPR (IS_VOLATILE, "__is_volatile", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 0d08900492b..41c25f43d27 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12532,6 +12532,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_UNION:
   return type_code1 == UNION_TYPE;
 
+case CPTK_IS_VOLATILE:
+  return CP_TYPE_VOLATILE_P (type1);
+
 case CPTK_REF_CONSTRUCTS_FROM_TEMPORARY:
   return ref_xes_from_temporary (type1, type2, /*direct_init=*/true);
 
@@ -12702,6 +12705,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_SAME:
 case CPTK_IS_SCOPED_ENUM:
 case CPTK_IS_UNION:
+case CPTK_IS_VOLATILE:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index e3640faeb96..b2e2f2f694d 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -158,6 +158,9 @@
 #if !__has_builtin (__is_union)
 # error "__has_builtin (__is_union) failed"
 #endif
+#if !__has_builtin (__is_volatile)
+# error "__has_builtin (__is_volatile) failed"
+#endif
 #if !__has_builtin (__reference_constructs_from_temporary)
 # error "__has_builtin (__reference_constructs_from_temporary) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/is_volatile.C 
b/gcc/testsuite/g++.dg/ext/is_volatile.C
new file mode 100644
index 000..80a1cfc880d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_volatile.C
@@ -0,0 +1,20 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+class ClassType { };
+using cClassType = const ClassType;
+using vClassType = volatile ClassType;
+using cvClassType = const volatile ClassType;
+
+// Positive tests.
+SA(__is_volatile(volatile int));
+SA(__is_volatile(const volatile int));
+SA(__is_volatile(vClassType));
+SA(__is_volatile(cvClassType));
+
+// Negative tests.
+SA(!__is_volatile(int));
+SA(!__is_volatile(const int));
+SA(!__is_volatile(ClassType));
+SA(!__is_volatile(cClassType));
-- 
2.43.2



[PATCH v13 02/26] libstdc++: Optimize std::is_const compilation performance

2024-02-21 Thread Ken Matsui
This patch optimizes the compilation performance of std::is_const
by dispatching to the new __is_const built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_const): Use __is_const built-in
trait.
(is_const_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 12 
 1 file changed, 12 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 21402fd8c13..6e9ebfb8a18 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -835,6 +835,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Type properties.
 
   /// is_const
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_const)
+  template<typename _Tp>
+struct is_const
+: public __bool_constant<__is_const(_Tp)>
+{ };
+#else
   template<typename>
 struct is_const
 : public false_type { };
@@ -842,6 +848,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
 struct is_const<_Tp const>
 : public true_type { };
+#endif
 
   /// is_volatile
   template<typename>
@@ -3327,10 +3334,15 @@ template 
   inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value;
 #endif
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_const)
+template <typename _Tp>
+  inline constexpr bool is_const_v = __is_const(_Tp);
+#else
 template <typename _Tp>
   inline constexpr bool is_const_v = false;
 template <typename _Tp>
   inline constexpr bool is_const_v<const _Tp> = true;
+#endif
 
 #if _GLIBCXX_USE_BUILTIN_TRAIT(__is_function)
 template <typename _Tp>
-- 
2.43.2



[PATCH v13 09/26] c++: Implement __add_pointer built-in trait

2024-02-21 Thread Ken Matsui
This patch implements built-in trait for std::add_pointer.

gcc/cp/ChangeLog:

* cp-trait.def: Define __add_pointer.
* semantics.cc (finish_trait_type): Handle CPTK_ADD_POINTER.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __add_pointer.
* g++.dg/ext/add_pointer.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  9 ++
 gcc/testsuite/g++.dg/ext/add_pointer.C   | 39 
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 4 files changed, 52 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/add_pointer.C

diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 05514a51c21..63f879287ce 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -48,6 +48,7 @@
 #define DEFTRAIT_TYPE_DEFAULTED
 #endif
 
+DEFTRAIT_TYPE (ADD_POINTER, "__add_pointer", 1)
 DEFTRAIT_EXPR (HAS_NOTHROW_ASSIGN, "__has_nothrow_assign", 1)
 DEFTRAIT_EXPR (HAS_NOTHROW_CONSTRUCTOR, "__has_nothrow_constructor", 1)
 DEFTRAIT_EXPR (HAS_NOTHROW_COPY, "__has_nothrow_copy", 1)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 1794e83baa2..635441a7a90 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12776,6 +12776,15 @@ finish_trait_type (cp_trait_kind kind, tree type1, 
tree type2,
 
   switch (kind)
 {
+case CPTK_ADD_POINTER:
+  if (FUNC_OR_METHOD_TYPE_P (type1)
+ && (type_memfn_quals (type1) != TYPE_UNQUALIFIED
+ || type_memfn_rqual (type1) != REF_QUAL_NONE))
+   return type1;
+  if (TYPE_REF_P (type1))
+   type1 = TREE_TYPE (type1);
+  return build_pointer_type (type1);
+
 case CPTK_REMOVE_CV:
   return cv_unqualified (type1);
 
diff --git a/gcc/testsuite/g++.dg/ext/add_pointer.C 
b/gcc/testsuite/g++.dg/ext/add_pointer.C
new file mode 100644
index 000..c405cdd0feb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/add_pointer.C
@@ -0,0 +1,39 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+class ClassType { };
+
+SA(__is_same(__add_pointer(int), int*));
+SA(__is_same(__add_pointer(int*), int**));
+SA(__is_same(__add_pointer(const int), const int*));
+SA(__is_same(__add_pointer(int&), int*));
+SA(__is_same(__add_pointer(ClassType*), ClassType**));
+SA(__is_same(__add_pointer(ClassType), ClassType*));
+SA(__is_same(__add_pointer(void), void*));
+SA(__is_same(__add_pointer(const void), const void*));
+SA(__is_same(__add_pointer(volatile void), volatile void*));
+SA(__is_same(__add_pointer(const volatile void), const volatile void*));
+
+void f1();
+using f1_type = decltype(f1);
+using pf1_type = decltype(&f1);
+SA(__is_same(__add_pointer(f1_type), pf1_type));
+
+void f2() noexcept; // PR libstdc++/78361
+using f2_type = decltype(f2);
+using pf2_type = decltype(&f2);
+SA(__is_same(__add_pointer(f2_type), pf2_type));
+
+using fn_type = void();
+using pfn_type = void(*)();
+SA(__is_same(__add_pointer(fn_type), pfn_type));
+
+SA(__is_same(__add_pointer(void() &), void() &));
+SA(__is_same(__add_pointer(void() & noexcept), void() & noexcept));
+SA(__is_same(__add_pointer(void() const), void() const));
+SA(__is_same(__add_pointer(void(...) &), void(...) &));
+SA(__is_same(__add_pointer(void(...) & noexcept), void(...) & noexcept));
+SA(__is_same(__add_pointer(void(...) const), void(...) const));
+
+SA(__is_same(__add_pointer(void() __restrict), void() __restrict));
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index b1430e9bd8b..9d861398bae 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -2,6 +2,9 @@
 // { dg-do compile }
 // Verify that __has_builtin gives the correct answer for C++ built-ins.
 
+#if !__has_builtin (__add_pointer)
+# error "__has_builtin (__add_pointer) failed"
+#endif
 #if !__has_builtin (__builtin_addressof)
 # error "__has_builtin (__builtin_addressof) failed"
 #endif
-- 
2.43.2



[PATCH v13 21/26] c++: Implement __rank built-in trait

2024-02-21 Thread Ken Matsui
This patch implements built-in trait for std::rank.

gcc/cp/ChangeLog:

* cp-trait.def: Define __rank.
* constraint.cc (diagnose_trait_expr): Handle CPTK_RANK.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __rank.
* g++.dg/ext/rank.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 +++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  | 23 ---
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 +++
 gcc/testsuite/g++.dg/ext/rank.C  | 24 
 5 files changed, 51 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/rank.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 000df847342..23ea66d9c12 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3870,6 +3870,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_VOLATILE:
   inform (loc, "  %qT is not a volatile type", t1);
   break;
+case CPTK_RANK:
+  inform (loc, "  %qT cannot yield a rank", t1);
+  break;
 case CPTK_REF_CONSTRUCTS_FROM_TEMPORARY:
   inform (loc, "  %qT is not a reference that binds to a temporary "
  "object of type %qT (direct-initialization)", t1, t2);
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 2d1cb7c227c..85056c8140b 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -99,6 +99,7 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, 
"__is_trivially_copyable", 1)
 DEFTRAIT_EXPR (IS_UNBOUNDED_ARRAY, "__is_unbounded_array", 1)
 DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
 DEFTRAIT_EXPR (IS_VOLATILE, "__is_volatile", 1)
+DEFTRAIT_EXPR (RANK, "__rank", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
 DEFTRAIT_TYPE (REMOVE_ALL_EXTENTS, "__remove_all_extents", 1)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 45dc509855a..7242db75248 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12550,6 +12550,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_DEDUCIBLE:
   return type_targs_deducible_from (type1, type2);
 
+/* __rank is handled in finish_trait_expr. */
+case CPTK_RANK:
+
 #define DEFTRAIT_TYPE(CODE, NAME, ARITY) \
 case CPTK_##CODE:
 #include "cp-trait.def"
@@ -12622,7 +12625,10 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
   if (processing_template_decl)
 {
   tree trait_expr = make_node (TRAIT_EXPR);
-  TREE_TYPE (trait_expr) = boolean_type_node;
+  if (kind == CPTK_RANK)
+   TREE_TYPE (trait_expr) = size_type_node;
+  else
+   TREE_TYPE (trait_expr) = boolean_type_node;
   TRAIT_EXPR_TYPE1 (trait_expr) = type1;
   TRAIT_EXPR_TYPE2 (trait_expr) = type2;
   TRAIT_EXPR_KIND (trait_expr) = kind;
@@ -12714,6 +12720,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_UNBOUNDED_ARRAY:
 case CPTK_IS_UNION:
 case CPTK_IS_VOLATILE:
+case CPTK_RANK:
   break;
 
 case CPTK_IS_LAYOUT_COMPATIBLE:
@@ -12745,8 +12752,18 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
   gcc_unreachable ();
 }
 
-  tree val = (trait_expr_value (kind, type1, type2)
- ? boolean_true_node : boolean_false_node);
+  tree val;
+  if (kind == CPTK_RANK)
+{
+  size_t rank = 0;
+  for (; TREE_CODE (type1) == ARRAY_TYPE; type1 = TREE_TYPE (type1))
+   ++rank;
+  val = build_int_cst (size_type_node, rank);
+}
+  else
+val = (trait_expr_value (kind, type1, type2)
+  ? boolean_true_node : boolean_false_node);
+
   return maybe_wrap_with_location (val, loc);
 }
 
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index 3aca273aad6..7f7b27f7aa7 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -179,6 +179,9 @@
 #if !__has_builtin (__is_volatile)
 # error "__has_builtin (__is_volatile) failed"
 #endif
+#if !__has_builtin (__rank)
+# error "__has_builtin (__rank) failed"
+#endif
 #if !__has_builtin (__reference_constructs_from_temporary)
 # error "__has_builtin (__reference_constructs_from_temporary) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/rank.C b/gcc/testsuite/g++.dg/ext/rank.C
new file mode 100644
index 000..28894184387
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/rank.C
@@ -0,0 +1,24 @@
+// { dg-do compile { target c++11 } }
+
+#include <cstddef>
+
+#define SA(X) static_assert((X),#X)
+
+class ClassType { };
+
+SA(__rank(int) == 0);
+SA(__rank(int[2]) == 1);
+SA(__rank(int[][4]) == 2);
+SA(__rank(int[2][2][4][4][6][6]) == 6);

[PATCH v13 26/26] libstdc++: Optimize std::is_nothrow_invocable compilation performance

2024-02-21 Thread Ken Matsui
This patch optimizes the compilation performance of
std::is_nothrow_invocable by dispatching to the new
__is_nothrow_invocable built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_nothrow_invocable): Use
__is_nothrow_invocable built-in trait.
* testsuite/20_util/is_nothrow_invocable/incomplete_args_neg.cc:
Handle the new error from __is_nothrow_invocable.
* testsuite/20_util/is_nothrow_invocable/incomplete_neg.cc:
Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits  | 4 
 .../20_util/is_nothrow_invocable/incomplete_args_neg.cc   | 1 +
 .../testsuite/20_util/is_nothrow_invocable/incomplete_neg.cc  | 1 +
 3 files changed, 6 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 9af233bcc75..093d85a51a8 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -3265,8 +3265,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// std::is_nothrow_invocable
   template<typename _Fn, typename... _ArgTypes>
 struct is_nothrow_invocable
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_nothrow_invocable)
+: public __bool_constant<__is_nothrow_invocable(_Fn, _ArgTypes...)>
+#else
 : __and_<__is_invocable_impl<__invoke_result<_Fn, _ArgTypes...>, void>,
 __call_is_nothrow_<_Fn, _ArgTypes...>>::type
+#endif
 {
   static_assert(std::__is_complete_or_unbounded(__type_identity<_Fn>{}),
"_Fn must be a complete class or an unbounded array");
diff --git 
a/libstdc++-v3/testsuite/20_util/is_nothrow_invocable/incomplete_args_neg.cc 
b/libstdc++-v3/testsuite/20_util/is_nothrow_invocable/incomplete_args_neg.cc
index 3c225883eaf..3f8542dd366 100644
--- a/libstdc++-v3/testsuite/20_util/is_nothrow_invocable/incomplete_args_neg.cc
+++ b/libstdc++-v3/testsuite/20_util/is_nothrow_invocable/incomplete_args_neg.cc
@@ -18,6 +18,7 @@
 // <http://www.gnu.org/licenses/>.
 
 // { dg-error "must be a complete class" "" { target *-*-* } 0 }
+// { dg-prune-output "invalid use of incomplete type" }
 
 #include <type_traits>
 
diff --git 
a/libstdc++-v3/testsuite/20_util/is_nothrow_invocable/incomplete_neg.cc 
b/libstdc++-v3/testsuite/20_util/is_nothrow_invocable/incomplete_neg.cc
index 5a728bfa03b..d3bdf08448b 100644
--- a/libstdc++-v3/testsuite/20_util/is_nothrow_invocable/incomplete_neg.cc
+++ b/libstdc++-v3/testsuite/20_util/is_nothrow_invocable/incomplete_neg.cc
@@ -18,6 +18,7 @@
 // <http://www.gnu.org/licenses/>.
 
 // { dg-error "must be a complete class" "" { target *-*-* } 0 }
+// { dg-prune-output "invalid use of incomplete type" }
 
 #include <type_traits>
 
-- 
2.43.2



[PATCH v13 13/26] c++: Implement __remove_all_extents built-in trait

2024-02-21 Thread Ken Matsui
This patch implements built-in trait for std::remove_all_extents.

gcc/cp/ChangeLog:

* cp-trait.def: Define __remove_all_extents.
* semantics.cc (finish_trait_type): Handle
CPTK_REMOVE_ALL_EXTENTS.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of
__remove_all_extents.
* g++.dg/ext/remove_all_extents.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/cp-trait.def   |  1 +
 gcc/cp/semantics.cc   |  3 +++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C  |  3 +++
 gcc/testsuite/g++.dg/ext/remove_all_extents.C | 16 
 4 files changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/remove_all_extents.C

diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 577c96d579b..933c8bcbe68 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -98,6 +98,7 @@ DEFTRAIT_EXPR (IS_UNION, "__is_union", 1)
 DEFTRAIT_EXPR (IS_VOLATILE, "__is_volatile", 1)
 DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, 
"__reference_constructs_from_temporary", 2)
 DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, 
"__reference_converts_from_temporary", 2)
+DEFTRAIT_TYPE (REMOVE_ALL_EXTENTS, "__remove_all_extents", 1)
 DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1)
 DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1)
 DEFTRAIT_TYPE (REMOVE_EXTENT, "__remove_extent", 1)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 58696225fc4..078424dac23 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12785,6 +12785,9 @@ finish_trait_type (cp_trait_kind kind, tree type1, tree 
type2,
type1 = TREE_TYPE (type1);
   return build_pointer_type (type1);
 
+case CPTK_REMOVE_ALL_EXTENTS:
+  return strip_array_types (type1);
+
 case CPTK_REMOVE_CV:
   return cv_unqualified (type1);
 
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index 5d5cbe3b019..85b74bd676b 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -176,6 +176,9 @@
 #if !__has_builtin (__reference_converts_from_temporary)
 # error "__has_builtin (__reference_converts_from_temporary) failed"
 #endif
+#if !__has_builtin (__remove_all_extents)
+# error "__has_builtin (__remove_all_extents) failed"
+#endif
 #if !__has_builtin (__remove_cv)
 # error "__has_builtin (__remove_cv) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/remove_all_extents.C 
b/gcc/testsuite/g++.dg/ext/remove_all_extents.C
new file mode 100644
index 000..60ade2ade7f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/remove_all_extents.C
@@ -0,0 +1,16 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+class ClassType { };
+
+SA(__is_same(__remove_all_extents(int), int));
+SA(__is_same(__remove_all_extents(int[2]), int));
+SA(__is_same(__remove_all_extents(int[2][3]), int));
+SA(__is_same(__remove_all_extents(int[][3]), int));
+SA(__is_same(__remove_all_extents(const int[2][3]), const int));
+SA(__is_same(__remove_all_extents(ClassType), ClassType));
+SA(__is_same(__remove_all_extents(ClassType[2]), ClassType));
+SA(__is_same(__remove_all_extents(ClassType[2][3]), ClassType));
+SA(__is_same(__remove_all_extents(ClassType[][3]), ClassType));
+SA(__is_same(__remove_all_extents(const ClassType[2][3]), const ClassType));
-- 
2.43.2



[PATCH v13 05/26] c++: Implement __is_pointer built-in trait

2024-02-21 Thread Ken Matsui
This patch implements built-in trait for std::is_pointer.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_pointer.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_POINTER.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_pointer.
* g++.dg/ext/is_pointer.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |  3 ++
 gcc/cp/cp-trait.def  |  1 +
 gcc/cp/semantics.cc  |  4 ++
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |  3 ++
 gcc/testsuite/g++.dg/ext/is_pointer.C| 51 
 5 files changed, 62 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_pointer.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 9a7a12629e7..244070d93c2 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3828,6 +3828,9 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_POD:
   inform (loc, "  %qT is not a POD type", t1);
   break;
+case CPTK_IS_POINTER:
+  inform (loc, "  %qT is not a pointer", t1);
+  break;
 case CPTK_IS_POLYMORPHIC:
   inform (loc, "  %qT is not a polymorphic type", t1);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index e9347453829..18e2d0f3480 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -82,6 +82,7 @@ DEFTRAIT_EXPR (IS_NOTHROW_CONVERTIBLE, 
"__is_nothrow_convertible", 2)
 DEFTRAIT_EXPR (IS_OBJECT, "__is_object", 1)
 DEFTRAIT_EXPR (IS_POINTER_INTERCONVERTIBLE_BASE_OF, 
"__is_pointer_interconvertible_base_of", 2)
 DEFTRAIT_EXPR (IS_POD, "__is_pod", 1)
+DEFTRAIT_EXPR (IS_POINTER, "__is_pointer", 1)
 DEFTRAIT_EXPR (IS_POLYMORPHIC, "__is_polymorphic", 1)
 DEFTRAIT_EXPR (IS_REFERENCE, "__is_reference", 1)
 DEFTRAIT_EXPR (IS_SAME, "__is_same", 2)
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 41c25f43d27..9dcdb06191a 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -12502,6 +12502,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree 
type2)
 case CPTK_IS_POD:
   return pod_type_p (type1);
 
+case CPTK_IS_POINTER:
+  return TYPE_PTR_P (type1);
+
 case CPTK_IS_POLYMORPHIC:
   return CLASS_TYPE_P (type1) && TYPE_POLYMORPHIC_P (type1);
 
@@ -12701,6 +12704,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, 
tree type1, tree type2)
 case CPTK_IS_MEMBER_OBJECT_POINTER:
 case CPTK_IS_MEMBER_POINTER:
 case CPTK_IS_OBJECT:
+case CPTK_IS_POINTER:
 case CPTK_IS_REFERENCE:
 case CPTK_IS_SAME:
 case CPTK_IS_SCOPED_ENUM:
diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C 
b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
index b2e2f2f694d..96b7a89e4f1 100644
--- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C
+++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C
@@ -125,6 +125,9 @@
 #if !__has_builtin (__is_pod)
 # error "__has_builtin (__is_pod) failed"
 #endif
+#if !__has_builtin (__is_pointer)
+# error "__has_builtin (__is_pointer) failed"
+#endif
 #if !__has_builtin (__is_polymorphic)
 # error "__has_builtin (__is_polymorphic) failed"
 #endif
diff --git a/gcc/testsuite/g++.dg/ext/is_pointer.C 
b/gcc/testsuite/g++.dg/ext/is_pointer.C
new file mode 100644
index 000..d6e39565950
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/is_pointer.C
@@ -0,0 +1,51 @@
+// { dg-do compile { target c++11 } }
+
+#define SA(X) static_assert((X),#X)
+
+SA(!__is_pointer(int));
+SA(__is_pointer(int*));
+SA(__is_pointer(int**));
+
+SA(__is_pointer(const int*));
+SA(__is_pointer(const int**));
+SA(__is_pointer(int* const));
+SA(__is_pointer(int** const));
+SA(__is_pointer(int* const* const));
+
+SA(__is_pointer(volatile int*));
+SA(__is_pointer(volatile int**));
+SA(__is_pointer(int* volatile));
+SA(__is_pointer(int** volatile));
+SA(__is_pointer(int* volatile* volatile));
+
+SA(__is_pointer(const volatile int*));
+SA(__is_pointer(const volatile int**));
+SA(__is_pointer(const int* volatile));
+SA(__is_pointer(volatile int* const));
+SA(__is_pointer(int* const volatile));
+SA(__is_pointer(const int** volatile));
+SA(__is_pointer(volatile int** const));
+SA(__is_pointer(int** const volatile));
+SA(__is_pointer(int* const* const volatile));
+SA(__is_pointer(int* volatile* const volatile));
+SA(__is_pointer(int* const volatile* const volatile));
+
+SA(!__is_pointer(int&));
+SA(!__is_pointer(const int&));
+SA(!__is_pointer(volatile int&));
+SA(!__is_pointer(const volatile int&));
+
+SA(!__is_pointer(int&&));
+SA(!__is_pointer(const int&&));
+SA(!__is_pointer(volatile int&&));
+SA(!__is_pointer(const volatile int&&));
+
+SA(!__is_pointer(int[3]));
+SA(!__is_pointer(const int[3]));
+SA(!__is_pointer(volatile int[3]));
+SA(!__is_pointer(const volatile int[3]));
+
+SA(!__is_pointer(int(int)));
+SA(__is_pointer(int(*const)(int)));
+SA(__is_pointer(int(*volatile)(int)));
+SA(__is_pointer(int(*const 

[PATCH v13 22/26] libstdc++: Optimize std::rank compilation performance

2024-02-21 Thread Ken Matsui
This patch optimizes the compilation performance of std::rank
by dispatching to the new __rank built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (rank): Use __rank built-in trait.
(rank_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 2f4c8dd3b21..1577042a5b8 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -1473,6 +1473,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 };
 
   /// rank
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__rank)
+  template<typename _Tp>
+struct rank
+: public integral_constant<std::size_t, __rank(_Tp)> { };
+#else
   template<typename>
 struct rank
 : public integral_constant<std::size_t, 0> { };
@@ -1484,6 +1489,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
 struct rank<_Tp[]>
 : public integral_constant<std::size_t, 1 + rank<_Tp>::value> { };
+#endif
 
   /// extent
   template
@@ -3579,12 +3585,17 @@ template 
 template <typename _Tp>
   inline constexpr size_t alignment_of_v = alignment_of<_Tp>::value;
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__rank)
+template <typename _Tp>
+  inline constexpr size_t rank_v = __rank(_Tp);
+#else
 template <typename _Tp>
   inline constexpr size_t rank_v = 0;
 template <typename _Tp, size_t _Size>
   inline constexpr size_t rank_v<_Tp[_Size]> = 1 + rank_v<_Tp>;
 template <typename _Tp>
   inline constexpr size_t rank_v<_Tp[]> = 1 + rank_v<_Tp>;
+#endif
 
 template <typename _Tp>
   inline constexpr size_t extent_v = 0;
-- 
2.43.2



[PATCH v13 04/26] libstdc++: Optimize std::is_volatile compilation performance

2024-02-21 Thread Ken Matsui
This patch optimizes the compilation performance of std::is_volatile
by dispatching to the new __is_volatile built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (is_volatile): Use __is_volatile
built-in trait.
(is_volatile_v): Likewise.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 12 
 1 file changed, 12 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 6e9ebfb8a18..60cd22b6f15 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -851,6 +851,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 
   /// is_volatile
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_volatile)
+  template<typename _Tp>
+struct is_volatile
+: public __bool_constant<__is_volatile(_Tp)>
+{ };
+#else
   template<typename>
 struct is_volatile
 : public false_type { };
@@ -858,6 +864,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
 struct is_volatile<_Tp volatile>
 : public true_type { };
+#endif
 
   /// is_trivial
   template<typename _Tp>
@@ -3356,10 +3363,15 @@ template 
   inline constexpr bool is_function_v<_Tp&&> = false;
 #endif
 
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_volatile)
+template <typename _Tp>
+  inline constexpr bool is_volatile_v = __is_volatile(_Tp);
+#else
 template <typename _Tp>
   inline constexpr bool is_volatile_v = false;
 template <typename _Tp>
   inline constexpr bool is_volatile_v<volatile _Tp> = true;
+#endif
 
 template <typename _Tp>
   inline constexpr bool is_trivial_v = __is_trivial(_Tp);
-- 
2.43.2



[PATCH v13 12/26] libstdc++: Optimize std::remove_extent compilation performance

2024-02-21 Thread Ken Matsui
This patch optimizes the compilation performance of std::remove_extent
by dispatching to the new __remove_extent built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (remove_extent): Use __remove_extent
built-in trait.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 6346d1daee2..73ddce351fd 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -2092,6 +2092,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Array modifications.
 
   /// remove_extent
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__remove_extent)
+  template<typename _Tp>
+struct remove_extent
+{ using type = __remove_extent(_Tp); };
+#else
   template<typename _Tp>
 struct remove_extent
 { using type = _Tp; };
@@ -2103,6 +2108,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
 struct remove_extent<_Tp[]>
 { using type = _Tp; };
+#endif
 
   /// remove_all_extents
   template<typename _Tp>
-- 
2.43.2



[PATCH v13 23/26] c++: Implement __is_invocable built-in trait

2024-02-21 Thread Ken Matsui
This patch implements built-in trait for std::is_invocable.

gcc/cp/ChangeLog:

* cp-trait.def: Define __is_invocable.
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_INVOCABLE.
* semantics.cc (trait_expr_value): Likewise.
(finish_trait_expr): Likewise.
* cp-tree.h (build_invoke): New function.
* method.cc (build_invoke): New function.

gcc/testsuite/ChangeLog:

* g++.dg/ext/has-builtin-1.C: Test existence of __is_invocable.
* g++.dg/ext/is_invocable1.C: New test.
* g++.dg/ext/is_invocable2.C: New test.
* g++.dg/ext/is_invocable3.C: New test.
* g++.dg/ext/is_invocable4.C: New test.

Signed-off-by: Ken Matsui 
---
 gcc/cp/constraint.cc |   6 +
 gcc/cp/cp-trait.def  |   1 +
 gcc/cp/cp-tree.h |   2 +
 gcc/cp/method.cc | 132 +
 gcc/cp/semantics.cc  |   4 +
 gcc/testsuite/g++.dg/ext/has-builtin-1.C |   3 +
 gcc/testsuite/g++.dg/ext/is_invocable1.C | 349 +++
 gcc/testsuite/g++.dg/ext/is_invocable2.C | 139 +
 gcc/testsuite/g++.dg/ext/is_invocable3.C |  51 
 gcc/testsuite/g++.dg/ext/is_invocable4.C |  33 +++
 10 files changed, 720 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable1.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable2.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable3.C
 create mode 100644 gcc/testsuite/g++.dg/ext/is_invocable4.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 23ea66d9c12..c87b126fdb1 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -3791,6 +3791,12 @@ diagnose_trait_expr (tree expr, tree args)
 case CPTK_IS_FUNCTION:
   inform (loc, "  %qT is not a function", t1);
   break;
+case CPTK_IS_INVOCABLE:
+  if (!t2)
+inform (loc, "  %qT is not invocable", t1);
+  else
+inform (loc, "  %qT is not invocable by %qE", t1, t2);
+  break;
 case CPTK_IS_LAYOUT_COMPATIBLE:
   inform (loc, "  %qT is not layout compatible with %qT", t1, t2);
   break;
diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def
index 85056c8140b..6cb2b55f4ea 100644
--- a/gcc/cp/cp-trait.def
+++ b/gcc/cp/cp-trait.def
@@ -75,6 +75,7 @@ DEFTRAIT_EXPR (IS_EMPTY, "__is_empty", 1)
 DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1)
 DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1)
 DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1)
+DEFTRAIT_EXPR (IS_INVOCABLE, "__is_invocable", -1)
 DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2)
 DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1)
 DEFTRAIT_EXPR (IS_MEMBER_FUNCTION_POINTER, "__is_member_function_pointer", 1)
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 334c11396c2..261d3a71faa 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7334,6 +7334,8 @@ extern tree get_copy_assign   (tree);
 extern tree get_default_ctor   (tree);
 extern tree get_dtor   (tree, tsubst_flags_t);
 extern tree build_stub_object  (tree);
+extern tree build_invoke   (tree, const_tree,
+tsubst_flags_t);
 extern tree strip_inheriting_ctors (tree);
 extern tree inherited_ctor_binfo   (tree);
 extern bool base_ctor_omit_inherited_parms (tree);
diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc
index 98c10e6a8b5..953f1bed6fc 100644
--- a/gcc/cp/method.cc
+++ b/gcc/cp/method.cc
@@ -1928,6 +1928,138 @@ build_trait_object (tree type)
   return build_stub_object (type);
 }
 
+/* [func.require] Build an expression of INVOKE(FN_TYPE, ARG_TYPES...).  If the
+   given is not invocable, returns error_mark_node.  */
+
+tree
+build_invoke (tree fn_type, const_tree arg_types, tsubst_flags_t complain)
+{
+  if (fn_type == error_mark_node || arg_types == error_mark_node)
+return error_mark_node;
+
+  gcc_assert (TYPE_P (fn_type));
+  gcc_assert (TREE_CODE (arg_types) == TREE_VEC);
+
+  /* Access check is required to determine if the given is invocable.  */
+  deferring_access_check_sentinel acs (dk_no_deferred);
+
+  /* INVOKE is an unevaluated context.  */
+  cp_unevaluated cp_uneval_guard;
+
+  bool is_ptrdatamem;
+  bool is_ptrmemfunc;
+  if (TREE_CODE (fn_type) == REFERENCE_TYPE)
+{
+  tree deref_fn_type = TREE_TYPE (fn_type);
+  is_ptrdatamem = TYPE_PTRDATAMEM_P (deref_fn_type);
+  is_ptrmemfunc = TYPE_PTRMEMFUNC_P (deref_fn_type);
+
+  /* Dereference fn_type if it is a pointer to member.  */
+  if (is_ptrdatamem || is_ptrmemfunc)
+   fn_type = deref_fn_type;
+}
+  else
+{
+  is_ptrdatamem = TYPE_PTRDATAMEM_P (fn_type);
+  is_ptrmemfunc = TYPE_PTRMEMFUNC_P (fn_type);
+}
+
+  if (is_ptrdatamem && TREE_VEC_LENGTH (arg_types) != 1)
+/* Only a pointer to data member with one argument is invocable.  */
+return error_mark_node;

[PATCH v13 10/26] libstdc++: Optimize std::add_pointer compilation performance

2024-02-21 Thread Ken Matsui
This patch optimizes the compilation performance of std::add_pointer
by dispatching to the new __add_pointer built-in trait.

libstdc++-v3/ChangeLog:

* include/std/type_traits (add_pointer): Use __add_pointer
built-in trait.

Signed-off-by: Ken Matsui 
---
 libstdc++-v3/include/std/type_traits | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index c4585a23df9..6346d1daee2 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -2149,6 +2149,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 #endif
 
+  /// add_pointer
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__add_pointer)
+  template<typename _Tp>
+struct add_pointer
+{ using type = __add_pointer(_Tp); };
+#else
   template<typename _Tp, typename = void>
 struct __add_pointer_helper
 { using type = _Tp; };
@@ -2157,7 +2163,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __add_pointer_helper<_Tp, __void_t<_Tp*>>
 { using type = _Tp*; };
 
-  /// add_pointer
   template<typename _Tp>
 struct add_pointer
 : public __add_pointer_helper<_Tp>
@@ -2170,6 +2175,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
 struct add_pointer<_Tp&&>
 { using type = _Tp*; };
+#endif
 
 #if __cplusplus > 201103L
   /// Alias template for remove_pointer
-- 
2.43.2



  1   2   >