[PATCH] lower-bitint: Avoid merging non-mergeable stmt with cast and mergeable stmt [PR112902]

2023-12-07 Thread Jakub Jelinek
Hi!

Before bitint lowering, the IL has:
  b.0_1 = b;
  _2 = -b.0_1;
  _3 = (unsigned _BitInt(512)) _2;
  a.1_4 = a;
  a.2_5 = (unsigned _BitInt(512)) a.1_4;
  _6 = _3 * a.2_5;
on the first function.  Now, gimple_lower_bitint has an optimization
(when not -O0) that it avoids assigning underlying VAR_DECLs for certain
SSA_NAMEs where it is possible to lower it in a single loop (or straight
line code) rather than in multiple loops.
So, e.g. the multiplication above uses handle_operand_addr, which can deal
with INTEGER_CST arguments, loads but also casts, so it is fine
not to assign an underlying VAR_DECL for SSA_NAMEs a.1_4 and a.2_5, as
the multiplication can handle it fine.
The more problematic case is the other multiplication operand.
It is again a result of a (in this case narrowing) cast, so it is fine
not to assign VAR_DECL for _3.  Normally we can merge the load (b.0_1)
with the negation (_2) and even with the following cast (_3).  If _3
was used in a mergeable operation like addition, subtraction, negation,
&|^ or equality comparison, all of b.0_1, _2 and _3 could be without
underlying VAR_DECLs.
The problem is that the current code does that even when the cast is used
by a non-mergeable operation, and handle_operand_addr certainly can't handle
the mergeable operations feeding the rhs1 of the cast, for multiplication
we don't emit any loop in which it could appear, for other operations like
shifts or non-equality comparisons we emit loops, but either in the reverse
direction or with unpredictable indexes (for shifts).
So, in order to lower the above correctly, we need to have an underlying
VAR_DECL for either _2 or _3; if we choose _2, then the load and negation
would be done in one loop and extension handled as part of the
multiplication, if we choose _3, then the load, negation and cast are done
in one loop and the multiplication just uses the underlying VAR_DECL
computed by that.
It is far easier to do this for _3, which is what the following patch
implements.
It actually already had code for most of it, just it did that for widening
casts only (optimize unless the cast rhs1 is not SSA_NAME, or is SSA_NAME
defined in some other bb, or with more than one use, etc.).
This falls through into such code even for the narrowing or same precision
casts, unless the cast is used in a mergeable operation.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-12-08  Jakub Jelinek  

PR tree-optimization/112902
* gimple-lower-bitint.cc (gimple_lower_bitint): For a narrowing
or same precision cast don't set SSA_NAME_VERSION in m_names only
if use_stmt is mergeable_op or fall through into the check that
use is a store or rhs1 is not mergeable or other reasons prevent
merging.

* gcc.dg/bitint-52.c: New test.

--- gcc/gimple-lower-bitint.cc.jj   2023-12-06 09:55:18.522993378 +0100
+++ gcc/gimple-lower-bitint.cc  2023-12-07 18:05:17.183692049 +0100
@@ -5989,10 +5989,11 @@ gimple_lower_bitint (void)
{
  if (TREE_CODE (TREE_TYPE (rhs1)) != BITINT_TYPE
  || (bitint_precision_kind (TREE_TYPE (rhs1))
- < bitint_prec_large)
- || (TYPE_PRECISION (TREE_TYPE (rhs1))
- >= TYPE_PRECISION (TREE_TYPE (s)))
- || mergeable_op (SSA_NAME_DEF_STMT (s)))
+ < bitint_prec_large))
+   continue;
+ if ((TYPE_PRECISION (TREE_TYPE (rhs1))
+  >= TYPE_PRECISION (TREE_TYPE (s)))
+ && mergeable_op (use_stmt))
continue;
  /* Prevent merging a widening non-mergeable cast
 on result of some narrower mergeable op
@@ -6011,7 +6012,9 @@ gimple_lower_bitint (void)
  || !mergeable_op (SSA_NAME_DEF_STMT (rhs1))
  || gimple_store_p (use_stmt))
continue;
- if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
+ if ((TYPE_PRECISION (TREE_TYPE (rhs1))
+  < TYPE_PRECISION (TREE_TYPE (s)))
+ && gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
{
  /* Another exception is if the widening cast is
 from mergeable same precision cast from something
--- gcc/testsuite/gcc.dg/bitint-52.c.jj 2023-12-08 00:35:39.970953164 +0100
+++ gcc/testsuite/gcc.dg/bitint-52.c2023-12-08 00:35:21.983205440 +0100
@@ -0,0 +1,22 @@
+/* PR tree-optimization/112902 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-std=c23 -O2" } */
+
+double c;
+#if __BITINT_MAXWIDTH__ >= 2048
+_BitInt (512) a;
+_BitInt (2048) b;
+
+void
+foo (void)
+{
+  b = __builtin_mul_overflow_p (40, (_BitInt (512)) (-b * a), 0);
+}
+
+

[PATCH] vr-values: Avoid ICEs on large _BitInt cast to floating point [PR112901]

2023-12-07 Thread Jakub Jelinek
Hi!

For casts from integers to floating point,
simplify_float_conversion_using_ranges uses SCALAR_INT_TYPE_MODE
and queries optabs on the optimization it wants to make.

That doesn't really work for large/huge BITINT_TYPE, those have BLKmode
which is not scalar int mode.  Querying an optab is not useful for that
either.

I think it is best to just skip this optimization for those bitints,
after all, bitint lowering uses ranges already to determine minimum
precision for bitint operands of the integer to float casts.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-12-08  Jakub Jelinek  

PR tree-optimization/112901
* vr-values.cc
(simplify_using_ranges::simplify_float_conversion_using_ranges):
Return false if rhs1 has BITINT_TYPE type with BLKmode TYPE_MODE.

* gcc.dg/bitint-51.c: New test.

--- gcc/vr-values.cc.jj 2023-09-06 17:28:24.240977329 +0200
+++ gcc/vr-values.cc2023-12-07 14:34:36.935121459 +0100
@@ -1656,6 +1656,11 @@ simplify_using_ranges::simplify_float_co
   || vr.undefined_p ())
 return false;
 
+  /* The code below doesn't work for large/huge _BitInt, nor is really
+ needed for those, bitint lowering does use ranges already.  */
+  if (TREE_CODE (TREE_TYPE (rhs1)) == BITINT_TYPE
+  && TYPE_MODE (TREE_TYPE (rhs1)) == BLKmode)
+return false;
   /* First check if we can use a signed type in place of an unsigned.  */
   scalar_int_mode rhs_mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs1));
   if (TYPE_UNSIGNED (TREE_TYPE (rhs1))
--- gcc/testsuite/gcc.dg/bitint-51.c.jj 2023-12-07 15:10:20.500384705 +0100
+++ gcc/testsuite/gcc.dg/bitint-51.c2023-12-07 15:09:54.159750006 +0100
@@ -0,0 +1,14 @@
+/* PR tree-optimization/112901 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-O2" } */
+
+float f;
+#if __BITINT_MAXWIDTH__ >= 256
+_BitInt(256) i;
+
+void
+foo (void)
+{
+  f *= 4 * i;
+}
+#endif

Jakub



Re: [PATCH] haifa-sched: Avoid overflows in extend_h_i_d [PR112411]

2023-12-07 Thread Richard Biener
On Fri, 8 Dec 2023, Jakub Jelinek wrote:

> On Thu, Dec 07, 2023 at 11:54:01AM +0100, Jakub Jelinek wrote:
> > On Thu, Dec 07, 2023 at 09:36:23AM +0100, Jakub Jelinek wrote:
> > > Without the dg-skip-if I got on 64-bit host:
> > > cc1: out of memory allocating 571230784744 bytes after a total of 2772992 
> > > bytes
> > 
> > I've looked at this and the problem is in haifa-sched.cc:
> > 9047  h_i_d.safe_grow_cleared (3 * get_max_uid () / 2, true);
> > get_max_uid () is 0x4000024d with the --param 
> > min-nondebug-insn-uid=0x40000000
> > and so 3 * get_max_uid () / 2 actually overflows to -536870028 but as vec.h
> > then treats the value as unsigned, it attempts to allocate
> > 0xe0000374U * 152UL bytes, i.e. those 532GB.  If the above is fixed to do
> > 3U * get_max_uid () / 2 instead, it will get slightly better and will only
> > need 0x60000373U * 152UL bytes, i.e. 228GB.
> 
> Here it is in a patch form.
> For the other changes, it would be more work and the question is if it would
> be beneficial for average compilation, if the uids aren't sparse enough,
> it would waste more memory (8-bytes per uid for the pointer in the array
> plus the 152 byte allocation, probably even rounded up for next bucket size
> unless we use say pool allocator).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> 2023-12-08  Jakub Jelinek  
> 
>   PR middle-end/112411
>   * haifa-sched.cc (extend_h_i_d): Use 3U instead of 3 in
>   3 * get_max_uid () / 2 calculation.
> 
> --- gcc/haifa-sched.cc.jj 2023-08-08 15:55:06.705161670 +0200
> +++ gcc/haifa-sched.cc2023-12-07 11:57:17.869611646 +0100
> @@ -9044,7 +9044,7 @@ extend_h_i_d (void)
>if (reserve > 0
>&& ! h_i_d.space (reserve))
>  {
> -  h_i_d.safe_grow_cleared (3 * get_max_uid () / 2, true);
> +  h_i_d.safe_grow_cleared (3U * get_max_uid () / 2, true);
>sched_extend_target ();
>  }
>  }
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [pushed][PATCH v2 0/2] Delete ISA_BASE_LA64V110 related definitions.

2023-12-07 Thread chenglulu

Pushed to r14-6303 and r14-6304.

在 2023/12/5 上午10:30, Lulu Cheng 写道:

1. Rebase Xi Ruoyao's patch onto the latest commit.
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636798.html

2. remove the #if
!defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
guards in loongarch-def.h and loongarch-opts.h as they'll be unneeded.

3. Described in LoongArch Reference Manual v1.1:
The new functional subsets in each new version have independent identification
bits in the return value of the CPUCFG instruction. It is recommended that the
software determines the running process based on this information rather than
the version number of the Loongson architecture.

So delete the ISA_BASE_LA64V110 related definitions here.

*** BLURB HERE ***

Lulu Cheng (1):
   LoongArch: Remove the definition of ISA_BASE_LA64V110 from the code.

Xi Ruoyao (1):
   LoongArch: Switch loongarch-def from C to C++ to make it possible.

  .../loongarch/genopts/loongarch-strings   |   1 -
  gcc/config/loongarch/genopts/loongarch.opt.in |   3 -
  gcc/config/loongarch/loongarch-cpu.cc |  23 +-
  gcc/config/loongarch/loongarch-def-array.h|  40 +++
  gcc/config/loongarch/loongarch-def.c  | 227 --
  gcc/config/loongarch/loongarch-def.cc | 193 +++
  gcc/config/loongarch/loongarch-def.h  |  67 +++---
  gcc/config/loongarch/loongarch-opts.cc|  10 +-
  gcc/config/loongarch/loongarch-opts.h |   9 +-
  gcc/config/loongarch/loongarch-str.h  |   1 -
  gcc/config/loongarch/loongarch-tune.h | 123 +-
  gcc/config/loongarch/loongarch.opt|   3 -
  gcc/config/loongarch/t-loongarch  |   4 +-
  13 files changed, 405 insertions(+), 299 deletions(-)
  create mode 100644 gcc/config/loongarch/loongarch-def-array.h
  delete mode 100644 gcc/config/loongarch/loongarch-def.c
  create mode 100644 gcc/config/loongarch/loongarch-def.cc





[PATCH] haifa-sched: Avoid overflows in extend_h_i_d [PR112411]

2023-12-07 Thread Jakub Jelinek
On Thu, Dec 07, 2023 at 11:54:01AM +0100, Jakub Jelinek wrote:
> On Thu, Dec 07, 2023 at 09:36:23AM +0100, Jakub Jelinek wrote:
> > Without the dg-skip-if I got on 64-bit host:
> > cc1: out of memory allocating 571230784744 bytes after a total of 2772992 
> > bytes
> 
> I've looked at this and the problem is in haifa-sched.cc:
> 9047h_i_d.safe_grow_cleared (3 * get_max_uid () / 2, true);
> get_max_uid () is 0x4000024d with the --param min-nondebug-insn-uid=0x40000000
> and so 3 * get_max_uid () / 2 actually overflows to -536870028 but as vec.h
> then treats the value as unsigned, it attempts to allocate
> 0xe0000374U * 152UL bytes, i.e. those 532GB.  If the above is fixed to do
> 3U * get_max_uid () / 2 instead, it will get slightly better and will only
> need 0x60000373U * 152UL bytes, i.e. 228GB.

Here it is in a patch form.
For the other changes, it would be more work and the question is if it would
be beneficial for average compilation, if the uids aren't sparse enough,
it would waste more memory (8-bytes per uid for the pointer in the array
plus the 152 byte allocation, probably even rounded up for next bucket size
unless we use say pool allocator).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-12-08  Jakub Jelinek  

PR middle-end/112411
* haifa-sched.cc (extend_h_i_d): Use 3U instead of 3 in
3 * get_max_uid () / 2 calculation.

--- gcc/haifa-sched.cc.jj   2023-08-08 15:55:06.705161670 +0200
+++ gcc/haifa-sched.cc  2023-12-07 11:57:17.869611646 +0100
@@ -9044,7 +9044,7 @@ extend_h_i_d (void)
   if (reserve > 0
   && ! h_i_d.space (reserve))
 {
-  h_i_d.safe_grow_cleared (3 * get_max_uid () / 2, true);
+  h_i_d.safe_grow_cleared (3U * get_max_uid () / 2, true);
   sched_extend_target ();
 }
 }

Jakub



Re: [PATCH] treat argp-based mem as frame related in dse

2023-12-07 Thread Jiufu Guo


Hi,

Jeff Law  writes:

> On 12/6/23 02:27, Jiufu Guo wrote:
>> Hi,
>>
>> The issue mentioned in PR112525 would be able to be handled by
>> updating dse.cc to treat arg_pointer_rtx similarly with frame_pointer_rtx.
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=30271#c10 also mentioned
>> this idea.
>>  One thing, 
>> the argp area may be used to pass arguments to the callee. So, it would
>> be needed to check if call insns are using that mem.
>>
>> Bootstrap  pass on ppc64{,le} and x86_64.
>> Is this ok for trunk?
>>
>> BR,
>> Jeff (Jiufu Guo)
>>
>>
>>  PR rtl-optimization/112525
>>
>> gcc/ChangeLog:
>>
>>  * dse.cc (get_group_info): Add arg_pointer_rtx as frame_related.
>>  (check_mem_read_rtx): Add parameter to indicate if it is checking mem
>>  for call insn.
>>  (scan_insn): Add mem checking on call usage.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * gcc.target/powerpc/pr112525.c: New test.
> So conceptually the first chunk makes sense.  Though I do worry about
> Andrew's comment about it causing a bootstrap failure.  Even though
> it was 15 years ago, it remains worrisome.
>
Yes, I understand your point.
At that time, it was a comparison failure. It may be related to debug
info.  But I did not figure out possible failures.

>
>> @@ -2368,7 +2370,8 @@ check_mem_read_rtx (rtx *loc, bb_info_t bb_info)
>>/* If this read is just reading back something that we just
>>   stored, rewrite the read.  */
>> -  if (store_info->rhs
>> +  if (!used_in_call
>> +  && store_info->rhs
>>&& store_info->group_id == -1
>>&& store_info->cse_base == base
>>&& known_subrange_p (offset, width, store_info->offset,
>> @@ -2650,6 +2653,12 @@ scan_insn (bb_info_t bb_info, rtx_insn *insn, int 
>> max_active_local_stores)
>>  that is not relative to the frame.  */
>>   add_non_frame_wild_read (bb_info);
>>   +  for (rtx link = CALL_INSN_FUNCTION_USAGE (insn);
>> +   link != NULL_RTX;
>> +   link = XEXP (link, 1))
>> +if (GET_CODE (XEXP (link, 0)) == USE && MEM_P (XEXP (XEXP (link, 0),0)))
>> +  check_mem_read_rtx ( (XEXP (link, 0),0), bb_info, true);
> I'm having a bit of a hard time convincing myself this is correct
> though.  I can't see how rewriting the load to read the source of the
> prior store is unsafe.  If that fixes a problem, then it would seem
> like we've gone wrong before here -- perhaps failing to use the fusage
> loads to "kill" any available stores to the same or aliased memory
> locations.
As you said in the latter case, the call's fusage would kill the previous
store. It is a kind of case like:

  134: [argp:SI+0x8]=r134:SI
  135: [argp:SI+0x4]=0x1
  136: [argp:SI]=r132:SI
  137: ax:SI=call [`memset'] argc:0xc
  REG_CALL_DECL `memset'
  REG_EH_REGION 0

This call insn is:
(call_insn/j 137 136 147 27 (set (reg:SI 0 ax)
(call (mem:QI (symbol_ref:SI ("memset") [flags 0x41]  ) [0 __builtin_memset S1 A8])
(const_int 12 [0xc]))) "pr102798.c":23:22 1086 {*sibcall_value}
 (expr_list:REG_UNUSED (reg:SI 0 ax)
(expr_list:REG_CALL_DECL (symbol_ref:SI ("memset") [flags 0x41]  
)
(expr_list:REG_EH_REGION (const_int 0 [0])
(nil
(expr_list:SI (use (mem/f:SI (reg/f:SI 16 argp) [0  S4 A32]))
(expr_list:SI (use (mem:SI (plus:SI (reg/f:SI 16 argp) (const_int 4 
[0x4])) [0  S4 A32]))
(expr_list:SI (use (mem:SI (plus:SI (reg/f:SI 16 argp) (const_int 8 
[0x8])) [0  S4 A32]))
(nil)

The stores in "insns 134-136" are used by the call. "check_mem_read_rtx"
would prevent them from being eliminated.

>
> Assuming we get to a point where we think this or something similar to
> it is safe, then we should retest pr30271 and if it's fixed reference
> it in the ChangeLog.
Yes, thanks for pointing out this.  The ChangeLog should also reference
30271.

BR,
Jeff (Jiufu Guo)

>
> Jeff


[PATCH] [ICE] Support vpcmov for V4HF/V4BF/V2HF/V2BF under TARGET_XOP.

2023-12-07 Thread liuhongt
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

PR target/112904
* config/i386/mmx.md (*xop_pcmov_): New define_insn.

gcc/testsuite/ChangeLog:

* g++.target/i386/pr112904.C: New test.
---
 gcc/config/i386/mmx.md   | 22 +++
 gcc/testsuite/g++.target/i386/pr112904.C | 27 
 2 files changed, 49 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr112904.C

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a07a921b739..06d6c57876b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -4136,6 +4136,17 @@ (define_insn "*xop_pcmov_"
   [(set_attr "type" "sse4arg")
(set_attr "mode" "TI")])
 
+(define_insn "*xop_pcmov_"
+  [(set (match_operand:V4F_64 0 "register_operand" "=x")
+(if_then_else:V4F_64
+  (match_operand:V4F_64 3 "register_operand" "x")
+  (match_operand:V4F_64 1 "register_operand" "x")
+  (match_operand:V4F_64 2 "register_operand" "x")))]
+  "TARGET_XOP && TARGET_MMX_WITH_SSE"
+  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "sse4arg")
+   (set_attr "mode" "TI")])
+
 (define_insn "*xop_pcmov_"
   [(set (match_operand:VI_16_32 0 "register_operand" "=x")
 (if_then_else:VI_16_32
@@ -4147,6 +4158,17 @@ (define_insn "*xop_pcmov_"
   [(set_attr "type" "sse4arg")
(set_attr "mode" "TI")])
 
+(define_insn "*xop_pcmov_"
+  [(set (match_operand:V2F_32 0 "register_operand" "=x")
+(if_then_else:V2F_32
+  (match_operand:V2F_32 3 "register_operand" "x")
+  (match_operand:V2F_32 1 "register_operand" "x")
+  (match_operand:V2F_32 2 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "sse4arg")
+   (set_attr "mode" "TI")])
+
 ;; XOP permute instructions
 (define_insn "mmx_ppermv64"
   [(set (match_operand:V8QI 0 "register_operand" "=x")
diff --git a/gcc/testsuite/g++.target/i386/pr112904.C 
b/gcc/testsuite/g++.target/i386/pr112904.C
new file mode 100644
index 000..556be921197
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr112904.C
@@ -0,0 +1,27 @@
+typedef _Float16 v4hf __attribute__((vector_size(8)));
+typedef short v4hi __attribute__((vector_size(8)));
+typedef _Float16 v2hf __attribute__((vector_size(4)));
+typedef short v2hi __attribute__((vector_size(4)));
+
+typedef __bf16 v4bf __attribute__((vector_size(8)));
+typedef __bf16 v2bf __attribute__((vector_size(4)));
+
+v4hf foo(v4hf a, v4hf b, v4hi c)
+{
+  return c ? a : b;
+}
+
+v2hf foo1(v2hf a, v2hf b, v2hi c)
+{
+  return c ? a : b;
+}
+
+v4bf foo(v4bf a, v4bf b, v4hi c)
+{
+  return c ? a : b;
+}
+
+v2bf foo1(v2bf a, v2bf b, v2hi c)
+{
+  return c ? a : b;
+}
-- 
2.31.1



[gcc-wwwdocs PATCH] gcc-13/14: Mention recent update for x86_64 backend

2023-12-07 Thread Haochen Jiang
Hi all,

This patch will mention the following changes in wwwdocs for x86_64 backend:

  - AVX10.1 support
  - APX EGPR, PUSH2POP2, PPX and NDD support
  - Xeon Phi ISAs deprecated

I also adjusted the wording of the x86_64 part for GCC 13. Ok for gcc-wwwdocs?

Thx,
Haochen

Mention AVX10.1 support, APX support and Xeon Phi deprecation in GCC 14.
Also adjust documentation in GCC 13.
---
 htdocs/gcc-13/changes.html | 14 --
 htdocs/gcc-14/changes.html | 18 ++
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 8ef3d639..e29ca72e 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -579,13 +579,13 @@ You may also want to check out our
   
   GCC now supports the Intel CPU named Sierra Forest through
 -march=sierraforest.
-The switch enables the AVX-IFMA, AVX-VNNI-INT8, AVX-NE-CONVERT, CMPccXADD,
-ENQCMD and UINTR ISA extensions.
+Based on ISA extensions enabled on Alder Lake, the switch further enables
+the AVX-IFMA, AVX-VNNI-INT8, AVX-NE-CONVERT, CMPccXADD, ENQCMD and UINTR
+ISA extensions.
   
   GCC now supports the Intel CPU named Grand Ridge through
 -march=grandridge.
-The switch enables the AVX-IFMA, AVX-VNNI-INT8, AVX-NE-CONVERT, CMPccXADD,
-ENQCMD, UINTR and RAO-INT ISA extensions.
+Based on Sierra Forest, the switch further enables RAO-INT ISA extensions.
   
   GCC now supports the Intel CPU named Emerald Rapids through
 -march=emeraldrapids.
@@ -593,11 +593,13 @@ You may also want to check out our
   
   GCC now supports the Intel CPU named Granite Rapids through
 -march=graniterapids.
-The switch enables the AMX-FP16, PREFETCHI ISA extensions.
+Based on Sapphire Rapids, the switch further enables the AMX-FP16 and
+PREFETCHI ISA extensions.
   
   GCC now supports the Intel CPU named Granite Rapids D through
 -march=graniterapids-d.
-The switch enables the AMX-FP16, PREFETCHI and AMX-COMPLEX ISA extensions.
+Based on Granite Rapids, the switch further enables the AMX-COMPLEX ISA
+extensions.
   
   GCC now supports AMD CPUs based on the znver4 core
 via -march=znver4.  The switch makes GCC consider
diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 6d7138f8..8590f735 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -296,6 +296,16 @@ a work-in-progress.
   USER_MSR intrinsics are available via the -muser_msr
   compiler switch.
   
+  New ISA extension support for Intel AVX10.1 was added.
+  AVX10.1 intrinsics are available via the -mavx10.1 or
+  -mavx10.1-256 compiler switch with 256 bit vector size
+  support. 512 bit vector size support for AVX10.1 intrinsics are
+  available via the -mavx10.1-512 compiler switch.
+  
+  Part of new feature support for Intel APX was added, including EGPR,
+  PUSH2POP2, PPX and NDD. APX features are available via the
+  -mapxf compiler switch.
+  
   GCC now supports the Intel CPU named Clearwater Forest through
 -march=clearwaterforest.
 Based on Sierra Forest, the switch further enables the AVX-VNNI-INT16,
@@ -321,6 +331,14 @@ a work-in-progress.
 Based on Arrow Lake S, the switch further enables the PREFETCHI ISA
 extensions.
   
+  Xeon Phi CPUs support (a.k.a. Knight Landing and Knight Mill) are marked
+as deprecated. GCC will emit a warning when using the
+-mavx5124fmaps, -mavx5124vnniw,
+-mavx512er, -mavx512pf,
+-mprefetchwt1, -march=knl,
+-march=knm, -mtune=knl and 
-mtune=knm
+compiler switch. The support will be removed in GCC 15.
+  
 
 
 
-- 
2.31.1



Re: [PATCH] testsuite: scev: expect fail on ilp32

2023-12-07 Thread Richard Biener
On Thu, 7 Dec 2023, Hans-Peter Nilsson wrote:

> > Date: Mon, 4 Dec 2023 12:58:03 +0100 (CET)
> > From: Richard Biener 
> 
> > On Sat, 2 Dec 2023, Hans-Peter Nilsson wrote:
> > > > Date: Fri, 1 Dec 2023 08:07:14 +0100 (CET)
> > > > From: Richard Biener 
> > > > I read from your messages that the testcases pass on arm*-*-*?
> > > Yes: they pass (currently XPASS) on arm-eabi and
> > > arm-unknown-linux-gnueabi, default configurations.  But,
> > > scev-3 and -5 fail with for example -mcpu=cortex-r5
> > 
> > I see.  As said, the testcases test for "cost" things, so that we
> > "regressed" might mean we really "regressed" here.  Even the x86 -m32
> > result is questionable.
> > 
> > Of course whether using a single IV makes sense for all archs is
> > unknown.
> > 
> > Btw, if we turn the testcases into ones that are (sub-)target
> > specific then we want to again use C code as input.
> > 
> > I think at this point we've lost track and I'm juggling between
> > removing the testcases or moving them to a place they succeed
> > (with some specific -mcpu=?)
> > 
> > Richard.
> 
> So to not drop the ball(s) on this, here's a patch with your
> first alternative: remove them.
> 
> Ok?

OK.

Thanks,
Richard.

> -- >8 --
> Subject: [PATCH] testsuite: Remove gcc.dg/tree-ssa/scev-3.c -4.c and 5.c
> 
> These tests were recently xfailed on ilp32 targets though
> passing on almost all ilp32 targets (known exceptions: ia32
> and some arm subtargets).  They've been changed around too
> much to remain useful.
> 
>   PR testsuite/112786
>   * gcc.dg/tree-ssa/scev-3.c, gcc.dg/tree-ssa/scev-4.c,
>   gcc.dg/tree-ssa/scev-5.c: Remove.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/scev-3.c | 44 ---
>  gcc/testsuite/gcc.dg/tree-ssa/scev-4.c | 49 --
>  gcc/testsuite/gcc.dg/tree-ssa/scev-5.c | 44 ---
>  3 files changed, 137 deletions(-)
>  delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
>  delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
>  delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-5.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
> deleted file mode 100644
> index beea9aed9fe9..
> --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
> +++ /dev/null
> @@ -1,44 +0,0 @@
> -/* { dg-do compile } */
> -/* { dg-options "-O2 -fgimple -fdump-tree-ivopts" } */
> -
> -int *a_p;
> -int a[1000];
> -
> -void __GIMPLE (ssa,startwith ("loop"))
> -f (int k)
> -{
> -  int i;
> -  int * _1;
> -
> -__BB(2):
> -  i_5 = k_4(D);
> -  if (i_5 <= 999)
> -goto __BB4;
> -  else
> -goto __BB3;
> -
> -__BB(3):
> -  return;
> -
> -__BB(4):
> -  goto __BB5;
> -
> -__BB(5):
> -  i_12 = __PHI (__BB6: i_9, __BB4: i_5);
> -  _1 = [i_12];
> -  a_p = _1;
> -  __MEM  ((int *))[i_12] = 100;
> -  i_9 = i_5 + i_12;
> -  if (i_9 <= 999)
> -goto __BB6;
> -  else
> -goto __BB3;
> -
> -__BB(6):
> -  ;
> -  goto __BB5;
> -
> -}
> -
> -/* Not all 32-bit systems fail this, but several do.  */
> -/* { dg-final { scan-tree-dump-times "" 1 "ivopts" { xfail ilp32 } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
> deleted file mode 100644
> index a97f75f81f65..
> --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
> +++ /dev/null
> @@ -1,49 +0,0 @@
> -/* { dg-do compile } */
> -/* { dg-options "-O2 -fgimple -fdump-tree-ivopts" } */
> -
> -typedef struct {
> -int x;
> -int y;
> -} S;
> -
> -int *a_p;
> -S a[1000];
> -
> -void __GIMPLE (ssa, startwith ("loop"))
> -f (int k)
> -{
> -  int i;
> -  int * _1;
> -
> -__BB(2):
> -  i_5 = k_4(D);
> -  if (i_5 <= 999)
> -goto __BB4;
> -  else
> -goto __BB3;
> -
> -__BB(3):
> -  return;
> -
> -__BB(4):
> -  goto __BB5;
> -
> -__BB(5):
> -  i_12 = __PHI (__BB6: i_9, __BB4: i_5);
> -  _1 = [i_12].y;
> -  a_p = _1;
> -  __MEM  ((int *))[i_12].y = 100;
> -  i_9 = i_5 + i_12;
> -  if (i_9 <= 999)
> -goto __BB6;
> -  else
> -goto __BB3;
> -
> -__BB(6):
> -  ;
> -  goto __BB5;
> -
> -}
> -
> -/* Not all 32-bit systems fail this, but several do.  */
> -/* { dg-final { scan-tree-dump-times "" 1 "ivopts" { xfail ilp32 } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-5.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/scev-5.c
> deleted file mode 100644
> index 08f4260403c4..
> --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-5.c
> +++ /dev/null
> @@ -1,44 +0,0 @@
> -/* { dg-do compile } */
> -/* { dg-options "-O2 -fgimple -fdump-tree-ivopts" } */
> -
> -int *a_p;
> -int a[1000];
> -
> -void __GIMPLE (ssa,startwith ("loop"))
> -f (int k)
> -{
> -  long long int i;
> -  int * _1;
> -
> -__BB(2):
> -  i_5 = (long long int) k_4(D);
> -  if (i_5 <= 999ll)
> -goto __BB4;
> -  else
> -goto __BB3;
> -
> -__BB(3):
> -  return;
> -
> -__BB(4):
> -  goto __BB5;
> -
> -__BB(5):
> -  i_12 = __PHI (__BB6: i_9, __BB4: i_5);
> -  _1 = [i_12];
> -  a_p = _1;
> -  __MEM  ((int *))[i_12] = 100;
> -  i_9 = i_5 + 

Re: [PATCH]

2023-12-07 Thread Richard Biener
On Thu, Dec 7, 2023 at 6:52 PM Alexandre Oliva  wrote:
>
> On Dec  7, 2023, Thomas Schwinge  wrote:
>
> > Thank you for looking into this so promptly!
>
> You're welcome ;-)
>
>
> > during IPA pass: emutls
> > [...]/source-gcc/gcc/testsuite/c-c++-common/strub-unsupported-3.c:18:1: 
> > internal compiler error: in verify_curr_properties, at passes.cc:2198
>
> Aah, this smells a lot like the issue that François-Xavier reported,
> that the following patch is expected to fix.  I'm still regstrapping it
> on x86_64-linux-gnu, after checking that it addressed the symptom on a
> cross compiler to the target for which it had originally been reported.
> Ok to install, once you confirm that it cures these ICEs?
>
>
> strub: skip emutls after strubm errors
>
> The emutls pass requires PROP_ssa, but if the strubm pass (or any
> other pre-SSA pass) issues errors, all of the build_ssa_passes are
> skipped, so the property is not set, but emutls still attempts to run,
> on targets that use it, despite earlier errors, so it hits the
> unsatisfied requirement.
>
> Adjust emutls to be skipped in case of earlier errors.

OK.

>
> for  gcc/ChangeLog
>
> * tree-emutls.cc: Include diagnostic-core.h.
> (pass_ipa_lower_emutls::gate): Skip if errors were seen.
> ---
>  gcc/tree-emutls.cc |3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/tree-emutls.cc b/gcc/tree-emutls.cc
> index 5dca5a8291356..38de202717a1a 100644
> --- a/gcc/tree-emutls.cc
> +++ b/gcc/tree-emutls.cc
> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "langhooks.h"
>  #include "tree-iterator.h"
>  #include "gimplify.h"
> +#include "diagnostic-core.h" /* for seen_error */
>
>  /* Whenever a target does not support thread-local storage (TLS) natively,
> we can emulate it with some run-time support in libgcc.  This will in
> @@ -841,7 +842,7 @@ public:
>bool gate (function *) final override
>  {
>/* If the target supports TLS natively, we need do nothing here.  */
> -  return !targetm.have_tls;
> +  return !targetm.have_tls && !seen_error ();
>  }
>
>unsigned int execute (function *) final override
>
>
> --
> Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
>Free Software Activist   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving ""normal"" is *not* inclusive


[Committed] RISC-V: Remove redundant check of better_main_loop_than_p in COST model

2023-12-07 Thread Juzhe-Zhong
The loop vectorizer won't call better_main_loop_than_p if 
!flag_vect_cost_model, so the check in the RISC-V cost model is redundant.

Committed as it is obvious.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (costs::better_main_loop_than_p): 
Remove redundant check.

---
 gcc/config/riscv/riscv-vector-costs.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 8036c9c40d7..c062c12a263 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -630,9 +630,6 @@ costs::better_main_loop_than_p (const vector_costs 
*uncast_other) const
 {
   auto other = static_cast<const costs *> (uncast_other);
 
-  if (!flag_vect_cost_model)
-return vector_costs::better_main_loop_than_p (other);
-
   if (riscv_autovec_lmul == RVV_DYNAMIC)
 {
   bool post_dom_available_p = dom_info_available_p (CDI_POST_DOMINATORS);
-- 
2.36.3



Re: [PATCH] i386: Mark Xeon Phi ISAs as deprecated

2023-12-07 Thread Hongtao Liu
On Wed, Dec 6, 2023 at 3:52 PM Richard Biener
 wrote:
>
> On Wed, Dec 6, 2023 at 3:33 AM Jiang, Haochen  wrote:
> >
> > > -Original Message-
> > > From: Jiang, Haochen
> > > Sent: Friday, December 1, 2023 4:51 PM
> > > To: Richard Biener 
> > > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao ;
> > > ubiz...@gmail.com
> > > Subject: RE: [PATCH] i386: Mark Xeon Phi ISAs as deprecated
> > >
> > > > -Original Message-
> > > > From: Richard Biener 
> > > > Sent: Friday, December 1, 2023 4:37 PM
> > > > To: Jiang, Haochen 
> > > > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao ;
> > > > ubiz...@gmail.com
> > > > Subject: Re: [PATCH] i386: Mark Xeon Phi ISAs as deprecated
> > > >
> > > > On Fri, Dec 1, 2023 at 8:34 AM Jiang, Haochen 
> > > > wrote:
> > > > >
> > > > > > -Original Message-
> > > > > > From: Richard Biener 
> > > > > > Sent: Friday, December 1, 2023 3:04 PM
> > > > > > To: Jiang, Haochen 
> > > > > > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao ;
> > > > > > ubiz...@gmail.com
> > > > > > Subject: Re: [PATCH] i386: Mark Xeon Phi ISAs as deprecated
> > > > > >
> > > > > > On Fri, Dec 1, 2023 at 3:22 AM Haochen Jiang
> > > 
> > > > > > wrote:
> > > > > > >
> > > > > > > Since Knight Landing and Knight Mill microarchitectures are EOL, 
> > > > > > > we
> > > > > > > would like to remove its support in GCC 15. In GCC 14, we will 
> > > > > > > first
> > > > > > > emit a warning for the usage.
> > > > > >
> > > > > > I think it's better to keep supporting -mtune/arch=knl without 
> > > > > > diagnostics
> > > > >
> > > > > I see, it could be a choice and might be better. But if we take this, 
> > > > > how
> > > should
> > > > > we define -mtune=knl remains a question.
> > > >
> > > > I'd say mapping it to a "close" micro-architecture makes most sense, but
> > > > we could also simply keep the tuning entry for knl?
> > >
> > > Actually I have written a removal test patch, one of the issue might be 
> > > there is
> > > something specific about knl in tuning for VZEROUPPER, which is also 
> > > reflected
> > > in
> > > PR82990.
> > >
> > > /* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction
> > > insertion
> > >before a transfer of control flow out of the function.  */
> > > DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL)
> > >
> > > If we chose to keep them, this behavior will be changed.
> >
> > Hi Richard,
> >
> > After double thinking, I suppose we still should remove the arch/tune 
> > options
> > here to avoid misleading behavior since there will always something be 
> > changed.
> >
> > What is your concern about removing? Do you have anything that relies on the
> > tune and arch?
>
> We usually promise backwards compatibility with respect to accepted options
> which is why we have things like
>
> ftree-vect-loop-version
> Common Ignore
> Does nothing. Preserved for backward compatibility.
>
> the backend errors on unknown march/tune and that would be a regression
> for build systems using that (even if that's indeed very unlikely).  That's 
> why
> I suggested to make it still do something (doing "nothing", aka keeping 
> generic
> is probably worse than dropping).  I guess having -march=knl behave 
> differently
> is also bad so I guess there's not a good solution for that.
To avoid confusion,  I prefer to remove all of them.
>
> So - just to have made the above point, I'm fine with what x86 maintainers
> decide here.
>
> Richard.
>
> > Thx,
> > Haochen
> >
> > >
> > > >
> > > > > > but simply not enable the ISAs we don't support.  The better 
> > > > > > question is
> > > > > > what to do about KNL specific intrinsics headers / intrinsics?  
> > > > > > Will we
> > > > > > simply remove those?
> > > > >
> > > > > If there is no objection, The intrinsics are planned to be removed in 
> > > > > GCC 15.
> > > > > As far as concerned, almost nobody are using them with the latest GCC.
> > > And
> > > > > there is no complaint when removing them in ICC/ICX.
> > > >
> > > > I see.  Replacing the header contents with #error "XYZ is no longer
> > > supported"
> > > > might be nicer.  OTOH x86intrin.h should simply no longer include them.
> > >
> > > That is nicer. I will take that in GCC 15 patch.
> > >
> > > Thx,
> > > Haochen
> > >
> > > >
> > > > Richard.
> > > >
> > > > > Thx,
> > > > > Haochen
> > > > >
> > > > > >
> > > > > > Richard.
> > > > > >
> > > > > > > gcc/ChangeLog:
> > > > > > >
> > > > > > > * config/i386/driver-i386.cc (host_detect_local_cpu):
> > > > > > > Do not append "-mno-" for Xeon Phi ISAs.
> > > > > > > * config/i386/i386-options.cc 
> > > > > > > (ix86_option_override_internal):
> > > > > > > Emit a warning for KNL/KNM targets.
> > > > > > > * config/i386/i386.opt: Emit a warning for Xeon Phi ISAs.
> > > > > > >
> > > > > > > gcc/testsuite/ChangeLog:
> > > > > > >
> > > > > > > * g++.dg/other/i386-2.C: Adjust testcases.
> > > > > > > * g++.dg/other/i386-3.C: Ditto.
> > > > > > >   

Re: Re: [PATCH][v2] RISC-V: Add avail interface into function_group_info

2023-12-07 Thread Feng Wang
2023-12-08 11:35 Kito Cheng  wrote:



>> Without comma, there is a warning "warning: ISO C++11 requires at least one 
>> argument for the "..." in a variadic macro".
>> So I add the comma.
>
>Oh, thanks, I wasn't aware that, maybe take one more argument to '...'
>
>#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, ...)
>
>Anyway my intention is to reduce unnecessary diff as possible.

Hi Kito, I understand.
I tried many ways to minimize this modification, including the method you
mentioned, but there is still a warning.
However, a warning alone won't affect the functionality. If you think that's
okay, then there's no need to modify the original definition.

Re: [PATCH] LoongArch: Allow -mcmodel=extreme and model attribute with -mexplicit-relocs=auto

2023-12-07 Thread Jinyang He

On 2023-12-08 10:04, chenglulu wrote:



在 2023/12/7 下午8:20, Xi Ruoyao 写道:

There seems no real reason to require -mexplicit-relocs=always for
-mcmodel=extreme or model attribute.  As the linker does not know how to
relax a 3-operand la.local or la.global pseudo instruction, just emit
explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also
SYMBOL_GOT_DISP.


I think there is no problem. HeJinyang is testing it, and if the tests show no
problems, you can merge it.


Thanks!



Thanks! Now I can compile the Linux kernel with `-mexplicit-relocs=auto`,
and the instructions are as expected. (In the kernel, some pcalau12i+addi.d
pairs are relaxed to pcaddi, and modules still access percpu vars via 64-bit la.pcrel.)



Re: Re: [PATCH][v2] RISC-V: Add avail interface into function_group_info

2023-12-07 Thread Kito Cheng
> Without comma, there is a warning "warning: ISO C++11 requires at least one 
> argument for the "..." in a variadic macro".
> So I add the comma.

Oh, thanks, I wasn't aware that, maybe take one more argument to '...'

#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, ...)

Anyway my intention is to reduce unnecessary diff as possible.


Re: [PATCH] tree-optimization/PR112774 - SCEV: extend the chrec tree with a nonwrapping flag

2023-12-07 Thread Hao Liu OS
> Yes, I can see that.  I think the patch is OK with a minor nit - can you
> please document the nothrow flag usage in TREE_CHREC in
> tree-core.h?  There's a big comment doing flags documentation:

Thanks, committed with the new documentation:
https://gcc.gnu.org/g:2efe3a7de0107618397264017fb045f237764cc7

Thanks,
Hao.


From: Richard Biener 
Sent: Thursday, December 7, 2023 22:12
To: Hao Liu OS
Cc: GCC-patches@gcc.gnu.org
Subject: Re: [PATCH] tree-optimization/PR112774 - SCEV: extend the chrec tree 
with a nonwrapping flag

On Thu, Dec 7, 2023 at 9:59 AM Hao Liu OS  wrote:
>
> > Can you try to do some statistics on say SPEC CPU?  I'm usually
> > building (with -j1) with -fopt-info-vec and diff build logs, you can then 
> > see
> > how many more loops (and which) we vectorize additionally?
>
> I tried this option with SPEC2017 intrate+fprate and count the "optimized: "
> lines. Five more loops are vectorized (aarch64-linux-gnu: O3/Ofast for 
> int/fp):
>
> | O3/Ofast| before | after | additionally vectorized |
> | --- | -- | - | --- |
> | 502.gcc_r   | 1075   | 1076  | reload1.c:1934  |
> | 510.parest_r| 9818   | 9819  | fe_dgp_monomial.cc:104  |
> | 523.xalancbmk_r | 4791   | 4824  | XMLReader.cpp:1650  |
> | 526.blender_r   | 4520   | 4522  | infback.c:441,inflate.c:983 |
>
> All of them access arrays with unsigned offsets, which were previously thought
> to possibly overflow. E.g., the case in 502.gcc:
>
> unsigned int i;
> unsigned int this_nregs = ...;
>
> for (j = 1; j < this_nregs; j++)
>   {
> this_cost += spill_add_cost[regno + j];
> if ((TEST_HARD_REG_BIT (not_usable, regno + j))
>   || TEST_HARD_REG_BIT (used_by_other_reload, regno + j))
>   ok = 0;
>   }
>
> However, as they are not hot, the performance is not affected. I measured the 
> build time, which is also not affected. With "-flto", more benchmarks (12 in 
> total) will be affected (details are not analyzed):
>
> | O3/Ofast + -flto | before | after | diff |
> |  | -- | - |  |
> | 502.gcc_r| 979| 980   | +1   |
> | 507.cactuBSSN_r  | 3454   | 3458  | +4   |
> | 508.namd_r   | 858| 857   | -1   |
> | 510.parest_r | 1575   | 1577  | +2   |
> | 511.povray_r | 810| 812   | +2   |
> | 521.wrf_r| 8769   | 8763  | -6   |
> | 523.xalancbmk_r  | 3959   | 3979  | +20  |
> | 526.blender_r| 4580   | 4575  | -5   |
> | 527.cam4_r   | 2371   | 2370  | -1   |
> | 538.imagick_r| 462| 461   | -1   |
> | 549.fotonik3d_r  | 436| 437   | +1   |
> | 554.roms_r   | 852| 851   | -1   |
>
> I think using unsigned index to access array should be rare. Programmers
> tend to use "for (int i; ...)" instead of unsigned values. But there may be
> special requirements. This opportunity is found in a real application, which
> has hot loops with such unsigned access pattern, and it can get huge
> improvements.

Yes, I can see that.  I think the patch is OK with a minor nit - can you
please document the nothrow flag usage in TREE_CHREC in
tree-core.h?  There's a big comment doing flags documentation:


/* The following table lists the uses of each of the above flags and
   for which types of nodes they are defined.
...

OK with that change.   Let's see how it goes ...

Thanks,
Richard.

> Thanks,
> Hao
>
> 
> From: Richard Biener 
> Sent: Wednesday, December 6, 2023 19:49
> To: Hao Liu OS
> Cc: GCC-patches@gcc.gnu.org
> Subject: Re: [PATCH] tree-optimization/PR112774 - SCEV: extend the chrec tree 
> with a nonwrapping flag
>
> On Wed, Dec 6, 2023 at 10:46 AM Hao Liu OS  
> wrote:
> >
> > Hi,
> >
> > Update the patch to fix problems in the test case:
> >  - add "-details" option to the dump command
> >  - add dg-require and target filters to avoid potential failures on 
> > platforms that don't support vectorization.
>
> Interesting simple trick - the downside is that this makes the
> recursive dependence
> of SCEV on niter analysis and niter analysis on SCEV even "worse".  Also you
> set the flag on CHRECs that are not necessarily cached, so I'm not sure how
> effective this will be ...
>
> Can you try to do some statistics on say SPEC CPU?  I'm usually
> building (with -j1) with -fopt-info-vec and diff build logs, you can then see
> how many more loops (and which) we vectorize additionally?
>
> Thanks,
> Richard.
>
> > Thanks,
> > -Hao
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/112774
> > * tree-pretty-print.cc: if nonwrapping flag is set, chrec will be
> > printed with additional  info.
> > * tree-scalar-evolution.cc: add record_nonwrapping_chrec and
> > nonwrapping_chrec_p to set and check the new flag respectively.
> > * 

Re: [PATCH V3 2/3] Using pli for constant splitting

2023-12-07 Thread Jiufu Guo


Hi,

Thanks for your insight and helpful review!

"Kewen.Lin"  writes:

> Hi Jeff,
>
> on 2023/12/6 13:24, Jiufu Guo wrote:
>> Hi,
>> 
>> For constant building e.g. r120=0x66666666, which does not fit 'li or lis',
>> 'pli' is used to build this constant via 'emit_move_insn'.
>> 
>> While for a complicated constant, e.g. 0x6666666666666666ULL, when using
>> 'rs6000_emit_set_long_const' to split the constant recursively, it fails to
>> use 'pli' to build the half part constant: 0x66666666.
>> 
>> 'rs6000_emit_set_long_const' could be updated to use 'pli' to build half
>> part of the constant when necessary.  For example: 0x6666666666666666ULL,
>> "pli 3,1717986918; rldimi 3,3,32,0" can be used.
>> 
>> Compare with previous:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636567.html
>> This version is refreshed and added with a new testcase.
>> 
>> Bootstrap pass on ppc64{,le}.
>> Is this ok for trunk?
>> 
>> BR,
>> Jeff (Jiufu Guo)
>> 
>> gcc/ChangeLog:
>> 
>>  * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use
>>  pli for 34bit constant.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>  * gcc.target/powerpc/const_split_pli.c: New test.
>
> Nit: Now we have:
>
> gcc/testsuite/gcc.target/powerpc/const-build.c
> gcc/testsuite/gcc.target/powerpc/const_anchors.c
> gcc/testsuite/gcc.target/powerpc/const-compare.c
>
> I prefer the name of this new case is like const-build-1.c
> (put a detailed comment inside) or const-build-split-pli.c,
> to align with the existing.
Thanks!
>
>> 
>> ---
>>  gcc/config/rs6000/rs6000.cc| 7 +++
>>  gcc/testsuite/gcc.target/powerpc/const_split_pli.c | 9 +
>>  2 files changed, 16 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/const_split_pli.c
>> 
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index dbdc72dce5d..2e074a21a05 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -10509,6 +10509,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT 
>> c, int *num_insns)
>> GEN_INT (0x)));
>>};
>>  
>> +  if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
>> +{
>> +  /* li/lis/pli */
>> +  count_or_emit_insn (dest, GEN_INT (c));
>> +  return;
>> +}
>> +
>>if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>>|| (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
>>  {
>> diff --git a/gcc/testsuite/gcc.target/powerpc/const_split_pli.c 
>> b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c
>> new file mode 100644
>> index 000..626c93084aa
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/const_split_pli.c
>> @@ -0,0 +1,9 @@
>> +/* { dg-do compile { target lp64 } } */
>> +/* { dg-options "-O2" } */
>
> It needs -mdejagnu-cpu=power10 as well.
Yeap, thanks.
>
>> +/* { dg-require-effective-target power10_ok } */
>> +
>> +unsigned long long msk66() { return 0x6666666666666666ULL; }
>> +
>> +/* { dg-final { scan-assembler-times {\mpli\M} 1 } } */
>> +/* { dg-final { scan-assembler-not {\mli\M} } } */
>> +/* { dg-final { scan-assembler-not {\mlis\M} } } */
>
> OK for trunk with the above nits tweaked, thanks!
>
> BR,
> Kewen


Re: [PATCH V3 1/3]rs6000: update num_insns_constant for 2 insns

2023-12-07 Thread Jiufu Guo


Hi,

Thanks for your always kind and helpful review!!

"Kewen.Lin"  writes:

> Hi Jeff,
>
> on 2023/12/6 13:24, Jiufu Guo wrote:
>> Hi,
>> 
>> Trunk gcc supports more constants to be built via two instructions:
>> e.g. "li/lis; xori/xoris/rldicl/rldicr/rldic".
>> And then num_insns_constant should also be updated.
>> 
>> Function "rs6000_emit_set_long_const" is used to build complicated
>> constants; and "num_insns_constant_gpr" is used to compute "how
>> many instructions are needed" to build the constant. So, these 
>> two functions should be aligned.
>> 
>> The idea of this patch is: to reuse "rs6000_emit_set_long_const" to
>> compute/record the instruction number(when computing the insn_num, 
>> then do not emit instructions).
>> 
>> Compare with the previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636565.html
>> This version updates "rs6000_emit_set_long_const" to use a condition
>> if to select either "computing insn number" or "emitting the insn".
>> And put them together to avoid misalign in the future.
>> 
>> Bootstrap & regtest pass ppc64{,le}.
>> Is this ok for trunk?
>> 
>> BR,
>> Jeff (Jiufu Guo)
>> 
>> gcc/ChangeLog:
>> 
>>  * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new
>>  parameter to record number of instructions to build the constant.
>>  (num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute
>>  num_insn.
>> 
>> ---
>>  gcc/config/rs6000/rs6000.cc | 272 ++--
>>  1 file changed, 137 insertions(+), 135 deletions(-)
>> 
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index 3dfd79c4c43..dbdc72dce5d 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, 
>> tree, tree, int, bool *);
>>  static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool 
>> *);
>>  static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool 
>> *);
>>  static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
>> -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
>> +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = 
>> nullptr);
>>  static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
>>  static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, 
>> bool);
>>  static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
>> @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
>>  
>>else if (TARGET_POWERPC64)
>>  {
>> -  HOST_WIDE_INT low = sext_hwi (value, 32);
>> -  HOST_WIDE_INT high = value >> 31;
>> -
>> -  if (high == 0 || high == -1)
>> -return 2;
>> -
>> -  high >>= 1;
>> -
>> -  if (low == 0 || low == high)
>> -return num_insns_constant_gpr (high) + 1;
>> -  else if (high == 0)
>> -return num_insns_constant_gpr (low) + 1;
>> -  else
>> -return (num_insns_constant_gpr (high)
>> -+ num_insns_constant_gpr (low) + 1);
>> +  int num_insns = 0;
>> +  rs6000_emit_set_long_const (NULL, value, &num_insns);
>
> Nit: Maybe nullptr to align with the others in this patch?
ok.
>
>> +  return num_insns;
>>  }
>>  
>>else
>> @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int 
>> *shift, HOST_WIDE_INT *mask)
>>  
>>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
>> Output insns to set DEST equal to the constant C as a series of
>> -   lis, ori and shl instructions.  */
>> +   lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
>> +   only increase *NUM_INSNS as the number of insns, and do not output
>> +   real insns.  */
>
> Nit: Maybe s/output real/emit any/.
Thanks.
>
>>  
>>  static void
>> -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>> +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
>>  {
>> -  rtx temp;
>> -  int shift;
>> -  HOST_WIDE_INT mask;
>>HOST_WIDE_INT ud1, ud2, ud3, ud4;
>>  
>>ud1 = c & 0xffff;
>> @@ -10509,168 +10496,183 @@ rs6000_emit_set_long_const (rtx dest, 
>> HOST_WIDE_INT c)
>>ud3 = (c >> 32) & 0xffff;
>>ud4 = (c >> 48) & 0xffff;
>>  
>> -  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
>> -  || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
>> -emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
>> +  /* This lambda is used to emit one insn or just increase the insn count.
>> + When counting the insn number, no need to emit the insn.  Here, two
>> + kinds of insns are needed: move and rldimi. */
>
> Can we make the latter a bit more generic?  Like something below?
Great suggestion! Thanks.
>
>> +  auto count_or_emit_insn = [&num_insns] (rtx dest, rtx op1, rtx op2 = 
>> NULL) {
>> +if (num_insns)
>> +  (*num_insns)++;
>
> Nit: Make it early return.
ok.
>
>> +else if (!op2)
>> +  

Re: Re: [PATCH][v2] RISC-V: Add avail interface into function_group_info

2023-12-07 Thread Feng Wang
2023-12-08 11:18 Kito Cheng  wrote:



>> @@ -31,624 +31,627 @@ along with GCC; see the file COPYING3. If not see



>>   - OPS_INFO describes all information of return type and each



>> argument type.



>>



>> + - AVAIL this argument is the optional for AVAIL.Determin the enable



>> +   of the intrinsic function.



>> +



>>  */



>>  #ifndef DEF_RVV_FUNCTION



>> -#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO)



>> +#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO, AVAIL)



>



>



>#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO, ...)



>



>>  #endif



>>



>>  /* Internal helper functions for gimple fold use.  */



>> -DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops)



>> -DEF_RVV_FUNCTION (vlenb, vlenb, none_preds, ul_none_void_ops)



>> +DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops, )



>> +DEF_RVV_FUNCTION (vlenb, vlenb, none_preds, ul_none_void_ops, )



>



>And keep this unchanged, you don't need extra comma.



>

Without comma, there is a warning "warning: ISO C++11 requires at least one 
argument for the "..." in a variadic macro".
So I add the comma.

>DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops)



>DEF_RVV_FUNCTION (vlenb, vlenb, none_preds, ul_none_void_ops)



>



>>



>>  /* 6. Configuration-Setting Instructions.  */



>>



>> -DEF_RVV_FUNCTION (vsetvl, vsetvl, none_preds, i_none_size_size_ops)



>> -DEF_RVV_FUNCTION (vsetvlmax, vsetvlmax, none_preds, i_none_size_void_ops)



>> +DEF_RVV_FUNCTION (vsetvl, vsetvl, none_preds, i_none_size_size_ops, )



>> +DEF_RVV_FUNCTION (vsetvlmax, vsetvlmax, none_preds, i_none_size_void_ops, )



>>



>>  /* 7. Vector Loads and Stores. */



>>



>>  // 7.4. Vector Unit-Stride Instructions



>> -DEF_RVV_FUNCTION (vle, loadstore, full_preds, all_v_scalar_const_ptr_ops)



>> -DEF_RVV_FUNCTION (vse, loadstore, none_m_preds, all_v_scalar_ptr_ops)



>> -DEF_RVV_FUNCTION (vlm, loadstore, none_preds, b_v_scalar_const_ptr_ops)



>> -DEF_RVV_FUNCTION (vsm, loadstore, none_preds, b_v_scalar_ptr_ops)



>> +DEF_RVV_FUNCTION (vle, loadstore, full_preds, all_v_scalar_const_ptr_ops, )



>> +DEF_RVV_FUNCTION (vse, loadstore, none_m_preds, all_v_scalar_ptr_ops, )



>> +DEF_RVV_FUNCTION (vlm, loadstore, none_preds, b_v_scalar_const_ptr_ops, )



>> +DEF_RVV_FUNCTION (vsm, loadstore, none_preds, b_v_scalar_ptr_ops, )



>>



>>  // 7.5. Vector Strided Instructions



>> -DEF_RVV_FUNCTION (vlse, loadstore, full_preds, 
>> all_v_scalar_const_ptr_ptrdiff_ops)



>> -DEF_RVV_FUNCTION (vsse, loadstore, none_m_preds, 
>> all_v_scalar_ptr_ptrdiff_ops)



>> +DEF_RVV_FUNCTION (vlse, loadstore, full_preds, 
>> all_v_scalar_const_ptr_ptrdiff_ops, )



>> +DEF_RVV_FUNCTION (vsse, loadstore, none_m_preds, 
>> all_v_scalar_ptr_ptrdiff_ops, )



>>



>>  // 7.6. Vector Indexed Instructions



>> -DEF_RVV_FUNCTION (vluxei8, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew8_index_ops)



>> -DEF_RVV_FUNCTION (vluxei16, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew16_index_ops)



>> -DEF_RVV_FUNCTION (vluxei32, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew32_index_ops)



>> -DEF_RVV_FUNCTION (vluxei64, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew64_index_ops)



>> -DEF_RVV_FUNCTION (vloxei8, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew8_index_ops)



>> -DEF_RVV_FUNCTION (vloxei16, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew16_index_ops)



>> -DEF_RVV_FUNCTION (vloxei32, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew32_index_ops)



>> -DEF_RVV_FUNCTION (vloxei64, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew64_index_ops)



>> -DEF_RVV_FUNCTION (vsuxei8, indexed_loadstore, none_m_preds, 
>> all_v_scalar_ptr_eew8_index_ops)



>> -DEF_RVV_FUNCTION (vsuxei16, indexed_loadstore, none_m_preds, 
>> all_v_scalar_ptr_eew16_index_ops)



>> -DEF_RVV_FUNCTION (vsuxei32, indexed_loadstore, none_m_preds, 
>> all_v_scalar_ptr_eew32_index_ops)



>> -DEF_RVV_FUNCTION (vsuxei64, indexed_loadstore, none_m_preds, 
>> all_v_scalar_ptr_eew64_index_ops)



>> -DEF_RVV_FUNCTION (vsoxei8, indexed_loadstore, none_m_preds, 
>> all_v_scalar_ptr_eew8_index_ops)



>> -DEF_RVV_FUNCTION (vsoxei16, indexed_loadstore, none_m_preds, 
>> all_v_scalar_ptr_eew16_index_ops)



>> -DEF_RVV_FUNCTION (vsoxei32, indexed_loadstore, none_m_preds, 
>> all_v_scalar_ptr_eew32_index_ops)



>> -DEF_RVV_FUNCTION (vsoxei64, indexed_loadstore, none_m_preds, 
>> all_v_scalar_ptr_eew64_index_ops)



>> +DEF_RVV_FUNCTION (vluxei8, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew8_index_ops, )



>> +DEF_RVV_FUNCTION (vluxei16, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew16_index_ops, )



>> +DEF_RVV_FUNCTION (vluxei32, indexed_loadstore, full_preds, 
>> all_v_scalar_const_ptr_eew32_index_ops, )



>> +DEF_RVV_FUNCTION (vluxei64, 

Re: [PATCH][v2] RISC-V: Add avail interface into function_group_info

2023-12-07 Thread Kito Cheng
> @@ -31,624 +31,627 @@ along with GCC; see the file COPYING3. If not see
>   - OPS_INFO describes all information of return type and each
> argument type.
>
> + - AVAIL this argument is the optional for AVAIL.Determin the enable
> +   of the intrinsic function.
> +
>  */
>  #ifndef DEF_RVV_FUNCTION
> -#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO)
> +#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO, AVAIL)


#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO, ...)

>  #endif
>
>  /* Internal helper functions for gimple fold use.  */
> -DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops)
> -DEF_RVV_FUNCTION (vlenb, vlenb, none_preds, ul_none_void_ops)
> +DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops, )
> +DEF_RVV_FUNCTION (vlenb, vlenb, none_preds, ul_none_void_ops, )

And keep this unchanged; you don't need the extra comma.

DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops)
DEF_RVV_FUNCTION (vlenb, vlenb, none_preds, ul_none_void_ops)

>
>  /* 6. Configuration-Setting Instructions.  */
>
> -DEF_RVV_FUNCTION (vsetvl, vsetvl, none_preds, i_none_size_size_ops)
> -DEF_RVV_FUNCTION (vsetvlmax, vsetvlmax, none_preds, i_none_size_void_ops)
> +DEF_RVV_FUNCTION (vsetvl, vsetvl, none_preds, i_none_size_size_ops, )
> +DEF_RVV_FUNCTION (vsetvlmax, vsetvlmax, none_preds, i_none_size_void_ops, )
>
>  /* 7. Vector Loads and Stores. */
>
>  // 7.4. Vector Unit-Stride Instructions
> -DEF_RVV_FUNCTION (vle, loadstore, full_preds, all_v_scalar_const_ptr_ops)
> -DEF_RVV_FUNCTION (vse, loadstore, none_m_preds, all_v_scalar_ptr_ops)
> -DEF_RVV_FUNCTION (vlm, loadstore, none_preds, b_v_scalar_const_ptr_ops)
> -DEF_RVV_FUNCTION (vsm, loadstore, none_preds, b_v_scalar_ptr_ops)
> +DEF_RVV_FUNCTION (vle, loadstore, full_preds, all_v_scalar_const_ptr_ops, )
> +DEF_RVV_FUNCTION (vse, loadstore, none_m_preds, all_v_scalar_ptr_ops, )
> +DEF_RVV_FUNCTION (vlm, loadstore, none_preds, b_v_scalar_const_ptr_ops, )
> +DEF_RVV_FUNCTION (vsm, loadstore, none_preds, b_v_scalar_ptr_ops, )
>
>  // 7.5. Vector Strided Instructions
> -DEF_RVV_FUNCTION (vlse, loadstore, full_preds, 
> all_v_scalar_const_ptr_ptrdiff_ops)
> -DEF_RVV_FUNCTION (vsse, loadstore, none_m_preds, 
> all_v_scalar_ptr_ptrdiff_ops)
> +DEF_RVV_FUNCTION (vlse, loadstore, full_preds, 
> all_v_scalar_const_ptr_ptrdiff_ops, )
> +DEF_RVV_FUNCTION (vsse, loadstore, none_m_preds, 
> all_v_scalar_ptr_ptrdiff_ops, )
>
>  // 7.6. Vector Indexed Instructions
> -DEF_RVV_FUNCTION (vluxei8, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew8_index_ops)
> -DEF_RVV_FUNCTION (vluxei16, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew16_index_ops)
> -DEF_RVV_FUNCTION (vluxei32, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew32_index_ops)
> -DEF_RVV_FUNCTION (vluxei64, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew64_index_ops)
> -DEF_RVV_FUNCTION (vloxei8, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew8_index_ops)
> -DEF_RVV_FUNCTION (vloxei16, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew16_index_ops)
> -DEF_RVV_FUNCTION (vloxei32, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew32_index_ops)
> -DEF_RVV_FUNCTION (vloxei64, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew64_index_ops)
> -DEF_RVV_FUNCTION (vsuxei8, indexed_loadstore, none_m_preds, 
> all_v_scalar_ptr_eew8_index_ops)
> -DEF_RVV_FUNCTION (vsuxei16, indexed_loadstore, none_m_preds, 
> all_v_scalar_ptr_eew16_index_ops)
> -DEF_RVV_FUNCTION (vsuxei32, indexed_loadstore, none_m_preds, 
> all_v_scalar_ptr_eew32_index_ops)
> -DEF_RVV_FUNCTION (vsuxei64, indexed_loadstore, none_m_preds, 
> all_v_scalar_ptr_eew64_index_ops)
> -DEF_RVV_FUNCTION (vsoxei8, indexed_loadstore, none_m_preds, 
> all_v_scalar_ptr_eew8_index_ops)
> -DEF_RVV_FUNCTION (vsoxei16, indexed_loadstore, none_m_preds, 
> all_v_scalar_ptr_eew16_index_ops)
> -DEF_RVV_FUNCTION (vsoxei32, indexed_loadstore, none_m_preds, 
> all_v_scalar_ptr_eew32_index_ops)
> -DEF_RVV_FUNCTION (vsoxei64, indexed_loadstore, none_m_preds, 
> all_v_scalar_ptr_eew64_index_ops)
> +DEF_RVV_FUNCTION (vluxei8, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew8_index_ops, )
> +DEF_RVV_FUNCTION (vluxei16, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew16_index_ops, )
> +DEF_RVV_FUNCTION (vluxei32, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew32_index_ops, )
> +DEF_RVV_FUNCTION (vluxei64, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew64_index_ops, )
> +DEF_RVV_FUNCTION (vloxei8, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew8_index_ops, )
> +DEF_RVV_FUNCTION (vloxei16, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew16_index_ops, )
> +DEF_RVV_FUNCTION (vloxei32, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew32_index_ops, )
> +DEF_RVV_FUNCTION (vloxei64, indexed_loadstore, full_preds, 
> all_v_scalar_const_ptr_eew64_index_ops, )
> 

Re: [V2 PATCH] Simplify vector ((VCE (a cmp b ? -1 : 0)) < 0) ? c : d to just (VCE ((a cmp b) ? (VCE c) : (VCE d))).

2023-12-07 Thread Hongtao Liu
ping.

On Thu, Nov 16, 2023 at 6:49 PM liuhongt  wrote:
>
> Update in V2:
> 1) Add some comments before the pattern.
> 2) Remove ? from view_convert.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> While working on PR112443, I noticed some missed optimizations:
> after we fold _mm{,256}_blendv_epi8/pd/ps into gimple, the backend
> fails to combine it back to v{,p}blendv{v,ps,pd} since the pattern is
> too complicated, so I think we should handle it at the gimple
> level.
>
> The dump is like
>
>   _1 = c_3(D) >= { 0, 0, 0, 0 };
>   _2 = VEC_COND_EXPR <_1, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>;
>   _7 = VIEW_CONVERT_EXPR(_2);
>   _8 = VIEW_CONVERT_EXPR(b_6(D));
>   _9 = VIEW_CONVERT_EXPR(a_5(D));
>   _10 = _7 < { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
>   _11 = VEC_COND_EXPR <_10, _8, _9>;
>
> It can be optimized to
>
>   _1 = c_2(D) >= { 0, 0, 0, 0 };
>   _6 = VEC_COND_EXPR <_1, b_5(D), a_4(D)>;
>
> Since _7 is either -1 or 0, the selection _7 < 0 ? _8 : _9 should
> be equal to _1 ? b : a as long as TYPE_PRECISION of the component type
> of the second VEC_COND_EXPR is less than or equal to that of the first one.
> The patch adds a gimple pattern to handle that.
>
> gcc/ChangeLog:
>
> * match.pd (VCE (a cmp b ? -1 : 0) < 0) ? c : d ---> (VCE ((a
> cmp b) ? (VCE:c) : (VCE:d))): New gimple simplification.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/avx512vl-blendv-3.c: New test.
> * gcc.target/i386/blendv-3.c: New test.
> ---
>  gcc/match.pd  | 22 +
>  .../gcc.target/i386/avx512vl-blendv-3.c   |  6 +++
>  gcc/testsuite/gcc.target/i386/blendv-3.c  | 46 +++
>  3 files changed, 74 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-blendv-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/blendv-3.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index dbc811b2b38..2a69622a300 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5170,6 +5170,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (optimize_vectors_before_lowering_p () && types_match (@0, @3))
>(vec_cond (bit_and @0 (bit_not @3)) @2 @1)))
>
> +/*  ((VCE (a cmp b ? -1 : 0)) < 0) ? c : d is just
> +(VCE ((a cmp b) ? (VCE c) : (VCE d))) when TYPE_PRECISION of the
> +component type of the outer vec_cond is greater equal the inner one.  */
> +(for cmp (simple_comparison)
> + (simplify
> +  (vec_cond
> +(lt (view_convert@5 (vec_cond@6 (cmp@4 @0 @1)
> +   integer_all_onesp
> +   integer_zerop))
> + integer_zerop) @2 @3)
> +  (if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (@0))
> +   && VECTOR_INTEGER_TYPE_P (TREE_TYPE (@5))
> +   && !TYPE_UNSIGNED (TREE_TYPE (@5))
> +   && VECTOR_TYPE_P (TREE_TYPE (@6))
> +   && VECTOR_TYPE_P (type)
> +   && (TYPE_PRECISION (TREE_TYPE (type))
> + <= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (@6))))
> +   && TYPE_SIZE (type) == TYPE_SIZE (TREE_TYPE (@6)))
> +   (with { tree vtype = TREE_TYPE (@6);}
> + (view_convert:type
> +   (vec_cond @4 (view_convert:vtype @2) (view_convert:vtype @3)))))))
> +
>  /* c1 ? c2 ? a : b : b  -->  (c1 & c2) ? a : b  */
>  (simplify
>   (vec_cond @0 (vec_cond:s @1 @2 @3) @3)
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-blendv-3.c 
> b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-3.c
> new file mode 100644
> index 000..2777e72ab5f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-3.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512vl -mavx512bw -O2" } */
> +/* { dg-final { scan-assembler-times {vp?blendv(?:b|p[sd])[ \t]*} 6 } } */
> +/* { dg-final { scan-assembler-not {vpcmp} } } */
> +
> +#include "blendv-3.c"
> diff --git a/gcc/testsuite/gcc.target/i386/blendv-3.c 
> b/gcc/testsuite/gcc.target/i386/blendv-3.c
> new file mode 100644
> index 000..fa0fb067a73
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/blendv-3.c
> @@ -0,0 +1,46 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times {vp?blendv(?:b|p[sd])[ \t]*} 6 } } */
> +/* { dg-final { scan-assembler-not {vpcmp} } } */
> +
> +#include <immintrin.h>
> +
> +__m256i
> +foo (__m256i a, __m256i b, __m256i c)
> +{
> +  return _mm256_blendv_epi8 (a, b, ~c < 0);
> +}
> +
> +__m256d
> +foo1 (__m256d a, __m256d b, __m256i c)
> +{
> +  __m256i d = ~c < 0;
> +  return _mm256_blendv_pd (a, b, (__m256d)d);
> +}
> +
> +__m256
> +foo2 (__m256 a, __m256 b, __m256i c)
> +{
> +  __m256i d = ~c < 0;
> +  return _mm256_blendv_ps (a, b, (__m256)d);
> +}
> +
> +__m128i
> +foo4 (__m128i a, __m128i b, __m128i c)
> +{
> +  return _mm_blendv_epi8 (a, b, ~c < 0);
> +}
> +
> +__m128d
> +foo5 (__m128d a, __m128d b, __m128i c)
> +{
> +  __m128i d = ~c < 0;
> +  return _mm_blendv_pd (a, b, (__m128d)d);
> +}
> +
> +__m128
> +foo6 

Re: [PATCH][v2] RISC-V: Add avail interface into function_group_info

2023-12-07 Thread Feng Wang
2023-12-08 10:44 Feng Wang  wrote:

Have run all the vector api-testing.

>Patch v2: Using variadic macro and add the dependency into t-riscv



>



>In order to add other extension about vector,this patch add



>unsigned int (*avail) (void) into function_group_info to determine



>whether to register the intrinsic based on ISA info.



>



>gcc/ChangeLog:



>



>   * config/riscv/riscv-vector-builtins-functions.def (DEF_RVV_FUNCTION):



>   Add AVAIL argument.



>   (read_vl): Using AVAIL argument default value.



>   (vlenb): Ditto.



>   (vsetvl): Ditto.



>   (vsetvlmax): Ditto.



>   (vle): Ditto.



>   (vse): Ditto.



>   (vlm): Ditto.



>   (vsm): Ditto.



>   (vlse): Ditto.



>   (vsse): Ditto.



>   (vluxei8): Ditto.



>   (vluxei16): Ditto.



>   (vluxei32): Ditto.



>   (vluxei64): Ditto.



>   (vloxei8): Ditto.



>   (vloxei16): Ditto.



>   (vloxei32): Ditto.



>   (vloxei64): Ditto.



>   (vsuxei8): Ditto.



>   (vsuxei16): Ditto.



>   (vsuxei32): Ditto.



>   (vsuxei64): Ditto.



>   (vsoxei8): Ditto.



>   (vsoxei16): Ditto.



>   (vsoxei32): Ditto.



>   (vsoxei64): Ditto.



>   (vleff): Ditto.



>   (vadd): Ditto.



>   (vsub): Ditto.



>   (vrsub): Ditto.



>   (vneg): Ditto.



>   (vwaddu): Ditto.



>   (vwsubu): Ditto.



>   (vwadd): Ditto.



>   (vwsub): Ditto.



>   (vwcvt_x): Ditto.



>   (vwcvtu_x): Ditto.



>   (vzext): Ditto.



>   (vsext): Ditto.



>   (vadc): Ditto.



>   (vmadc): Ditto.



>   (vsbc): Ditto.



>   (vmsbc): Ditto.



>   (vand): Ditto.



>   (vor): Ditto.



>   (vxor): Ditto.



>   (vnot): Ditto.



>   (vsll): Ditto.



>   (vsra): Ditto.



>   (vsrl): Ditto.



>   (vnsrl): Ditto.



>   (vnsra): Ditto.



>   (vncvt_x): Ditto.



>   (vmseq): Ditto.



>   (vmsne): Ditto.



>   (vmsltu): Ditto.



>   (vmslt): Ditto.



>   (vmsleu): Ditto.



>   (vmsle): Ditto.



>   (vmsgtu): Ditto.



>   (vmsgt): Ditto.



>   (vmsgeu): Ditto.



>   (vmsge): Ditto.



>   (vminu): Ditto.



>   (vmin): Ditto.



>   (vmaxu): Ditto.



>   (vmax): Ditto.



>   (vmul): Ditto.



>   (vmulh): Ditto.



>   (vmulhu): Ditto.



>   (vmulhsu): Ditto.



>   (vdivu): Ditto.



>   (vdiv): Ditto.



>   (vremu): Ditto.



>   (vrem): Ditto.



>   (vwmul): Ditto.



>   (vwmulu): Ditto.



>   (vwmulsu): Ditto.



>   (vmacc): Ditto.



>   (vnmsac): Ditto.



>   (vmadd): Ditto.



>   (vnmsub): Ditto.



>   (vwmaccu): Ditto.



>   (vwmacc): Ditto.



>   (vwmaccsu): Ditto.



>   (vwmaccus): Ditto.



>   (vmerge): Ditto.



>   (vmv_v): Ditto.



>   (vsaddu): Ditto.



>   (vsadd): Ditto.



>   (vssubu): Ditto.



>   (vssub): Ditto.



>   (vaaddu): Ditto.



>   (vaadd): Ditto.



>   (vasubu): Ditto.



>   (vasub): Ditto.



>   (vsmul): Ditto.



>   (vssrl): Ditto.



>   (vssra): Ditto.



>   (vnclipu): Ditto.



>   (vnclip): Ditto.



>   (vfadd): Ditto.



>   (vfsub): Ditto.



>   (vfrsub): Ditto.



>   (vfadd_frm): Ditto.



>   (vfsub_frm): Ditto.



>   (vfrsub_frm): Ditto.



>   (vfwadd): Ditto.



>   (vfwsub): Ditto.



>   (vfwadd_frm): Ditto.



>   (vfwsub_frm): Ditto.



>   (vfmul): Ditto.



>   (vfdiv): Ditto.



>   (vfrdiv): Ditto.



>   (vfmul_frm): Ditto.



>   (vfdiv_frm): Ditto.



>   (vfrdiv_frm): Ditto.



>   (vfwmul): Ditto.



>   (vfwmul_frm): Ditto.



>   (vfmacc): Ditto.



>   (vfnmsac): Ditto.



>   (vfmadd): Ditto.



>   (vfnmsub): Ditto.



>   (vfnmacc): Ditto.



>   (vfmsac): Ditto.



>   (vfnmadd): Ditto.



>   (vfmsub): Ditto.



>   (vfmacc_frm): Ditto.



>   (vfnmacc_frm): Ditto.



>   (vfmsac_frm): Ditto.



>   (vfnmsac_frm): Ditto.



>   (vfmadd_frm): Ditto.



>   (vfnmadd_frm): Ditto.



>   (vfmsub_frm): Ditto.



>   (vfnmsub_frm): Ditto.



>   (vfwmacc): Ditto.



>   (vfwnmacc): Ditto.



>   (vfwmsac): Ditto.



>   (vfwnmsac): Ditto.



>   (vfwmacc_frm): Ditto.



>   (vfwnmacc_frm): Ditto.



>   (vfwmsac_frm): Ditto.



>   (vfwnmsac_frm): Ditto.



>   (vfsqrt): Ditto.



>   (vfsqrt_frm): Ditto.



>   (vfrsqrt7): Ditto.



>   (vfrec7): Ditto.



>   (vfrec7_frm): Ditto.



>   (vfmin): Ditto.



>   (vfmax): Ditto.



>   (vfsgnj): Ditto.



>   (vfsgnjn): Ditto.



>   (vfsgnjx): Ditto.



>   (vfneg): Ditto.



>   (vfabs): Ditto.



>   (vmfeq): Ditto.



>   (vmfne): 

[PATCH][v2] RISC-V: Add avail interface into function_group_info

2023-12-07 Thread Feng Wang
Patch v2: Use a variadic macro and add the dependency to t-riscv.

In order to support other vector-related extensions, this patch adds
unsigned int (*avail) (void) to function_group_info to determine
whether to register an intrinsic based on ISA info.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-functions.def (DEF_RVV_FUNCTION):
Add AVAIL argument.
(read_vl): Using AVAIL argument default value.
(vlenb): Ditto.
(vsetvl): Ditto.
(vsetvlmax): Ditto.
(vle): Ditto.
(vse): Ditto.
(vlm): Ditto.
(vsm): Ditto.
(vlse): Ditto.
(vsse): Ditto.
(vluxei8): Ditto.
(vluxei16): Ditto.
(vluxei32): Ditto.
(vluxei64): Ditto.
(vloxei8): Ditto.
(vloxei16): Ditto.
(vloxei32): Ditto.
(vloxei64): Ditto.
(vsuxei8): Ditto.
(vsuxei16): Ditto.
(vsuxei32): Ditto.
(vsuxei64): Ditto.
(vsoxei8): Ditto.
(vsoxei16): Ditto.
(vsoxei32): Ditto.
(vsoxei64): Ditto.
(vleff): Ditto.
(vadd): Ditto.
(vsub): Ditto.
(vrsub): Ditto.
(vneg): Ditto.
(vwaddu): Ditto.
(vwsubu): Ditto.
(vwadd): Ditto.
(vwsub): Ditto.
(vwcvt_x): Ditto.
(vwcvtu_x): Ditto.
(vzext): Ditto.
(vsext): Ditto.
(vadc): Ditto.
(vmadc): Ditto.
(vsbc): Ditto.
(vmsbc): Ditto.
(vand): Ditto.
(vor): Ditto.
(vxor): Ditto.
(vnot): Ditto.
(vsll): Ditto.
(vsra): Ditto.
(vsrl): Ditto.
(vnsrl): Ditto.
(vnsra): Ditto.
(vncvt_x): Ditto.
(vmseq): Ditto.
(vmsne): Ditto.
(vmsltu): Ditto.
(vmslt): Ditto.
(vmsleu): Ditto.
(vmsle): Ditto.
(vmsgtu): Ditto.
(vmsgt): Ditto.
(vmsgeu): Ditto.
(vmsge): Ditto.
(vminu): Ditto.
(vmin): Ditto.
(vmaxu): Ditto.
(vmax): Ditto.
(vmul): Ditto.
(vmulh): Ditto.
(vmulhu): Ditto.
(vmulhsu): Ditto.
(vdivu): Ditto.
(vdiv): Ditto.
(vremu): Ditto.
(vrem): Ditto.
(vwmul): Ditto.
(vwmulu): Ditto.
(vwmulsu): Ditto.
(vmacc): Ditto.
(vnmsac): Ditto.
(vmadd): Ditto.
(vnmsub): Ditto.
(vwmaccu): Ditto.
(vwmacc): Ditto.
(vwmaccsu): Ditto.
(vwmaccus): Ditto.
(vmerge): Ditto.
(vmv_v): Ditto.
(vsaddu): Ditto.
(vsadd): Ditto.
(vssubu): Ditto.
(vssub): Ditto.
(vaaddu): Ditto.
(vaadd): Ditto.
(vasubu): Ditto.
(vasub): Ditto.
(vsmul): Ditto.
(vssrl): Ditto.
(vssra): Ditto.
(vnclipu): Ditto.
(vnclip): Ditto.
(vfadd): Ditto.
(vfsub): Ditto.
(vfrsub): Ditto.
(vfadd_frm): Ditto.
(vfsub_frm): Ditto.
(vfrsub_frm): Ditto.
(vfwadd): Ditto.
(vfwsub): Ditto.
(vfwadd_frm): Ditto.
(vfwsub_frm): Ditto.
(vfmul): Ditto.
(vfdiv): Ditto.
(vfrdiv): Ditto.
(vfmul_frm): Ditto.
(vfdiv_frm): Ditto.
(vfrdiv_frm): Ditto.
(vfwmul): Ditto.
(vfwmul_frm): Ditto.
(vfmacc): Ditto.
(vfnmsac): Ditto.
(vfmadd): Ditto.
(vfnmsub): Ditto.
(vfnmacc): Ditto.
(vfmsac): Ditto.
(vfnmadd): Ditto.
(vfmsub): Ditto.
(vfmacc_frm): Ditto.
(vfnmacc_frm): Ditto.
(vfmsac_frm): Ditto.
(vfnmsac_frm): Ditto.
(vfmadd_frm): Ditto.
(vfnmadd_frm): Ditto.
(vfmsub_frm): Ditto.
(vfnmsub_frm): Ditto.
(vfwmacc): Ditto.
(vfwnmacc): Ditto.
(vfwmsac): Ditto.
(vfwnmsac): Ditto.
(vfwmacc_frm): Ditto.
(vfwnmacc_frm): Ditto.
(vfwmsac_frm): Ditto.
(vfwnmsac_frm): Ditto.
(vfsqrt): Ditto.
(vfsqrt_frm): Ditto.
(vfrsqrt7): Ditto.
(vfrec7): Ditto.
(vfrec7_frm): Ditto.
(vfmin): Ditto.
(vfmax): Ditto.
(vfsgnj): Ditto.
(vfsgnjn): Ditto.
(vfsgnjx): Ditto.
(vfneg): Ditto.
(vfabs): Ditto.
(vmfeq): Ditto.
(vmfne): Ditto.
(vmflt): Ditto.
(vmfle): Ditto.
(vmfgt): Ditto.
(vmfge): Ditto.
(vfclass): Ditto.
(vfmerge): Ditto.
(vfmv_v): Ditto.
(vfcvt_x): Ditto.
(vfcvt_xu): Ditto.
(vfcvt_rtz_x): Ditto.
(vfcvt_rtz_xu): Ditto.
(vfcvt_f): Ditto.
(vfcvt_x_frm): Ditto.
(vfcvt_xu_frm): Ditto.
(vfcvt_f_frm): Ditto.
(vfwcvt_x): Ditto.
(vfwcvt_xu): Ditto.
(vfwcvt_rtz_x): Ditto.
(vfwcvt_rtz_xu): Ditto.
(vfwcvt_f): Ditto.
(vfwcvt_x_frm): 

Re: [patch-2v3, rs6000] Guard fctid on PowerPC64 and PowerPC476 [PR112707]

2023-12-07 Thread Kewen.Lin
Hi Haochen,

on 2023/12/8 09:58, HAO CHEN GUI wrote:
> Hi,
>   The "fctid" is supported on 64-bit Power processors and PowerPC476. It
> needs a guard to check it. The patch fixes the issue.
> 
>   Compared with last version,
> https://gcc.gnu.org/pipermail/gcc-patches/2023-December/639536.html
> the main change is to change the target requirement in pr88558*.c.
> 
>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
> regressions. Is this OK for trunk?
> 
> 
> ChangeLog
> rs6000: Guard fctid on PowerPC64 and PowerPC476.
> 
> fctid is supported on 64-bit Power processors and powerpc 476. It should

Nit: s/is/is only/

> be guarded by this condition. The patch fixes the issue.
> 
> gcc/
>   PR target/112707
>   * config/rs6000/rs6000.h (TARGET_FCTID): Define.
>   * config/rs6000/rs6000.md (lrintdi2): Add guard TARGET_FCTID.
>   * (lrounddi2): Replace TARGET_FPRND with TARGET_FCTID.
> 
> gcc/testsuite/
>   PR target/112707
>   * gcc.target/powerpc/pr112707.h: New.
>   * gcc.target/powerpc/pr112707-2.c: New.
>   * gcc.target/powerpc/pr112707-3.c: New.
>   * gcc.target/powerpc/pr88558-p7.c: Check fctid on ilp32 and
>   has_arch_ppc64 as it's now guarded by powerpc64.
>   * gcc.target/powerpc/pr88558-p8.c: Likewise.
>   * gfortran.dg/nint_p7.f90: Add powerpc64 target requirement as
>   lrounddi2 is now guarded by powerpc64.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 22595f6ebd7..8c29ca68ccf 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -467,6 +467,8 @@ extern int rs6000_vector_align[];
>  #define TARGET_FCFIDUS   TARGET_POPCNTD
>  #define TARGET_FCTIDUZ   TARGET_POPCNTD
>  #define TARGET_FCTIWUZ   TARGET_POPCNTD
> +/* Enable fctid on ppc64 and powerpc476.  */

Nit: Changed to "Only powerpc64 and powerpc476 support fctid."

Ok for trunk with the nits fixed, thanks!

BR,
Kewen

> +#define TARGET_FCTID (TARGET_POWERPC64 || rs6000_cpu == PROCESSOR_PPC476)
>  #define TARGET_CTZ   TARGET_MODULO
>  #define TARGET_EXTSWSLI  (TARGET_MODULO && TARGET_POWERPC64)
>  #define TARGET_MADDLDTARGET_MODULO
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 2a1b5ecfaee..3be79d49dc0 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -6718,7 +6718,7 @@ (define_insn "lrintdi2"
>[(set (match_operand:DI 0 "gpc_reg_operand" "=d")
>   (unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
>  UNSPEC_FCTID))]
> -  "TARGET_HARD_FLOAT"
> +  "TARGET_HARD_FLOAT && TARGET_FCTID"
>"fctid %0,%1"
>[(set_attr "type" "fp")])
> 
> @@ -6784,7 +6784,7 @@ (define_expand "lrounddi2"
> (set (match_operand:DI 0 "gpc_reg_operand")
>   (unspec:DI [(match_dup 2)]
>  UNSPEC_FCTID))]
> -  "TARGET_HARD_FLOAT && TARGET_VSX && TARGET_FPRND"
> +  "TARGET_HARD_FLOAT && TARGET_VSX && TARGET_FCTID"
>  {
>operands[2] = gen_reg_rtx (mode);
>  })
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707-2.c 
> b/gcc/testsuite/gcc.target/powerpc/pr112707-2.c
> new file mode 100644
> index 000..672e00691ea
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr112707-2.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mdejagnu-cpu=7450 -fno-math-errno" } */
> +/* { dg-require-effective-target ilp32 } */
> +/* { dg-skip-if "" { has_arch_ppc64 } } */
> +/* { dg-final { scan-assembler-not {\mfctid\M} } }  */
> +
> +/* powerpc 7450 doesn't support ppc64 (-m32 -mpowerpc64), so skips it.  */
> +
> +#include "pr112707.h"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707-3.c 
> b/gcc/testsuite/gcc.target/powerpc/pr112707-3.c
> new file mode 100644
> index 000..924338fd390
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr112707-3.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=476fp" } */
> +/* { dg-require-effective-target ilp32 } */
> +
> +/* powerpc 476fp has hard float enabled which is required by fctid */
> +
> +#include "pr112707.h"
> +
> +/* { dg-final { scan-assembler-times {\mfctid\M} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707.h 
> b/gcc/testsuite/gcc.target/powerpc/pr112707.h
> new file mode 100644
> index 000..e427dc6a72e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr112707.h
> @@ -0,0 +1,10 @@
> +long long test1 (double a)
> +{
> +  return __builtin_llrint (a);
> +}
> +
> +long long test2 (float a)
> +{
> +  return __builtin_llrint (a);
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c 
> b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
> index 3932656c5fd..2fa0b997e52 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
> @@ -6,7 +6,7 @@
>  #include "pr88558.h"
> 
>  /* { dg-final { scan-assembler-times {\mfctid\M} 4 { target lp64 } } 

[PATCH] Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobber(V4DImode) doesn't contain all SSE_REGS.

2023-12-07 Thread liuhongt
If the function doesn't clobber any SSE registers, or only clobbers
their 128-bit part, then vzeroupper isn't issued before the function
exits, so the status after the call is not CLEAN but ANY.

Also, for sibling_call it's safe to issue a vzeroupper. In addition, a
vzeroupper could otherwise be missing, since there's no mode_exit for
sibling_call_p.

Compared to the patch in the PR, this patch adds the sibling_call part.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk and backport?

gcc/ChangeLog:

PR target/112891
* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
align with ix86_avx_u128_mode_needed.
(ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for
sibling_call.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112891.c: New test.
* gcc.target/i386/pr112891-2.c: New test.
---
 gcc/config/i386/i386.cc| 22 +---
 gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++
 gcc/testsuite/gcc.target/i386/pr112891.c   | 29 +
 3 files changed, 78 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 7c5cab4e2c6..fe259cdb789 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -15038,8 +15038,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
 vzeroupper if all SSE registers are clobbered.  */
   const function_abi &abi = insn_callee_abi (insn);
   if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
- || !hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
-abi.mode_clobbers (V4DImode)))
+ /* Should be safe to issue a vzeroupper before sibling_call_p.
+Also there is no mode_exit for sibling_call, so there could be
+a missing vzeroupper for that.  */
+ || !(SIBLING_CALL_P (insn)
+  || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+abi.mode_clobbers (V4DImode))))
return AVX_U128_ANY;
 
   return AVX_U128_CLEAN;
@@ -15177,7 +15181,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
   bool avx_upper_reg_found = false;
   note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
 
-  return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
+  if (avx_upper_reg_found)
+   return AVX_U128_DIRTY;
+
+  /* If the function doesn't clobber any sse registers or only clobbers
+the 128-bit part, then vzeroupper isn't issued before the function exit.
+The status is not CLEAN but ANY after the function.  */
+  const function_abi &abi = insn_callee_abi (insn);
+  if (!(SIBLING_CALL_P (insn)
+   || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+ abi.mode_clobbers (V4DImode))))
+   return AVX_U128_ANY;
+
+  return  AVX_U128_CLEAN;
 }
 
   /* Otherwise, return current mode.  Remember that if insn
diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c 
b/gcc/testsuite/gcc.target/i386/pr112891-2.c
new file mode 100644
index 000..164c3985d50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+
+void
+__attribute__((noinline))
+bar (double* a)
+{
+  a[0] = 1.0;
+  a[1] = 2.0;
+}
+
+double
+__attribute__((noinline))
+foo (double* __restrict a, double* b)
+{
+  a[0] += b[0];
+  a[1] += b[1];
+  a[2] += b[2];
+  a[3] += b[3];
+  bar (b);
+  return a[5] + b[5];
+}
+
+double
+foo1 (double* __restrict a, double* b)
+{
+  double c = foo (a, b);
+  return __builtin_exp (c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c 
b/gcc/testsuite/gcc.target/i386/pr112891.c
new file mode 100644
index 000..dbf6c67948a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112891.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+
+void
+__attribute__((noinline))
+bar (double* a)
+{
+  a[0] = 1.0;
+  a[1] = 2.0;
+}
+
+void
+__attribute__((noinline))
+foo (double* __restrict a, double* b)
+{
+  a[0] += b[0];
+  a[1] += b[1];
+  a[2] += b[2];
+  a[3] += b[3];
+  bar (b);
+}
+
+double
+foo1 (double* __restrict a, double* b)
+{
+  foo (a, b);
+  return __builtin_exp (b[1]);
+}
-- 
2.31.1



Re: [PATCH v3] LoongArch: Fix eh_return epilogue for normal returns

2023-12-07 Thread Yang Yujie
On Thu, Dec 07, 2023 at 04:13:51PM +0800, Xi Ruoyao wrote:
> 
> I understand and I don't think adding {} is wrong.  The problem is the
> indent change causes a large chunk of diff and it makes reviewing more
> difficult.  Thus generally we should not mix real code change and format
> change in a commit.
> 
> i. e. it would be better to separate it into two patches, the first adds
> {} and changes the indent, and the second changes the logic.  But now I
> don't think it's needed to make a V4, just pushing this should be fine.
> 

Thanks for the suggestion.  It is clearer that way.

Yujie



Re: [PATCH] LoongArch: Allow -mcmodel=extreme and model attribute with -mexplicit-relocs=auto

2023-12-07 Thread chenglulu



在 2023/12/7 下午8:20, Xi Ruoyao 写道:

There seems no real reason to require -mexplicit-relocs=always for
-mcmodel=extreme or model attribute.  As the linker does not know how to
relax a 3-operand la.local or la.global pseudo instruction, just emit
explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also
SYMBOL_GOT_DISP.


I think there is no problem. HeJinyang is testing, and if there is no
problem with the test run, you can merge it in.


Thanks!



Re: [PATCH] testsuite: Adjust for the new permerror -Wincompatible-pointer-types

2023-12-07 Thread Yang Yujie
On Thu, Dec 07, 2023 at 10:18:47AM +0100, Florian Weimer wrote:
> * Yang Yujie:
> 
> > With this patch, I also noticed a few errors in building unpatched older
> > software like expect-5.45.4, perl-5.28.3 and bash-5.0.  Will this also be
> > the case when GCC 14 gets released?
> 
> For Fedora, we keep pointers of the changes needed here:
> 
>   
>   
>   
> 
> (For perl, it looks like another change will be needed.)
> 
> This also includes changes that we imported through rebases after we
> started tracking.
> 
> We do not have control over the release schedule for those upstream
> projects, and of course there are many project which are more or less
> dormant and have not made a release in many years.
> 
> Thanks,
> Florian

Got it.  Thanks for the pointers to the fedora patches.

Yujie



Re: [patch-2v3, rs6000] Guard fctid on PowerPC64 and PowerPC476 [PR112707]

2023-12-07 Thread HAO CHEN GUI
Hi,
  The "fctid" is supported on 64-bit Power processors and PowerPC476. It
needs a guard to check it. The patch fixes the issue.

  Compared with last version,
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/639536.html
the main change is to change the target requirement in pr88558*.c.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
regressions. Is this OK for trunk?


ChangeLog
rs6000: Guard fctid on PowerPC64 and PowerPC476.

fctid is supported on 64-bit Power processors and powerpc 476. It should
be guarded by this condition. The patch fixes the issue.

gcc/
PR target/112707
* config/rs6000/rs6000.h (TARGET_FCTID): Define.
* config/rs6000/rs6000.md (lrintdi2): Add guard TARGET_FCTID.
* (lrounddi2): Replace TARGET_FPRND with TARGET_FCTID.

gcc/testsuite/
PR target/112707
* gcc.target/powerpc/pr112707.h: New.
* gcc.target/powerpc/pr112707-2.c: New.
* gcc.target/powerpc/pr112707-3.c: New.
* gcc.target/powerpc/pr88558-p7.c: Check fctid on ilp32 and
has_arch_ppc64 as it's now guarded by powerpc64.
* gcc.target/powerpc/pr88558-p8.c: Likewise.
* gfortran.dg/nint_p7.f90: Add powerpc64 target requirement as
lrounddi2 is now guarded by powerpc64.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 22595f6ebd7..8c29ca68ccf 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -467,6 +467,8 @@ extern int rs6000_vector_align[];
 #define TARGET_FCFIDUS TARGET_POPCNTD
 #define TARGET_FCTIDUZ TARGET_POPCNTD
 #define TARGET_FCTIWUZ TARGET_POPCNTD
+/* Enable fctid on ppc64 and powerpc476.  */
+#define TARGET_FCTID   (TARGET_POWERPC64 || rs6000_cpu == PROCESSOR_PPC476)
 #define TARGET_CTZ TARGET_MODULO
 #define TARGET_EXTSWSLI(TARGET_MODULO && TARGET_POWERPC64)
 #define TARGET_MADDLD  TARGET_MODULO
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2a1b5ecfaee..3be79d49dc0 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6718,7 +6718,7 @@ (define_insn "lrintdi2"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
   UNSPEC_FCTID))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_HARD_FLOAT && TARGET_FCTID"
   "fctid %0,%1"
   [(set_attr "type" "fp")])

@@ -6784,7 +6784,7 @@ (define_expand "lrounddi2"
(set (match_operand:DI 0 "gpc_reg_operand")
(unspec:DI [(match_dup 2)]
   UNSPEC_FCTID))]
-  "TARGET_HARD_FLOAT && TARGET_VSX && TARGET_FPRND"
+  "TARGET_HARD_FLOAT && TARGET_VSX && TARGET_FCTID"
 {
   operands[2] = gen_reg_rtx (mode);
 })
diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr112707-2.c
new file mode 100644
index 000..672e00691ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr112707-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=7450 -fno-math-errno" } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
+/* { dg-final { scan-assembler-not {\mfctid\M} } }  */
+
+/* powerpc 7450 doesn't support ppc64 (-m32 -mpowerpc64), so skips it.  */
+
+#include "pr112707.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707-3.c 
b/gcc/testsuite/gcc.target/powerpc/pr112707-3.c
new file mode 100644
index 000..924338fd390
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr112707-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=476fp" } */
+/* { dg-require-effective-target ilp32 } */
+
+/* powerpc 476fp has hard float enabled which is required by fctid */
+
+#include "pr112707.h"
+
+/* { dg-final { scan-assembler-times {\mfctid\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707.h 
b/gcc/testsuite/gcc.target/powerpc/pr112707.h
new file mode 100644
index 000..e427dc6a72e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr112707.h
@@ -0,0 +1,10 @@
+long long test1 (double a)
+{
+  return __builtin_llrint (a);
+}
+
+long long test2 (float a)
+{
+  return __builtin_llrint (a);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c 
b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
index 3932656c5fd..2fa0b997e52 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c
@@ -6,7 +6,7 @@
 #include "pr88558.h"

 /* { dg-final { scan-assembler-times {\mfctid\M} 4 { target lp64 } } } */
-/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target { ilp32 && 
has_arch_ppc64 } } } } */
 /* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target lp64 } } } */
 /* { dg-final { scan-assembler-times {\mfctiw\M} 4 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {\mstfiwx\M} 2 { target lp64 } } } */
diff --git 

Re: [PATCH v3 2/3] libphobos: Update build scripts for LoongArch64.

2023-12-07 Thread Yang Yujie
On Thu, Dec 07, 2023 at 11:34:28AM +0100, Iain Buclaw wrote:
> 
> Just a nitpick, I'd thought the committing of switchcontext.S should
> come before this. I have no strong opinion either way.
> 
> OK to commit.
> 
> Iain.

Thanks for the suggestion, indeed this makes sense.

Yujie



Re: [PATCH v3 1/3] LoongArch: Adjust D version strings.

2023-12-07 Thread Yang Yujie
On Thu, Dec 07, 2023 at 11:30:16AM +0100, Iain Buclaw wrote:
> Hi,
> 
> Thanks for this.
> 
> Excerpts from Yang Yujie's message of Dezember 1, 2023 11:08 am:
> > diff --git a/gcc/d/dmd/cond.d b/gcc/d/dmd/cond.d
> > index 568b639e0b6..02af0cc9e29 100644
> > --- a/gcc/d/dmd/cond.d
> > +++ b/gcc/d/dmd/cond.d
> > @@ -693,10 +693,10 @@ extern (C++) final class VersionCondition : 
> > DVCondition
> >  case "LDC":
> >  case "linux":
> >  case "LittleEndian":
> > -case "LoongArch32":
> >  case "LoongArch64":
> > -case "LoongArch_HardFloat":
> > -case "LoongArch_SoftFloat":
> > +case "LoongArch_F64":
> > +case "LoongArch_F32":
> > +case "LoongArch_SF":
> >  case "MinGW":
> >  case "MIPS32":
> >  case "MIPS64":
> 
> Changes to this module should be submitted to github.com/dlang/dmd,
> otherwise it'll get overwritten on the next "merge" with upstream.
> 
> What's the rationale for F64 and SF abbreviations?
> 
> Otherwise, looks reasonable.
> 
> Iain.

Hi Iain,

Thanks for the review!  I will push this to the dmd repo first shortly
and then send a v4 here.

By definition, LoongArch at the current stage can choose to implement a 64-bit
FPU, a 32-bit FPU, or no FPU at all, which are represented by the target triplets
loongarch*-*{f64,f32,sf}.  The F64/F32/SF-suffixed version strings represent
this distinction, though support for the "F32" ISA variant is not active
upstream.

From what I can see, the current usage of "F64/SF" is only needed for FP
context code.  I will also push the corresponding change to druntime later.

Also, if you have the time, does the following patch look OK to you?
I couldn't get libphobos to build as a static library and this is the fix
I came up with.
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636767.html

Thanks,
Yujie



Re: [PATCH] libgccjit: Add type checks in gcc_jit_block_add_assignment_op

2023-12-07 Thread David Malcolm
On Thu, 2023-12-07 at 17:34 -0500, Antoni Boucher wrote:
> Hi.
> This patch adds checks gcc_jit_block_add_assignment_op to make sure
> it
> is only ever called on numeric types.
> 
> With the previous patch, this might require a change to also allow
> vector types here.
> 
> Thanks for the review.

Thanks for the patch.

[...snip...]

> @@ -2890,6 +2900,17 @@ gcc_jit_block_add_assignment_op (gcc_jit_block *block,
>  lvalue->get_type ()->get_debug_string (),
>  rvalue->get_debug_string (),
>  rvalue->get_type ()->get_debug_string ());
> +  // TODO: check if it is a numeric vector?
> +  RETURN_IF_FAIL_PRINTF3 (
> +lvalue->get_type ()->is_numeric () && rvalue->get_type ()->is_numeric 
> (), ctxt, loc,
> +"gcc_jit_block_add_assignment_op %s has non-numeric lvalue %s (type: 
> %s)",
> +gcc::jit::binary_op_reproducer_strings[op],
> +lvalue->get_debug_string (), lvalue->get_type ()->get_debug_string ());

The condition being tested here should probably just be:

   lvalue->get_type ()->is_numeric ()

since otherwise if the lvalue's type is numeric and the rvalue's type
fails to be, then the user would incorrectly get a message about the
lvalue.

> +  RETURN_IF_FAIL_PRINTF3 (
> +rvalue->get_type ()->is_numeric () && rvalue->get_type ()->is_numeric 
> (), ctxt, loc,
> +"gcc_jit_block_add_assignment_op %s has non-numeric rvalue %s (type: 
> %s)",
> +gcc::jit::binary_op_reproducer_strings[op],
> +rvalue->get_debug_string (), rvalue->get_type ()->get_debug_string ());

The condition being tested here seems to have a redundant repeated:
  && rvalue->get_type ()->is_numeric ()

Am I missing something, or is that a typo?

[...snip...]

The patch is OK otherwise.

Thanks
Dave





Re: [PATCH] libgccjit: Make is_int return false on vector types

2023-12-07 Thread Antoni Boucher
Can I merge this on master even though we're not in phase 1 anymore?

On Thu, 2023-12-07 at 20:07 -0500, David Malcolm wrote:
> On Thu, 2023-12-07 at 17:32 -0500, Antoni Boucher wrote:
> > Hi.
> > This patch changes the function is_int to return false on vector
> > types.
> > Thanks for the review.
> 
> Thanks; looks good to me
> 
> Dave
> 



Re: [PATCH] libgccjit: Make is_int return false on vector types

2023-12-07 Thread David Malcolm
On Thu, 2023-12-07 at 17:32 -0500, Antoni Boucher wrote:
> Hi.
> This patch changes the function is_int to return false on vector
> types.
> Thanks for the review.

Thanks; looks good to me

Dave



Re: [PATCH] libgccjit: Make new_array_type take unsigned long

2023-12-07 Thread David Malcolm
On Thu, 2023-12-07 at 17:29 -0500, Antoni Boucher wrote:
> Hi.
> This patches update gcc_jit_context_new_array_type to take the size
> as
> an unsigned long instead of a int, to allow creating bigger array
> types.
> 
> I haven't written the ChangeLog yet because I wasn't sure it's
> allowed
> to change the type of a function like that.
> If it isn't, what would you suggest?

We've kept ABI compatibility all the way back to the version in GCC 5,
so it seems a shame to break ABI.

How about a new API entrypoint:
  gcc_jit_context_new_array_type_unsigned_long
whilst keeping the old one.

Then everything internally can use "unsigned long"; we just keep the
old entrypoint accepting int (which internally promotes the arg to
unsigned long, if positive, sharing all the implementation).

Alternatively, I think there may be a way to do this with symbol
versioning:
  https://gcc.gnu.org/wiki/SymbolVersioning
see e.g. Section 3.7 of Ulrich Drepper's "How To Write Shared
Libraries", but I'm a bit wary of cross-platform compatibility with
that.

Dave




Re: [PATCH] libgccjit: Fix get_size of size_t

2023-12-07 Thread David Malcolm
On Thu, 2023-12-07 at 17:26 -0500, Antoni Boucher wrote:
> Hi.
> This patch fixes getting the size of size_t (bug 112910).
> 
> There's one issue with this patch: like every other feature that
> checks
> for target-specific stuff, it requires a compilation before actually
> fetching the size of the type.
> Which means that getting the size before a compilation might be wrong
> (and I actually believe is wrong on x86-64).
> 
> I was wondering if we should always implicitely do the first
> compilation to gather the correct info: this would fix this issue and
> all the others that we have due to that.
> I'm not sure what would be the performance implication.

Maybe introduce a new class target_info which contains all the
information we might want to find via a compilation, and have the top-
level recording::context have a pointer to it, which starts as nullptr,
but can be populated on-demand the first time something needs it?

> 
> Another solution that I have been thinking about for a while now
> would
> be to have another frontend libgccaot (I don't like that name), which
> is like libgccjit but removes the JIT part so that we get access to
> the
> target stuff directly and would remove the need for having a
> seperation
> between recording and playback as far as I understand.
> That's a long-term solution, but I wanted to share the idea now and
> gather your thoughts on that.

FWIW the initial version of libgccjit didn't have a split between
recording and playback; instead the client code had to pass in a
callback to call into the various API functions (creating tree nodes).
See:
https://gcc.gnu.org/legacy-ml/gcc-patches/2013-10/msg00228.html

Dave



Re: [PATCH 1/5][V3][ifcvt] optimize x=c ? (y op z) : y by RISC-V Zicond like insns

2023-12-07 Thread Jeff Law




On 12/5/23 01:12, Fei Gao wrote:

op=[PLUS, MINUS, IOR, XOR]

Conditional op, if zero
rd = (rc == 0) ? (rs1 op rs2) : rs1
-->
czero.nez rd, rs2, rc
op rd, rs1, rd

Conditional op, if non-zero
rd = (rc != 0) ? (rs1 op rs2) : rs1
-->
czero.eqz rd, rs2, rc
op rd, rs1, rd

Co-authored-by: Xiao Zeng

gcc/ChangeLog:

* ifcvt.cc (noce_try_cond_zero_arith): handler for conditional zero based 
ifcvt
 (noce_emit_czero): helper for noce_try_cond_zero_arith
 (noce_cond_zero_binary_op_supported): check supported OPs for 
conditional zero based ifcvt
 (get_base_reg): get the reg itself or NULL_RTX if not a reg
 (noce_bbs_ok_for_cond_zero_arith): check if BBs are OK for conditional 
zero based ifcvt
 (noce_process_if_block): add noce_try_cond_zero_arith

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zicond_ifcvt_opt.c: New test.
Thanks.  In the future make sure to note testing you've done.  Given 
this is a change to target independent code it must be bootstrapped and 
regression tested on a primary platform.


FTR I've done the bootstrap and regression test on x86_64 in addition to 
regression testing on rv64gc.


I'll push this to the trunk momentarily.

jeff





[pushed] analyzer: fix ICE for 2 bits before the start of base region [PR112889]

2023-12-07 Thread David Malcolm
Concrete bindings were using -1 and -2 in the offset field to signify
deleted and empty hash slots, but these are valid values, leading to
assertion failures inside hash_map::put on a debug build, and probable
bugs in a release build.

(gdb) call k.dump(true)
start: -2, size: 1, next: -1

(gdb) p k.is_empty()
$6 = true

Fix by using the size field rather than the offset.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Successful run of analyzer integration tests on x86_64-pc-linux-gnu.
Pushed to trunk as r14-6297-g775aeabcb870b7.

gcc/analyzer/ChangeLog:
PR analyzer/112889
* store.h (concrete_binding::concrete_binding): Strengthen
assertion to require size to be positive, rather than just
non-zero.
(concrete_binding::mark_deleted): Use size rather than start bit
offset.
(concrete_binding::mark_empty): Likewise.
(concrete_binding::is_deleted): Likewise.
(concrete_binding::is_empty): Likewise.

gcc/testsuite/ChangeLog:
PR analyzer/112889
* c-c++-common/analyzer/ice-pr112889.c: New test.
---
 gcc/analyzer/store.h| 10 +-
 .../c-c++-common/analyzer/ice-pr112889.c| 17 +
 2 files changed, 22 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/analyzer/ice-pr112889.c

diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h
index cf10fa3b010..d75d69d0b7f 100644
--- a/gcc/analyzer/store.h
+++ b/gcc/analyzer/store.h
@@ -377,7 +377,7 @@ public:
   concrete_binding (bit_offset_t start_bit_offset, bit_size_t size_in_bits)
   : m_bit_range (start_bit_offset, size_in_bits)
   {
-gcc_assert (!m_bit_range.empty_p ());
+gcc_assert (m_bit_range.m_size_in_bits > 0);
   }
   bool concrete_p () const final override { return true; }
 
@@ -419,10 +419,10 @@ public:
 
   static int cmp_ptr_ptr (const void *, const void *);
 
-  void mark_deleted () { m_bit_range.m_start_bit_offset = -1; }
-  void mark_empty () { m_bit_range.m_start_bit_offset = -2; }
-  bool is_deleted () const { return m_bit_range.m_start_bit_offset == -1; }
-  bool is_empty () const { return m_bit_range.m_start_bit_offset == -2; }
+  void mark_deleted () { m_bit_range.m_size_in_bits = -1; }
+  void mark_empty () { m_bit_range.m_size_in_bits = -2; }
+  bool is_deleted () const { return m_bit_range.m_size_in_bits == -1; }
+  bool is_empty () const { return m_bit_range.m_size_in_bits == -2; }
 
 private:
   bit_range m_bit_range;
diff --git a/gcc/testsuite/c-c++-common/analyzer/ice-pr112889.c 
b/gcc/testsuite/c-c++-common/analyzer/ice-pr112889.c
new file mode 100644
index 000..e90a53e79ba
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/analyzer/ice-pr112889.c
@@ -0,0 +1,17 @@
+typedef unsigned char __u8;
+struct sk_buff
+{
+  unsigned char *data;
+};
+struct cpl_pass_accept_req
+{
+  __u8 : 6;
+  __u8 sack : 1;
+};
+void build_cpl_pass_accept_req(struct sk_buff* skb)
+{
+  struct cpl_pass_accept_req* req;
+  skb->data -= sizeof(*req);
+  req = (struct cpl_pass_accept_req *)skb->data;
+  req->sack = 1;
+}
-- 
2.26.3



Re: [PATCH] Expand: Pass down equality only flag to cmpmem expand

2023-12-07 Thread Jeff Law




On 11/28/23 00:43, HAO CHEN GUI wrote:

Hi,
   This patch passes down the equality only flags from
emit_block_cmp_hints to cmpmem optab so that the target specific expand
can generate optimized insns for equality only compare. Targets
(e.g. rs6000) can generate more efficient insn sequence if the block
compare is equality only.

   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with
no regressions. Is this OK for trunk?

Thanks
Gui Haochen

ChangeLog
Expand: Pass down equality only flag to cmpmem expand

Targets (e.g. rs6000) can generate more efficient insn sequence if the
block compare is equality only.  This patch passes down the equality
only flags from emit_block_cmp_hints to cmpmem optab so that the target
specific expand can generate optimized insns for equality only compare.

gcc/
* expr.cc (expand_cmpstrn_or_cmpmem): Rename to...
(expand_cmpstrn): ...this.
(expand_cmpmem): New function.  Pass down equality only flag to
cmpmem expand.
(emit_block_cmp_via_cmpmem): Add an argument for equality only
flag and call expand_cmpmem instead of expand_cmpstrn_or_cmpmem.
(emit_block_cmp_hints): Call emit_block_cmp_via_cmpmem with
equality only flag.
* expr.h (expand_cmpstrn, expand_cmpmem): Declare.
* builtins.cc (expand_builtin_strcmp, expand_builtin_strncmp):
Call expand_cmpstrn instead of expand_cmpstrn_or_cmpmem.
* config/i386/i386.md (cmpmemsi): Add the sixth operand for
equality only flag.
* config/rs6000/rs6000.md (cmpmemsi): Likewise.
* config/s390/s390.md (cmpmemsi): Likewise.
* doc/md.texi (cmpmem): Modify the document and add an operand
for equality only flag.
This doesn't appear to fix a bug (no bug referenced) and I don't see a 
version of this patch posted before stage1 closed (Nov 19).  So I think 
this needs to defer until gcc-15.


jeff


[PATCH] libgccjit: Add type checks in gcc_jit_block_add_assignment_op

2023-12-07 Thread Antoni Boucher
Hi.
This patch adds checks to gcc_jit_block_add_assignment_op to make sure it
is only ever called on numeric types.

With the previous patch, this might require a change to also allow
vector types here.

Thanks for the review.
From 932048619671c61af224708a3da484b9f54a30a3 Mon Sep 17 00:00:00 2001
From: Antoni Boucher 
Date: Wed, 18 Oct 2023 18:33:18 -0400
Subject: [PATCH] libgccjit: Add type checks in gcc_jit_block_add_assignment_op

gcc/jit/ChangeLog:

	* libgccjit.cc (RETURN_IF_FAIL_PRINTF3): New macro.
	(gcc_jit_block_add_assignment_op): Add numeric checks.

gcc/testsuite/ChangeLog:

	* jit.dg/test-error-bad-assignment-op.c: New test.
---
 gcc/jit/libgccjit.cc  | 21 +++
 .../jit.dg/test-error-bad-assignment-op.c | 57 +++
 2 files changed, 78 insertions(+)
 create mode 100644 gcc/testsuite/jit.dg/test-error-bad-assignment-op.c

diff --git a/gcc/jit/libgccjit.cc b/gcc/jit/libgccjit.cc
index 0451b4df7f9..eb6817a0a99 100644
--- a/gcc/jit/libgccjit.cc
+++ b/gcc/jit/libgccjit.cc
@@ -267,6 +267,16 @@ struct gcc_jit_extended_asm : public gcc::jit::recording::extended_asm
   }\
   JIT_END_STMT
 
+#define RETURN_IF_FAIL_PRINTF3(TEST_EXPR, CTXT, LOC, ERR_FMT, A0, A1, A2) \
+  JIT_BEGIN_STMT			\
+if (!(TEST_EXPR))			\
+  {\
+	jit_error ((CTXT), (LOC), "%s: " ERR_FMT,\
+		   __func__, (A0), (A1), (A2));			\
+	return;			\
+  }\
+  JIT_END_STMT
+
 #define RETURN_IF_FAIL_PRINTF4(TEST_EXPR, CTXT, LOC, ERR_FMT, A0, A1, A2, A3) \
   JIT_BEGIN_STMT			\
 if (!(TEST_EXPR))			\
@@ -2890,6 +2900,17 @@ gcc_jit_block_add_assignment_op (gcc_jit_block *block,
 lvalue->get_type ()->get_debug_string (),
 rvalue->get_debug_string (),
 rvalue->get_type ()->get_debug_string ());
+  // TODO: check if it is a numeric vector?
+  RETURN_IF_FAIL_PRINTF3 (
+lvalue->get_type ()->is_numeric () && rvalue->get_type ()->is_numeric (), ctxt, loc,
+"gcc_jit_block_add_assignment_op %s has non-numeric lvalue %s (type: %s)",
+gcc::jit::binary_op_reproducer_strings[op],
+lvalue->get_debug_string (), lvalue->get_type ()->get_debug_string ());
+  RETURN_IF_FAIL_PRINTF3 (
+rvalue->get_type ()->is_numeric () && rvalue->get_type ()->is_numeric (), ctxt, loc,
+"gcc_jit_block_add_assignment_op %s has non-numeric rvalue %s (type: %s)",
+gcc::jit::binary_op_reproducer_strings[op],
+rvalue->get_debug_string (), rvalue->get_type ()->get_debug_string ());
 
   gcc::jit::recording::statement *stmt = block->add_assignment_op (loc, lvalue, op, rvalue);
 
diff --git a/gcc/testsuite/jit.dg/test-error-bad-assignment-op.c b/gcc/testsuite/jit.dg/test-error-bad-assignment-op.c
new file mode 100644
index 000..683ebbfb1fe
--- /dev/null
+++ b/gcc/testsuite/jit.dg/test-error-bad-assignment-op.c
@@ -0,0 +1,57 @@
+#include 
+#include 
+
+#include "libgccjit.h"
+
+#include "harness.h"
+
+void
+create_code (gcc_jit_context *ctxt, void *user_data)
+{
+  /* Let's try to inject the equivalent of:
+
+ void
+ test_fn ()
+ {
+const char *variable;
+variable += "test";
+ }
+
+ and verify that the API complains about the mismatching types
+ in the assignments.
+  */
+  gcc_jit_type *void_type =
+gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_VOID);
+  gcc_jit_type *const_char_ptr_type =
+gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_CONST_CHAR_PTR);
+
+  gcc_jit_function *func =
+gcc_jit_context_new_function (ctxt, NULL,
+  GCC_JIT_FUNCTION_EXPORTED,
+  void_type,
+  "test_fn",
+  0, NULL,
+  0);
+
+  gcc_jit_lvalue *variable = gcc_jit_function_new_local (func, NULL, const_char_ptr_type, "variable");
+  gcc_jit_block *initial =
+gcc_jit_function_new_block (func, "initial");
+  gcc_jit_rvalue *string =
+gcc_jit_context_new_string_literal (ctxt, "test");
+  gcc_jit_block_add_assignment_op (initial, NULL, variable, GCC_JIT_BINARY_OP_PLUS, string);
+
+  gcc_jit_block_end_with_void_return (initial, NULL);
+}
+
+void
+verify_code (gcc_jit_context *ctxt, gcc_jit_result *result)
+{
+  CHECK_VALUE (result, NULL);
+
+  /* Verify that the correct error messages were emitted.  */
+  CHECK_STRING_VALUE (gcc_jit_context_get_first_error (ctxt),
+		  "gcc_jit_block_add_assignment_op:"
+  " gcc_jit_block_add_assignment_op GCC_JIT_BINARY_OP_PLUS"
+  " has non-numeric lvalue variable (type: const char *)");
+}
+
-- 
2.43.0



[PATCH] libgccjit: Make is_int return false on vector types

2023-12-07 Thread Antoni Boucher
Hi.
This patch changes the function is_int to return false on vector types.
Thanks for the review.
From 60ebfb998bd349ca2f05b115de5452378027e4de Mon Sep 17 00:00:00 2001
From: Antoni Boucher 
Date: Thu, 26 Oct 2023 19:17:55 -0400
Subject: [PATCH] libgccjit: Make is_int return false on vector types

gcc/jit/ChangeLog:

	* jit-recording.h (is_numeric_vector, vector_type::new_int): New
	functions.
	* libgccjit.cc (gcc_jit_context_new_unary_op,
	gcc_jit_context_new_binary_op): add checks for
	is_numeric_vector.

gcc/testsuite/ChangeLog:

	* jit.dg/test-reflection.c: Add check to make sure
	gcc_jit_type_is_integral returns 0 on a vector type.
---
 gcc/jit/jit-recording.h| 12 +++-
 gcc/jit/libgccjit.cc   |  4 ++--
 gcc/testsuite/jit.dg/test-reflection.c |  1 +
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/gcc/jit/jit-recording.h b/gcc/jit/jit-recording.h
index 4a8082991fb..ffadbe968af 100644
--- a/gcc/jit/jit-recording.h
+++ b/gcc/jit/jit-recording.h
@@ -567,6 +567,7 @@ public:
   virtual bool is_int () const = 0;
   virtual bool is_float () const = 0;
   virtual bool is_bool () const = 0;
+  virtual bool is_numeric_vector () const { return false; }
   virtual type *is_pointer () = 0;
   virtual type *is_volatile () { return NULL; }
   virtual type *is_restrict () { return NULL; }
@@ -701,9 +702,10 @@ public:
 
   size_t get_size () final override { return m_other_type->get_size (); };
 
-  bool is_int () const final override { return m_other_type->is_int (); }
+  bool is_int () const override { return m_other_type->is_int (); }
   bool is_float () const final override { return m_other_type->is_float (); }
   bool is_bool () const final override { return m_other_type->is_bool (); }
+  bool is_numeric_vector () const override { return m_other_type->is_numeric_vector (); }
   type *is_pointer () final override { return m_other_type->is_pointer (); }
   type *is_array () final override { return m_other_type->is_array (); }
   struct_ *is_struct () final override { return m_other_type->is_struct (); }
@@ -826,6 +828,14 @@ public:
   : decorated_type (other_type),
 m_num_units (num_units) {}
 
+  bool is_int () const final override {
+return false;
+  }
+
+  bool is_numeric_vector () const final override {
+return true;
+  }
+
   size_t get_num_units () const { return m_num_units; }
 
   vector_type *dyn_cast_vector_type () final override { return this; }
diff --git a/gcc/jit/libgccjit.cc b/gcc/jit/libgccjit.cc
index 0451b4df7f9..852f4103839 100644
--- a/gcc/jit/libgccjit.cc
+++ b/gcc/jit/libgccjit.cc
@@ -2114,7 +2114,7 @@ gcc_jit_context_new_unary_op (gcc_jit_context *ctxt,
 op);
   RETURN_NULL_IF_FAIL (result_type, ctxt, loc, "NULL result_type");
   RETURN_NULL_IF_FAIL_PRINTF3 (
-result_type->is_numeric (), ctxt, loc,
+result_type->is_numeric () || result_type->is_numeric_vector (), ctxt, loc,
 "gcc_jit_unary_op %s with operand %s "
 "has non-numeric result_type: %s",
 gcc::jit::unary_op_reproducer_strings[op],
@@ -2171,7 +2171,7 @@ gcc_jit_context_new_binary_op (gcc_jit_context *ctxt,
 b->get_debug_string (),
 b->get_type ()->get_debug_string ());
   RETURN_NULL_IF_FAIL_PRINTF4 (
-result_type->is_numeric (), ctxt, loc,
+result_type->is_numeric () || result_type->is_numeric_vector (), ctxt, loc,
 "gcc_jit_binary_op %s with operands a: %s b: %s "
 "has non-numeric result_type: %s",
 gcc::jit::binary_op_reproducer_strings[op],
diff --git a/gcc/testsuite/jit.dg/test-reflection.c b/gcc/testsuite/jit.dg/test-reflection.c
index 112a2455c07..afa76ff81f6 100644
--- a/gcc/testsuite/jit.dg/test-reflection.c
+++ b/gcc/testsuite/jit.dg/test-reflection.c
@@ -59,6 +59,7 @@ verify_code (gcc_jit_context *ctxt, gcc_jit_result *result)
   CHECK (vec_type != double_type);
   CHECK_VALUE (gcc_jit_vector_type_get_element_type(vector_type), double_type);
   CHECK_VALUE (gcc_jit_vector_type_get_num_units(vector_type), 4);
+  CHECK (!gcc_jit_type_is_integral(vec_type));
 
   CHECK (!gcc_jit_type_is_pointer(double_type));
   CHECK_VALUE (gcc_jit_type_is_pointer(gcc_jit_type_get_pointer(double_type)), double_type);
-- 
2.43.0



[PATCH] libgccjit: Make new_array_type take unsigned long

2023-12-07 Thread Antoni Boucher
Hi.
This patch updates gcc_jit_context_new_array_type to take the size as
an unsigned long instead of an int, to allow creating bigger array
types.

I haven't written the ChangeLog yet because I wasn't sure it's allowed
to change the type of a function like that.
If it isn't, what would you suggest?

Thanks.
From 59b7e8af99d5f680e6d622631b1b75609fe1b982 Mon Sep 17 00:00:00 2001
From: Antoni Boucher 
Date: Sat, 4 Mar 2023 00:44:49 -0500
Subject: [PATCH] libgccjit: Make new_array_type take unsigned long

---
 gcc/jit/jit-playback.cc  | 2 +-
 gcc/jit/jit-playback.h   | 2 +-
 gcc/jit/jit-recording.cc | 6 +++---
 gcc/jit/jit-recording.h  | 8 
 gcc/jit/libgccjit.cc | 2 +-
 gcc/jit/libgccjit.h  | 2 +-
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc
index 537f3b1..9da05b0b4b1 100644
--- a/gcc/jit/jit-playback.cc
+++ b/gcc/jit/jit-playback.cc
@@ -326,7 +326,7 @@ playback::type *
 playback::context::
 new_array_type (playback::location *loc,
 		playback::type *element_type,
-		int num_elements)
+		unsigned long num_elements)
 {
   gcc_assert (element_type);
 
diff --git a/gcc/jit/jit-playback.h b/gcc/jit/jit-playback.h
index b0166f8f6ce..848cb5f25e8 100644
--- a/gcc/jit/jit-playback.h
+++ b/gcc/jit/jit-playback.h
@@ -69,7 +69,7 @@ public:
   type *
   new_array_type (location *loc,
 		  type *element_type,
-		  int num_elements);
+		  unsigned long num_elements);
 
   field *
   new_field (location *loc,
diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index 9b5b8005ebe..4ab4f0df25b 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -840,7 +840,7 @@ recording::context::get_int_type (int num_bytes, int is_signed)
 recording::type *
 recording::context::new_array_type (recording::location *loc,
 recording::type *element_type,
-int num_elements)
+unsigned long num_elements)
 {
   if (struct_ *s = element_type->dyn_cast_struct ())
 if (!s->get_fields ())
@@ -3113,7 +3113,7 @@ recording::string *
 recording::array_type::make_debug_string ()
 {
   return string::from_printf (m_ctxt,
-			  "%s[%d]",
+			  "%s[%ld]",
 			  m_element_type->get_debug_string (),
 			  m_num_elements);
 }
@@ -3129,7 +3129,7 @@ recording::array_type::write_reproducer (reproducer )
 	   "gcc_jit_context_new_array_type (%s,\n"
 	   "%s, /* gcc_jit_location *loc */\n"
 	   "%s, /* gcc_jit_type *element_type */\n"
-	   "%i); /* int num_elements */\n",
+	   "%li); /* int num_elements */\n",
 	   id,
 	   r.get_identifier (get_context ()),
 	   r.get_identifier (m_loc),
diff --git a/gcc/jit/jit-recording.h b/gcc/jit/jit-recording.h
index 4a8082991fb..0bb035a5ae5 100644
--- a/gcc/jit/jit-recording.h
+++ b/gcc/jit/jit-recording.h
@@ -91,7 +91,7 @@ public:
   type *
   new_array_type (location *loc,
 		  type *element_type,
-		  int num_elements);
+		  unsigned long num_elements);
 
   field *
   new_field (location *loc,
@@ -859,7 +859,7 @@ class array_type : public type
   array_type (context *ctxt,
 	  location *loc,
 	  type *element_type,
-	  int num_elements)
+	  unsigned long num_elements)
   : type (ctxt),
 m_loc (loc),
 m_element_type (element_type),
@@ -873,7 +873,7 @@ class array_type : public type
   bool is_bool () const final override { return false; }
   type *is_pointer () final override { return NULL; }
   type *is_array () final override { return m_element_type; }
-  int num_elements () { return m_num_elements; }
+  unsigned long num_elements () { return m_num_elements; }
   bool is_signed () const final override { return false; }
 
   void replay_into (replayer *) final override;
@@ -885,7 +885,7 @@ class array_type : public type
  private:
   location *m_loc;
   type *m_element_type;
-  int m_num_elements;
+  unsigned long m_num_elements;
 };
 
 class function_type : public type
diff --git a/gcc/jit/libgccjit.cc b/gcc/jit/libgccjit.cc
index 0451b4df7f9..1424a5c305f 100644
--- a/gcc/jit/libgccjit.cc
+++ b/gcc/jit/libgccjit.cc
@@ -766,7 +766,7 @@ gcc_jit_type *
 gcc_jit_context_new_array_type (gcc_jit_context *ctxt,
 gcc_jit_location *loc,
 gcc_jit_type *element_type,
-int num_elements)
+unsigned long num_elements)
 {
   RETURN_NULL_IF_FAIL (ctxt, NULL, loc, "NULL context");
   JIT_LOG_FUNC (ctxt->get_logger ());
diff --git a/gcc/jit/libgccjit.h b/gcc/jit/libgccjit.h
index 749f6c24177..88399b6124d 100644
--- a/gcc/jit/libgccjit.h
+++ b/gcc/jit/libgccjit.h
@@ -661,7 +661,7 @@ extern gcc_jit_type *
 gcc_jit_context_new_array_type (gcc_jit_context *ctxt,
 gcc_jit_location *loc,
 gcc_jit_type *element_type,
-int num_elements);
+unsigned long num_elements);
 
 /* Struct-handling.  */
 
-- 
2.43.0



[PATCH] libgccjit: Fix get_size of size_t

2023-12-07 Thread Antoni Boucher
Hi.
This patch fixes getting the size of size_t (bug 112910).

There's one issue with this patch: like every other feature that checks
for target-specific stuff, it requires a compilation before actually
fetching the size of the type.
Which means that getting the size before a compilation might be wrong
(and I actually believe is wrong on x86-64).

I was wondering if we should always implicitly do the first
compilation to gather the correct info: this would fix this issue and
all the others that we have due to that.
I'm not sure what would be the performance implication.

Another solution that I have been thinking about for a while now would
be to have another frontend libgccaot (I don't like that name), which
is like libgccjit but removes the JIT part so that we get access to the
target stuff directly and would remove the need for having a separation
between recording and playback as far as I understand.
That's a long-term solution, but I wanted to share the idea now and
gather your thoughts on that.

Thanks for the review.
From 37d25e55a0c79893c7ea7f4cb9f0842b8a9b4906 Mon Sep 17 00:00:00 2001
From: Antoni Boucher 
Date: Fri, 3 Nov 2023 17:49:18 -0400
Subject: [PATCH] libgccjit: Fix get_size of size_t

gcc/jit/ChangeLog:
	PR jit/112910
	* jit-recording.cc (recording::memento_of_get_type::get_size):
	Correctly compute the size of size_t.
---
 gcc/jit/jit-recording.cc | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index 9b5b8005ebe..ea1f76d4415 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -2392,7 +2392,34 @@ recording::memento_of_get_type::get_size ()
   size = LONG_DOUBLE_TYPE_SIZE;
   break;
 case GCC_JIT_TYPE_SIZE_T:
-  size = MAX_BITS_PER_WORD;
+  /* Compare with tree.cc's build_common_tree_nodes.  */
+  if (strcmp (SIZE_TYPE, "unsigned int") == 0)
+size = INT_TYPE_SIZE;
+  else if (strcmp (SIZE_TYPE, "long unsigned int") == 0)
+size = LONG_TYPE_SIZE;
+  else if (strcmp (SIZE_TYPE, "long long unsigned int") == 0)
+size = LONG_LONG_TYPE_SIZE;
+  else if (strcmp (SIZE_TYPE, "short unsigned int") == 0)
+size = SHORT_TYPE_SIZE;
+  else
+  {
+int i;
+
+for (i = 0; i < NUM_INT_N_ENTS; i++)
+  if (int_n_enabled_p[i])
+  {
+fprintf (stderr, "%d\n", i);
+char name[50], altname[50];
+sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
+sprintf (altname, "__int%d__ unsigned", int_n_data[i].bitsize);
+
+if (strcmp (name, SIZE_TYPE) == 0 || strcmp (altname, SIZE_TYPE) == 0)
+{
+  return int_n_data[i].bitsize / BITS_PER_UNIT;
+}
+  }
+gcc_unreachable ();
+  }
   break;
 default:
   /* As this function is called by
-- 
2.43.0



[Committed V2] RISC-V: Support interleave vector with different step sequence

2023-12-07 Thread Juzhe-Zhong
This patch fixes 64 ICEs in full coverage testing, since they all happen for
the same reason.

Before this patch:

internal compiler error: in expand_const_vector, at config/riscv/riscv-v.cc:1270

appears 400 times in full coverage testing report.

The root cause is we didn't support interleave vector with different steps.

Here is the story:

We already supported interleave with single same step, that is:
e.g. v = { 0, 100, 2, 102, 4, 104, ... }
This sequence can be interpreted as a vector interleaved from 2 separate sequences:
sequence1 = { 0, 2, 4, ... } and sequence2 = { 100, 102, 104, ... }.
Their steps are both 2.

However, we didn't support interleaved vectors whose sequences have different
steps, which causes an ICE in such situations.

This patch supports interleaved vectors with different steps in the following 2
situations:

1. When vector can be extended EEW:

Case 1: { 0, 0, 1, 0, 2, 0, ... }
It's interleaved by sequence1 = { 0, 1, 2, ... } and sequence2 = { 0, 0, 0, ... }
Suppose the original vector can be extended EEW, e.g. mode = RVVM1SImode.
Then such interleaved vector can be achieved with { 1, 2, 3, ... } with 
RVVM1DImode.
So, for this situation the codegen is pretty efficient and clean:

.MASK_LEN_STORE (, 32B, { -1, ... }, 16, 0, { 0, 0, 1, 0, 2, 0, ... });

->
   vsetvli  a5,zero,e64,m8,ta,ma
   vid.v    v8
   vsetivli zero,16,e32,m8,ta,ma
   vse32.v  v8,0(a4)

Case 2: { 0, 100, 1, 100, 2, 100, ... }

.MASK_LEN_STORE (, 32B, { -1, ... }, 16, 0, { 0, 100, 1, 100, 2, 100, ... });

->

   vsetvli  a1,zero,e64,m8,ta,ma
   vid.v    v8
   li   a7,100
   vand.vx  v8,v8,a4
   vsetivli zero,16,e32,m8,ta,ma
   vse32.v  v8,0(a5)

2. When vector can't be extended EEW:

Since we can't use EEW = 64, for example, RVVM1SImode in -march=rv32gc_zve32f,
we use vmerge to combine the sequence.

.MASK_LEN_STORE (, 32B, { -1, ... }, 16, 0, { 200, 100, 201, 103, 202, 106, 
... });

1. Generate sequence1 = { 200, 200, 201, 201, 202, 202, ... } and sequence2 = { 
100, 100, 103, 103, 106, 106, ... }
2. Merge sequence1 and sequence2 with mask { 0, 1, 0, 1, ... }

gcc/ChangeLog:

* config/riscv/riscv-protos.h (expand_vec_series): Adapt function.
* config/riscv/riscv-v.cc (rvv_builder::double_steps_npatterns_p): New 
function.
(expand_vec_series): Adapt function.
(expand_const_vector): Support new interleave vector with different 
step.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/slp-interleave-1.c: New test.
* gcc.target/riscv/rvv/autovec/slp-interleave-2.c: New test.
* gcc.target/riscv/rvv/autovec/slp-interleave-3.c: New test.
* gcc.target/riscv/rvv/autovec/slp-interleave-4.c: New test.

---
 gcc/config/riscv/riscv-protos.h   |   2 +-
 gcc/config/riscv/riscv-v.cc   | 148 --
 .../riscv/rvv/autovec/slp-interleave-1.c  |  17 ++
 .../riscv/rvv/autovec/slp-interleave-2.c  |  18 +++
 .../riscv/rvv/autovec/slp-interleave-3.c  |  19 +++
 .../riscv/rvv/autovec/slp-interleave-4.c  |  19 +++
 6 files changed, 211 insertions(+), 12 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-4.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index bfbd2bf0d18..a6f204f3066 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -543,7 +543,7 @@ void expand_tuple_move (rtx *);
 bool expand_block_move (rtx, rtx, rtx);
 machine_mode preferred_simd_mode (scalar_mode);
 machine_mode get_mask_mode (machine_mode);
-void expand_vec_series (rtx, rtx, rtx);
+void expand_vec_series (rtx, rtx, rtx, rtx = 0);
 void expand_vec_init (rtx, rtx);
 void expand_vec_perm (rtx, rtx, rtx, rtx);
 void expand_select_vl (rtx *);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 71cb7567f1a..9b99d0aca84 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -432,6 +432,7 @@ public:
 
   bool single_step_npatterns_p () const;
   bool npatterns_all_equal_p () const;
+  bool interleaved_stepped_npatterns_p () const;
 
   machine_mode new_mode () const { return m_new_mode; }
   scalar_mode inner_mode () const { return m_inner_mode; }
@@ -668,6 +669,27 @@ rvv_builder::single_step_npatterns_p () const
   return true;
 }
 
+/* Return true if the permutation consists of two
+   interleaved patterns with a constant step each.
+   TODO: We currently only support NPATTERNS = 2.  */
+bool
+rvv_builder::interleaved_stepped_npatterns_p () const
+{
+  if (npatterns () != 2 || nelts_per_pattern () != 3)
+return false;
+  for (unsigned int i = 0; i < npatterns (); i++)
+{
+  poly_int64 ele0 = rtx_to_poly_int64 (elt (i));
+ 

Re: [PATCH] treat argp-based mem as frame related in dse

2023-12-07 Thread Jeff Law




On 12/6/23 02:27, Jiufu Guo wrote:

Hi,

The issue mentioned in PR112525 would be able to be handled by
updating dse.cc to treat arg_pointer_rtx similarly with frame_pointer_rtx.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=30271#c10 also mentioned
this idea.
 
One thing: the argp area may be used to pass arguments to the callee, so it is
necessary to check whether call insns are using that mem.

Bootstrap  pass on ppc64{,le} and x86_64.
Is this ok for trunk?

BR,
Jeff (Jiufu Guo)


PR rtl-optimization/112525

gcc/ChangeLog:

* dse.cc (get_group_info): Add arg_pointer_rtx as frame_related.
(check_mem_read_rtx): Add parameter to indicate if it is checking mem
for call insn.
(scan_insn): Add mem checking on call usage.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr112525.c: New test.
So conceptually the first chunk makes sense.  Though I do worry about 
Andrew's comment about it causing a bootstrap failure.  Even though it
was 15 years ago, it remains worrisome.




@@ -2368,7 +2370,8 @@ check_mem_read_rtx (rtx *loc, bb_info_t bb_info)
  
  	  /* If this read is just reading back something that we just

 stored, rewrite the read.  */
- if (store_info->rhs
+ if (!used_in_call
+ && store_info->rhs
  && store_info->group_id == -1
  && store_info->cse_base == base
  && known_subrange_p (offset, width, store_info->offset,
@@ -2650,6 +2653,12 @@ scan_insn (bb_info_t bb_info, rtx_insn *insn, int 
max_active_local_stores)
 that is not relative to the frame.  */
  add_non_frame_wild_read (bb_info);
  
+  for (rtx link = CALL_INSN_FUNCTION_USAGE (insn);

+  link != NULL_RTX;
+  link = XEXP (link, 1))
+   if (GET_CODE (XEXP (link, 0)) == USE && MEM_P (XEXP (XEXP (link, 0),0)))
+ check_mem_read_rtx ( (XEXP (link, 0),0), bb_info, true);
I'm having a bit of a hard time convincing myself this is correct 
though.  I can't see how rewriting the load to read the source of the 
prior store is unsafe.  If that fixes a problem, then it would seem like 
we've gone wrong before here -- perhaps failing to use the fusage loads 
to "kill" any available stores to the same or aliased memory locations.


Assuming we get to a point where we think this or something similar to 
it is safe, then we should retest pr30271 and if it's fixed reference it 
in the ChangeLog.


Jeff


Re: [PATCH v5] Introduce strub: machine-independent stack scrubbing

2023-12-07 Thread Alexandre Oliva
On Dec  6, 2023, Jan Hubicka  wrote:

> There is also access attribute which speaks directly about individual
> arugments, perhaps you want to drop this one too?

Ah, I've looked a little into it, and now I have a vague recollection of
why I don't mess with them: they only apply to arguments of pointer or
reference type, and those are not (supposed to be) affected by the
changes, not by indirection (which is what would make some "fn spec"
notes impossible to convey), not by insertion of synthetic parameters.
Those are placed at the end in part to avoid messing with
parameter-index attributes.  So attribute access can safely be left
alone.

Thanks for raising the issue.  Maybe there should be at least a comment
there, and perhaps some asserts to check that pointer and reference
types don't make it to indirect_parms.

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH v5] Introduce strub: machine-independent stack scrubbing

2023-12-07 Thread Alexandre Oliva
On Dec  6, 2023, Jan Hubicka  wrote:

> I am sorry for sending this late.

No need to be sorry.  Thank you very much for taking the time to review
and comment on it.

> I think the ipa changes are generally fine.

Phew :-)

>> +static inline bool
>> +strub_always_inline_p (cgraph_node *node)
>> +{
>> +  return lookup_attribute ("always_inline", DECL_ATTRIBUTES (node->decl));
>> +}
> We may want to ahve this as cgraph_node::always_inline_p since there are
> now quite many places we look up this attribute.

Can do.  Would such a global refactoring still be welcome at this stage,
or should it be saved for stage1?  I guess it could still go in, so
simple it is...


>> +/* The strub pass proper adjusts types, signatures, and at-calls calls, and
>> +   splits internal-strub functions.  */
>> +
>> +unsigned int
>> +pass_ipa_strub::execute (function *)
>> +{
>> +  cgraph_node *onode;
>> +
>> +  ipa_strub_set_mode_for_new_functions ();
>> +
>> +  /* First, adjust the signature of at-calls functions.  We adjust types of
>> + at-calls functions first, so that we don't modify types in place unless
>> + strub is explicitly requested.  */

> I think Martin ma have more specific opinion on this, but since this is
> not running as the ipa pass during WPA stage, I think the param
> modification infrastructure is not really that much hepful here. 

Hmm...  I wonder if this is indeed what Martin refers to.  There are two
separate pieces of logic for parm-tweaking, one for "at-calls" strub
functions, that get the signature and the type of the function itself
modified (akin to adding the implicit "this" parameter to a C++
nonstatic member-function), and is implemented under the comment above,
and there's the splitting-out of "internal" strub function bodies into a
clone with a modified signature, that is implemented elsewhere.  The
latter uses cloning and thus (some, but not much) IPA param modification
infrastructure, but the former doesn't IIRC.

>> +/* ??? Maybe we could adjust it instead.  */
>> +if (drop_fnspec)
>> +  remove_named_attribute_unsharing ("fn spec",
>> +_ATTRIBUTES (nftype));

> ipa param modification also doesn't know how to update fn spec, this is
> something we should look into next stage1...
> There is also access attribute which speaks directly about individual
> arugments, perhaps you want to drop this one too?

Hmm, I can't recall whether I've come across it before (it sounds
vaguely familiar, but unless they become "fn spec" (ISTR synthetic "fn
spec"s), I think I'd have dealt with them already.

I'll dig it a little further.

> Are variadic thunks working with scrubbing?

Yeah, the wrapper is rewritten to call va_start itself, and the
split-out wrapped body is modified to call va_copy instead of va_start.

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH] RISC-V: Fix AVL propagation ICE for vleff/vlsegff

2023-12-07 Thread Jeff Law




On 12/7/23 05:15, Robin Dapp wrote:

LGTM.

Btw your vsetvl patch from yesterday fixes the vectorized
strlen/strcmp problems.  Those use vleff as first instruction.

Definitely good news.

jeff


Re: [PATCH] testsuite: scev: expect fail on ilp32

2023-12-07 Thread Jeff Law




On 12/7/23 09:33, Hans-Peter Nilsson wrote:

Date: Mon, 4 Dec 2023 12:58:03 +0100 (CET)
From: Richard Biener 



On Sat, 2 Dec 2023, Hans-Peter Nilsson wrote:

Date: Fri, 1 Dec 2023 08:07:14 +0100 (CET)
From: Richard Biener 
I read from your messages that the testcases pass on arm*-*-*?

Yes: they pass (currently XPASS) on arm-eabi and
arm-unknown-linux-gnueabi, default configurations.  But,
scev-3 and -5 fail with for example -mcpu=cortex-r5


I see.  As said, the testcases test for "cost" things, so that we
"regressed" might mean we really "regressed" here.  Even the x86 -m32
result is questionable.

Of course whether using a single IV makes sense for all archs is
unknown.

Btw, if we turn the testcases into ones that are (sub-)target
specific then we want to again use C code as input.

I think at this point we've lost track and I'm juggling between
removing the testcases or moving them to a place they succeed
(with some specific -mcpu=?)

Richard.


So to not drop the ball(s) on this, here's a patch with your
first alternative: remove them.

Ok?
OK, but give Richard until Monday PM to chime in if he wants to try and 
save them by putting them into a target specific directory.


jeff


Re: [PATCH V2 0/2] RISC-V: Add intrinsics for Bitmanip and Scalar Crypto extensions

2023-12-07 Thread Jeff Law




On 12/7/23 09:59, Christoph Müllner wrote:

On Thu, Dec 7, 2023 at 11:18 AM Liao Shihua  wrote:


In accordance with the suggestions of Christoph Müllner, the following 
amendments are made

Update v1 -> v2:
   1. Rename *_intrinsic-* to *_intrinsic-XLEN.
   2. Typo fix.
   3. Intrinsics with immediate arguments will use macros at -O0.

It's a little patch that just provides a mapping from the RV intrinsics to the
builtin names within GCC.


Thanks for the update!

I think this patchset was not properly tested as I see the tests failing.

Thanks for pointing this out.

I think we're well past the point where if a patchkit doesn't specify 
how it was tested that it should be rejected.  This is standard 
procedure for the rest of the compiler and there's no good reason why we 
should have a lax policy in the RISC-V target files.


Jeff


[committed] libstdc++: Fix misleading typedef name in <format>

2023-12-07 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk.

-- >8 --

This local typedef for uintptr_t was accidentally named uint64_t,
probably from a careless code completion shortcut. We don't need the
typedef at all since it's only used once. Just use __UINTPTR_TYPE__
directly instead.

libstdc++-v3/ChangeLog:

* include/std/format (_Iter_sink):
Remove uint64_t local type.
---
 libstdc++-v3/include/std/format | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 01f0a58392a..04d03e0ceb7 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -2809,7 +2809,6 @@ namespace __format
 requires same_as, _CharT>
 class _Iter_sink<_CharT, _OutIter> : public _Sink<_CharT>
 {
-  using uint64_t = __UINTPTR_TYPE__;
   _OutIter _M_first;
   iter_difference_t<_OutIter> _M_max = -1;
 protected:
@@ -2883,7 +2882,7 @@ namespace __format
  return {__ptr, __bytes / sizeof(_CharT)};
 #endif
// Avoid forming a pointer to a different memory page.
-   uint64_t __off = reinterpret_cast(__ptr) % 1024;
+   const auto __off = reinterpret_cast<__UINTPTR_TYPE__>(__ptr) % 1024;
__n = (1024 - __off) / sizeof(_CharT);
if (__n > 0) [[likely]]
  return {__ptr, static_cast(__n)};
-- 
2.43.0



[committed] libstdc++: Use <cstdint> instead of <stdint.h> in <bits/atomic_wait.h>

2023-12-07 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk. I'll backport this too.

-- >8 --

In r14-5922-g6c8f2d3a08bc01 I added <stdint.h> to <bits/atomic_wait.h>,
so that uintptr_t is declared if that header is compiled as a header
unit. I used  because that's what  already includes,
so it seemed simpler to be consistent. However, this means that name
lookup for uintptr_t in  depends on whether
 has been included by another header first. Whether name lookup
finds std::uintptr_t or ::uintptr_t will depend on include order. This
causes problems when compiling modules with Clang:

bits/atomic_wait.h:251:7: error: 'std::__detail::__waiter_pool_base' has 
different definitions in different modules; first difference is defined here 
found method '_S_for' with body
  _S_for(const void* __addr) noexcept
  ^~~
bits/atomic_wait.h:251:7: note: but in 'tm.' found method '_S_for' with 
different body
  _S_for(const void* __addr) noexcept
  ^~~

By including  we would ensure that name lookup always finds the
name in namespace std. Alternatively, we can stop including 
for those types, so that we don't declare the entire contents of
 when we only need a couple of types from it. This patch does
the former, which is appropriate for backporting.

libstdc++-v3/ChangeLog:

* include/bits/atomic_wait.h: Include <cstdint> instead of
<stdint.h>.
---
 libstdc++-v3/include/bits/atomic_wait.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/atomic_wait.h 
b/libstdc++-v3/include/bits/atomic_wait.h
index 1460b1d8d5c..8e01a9c518d 100644
--- a/libstdc++-v3/include/bits/atomic_wait.h
+++ b/libstdc++-v3/include/bits/atomic_wait.h
@@ -35,7 +35,7 @@
 #include 
 
 #if __glibcxx_atomic_wait
-#include 
+#include 
 #include 
 #include 
 #include 
-- 
2.43.0



[committed] libstdc++: Fix recent changes to __glibcxx_assert [PR112882]

2023-12-07 Thread Jonathan Wakely
Tested x86_64-linux. Pushed to trunk.

-- >8 --

The changes in r14-6198-g5e8a30d8b8f4d7 were broken, as I used
_GLIBCXX17_CONSTEXPR for the 'if _GLIBCXX17_CONSTEXPR (true)' condition,
forgetting that it would also be used for the is_constant_evaluated()
check. Using 'if constexpr (std::is_constant_evaluated())' is a bug.

Additionally, relying on __glibcxx_assert_fail to give a "not a constant
expression" error is a problem because at -O0 an undefined reference to
__glibcxx_assert_fail is present in the compiled code. This means you
can't use libstdc++ headers without also linking to libstdc++ for the
symbol definition.

This fix rewrites the __glibcxx_assert macro again. This still avoids
doing the duplicate checks, once for constexpr and once at runtime (if
_GLIBCXX_ASSERTIONS is defined). When _GLIBCXX_ASSERTIONS is defined we
still rely on __glibcxx_assert_fail to give a "not a constant
expression" error during constant evaluation (because when assertions
are defined it's not a problem to emit a reference to the symbol). But
when that macro is not defined, we use a new inline (but not constexpr)
overload of __glibcxx_assert_fail to cause compilation to fail. That
inline function doesn't cause an undefined reference to a symbol in the
library (and will be optimized away anyway).

We can also add always_inline to the __is_constant_evaluated function,
although this doesn't actually matter for -O0 and it's always inlined
with any optimization enabled.

libstdc++-v3/ChangeLog:

PR libstdc++/112882
* include/bits/c++config (__is_constant_evaluated): Add
always_inline attribute.
(_GLIBCXX_DO_ASSERT): Remove macro.
(__glibcxx_assert): Define separately for assertions-enabled and
constexpr-only cases.
---
 libstdc++-v3/include/bits/c++config | 33 -
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/libstdc++-v3/include/bits/c++config 
b/libstdc++-v3/include/bits/c++config
index 284d24d933f..25d37428fc1 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -538,6 +538,7 @@ namespace std
   // This can be used without checking if the compiler supports the feature.
   // The macro _GLIBCXX_HAVE_IS_CONSTANT_EVALUATED can be used to check if
   // the compiler support is present to make this function work as expected.
+  __attribute__((__always_inline__))
   _GLIBCXX_CONSTEXPR inline bool
   __is_constant_evaluated() _GLIBCXX_NOEXCEPT
   {
@@ -598,19 +599,31 @@ namespace std
 #endif
 
 #if defined(_GLIBCXX_ASSERTIONS)
-# define _GLIBCXX_DO_ASSERT true
-#elif _GLIBCXX_HAVE_IS_CONSTANT_EVALUATED
-# define _GLIBCXX_DO_ASSERT std::__is_constant_evaluated()
-#else
-# define _GLIBCXX_DO_ASSERT false
-#endif
-
+// Enable runtime assertion checks, and also check in constant expressions.
 # define __glibcxx_assert(cond)
\
   do { \
-if _GLIBCXX17_CONSTEXPR (_GLIBCXX_DO_ASSERT)   \
-  if (__builtin_expect(!bool(cond), false))
\
-   _GLIBCXX_ASSERT_FAIL(cond); \
+if (__builtin_expect(!bool(cond), false))  \
+  _GLIBCXX_ASSERT_FAIL(cond);  \
   } while (false)
+#elif _GLIBCXX_HAVE_IS_CONSTANT_EVALUATED
+// Only check assertions during constant evaluation.
+namespace std
+{
+  __attribute__((__always_inline__,__visibility__("default")))
+  inline void
+  __glibcxx_assert_fail()
+  { }
+}
+# define __glibcxx_assert(cond)
\
+  do { \
+if (std::__is_constant_evaluated())
\
+  if (__builtin_expect(!bool(cond), false))
\
+   std::__glibcxx_assert_fail();   \
+  } while (false)
+#else
+// Don't check any assertions.
+# define __glibcxx_assert(cond)
+#endif
 
 // Macro indicating that TSAN is in use.
 #if __SANITIZE_THREAD__
-- 
2.43.0



Re: [PATCH v7] Introduce attribute sym_alias

2023-12-07 Thread Alexandre Oliva
On Dec  6, 2023, Jan Hubicka  wrote:

>> On Nov 30, 2023, Jan Hubicka  wrote:
>> 
>> >> +  if (VAR_P (replaced))
>> >> + varpool_node::create_alias (sym_node->decl, replacement);
>> >> +  else
>> >> + cgraph_node::create_alias (sym_node->decl, replacement);
>> 
>> Unfortunately, this change didn't work.  Several of the C++ tests
>> regressed with it.  Going back to same-body aliases, they work.
>> 
>> I suspect this may have to do with the infrastructure put in to deal
>> with cdtors clones.

> Do you have short testcase for this?

attr-sym-alias-[13].C are not too big, and show various regressions with
the incremental patchlet below (to be applied on top of v7], but here's
a minimal testcase that triggers the problem:

struct foo {
  __attribute__ ((__sym_alias__ ("FOODTR_A"))) ~foo() {}
};
foo bar;

> THe main oddities with same body
> aliases comes from the fact that C++ FE creates them early during
> parsing before all declaration flags are finished.

*nod*, this is probably why some of the cdtor and even sym_aliases for
inline member functions (FOOBAR_B and FOOBAR_C in attr-sym-alias-1.C,
and FOOCLS_INT_VIRT in attr-sym-alias-3.C) fail to be output when not
using create_same_body_alias.

diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index abfbbbf6294..65ce610f2d4 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -2717,8 +2717,8 @@ create_sym_alias_decl (tree decl, tree id)
 // node = varpool_node::create_extra_name_alias (clone, decl);
 node = varpool_node::create_alias (clone, decl);
   else
-node = cgraph_node::create_same_body_alias (clone, decl);
-// node = cgraph_node::create_alias (clone, decl);
+// node = cgraph_node::create_same_body_alias (clone, decl);
+node = cgraph_node::create_alias (clone, decl);
   if (symtab_node *dnode = symtab_node::get_create (decl))
 node->copy_visibility_from (dnode);
 
diff --git a/gcc/symtab.cc b/gcc/symtab.cc
index 44df52095c1..e40240077f2 100644
--- a/gcc/symtab.cc
+++ b/gcc/symtab.cc
@@ -1993,8 +1993,8 @@ symtab_node::remap_sym_alias_target (tree replaced, tree 
replacement)
// varpool_node::create_extra_name_alias (sym_node->decl, replacement);
varpool_node::create_alias (sym_node->decl, replacement);
   else
-   cgraph_node::create_same_body_alias (sym_node->decl, replacement);
-   // cgraph_node::create_alias (sym_node->decl, replacement);
+   // cgraph_node::create_same_body_alias (sym_node->decl, replacement);
+   cgraph_node::create_alias (sym_node->decl, replacement);
   sym_node->copy_visibility_from (repl_node);
 }
 }

> Fixup copies some flags such as inline flags, visibility and comdat
> groups which can change during parsing process.

*nod*, I've run into some of that, and had to add visibility propagation
to the sym_aliases to make up for it.

But I'm not sure that that's the issue you're getting at.  Some
sym_aliases don't even get output with this patchlet.  FOODTR_A* aliases
get created during parsing, when maybe_clone_body creates the dtor
clones and their cgraph nodes to set their comdat group.  ISTM that it's
the later visibility copying because of same body alias that enables the
alias declaration to get the same (final) visibility as the declarations
they alias.  Which suggests that there could be another way to ensure
the update takes place, but the best spot for it has so far eluded me.

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


[PATCH] RISC-V: XFAIL scan dump fails for autovec PR111311

2023-12-07 Thread Edwin Lu
Clean up scan dump failures Juzhe mentioned could be ignored for now
with vector enabled. This will help reduce noise and make it more obvious
if a bug or regression is introduced. The failures that are still reported
are either execution failures or failures that are also present on armv8-a+sve

gcc/testsuite/ChangeLog:

* c-c++-common/vector-subscript-4.c: xfail testcase
* g++.dg/tree-ssa/pr83518.C: ditto
* gcc.dg/signbit-2.c: ditto
* gcc.dg/tree-ssa/cunroll-16.c: ditto
* gcc.dg/tree-ssa/gen-vect-34.c: ditto
* gcc.dg/tree-ssa/loop-bound-1.c: ditto
* gcc.dg/tree-ssa/loop-bound-2.c: ditto
* gcc.dg/tree-ssa/pr84512.c: remove xfail
* gcc.dg/tree-ssa/predcom-4.c: xfail testcase
* gcc.dg/tree-ssa/predcom-5.c: ditto
* gcc.dg/tree-ssa/predcom-9.c: ditto
* gcc.dg/tree-ssa/reassoc-46.c: ditto
* gcc.dg/tree-ssa/scev-10.c: ditto
* gcc.dg/tree-ssa/scev-11.c: ditto
* gcc.dg/tree-ssa/scev-14.c: ditto
* gcc.dg/tree-ssa/scev-9.c: ditto
* gcc.dg/tree-ssa/split-path-11.c: ditto
* gcc.dg/unroll-8.c: ditto
* gcc.dg/var-expand1.c: ditto
* gcc.dg/vect/pr103116-1.c: ditto
* gcc.dg/vect/pr103116-2.c: ditto
* gfortran.dg/vect/pr83232.f90: ditto
* gfortran.dg/vect/vect-8.f90: ditto

Signed-off-by: Edwin Lu 
---
 gcc/testsuite/c-c++-common/vector-subscript-4.c | 3 ++-
 gcc/testsuite/g++.dg/tree-ssa/pr83518.C | 2 +-
 gcc/testsuite/gcc.dg/signbit-2.c| 5 +++--
 gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c  | 5 +++--
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-34.c | 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/loop-bound-1.c| 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/loop-bound-2.c| 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/pr84512.c | 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c   | 5 +++--
 gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c   | 5 +++--
 gcc/testsuite/gcc.dg/tree-ssa/predcom-9.c   | 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/reassoc-46.c  | 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/scev-10.c | 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/scev-11.c | 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/scev-14.c | 4 +++-
 gcc/testsuite/gcc.dg/tree-ssa/scev-9.c  | 3 ++-
 gcc/testsuite/gcc.dg/tree-ssa/split-path-11.c   | 3 ++-
 gcc/testsuite/gcc.dg/unroll-8.c | 8 +---
 gcc/testsuite/gcc.dg/var-expand1.c  | 3 ++-
 gcc/testsuite/gcc.dg/vect/pr103116-1.c  | 3 ++-
 gcc/testsuite/gcc.dg/vect/pr103116-2.c  | 3 ++-
 gcc/testsuite/gfortran.dg/vect/pr83232.f90  | 3 ++-
 gcc/testsuite/gfortran.dg/vect/vect-8.f90   | 3 ++-
 23 files changed, 52 insertions(+), 29 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/vector-subscript-4.c 
b/gcc/testsuite/c-c++-common/vector-subscript-4.c
index 2c2481f88b7..eb0bca1c19e 100644
--- a/gcc/testsuite/c-c++-common/vector-subscript-4.c
+++ b/gcc/testsuite/c-c++-common/vector-subscript-4.c
@@ -25,5 +25,6 @@ foobar(16)
 foobar(32)
 foobar(64)
 
+/* Xfail riscv PR112531.  */
 /* Verify we don't have any vector temporaries in the IL.  */
-/* { dg-final { scan-tree-dump-not "vector" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "vector" "optimized" { xfail { riscv_v && 
vect_variable_length } } } } */
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr83518.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr83518.C
index b8a2bd1ebbd..6f2fc56c82c 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/pr83518.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr83518.C
@@ -24,4 +24,4 @@ unsigned test()
   return sum;
 }
 
-/* { dg-final { scan-tree-dump "return 15;" "optimized" { xfail 
vect_variable_length } } } */
+/* { dg-final { scan-tree-dump "return 15;" "optimized" { xfail { 
vect_variable_length && aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
index 62bb4047d74..2a32568de9e 100644
--- a/gcc/testsuite/gcc.dg/signbit-2.c
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -22,6 +22,7 @@ void fun2(int32_t *x, int n)
 }
 
 /* Xfail amdgcn where vector truth type is not integer type.  */
-/* { dg-final { scan-tree-dump {\s+>\s+\{ 0(, 0)+ \}} optimized { target 
vect_shift xfail amdgcn-*-* } } } */
+/* Xfail riscv PR111311.  */
+/* { dg-final { scan-tree-dump {\s+>\s+\{ 0(, 0)+ \}} optimized { target 
vect_shift xfail { amdgcn-*-* || { riscv_v && vect_variable_length } } } } } */
 /* { dg-final { scan-tree-dump {\s+>\s+0} optimized { target { ! vect_shift } 
} } } */
-/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized { xfail amdgcn-*-* } 
} } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized { xfail { amdgcn-*-* 
|| { riscv_v && vect_variable_length } } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c 
b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
index 9bb66ff8299..53bb75e9c17 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c

Re: [Committed] RISC-V: Remove xfail from ssa-fre-3.c testcase

2023-12-07 Thread Edwin Lu

Committed!

On 12/6/2023 8:22 AM, Palmer Dabbelt wrote:

On Tue, 05 Dec 2023 16:39:06 PST (-0800), e...@rivosinc.com wrote:
Ran the test case at 122e7b4f9d0c2d54d865272463a1d812002d0a5c where 
the xfail


That's the original port submission, I'm actually kind of surprised it 
still builds/works at all.


was introduced. The test did pass at that hash and has continued to 
pass since

then. Remove the xfail

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-fre-3.c: Remove xfail

Signed-off-by: Edwin Lu 
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c

index 224dd4f72ef..b2924837a22 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c
@@ -18,4 +18,4 @@ foo (int a, int b)
   return aa + bb;
 }

-/* { dg-final { scan-tree-dump "Replaced \\\(int\\\) aa_.*with a_" 
"fre1" { xfail { riscv*-*-* && lp64 } } } } */
+/* { dg-final { scan-tree-dump "Replaced \\\(int\\\) aa_.*with a_" 
"fre1" } } */


Reviewed-by: Palmer Dabbelt 

Though Kito did all the test suite stuff back then, so not sure if he 
happens to remember anything specific about what was going on.


Thanks!


Re: [PATCH 3/4] libgcc: aarch64: Add SME runtime support

2023-12-07 Thread Szabolcs Nagy
The 12/07/2023 17:36, Richard Sandiford wrote:
> Szabolcs Nagy  writes:
> > +
> > +#include "auto-target.h"
> > +#include 
> > +

sorry, this seems to fail when building --without-headers

i will respin this, handling the 'inhibit_libc' case.


Re: [PING][PATCH 2/2] arm: Add support for MVE Tail-Predicated Low Overhead Loops

2023-12-07 Thread Andre Vieira (lists)
Thanks for addressing my comments. I have reviewed this and the other 
patch before and they LGTM. I however do not have approval rights so you 
will need the OK from a maintainer.


Thanks for doing this :)

Andre

On 30/11/2023 12:55, Stamatis Markianos-Wright wrote:

Hi Andre,

Thanks for the comments, see latest revision attached.

On 27/11/2023 12:47, Andre Vieira (lists) wrote:

Hi Stam,

Just some comments.

+/* Recursively scan through the DF chain backwards within the basic 
block and
+   determine if any of the USEs of the original insn (or the USEs of 
the insns
s/Recursively scan/Scan/ as you no longer recurse, thanks for that by 
the way :)

+   where thy were DEF-ed, etc., recursively) were affected 
by implicit VPT

remove recursively for the same reasons.

+  if (!CONST_INT_P (cond_counter_iv.step) || !CONST_INT_P 
(cond_temp_iv.step))

+    return NULL;
+  /* Look at the steps and swap around the rtx's if needed. Error 
out if

+ one of them cannot be identified as constant.  */
+  if (INTVAL (cond_counter_iv.step) != 0 && INTVAL 
(cond_temp_iv.step) != 0)

+    return NULL;

Move the comment above the if before, as the erroring out it talks 
about is there.

Done


+  emit_note_after ((enum insn_note)NOTE_KIND (insn), BB_END (body));
 space after 'insn_note)'

@@ -173,14 +176,14 @@ doloop_condition_get (rtx_insn *doloop_pat)
   if (! REG_P (reg))
 return 0;
 -  /* Check if something = (plus (reg) (const_int -1)).
+  /* Check if something = (plus (reg) (const_int -n)).
  On IA-64, this decrement is wrapped in an if_then_else.  */
   inc_src = SET_SRC (inc);
   if (GET_CODE (inc_src) == IF_THEN_ELSE)
 inc_src = XEXP (inc_src, 1);
   if (GET_CODE (inc_src) != PLUS
   || XEXP (inc_src, 0) != reg
-  || XEXP (inc_src, 1) != constm1_rtx)
+  || !CONST_INT_P (XEXP (inc_src, 1)))

Do we ever check that inc_src is negative? We used to check if it was 
-1, now we only check it's a constant, but not a negative one, so I 
suspect this needs a:

|| INTVAL (XEXP (inc_src, 1)) >= 0

Good point. Done


@@ -492,7 +519,8 @@ doloop_modify (class loop *loop, class niter_desc 
*desc,

 case GE:
   /* Currently only GE tests against zero are supported.  */
   gcc_assert (XEXP (condition, 1) == const0_rtx);
-
+  /* FALLTHRU */
+    case GTU:
   noloop = constm1_rtx;

I spent a very long time staring at this trying to understand why 
noloop = constm1_rtx for GTU, where I thought it should've been (count 
& (n-1)). For the current use of doloop it doesn't matter because ARM 
is the only target using it and you set desc->noloop_assumptions to 
null_rtx in 'arm_attempt_dlstp_transform' so noloop is never used. 
However, if a different target accepts this GTU pattern then this 
target agnostic code will do the wrong thing.  I suggest we either:
 - set noloop to what we think might be the correct value, which if 
you ask me should be 'count & (XEXP (condition, 1))',
 - or add a gcc_assert (GET_CODE (condition) != GTU); under the if 
(desc->noloop_assumption); part and document why.  I have a slight 
preference for the assert given otherwise we are adding code that we 
can't test.


Yea, that's true tbh. I've done the latter, but also separated out the 
"case GTU:" and added a comment, so that it's more clear that the noloop 
things aren't used in the only implemented GTU case (Arm)


Thank you :)



LGTM otherwise (but I don't have the power to approve this ;)).

Kind regards,
Andre

From: Stamatis Markianos-Wright 
Sent: Thursday, November 16, 2023 11:36 AM
To: Stamatis Markianos-Wright via Gcc-patches; Richard Earnshaw; 
Richard Sandiford; Kyrylo Tkachov
Subject: [PING][PATCH 2/2] arm: Add support for MVE Tail-Predicated 
Low Overhead Loops


Pinging back to the top of reviewers' inboxes due to worry about Stage 1
End in a few days :)


See the last email for the latest version of the 2/2 patch. The 1/2
patch is A-Ok from Kyrill's earlier target-backend review.


On 10/11/2023 12:41, Stamatis Markianos-Wright wrote:


On 06/11/2023 17:29, Stamatis Markianos-Wright wrote:


On 06/11/2023 11:24, Richard Sandiford wrote:

Stamatis Markianos-Wright  writes:
One of the main reasons for reading the arm bits was to try to 
answer

the question: if we switch to a downcounting loop with a GE
condition,
how do we make sure that the start value is not a large unsigned
number that is interpreted as negative by GE?  E.g. if the loop
originally counted up in steps of N and used an LTU condition,
it could stop at a value in the range [INT_MAX + 1, UINT_MAX].
But the loop might never iterate if we start counting down from
most values in that range.

Does the patch handle that?

So AFAICT this is actually handled in the generic code in
`doloop_valid_p`:

These kinds of loops fail because they are "desc->infinite", so no
loop-doloop conversion is attempted at all (even for standard
dls/le loops)

Thanks to that check I haven't 

[PATCH]

2023-12-07 Thread Alexandre Oliva
On Dec  7, 2023, Thomas Schwinge  wrote:

> Thank you for looking into this so promptly!

You're welcome ;-)


> during IPA pass: emutls
> [...]/source-gcc/gcc/testsuite/c-c++-common/strub-unsupported-3.c:18:1: 
> internal compiler error: in verify_curr_properties, at passes.cc:2198

Aah, this smells a lot like the issue that François-Xavier reported,
that the following patch is expected to fix.  I'm still regstrapping it
on x86_64-linux-gnu, after checking that it addressed the symptom on a
cross compiler to the target for which it had originally been reported.
Ok to install, once you confirm that it cures these ICEs?


strub: skip emutls after strubm errors

The emutls pass requires PROP_ssa, but if the strubm pass (or any
other pre-SSA pass) issues errors, all of the build_ssa_passes are
skipped, so the property is not set, but emutls still attempts to run,
on targets that use it, despite earlier errors, so it hits the
unsatisfied requirement.

Adjust emutls to be skipped in case of earlier errors.


for  gcc/ChangeLog

* tree-emutls.cc: Include diagnostic-core.h.
(pass_ipa_lower_emutls::gate): Skip if errors were seen.
---
 gcc/tree-emutls.cc |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-emutls.cc b/gcc/tree-emutls.cc
index 5dca5a8291356..38de202717a1a 100644
--- a/gcc/tree-emutls.cc
+++ b/gcc/tree-emutls.cc
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "langhooks.h"
 #include "tree-iterator.h"
 #include "gimplify.h"
+#include "diagnostic-core.h" /* for seen_error */
 
 /* Whenever a target does not support thread-local storage (TLS) natively,
we can emulate it with some run-time support in libgcc.  This will in
@@ -841,7 +842,7 @@ public:
   bool gate (function *) final override
 {
   /* If the target supports TLS natively, we need do nothing here.  */
-  return !targetm.have_tls;
+  return !targetm.have_tls && !seen_error ();
 }
 
   unsigned int execute (function *) final override


-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH v2] ifcvt: Handle multiple rewired regs and refactor noce_convert_multiple_sets

2023-12-07 Thread Manolis Tsamis
On Thu, Nov 23, 2023 at 11:01 PM Richard Sandiford
 wrote:
>
> Manolis Tsamis  writes:
> > The existing implementation of need_cmov_or_rewire and
> > noce_convert_multiple_sets_1 assumes that sets are either REG or SUBREG.
> > This commit enhances them so they can handle/rewire arbitrary set 
> > statements.
> >
> > To do that a new helper struct noce_multiple_sets_info is introduced which 
> > is
> > used by noce_convert_multiple_sets and its helper functions. This results in
> > cleaner function signatures, improved efficiency (a number of vecs and hash
> > set/map are replaced with a single vec of struct) and simplicity.
> >
> > gcc/ChangeLog:
> >
> >   * ifcvt.cc (need_cmov_or_rewire): Renamed 
> > init_noce_multiple_sets_info.
> >   (init_noce_multiple_sets_info): Initialize noce_multiple_sets_info.
> >   (noce_convert_multiple_sets_1): Use noce_multiple_sets_info and handle
> >   rewiring of multiple registers.
> >   (noce_convert_multiple_sets): Updated to use noce_multiple_sets_info.
> >   * ifcvt.h (struct noce_multiple_sets_info): Introduce new struct
> >   noce_multiple_sets_info to store info for noce_convert_multiple_sets.
> >
> > Signed-off-by: Manolis Tsamis 
> > ---
>
> Thanks, this looks like a really nice clean-up.  One comment below:
>
> >
> > Changes in v2:
> > - Made standalone patch.
> > - Better comments, some more checks.
> >
> >  gcc/ifcvt.cc | 252 +++
> >  gcc/ifcvt.h  |  16 
> >  2 files changed, 129 insertions(+), 139 deletions(-)
> >
> > diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
> > index a0af553b9ff..9486d54de34 100644
> > --- a/gcc/ifcvt.cc
> > +++ b/gcc/ifcvt.cc
> > @@ -98,14 +98,10 @@ static bool dead_or_predicable (basic_block, 
> > basic_block, basic_block,
> >   edge, bool);
> >  static void noce_emit_move_insn (rtx, rtx);
> >  static rtx_insn *block_has_only_trap (basic_block);
> > -static void need_cmov_or_rewire (basic_block, hash_set *,
> > -  hash_map *);
> > +static void init_noce_multiple_sets_info (basic_block,
> > +  auto_delete_vec &);
> >  static bool noce_convert_multiple_sets_1 (struct noce_if_info *,
> > -   hash_set *,
> > -   hash_map *,
> > -   auto_vec *,
> > -   auto_vec *,
> > -   auto_vec *, int *);
> > +  auto_delete_vec &, int *);
> >
> >  /* Count the number of non-jump active insns in BB.  */
> >
> > @@ -3270,24 +3266,13 @@ noce_convert_multiple_sets (struct noce_if_info 
> > *if_info)
> >rtx x = XEXP (cond, 0);
> >rtx y = XEXP (cond, 1);
> >
> > -  /* The true targets for a conditional move.  */
> > -  auto_vec targets;
> > -  /* The temporaries introduced to allow us to not consider register
> > - overlap.  */
> > -  auto_vec temporaries;
> > -  /* The insns we've emitted.  */
> > -  auto_vec unmodified_insns;
> > -
> > -  hash_set need_no_cmov;
> > -  hash_map rewired_src;
> > -
> > -  need_cmov_or_rewire (then_bb, &need_no_cmov, &rewired_src);
> > +  auto_delete_vec insn_info;
> > +  init_noce_multiple_sets_info (then_bb, insn_info);
> >
> >int last_needs_comparison = -1;
> >
> >bool ok = noce_convert_multiple_sets_1
> > -(if_info, &need_no_cmov, &rewired_src, &targets, &temporaries,
> > - &unmodified_insns, &last_needs_comparison);
> > +(if_info, insn_info, &last_needs_comparison);
> >if (!ok)
> >return false;
> >
> > @@ -3302,8 +3287,7 @@ noce_convert_multiple_sets (struct noce_if_info 
> > *if_info)
> >end_sequence ();
> >start_sequence ();
> >ok = noce_convert_multiple_sets_1
> > - (if_info, &need_no_cmov, &rewired_src, &targets, &temporaries,
> > -  &unmodified_insns, &last_needs_comparison);
> > + (if_info, insn_info, &last_needs_comparison);
> >/* Actually we should not fail anymore if we reached here,
> >but better still check.  */
> >if (!ok)
> > @@ -3312,12 +3296,12 @@ noce_convert_multiple_sets (struct noce_if_info 
> > *if_info)
> >
> >/* We must have seen some sort of insn to insert, otherwise we were
> >   given an empty BB to convert, and we can't handle that.  */
> > -  gcc_assert (!unmodified_insns.is_empty ());
> > +  gcc_assert (!insn_info.is_empty ());
> >
> >/* Now fixup the assignments.  */
> > -  for (unsigned i = 0; i < targets.length (); i++)
> > -if (targets[i] != temporaries[i])
> > -  noce_emit_move_insn (targets[i], temporaries[i]);
> > +  for (unsigned i = 0; i < insn_info.length (); i++)
> > +if (insn_info[i]->target != insn_info[i]->temporary)
> > +  noce_emit_move_insn (insn_info[i]->target, insn_info[i]->temporary);
> >
> >/* Actually emit the sequence if it isn't too expensive.  */
> >rtx_insn *seq = get_insns ();
> > @@ -3332,10 +3316,10 @@ noce_convert_multiple_sets (struct noce_if_info 
> > *if_info)
> >  set_used_flags (insn);
> >
> >/* Mark all our 

Re: [PATCH 3/4] libgcc: aarch64: Add SME runtime support

2023-12-07 Thread Richard Sandiford
Szabolcs Nagy  writes:
> The call ABI for SME (Scalable Matrix Extension) requires a number of
> helper routines which are added to libgcc so they are tied to the
> compiler version instead of the libc version. See
> https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines
>
> The routines are in shared libgcc and static libgcc eh, even though
> they are not related to exception handling.  This is to avoid linking
> a copy of the routines into dynamic linked binaries, because TPIDR2_EL0
> block can be extended in the future which is better to handle in a
> single place per process.
>
> The support routines have to decide if SME is accessible or not. Linux
> tells userspace if SME is accessible via AT_HWCAP2, otherwise a new
> __aarch64_sme_accessible symbol was introduced that a libc can define.
> Due to libgcc and libc build order, the symbol availability cannot be
> checked so for __aarch64_sme_accessible an unistd.h feature test macro
> is used while such detection mechanism is not available for __getauxval
> so we rely on configure checks based on the target triplet.
>
> Asm helper code is added to make writing the routines easier.
>
> libgcc/ChangeLog:
>
>   * config/aarch64/t-aarch64: Add sources to the build.
>   * config/aarch64/__aarch64_have_sme.c: New file.
>   * config/aarch64/__arm_sme_state.S: New file.
>   * config/aarch64/__arm_tpidr2_restore.S: New file.
>   * config/aarch64/__arm_tpidr2_save.S: New file.
>   * config/aarch64/__arm_za_disable.S: New file.
>   * config/aarch64/aarch64-asm.h: New file.
>   * config/aarch64/libgcc-sme.ver: New file.
> ---
>  libgcc/config/aarch64/__aarch64_have_sme.c   |  71 +
>  libgcc/config/aarch64/__arm_sme_state.S  |  55 ++
>  libgcc/config/aarch64/__arm_tpidr2_restore.S |  89 
>  libgcc/config/aarch64/__arm_tpidr2_save.S| 101 +++
>  libgcc/config/aarch64/__arm_za_disable.S |  66 
>  libgcc/config/aarch64/aarch64-asm.h  |  98 ++
>  libgcc/config/aarch64/libgcc-sme.ver |  24 +
>  libgcc/config/aarch64/t-aarch64  |  10 ++
>  8 files changed, 514 insertions(+)
>  create mode 100644 libgcc/config/aarch64/__aarch64_have_sme.c
>  create mode 100644 libgcc/config/aarch64/__arm_sme_state.S
>  create mode 100644 libgcc/config/aarch64/__arm_tpidr2_restore.S
>  create mode 100644 libgcc/config/aarch64/__arm_tpidr2_save.S
>  create mode 100644 libgcc/config/aarch64/__arm_za_disable.S
>  create mode 100644 libgcc/config/aarch64/aarch64-asm.h
>  create mode 100644 libgcc/config/aarch64/libgcc-sme.ver
>
> diff --git a/libgcc/config/aarch64/__aarch64_have_sme.c 
> b/libgcc/config/aarch64/__aarch64_have_sme.c
> new file mode 100644
> index 000..2dc6be63ce9
> --- /dev/null
> +++ b/libgcc/config/aarch64/__aarch64_have_sme.c
> @@ -0,0 +1,71 @@
> +/* Initializer for SME support.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published
> +   by the Free Software Foundation; either version 3, or (at your
> +   option) any later version.
> +
> +   GCC is distributed in the hope that it will be useful, but WITHOUT
> +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> +   License for more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   .  */
> +
> +#include "auto-target.h"
> +#include 
> +
> +#if __ARM_FEATURE_SME
> +/* Avoid runtime SME detection if libgcc is built with SME.  */
> +# define HAVE_SME_CONST const
> +# define HAVE_SME_VALUE 1
> +#elif HAVE___GETAUXVAL
> +/* SME access detection on Linux.  */
> +# define HAVE_SME_CONST
> +# define HAVE_SME_VALUE 0
> +# define HAVE_SME_CTOR sme_accessible ()
> +
> +# define AT_HWCAP2   26
> +# define HWCAP2_SME  (1 << 23)
> +unsigned long int __getauxval (unsigned long int);
> +
> +static _Bool
> +sme_accessible (void)
> +{
> +  unsigned long hwcap2 = __getauxval (AT_HWCAP2);
> +  return (hwcap2 & HWCAP2_SME) != 0;
> +}
> +#elif __LIBC___AARCH64_SME_ACCESSIBLE
> +/* Alternative SME access detection.  */
> +# define HAVE_SME_CONST
> +# define HAVE_SME_VALUE 0
> +# define HAVE_SME_CTOR __aarch64_sme_accessible ()
> +_Bool __aarch64_sme_accessible (void);
> +#else
> +# define HAVE_SME_CONST const
> +# define HAVE_SME_VALUE 0
> +#endif
> +
> +/* Define the 

Re: [PATCH v7 3/5] OpenMP: Pointers and member mappings

2023-12-07 Thread Julian Brown
Hi Tobias!

On Wed, 6 Dec 2023 12:36:34 +0100
Tobias Burnus  wrote:

> Hi Julian,
> 
> LGTM, except for:
> 
> * The 'target exit data' handling - comments below - looks a bit
> fishy/inconsistent.
> 
> I intend to have a closer look with more code context, but maybe you
> should have a look at it as well.
> 
> BTW: Fortran deep-mapping is not yet on mainline. Are you aware of
> changes or in particular testcases on OG13 related to your patch
> series that should be included when upstreaming that auto-mapping of
> allocatable components patch?

I thought I'd adjusted some tests to use "pointer" instead of
"allocatable" at some point for mainline submission, but now I can't
find where I'm thinking of.  So possibly.  (I'll keep an eye out...)

> > +  if (OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_DELETE
> > +   || OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_RELEASE
> > +   || op == EXEC_OMP_TARGET_EXIT_DATA)
> >   {
> > [...]
> > +   gomp_map_kind map_kind
> > + = (op == EXEC_OMP_TARGET_EXIT_DATA) ? GOMP_MAP_RELEASE
> > + : OMP_CLAUSE_MAP_KIND
> > (node);
> > +   OMP_CLAUSE_SET_MAP_KIND (node2, map_kind);
> > +   OMP_CLAUSE_RELEASE_DESCRIPTOR (node2) = 1;  
> 
> For '!$omp target exit data map(delete: array)' this looks wrong as it
> replaces 'delete' by 'release' for the descriptor - while for
>   '!$omp target (data) map(delete: array)'
> it remains 'delete'.
> 
> Thus, I wonder whether that shouldn't be instead
>OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_DELETE
>? GOMP_MAP_DELETE : GOMP_MAP_RELEASE;

I've fixed that as you suggest.  Actually I've made OpenACC use the new
node layout as well, since (a) it works and (b) it was weirdly
inconsistent before.  That is, exit data directives will no longer use
e.g.:

  GOMP_MAP_FROM
  GOMP_MAP_TO_PSET
  GOMP_MAP_ATTACH_DETACH

but instead,

  GOMP_MAP_FROM
  GOMP_MAP_RELEASE (with OMP_CLAUSE_RELEASE_DESCRIPTOR set)
  GOMP_MAP_ATTACH_DETACH

actually the current state is that GOMP_MAP_TO_PSET will be used for
the descriptor on an "exit data" directive if you refer to the whole
array, but GOMP_MAP_RELEASE (etc.) will be used if you refer to an array
section (without the flag newly added in this patch, of course). I
don't think there's any reason to maintain that inconsistency.

> We later have the following; just reading the patch, I wonder whether
> GOMP_TO_PSET is correct for a generic 'target exit data' here or not.
>  It seems at a glance as if an "|| op == 'target exit data'" is
> missing here:
> 
> > -   if (openacc)
> > - OMP_CLAUSE_SET_MAP_KIND (desc_node,
> > +   if (openacc
> > +   || (map_kind != GOMP_MAP_RELEASE
> > +   && map_kind != GOMP_MAP_DELETE))
> > + OMP_CLAUSE_SET_MAP_KIND (node2,
> >GOMP_MAP_TO_PSET);
> > else
> > - OMP_CLAUSE_SET_MAP_KIND (desc_node,
> > map_kind);
> > -   OMP_CLAUSE_DECL (desc_node) = inner;
> > -   OMP_CLAUSE_SIZE (desc_node) =
> > TYPE_SIZE_UNIT (type);
> > -   if (openacc)
> > - node2 = desc_node;
> > -   else
> > + OMP_CLAUSE_SET_MAP_KIND (node2,
> > map_kind);  
> 
> (Here, without setting OMP_CLAUSE_RELEASE_DESCRIPTOR.)
> 
> And for completeness, we later have:
> 
> > +omp_map_clause_descriptor_p (tree c)
> > +{
> > +  if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
> > +return false;
> > +
> > +  if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_TO_PSET)
> > +return true;
> > +
> > +  if ((OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_RELEASE
> > +   || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DELETE)
> > +  && OMP_CLAUSE_RELEASE_DESCRIPTOR (c))
> > +return true;  

In that section, the "exit data" directive case is handled a few lines
higher up the function, with a condition that (now) reads:

  else if (op == EXEC_OMP_TARGET_EXIT_DATA
   || op == EXEC_OACC_EXIT_DATA)
map_kind = GOMP_MAP_RELEASE;

so I think we're OK -- unless I missed something?  (Removing the
OpenACC special case and inverting a conditional simplifies the logic
here a bit.)

> > +   /* Save array descriptor for use
> > +  in
> > gfc_omp_deep_mapping{,_p,_cnt}; force
> > +  evaluate to ensure that it is
> > +  not gimplified + is a decl.  */  
> This is part of my Fortran allocatable-components deep-mapping patch
> that is currently only on OG9 (?) to OG13, badly needs to be
> upstreamed but required that Jakub had a look at it - well, I still
> would like that he has a look at the omp-low.cc parts and it needs to
> be re-diffed.
> 
> Hence, while I wouldn't mind to keep it to avoid more 

Re: [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'

2023-12-07 Thread Prathamesh Kulkarni
On Thu, 9 Nov 2023 at 19:44, Victor Do Nascimento
 wrote:
>
> Create the necessary mappings from the ACLE-defined Neon intrinsics
> names[1] to the internal builtin function names.
>
> [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html
Hi Victor,
It seems this patch broke kernel build after the recent patch to
upgrade -Wincompatible-pointer-types to an error:

00:00:56 
/home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h:
In function ‘vldap1_lane_s64’:
00:00:56 
/home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h:13474:48:
error: passing argument 1 of ‘__builtin_aarch64_vec_ldap1_lanev1di’
from incompatible pointer type [-Wincompatible-pointer-types]
00:00:56 13474 |   return __builtin_aarch64_vec_ldap1_lanev1di (__src,
__vec, __lane);
00:00:56   |^
00:00:56   ||
00:00:56   |const
int64_t * {aka const long long int *}
00:00:56 
/home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h:13474:48:
note: expected ‘const long int *’ but argument is of type ‘const
int64_t *’ {aka ‘const long long int *’}

Looking cursorily at the code, should __src be cast to
(__builtin_aarch64_simd_di *) before passing it to
__builtin_aarch64_vec_ldap1_lanev1di ?
For more details, please see:
https://ci.linaro.org/job/tcwg_kernel--gnu-master-aarch64-next-defconfig-build/91/artifact/artifacts/notify/mail-body.txt/*view*/

Thanks,
Prathamesh


>
> gcc/ChangeLog:
>
> * gcc/config/aarch64/arm_neon.h (vldap1_lane_u64): New.
> (vldap1q_lane_u64): Likewise.
> (vldap1_lane_s64): Likewise.
> (vldap1q_lane_s64): Likewise.
> (vldap1_lane_f64): Likewise.
> (vldap1q_lane_f64): Likewise.
> (vldap1_lane_p64): Likewise.
> (vldap1q_lane_p64): Likewise.
> (vstl1_lane_u64): Likewise.
> (vstl1q_lane_u64): Likewise.
> (vstl1_lane_s64): Likewise.
> (vstl1q_lane_s64): Likewise.
> (vstl1_lane_f64): Likewise.
> (vstl1q_lane_f64): Likewise.
> (vstl1_lane_p64): Likewise.
> (vstl1q_lane_p64): Likewise.
> ---
>  gcc/config/aarch64/arm_neon.h | 129 ++
>  1 file changed, 129 insertions(+)
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 349f3167699..ef0d75e07ce 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t 
> __vec, const int __lane)
>return __aarch64_vset_lane_any (*__src, __vec, __lane);
>  }
>
> +#pragma GCC push_options
> +#pragma GCC target ("+nothing+rcpc3+simd")
> +
> +/* vldap1_lane.  */
> +
> +__extension__ extern __inline uint64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1di_usus (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline uint64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2di_usus (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int 
> __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline poly64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, 

Re: Several test failures due to "Introduce strub: machine-independent stack scrubbing"

2023-12-07 Thread Alexandre Oliva
On Dec  7, 2023, FX Coudert  wrote:

> The commit
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f0a90c7d7333fc7f554b906245c84bdf04d716d7
> (Introduce strub: machine-independent stack scrubbing) has introduced
> many test failures on x86_64-apple-darwin21:

Nevermind, the followup patch I'd suggested won't help.  The problem is
that, after the (expected) errors issued by the strub mode pass for those
tests, the passes that convert the function to SSA mode (and thus
provide the SSA pass property required by emutls) don't run, but emutls
doesn't have the same '&& !seen_error()' condition in its gate function,
so it tries to run, and finds that the required properties aren't there.

This patchlet should cure it.  Testing...

diff --git a/gcc/tree-emutls.cc b/gcc/tree-emutls.cc
index 5dca5a8291356..871e5a14f1e38 100644
--- a/gcc/tree-emutls.cc
+++ b/gcc/tree-emutls.cc
@@ -841,7 +841,7 @@ public:
   bool gate (function *) final override
 {
   /* If the target supports TLS natively, we need do nothing here.  */
-  return !targetm.have_tls;
+  return !targetm.have_tls && !seen_error ();
 }
 
   unsigned int execute (function *) final override

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH] Add support for function attributes and variable attributes

2023-12-07 Thread Antoni Boucher
It seems like you forgot to prefix the commit message with "libgccjit:
".

On Thu, 2023-11-30 at 10:55 +0100, Guillaume Gomez wrote:
> Ping David. :)
> 
> Le jeu. 23 nov. 2023 à 22:59, Antoni Boucher  a
> écrit :
> > David: I found back the comment you made. Here it is:
> > 
> >    I see you have patches to add function and variable attributes;
> > I
> >    wonder if this would be cleaner internally if there was a
> >    recording::attribute class, rather than the std::pair currently
> > in
> >    use
> >    (some attributes have int arguments rather than string, others
> > have
> >    multiple args).
> > 
> >    I also wondered if a "gcc_jit_attribute" type could be exposed
> > to
> >    the
> >    user, e.g.:
> > 
> >      attr1 = gcc_jit_context_new_attribute (ctxt, "noreturn");
> >      attr2 = gcc_jit_context_new_attribute_with_string (ctxt,
> > "alias",
> >    "__foo");
> >      gcc_jit_function_add_attribute (ctxt, attr1);
> >      gcc_jit_function_add_attribute (ctxt, attr2);
> > 
> >    or somesuch?  But I think the API you currently have is OK. 
> > 
> > On Thu, 2023-11-23 at 22:52 +0100, Guillaume Gomez wrote:
> > > Ping David. :)
> > > 
> > > Le mer. 15 nov. 2023 à 17:56, Antoni Boucher  a
> > > écrit :
> > > > 
> > > > David: another thing I remember you mentioned when you reviewed
> > > > an
> > > > earlier version of this patch is the usage of `std::pair`.
> > > > I can't find where you said that, but I remember you mentioned
> > > > that
> > > > we
> > > > should use a struct instead.
> > > > Can you please elaborate again?
> > > > Thanks.
> > > > 
> > > > On Wed, 2023-11-15 at 17:53 +0100, Guillaume Gomez wrote:
> > > > > Hi,
> > > > > 
> > > > > This patch adds the (incomplete) support for function and
> > > > > variable
> > > > > attributes. The added attributes are the ones we're using in
> > > > > rustc_codegen_gcc but all the groundwork is done to add more
> > > > > (and
> > > > > we
> > > > > will very likely add more as we didn't add all the ones we
> > > > > use in
> > > > > rustc_codegen_gcc yet).
> > > > > 
> > > > > The only big question with this patch is about `inline`. We
> > > > > currently
> > > > > handle it as an attribute because it is more convenient for
> > > > > us
> > > > > but is
> > > > > it ok or should we create a separate function to mark a
> > > > > function
> > > > > as
> > > > > inlined?
> > > > > 
> > > > > Thanks in advance for the review.
> > > > 
> > 



Re: Several test failures due to "Introduce strub: machine-independent stack scrubbing"

2023-12-07 Thread FX Coudert
> However, I'm very surprised that you're hitting this with the initial
> commit.  It's as if strub support was disabled on the target, but even
> if you were hitting this with e.g. offloading, only the followup commit
> introduced code to disable strub for such targets as nvptx.  Anyway, do
> you by any chance have any offloading enabled?

I may have misidentified the problem, my testing was done on:

LAST_UPDATED: Wed Dec 6 16:01:43 UTC 2023 (revision 458e7c93792)
https://gcc.gnu.org/pipermail/gcc-testresults/2023-December/802595.html

I’m restarting a build right now, will report back.

FX

Re: [PATCH V2 0/2] RISC-V: Add intrinsics for Bitmanip and Scalar Crypto extensions

2023-12-07 Thread Christoph Müllner
On Thu, Dec 7, 2023 at 11:18 AM Liao Shihua  wrote:
>
> In accordance with the suggestions of Christoph Müllner, the following 
> amendments are made
>
> Update v1 -> v2:
>   1. Rename *_intrinsic-* to *_intrinsic-XLEN.
>   2. Typo fix.
>   3. Intrinsics with immediate arguments will use macros at -O0.
>
> It's a little patch that just provides a mapping from the RV intrinsics to the 
> builtin
> names within GCC.

Thanks for the update!

I think this patchset was not properly tested as I see the tests failing.

$ /opt/riscv-mainline/bin/riscv64-unknown-linux-gnu-gcc
-march=rv64gc_zbb_zbc_zbkb_zbkc_zbkx -mabi=lp64d
/home/cm/src/gcc/riscv-mainline/gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-64.c
In file included from
/home/cm/src/gcc/riscv-mainline/gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-64.c:5:
/opt/riscv-mainline/lib/gcc/riscv64-unknown-linux-gnu/14.0.0/include/riscv_bitmanip.h:
In function '__riscv_orc_b_32':
/opt/riscv-mainline/lib/gcc/riscv64-unknown-linux-gnu/14.0.0/include/riscv_bitmanip.h:61:10:
error: implicit declaration of function '__builtin_riscv_orc_b_32';
did you mean '__builtin_riscv_orc_b_64'?
[-Wimplicit-function-declaration]
   61 |   return __builtin_riscv_orc_b_32 (x);
  |  ^~~~
  |  __builtin_riscv_orc_b_64

The spec says: Emulated with rev8+sext.w on RV64.
But I think this is a bug in the spec and should be "orc.b + sext.w".
Still, you need to handle that somehow.

$ /opt/riscv-mainline/bin/riscv64-unknown-linux-gnu-gcc
-march=rv64gc_zknd_zkne_zknh_zksed_zksh -mabi=lp64 -mabi=lp64d
/home/cm/src/gcc/riscv-mainline/gcc/testsuite/gcc.target/riscv/scalar_crypto_intrinsic-64.c
/tmp/ccynQLn2.s: Assembler messages:
/tmp/ccynQLn2.s:127: Error: instruction aes64ks1i requires absolute expression
/tmp/ccynQLn2.s:593: Error: instruction sm4ed requires absolute expression
/tmp/ccynQLn2.s:633: Error: instruction sm4ks requires absolute expression

The absolute expression means that you cannot use a variable but must
use an immediate.
E.g.:
uint64_t foo4(uint64_t rs1)
{
return __riscv_aes64ks1i(rs1, 3);
}
Here the 3 will be encoded into the instruction.

There are probably more issues, but I stopped investigating after these two.

Also, there are some missing spaces to separate arguments. E.g.:
  return __riscv_aes64ks1i(rs1,rnum);
...should be...
  return __riscv_aes64ks1i(rs1, rnum);

Please make sure to test these patches for RV32 and RV64 before
sending a new revision.
If you run into issues that you can't resolve, then just reach out.

BR
Christoph

>
>
> Liao Shihua (2):
>   Add C intrinsics of Scalar Crypto Extension
>   Add C intrinsics of Bitmanip Extension
>
>  gcc/config.gcc|   2 +-
>  gcc/config/riscv/riscv-builtins.cc|  22 ++
>  gcc/config/riscv/riscv-ftypes.def |   2 +
>  gcc/config/riscv/riscv-scalar-crypto.def  |  18 +
>  gcc/config/riscv/riscv_bitmanip.h | 297 +
>  gcc/config/riscv/riscv_crypto.h   | 309 ++
>  .../riscv/scalar_bitmanip_intrinsic-32.c  |  97 ++
>  .../riscv/scalar_bitmanip_intrinsic-64.c  | 115 +++
>  .../riscv/scalar_crypto_intrinsic-32.c| 115 +++
>  .../riscv/scalar_crypto_intrinsic-64.c| 122 +++
>  10 files changed, 1098 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/config/riscv/riscv_bitmanip.h
>  create mode 100644 gcc/config/riscv/riscv_crypto.h
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-32.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-64.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/scalar_crypto_intrinsic-32.c
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/scalar_crypto_intrinsic-64.c
>
> --
> 2.34.1
>


Re: [ARC PATCH] Add *extvsi_n_0 define_insn_and_split for PR 110717.

2023-12-07 Thread Jeff Law




On 12/7/23 09:04, Roger Sayle wrote:


Hi Jeff,
Doh!  Great catch.  The perils of not (yet) being able to actually
run any ARC execution tests myself.

ACK.





Shouldn't operands[4] be GEN_INT ((HOST_WIDE_INT_1U << tmp) - 1)?

Yes(-ish), operands[4] should be GEN_INT(HOST_WIDE_INT_1U << (tmp - 1)).

And the 32s in the test cases need to be 16s (the MSB of a five bit field is 
16).

You're probably also thinking the same thing that I am... that it might be 
possible
to implement this in the middle-end, but things are complicated by combine's
make_compound_operation/expand_compound_operation, and that
combine doesn't (normally) like turning two instructions into three.
Yea, I pondered, but didn't explore.   I was expecting problems around 
costing, make_compound_operation/expand_compound_operation didn't even 
cross my mind, but I can certainly see how they'd be problematical.





Fingers-crossed the attached patch works better on the nightly testers.
The H8 variant without the -1 failed in my tester, but passed once the 
-1 was added.  I've got one thing to re-review as the -1 changes a few 
things on the codegen side and I need to re-verify the profitability 
across the various H8 configurations.




jeff


[PATCH 4/4] libgcc: aarch64: Add SME unwinder support

2023-12-07 Thread Szabolcs Nagy
To support the ZA lazy save scheme, the PCS requires the unwinder to
reset the SME state to PSTATE.SM=0, PSTATE.ZA=0, TPIDR2_EL0=0 on entry
to an exception handler. We use the __arm_za_disable SME runtime call
unconditionally to achieve this.
https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#exceptions

The hidden alias is used to avoid a PLT and avoid inconsistent VPCS
marking (we don't rely on special PCS at the call site). In case of
static linking the SME runtime init code is linked in code that raises
exceptions.

libgcc/ChangeLog:

* config/aarch64/__arm_za_disable.S: Add hidden alias.
* config/aarch64/aarch64-unwind.h: Reset the SME state before
EH return via the _Unwind_Frames_Extra hook.
---
 libgcc/config/aarch64/__arm_za_disable.S |  5 +
 libgcc/config/aarch64/aarch64-unwind.h   | 16 
 2 files changed, 21 insertions(+)

diff --git a/libgcc/config/aarch64/__arm_za_disable.S 
b/libgcc/config/aarch64/__arm_za_disable.S
index 7a888a98d49..f61d4255fdc 100644
--- a/libgcc/config/aarch64/__arm_za_disable.S
+++ b/libgcc/config/aarch64/__arm_za_disable.S
@@ -64,3 +64,8 @@ ENTRY (__arm_za_disable)
 L(end):
ret
 END (__arm_za_disable)
+
+/* Hidden alias used by the unwinder.  */
+.global __libgcc_arm_za_disable
+.hidden __libgcc_arm_za_disable
+.set __libgcc_arm_za_disable, __arm_za_disable
diff --git a/libgcc/config/aarch64/aarch64-unwind.h 
b/libgcc/config/aarch64/aarch64-unwind.h
index d669edd671b..9fe6c8f61c3 100644
--- a/libgcc/config/aarch64/aarch64-unwind.h
+++ b/libgcc/config/aarch64/aarch64-unwind.h
@@ -78,4 +78,20 @@ aarch64_demangle_return_addr (struct _Unwind_Context 
*context,
   return addr;
 }
 
+/* SME runtime function local to libgcc, streaming compatible
+   and preserves more registers than the base PCS requires, but
+   we don't rely on that here.  */
+__attribute__ ((visibility ("hidden")))
+void __libgcc_arm_za_disable (void);
+
+/* Disable the SME ZA state in case an unwound frame used the ZA
+   lazy saving scheme.  */
+#undef _Unwind_Frames_Extra
+#define _Unwind_Frames_Extra(x)\
+  do   \
+{  \
+  __libgcc_arm_za_disable ();  \
+}  \
+  while (0)
+
 #endif /* defined AARCH64_UNWIND_H && defined __ILP32__ */
-- 
2.25.1



[PATCH 3/4] libgcc: aarch64: Add SME runtime support

2023-12-07 Thread Szabolcs Nagy
The call ABI for SME (Scalable Matrix Extension) requires a number of
helper routines which are added to libgcc so they are tied to the
compiler version instead of the libc version. See
https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines

The routines are in shared libgcc and static libgcc eh, even though
they are not related to exception handling.  This is to avoid linking
a copy of the routines into dynamic linked binaries, because TPIDR2_EL0
block can be extended in the future which is better to handle in a
single place per process.

The support routines have to decide if SME is accessible or not. Linux
tells userspace if SME is accessible via AT_HWCAP2, otherwise a new
__aarch64_sme_accessible symbol was introduced that a libc can define.
Due to libgcc and libc build order, the symbol availability cannot be
checked so for __aarch64_sme_accessible an unistd.h feature test macro
is used while such detection mechanism is not available for __getauxval
so we rely on configure checks based on the target triplet.

Asm helper code is added to make writing the routines easier.

libgcc/ChangeLog:

* config/aarch64/t-aarch64: Add sources to the build.
* config/aarch64/__aarch64_have_sme.c: New file.
* config/aarch64/__arm_sme_state.S: New file.
* config/aarch64/__arm_tpidr2_restore.S: New file.
* config/aarch64/__arm_tpidr2_save.S: New file.
* config/aarch64/__arm_za_disable.S: New file.
* config/aarch64/aarch64-asm.h: New file.
* config/aarch64/libgcc-sme.ver: New file.
---
 libgcc/config/aarch64/__aarch64_have_sme.c   |  71 +
 libgcc/config/aarch64/__arm_sme_state.S  |  55 ++
 libgcc/config/aarch64/__arm_tpidr2_restore.S |  89 
 libgcc/config/aarch64/__arm_tpidr2_save.S| 101 +++
 libgcc/config/aarch64/__arm_za_disable.S |  66 
 libgcc/config/aarch64/aarch64-asm.h  |  98 ++
 libgcc/config/aarch64/libgcc-sme.ver |  24 +
 libgcc/config/aarch64/t-aarch64  |  10 ++
 8 files changed, 514 insertions(+)
 create mode 100644 libgcc/config/aarch64/__aarch64_have_sme.c
 create mode 100644 libgcc/config/aarch64/__arm_sme_state.S
 create mode 100644 libgcc/config/aarch64/__arm_tpidr2_restore.S
 create mode 100644 libgcc/config/aarch64/__arm_tpidr2_save.S
 create mode 100644 libgcc/config/aarch64/__arm_za_disable.S
 create mode 100644 libgcc/config/aarch64/aarch64-asm.h
 create mode 100644 libgcc/config/aarch64/libgcc-sme.ver

diff --git a/libgcc/config/aarch64/__aarch64_have_sme.c 
b/libgcc/config/aarch64/__aarch64_have_sme.c
new file mode 100644
index 000..2dc6be63ce9
--- /dev/null
+++ b/libgcc/config/aarch64/__aarch64_have_sme.c
@@ -0,0 +1,71 @@
+/* Initializer for SME support.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "auto-target.h"
+#include <unistd.h>
+
+#if __ARM_FEATURE_SME
+/* Avoid runtime SME detection if libgcc is built with SME.  */
+# define HAVE_SME_CONST const
+# define HAVE_SME_VALUE 1
+#elif HAVE___GETAUXVAL
+/* SME access detection on Linux.  */
+# define HAVE_SME_CONST
+# define HAVE_SME_VALUE 0
+# define HAVE_SME_CTOR sme_accessible ()
+
+# define AT_HWCAP2 26
+# define HWCAP2_SME (1 << 23)
+unsigned long int __getauxval (unsigned long int);
+
+static _Bool
+sme_accessible (void)
+{
+  unsigned long hwcap2 = __getauxval (AT_HWCAP2);
+  return (hwcap2 & HWCAP2_SME) != 0;
+}
+#elif __LIBC___AARCH64_SME_ACCESSIBLE
+/* Alternative SME access detection.  */
+# define HAVE_SME_CONST
+# define HAVE_SME_VALUE 0
+# define HAVE_SME_CTOR __aarch64_sme_accessible ()
+_Bool __aarch64_sme_accessible (void);
+#else
+# define HAVE_SME_CONST const
+# define HAVE_SME_VALUE 0
+#endif
+
+/* Define the symbol gating SME support in libgcc.  */
+HAVE_SME_CONST _Bool __aarch64_have_sme
+  __attribute__((visibility("hidden"), nocommon)) = HAVE_SME_VALUE;
+
+#ifdef HAVE_SME_CTOR
+/* Use a higher priority to ensure it runs before 

[PATCH 2/4] libgcc: aarch64: Configure check for __getauxval

2023-12-07 Thread Szabolcs Nagy
Add configure check for the __getauxval ABI symbol, which is always
available on aarch64 glibc, and may be available on other linux C
runtimes. For now only enabled on glibc, others have to override it

  target_configargs=libgcc_cv_have___getauxval=yes

This is deliberately obscure as it should be auto detected, ideally
via a feature test macro in unistd.h (link time detection is not
possible since the libc may not be installed at libgcc build time),
but currently there is no such feature test mechanism.

Without __getauxval, libgcc cannot do runtime CPU feature detection
and has to assume only the build time known features are available.

libgcc/ChangeLog:

* config.in: Undef HAVE___GETAUXVAL.
* configure: Regenerate.
* configure.ac: Check for __getauxval.
---
 libgcc/config.in|  3 +++
 libgcc/configure| 26 ++
 libgcc/configure.ac | 19 +++
 3 files changed, 48 insertions(+)

diff --git a/libgcc/config.in b/libgcc/config.in
index 5dd96cdf648..441d4d39b95 100644
--- a/libgcc/config.in
+++ b/libgcc/config.in
@@ -16,6 +16,9 @@
 /* Define to 1 if the assembler supports .variant_pcs. */
 #undef HAVE_AS_VARIANT_PCS
 
+/* Define to 1 if __getauxval is available. */
+#undef HAVE___GETAUXVAL
+
 /* Define to 1 if the target assembler supports thread-local storage. */
 #undef HAVE_CC_TLS
 
diff --git a/libgcc/configure b/libgcc/configure
index 571e3437701..746d29587d5 100755
--- a/libgcc/configure
+++ b/libgcc/configure
@@ -5657,6 +5657,32 @@ $as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h
   ;;
 esac
 
+# Check __getauxval ABI symbol for CPU feature detection.
+case ${target} in
+aarch64*-linux-*)
+  # No link check because the libc may not be present.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __getauxval" >&5
+$as_echo_n "checking for __getauxval... " >&6; }
+if ${libgcc_cv_have___getauxval+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  case ${target} in
+ *-linux-gnu*)
+   libgcc_cv_have___getauxval=yes
+   ;;
+ *)
+   libgcc_cv_have___getauxval=no
+ esac
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_have___getauxval" 
>&5
+$as_echo "$libgcc_cv_have___getauxval" >&6; }
+  if test x$libgcc_cv_have___getauxval = xyes; then
+
+$as_echo "#define HAVE___GETAUXVAL 1" >>confdefs.h
+
+  fi
+esac
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" 
>&5
 $as_echo_n "checking for init priority support... " >&6; }
 if ${libgcc_cv_init_priority+:} false; then :
diff --git a/libgcc/configure.ac b/libgcc/configure.ac
index abc398c91e4..64b45ae1423 100644
--- a/libgcc/configure.ac
+++ b/libgcc/configure.ac
@@ -665,6 +665,25 @@ aarch64*-*-*)
 esac])
 LIBGCC_CHECK_AS_VARIANT_PCS
 
+# Check __getauxval ABI symbol for CPU feature detection.
+case ${target} in
+aarch64*-linux-*)
+  # No link check because the libc may not be present.
+  AC_CACHE_CHECK([for __getauxval],
+[libgcc_cv_have___getauxval],
+[case ${target} in
+ *-linux-gnu*)
+   libgcc_cv_have___getauxval=yes
+   ;;
+ *)
+   libgcc_cv_have___getauxval=no
+ esac])
+  if test x$libgcc_cv_have___getauxval = xyes; then
+AC_DEFINE(HAVE___GETAUXVAL, 1,
+ [Define to 1 if __getauxval is available.])
+  fi
+esac
+
 dnl Check if as supports RTM instructions.
 AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, [
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
-- 
2.25.1



[PATCH 1/4] libgcc: aarch64: Configure check for .variant_pcs support

2023-12-07 Thread Szabolcs Nagy
Ideally SME support routines in libgcc are marked as variant PCS symbols
so check if as supports the directive.
---
 libgcc/config.in|  3 +++
 libgcc/configure| 39 +++
 libgcc/configure.ac | 17 +
 3 files changed, 59 insertions(+)

diff --git a/libgcc/config.in b/libgcc/config.in
index f93c64a00c3..5dd96cdf648 100644
--- a/libgcc/config.in
+++ b/libgcc/config.in
@@ -13,6 +13,9 @@
 /* Define to 1 if the assembler supports LSE. */
 #undef HAVE_AS_LSE
 
+/* Define to 1 if the assembler supports .variant_pcs. */
+#undef HAVE_AS_VARIANT_PCS
+
 /* Define to 1 if the target assembler supports thread-local storage. */
 #undef HAVE_CC_TLS
 
diff --git a/libgcc/configure b/libgcc/configure
index cf149209652..571e3437701 100755
--- a/libgcc/configure
+++ b/libgcc/configure
@@ -5618,6 +5618,45 @@ $as_echo "#define HAVE_AS_LSE 1" >>confdefs.h
   ;;
 esac
 
+
+
+case "${target}" in
+aarch64*-*-*)
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if as supports 
.variant_pcs" >&5
+$as_echo_n "checking if as supports .variant_pcs... " >&6; }
+if ${libgcc_cv_as_variant_pcs+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+asm (".variant_pcs foobar");
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  libgcc_cv_as_variant_pcs=yes
+else
+  libgcc_cv_as_variant_pcs=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_as_variant_pcs" >&5
+$as_echo "$libgcc_cv_as_variant_pcs" >&6; }
+  if test x$libgcc_cv_as_variant_pcs = xyes; then
+
+$as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h
+
+  fi
+  ;;
+esac
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" 
>&5
 $as_echo_n "checking for init priority support... " >&6; }
 if ${libgcc_cv_init_priority+:} false; then :
diff --git a/libgcc/configure.ac b/libgcc/configure.ac
index 2fc9d5d7c93..abc398c91e4 100644
--- a/libgcc/configure.ac
+++ b/libgcc/configure.ac
@@ -648,6 +648,23 @@ changequote([,])dnl
 esac])
 LIBGCC_CHECK_AS_LSE
 
+dnl Check if as supports .variant_pcs.
+AC_DEFUN([LIBGCC_CHECK_AS_VARIANT_PCS], [
+case "${target}" in
+aarch64*-*-*)
+  AC_CACHE_CHECK([if as supports .variant_pcs], libgcc_cv_as_variant_pcs, [
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
+  [[asm (".variant_pcs foobar");]])],
+  [libgcc_cv_as_variant_pcs=yes], [libgcc_cv_as_variant_pcs=no])
+  ])
+  if test x$libgcc_cv_as_variant_pcs = xyes; then
+AC_DEFINE(HAVE_AS_VARIANT_PCS, 1,
+ [Define to 1 if the assembler supports .variant_pcs.])
+  fi
+  ;;
+esac])
+LIBGCC_CHECK_AS_VARIANT_PCS
+
 dnl Check if as supports RTM instructions.
 AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, [
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
-- 
2.25.1



[PATCH 0/4] libgcc: aarch64: Add SME runtime

2023-12-07 Thread Szabolcs Nagy
Add SME (Scalable Matrix Extension) support to libgcc.

Szabolcs Nagy (4):
  libgcc: aarch64: Configure check for .variant_pcs support
  libgcc: aarch64: Configure check for __getauxval
  libgcc: aarch64: Add SME runtime support
  libgcc: aarch64: Add SME unwinder support

 libgcc/config.in |   6 ++
 libgcc/config/aarch64/__aarch64_have_sme.c   |  71 +
 libgcc/config/aarch64/__arm_sme_state.S  |  55 ++
 libgcc/config/aarch64/__arm_tpidr2_restore.S |  89 
 libgcc/config/aarch64/__arm_tpidr2_save.S| 101 +++
 libgcc/config/aarch64/__arm_za_disable.S |  71 +
 libgcc/config/aarch64/aarch64-asm.h  |  98 ++
 libgcc/config/aarch64/aarch64-unwind.h   |  16 +++
 libgcc/config/aarch64/libgcc-sme.ver |  24 +
 libgcc/config/aarch64/t-aarch64  |  10 ++
 libgcc/configure |  65 
 libgcc/configure.ac  |  36 +++
 12 files changed, 642 insertions(+)
 create mode 100644 libgcc/config/aarch64/__aarch64_have_sme.c
 create mode 100644 libgcc/config/aarch64/__arm_sme_state.S
 create mode 100644 libgcc/config/aarch64/__arm_tpidr2_restore.S
 create mode 100644 libgcc/config/aarch64/__arm_tpidr2_save.S
 create mode 100644 libgcc/config/aarch64/__arm_za_disable.S
 create mode 100644 libgcc/config/aarch64/aarch64-asm.h
 create mode 100644 libgcc/config/aarch64/libgcc-sme.ver

-- 
2.25.1



Re: Several test failures due to "Introduce strub: machine-independent stack scrubbing"

2023-12-07 Thread Alexandre Oliva
On Dec  7, 2023, FX Coudert  wrote:

> The commit
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f0a90c7d7333fc7f554b906245c84bdf04d716d7
> (Introduce strub: machine-independent stack scrubbing) has introduced
> many test failures on x86_64-apple-darwin21:

Please give the followup commit f908368d2cb50 a try, it should at least
silence the errors.  (but not fix the problem; I suppose we want
strubbing enabled on darwin)

However, I'm very surprised that you're hitting this with the initial
commit.  It's as if strub support was disabled on the target, but even
if you were hitting this with e.g. offloading, only the followup commit
introduced code to disable strub for such targets as nvptx.  Anyway, do
you by any chance have any offloading enabled?

I'll try to trigger this with a cross compiler.  Thanks,

-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH v2 0/3] [GCC] arm: vld1_types_xN ACLE intrinsics

2023-12-07 Thread Richard Earnshaw

Pushed, thanks.

R.


On 07/12/2023 15:41, ezra.sito...@arm.com wrote:

Add xN variants of vld1_types intrinsic.




Re: [PATCH v2 0/3] [GCC] arm: vst1q_types_xN ACLE intrinsics

2023-12-07 Thread Richard Earnshaw

Pushed, thanks.

R.


On 07/12/2023 15:36, ezra.sito...@arm.com wrote:

Add xN variants of vst1q_types intrinsic.




Re: [PATCH] strub: enable conditional support

2023-12-07 Thread Thomas Schwinge
Hi Alexandre!

Thank you for looking into this so promptly!

On 2023-12-07T00:33:59-0300, Alexandre Oliva  wrote:
> On Dec  6, 2023, Alexandre Oliva  wrote:
>
>> Disabling the runtime bits is easy, once we determine what condition we
>> wish to test for.  I suppose testing for target support in the compiler,
>> issuing a 'sorry' in case the feature is required, would provide
>> something for libgcc configure and testsuite effective-target to test
>> for and decide whether to enable runtime support and run the tests.
>
> Instead of doing something equivalent to an implicit -fstrub=disable,
> that would quietly compile without stack scrubbing, I thought it would
> be safer to be noisy if the feature is used (requested, really) when
> support is not available.
>
>
> Targets that don't expose callee stacks to callers, such as nvptx, as
> well as -fsplit-stack compilations, violate fundamental assumptions of
> the current strub implementation.  This patch enables targets to
> disable strub, and disables it when -fsplit-stack is enabled.
>
> When strub support is disabled, the testsuite will now skip strub
> tests, and libgcc will not build the strub runtime components.
>
> Regstrapped on x86_64-linux-gnu.  Also tested with an additional patch
> for i386.cc that mirrors the nvptx.cc change, to check that strub gets
> disabled without noisy test results.  Ok to install?

GCC/nvptx target again builds as before, and testing just completed: as
expected, the "strub" test cases generally UNSUPPORTED, just something's
wrong with two of the 'c-c++-common/strub-unsupported*.c' test cases:

+PASS: c-c++-common/strub-unsupported-2.c  -Wc++-compat   (test for 
warnings, line 12)
+PASS: c-c++-common/strub-unsupported-2.c  -Wc++-compat  (test for excess 
errors)

+FAIL: c-c++-common/strub-unsupported-3.c  -Wc++-compat  (internal compiler 
error: in verify_curr_properties, at passes.cc:2198)
+PASS: c-c++-common/strub-unsupported-3.c  -Wc++-compat   (test for 
warnings, line 10)
+PASS: c-c++-common/strub-unsupported-3.c  -Wc++-compat   (test for 
warnings, line 13)
+FAIL: c-c++-common/strub-unsupported-3.c  -Wc++-compat  (test for excess 
errors)

+FAIL: c-c++-common/strub-unsupported.c  -Wc++-compat  (internal compiler 
error: in verify_curr_properties, at passes.cc:2198)
+PASS: c-c++-common/strub-unsupported.c  -Wc++-compat   (test for warnings, 
line 8)
+PASS: c-c++-common/strub-unsupported.c  -Wc++-compat   (test for warnings, 
line 11)
+FAIL: c-c++-common/strub-unsupported.c  -Wc++-compat  (test for excess 
errors)

Similar for C++ testing.

The ICE is:

during IPA pass: emutls
[...]/source-gcc/gcc/testsuite/c-c++-common/strub-unsupported-3.c:18:1: 
internal compiler error: in verify_curr_properties, at passes.cc:2198
0x10b671db verify_curr_properties
[...]/source-gcc/gcc/passes.cc:2198
0x10b67ca3 do_per_function
[...]/source-gcc/gcc/passes.cc:1694

I'm certainly fine if we deal with that one incrementally.


I'll answer your (quite right) '__builtin_{frame,stack}_address' remarks
(earlier email) separately, later on.


Grüße
 Thomas


> for  gcc/ChangeLog
>
>   * target.def (have_strub_support_for): New hook.
>   * doc/tm.texi.in: Document it.
>   * doc/tm.texi: Rebuild.
>   * ipa-strub.cc: Include target.h.
>   (strub_target_support_p): New.
>   (can_strub_p): Call it.  Test for no flag_split_stack.
>   (pass_ipa_strub::adjust_at_calls_call): Check for target
>   support.
>   * config/nvptx/nvptx.cc (TARGET_HAVE_STRUB_SUPPORT_FOR):
> Disable.
>   * doc/sourcebuild.texi (strub): Document new effective
>   target.
>
> for  gcc/testsuite/ChangeLog
>
>   * gcc.dg/strub-split-stack.c: New.
>   * gcc.dg/strub-unsupported.c: New.
>   * gcc.dg/strub-unsupported-2.c: New.
>   * gcc.dg/strub-unsupported-3.c: New.
>   * lib/target-supports.exp (check_effective_target_strub): New.
>   * c-c++-common/strub-O0.c: Require effective target strub.
>   * c-c++-common/strub-O1.c: Likewise.
>   * c-c++-common/strub-O2.c: Likewise.
>   * c-c++-common/strub-O2fni.c: Likewise.
>   * c-c++-common/strub-O3.c: Likewise.
>   * c-c++-common/strub-O3fni.c: Likewise.
>   * c-c++-common/strub-Og.c: Likewise.
>   * c-c++-common/strub-Os.c: Likewise.
>   * c-c++-common/strub-all1.c: Likewise.
>   * c-c++-common/strub-all2.c: Likewise.
>   * c-c++-common/strub-apply1.c: Likewise.
>   * c-c++-common/strub-apply2.c: Likewise.
>   * c-c++-common/strub-apply3.c: Likewise.
>   * c-c++-common/strub-apply4.c: Likewise.
>   * c-c++-common/strub-at-calls1.c: Likewise.
>   * c-c++-common/strub-at-calls2.c: Likewise.
>   * c-c++-common/strub-defer-O1.c: Likewise.
>   * c-c++-common/strub-defer-O2.c: Likewise.
>   * c-c++-common/strub-defer-O3.c: Likewise.
>   * c-c++-common/strub-defer-Os.c: Likewise.
>   * 

Re: [PATCH v2 0/3] [GCC] arm: vst1_types_xN ACLE intrinsics

2023-12-07 Thread Richard Earnshaw

Pushed, thanks.

R.


On 07/12/2023 15:28, ezra.sito...@arm.com wrote:

Add xN variants of vst1_types intrinsic.




Re: [PATCH] testsuite: adjust call to abort in excess-precision-12

2023-12-07 Thread Jakub Jelinek
On Thu, Dec 07, 2023 at 05:27:28PM +0100, Marc Poulhiès wrote:
> abort() is not always available, using the builtin as done in other
> tests.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.target/i386/excess-precision-12.C: call builtin_abort instead of 
> abort.
> ---
> Tested on x86_64-linux and x86_64-elf.
> 
> Ok for master?
> 
>  gcc/testsuite/g++.target/i386/excess-precision-12.C | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/g++.target/i386/excess-precision-12.C 
> b/gcc/testsuite/g++.target/i386/excess-precision-12.C
> index dff48c07c8b..e59f7c3b1fb 100644
> --- a/gcc/testsuite/g++.target/i386/excess-precision-12.C
> +++ b/gcc/testsuite/g++.target/i386/excess-precision-12.C
> @@ -13,8 +13,8 @@ main (void)
>unsigned long long int u = (1ULL << 63) + 1;
>  
>if ((f <=> u) >= 0)
> -abort ();
> +__builtin_abort ();
>  
>if ((u <=> f) <= 0)
> -abort ();
> +__builtin_abort ();

Why wouldn't they have abort and what else does __builtin_abort () expand
to?
There are 2000+ other tests in gcc.target/i386/ which call abort (),
not __builtin_abort (), after including <stdlib.h> directly or indirectly
or declaring it themselves.  This test in particular includes <cstdlib>.

Does whatever target you are running this into provide just std::abort ()
and not abort (); from <cstdlib>?  If so, perhaps it should call
std::abort (); instead of abort ().

Jakub



Re: [PATCH] testsuite: require avx_runtime for vect-simd-clone-17f

2023-12-07 Thread Jakub Jelinek
On Thu, Dec 07, 2023 at 05:28:09PM +0100, Marc Poulhiès wrote:
> The test fails parsing the 'vect' dump when not using -mavx. Make the
> dependency explicit.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/vect/vect-simd-clone-17f.c: Add dep on avx_runtime.
> ---
> Tested on x86_64-linux and x86_64-elf.
> 
> Ok for master?
> 
>  gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c 
> b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
> index 177521dc445..eb2b149981f 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
> @@ -1,6 +1,7 @@
>  /* { dg-require-effective-target vect_simd_clones } */
> +/* { dg-require-effective-target avx_runtime } */
>  /* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } 
> */
> -/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +/* { dg-additional-options "-mavx" } */
>  /* { dg-additional-options "-mno-avx512f" { target { { i?86*-*-* x86_64-*-* 
> } && { ! lp64 } } } } */
>  
>  #define TYPE __INT64_TYPE__

This looks wrong, then it won't be tested at all on non-x86 targets.

Jakub



Re: [PATCH] testsuite: add missing dg-require ifunc in pr105554.c

2023-12-07 Thread Jakub Jelinek
On Thu, Dec 07, 2023 at 05:25:39PM +0100, Marc Poulhiès wrote:
> The 'target_clones' attribute depends on the ifunc support.
> 
> gcc/testsuite/ChangeLog:
>   * gcc.target/i386/pr105554.c: Add dg-require ifunc.
> ---
> Tested on x86_64-linux and x86_64-elf.
> 
> Ok for master?
> 
>  gcc/testsuite/gcc.target/i386/pr105554.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/gcc/testsuite/gcc.target/i386/pr105554.c 
> b/gcc/testsuite/gcc.target/i386/pr105554.c
> index e9ef494270a..420987e5df8 100644
> --- a/gcc/testsuite/gcc.target/i386/pr105554.c
> +++ b/gcc/testsuite/gcc.target/i386/pr105554.c
> @@ -2,6 +2,7 @@
>  /* { dg-do compile } */
>  /* { dg-require-ifunc "" } */
>  /* { dg-options "-O2 -Wno-psabi -mno-sse3" } */
> +/* { dg-require-ifunc "" } */

That is 2 lines above this already...

Jakub



Re: [PATCH] testsuite: scev: expect fail on ilp32

2023-12-07 Thread Hans-Peter Nilsson
> Date: Mon, 4 Dec 2023 12:58:03 +0100 (CET)
> From: Richard Biener 

> On Sat, 2 Dec 2023, Hans-Peter Nilsson wrote:
> > > Date: Fri, 1 Dec 2023 08:07:14 +0100 (CET)
> > > From: Richard Biener 
> > > I read from your messages that the testcases pass on arm*-*-*?
> > Yes: they pass (currently XPASS) on arm-eabi and
> > arm-unknown-linux-gnueabi, default configurations.  But,
> > scev-3 and -5 fail with for example -mcpu=cortex-r5
> 
> I see.  As said, the testcases test for "cost" things, so that we
> "regressed" might mean we really "regressed" here.  Even the x86 -m32
> result is questionable.
> 
> Of course whether using a single IV makes sense for all archs is
> unknown.
> 
> Btw, if we turn the testcases into ones that are (sub-)target
> specific then we want to again use C code as input.
> 
> I think at this point we've lost track and I'm juggling between
> removing the testcases or moving them to a place they succeed
> (with some specific -mcpu=?)
> 
> Richard.

So to not drop the ball(s) on this, here's a patch with your
first alternative: remove them.

Ok?

-- >8 --
Subject: [PATCH] testsuite: Remove gcc.dg/tree-ssa/scev-3.c -4.c and 5.c

These tests were recently xfailed on ilp32 targets though
passing on almost all ilp32 targets (known exceptions: ia32
and some arm subtargets).  They've been changed around too
much to remain useful.

PR testsuite/112786
* gcc.dg/tree-ssa/scev-3.c, gcc.dg/tree-ssa/scev-4.c,
gcc.dg/tree-ssa/scev-5.c: Remove.
---
 gcc/testsuite/gcc.dg/tree-ssa/scev-3.c | 44 ---
 gcc/testsuite/gcc.dg/tree-ssa/scev-4.c | 49 --
 gcc/testsuite/gcc.dg/tree-ssa/scev-5.c | 44 ---
 3 files changed, 137 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-5.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
deleted file mode 100644
index beea9aed9fe9..
--- a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fgimple -fdump-tree-ivopts" } */
-
-int *a_p;
-int a[1000];
-
-void __GIMPLE (ssa,startwith ("loop"))
-f (int k)
-{
-  int i;
-  int * _1;
-
-__BB(2):
-  i_5 = k_4(D);
-  if (i_5 <= 999)
-goto __BB4;
-  else
-goto __BB3;
-
-__BB(3):
-  return;
-
-__BB(4):
-  goto __BB5;
-
-__BB(5):
-  i_12 = __PHI (__BB6: i_9, __BB4: i_5);
-  _1 = &a[i_12];
-  a_p = _1;
-  __MEM <int[1000]> ((int *)&a)[i_12] = 100;
-  i_9 = i_5 + i_12;
-  if (i_9 <= 999)
-goto __BB6;
-  else
-goto __BB3;
-
-__BB(6):
-  ;
-  goto __BB5;
-
-}
-
-/* Not all 32-bit systems fail this, but several do.  */
-/* { dg-final { scan-tree-dump-times "" 1 "ivopts" { xfail ilp32 } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
deleted file mode 100644
index a97f75f81f65..
--- a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fgimple -fdump-tree-ivopts" } */
-
-typedef struct {
-int x;
-int y;
-} S;
-
-int *a_p;
-S a[1000];
-
-void __GIMPLE (ssa, startwith ("loop"))
-f (int k)
-{
-  int i;
-  int * _1;
-
-__BB(2):
-  i_5 = k_4(D);
-  if (i_5 <= 999)
-goto __BB4;
-  else
-goto __BB3;
-
-__BB(3):
-  return;
-
-__BB(4):
-  goto __BB5;
-
-__BB(5):
-  i_12 = __PHI (__BB6: i_9, __BB4: i_5);
-  _1 = [i_12].y;
-  a_p = _1;
-  __MEM  ((int *))[i_12].y = 100;
-  i_9 = i_5 + i_12;
-  if (i_9 <= 999)
-goto __BB6;
-  else
-goto __BB3;
-
-__BB(6):
-  ;
-  goto __BB5;
-
-}
-
-/* Not all 32-bit systems fail this, but several do.  */
-/* { dg-final { scan-tree-dump-times "" 1 "ivopts" { xfail ilp32 } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/scev-5.c
deleted file mode 100644
index 08f4260403c4..
--- a/gcc/testsuite/gcc.dg/tree-ssa/scev-5.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fgimple -fdump-tree-ivopts" } */
-
-int *a_p;
-int a[1000];
-
-void __GIMPLE (ssa,startwith ("loop"))
-f (int k)
-{
-  long long int i;
-  int * _1;
-
-__BB(2):
-  i_5 = (long long int) k_4(D);
-  if (i_5 <= 999ll)
-goto __BB4;
-  else
-goto __BB3;
-
-__BB(3):
-  return;
-
-__BB(4):
-  goto __BB5;
-
-__BB(5):
-  i_12 = __PHI (__BB6: i_9, __BB4: i_5);
-  _1 = &a[i_12];
-  a_p = _1;
-  __MEM <int> ((int *)&a)[i_12] = 100;
-  i_9 = i_5 + i_12;
-  if (i_9 <= 999ll)
-goto __BB6;
-  else
-goto __BB3;
-
-__BB(6):
-  ;
-  goto __BB5;
-
-}
-
-/* Not all 32-bit systems fail this, but several do.  */
-/* { dg-final { scan-tree-dump-times "&a" 1 "ivopts" { xfail ilp32 } } } */
-- 
2.30.2



[PATCH] testsuite: require avx_runtime for vect-simd-clone-17f

2023-12-07 Thread Marc Poulhiès
The test fails parsing the 'vect' dump when not using -mavx. Make the
dependency explicit.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-simd-clone-17f.c: Add dep on avx_runtime.
---
Tested on x86_64-linux and x86_64-elf.

Ok for master?

 gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c 
b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
index 177521dc445..eb2b149981f 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
@@ -1,6 +1,7 @@
 /* { dg-require-effective-target vect_simd_clones } */
+/* { dg-require-effective-target avx_runtime } */
 /* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */
-/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-additional-options "-mavx" } */
 /* { dg-additional-options "-mno-avx512f" { target { { i?86*-*-* x86_64-*-* } 
&& { ! lp64 } } } } */
 
 #define TYPE __INT64_TYPE__
-- 
2.43.0



[PATCH] testsuite: adjust call to abort in excess-precision-12

2023-12-07 Thread Marc Poulhiès
abort() is not always available; use the builtin instead, as done in other
tests.

gcc/testsuite/ChangeLog:

* g++.target/i386/excess-precision-12.C: Call __builtin_abort instead of
abort.
---
Tested on x86_64-linux and x86_64-elf.

Ok for master?

 gcc/testsuite/g++.target/i386/excess-precision-12.C | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.target/i386/excess-precision-12.C 
b/gcc/testsuite/g++.target/i386/excess-precision-12.C
index dff48c07c8b..e59f7c3b1fb 100644
--- a/gcc/testsuite/g++.target/i386/excess-precision-12.C
+++ b/gcc/testsuite/g++.target/i386/excess-precision-12.C
@@ -13,8 +13,8 @@ main (void)
   unsigned long long int u = (1ULL << 63) + 1;
 
   if ((f <=> u) >= 0)
-abort ();
+__builtin_abort ();
 
   if ((u <=> f) <= 0)
-abort ();
+__builtin_abort ();
 }
-- 
2.43.0



[PATCH] testsuite: add missing dg-require ifunc in pr105554.c

2023-12-07 Thread Marc Poulhiès
The 'target_clones' attribute depends on the ifunc support.

gcc/testsuite/ChangeLog:
* gcc.target/i386/pr105554.c: Add dg-require ifunc.
---
Tested on x86_64-linux and x86_64-elf.

Ok for master?

 gcc/testsuite/gcc.target/i386/pr105554.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr105554.c 
b/gcc/testsuite/gcc.target/i386/pr105554.c
index e9ef494270a..420987e5df8 100644
--- a/gcc/testsuite/gcc.target/i386/pr105554.c
+++ b/gcc/testsuite/gcc.target/i386/pr105554.c
@@ -2,6 +2,7 @@
 /* { dg-do compile } */
 /* { dg-require-ifunc "" } */
 /* { dg-options "-O2 -Wno-psabi -mno-sse3" } */
+/* { dg-require-ifunc "" } */
 
 typedef long long v4di __attribute__((__vector_size__(32)));
 
-- 
2.43.0



Re: [PATCH] aarch64: add -fno-stack-protector to tests

2023-12-07 Thread Marek Polacek
On Thu, Dec 07, 2023 at 04:05:47PM +0000, Richard Sandiford wrote:
> Marek Polacek  writes:
> > Bootstrapped/regtested on aarch64-pc-linux-gnu, ok for trunk/13?
> >
> > -- >8 --
> > These tests fail when the testsuite is executed with 
> > -fstack-protector-strong.
> > To avoid this, this patch adds -fno-stack-protector to dg-options.
> >
> > The list of FAILs is appended.  As you can see, it's mostly about
> > scan-assembler-* which are sort of expected to fail with the stack
> > protector on.
> >
> > FAIL: gcc.target/aarch64/ldp_stp_unaligned_2.c scan-assembler-not 
> > mov\\tx[0-9]+, sp
> > FAIL: gcc.target/aarch64/shadow_call_stack_5.c scan-assembler-times 
> > stptx29, x30, [sp] 1
> > FAIL: gcc.target/aarch64/shadow_call_stack_5.c scan-assembler ldrtx29, 
> > [sp]
> > FAIL: gcc.target/aarch64/shadow_call_stack_6.c scan-assembler-times 
> > strtx30, [sp] 1
> > FAIL: gcc.target/aarch64/shadow_call_stack_7.c scan-assembler-times 
> > stptx19, x30, [sp, -[0-9]+]! 1
> > FAIL: gcc.target/aarch64/shadow_call_stack_7.c scan-assembler ldrtx19, 
> > [sp], [0-9]+
> > FAIL: gcc.target/aarch64/shadow_call_stack_8.c scan-assembler-times 
> > stptx19, x20, [sp, -[0-9]+]! 1
> > FAIL: gcc.target/aarch64/shadow_call_stack_8.c scan-assembler ldptx19, 
> > x20, [sp], [0-9]+
> > FAIL: gcc.target/aarch64/stack-check-12.c scan-assembler-times strtxzr, 
> >  2
> > FAIL: gcc.target/aarch64/stack-check-prologue-11.c scan-assembler-times 
> > strs+xzr, [sp, 1024] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-12.c scan-assembler-times 
> > strs+xzr, [sp, 1024] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-13.c scan-assembler-times 
> > strs+xzr, [sp, 1024] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-13.c scan-assembler-times 
> > strs+x30, [sp] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-14.c scan-assembler-times 
> > strs+xzr, [sp, 1024] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-14.c scan-assembler-times 
> > strs+x30, [sp] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-15.c scan-assembler-times 
> > strs+xzr, [sp, 1024] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-15.c scan-assembler-times 
> > strs+x30, [sp] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-17.c check-function-bodies 
> > test1
> > FAIL: gcc.target/aarch64/stack-check-prologue-17.c check-function-bodies 
> > test2
> > FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies 
> > test1
> > FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies 
> > test2
> > FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies 
> > test3
> > FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies 
> > test1
> > FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies 
> > test2
> > FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies 
> > test3
> > FAIL: gcc.target/aarch64/stack-check-prologue-2.c scan-assembler-times 
> > strs+xzr, 0
> > FAIL: gcc.target/aarch64/stack-check-prologue-5.c scan-assembler-times 
> > strs+xzr, [sp, 1024] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-6.c scan-assembler-times 
> > strs+xzr, [sp, 1024] 1
> > FAIL: gcc.target/aarch64/stack-check-prologue-8.c scan-assembler-times 
> > strs+xzr, [sp, 1024] 2
> > FAIL: gcc.target/aarch64/stack-check-prologue-9.c scan-assembler-times 
> > strs+xzr, [sp, 1024] 1
> > FAIL: gcc.target/aarch64/test_frame_1.c scan-assembler-times str\\tx30, 
> > [sp, -[0-9]+]! 2
> > FAIL: gcc.target/aarch64/test_frame_10.c scan-assembler-times stp\\tx19, 
> > x30, [sp, [0-9]+] 1
> > FAIL: gcc.target/aarch64/test_frame_10.c scan-assembler ldp\\tx19, x30, 
> > [sp, [0-9]+]
> > FAIL: gcc.target/aarch64/test_frame_11.c scan-assembler-times stp\\tx29, 
> > x30, [sp, -[0-9]+]! 2
> > FAIL: gcc.target/aarch64/test_frame_13.c scan-assembler-times stp\\tx29, 
> > x30, [sp] 1
> > FAIL: gcc.target/aarch64/test_frame_15.c scan-assembler-times stp\\tx29, 
> > x30, [sp, [0-9]+] 1
> > FAIL: gcc.target/aarch64/test_frame_2.c scan-assembler-times stp\\tx19, 
> > x30, [sp, -[0-9]+]! 1
> > FAIL: gcc.target/aarch64/test_frame_2.c scan-assembler ldp\\tx19, x30, 
> > [sp], [0-9]+
> > FAIL: gcc.target/aarch64/test_frame_4.c scan-assembler-times stp\\tx19, 
> > x30, [sp, -[0-9]+]! 1
> > FAIL: gcc.target/aarch64/test_frame_4.c scan-assembler ldp\\tx19, x30, 
> > [sp], [0-9]+
> > FAIL: gcc.target/aarch64/test_frame_6.c scan-assembler-times str\\tx30, 
> > [sp] 1
> > FAIL: gcc.target/aarch64/test_frame_7.c scan-assembler-times stp\\tx19, 
> > x30, [sp] 1
> > FAIL: gcc.target/aarch64/test_frame_8.c scan-assembler-times str\\tx30, 
> > [sp, [0-9]+] 1
> > FAIL: 

Re: [PATCH] aarch64: add -fno-stack-protector to tests

2023-12-07 Thread Richard Sandiford
Marek Polacek  writes:
> Bootstrapped/regtested on aarch64-pc-linux-gnu, ok for trunk/13?
>
> -- >8 --
> These tests fail when the testsuite is executed with -fstack-protector-strong.
> To avoid this, this patch adds -fno-stack-protector to dg-options.
>
> The list of FAILs is appended.  As you can see, it's mostly about
> scan-assembler-* which are sort of expected to fail with the stack
> protector on.
>
> FAIL: gcc.target/aarch64/ldp_stp_unaligned_2.c scan-assembler-not 
> mov\\tx[0-9]+, sp
> FAIL: gcc.target/aarch64/shadow_call_stack_5.c scan-assembler-times 
> stptx29, x30, [sp] 1
> FAIL: gcc.target/aarch64/shadow_call_stack_5.c scan-assembler ldrtx29, 
> [sp]
> FAIL: gcc.target/aarch64/shadow_call_stack_6.c scan-assembler-times 
> strtx30, [sp] 1
> FAIL: gcc.target/aarch64/shadow_call_stack_7.c scan-assembler-times 
> stptx19, x30, [sp, -[0-9]+]! 1
> FAIL: gcc.target/aarch64/shadow_call_stack_7.c scan-assembler ldrtx19, 
> [sp], [0-9]+
> FAIL: gcc.target/aarch64/shadow_call_stack_8.c scan-assembler-times 
> stptx19, x20, [sp, -[0-9]+]! 1
> FAIL: gcc.target/aarch64/shadow_call_stack_8.c scan-assembler ldptx19, 
> x20, [sp], [0-9]+
> FAIL: gcc.target/aarch64/stack-check-12.c scan-assembler-times strtxzr,  2
> FAIL: gcc.target/aarch64/stack-check-prologue-11.c scan-assembler-times 
> strs+xzr, [sp, 1024] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-12.c scan-assembler-times 
> strs+xzr, [sp, 1024] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-13.c scan-assembler-times 
> strs+xzr, [sp, 1024] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-13.c scan-assembler-times 
> strs+x30, [sp] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-14.c scan-assembler-times 
> strs+xzr, [sp, 1024] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-14.c scan-assembler-times 
> strs+x30, [sp] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-15.c scan-assembler-times 
> strs+xzr, [sp, 1024] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-15.c scan-assembler-times 
> strs+x30, [sp] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-17.c check-function-bodies test1
> FAIL: gcc.target/aarch64/stack-check-prologue-17.c check-function-bodies test2
> FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies test1
> FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies test2
> FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies test3
> FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies test1
> FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies test2
> FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies test3
> FAIL: gcc.target/aarch64/stack-check-prologue-2.c scan-assembler-times 
> strs+xzr, 0
> FAIL: gcc.target/aarch64/stack-check-prologue-5.c scan-assembler-times 
> strs+xzr, [sp, 1024] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-6.c scan-assembler-times 
> strs+xzr, [sp, 1024] 1
> FAIL: gcc.target/aarch64/stack-check-prologue-8.c scan-assembler-times 
> strs+xzr, [sp, 1024] 2
> FAIL: gcc.target/aarch64/stack-check-prologue-9.c scan-assembler-times 
> strs+xzr, [sp, 1024] 1
> FAIL: gcc.target/aarch64/test_frame_1.c scan-assembler-times str\\tx30, 
> [sp, -[0-9]+]! 2
> FAIL: gcc.target/aarch64/test_frame_10.c scan-assembler-times stp\\tx19, x30, 
> [sp, [0-9]+] 1
> FAIL: gcc.target/aarch64/test_frame_10.c scan-assembler ldp\\tx19, x30, 
> [sp, [0-9]+]
> FAIL: gcc.target/aarch64/test_frame_11.c scan-assembler-times stp\\tx29, x30, 
> [sp, -[0-9]+]! 2
> FAIL: gcc.target/aarch64/test_frame_13.c scan-assembler-times stp\\tx29, x30, 
> [sp] 1
> FAIL: gcc.target/aarch64/test_frame_15.c scan-assembler-times stp\\tx29, x30, 
> [sp, [0-9]+] 1
> FAIL: gcc.target/aarch64/test_frame_2.c scan-assembler-times stp\\tx19, x30, 
> [sp, -[0-9]+]! 1
> FAIL: gcc.target/aarch64/test_frame_2.c scan-assembler ldp\\tx19, x30, 
> [sp], [0-9]+
> FAIL: gcc.target/aarch64/test_frame_4.c scan-assembler-times stp\\tx19, x30, 
> [sp, -[0-9]+]! 1
> FAIL: gcc.target/aarch64/test_frame_4.c scan-assembler ldp\\tx19, x30, 
> [sp], [0-9]+
> FAIL: gcc.target/aarch64/test_frame_6.c scan-assembler-times str\\tx30, 
> [sp] 1
> FAIL: gcc.target/aarch64/test_frame_7.c scan-assembler-times stp\\tx19, x30, 
> [sp] 1
> FAIL: gcc.target/aarch64/test_frame_8.c scan-assembler-times str\\tx30, 
> [sp, [0-9]+] 1
> FAIL: gcc.target/aarch64/test_frame_8.c scan-assembler ldr\\tx30, [sp, 
> [0-9]+]
> FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times 
> cmps+x[0-9]+, 61440 4
> FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times 
> subs+x[0-9]+, x[0-9]+, 61440 4
> FAIL: 

Re: [PATCH] driver: Fix memory leak.

2023-12-07 Thread Jakub Jelinek
On Thu, Dec 07, 2023 at 04:01:11PM +0000, Costas Argyris wrote:
> Thanks for all the explanations.
> 
> In that case I restrict this patch to just freeing the buffer from
> within driver::finalize only (I think it should be XDELETEVEC
> instead of XDELETE, no?).

Both macros are exactly the same, but XDELETEVEC is probably better
counterpart to XNEWVEC.

Jakub



RE: [ARC PATCH] Add *extvsi_n_0 define_insn_and_split for PR 110717.

2023-12-07 Thread Roger Sayle

Hi Jeff,
Doh!  Great catch.  The perils of not (yet) being able to actually
run any ARC execution tests myself.

> Shouldn't operands[4] be GEN_INT ((HOST_WIDE_INT_1U << tmp) - 1)?
Yes(-ish), operands[4] should be GEN_INT(HOST_WIDE_INT_1U << (tmp - 1)).

And the 32s in the test cases need to be 16s (the MSB of a five bit field is 
16).

You're probably also thinking the same thing that I am... that it might be 
possible
to implement this in the middle-end, but things are complicated by combine's
make_compound_operation/expand_compound_operation, and that
combine doesn't (normally) like turning two instructions into three.

Fingers-crossed the attached patch works better on the nightly testers.

Thanks in advance,
Roger
--

> -Original Message-
> From: Jeff Law 
> Sent: 07 December 2023 14:47
> To: Roger Sayle ; gcc-patches@gcc.gnu.org
> Cc: 'Claudiu Zissulescu' 
> Subject: Re: [ARC PATCH] Add *extvsi_n_0 define_insn_and_split for PR 110717.
> 
> On 12/5/23 06:59, Roger Sayle wrote:
> > This patch improves the code generated for bitfield sign extensions on
> > ARC cpus without a barrel shifter.
> >
> >
> > Compiling the following test case:
> >
> > int foo(int x) { return (x<<27)>>27; }
> >
> > with -O2 -mcpu=em, generates two loops:
> >
> > foo:mov lp_count,27
> >  lp  2f
> >  add r0,r0,r0
> >  nop
> > 2:  # end single insn loop
> >  mov lp_count,27
> >  lp  2f
> >  asr r0,r0
> >  nop
> > 2:  # end single insn loop
> >  j_s [blink]
> >
> >
> > and the closely related test case:
> >
> > struct S { int a : 5; };
> > int bar (struct S *p) { return p->a; }
> >
> > generates the slightly better:
> >
> > bar:ldb_s   r0,[r0]
> >  mov_s   r2,0;3
> >  add3r0,r2,r0
> >  sexb_s  r0,r0
> >  asr_s   r0,r0
> >  asr_s   r0,r0
> >  j_s.d   [blink]
> >  asr_s   r0,r0
> >
> > which uses 6 instructions to perform this particular sign extension.
> > It turns out that sign extensions can always be implemented using at
> > most three instructions on ARC (without a barrel shifter) using the
> > idiom ((x)^msb)-msb [as described in section "2-5 Sign Extension"
> > of Henry Warren's book "Hacker's Delight"].  Using this, the sign
> > extensions above on ARC's EM both become:
> >
> >  bmsk_s  r0,r0,4
> >  xor r0,r0,32
> >  sub r0,r0,32
> >
> > which takes about 3 cycles, compared to the ~112 cycles for the loops
> > in foo.
> >
> >
> > Tested with a cross-compiler to arc-linux hosted on x86_64, with no
> > new (compile-only) regressions from make -k check.
> > Ok for mainline if this passes Claudiu's nightly testing?
> >
> >
> > 2023-12-05  Roger Sayle
> >
> > gcc/ChangeLog
> >  * config/arc/arc.md (*extvsi_n_0): New define_insn_and_split to
> >  implement SImode sign extract using a AND, XOR and MINUS sequence.
> >
> > gcc/testsuite/ChangeLog
> >  * gcc.target/arc/extvsi-1.c: New test case.
> >  * gcc.target/arc/extvsi-2.c: Likewise.
> >
> >
> > Thanks in advance,
> > Roger
> > --
> >
> >
> > patchar.txt
> >
> > diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index
> > bf9f88eff047..5ebaf2e20ab0 100644
> > --- a/gcc/config/arc/arc.md
> > +++ b/gcc/config/arc/arc.md
> > @@ -6127,6 +6127,26 @@ archs4x, archs4xd"
> > ""
> > [(set_attr "length" "8")])
> >
> > +(define_insn_and_split "*extvsi_n_0"
> > +  [(set (match_operand:SI 0 "register_operand" "=r")
> > +   (sign_extract:SI (match_operand:SI 1 "register_operand" "0")
> > +(match_operand:QI 2 "const_int_operand")
> > +(const_int 0)))]
> > +  "!TARGET_BARREL_SHIFTER
> > +   && IN_RANGE (INTVAL (operands[2]), 2,
> > +   (optimize_insn_for_size_p () ? 28 : 30))"
> > +  "#"
> > +  "&& 1"
> > +[(set (match_dup 0) (and:SI (match_dup 0) (match_dup 3)))  (set
> > +(match_dup 0) (xor:SI (match_dup 0) (match_dup 4)))  (set (match_dup
> > +0) (minus:SI (match_dup 0) (match_dup 4)))] {
> > +  int tmp = INTVAL (operands[2]);
> > +  operands[3] = GEN_INT (~(HOST_WIDE_INT_M1U << tmp));
> > +  operands[4] = GEN_INT (HOST_WIDE_INT_1U << tmp);
> Shouldn't operands[4] be GEN_INT ((HOST_WIDE_INT_1U << tmp) - 1)?
> Otherwise it's flipping the wrong bit AFAICT.
> 
> H8 can benefit from the same transformation which is how I found this little 
> goof.
> It's not as big a gain as ARC, but it does affect one of those 
> builtin-overflow tests
> which tend to dominate testing time on the H8.
> 
> jeff
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index bf9f88eff047..d980876eff8f 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -6127,6 +6127,26 @@ archs4x, archs4xd"
   ""
   [(set_attr "length" "8")])
 
+(define_insn_and_split "*extvsi_n_0"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+   (sign_extract:SI (match_operand:SI 1 "register_operand" "0")
+  

Re: [PATCH] driver: Fix memory leak.

2023-12-07 Thread Costas Argyris
Thanks for all the explanations.

In that case I restrict this patch to just freeing the buffer from
within driver::finalize only (I think it should be XDELETEVEC
instead of XDELETE, no?).

On Thu, 7 Dec 2023 at 15:42, Jakub Jelinek  wrote:

> On Thu, Dec 07, 2023 at 03:16:29PM +0000, Costas Argyris wrote:
> > >  Still reachable memory at exit e.g. from valgrind is not a bug.
> >
> > Indeed, this is coming from a valgrind report here:
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93019
> >
> > where it was noted that the driver memory leaks could be
> > problematic for JIT.
>
> In the invoke_embedded_driver JIT case yes, that calls driver::finalize (),
> which is why it should be freed before clearing the pointer in there (as
> then it is a real leak).
>
> > So, since using std::vector did reduce the valgrind records
> > by one (I only targeted a single variable to begin with) I took
> > that as a good sign.
> >
> > Regarding adding a call to XDELETE (mdswitches), yes,
> > that would help in the case where driver::finalize () is actually
> > called, which I think is for JIT.  I was trying to take care of the
> > case where it doesn't get called as well, but from what you say
> > I take it that this case is not of interest.
>
> That is wasted compile time on a non-issue.
>
> If you see a JIT issue with definitely lost records, that is something
> that obviously should be fixed (but even in that area I think we've been a
> little bit lazy in the option handling).
> The most important is that the actual compiler binaries (cc1, cc1plus, ...)
> don't leak memory (in the definitely lost kind) like crazy, we have
> --enable-checking=valgrind
> for that purpose, where the driver runs cc1/cc1plus etc. under valgrind,
> but this is very expensive and slow, so usually it is run once during a
> cycle (if at all), on a fast machine could take even in non-bootstrap mode
> a weekend to go through the whole testsuite, then one can look at the
> leaks.
>
> Jakub
>
>


0001-driver-Fix-memory-leak-in-driver-finalize.patch
Description: Binary data


Re: [PATCH] c-family: Fix up -fno-debug-cpp [PR111965]

2023-12-07 Thread Marek Polacek
On Thu, Dec 07, 2023 at 08:53:37AM +0100, Jakub Jelinek wrote:
> Hi!
> 
> As can be seen in the second testcase, -fno-debug-cpp is actually
> implemented the same as -fdebug-cpp and so doesn't turn the debugging
> off.
> 
> The following patch fixes that.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok, thanks.
 
> 2023-12-07  Andrew Pinski  
>   Jakub Jelinek  
> 
>   PR preprocessor/111965
> gcc/c-family/
>   * c-opts.cc (c_common_handle_option) <case OPT_fdebug_cpp>: Set
>   cpp_opts->debug to value rather than 1.
> gcc/testsuite/
>   * gcc.dg/cpp/pr111965-1.c: New test.
>   * gcc.dg/cpp/pr111965-2.c: New test.
> 
> --- gcc/c-family/c-opts.cc.jj 2023-12-05 09:06:05.867881859 +0100
> +++ gcc/c-family/c-opts.cc2023-12-06 18:02:20.445469185 +0100
> @@ -532,7 +532,7 @@ c_common_handle_option (size_t scode, co
>break;
>  
>  case OPT_fdebug_cpp:
> -  cpp_opts->debug = 1;
> +  cpp_opts->debug = value;
>break;
>  
>  case OPT_ftrack_macro_expansion:
> --- gcc/testsuite/gcc.dg/cpp/pr111965-1.c.jj  2023-12-06 17:54:03.696424916 
> +0100
> +++ gcc/testsuite/gcc.dg/cpp/pr111965-1.c 2023-12-06 18:01:32.341142764 
> +0100
> @@ -0,0 +1,5 @@
> +/* PR preprocessor/111965
> +   { dg-do preprocess }
> +   { dg-options "-fdebug-cpp" }
> +   { dg-final { scan-file pr111965-1.i "P:;F:;" } } */
> +int x;
> --- gcc/testsuite/gcc.dg/cpp/pr111965-2.c.jj  2023-12-06 17:59:36.953758477 
> +0100
> +++ gcc/testsuite/gcc.dg/cpp/pr111965-2.c 2023-12-06 18:01:27.147215490 
> +0100
> @@ -0,0 +1,5 @@
> +/* PR preprocessor/111965
> +   { dg-do preprocess }
> +   { dg-options "-fdebug-cpp -fno-debug-cpp" }
> +   { dg-final { scan-file-not pr111965-2.i "P:;F:;" } } */
> +int x;
> 
>   Jakub
> 

Marek



[PATCH V3 3/4] OpenMP: Use enumerators for names of trait-sets and traits

2023-12-07 Thread Sandra Loosemore
This patch introduces enumerators to represent trait-set names and
trait names, which makes it easier to use tables to control other
behavior and for switch statements to dispatch on the tags.  The tags
are stored in the same place in the TREE_LIST structure (OMP_TSS_ID or
OMP_TS_ID) and are encoded there as integer constants.

gcc/ChangeLog
* omp-selectors.h: New file.
* omp-general.h: Include omp-selectors.h.
(OMP_TSS_CODE, OMP_TSS_NAME): New.
(OMP_TS_CODE, OMP_TS_NAME): New.
(make_trait_set_selector, make_trait_selector): Adjust declarations.
(omp_construct_traits_to_codes): Likewise.
(omp_context_selector_set_compare): Likewise.
(omp_get_context_selector): Likewise.
(omp_get_context_selector_list): New.
* omp-general.cc (omp_construct_traits_to_codes): Pass length in
as argument instead of returning it.  Make it table-driven.
(omp_tss_map): New.
(kind_properties, vendor_properties, extension_properties): New.
(atomic_default_mem_order_properties): New.
(omp_ts_map): New.
(omp_check_context_selector): Simplify lookup and dispatch logic.
(omp_mark_declare_variant): Ignore variants with unknown construct
selectors.  Adjust for new representation.
(make_trait_set_selector, make_trait_selector): Adjust for new
representations.
(omp_context_selector_matches): Simplify dispatch logic.  Avoid
fixed-sized buffers and adjust call to omp_construct_traits_to_codes.
(omp_context_selector_props_compare): Adjust for new representations
and simplify dispatch logic.
(omp_context_selector_set_compare): Likewise.
(omp_context_selector_compare): Likewise.
(omp_get_context_selector): Adjust for new representations, and split
out...
(omp_get_context_selector_list): New function.
(omp_lookup_tss_code): New.
(omp_lookup_ts_code): New.
(omp_context_compute_score): Adjust for new representations.  Avoid
fixed-sized buffers and magic numbers.  Adjust call to
omp_construct_traits_to_codes.
* gimplify.cc (omp_construct_selector_matches): Avoid use of
fixed-size buffer.  Adjust call to omp_construct_traits_to_codes.

gcc/c/ChangeLog
* c-parser.cc (omp_construct_selectors): Delete.
(omp_device_selectors): Delete.
(omp_implementation_selectors): Delete.
(omp_user_selectors): Delete.
(c_parser_omp_context_selector): Adjust for new representations
and simplify dispatch logic.  Uniformly warn instead of sometimes
error when an unknown selector is found.
(c_parser_omp_context_selector_specification): Likewise.
(c_finish_omp_declare_variant): Adjust for new representations.

gcc/cp/ChangeLog
* decl.cc (omp_declare_variant_finalize_one): Adjust for new
representations.
* parser.cc (omp_construct_selectors): Delete.
(omp_device_selectors): Delete.
(omp_implementation_selectors): Delete.
(omp_user_selectors): Delete.
(cp_parser_omp_context_selector): Adjust for new representations
and simplify dispatch logic.  Uniformly warn instead of sometimes
error when an unknown selector is found.
(cp_parser_omp_context_selector_specification): Likewise.
* pt.cc (tsubst_attribute): Adjust for new representations.

gcc/fortran/ChangeLog
* gfortran.h: Include omp-selectors.h.
(enum gfc_omp_trait_property_kind): Delete, and replace all
references with equivalent omp_tp_type enumerators.
(struct gfc_omp_trait_property): Update for omp_tp_type.
(struct gfc_omp_selector): Replace string name with new enumerator.
(struct gfc_omp_set_selector): Likewise.
* openmp.cc (gfc_free_omp_trait_property_list): Update for
omp_tp_type.
(omp_construct_selectors): Delete.
(omp_device_selectors): Delete.
(omp_implementation_selectors): Delete.
(omp_user_selectors): Delete.
(gfc_ignore_trait_property_extension): New.
(gfc_ignore_trait_property_extension_list): New.
(gfc_match_omp_selector): Adjust for new representations and simplify
dispatch logic.  Uniformly warn instead of sometimes error when an
unknown selector is found.
(gfc_match_omp_context_selector): Adjust for new representations.
(gfc_match_omp_context_selector_specification): Likewise.
* trans-openmp.cc (gfc_trans_omp_declare_variant): Adjust for
new representations.

gcc/testsuite/
* c-c++-common/gomp/declare-variant-1.c: Expect warning on
unknown selectors.
* c-c++-common/gomp/declare-variant-2.c: Likewise.
* gfortran.dg/gomp/declare-variant-1.f90: Likewise.
* gfortran.dg/gomp/declare-variant-2.f90: Likewise.
---
 gcc/c/c-parser.cc | 185 

[PATCH V3 4/4] OpenMP: Permit additional selector properties

2023-12-07 Thread Sandra Loosemore
This patch adds "hpe" to the known properties for the "vendor" selector,
and support for "acquire" and "release" for "atomic_default_mem_order".

gcc/ChangeLog
* omp-general.cc (vendor_properties): Add "hpe".
(atomic_default_mem_order_properties): Add "acquire" and "release".
(omp_context_selector_matches): Handle "acquire" and "release".

gcc/testsuite/ChangeLog
* c-c++-common/gomp/declare-variant-2.c: Don't expect error on
"acquire" and "release".
* gfortran.dg/gomp/declare-variant-2a.f90: Likewise.
---
 gcc/omp-general.cc| 10 --
 gcc/testsuite/c-c++-common/gomp/declare-variant-2.c   |  4 ++--
 gcc/testsuite/gfortran.dg/gomp/declare-variant-2a.f90 |  4 ++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index 5f0cb041ffa..4f7c83fbd2c 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1126,12 +1126,12 @@ const char *omp_tss_map[] =
 static const char *const kind_properties[] =
   { "host", "nohost", "cpu", "gpu", "fpga", "any", NULL };
 static const char *const vendor_properties[] =
-  { "amd", "arm", "bsc", "cray", "fujitsu", "gnu", "ibm", "intel",
+  { "amd", "arm", "bsc", "cray", "fujitsu", "gnu", "hpe", "ibm", "intel",
 "llvm", "nvidia", "pgi", "ti", "unknown", NULL };
 static const char *const extension_properties[] =
   { NULL };
 static const char *const atomic_default_mem_order_properties[] =
-  { "seq_cst", "relaxed", "acq_rel", NULL };
+  { "seq_cst", "relaxed", "acq_rel", "acquire", "release", NULL };
 
 struct omp_ts_info omp_ts_map[] =
   {
@@ -1551,6 +1551,12 @@ omp_context_selector_matches (tree ctx)
  else if (!strcmp (prop, "acq_rel")
   && omo != OMP_MEMORY_ORDER_ACQ_REL)
return 0;
+ else if (!strcmp (prop, "acquire")
+  && omo != OMP_MEMORY_ORDER_ACQUIRE)
+   return 0;
+ else if (!strcmp (prop, "release")
+  && omo != OMP_MEMORY_ORDER_RELEASE)
+   return 0;
}
  break;
case OMP_TRAIT_DEVICE_ARCH:
diff --git a/gcc/testsuite/c-c++-common/gomp/declare-variant-2.c 
b/gcc/testsuite/c-c++-common/gomp/declare-variant-2.c
index 97285fa3b74..bc3f443379f 100644
--- a/gcc/testsuite/c-c++-common/gomp/declare-variant-2.c
+++ b/gcc/testsuite/c-c++-common/gomp/declare-variant-2.c
@@ -105,9 +105,9 @@ void f50 (void);
/* { dg-error "expected '\\\}' before '\\(' token" "" {
 void f51 (void);   
/* { dg-error "expected '\\\}' before '\\(' token" "" { target c } .-1 } */
 #pragma omp declare variant (f1) 
match(implementation={atomic_default_mem_order})  /* { dg-error "expected 
'\\(' before '\\\}' token" } */
 void f52 (void);
-#pragma omp declare variant (f1) 
match(implementation={atomic_default_mem_order(acquire)}) /* { dg-error 
"incorrect property 'acquire' of 'atomic_default_mem_order' selector" } */
+#pragma omp declare variant (f1) 
match(implementation={atomic_default_mem_order(acquire)})
 void f53 (void);
-#pragma omp declare variant (f1) 
match(implementation={atomic_default_mem_order(release)}) /* { dg-error 
"incorrect property 'release' of 'atomic_default_mem_order' selector" } */
+#pragma omp declare variant (f1) 
match(implementation={atomic_default_mem_order(release)})
 void f54 (void);
 #pragma omp declare variant (f1) 
match(implementation={atomic_default_mem_order(foobar)})  /* { dg-error 
"incorrect property 'foobar' of 'atomic_default_mem_order' selector" } */
 void f55 (void);
diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-variant-2a.f90 
b/gcc/testsuite/gfortran.dg/gomp/declare-variant-2a.f90
index 56de1177789..edc9b27f884 100644
--- a/gcc/testsuite/gfortran.dg/gomp/declare-variant-2a.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/declare-variant-2a.f90
@@ -29,10 +29,10 @@ contains
 !$omp declare variant (f1) match(implementation={vendor("foobar")}) ! { 
dg-warning "unknown property '.foobar.' of 'vendor' selector" }
   end subroutine
   subroutine f53 ()
-!$omp declare variant (f1) 
match(implementation={atomic_default_mem_order(acquire)})  ! { dg-error 
"incorrect property 'acquire' of 'atomic_default_mem_order' selector" }
+!$omp declare variant (f1) 
match(implementation={atomic_default_mem_order(acquire)})
   end subroutine
   subroutine f54 ()
-!$omp declare variant (f1) 
match(implementation={atomic_default_mem_order(release)})  ! { dg-error 
"incorrect property 'release' of 'atomic_default_mem_order' selector" }
+!$omp declare variant (f1) 
match(implementation={atomic_default_mem_order(release)})
   end subroutine
   subroutine f55 ()
 !$omp declare variant (f1) 
match(implementation={atomic_default_mem_order(foobar)}) ! { dg-error 
"incorrect 

[PATCH V3 1/4] OpenMP: Introduce accessor macros and constructors for context selectors.

2023-12-07 Thread Sandra Loosemore
This patch hides the underlying nested TREE_LIST structure of context
selectors behind accessor macros that have more meaningful names than
the generic TREE_PURPOSE/TREE_VALUE accessors.  There is a slight
change to the representation in that the score expression in
trait-selectors has a distinguished tag and is separated from the
ordinary properties, although internally it is still represented as
the first item in the TREE_VALUE of the selector.  This patch also renames
some local variables with slightly more descriptive names so it is easier
to track whether something is a selector-set, selector, or property.

gcc/ChangeLog
* omp-general.h (OMP_TS_SCORE_NODE): New.
(OMP_TSS_ID, OMP_TSS_TRAIT_SELECTORS): New.
(OMP_TS_ID, OMP_TS_SCORE, OMP_TS_PROPERTIES): New.
(OMP_TP_NAME, OMP_TP_VALUE): New.
(make_trait_set_selector): Declare.
(make_trait_selector): Declare.
(make_trait_property): Declare.
(omp_constructor_traits_to_codes): Rename to
omp_construct_traits_to_codes.
* omp-general.cc (omp_constructor_traits_to_codes): Rename
to omp_construct_traits_to_codes.  Use new accessors.
(omp_check_context_selector): Use new accessors.
(make_trait_set_selector): New.
(make_trait_selector): New.
(make_trait_property): New.
(omp_context_name_list_prop): Use new accessors.
(omp_context_selector_matches): Use new accessors.
(omp_context_selector_props_compare): Use new accessors.
(omp_context_selector_set_compare): Use new accessors.
(omp_get_context_selector): Use new accessors.
(omp_context_compute_score): Use new accessors.
* gimplify.cc (omp_construct_selector_matches): Adjust for renaming
of omp_constructor_traits_to_codes.

gcc/c/ChangeLog
* c-parser.cc (c_parser_omp_context_selector): Use new constructors.

gcc/cp/ChangeLog
* parser.cc (cp_parser_omp_context_selector): Use new constructors.
* pt.cc: Include omp-general.h.
(tsubst_attribute): Use new context selector accessors and
 constructors.

gcc/fortran/ChangeLog
* trans-openmp.cc (gfc_trans_omp_declare_variant): Use new
constructors.
---
 gcc/c/c-parser.cc   |  27 ++--
 gcc/cp/parser.cc|  30 ++--
 gcc/cp/pt.cc|  82 ++
 gcc/fortran/trans-openmp.cc |  27 ++--
 gcc/gimplify.cc |   4 +-
 gcc/omp-general.cc  | 293 ++--
 gcc/omp-general.h   |  48 +-
 7 files changed, 297 insertions(+), 214 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index df9a07928b5..9a17a8c5760 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -24353,7 +24353,10 @@ static const char *const omp_user_selectors[] = {
  trait-selector-name[([trait-score:]trait-property[,trait-property[,...]])]
 
trait-score:
- score(score-expression)  */
+ score(score-expression)
+
+   Note that this function returns a list of trait selectors for the
+   trait-selector-set SET.  */
 
 static tree
 c_parser_omp_context_selector (c_parser *parser, tree set, tree parms)
@@ -24372,6 +24375,7 @@ c_parser_omp_context_selector (c_parser *parser, tree 
set, tree parms)
}
 
   tree properties = NULL_TREE;
+  tree scoreval = NULL_TREE;
   const char *const *selectors = NULL;
   bool allow_score = true;
   bool allow_user = false;
@@ -24478,8 +24482,7 @@ c_parser_omp_context_selector (c_parser *parser, tree 
set, tree parms)
error_at (token->location, "score argument must be "
  "non-negative");
  else
-   properties = tree_cons (get_identifier (" score"),
-   score, properties);
+   scoreval = score;
}
  token = c_parser_peek_token (parser);
}
@@ -24492,7 +24495,8 @@ c_parser_omp_context_selector (c_parser *parser, tree 
set, tree parms)
{
  t = c_parser_expr_no_commas (parser, NULL).value;
  if (TREE_CODE (t) == STRING_CST)
-   properties = tree_cons (NULL_TREE, t, properties);
+   properties = make_trait_property (NULL_TREE, t,
+ properties);
  else if (t != error_mark_node)
{
  mark_exp_read (t);
@@ -24503,7 +24507,8 @@ c_parser_omp_context_selector (c_parser *parser, tree 
set, tree parms)
  "constant integer expression or string "
  "literal");
  else
-   properties = tree_cons (NULL_TREE, t, properties);
+   properties = make_trait_property (NULL_TREE, t,
+ properties);

[PATCH V3 2/4] OpenMP: Unify representation of name-list properties.

2023-12-07 Thread Sandra Loosemore
Previously, name-list properties specified as identifiers were stored
in the TREE_PURPOSE/OMP_TP_NAME slot, while those specified as strings
were stored in the TREE_VALUE/OMP_TP_VALUE slot.  This patch puts both
representations in OMP_TP_VALUE with a magic cookie in OMP_TP_NAME.

gcc/ChangeLog
* omp-general.h (OMP_TP_NAMELIST_NODE): New.
* omp-general.cc (omp_context_name_list_prop): Move earlier
in the file, and adjust for new representation.
(omp_check_context_selector): Adjust this too.
(omp_context_selector_props_compare): Likewise.

gcc/c/ChangeLog
* c-parser.cc (c_parser_omp_context_selector): Adjust for new
namelist property representation.

gcc/cp/ChangeLog
* parser.cc (cp_parser_omp_context_selector): Adjust for new
namelist property representation.
* pt.cc (tsubst_attribute): Likewise.

gcc/fortran/ChangeLog
* trans-openmp.cc (gfc_trans_omp_declare_variant): Adjust for
new namelist property representation.
---
 gcc/c/c-parser.cc   |  5 ++-
 gcc/cp/parser.cc|  5 ++-
 gcc/cp/pt.cc|  4 +-
 gcc/fortran/trans-openmp.cc |  5 ++-
 gcc/omp-general.cc  | 84 +
 gcc/omp-general.h   |  1 +
 6 files changed, 61 insertions(+), 43 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 9a17a8c5760..7fe449dc54a 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -24538,11 +24538,12 @@ c_parser_omp_context_selector (c_parser *parser, tree 
set, tree parms)
case CTX_PROPERTY_NAME_LIST:
  do
{
- tree prop = NULL_TREE, value = NULL_TREE;
+ tree prop = OMP_TP_NAMELIST_NODE;
+ tree value = NULL_TREE;
  if (c_parser_next_token_is (parser, CPP_KEYWORD)
  || c_parser_next_token_is (parser, CPP_NAME))
{
- prop = c_parser_peek_token (parser)->value;
+ value = c_parser_peek_token (parser)->value;
  c_parser_consume_token (parser);
}
  else if (c_parser_next_token_is (parser, CPP_STRING))
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 9d4ae9726e5..eb1b0f26003 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -47539,11 +47539,12 @@ cp_parser_omp_context_selector (cp_parser *parser, 
tree set, bool has_parms_p)
case CTX_PROPERTY_NAME_LIST:
  do
{
- tree prop = NULL_TREE, value = NULL_TREE;
+ tree prop = OMP_TP_NAMELIST_NODE;
+ tree value = NULL_TREE;
  if (cp_lexer_next_token_is (parser->lexer, CPP_KEYWORD)
  || cp_lexer_next_token_is (parser->lexer, CPP_NAME))
{
- prop = cp_lexer_peek_token (parser->lexer)->u.value;
+ value = cp_lexer_peek_token (parser->lexer)->u.value;
  cp_lexer_consume_token (parser->lexer);
}
  else if (cp_lexer_next_token_is (parser->lexer, CPP_STRING))
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 2541ad95cc1..ca4975dcd6f 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -11869,7 +11869,9 @@ tsubst_attribute (tree t, tree *decl_p, tree args,
}
  properties = copy_list (OMP_TS_PROPERTIES (ts));
  for (tree p = properties; p; p = TREE_CHAIN (p))
-   if (OMP_TP_VALUE (p))
+   if (OMP_TP_NAME (p) == OMP_TP_NAMELIST_NODE)
+ continue;
+   else if (OMP_TP_VALUE (p))
  {
bool allow_string
  = (OMP_TS_ID (ts) != condition || set[0] != 'u');
diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index fe8044a57cd..60154ff3751 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -8235,9 +8235,10 @@ gfc_trans_omp_declare_variant (gfc_namespace *ns)
  break;
case CTX_PROPERTY_NAME_LIST:
  {
-   tree prop = NULL_TREE, value = NULL_TREE;
+   tree prop = OMP_TP_NAMELIST_NODE;
+   tree value = NULL_TREE;
if (otp->is_name)
- prop = get_identifier (otp->name);
+ value = get_identifier (otp->name);
else
  value = gfc_conv_constant_to_tree (otp->expr);
 
diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index 75980fcf4da..156508eabb5 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1114,6 +1114,30 @@ omp_maybe_offloaded (void)
   return false;
 }
 
+/* Return a name from PROP, a property in selectors accepting
+   name lists.  */
+
+static const char *

[PATCH V3 0/4] OpenMP: Improve data abstractions for context selectors

2023-12-07 Thread Sandra Loosemore
Here is a new version of my context selector implementation cleanup
patch set, incorporating comments from Tobias on V2 of part 3.  Parts
1 and 2 are unchanged from V1 except that I rebased them so they
should apply cleanly to mainline head now.  There's a new part 4 that
adds new functionality, handling the additional properties Tobias asked
for.

The part 3 changes from V2 are primarily in making the front ends pass
through a marker for unknown selectors to later processing, instead of
ignoring them completely; anything that contains these unknown
selectors then fails to match.  As Tobias suggested, I also changed
the front ends to uniformly warn about any unknown selector;
previously it either gave no diagnostic at all or gave an error,
depending on the selector set.  To get this to work in the Fortran
front end, I moved the mapping from strings to the new enums to the
parse phase instead of the translate phase, where I'd had it before.
I think this is a cleaner design anyway as it allows getting rid of the
duplicate set of allowed names in the Fortran front end and sharing the
same tables used everywhere else.

-Sandra


Sandra Loosemore (4):
  OpenMP: Introduce accessor macros and constructors for context
selectors.
  OpenMP: Unify representation of name-list properties.
  OpenMP: Use enumerators for names of trait-sets and traits
  OpenMP: Permit additional selector properties

 gcc/c/c-parser.cc | 205 ++---
 gcc/cp/decl.cc|   8 +-
 gcc/cp/parser.cc  | 205 ++---
 gcc/cp/pt.cc  |  93 ++-
 gcc/fortran/gfortran.h|  20 +-
 gcc/fortran/openmp.cc | 189 ++---
 gcc/fortran/trans-openmp.cc   |  66 +-
 gcc/gimplify.cc   |  17 +-
 gcc/omp-general.cc| 778 +++---
 gcc/omp-general.h |  66 +-
 gcc/omp-selectors.h   |  93 +++
 .../c-c++-common/gomp/declare-variant-1.c |   1 +
 .../c-c++-common/gomp/declare-variant-2.c |  44 +-
 .../gfortran.dg/gomp/declare-variant-1.f90|   1 +
 .../gfortran.dg/gomp/declare-variant-2.f90|  34 +-
 .../gfortran.dg/gomp/declare-variant-2a.f90   |   4 +-
 16 files changed, 1015 insertions(+), 809 deletions(-)
 create mode 100644 gcc/omp-selectors.h

-- 
2.31.1



Re: [gcc15] nested functions in C

2023-12-07 Thread Siddhesh Poyarekar

On 2023-12-07 10:42, Eric Botcazou wrote:

I think from a language standpoint, the general idea that nested
functions are just any functions inside functions (which is how the C
nested functions essentially behave) is too broad and they should be
restricted to minimal implementations that, e.g. don't have side-effects
or if they do, there's explicit syntactic sugar to make it clearer.


That sounds totally arbitrary though.  Algol-derived languages have had nested
subprograms for ages, e.g. Pascal or Ada, and they can be very useful.


I'll admit that it is a subjective preference and is probably not in the 
spirit of traditional C.


Sid


Re: [PATCH] driver: Fix memory leak.

2023-12-07 Thread Jakub Jelinek
On Thu, Dec 07, 2023 at 03:16:29PM +0000, Costas Argyris wrote:
> >  Still reachable memory at exit e.g. from valgrind is not a bug.
> 
> Indeed, this is coming from a valgrind report here:
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93019
> 
> where it was noted that the driver memory leaks could be
> problematic for JIT.

In the invoke_embedded_driver JIT case yes, that calls driver::finalize (),
which is why it should be freed before clearing the pointer in there (as
then it is a real leak).

> So, since using std::vector did reduce the valgrind records
> by one (I only targeted a single variable to begin with) I took
> that as a good sign.
> 
> Regarding adding a call to XDELETE (mdswitches), yes,
> that would help in the case where driver::finalize () is actually
> called, which I think is for JIT.  I was trying to take care of the
> case where it doesn't get called as well, but from what you say
> I take it that this case is not of interest.

That is wasted compile time on a non-issue.

If you see a JIT issue with definitely lost records, that is something
that obviously should be fixed (but even in that area I think we've been a
little bit lazy in the option handling).
The most important is that the actual compiler binaries (cc1, cc1plus, ...)
don't leak memory (in the definitely lost kind) like crazy, we have
--enable-checking=valgrind
for that purpose, where the driver runs cc1/cc1plus etc. under valgrind,
but this is very expensive and slow, so usually it is run once during a
cycle (if at all), on a fast machine could take even in non-bootstrap mode
a weekend to go through the whole testsuite, then one can look at the leaks.

Jakub



Re: [gcc15] nested functions in C

2023-12-07 Thread Eric Botcazou
> I think from a language standpoint, the general idea that nested
> functions are just any functions inside functions (which is how the C
> nested functions essentially behave) is too broad and they should be
> restricted to minimal implementations that, e.g. don't have side-effects
> or if they do, there's explicit syntactic sugar to make it clearer.

That sounds totally arbitrary though.  Algol-derived languages have had nested 
subprograms for ages, e.g. Pascal or Ada, and they can be very useful.

-- 
Eric Botcazou




[PATCH v2 1/3] [GCC] arm: vld1_types_x2 ACLE intrinsics

2023-12-07 Thread Ezra.Sitorus
From: Ezra Sitorus 

This patch is part of a series of patches implementing the _xN
variants of the vld1 intrinsic for the arm port. This patch adds the
_x2 variants of the vld1 intrinsic.

The previous vld1_x2 has been updated to vld1q_x2 to take into
account that it works with 4-word-length types. vld1_x2 is now
only for 2-word-length types.

ACLE documents:
https://developer.arm.com/documentation/ihi0053/latest/

ISA documents:
https://developer.arm.com/documentation/ddi0487/latest/

gcc/ChangeLog:
* config/arm/arm_neon.h
(vld1_u8_x2, vld1_u16_x2, vld1_u32_x2, vld1_u64_x2): New.
(vld1_s8_x2, vld1_s16_x2, vld1_s32_x2, vld1_s64_x2): New.
(vld1_f16_x2, vld1_f32_x2): New.
(vld1_p8_x2, vld1_p16_x2, vld1_p64_x2): New.
(vld1_bf16_x2): New.
(vld1q_types_x2): Updated to use vld1q_x2 from
arm_neon_builtins.def
* config/arm/arm_neon_builtins.def
(vld1_x2): Updated entries.
(vld1q_x2): New entries, which come from the old vld1_x2.
* config/arm/neon.md
(neon_vld1_x2): Updated
from neon_vld1_x2.

gcc/testsuite/ChangeLog:
* gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests.
* gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests.
* gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests.
* gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests.
---
 gcc/config/arm/arm_neon.h | 156 --
 gcc/config/arm/arm_neon_builtins.def  |   3 +-
 gcc/config/arm/neon.md|  10 +-
 .../gcc.target/arm/simd/vld1_base_xN_1.c  |  66 
 .../gcc.target/arm/simd/vld1_bf16_xN_1.c  |  13 ++
 .../gcc.target/arm/simd/vld1_fp16_xN_1.c  |  13 ++
 .../gcc.target/arm/simd/vld1_p64_xN_1.c   |  13 ++
 7 files changed, 254 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c

diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index af1f747f262..669b8fffb40 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -10307,6 +10307,15 @@ vld1_p64 (const poly64_t * __a)
   return (poly64x1_t) { *__a };
 }
 
+__extension__ extern __inline poly64x1x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_p64_x2 (const poly64_t * __a)
+{
+  union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vld1_x2di ((const __builtin_neon_di *) __a);
+  return __rv.__i;
+}
+
 #pragma GCC pop_options
 __extension__ extern __inline int8x8_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
@@ -10336,6 +10345,42 @@ vld1_s64 (const int64_t * __a)
   return (int64x1_t) { *__a };
 }
 
+__extension__ extern __inline int8x8x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s8_x2 (const int8_t * __a)
+{
+  union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vld1_x2v8qi ((const __builtin_neon_qi *) __a);
+  return __rv.__i;
+}
+
+__extension__ extern __inline int16x4x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s16_x2 (const int16_t * __a)
+{
+  union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vld1_x2v4hi ((const __builtin_neon_hi *) __a);
+  return __rv.__i;
+}
+
+__extension__ extern __inline int32x2x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s32_x2 (const int32_t * __a)
+{
+  union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vld1_x2v2si ((const __builtin_neon_si *) __a);
+  return __rv.__i;
+}
+
+__extension__ extern __inline int64x1x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_s64_x2 (const int64_t * __a)
+{
+  union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vld1_x2di ((const __builtin_neon_di *) __a);
+  return __rv.__i;
+}
+
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 __extension__ extern __inline float16x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
@@ -10352,6 +10397,26 @@ vld1_f32 (const float32_t * __a)
   return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) 
__a);
 }
 
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+__extension__ extern __inline float16x4x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_f16_x2 (const float16_t * __a)
+{
+  union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o = __builtin_neon_vld1_x2v4hf (__a);
+  return __rv.__i;
+}
+#endif
+
+__extension__ extern __inline float32x2x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, 

  1   2   >