Re: [PATCH v1] RISC-V: Fix one bug for floating-point static frm

2023-07-03 Thread juzhe.zh...@rivai.ai
LGTM



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-07-04 13:50
To: gcc-patches
CC: juzhe.zhong; rdapp.gcc; jeffreyalaw; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Fix one bug for floating-point static frm
From: Pan Li 
 
This patch fixes one bug so that the behavior aligns with the following items of the spec.
 
1. By default, the RVV floating-point will take dyn mode.
2. DYN is invalid in FRM register for RVV floating-point.
 
At function entry and exit, mode switching will take DYN as
the frm mode.
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv.cc (riscv_emit_mode_set): Avoid emit insn
when FRM_MODE_DYN.
(riscv_mode_entry): Take FRM_MODE_DYN as entry mode.
(riscv_mode_exit): Likewise for exit mode.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/float-point-frm-insert-6.c: New test.
---
gcc/config/riscv/riscv.cc |  6 ++--
.../riscv/rvv/base/float-point-frm-insert-6.c | 31 +++
2 files changed, 34 insertions(+), 3 deletions(-)
create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/float-point-frm-insert-6.c
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e4dc8115e69..f5fe910426e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7670,7 +7670,7 @@ riscv_emit_mode_set (int entity, int mode, int prev_mode,
emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode)));
   break;
 case RISCV_FRM:
-  if (mode != FRM_MODE_NONE && mode != prev_mode)
+  if (mode != FRM_MODE_DYN && mode != prev_mode)
{
  rtx scaler = gen_reg_rtx (SImode);
  rtx imm = gen_int_mode (mode, SImode);
@@ -7774,7 +7774,7 @@ riscv_mode_entry (int entity)
 case RISCV_VXRM:
   return VXRM_MODE_NONE;
 case RISCV_FRM:
-  return FRM_MODE_NONE;
+  return FRM_MODE_DYN;
 default:
   gcc_unreachable ();
 }
@@ -7791,7 +7791,7 @@ riscv_mode_exit (int entity)
 case RISCV_VXRM:
   return VXRM_MODE_NONE;
 case RISCV_FRM:
-  return FRM_MODE_NONE;
+  return FRM_MODE_DYN;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-frm-insert-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-frm-insert-6.c
new file mode 100644
index 000..6d896e0953e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-frm-insert-6.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+typedef float float32_t;
+
+vfloat32m1_t
+test_riscv_vfadd_vv_f32m1_rm (vfloat32m1_t op1, vfloat32m1_t op2, size_t vl) {
+  return __riscv_vfadd_vv_f32m1_rm (op1, op2, 7, vl);
+}
+
+vfloat32m1_t
+test_vfadd_vv_f32m1_m_rm(vbool32_t mask, vfloat32m1_t op1, vfloat32m1_t op2,
+ size_t vl) {
+  return __riscv_vfadd_vv_f32m1_m_rm(mask, op1, op2, 7, vl);
+}
+
+vfloat32m1_t
+test_vfadd_vf_f32m1_rm(vfloat32m1_t op1, float32_t op2, size_t vl) {
+  return __riscv_vfadd_vf_f32m1_rm(op1, op2, 7, vl);
+}
+
+vfloat32m1_t
+test_vfadd_vf_f32m1_m_rm(vbool32_t mask, vfloat32m1_t op1, float32_t op2,
+ size_t vl) {
+  return __riscv_vfadd_vf_f32m1_m_rm(mask, op1, op2, 7, vl);
+}
+
+/* { dg-final { scan-assembler-times 
{vfadd\.v[vf]\s+v[0-9]+,\s*v[0-9]+,\s*[fav]+[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-not {fsrm\s+[ax][0-9]+,\s*[ax][0-9]+} } } */
-- 
2.34.1
 
 


[PATCH v1] RISC-V: Fix one bug for floating-point static frm

2023-07-03 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch fixes one bug so that the behavior aligns with the following items of the spec.

1. By default, the RVV floating-point will take dyn mode.
2. DYN is invalid in FRM register for RVV floating-point.

At function entry and exit, mode switching will take DYN as
the frm mode.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_emit_mode_set): Avoid emit insn
when FRM_MODE_DYN.
(riscv_mode_entry): Take FRM_MODE_DYN as entry mode.
(riscv_mode_exit): Likewise for exit mode.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-frm-insert-6.c: New test.
---
 gcc/config/riscv/riscv.cc |  6 ++--
 .../riscv/rvv/base/float-point-frm-insert-6.c | 31 +++
 2 files changed, 34 insertions(+), 3 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/float-point-frm-insert-6.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e4dc8115e69..f5fe910426e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7670,7 +7670,7 @@ riscv_emit_mode_set (int entity, int mode, int prev_mode,
emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode)));
   break;
 case RISCV_FRM:
-  if (mode != FRM_MODE_NONE && mode != prev_mode)
+  if (mode != FRM_MODE_DYN && mode != prev_mode)
{
  rtx scaler = gen_reg_rtx (SImode);
  rtx imm = gen_int_mode (mode, SImode);
@@ -7774,7 +7774,7 @@ riscv_mode_entry (int entity)
 case RISCV_VXRM:
   return VXRM_MODE_NONE;
 case RISCV_FRM:
-  return FRM_MODE_NONE;
+  return FRM_MODE_DYN;
 default:
   gcc_unreachable ();
 }
@@ -7791,7 +7791,7 @@ riscv_mode_exit (int entity)
 case RISCV_VXRM:
   return VXRM_MODE_NONE;
 case RISCV_FRM:
-  return FRM_MODE_NONE;
+  return FRM_MODE_DYN;
 default:
   gcc_unreachable ();
 }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-frm-insert-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-frm-insert-6.c
new file mode 100644
index 000..6d896e0953e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-frm-insert-6.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+typedef float float32_t;
+
+vfloat32m1_t
+test_riscv_vfadd_vv_f32m1_rm (vfloat32m1_t op1, vfloat32m1_t op2, size_t vl) {
+  return __riscv_vfadd_vv_f32m1_rm (op1, op2, 7, vl);
+}
+
+vfloat32m1_t
+test_vfadd_vv_f32m1_m_rm(vbool32_t mask, vfloat32m1_t op1, vfloat32m1_t op2,
+size_t vl) {
+  return __riscv_vfadd_vv_f32m1_m_rm(mask, op1, op2, 7, vl);
+}
+
+vfloat32m1_t
+test_vfadd_vf_f32m1_rm(vfloat32m1_t op1, float32_t op2, size_t vl) {
+  return __riscv_vfadd_vf_f32m1_rm(op1, op2, 7, vl);
+}
+
+vfloat32m1_t
+test_vfadd_vf_f32m1_m_rm(vbool32_t mask, vfloat32m1_t op1, float32_t op2,
+size_t vl) {
+  return __riscv_vfadd_vf_f32m1_m_rm(mask, op1, op2, 7, vl);
+}
+
+/* { dg-final { scan-assembler-times 
{vfadd\.v[vf]\s+v[0-9]+,\s*v[0-9]+,\s*[fav]+[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-not {fsrm\s+[ax][0-9]+,\s*[ax][0-9]+} } } */
-- 
2.34.1



[Bug target/110170] Sub-optimal conditional jumps in conditional-swap with floating point

2023-07-03 Thread crazylht at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110170

--- Comment #10 from Hongtao.liu  ---
There are a couple of other issues.
1. rtx_cost for and/ior/xor:SF/DF is not right; it actually generates vector
instructions.
2. branch_cost is COSTS_N_INSN(1) instead of BRANCH_COST (),
which makes noce more conservative about eliminating the condition.
w/ sse2, backend tries

(insn 34 0 36 (set (reg:DF 86 [ _1 ])
(reg:DF 82 [ _1 ])) 151 {*movdf_internal}
 (nil))

(insn 36 34 37 (set (reg:DF 92)
(unspec:DF [
(reg:DF 83 [ _2 ])
(reg:DF 82 [ _1 ])
] UNSPEC_IEEE_MAX)) -1
 (nil))

(insn 37 36 38 (set (reg:DF 93)
(lt:DF (reg:DF 82 [ _1 ])
(reg:DF 83 [ _2 ]))) -1
 (nil))

(insn 38 37 39 (set (reg:DF 94)
(and:DF (reg:DF 86 [ _1 ])
(reg:DF 93))) -1
 (nil))

(insn 39 38 40 (set (reg:DF 95)
(and:DF (not:DF (reg:DF 93))
(reg:DF 83 [ _2 ]))) -1
 (nil))

(insn 40 39 41 (set (reg:DF 83 [ _2 ])
(ior:DF (reg:DF 95)
(reg:DF 94))) -1
 (nil))

(insn 41 40 0 (set (reg:DF 82 [ _1 ])
(reg:DF 92)) 151 {*movdf_internal}
 (nil))

whose cost is 28, while the original cost is 12 (3 moves + 1 branch). (Do we also need to
consider the comparison, since it's counted in the cmov seq? If we use ix86_branch_cost and
count the comparison cost in the original seq, then the cost should be 28 vs 28.)


(insn 5 17 6 3 (set (reg:DF 86 [ _1 ])
(reg:DF 82 [ _1 ]))
"/export/users/liuhongt/tools-build/build_intel-innersource_pr110170_debug/test.c":5:23
151 {*movdf_internal}
 (expr_list:REG_DEAD (reg:DF 82 [ _1 ])
(nil)))
(insn 6 5 7 3 (set (reg:DF 82 [ _1 ])
(reg:DF 83 [ _2 ]))
"/export/users/liuhongt/tools-build/build_intel-innersource_pr110170_debug/test.c":6:15
discrim 1 151 {*movdf_internal}
 (expr_list:REG_DEAD (reg:DF 83 [ _2 ])
(nil)))
(insn 7 6 18 3 (set (reg:DF 83 [ _2 ])
(reg:DF 86 [ _1 ]))
"/export/users/liuhongt/tools-build/build_intel-innersource_pr110170_debug/test.c":5:23
discrim 1 151 {*movdf_internal}
 (expr_list:REG_DEAD (reg:DF 86 [ _1 ])
(nil)))

[Bug tree-optimization/110531] Vect: slp_done_for_suggested_uf is not initialized in tree-vect-loop.cc

2023-07-03 Thread hliu at amperecomputing dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110531

--- Comment #5 from Hao Liu  ---
BTW, the reason there is no warning is probably that the original code is too
complicated and not inlined.
Compile the simple case by "g++ -O3 -S -Wall hello.c":
int foo(bool a) {
  bool b;
  if (a || b)
return 1;
  b = true;
  return 0;
}

gcc reports a warning:
hello.c: In function ‘int foo(bool)’:
hello.c:4:9: warning: ‘b’ is used uninitialized [-Wuninitialized]
4 |   if (a || b)
  |   ~~^~~~

[Bug tree-optimization/110531] Vect: slp_done_for_suggested_uf is not initialized in tree-vect-loop.cc

2023-07-03 Thread hliu at amperecomputing dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110531

--- Comment #4 from Hao Liu  ---
> IMHO, the initialization with false is unnecessary and very likely it isn't 
> able to get optimized, it seems worse from this point of view.

Sorry. I don't think so. See more at
https://www.oreilly.com/library/view/c-coding-standards/0321113586/ch20.html:

Start with a clean slate: Uninitialized variables are a common source of bugs
in C and C++ programs. There are few reasons to ever leave a variable
uninitialized. None is serious enough to justify the hazard of undefined
behavior.

Re: [PATCH V4 1/4] rs6000: build constant via li;rotldi

2023-07-03 Thread Kewen.Lin via Gcc-patches
Hi Jeff,

on 2023/7/4 10:18, Jiufu Guo via Gcc-patches wrote:
> Hi,
> 
> If a constant is possible to be rotated to/from a positive or negative
> value from "li", then "li;rotldi" can be used to build the constant.
> 
> Compare with the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621961.html
> This patch just did minor changes to the style and comments.
> 
> Bootstrap and regtest pass on ppc64{,le}.
> 
> Since the previous version is approved with conditions, this version
> explained the concern too.  If no objection, I would like to apply
> this patch to trunk.
> 
> 
> BR,
> Jeff (Jiufu)
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000.cc (can_be_built_by_li_and_rotldi): New function.
>   (rs6000_emit_set_long_const): Call can_be_built_by_li_and_rotldi.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/const-build.c: New test.
> ---
>  gcc/config/rs6000/rs6000.cc   | 47 +--
>  .../gcc.target/powerpc/const-build.c  | 57 +++
>  2 files changed, 98 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build.c
> 
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 42f49e4a56b..acc332acc05 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10258,6 +10258,31 @@ rs6000_emit_set_const (rtx dest, rtx source)
>return true;
>  }
>  
> +/* Check if value C can be built by 2 instructions: one is 'li', another is
> +   rotldi.

Nit: different style, li is with "'" but rotldi isn't.

> +
> +   If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
> +   is set to the mask operand of rotldi(rldicl), and return true.
> +   Return false otherwise.  */
> +
> +static bool
> +can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift,
> +HOST_WIDE_INT *mask)
> +{
> +  /* If C or ~C contains at least 49 successive zeros, then C can be rotated
> + to/from a positive or negative value that 'li' is able to load.  */
> +  int n;
> +  if (can_be_rotated_to_lowbits (c, 15, )
> +  || can_be_rotated_to_lowbits (~c, 15, ))
> +{
> +  *mask = HOST_WIDE_INT_M1;
> +  *shift = HOST_BITS_PER_WIDE_INT - n;
> +  return true;
> +}
> +
> +  return false;
> +}
> +
>  /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
> Output insns to set DEST equal to the constant C as a series of
> lis, ori and shl instructions.  */
> @@ -10266,15 +10291,14 @@ static void
>  rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
>  {
>rtx temp;
> +  int shift;
> +  HOST_WIDE_INT mask;
>HOST_WIDE_INT ud1, ud2, ud3, ud4;
>  
>ud1 = c & 0x;
> -  c = c >> 16;
> -  ud2 = c & 0x;
> -  c = c >> 16;
> -  ud3 = c & 0x;
> -  c = c >> 16;
> -  ud4 = c & 0x;
> +  ud2 = (c >> 16) & 0x;
> +  ud3 = (c >> 32) & 0x;
> +  ud4 = (c >> 48) & 0x;
>  
>if ((ud4 == 0x && ud3 == 0x && ud2 == 0x && (ud1 & 0x8000))
>|| (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
> @@ -10305,6 +10329,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT 
> c)
>emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
>GEN_INT ((ud2 ^ 0x) << 16)));
>  }
> +  else if (can_be_built_by_li_and_rotldi (c, , ))
> +{
> +  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +  unsigned HOST_WIDE_INT imm = (c | ~mask);
> +  imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
> +
> +  emit_move_insn (temp, GEN_INT (imm));
> +  if (shift != 0)
> + temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
> +  emit_move_insn (dest, temp);
> +}
>else if (ud3 == 0 && ud4 == 0)
>  {
>temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c 
> b/gcc/testsuite/gcc.target/powerpc/const-build.c
> new file mode 100644
> index 000..69b37e2bb53
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/const-build.c
> @@ -0,0 +1,57 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -save-temps" } */
> +/* { dg-require-effective-target has_arch_ppc64 } */
> +
> +/* Verify that two instructions are sucessfully used to build constants.

s/sucessfully/successfully/

> +   One insn is li or lis, another is rotate: rldicl, rldicr or rldic.  */

Nit: This patch is for insn li + insn rldicl only, you probably want to keep
consistent in the comments.

The others look good to me, thanks!

Segher had one question on "~c" before, I saw you had explained for it, it
makes sense to me, but in case he has more questions I'd defer the final
approval to him.

BR,
Kewen


RE: [PATCH v1] RISC-V: Fix one typo of FRM dynamic definition

2023-07-03 Thread Li, Pan2 via Gcc-patches
Hi Robin,

Just reverted this patch; it reports some weird illegal instruction failures, so I may need more
time for this.

Pan

-Original Message-
From: Li, Pan2 
Sent: Monday, July 3, 2023 11:00 PM
To: Robin Dapp ; juzhe.zh...@rivai.ai; gcc-patches 

Cc: jeffreyalaw ; Wang, Yanzhang 
; kito.cheng 
Subject: RE: [PATCH v1] RISC-V: Fix one typo of FRM dynamic definition

Sure, every change needs testing, and I will pay attention to this in the future.

Pan

-Original Message-
From: Robin Dapp  
Sent: Monday, July 3, 2023 10:57 PM
To: Li, Pan2 ; juzhe.zh...@rivai.ai; gcc-patches 

Cc: rdapp@gmail.com; jeffreyalaw ; Wang, Yanzhang 
; kito.cheng 
Subject: Re: [PATCH v1] RISC-V: Fix one typo of FRM dynamic definition

> Sorry for the inconvenience; still working on fixing it. If urgent I can
> revert this change to unblock your work ASAP.

I'm not blocked by this, thanks, just wanted to document it here.
I was testing another patch and needed to dig for a while until
I realized the FAILs come from this one.  In general I would
assume that even obvious patches are tested before (I have
introduced bugs by obvious ones before so I make sure to).

Regards
 Robin


RE: [PATCH V7] Machine Description: Add LEN_MASK_{GATHER_LOAD, SCATTER_STORE} pattern

2023-07-03 Thread Li, Pan2 via Gcc-patches
Committed as both the bootstrap and regression tests passed, thanks Richard.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Richard Sandiford via Gcc-patches
Sent: Monday, July 3, 2023 9:50 PM
To: juzhe.zh...@rivai.ai
Cc: gcc-patches@gcc.gnu.org; rguent...@suse.de
Subject: Re: [PATCH V7] Machine Description: Add LEN_MASK_{GATHER_LOAD, 
SCATTER_STORE} pattern

juzhe.zh...@rivai.ai writes:
> From: Ju-Zhe Zhong 
>
> Hi, Richi and Richard.
>
> Based on the review comments from Richard:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623405.html
>
> I change len_mask_gather_load/len_mask_scatter_store order into:
> {len,bias,mask}
>
> We now add the len and mask arguments using add_len_and_mask_args,
> which is the same as for partial_load/partial_store.
>
> Now, the code becomes more reasonable and easier to maintain.
>
> This patch is adding LEN_MASK_{GATHER_LOAD,SCATTER_STORE} to allow targets
> handle flow control by mask and loop control by length on gather/scatter 
> memory
> operations. Consider this following case:
>
> #include 
> void
> f (uint8_t *restrict a,
>uint8_t *restrict b, int n,
>int base, int step,
>int *restrict cond)
> {
>   for (int i = 0; i < n; ++i)
> {
>   if (cond[i])
> a[i * step + base] = b[i * step + base];
> }
> }
>
> We hope RVV can vectorize such case into following IR:
>
> loop_len = SELECT_VL
> control_mask = comparison
> v = LEN_MASK_GATHER_LOAD (.., loop_len, bias, control_mask)
> LEN_SCATTER_STORE (... v, ..., loop_len, bias, control_mask)
>
> This patch doesn't apply such patterns into vectorizer, just add patterns
> and update the documents.
>
> Will send patch which apply such patterns into vectorizer soon after this
> patch is approved.
>
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * doc/md.texi: Add len_mask_gather_load/len_mask_scatter_store.
> * internal-fn.cc (expand_scatter_store_optab_fn): Ditto.
> (expand_gather_load_optab_fn): Ditto.
> (internal_load_fn_p): Ditto.
> (internal_store_fn_p): Ditto.
> (internal_gather_scatter_fn_p): Ditto.
> (internal_fn_len_index): Ditto.
> (internal_fn_mask_index): Ditto.
> (internal_fn_stored_value_index): Ditto.
> * internal-fn.def (LEN_MASK_GATHER_LOAD): Ditto.
> (LEN_MASK_SCATTER_STORE): Ditto.
> * optabs.def (OPTAB_CD): Ditto.

Nice!  OK, thanks.

Richard

> ---
>  gcc/doc/md.texi | 17 +
>  gcc/internal-fn.cc  | 32 +---
>  gcc/internal-fn.def |  8 ++--
>  gcc/optabs.def  |  2 ++
>  4 files changed, 42 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 5e5482265cd..f14dd32b2dc 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5040,6 +5040,15 @@ operand 5.  Bit @var{i} of the mask is set if element 
> @var{i}
>  of the result should be loaded from memory and clear if element @var{i}
>  of the result should be set to zero.
>  
> +@cindex @code{len_mask_gather_load@var{m}@var{n}} instruction pattern
> +@item @samp{len_mask_gather_load@var{m}@var{n}}
> +Like @samp{gather_load@var{m}@var{n}}, but takes an extra length operand 
> (operand 5),
> +a bias operand (operand 6) as well as a mask operand (operand 7).  Similar 
> to len_maskload,
> +the instruction loads at most (operand 5 + operand 6) elements from memory.
> +Bit @var{i} of the mask is set if element @var{i} of the result should
> +be loaded from memory and clear if element @var{i} of the result should be 
> undefined.
> +Mask elements @var{i} with @var{i} > (operand 5 + operand 6) are ignored.
> +
>  @cindex @code{scatter_store@var{m}@var{n}} instruction pattern
>  @item @samp{scatter_store@var{m}@var{n}}
>  Store a vector of mode @var{m} into several distinct memory locations.
> @@ -5069,6 +5078,14 @@ Like @samp{scatter_store@var{m}@var{n}}, but takes an 
> extra mask operand as
>  operand 5.  Bit @var{i} of the mask is set if element @var{i}
>  of the result should be stored to memory.
>  
> +@cindex @code{len_mask_scatter_store@var{m}@var{n}} instruction pattern
> +@item @samp{len_mask_scatter_store@var{m}@var{n}}
> +Like @samp{scatter_store@var{m}@var{n}}, but takes an extra length operand 
> (operand 5),
> +a bias operand (operand 6) as well as a mask operand (operand 7).  The 
> instruction stores
> +at most (operand 5 + operand 6) elements of (operand 4) to memory.
> +Bit @var{i} of the mask is set if element @var{i} of (operand 4) should be 
> stored.
> +Mask elements @var{i} with @var{i} > (operand 5 + operand 6) are ignored.
> +
>  @cindex @code{vec_set@var{m}} instruction pattern
>  @item @samp{vec_set@var{m}}
>  Set given field in the vector value.  Operand 0 is the vector to modify,
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index c1fcb38b17b..303df102d81 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -3507,7 +3507,6 @@ expand_scatter_store_optab_fn (internal_fn, gcall 
> *stmt, direct_optab 

[PATCH V2] i386: Inline function with default arch/tune to caller

2023-07-03 Thread Hongyu Wang via Gcc-patches
Hi,

For functions with different target attributes, the current logic refuses to
inline the callee when any arch or tune is mismatched. Relax the
condition to allow a callee with the default arch/tune to be inlined.

Bootstrapped/regtested on x86-64-linux-gnu{-m32,}.

Ok for trunk?

gcc/ChangeLog:

* config/i386/i386.cc (ix86_can_inline_p): If callee has
default arch=x86-64 and tune=generic, do not block the
inlining to its caller.

gcc/testsuite/ChangeLog:

* gcc.target/i386/inline_target_clones.c: New test.
---
 gcc/config/i386/i386.cc   | 22 +++--
 .../gcc.target/i386/inline_target_clones.c| 24 +++
 2 files changed, 39 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/inline_target_clones.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 8989985700a..4741c9b5364 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -605,13 +605,6 @@ ix86_can_inline_p (tree caller, tree callee)
   != (callee_opts->x_target_flags & ~always_inline_safe_mask))
 ret = false;
 
-  /* See if arch, tune, etc. are the same.  */
-  else if (caller_opts->arch != callee_opts->arch)
-ret = false;
-
-  else if (!always_inline && caller_opts->tune != callee_opts->tune)
-ret = false;
-
   else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
   /* If the calle doesn't use FP expressions differences in
  ix86_fpmath can be ignored.  We are called from FEs
@@ -622,6 +615,21 @@ ix86_can_inline_p (tree caller, tree callee)
   || ipa_fn_summaries->get (callee_node)->fp_expressions))
 ret = false;
 
+  /* At this point we cannot identify whether arch or tune setting
+ comes from target attribute or not. So the most conservative way
+ is to allow the callee that uses default arch and tune string to
+ be inlined.  */
+  else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
+  && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
+ret = true;
+
+  /* See if arch, tune, etc. are the same.  */
+  else if (caller_opts->arch != callee_opts->arch)
+ret = false;
+
+  else if (!always_inline && caller_opts->tune != callee_opts->tune)
+ret = false;
+
   else if (!always_inline
   && caller_opts->branch_cost != callee_opts->branch_cost)
 ret = false;
diff --git a/gcc/testsuite/gcc.target/i386/inline_target_clones.c 
b/gcc/testsuite/gcc.target/i386/inline_target_clones.c
new file mode 100644
index 000..53db1600ce5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/inline_target_clones.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O3 -march=x86-64" } */
+/* { dg-final { scan-assembler-not "call\[ \t\]+callee" } } */
+
+float callee (float a, float b, float c, float d,
+ float e, float f, float g, float h)
+{
+  return a * b + c * d + e * f + g + h + a * c + b * c
++ a * d + b * e + a * f + c * h + 
+b * (a - 0.4f) * (c + h) * (b + e * d) - a / f * h;
+}
+
+__attribute__((target_clones("default","arch=icelake-server")))
+void caller (int n, float *a,
+float c1, float c2, float c3,
+float c4, float c5, float c6,
+float c7)
+{
+  for (int i = 0; i < n; i++)
+{
+  a[i] = callee (a[i], c1, c2, c3, c4, c5, c6, c7);
+}
+}
-- 
2.31.1



ping^^^^: [PATCH V2] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd

2023-07-03 Thread Jiufu Guo via Gcc-patches


Hi,

Gentle ping ...

Jiufu Guo via Gcc-patches  writes:

> Gentle ping...
>
> Jiufu Guo via Gcc-patches  writes:
>
>> Gentle ping...
>>
>> Jiufu Guo via Gcc-patches  writes:
>>
>>> Hi
>>>
>>> I would like to ping this patch for stage1:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html
>>>
>>> BR,
>>> Jeff (Jiufu)
>>>
>>> Jiufu Guo  writes:
>>>
 Hi,

 Compare with previous version:
 https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609654.html
 This patch does not use UNSPEC for insn mtvsrws anymore.  And to handle
 the subreg better on BE and LE, predicate "lowpart_subreg_operator"
 is introduced. To help combine pass to match the pattern on high32
 bit of DI, shiftrt is still used.

 As mentioned in PR108338, on p9, we could use mtvsrws to implement
 the conversion from SI#0 to SF (or lowpart DI to SF).

 For examples:
   *(long long*)buff = di;
   float f = *(float*)(buff);
 We generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
 "mtvsrws 1,3 ; xscvspdpn 1,1".

 This patch update this, and also enhance the bitcast from highpart
 DI to SF.

 Bootstrap and regtests pass on ppc64{,le}.
 Is this ok for trunk?

 BR,
 Jeff (Jiufu)

PR target/108338

 gcc/ChangeLog:

* config/rs6000/predicates.md (lowpart_subreg_operator): New
define_predicate.
* config/rs6000/rs6000.md (any_rshift): New code_iterator.
(movsf_from_si2): Rename to...
(movsf_from_si2_): ... this.
(si2sf_mtvsrws): New define_insn.

 gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr108338.c: New test.

 ---
  gcc/config/rs6000/predicates.md |  5 +++
  gcc/config/rs6000/rs6000.md | 35 -
  gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +
  3 files changed, 73 insertions(+), 9 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c

 diff --git a/gcc/config/rs6000/predicates.md 
 b/gcc/config/rs6000/predicates.md
 index 52c65534e51..e57c9d99c6b 100644
 --- a/gcc/config/rs6000/predicates.md
 +++ b/gcc/config/rs6000/predicates.md
 @@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
else
  return false;
  })
 +
 +(define_predicate "lowpart_subreg_operator"
 +  (and (match_code "subreg")
 +   (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG 
 (op)))
 +  == SUBREG_BYTE (op)")))
 diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
 index 4a7812fa592..5b4a7f8d801 100644
 --- a/gcc/config/rs6000/rs6000.md
 +++ b/gcc/config/rs6000/rs6000.md
 @@ -7539,6 +7539,14 @@ (define_split
 UNSPEC_MOVSI_GOT))]
"")
  
 +(define_insn "si2sf_mtvsrws"
 +  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
 +   (subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))]
 +  "TARGET_P9_VECTOR && TARGET_XSCVSPDPN"
 +  "mtvsrws %x0,%1\n\txscvspdpn %x0,%x0"
 +  [(set_attr "type" "mfvsr")
 +   (set_attr "length" "8")])
 +
  ;;   MR  LA
  ;;   LWZ LFIWZX  LXSIWZX
  ;;   STW STFIWX  STXSIWX
 @@ -8203,10 +8211,18 @@ (define_insn_and_split "movsf_from_si"
rtx op2 = operands[2];
rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
  
 -  /* Move SF value to upper 32-bits for xscvspdpn.  */
 -  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
 -  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
 -  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
 +  if (TARGET_P9_VECTOR)
 +{
 +  emit_insn (gen_si2sf_mtvsrws (op0, gen_lowpart (SImode, op1_di)));
 +}
 +  else
 +{
 +  /* Move SF value to upper 32-bits for xscvspdpn.  */
 +  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
 +  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
 +  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
 +}
 +
DONE;
  }
[(set_attr "length"
 @@ -8219,18 +8235,19 @@ (define_insn_and_split "movsf_from_si"
"*,  *, p9v,   p8v,   *, *,
 p8v,p8v,   p8v,   *")])
  
 +(define_code_iterator any_rshift [ashiftrt lshiftrt])
 +
  ;; For extracting high part element from DImode register like:
  ;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
  ;; split it before reload with "and mask" to avoid generating shift right
  ;; 32 bit then shift left 32 bit.
 -(define_insn_and_split "movsf_from_si2"
 +(define_insn_and_split "movsf_from_si2_"
[(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
(unspec:SF
 -   

[PATCH] Break false dependence for vpternlog by inserting vpxor.

2023-07-03 Thread liuhongt via Gcc-patches
vpternlog is also used for optimization which doesn't need any valid
input operand, in that case, the destination is used as input in the
instruction and that creates a false dependence.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

PR target/110438
* config/i386/predicates.md
(int_float_vector_all_ones_operand): New predicate.
* config/i386/sse.md (*vmov_constm1_pternlog): New
define_insn.
(*_cvtmask2): Adjust to
define_insn_and_split to avoid false dependence.
(*_cvtmask2_pternlog): New
define_insn.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr110438.c: New test.
---
 gcc/config/i386/predicates.md|  8 ++-
 gcc/config/i386/sse.md   | 69 +++-
 gcc/testsuite/gcc.target/i386/pr110438.c | 30 +++
 3 files changed, 94 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438.c

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index fb07707dcba..df0d9e20def 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1192,12 +1192,18 @@ (define_predicate "float_vector_all_ones_operand"
 return false;
 })
 
-/* Return true if operand is a vector constant that is all ones. */
+/* Return true if operand is an integral vector constant that is all ones. */
 (define_predicate "vector_all_ones_operand"
   (and (match_code "const_vector")
(match_test "INTEGRAL_MODE_P (GET_MODE (op))")
(match_test "op == CONSTM1_RTX (GET_MODE (op))")))
 
+/* Return true if operand is a vector constant that is all ones. */
+(define_predicate "int_float_vector_all_ones_operand"
+  (ior (match_operand 0 "vector_all_ones_operand")
+   (match_operand 0 "float_vector_all_ones_operand")
+   (match_test "op == constm1_rtx")))
+
 /* Return true if operand is an 128/256bit all ones vector
that zero-extends to 256/512bit.  */
 (define_predicate "vector_all_ones_zero_extend_half_operand"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 812cfca4b92..93cdd844026 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1382,6 +1382,28 @@ (define_insn "mov_internal"
  ]
  (symbol_ref "true")))])
 
+; False dependency happens on destination register which is not really
+; used when moving all ones to vector register
+(define_split
+  [(set (match_operand:VMOVE 0 "register_operand")
+   (match_operand:VMOVE 1 "int_float_vector_all_ones_operand"))]
+  "TARGET_AVX512F && reload_completed
+  && ( == 64 || EXT_REX_SSE_REG_P (operands[0]))"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel
+ [(set (match_dup 0) (match_dup 1))
+  (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "operands[2] = CONST0_RTX (mode);")
+
+(define_insn "*vmov_constm1_pternlog"
+  [(set (match_operand:VMOVE 0 "register_operand" "=v")
+   (match_operand:VMOVE 1 "int_float_vector_all_ones_operand" 
""))
+   (unspec [(match_operand:VMOVE 2 "register_operand" "0")] 
UNSPEC_INSN_FALSE_DEP)]
+   "TARGET_AVX512VL ||  == 64"
+   "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix" "evex")])
+
 ;; If mem_addr points to a memory region with less than whole vector size bytes
 ;; of accessible memory and k is a mask that would prevent reading the 
inaccessible
 ;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it to be transformed to 
vpblendd
@@ -9336,7 +9358,7 @@ (define_expand "_cvtmask2"
 operands[3] = CONST0_RTX (mode);
   }")
 
-(define_insn "*_cvtmask2"
+(define_insn_and_split "*_cvtmask2"
   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VI48_AVX512VL
  (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
@@ -9345,12 +9367,35 @@ (define_insn "*_cvtmask2"
   "TARGET_AVX512F"
   "@
vpmovm2\t{%1, %0|%0, %1}
-   vpternlog\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, 
%0, 0x81}"
+   #"
+  "&& !TARGET_AVX512DQ && reload_completed"
+  [(set (match_dup 0) (match_dup 4))
+   (parallel
+[(set (match_dup 0)
+ (vec_merge:VI48_AVX512VL
+   (match_dup 2)
+   (match_dup 3)
+   (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "operands[4] = CONST0_RTX (mode);"
   [(set_attr "isa" "avx512dq,*")
(set_attr "length_immediate" "0,1")
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
+(define_insn "*_cvtmask2_pternlog"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+   (vec_merge:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
+ (match_operand:VI48_AVX512VL 3 "const0_operand")
+ (match_operand: 1 "register_operand" "Yk")))
+   (unspec [(match_operand:VI48_AVX512VL 4 "register_operand" "0")] 
UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_AVX512F && !TARGET_AVX512DQ"
+  

RE: [VSETVL PASS] RISC-V: Optimize local AVL propagation

2023-07-03 Thread Li, Pan2 via Gcc-patches
Committed, thanks Kito.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Kito Cheng via Gcc-patches
Sent: Tuesday, July 4, 2023 10:20 AM
To: Robin Dapp 
Cc: Juzhe-Zhong ; gcc-patches@gcc.gnu.org; 
kito.ch...@sifive.com; pal...@dabbelt.com; pal...@rivosinc.com; 
jeffreya...@gmail.com
Subject: Re: [VSETVL PASS] RISC-V: Optimize local AVL propagation

LGTM

On Mon, Jul 3, 2023 at 8:47 PM Robin Dapp  wrote:
>
> LGTM.
>
> Regards
>  Robin
>


Re: [PATCH ver 3] rs6000: Update the vsx-vector-6.* tests.

2023-07-03 Thread Kewen.Lin via Gcc-patches
Hi Carl,

on 2023/6/30 05:36, Carl Love wrote:
> GCC maintainers:
> 
> Ver 3.  Added __attribute__ ((noipa)) to the test files.  Changed some
> of the scan-assembler-times checks to cover multiple similar
> instructions.  Change the function check macro to a macro to generate a
> function to do the test and check the results.  Retested on the various
> processor types and BE/LE versions.
> 
> Ver 2.  Switched to using code macros to generate the call to the
> builtin and test the results.  Added in instruction counts for the key
> instruction for the builtin.  Moved the tests into an additional
> function call to ensure the compiler doesn't replace the builtin call
> code with the statically computed results.  The compiler was doing this
> for a few of the simpler tests.  
> 
> The following patch takes the tests in vsx-vector-6-p7.h,  vsx-vector-
> 6-p8.h, vsx-vector-6-p9.h and reorganizes them into a series of smaller
> test files by functionality rather than processor version.
> 
> Tested the patch on Power 8 LE/BE, Power 9 LE/BE and Power 10 LE with
> no regressions.
> 
> Please let me know if this patch is acceptable for mainline.  Thanks.
> 
>Carl
> 
> 
> -
> rs6000: Update the vsx-vector-6.* tests.
> 
> The vsx-vector-6.h file is included into the processor specific test files
> vsx-vector-6.p7.c, vsx-vector-6.p8.c, and vsx-vector-6.p9.c.  The .h file
> contains a large number of vsx vector builtin tests.  The processor
> specific files contain the number of instructions that the tests are
> expected to generate for that processor.  The tests are compile only.
> 
> The tests are broken up into a seriers of files for related tests.  The

s/seriers/series/

> new tests are runnable tests to verify the builtin argument types and the
> functional correctness of each test rather than verifying the type and
> number of instructions generated.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/vsx-vector-6-1op.c: New test file.
>   * gcc.target/powerpc/vsx-vector-6-2lop.c: New test file.
>   * gcc.target/powerpc/vsx-vector-6-2op.c: New test file.
>   * gcc.target/powerpc/vsx-vector-6-3op.c: New test file.
>   * gcc.target/powerpc/vsx-vector-6-cmp-all.c: New test file.
>   * gcc.target/powerpc/vsx-vector-6-cmp.c: New test file.

Missing "func-" in the names ...

>   * gcc.target/powerpc/vsx-vector-6.h: Remove test file.
>   * gcc.target/powerpc/vsx-vector-6-p7.h: Remove test file.
>   * gcc.target/powerpc/vsx-vector-6-p8.h: Remove test file.
>   * gcc.target/powerpc/vsx-vector-6-p9.h: Remove test file.

should be vsx-vector-6-p{7,8,9}.c, "git gcc-verify" should catch these.

> ---
>  .../powerpc/vsx-vector-6-func-1op.c   | 141 ++
>  .../powerpc/vsx-vector-6-func-2lop.c  | 217 +++
>  .../powerpc/vsx-vector-6-func-2op.c   | 133 +
>  .../powerpc/vsx-vector-6-func-3op.c   | 257 ++
>  .../powerpc/vsx-vector-6-func-cmp-all.c   | 211 ++
>  .../powerpc/vsx-vector-6-func-cmp.c   | 121 +
>  .../gcc.target/powerpc/vsx-vector-6.h | 154 ---
>  .../gcc.target/powerpc/vsx-vector-6.p7.c  |  43 ---
>  .../gcc.target/powerpc/vsx-vector-6.p8.c  |  43 ---
>  .../gcc.target/powerpc/vsx-vector-6.p9.c  |  42 ---
>  10 files changed, 1080 insertions(+), 282 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-1op.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-2lop.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-2op.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-3op.c
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-cmp-all.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-cmp.c
>  delete mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6.h
>  delete mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6.p7.c
>  delete mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6.p8.c
>  delete mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6.p9.c
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-1op.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-1op.c
> new file mode 100644
> index 000..52c7ae3e983
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-func-1op.c
> @@ -0,0 +1,141 @@
> +/* { dg-do run { target lp64 } } */
> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */
> +/* { dg-options "-O2 -save-temps" } */

I just noticed that we are missing an effective target check here to ensure
those bifs are supported during the test run.  Since it's a runnable test
case, we also need to ensure the generated hw insns are supported; that is
"vsx_hw", like:

/* { dg-require-effective-target vsx_hw } */

And adding "-mvsx" to the dg-options.

This is also applied for the other test cases.


[Bug tree-optimization/110531] Vect: slp_done_for_suggested_uf is not initialized in tree-vect-loop.cc

2023-07-03 Thread linkw at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110531

--- Comment #3 from Kewen Lin  ---
(In reply to Hao Liu from comment #2)
> > Is the warning from some static analyzer?
> 
> No. I just find it maybe a bug while looking at the code.
> 
> > slp should be true always (always do analyze slp), it doesn't care what's 
> > in slp_done_for_suggested_uf.
> 
> Oh, I see. This is not a real bug.
> 
> 
> IMHO, it would be better to initialize it as "false", which should be much
> easier for someone to understand the code.

IMHO, the initialization with false is unnecessary and very likely it isn't
able to get optimized, it seems worse from this point of view. I was also
taught previously that useless initialization isn't recommended as sometimes it
can cover problems. But for readability, adding a comment with more
explanation may be worthwhile.

Re: [VSETVL PASS] RISC-V: Optimize local AVL propagation

2023-07-03 Thread Kito Cheng via Gcc-patches
LGTM

On Mon, Jul 3, 2023 at 8:47 PM Robin Dapp  wrote:
>
> LGTM.
>
> Regards
>  Robin
>


[PATCH V4 1/4] rs6000: build constant via li;rotldi

2023-07-03 Thread Jiufu Guo via Gcc-patches
Hi,

If a constant is possible to be rotated to/from a positive or negative
value from "li", then "li;rotldi" can be used to build the constant.

Compare with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621961.html
This patch just did minor changes to the style and comments.

Bootstrap and regtest pass on ppc64{,le}.

Since the previous version is approved with conditions, this version
explained the concern too.  If no objection, I would like to apply
this patch to trunk.


BR,
Jeff (Jiufu)

gcc/ChangeLog:

* config/rs6000/rs6000.cc (can_be_built_by_li_and_rotldi): New function.
(rs6000_emit_set_long_const): Call can_be_built_by_li_and_rotldi.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/const-build.c: New test.
---
 gcc/config/rs6000/rs6000.cc   | 47 +--
 .../gcc.target/powerpc/const-build.c  | 57 +++
 2 files changed, 98 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 42f49e4a56b..acc332acc05 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10258,6 +10258,31 @@ rs6000_emit_set_const (rtx dest, rtx source)
   return true;
 }
 
+/* Check if value C can be built by 2 instructions: one is 'li', another is
+   rotldi.
+
+   If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
+   is set to the mask operand of rotldi(rldicl), and return true.
+   Return false otherwise.  */
+
+static bool
+can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift,
+  HOST_WIDE_INT *mask)
+{
+  /* If C or ~C contains at least 49 successive zeros, then C can be rotated
+ to/from a positive or negative value that 'li' is able to load.  */
+  int n;
+  if (can_be_rotated_to_lowbits (c, 15, )
+  || can_be_rotated_to_lowbits (~c, 15, ))
+{
+  *mask = HOST_WIDE_INT_M1;
+  *shift = HOST_BITS_PER_WIDE_INT - n;
+  return true;
+}
+
+  return false;
+}
+
 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
Output insns to set DEST equal to the constant C as a series of
lis, ori and shl instructions.  */
@@ -10266,15 +10291,14 @@ static void
 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 {
   rtx temp;
+  int shift;
+  HOST_WIDE_INT mask;
   HOST_WIDE_INT ud1, ud2, ud3, ud4;
 
   ud1 = c & 0x;
-  c = c >> 16;
-  ud2 = c & 0x;
-  c = c >> 16;
-  ud3 = c & 0x;
-  c = c >> 16;
-  ud4 = c & 0x;
+  ud2 = (c >> 16) & 0x;
+  ud3 = (c >> 32) & 0x;
+  ud4 = (c >> 48) & 0x;
 
   if ((ud4 == 0x && ud3 == 0x && ud2 == 0x && (ud1 & 0x8000))
   || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
@@ -10305,6 +10329,17 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
   emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
 GEN_INT ((ud2 ^ 0x) << 16)));
 }
+  else if (can_be_built_by_li_and_rotldi (c, , ))
+{
+  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+  unsigned HOST_WIDE_INT imm = (c | ~mask);
+  imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
+
+  emit_move_insn (temp, GEN_INT (imm));
+  if (shift != 0)
+   temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
+  emit_move_insn (dest, temp);
+}
   else if (ud3 == 0 && ud4 == 0)
 {
   temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
diff --git a/gcc/testsuite/gcc.target/powerpc/const-build.c 
b/gcc/testsuite/gcc.target/powerpc/const-build.c
new file mode 100644
index 000..69b37e2bb53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/const-build.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+/* Verify that two instructions are successfully used to build constants.
+   One insn is li or lis, another is rotate: rldicl, rldicr or rldic.  */
+
+#define NOIPA __attribute__ ((noipa))
+
+struct fun
+{
+  long long (*f) (void);
+  long long val;
+};
+
+long long NOIPA
+li_rotldi_1 (void)
+{
+  return 0x75310LL;
+}
+
+long long NOIPA
+li_rotldi_2 (void)
+{
+  return 0x2164LL;
+}
+
+long long NOIPA
+li_rotldi_3 (void)
+{
+  return 0x8531LL;
+}
+
+long long NOIPA
+li_rotldi_4 (void)
+{
+  return 0x2194LL;
+}
+
+struct fun arr[] = {
+  {li_rotldi_1, 0x75310LL},
+  {li_rotldi_2, 0x2164LL},
+  {li_rotldi_3, 0x8531LL},
+  {li_rotldi_4, 0x2194LL},
+};
+
+/* { dg-final { scan-assembler-times {\mrotldi\M} 4 } } */
+
+int
+main ()
+{
+  for (int i = 0; i < sizeof (arr) / sizeof (arr[0]); i++)
+if ((*arr[i].f) () != arr[i].val)
+  __builtin_abort ();
+
+  return 0;
+}
-- 
2.39.3



Re: [PATCH] rs6000: Update the vsx-vector-6.* tests.

2023-07-03 Thread Kewen.Lin via Gcc-patches
Hi Carl,

on 2023/7/3 23:57, Carl Love wrote:
> Kewen:
> 
> On Fri, 2023-06-30 at 15:20 -0700, Carl Love wrote:
>> Segher never liked the above way of looking at the assembly.  He
>> prefers:
>>   gcc -S -g -mcpu=power8 -o vsx-vector-6-func-2lop.s vsx-vector-6-
>> func-
>> 2lop.c
>>
>>   grep xxlor vsx-vector-6-func-2lop.s | wc
>>  34  68 516
>>
>> So, again, I get the same count of 34 on both makalu and genoa.  But
>> again, that doesn't agree with what make script/scan-assembler thinks
>> the counts should be.
>>
>> When I looked at the vsx-vector-6-func-2lop.s I see on BE:
>>
>>  
>> lxvd2x 0,10,9
>> xxlor 0,12,0
>> xxlnor 0,0,0
>>  ...
>>
>> I was guessing that it was adjusting the data layout from the load. 
>> But looking again more carefully versus LE:
>>
>> 
>> lxvd2x 0,31,9 
>>xxpermdi 0,0,0,2 
>>xxlor 0,12,0  
>>xxlnor 0,0,0  
>>xxpermdi 0,0,0,2 
>> 
>>
>> the xxpermdi is probably what is really doing the data layout change.
>>
>> So, we have the issue that looking at the assembly gives different
>> instruction counts than what 
>>
>>dg-final { scan-assembler-times {\mxxlor\M} }
>>
>> comes up with???  Now I am really confused.  I don't know how the
>> scan-
>> assembler-times works but I will go see if I can find it and see if I
>> can figure out what the issue is.  I would expect that the scan-
>> assembler is working off the --save-temp files, which get deleted as
>> part of the run.  I would guess that scan-assembler does a grep to
>> find
>> the instructions and then maybe uses wc to count them??? I will go
>> see
>> if I can figure out how scan-assembler-times works.
> 
> OK, I figured out why I was getting 34 xxlor instructions instead of
> the 22 that the scan-assembler-times was getting.  The difference was
> when I compiled the program I forgot to use -O2.  So with -O2 I get the
> same number of xxlor instructions as scan-assembler-instructions.  I get
> 34 if I do not specify optimization.

OK, thanks for looking into it.  When you run a test case with RUNTESTFLAGS,
you can add the "-v" (and even more times) to RUNTESTFLAGS, then you can find
the exact compiling commands in the dumping, I usually used this way for
reproducing and I hope it can avoid some inconsistency for reproduction.

> 
> So, I think the scan-assembler-times are all correct.
> 
> As Peter says, counting xxlor is a bit problematic in general.  We
> could just drop counting xxlor or have the LE/BE count qualifier for
> the instructions.  Your call.

Yeah, I agree that counting xxlor in the checking code (from function main)
is fragile, but as you said we still want to check expected xxlor generated
for bif vec_or, so I'd prefer to separate the existing case into the
compiling part and run part, I'll reply with more details to your latest v3.

Thanks,
Kewen


[Bug tree-optimization/110531] Vect: slp_done_for_suggested_uf is not initialized in tree-vect-loop.cc

2023-07-03 Thread hliu at amperecomputing dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110531

--- Comment #2 from Hao Liu  ---
> Is the warning from some static analyzer?

No. I just find it maybe a bug while looking at the code.

> slp should be true always (always do analyze slp), it doesn't care what's in 
> slp_done_for_suggested_uf.

Oh, I see. This is not a real bug.


IMHO, it would be better to initialize it as "false", which should be much
easier for someone to understand the code.

[committed] CRIS: Replace unspec CRIS_UNSPEC_SWAP_BITS with rtx bitreverse

2023-07-03 Thread Hans-Peter Nilsson via Gcc-patches
This is just expected to be a change in representation.
No code is expected to change; no new tests are added.

* config/cris/cris.md (CRIS_UNSPEC_SWAP_BITS): Remove.
("cris_swap_bits", "ctzsi2"): Use bitreverse instead.
---
 gcc/config/cris/cris.md | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 7504b63dabf3..deb2f0c6b7c7 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -50,9 +50,6 @@ (define_c_enum ""
   [
;; Stack frame deallocation barrier.
CRIS_UNSPEC_FRAME_DEALLOC
-
-   ;; Swap all 32 bits of the operand; 31 <=> 0, 30 <=> 1...
-   CRIS_UNSPEC_SWAP_BITS
   ])
 
 ;; Register numbers.
@@ -2177,8 +2174,7 @@ (define_insn 
"bswapsi2"
 
 (define_insn "cris_swap_bits"
   [(set (match_operand:SI 0 "register_operand" "=r")
-   (unspec:SI [(match_operand:SI 1 "register_operand" "0")]
-  CRIS_UNSPEC_SWAP_BITS))
+   (bitreverse:SI (match_operand:SI 1 "register_operand" "0")))
(clobber (reg:CC CRIS_CC0_REGNUM))]
   "TARGET_HAS_SWAP"
   "swapwbr %0"
@@ -2193,8 +2189,7 @@ (define_expand "ctzsi2"
  (match_operand:SI 1 "register_operand"))
  (clobber (reg:CC CRIS_CC0_REGNUM))])
(parallel
-[(set (match_dup 2)
- (unspec:SI [(match_dup 2)] CRIS_UNSPEC_SWAP_BITS))
+[(set (match_dup 2) (bitreverse:SI (match_dup 2)))
  (clobber (reg:CC CRIS_CC0_REGNUM))])
(parallel
 [(set (match_operand:SI 0 "register_operand")
-- 
2.30.2



[committed] dwarf2out.cc (mem_loc_descriptor): Handle BITREVERSE

2023-07-03 Thread Hans-Peter Nilsson via Gcc-patches
Committed as obvious after regtest for cris-elf together
with the "next" patch, that replaces unspec
CRIS_UNSPEC_SWAP_BITS with bitreverse (which hit the ICE).

-- >8 --
This seems to have just been overlooked when introducing
BITREVERSE.  Note that the function name mem_loc_descriptor
is a misnomer; it'd better be called rtx_loc_descriptor or
any_loc_descriptor, because "anything" RTX can end up here.
To wit, when introducing new RTL that ends up as code or for
other reasons appear in debug expressions, don't forget to
update this function.  This was observed by building
libstdc++ for cris-elf with a patch replacing the
CRIS_UNSPEC_SWAP_BITS by bitreverse, as hitting the
internal-error-generating default case.

Looking at the BSWAP, POPCOUNT and ROTATE cases, BITREVERSE
can probably be fully expressed as DWARF code if need be,
but let's start with not throwing an internal error.

gcc:
* dwarf2out.cc (mem_loc_descriptor): Handle BITREVERSE.
---
 gcc/dwarf2out.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 9112fc0c64b5..e973644102c0 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -16940,6 +16940,7 @@ mem_loc_descriptor (rtx rtl, machine_mode mode,
 case CLOBBER:
 case SMUL_HIGHPART:
 case UMUL_HIGHPART:
+case BITREVERSE:
   break;
 
 case CONST_STRING:
-- 
2.30.2



[PATCH] xtensa: Use HARD_REG_SET instead of bare integer

2023-07-03 Thread Takayuki 'January June' Suwa via Gcc-patches
gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function, xtensa_expand_prologue):
Change to use HARD_REG_BIT and its macros.
* config/xtensa/xtensa.md
(peephole2: regmove elimination during DFmode input reload):
Likewise.
---
 gcc/config/xtensa/xtensa.cc |  9 +
 gcc/config/xtensa/xtensa.md | 13 ++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3298d53493c..992e80d824d 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -107,7 +107,7 @@ struct GTY(()) machine_function
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
   rtx last_logues_a9_content;
-  HOST_WIDE_INT eliminated_callee_saved_bmp;
+  HARD_REG_SET eliminated_callee_saved;
 };
 
 static void xtensa_option_override (void);
@@ -3586,7 +3586,8 @@ xtensa_expand_prologue (void)
df_insn_rescan (insnS);
SET_SRC (PATTERN (insnR)) = copy_rtx (mem);
df_insn_rescan (insnR);
-   cfun->machine->eliminated_callee_saved_bmp |= 1 << regno;
+   SET_HARD_REG_BIT (cfun->machine->eliminated_callee_saved,
+ regno);
  }
else
  {
@@ -3690,8 +3691,8 @@ xtensa_expand_epilogue (bool sibcall_p)
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
if (xtensa_call_save_reg(regno))
  {
-   if (! (cfun->machine->eliminated_callee_saved_bmp
-  & (1 << regno)))
+   if (! TEST_HARD_REG_BIT (cfun->machine->eliminated_callee_saved,
+regno))
  {
rtx x = gen_rtx_PLUS (Pmode,
  stack_pointer_rtx, GEN_INT (offset));
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 664424f1239..5386e45b51d 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -3240,15 +3240,14 @@
(set (match_dup 3)
(match_dup 7))]
 {
-  uint32_t check = 0;
+  HARD_REG_SET regs;
   int i;
+  CLEAR_HARD_REG_SET (regs);
   for (i = 0; i <= 3; ++i)
-{
-  uint32_t mask = (uint32_t)1 << REGNO (operands[i]);
-  if (check & mask)
-   FAIL;
-  check |= mask;
-}
+if (TEST_HARD_REG_BIT (regs, REGNO (operands[i])))
+  FAIL;
+else
+  SET_HARD_REG_BIT (regs, REGNO (operands[i]));
   operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0));
   operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0));
 })
-- 
2.30.2


[Bug rtl-optimization/110528] selective scheduling seems to go into an infinite loop

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110528

--- Comment #6 from Andrew Pinski  ---
With selective scheduling on my reduced testcase:
```
Time variable   usr   sys  wall
  GGC
 phase setup:   0.00 (  0%)   0.01 ( 25%)   0.01 (  0%)
 1825k ( 51%)
 phase parsing  :   0.00 (  0%)   0.00 (  0%)   0.01 (  0%)
  174k (  5%)
 phase opt and generate :   4.03 (100%)   0.03 ( 75%)   4.05 ( 99%)
 1563k ( 44%)
 phase finalize :   0.00 (  0%)   0.00 (  0%)   0.02 (  0%)
0  (  0%)
 callgraph functions expansion  :   4.02 (100%)   0.03 ( 75%)   4.05 ( 99%)
 1521k ( 43%)
 callgraph ipa passes   :   0.01 (  0%)   0.00 (  0%)   0.00 (  0%)
   29k (  1%)
 CFG verifier   :   0.00 (  0%)   0.00 (  0%)   0.01 (  0%)
0  (  0%)
 df use-def / def-use chains:   0.01 (  0%)   0.00 (  0%)   0.01 (  0%)
0  (  0%)
 lexical analysis   :   0.00 (  0%)   0.00 (  0%)   0.01 (  0%)
0  (  0%)
 tree operand scan  :   0.01 (  0%)   0.00 (  0%)   0.01 (  0%)
 5120  (  0%)
 tree CCP   :   0.00 (  0%)   0.00 (  0%)   0.01 (  0%)
  224  (  0%)
 complete unrolling :   0.00 (  0%)   0.01 ( 25%)   0.00 (  0%)
  482k ( 14%)
 tree SSA verifier  :   0.03 (  1%)   0.00 (  0%)   0.01 (  0%)
0  (  0%)
 tree STMT verifier :   0.04 (  1%)   0.00 (  0%)   0.05 (  1%)
0  (  0%)
 expand :   0.00 (  0%)   0.00 (  0%)   0.01 (  0%)
  467k ( 13%)
 CSE:   0.01 (  0%)   0.00 (  0%)   0.00 (  0%)
0  (  0%)
 integrated RA  :   0.00 (  0%)   0.00 (  0%)   0.01 (  0%)
   24k (  1%)
 scheduling 2   :   3.91 ( 97%)   0.02 ( 50%)   3.92 ( 96%)
  130k (  4%)
 verify RTL sharing :   0.02 (  0%)   0.00 (  0%)   0.01 (  0%)
0  (  0%)
 TOTAL  :   4.03  0.04  4.09   
 3564k
Extra diagnostic checks enabled; compiler may run slowly.
Configure with --enable-checking=release to disable checks.
```

Without:
```
Time variable   usr   sys  wall
  GGC
 phase setup:   0.01 (  7%)   0.00 (  0%)   0.02 ( 11%)
 1825k ( 49%)
 phase opt and generate :   0.14 ( 93%)   0.00 (  0%)   0.14 ( 78%)
 1694k ( 46%)
 phase finalize :   0.00 (  0%)   0.00 (  0%)   0.02 ( 11%)
0  (  0%)
 callgraph functions expansion  :   0.13 ( 87%)   0.00 (  0%)   0.14 ( 78%)
 1652k ( 45%)
 callgraph ipa passes   :   0.01 (  7%)   0.00 (  0%)   0.00 (  0%)
   29k (  1%)
 cfg construction   :   0.00 (  0%)   0.00 (  0%)   0.01 (  6%)
0  (  0%)
 CFG verifier   :   0.02 ( 13%)   0.00 (  0%)   0.02 ( 11%)
0  (  0%)
 alias analysis :   0.00 (  0%)   0.00 (  0%)   0.01 (  6%)
 1640  (  0%)
 tree VRP   :   0.01 (  7%)   0.00 (  0%)   0.00 (  0%)
 2160  (  0%)
 tree operand scan  :   0.01 (  7%)   0.00 (  0%)   0.01 (  6%)
 5120  (  0%)
 tree DSE   :   0.01 (  7%)   0.00 (  0%)   0.00 (  0%)
0  (  0%)
 complete unrolling :   0.01 (  7%)   0.00 (  0%)   0.00 (  0%)
  482k ( 13%)
 tree SSA verifier  :   0.02 ( 13%)   0.00 (  0%)   0.00 (  0%)
0  (  0%)
 tree STMT verifier :   0.03 ( 20%)   0.00 (  0%)   0.05 ( 28%)
0  (  0%)
 forward prop   :   0.01 (  7%)   0.00 (  0%)   0.00 (  0%)
0  (  0%)
 integrated RA  :   0.00 (  0%)   0.00 (  0%)   0.01 (  6%)
   24k (  1%)
 reload CSE regs:   0.01 (  7%)   0.00 (  0%)   0.00 (  0%)
   48  (  0%)
 scheduling 2   :   0.01 (  7%)   0.00 (  0%)   0.01 (  6%)
  260k (  7%)
 initialize rtl :   0.00 (  0%)   0.00 (  0%)   0.01 (  6%)
   12k (  0%)
 rest of compilation:   0.00 (  0%)   0.00 (  0%)   0.01 (  6%)
 3496  (  0%)
 TOTAL  :   0.15  0.00  0.18   
 3695k
Extra diagnostic checks enabled; compiler may run slowly.
Configure with --enable-checking=release to disable checks.
```

You can see selective scheduling is really bad.

[Bug rtl-optimization/110528] selective scheduling seems to go into an infinite loop

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110528

--- Comment #5 from Andrew Pinski  ---
Here is a testcase which does not go into an infinite loop but takes a little
more than 4 seconds to compile which is a lot:
```
static unsigned short g_231 = 1UL;
void func_61(unsigned p_62) {
  unsigned char g_116 = 0xE3L;
  int l_733;
  int l_661;
  unsigned char l_746;
  for (l_733 = 5; (l_733 >= 1); l_733 -= 1) 
for (l_746 = 1; (l_746 <= 7); l_746 += 1) 
  for (g_116 = 2; (g_116 <= 7); g_116 += 1) 
for (l_661 = 6; (l_661 >= 0); l_661 -= 1)
{
  _231;
  (g_231) = (0x84EAL <= p_62);
}
}

```

This is with `-gstatement-frontiers -O3 -fselective-scheduling2`.
Without `-fselective-scheduling2` GCC compiles this in less than a second.

Re: [PATCH] Fortran: fixes for procedures with ALLOCATABLE,INTENT(OUT) arguments [PR92178]

2023-07-03 Thread Steve Kargl via Gcc-patches
On Mon, Jul 03, 2023 at 10:49:36PM +0200, Harald Anlauf via Fortran wrote:
> 
> Indeed, this is a nice demonstration.
> 
> While playing, I was wondering whether the following code is conforming:
> 
> program p
>   call s ((1))
> contains
>   subroutine s (x)
> integer :: x
> x = 42
>   end subroutine
> end
> 
> (It crashes with gfortran, but not with any foreign brand tested).
> 

It's not conforming.  '(1)' is an expression and it cannot appear
in a variable definition condition.  I am not aware of any numbered
constraint that would require a Fortran processor to generate an
error.

-- 
Steve


Re: [PATCH] libstdc++: Split up pstl/set.cc testcase

2023-07-03 Thread Jonathan Wakely via Gcc-patches
On Mon, 3 Jul 2023 at 23:14, Thomas Rodgers via Libstdc++
 wrote:
>
> This testcase is causing some timeout issues. This patch splits the
> testcase up by individual set algorithm.

I think the Apache license requires a notice saying the original file
was modified. A comment in each new file noting it was derived from
pstl/alg_sorting/set.cc (or whatever the file is called upstream)
should be sufficient.

OK with that change, thanks.



[committed] libstdc++: Fix synopsis test

2023-07-03 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

-- >8 --

The  header is only supported for the cxx11 ABI. The
declarations of basic_syncbuf, basic_osyncstream, syncbuf and
osyncstream were already correctly guarded by a check for
_GLIBCXX_USE_CXX11_ABI, but the wsyncbuf and wosyncstream declarations
were not.

libstdc++-v3/ChangeLog:

* testsuite/27_io/headers/iosfwd/synopsis.cc: Make wsyncbuf and
wosyncstream depend on _GLIBCXX_USE_CXX11_ABI.
---
 libstdc++-v3/testsuite/27_io/headers/iosfwd/synopsis.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/27_io/headers/iosfwd/synopsis.cc 
b/libstdc++-v3/testsuite/27_io/headers/iosfwd/synopsis.cc
index b6d3fa7a719..12f47ae8133 100644
--- a/libstdc++-v3/testsuite/27_io/headers/iosfwd/synopsis.cc
+++ b/libstdc++-v3/testsuite/27_io/headers/iosfwd/synopsis.cc
@@ -115,7 +115,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
   typedef basic_ofstream wofstream;
   typedef basic_fstream  wfstream;
 
-#if __cplusplus >= 202002L
+#if __cplusplus >= 202002L && _GLIBCXX_USE_CXX11_ABI
   typedef basic_syncbuf wsyncbuf;
   typedef basic_osyncstream wosyncstream;
 #endif
-- 
2.41.0



Re: [PATCH] libstdc++: Enable OpenMP 5.0 pragmas in PSTL headers

2023-07-03 Thread Jonathan Wakely via Gcc-patches
Pushed to trunk now.

On Fri, 30 Jun 2023 at 21:17, Jonathan Wakely via Libstdc++
 wrote:
>
> Jakub made a similar change a few years ago, but I think it got lost
> in the recent PSTL rebase.
>
> Tested x86_64-linux.
>
> Does this look OK for trunk?
>
> -- >8 --
>
> This reapplies r10-1314-g32bab8b6ad0a90 which was lost in the recent
> PSTL rebase from upstream.
>
> * include/pstl/pstl_config.h (_PSTL_PRAGMA_SIMD_SCAN,
> _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN, _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN):
> Define to OpenMP 5.0 pragmas even for GCC 10.0+.
> (_PSTL_UDS_PRESENT): Define to 1 for GCC 10.0+.
> ---
>  libstdc++-v3/include/pstl/pstl_config.h | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/libstdc++-v3/include/pstl/pstl_config.h 
> b/libstdc++-v3/include/pstl/pstl_config.h
> index 74d2139c736..ccb9dd32838 100644
> --- a/libstdc++-v3/include/pstl/pstl_config.h
> +++ b/libstdc++-v3/include/pstl/pstl_config.h
> @@ -82,7 +82,8 @@
>  #define _PSTL_PRAGMA_FORCEINLINE
>  #endif
>
> -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900
> +#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900) || \
> +(!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 10)
>  #define _PSTL_PRAGMA_SIMD_SCAN(PRM) _PSTL_PRAGMA(omp simd 
> reduction(inscan, PRM))
>  #define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan 
> inclusive(PRM))
>  #define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan 
> exclusive(PRM))
> @@ -126,7 +127,8 @@
>  #define _PSTL_UDR_PRESENT
>  #endif
>
> -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900 && 
> __INTEL_COMPILER_BUILD_DATE >= 20180626
> +#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900 && 
> __INTEL_COMPILER_BUILD_DATE >= 20180626) || \
> +(!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 10)
>  #   define _PSTL_UDS_PRESENT
>  #endif
>
> --
> 2.41.0
>



[committed] libstdc++: Qualify calls to std::_Destroy and _Destroy_aux

2023-07-03 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux. Pushed to trunk.

This isn't a regression, but is safe to backport.

-- >8 --

These calls should be qualified to prevent ADL, which can cause errors
for incomplete types that are associated classes.

libstdc++-v3/ChangeLog:

* include/bits/alloc_traits.h (_Destroy): Qualify call.
* include/bits/stl_construct.h (_Destroy, _Destroy_n): Likewise.
* testsuite/23_containers/vector/cons/destroy-adl.cc: New test.
---
 libstdc++-v3/include/bits/alloc_traits.h  |  2 +-
 libstdc++-v3/include/bits/stl_construct.h |  4 ++--
 .../23_containers/vector/cons/destroy-adl.cc  | 11 +++
 3 files changed, 14 insertions(+), 3 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/23_containers/vector/cons/destroy-adl.cc

diff --git a/libstdc++-v3/include/bits/alloc_traits.h 
b/libstdc++-v3/include/bits/alloc_traits.h
index cd91d152f64..182c3e23eed 100644
--- a/libstdc++-v3/include/bits/alloc_traits.h
+++ b/libstdc++-v3/include/bits/alloc_traits.h
@@ -944,7 +944,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _Destroy(_ForwardIterator __first, _ForwardIterator __last,
 allocator<_Tp>&)
 {
-  _Destroy(__first, __last);
+  std::_Destroy(__first, __last);
 }
 #endif
   /// @endcond
diff --git a/libstdc++-v3/include/bits/stl_construct.h 
b/libstdc++-v3/include/bits/stl_construct.h
index 574f4fa50b4..cf62d927cdb 100644
--- a/libstdc++-v3/include/bits/stl_construct.h
+++ b/libstdc++-v3/include/bits/stl_construct.h
@@ -190,7 +190,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 #if __cplusplus >= 202002L
   if (std::__is_constant_evaluated())
-   return _Destroy_aux::__destroy(__first, __last);
+   return std::_Destroy_aux::__destroy(__first, __last);
 #endif
   std::_Destroy_aux<__has_trivial_destructor(_Value_type)>::
__destroy(__first, __last);
@@ -239,7 +239,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 #if __cplusplus >= 202002L
   if (std::__is_constant_evaluated())
-   return _Destroy_n_aux<false>::__destroy_n(__first, __count);
+   return std::_Destroy_n_aux<false>::__destroy_n(__first, __count);
 #endif
   return std::_Destroy_n_aux<__has_trivial_destructor(_Value_type)>::
__destroy_n(__first, __count);
diff --git a/libstdc++-v3/testsuite/23_containers/vector/cons/destroy-adl.cc 
b/libstdc++-v3/testsuite/23_containers/vector/cons/destroy-adl.cc
new file mode 100644
index 000..5623842e9b1
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/vector/cons/destroy-adl.cc
@@ -0,0 +1,11 @@
+// { dg-do compile }
+
+#include <vector>
+
+template<class T> struct Holder { T t; }; // { dg-bogus "incomplete type" }
+struct Incomplete;
+
+void destroy(std::vector<Holder<Incomplete>*>* p)
+{
+  p->~vector();
+}
-- 
2.41.0



Re: Expert Engagement

2023-07-03 Thread Dave Blanchard
On Mon, 3 Jul 2023 19:24:23 +
Richard Nardi via Gcc  wrote:

> 
> Hello,
> I hope you are having a wonderful day. I would like to engage your firm to 
> prepare my tax return for the current tax year. [...]

Sounds legit. I have a feeling you'll be asking me for my credit card number 
and bank account information also at some point, plus my home mailing address 
so you can send me all your documents, so would it be helpful if I went ahead 
and emailed those to you right now? 

Thanks in advance for this opportunity to help you with your financial 
troubles. Can't wait to get started!

Sincerely,
Dave
Professional Tax Preparer and All-Around Generous Guy
www.intuit.com
phone number: 1-800-4INTUIT


[PATCH] libstdc++: Split up pstl/set.cc testcase

2023-07-03 Thread Thomas Rodgers via Gcc-patches
This testcase is causing some timeout issues. This patch splits the
testcase up by individual set algorithm.
From 857359b72f8886b6e90db3b596d04f08559d2b51 Mon Sep 17 00:00:00 2001
From: Thomas Rodgers 
Date: Mon, 3 Jul 2023 15:04:45 -0700
Subject: [PATCH] libstdc++: Split up pstl/set.cc testcase

This testcase is causing some timeout issues. This patch splits the
testcase up by individual set algorithm.

libstdc++-v3:/ChangeLog:
	* testsuite/25_algorithms/pstl/alg_sorting/set.cc: Delete
	file.
	* testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc:
	New file.
	* testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc:
	Likewise.
	* testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc:
	Likewise.
	* testsuite/25_algorithms/pstl/alg_sorting/set_union.cc:
	Likewise.
	* testsuite/25_algorithms/pstl/alg_sorting/set_util.h:
	Likewise.
---
 .../25_algorithms/pstl/alg_sorting/set.cc | 289 --
 .../pstl/alg_sorting/set_difference.cc|  90 ++
 .../pstl/alg_sorting/set_intersection.cc  |  91 ++
 .../alg_sorting/set_symmetric_difference.cc   |  92 ++
 .../pstl/alg_sorting/set_union.cc |  90 ++
 .../25_algorithms/pstl/alg_sorting/set_util.h |  72 +
 6 files changed, 435 insertions(+), 289 deletions(-)
 delete mode 100644 libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_difference.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_intersection.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_symmetric_difference.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_union.cc
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set_util.h

diff --git a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set.cc b/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set.cc
deleted file mode 100644
index 0343739dfd1..000
--- a/libstdc++-v3/testsuite/25_algorithms/pstl/alg_sorting/set.cc
+++ /dev/null
@@ -1,289 +0,0 @@
-// -*- C++ -*-
-// { dg-options "-ltbb" }
-// { dg-do run { target c++17 } }
-// { dg-timeout-factor 3 }
-// { dg-require-effective-target tbb_backend }
-
-//===-- set.pass.cpp --===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-
-#include "pstl/pstl_test_config.h"
-
-#ifdef PSTL_STANDALONE_TESTS
-
-#include 
-#include 
-
-#include "pstl/execution"
-#include "pstl/algorithm"
-#else
-#include 
-#include 
-#endif // PSTL_STANDALONE_TESTS
-
-#include "pstl/test_utils.h"
-
-using namespace TestUtils;
-
-template 
-struct Num
-{
-T val;
-
-Num() : val{} {}
-Num(const T& v) : val(v) {}
-
-//for "includes" checks
-template 
-bool
-operator<(const Num& v1) const
-{
-return val < v1.val;
-}
-
-//The types Type1 and Type2 must be such that an object of type InputIt can be dereferenced and then implicitly converted to both of them
-template 
-operator Num() const
-{
-return Num((T1)val);
-}
-
-friend bool
-operator==(const Num& v1, const Num& v2)
-{
-return v1.val == v2.val;
-}
-};
-
-template 
-struct test_set_union
-{
-template 
-typename std::enable_if::value, void>::type
-operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2,
-   Compare comp)
-{
-using T1 = typename std::iterator_traits::value_type;
-
-auto n1 = std::distance(first1, last1);
-auto n2 = std::distance(first2, last2);
-auto n = n1 + n2;
-Sequence expect(n);
-Sequence out(n);
-
-auto expect_res = std::set_union(first1, last1, first2, last2, expect.begin(), comp);
-auto res = std::set_union(exec, first1, last1, first2, last2, out.begin(), comp);
-
-EXPECT_TRUE(expect_res - expect.begin() == res - out.begin(), "wrong result for set_union");
-EXPECT_EQ_N(expect.begin(), out.begin(), std::distance(out.begin(), res), "wrong set_union effect");
-}
-
-template 
-typename std::enable_if::value, void>::type
-operator()(Policy&&, InputIterator1, InputIterator1, InputIterator2, InputIterator2, Compare)
-{
-}
-};
-
-template 
-struct test_set_intersection
-{
-template 
-typename std::enable_if::value, void>::type
-operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2,
-   Compare comp)
-{
-using T1 = typename std::iterator_traits::value_type;
-
-auto n1 = std::distance(first1, last1);

Re: [PATCH] tree-optimization/110310 - move vector epilogue disabling to analysis phase

2023-07-03 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> The following moves the late decision to elide vectorized epilogues to
> the analysis phase and also avoids altering the epilogue's niter.
> The costing part from vect_determine_partial_vectors_and_peeling is
> moved to vect_analyze_loop_costing where we use the main loop
> analysis to constrain the epilogue scalar iterations.
>
> I have not tried to integrate this with vect_known_niters_smaller_than_vf.
>
> It seems the for_epilogue_p parameter in
> vect_determine_partial_vectors_and_peeling is largely useless and
> we could compute that in the function itself.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
>
> I suppose testing on aarch64 would be nice-to-have - any takers?

Sorry, ran this earlier today and then forgot about it.  And yeah,
it passes bootstrap & regtest on aarch64-linux-gnu (all languages).

LGTM FWIW, except:

> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 0a03f56aae7..f39a1ecb306 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -2144,14 +2144,76 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo,
>  
>/* Only loops that can handle partially-populated vectors can have 
> iteration
>   counts less than the vectorization factor.  */
> -  if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
> +  if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
> +  && vect_known_niters_smaller_than_vf (loop_vinfo))
>  {
> -  if (vect_known_niters_smaller_than_vf (loop_vinfo))
> +  if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +  "not vectorized: iteration count smaller than "
> +  "vectorization factor.\n");
> +  return 0;
> +}
> +
> +  /* If we know the number of iterations we can do better, for the
> + epilogue we can also decide whether the main loop leaves us
> + with enough iterations, preferring a smaller vector epilog then
> + also possibly used for the case we skip the vector loop.  */
> +  if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
> +  && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
> +{
> +  widest_int scalar_niters
> + = wi::to_widest (LOOP_VINFO_NITERSM1 (loop_vinfo)) + 1;
> +  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
> + {
> +   loop_vec_info orig_loop_vinfo
> + = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
> +   unsigned lowest_vf
> + = constant_lower_bound (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo));
> +   int prolog_peeling = 0;
> +   if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
> + prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo);
> +   if (prolog_peeling >= 0
> +   && known_eq (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo),
> +lowest_vf))
> + {
> +   unsigned gap
> + = LOOP_VINFO_PEELING_FOR_GAPS (orig_loop_vinfo) ? 1 : 0;
> +   scalar_niters = ((scalar_niters - gap - prolog_peeling)
> +% lowest_vf + gap);

Are you sure we want this + gap?  A vectorised epilogue can't handle the
gap either, at least for things that use (say) the first vector of LD2
and ignore the second vector.

Thanks,
Richard

> +   if (scalar_niters == 0)
> + {
> +   if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +  "not vectorized: loop never entered\n");
> +   return 0;
> + }
> + }
> + }
> +
> +  /* Check that the loop processes at least one full vector.  */
> +  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> +  if (known_lt (scalar_niters, vf))
>   {
> if (dump_enabled_p ())
>   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -  "not vectorized: iteration count smaller than "
> -  "vectorization factor.\n");
> +  "loop does not have enough iterations "
> +  "to support vectorization.\n");
> +   return 0;
> + }
> +
> +  /* If we need to peel an extra epilogue iteration to handle data
> +  accesses with gaps, check that there are enough scalar iterations
> +  available.
> +
> +  The check above is redundant with this one when peeling for gaps,
> +  but the distinction is useful for diagnostics.  */
> +  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
> +   && known_le (scalar_niters, vf))
> + {
> +   if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +  "loop does not have enough iterations "
> +  "to support peeling for gaps.\n");
> return 0;
>   }
>  }
> @@ -2502,31 +2564,6 @@ vect_determine_partial_vectors_and_peeling 
> (loop_vec_info loop_vinfo,
>   

Re: [PATCH v2] RISC-V: Add support for vector crypto extensions

2023-07-03 Thread Philipp Tomsich
Thanks, applied to master.
--Philipp.

On Mon, 3 Jul 2023 at 15:42, Kito Cheng  wrote:

> Thanks, LGTM :)
>
> Christoph Muellner 於 2023年7月3日 週一,19:08寫道:
>
>> From: Christoph Müllner 
>>
>> This series adds basic support for the vector crypto extensions:
>> * Zvbb
>> * Zvbc
>> * Zvkg
>> * Zvkned
>> * Zvkhn[a,b]
>> * Zvksed
>> * Zvksh
>> * Zvkn
>> * Zvknc
>> * Zvkng
>> * Zvks
>> * Zvksc
>> * Zvksg
>> * Zvkt
>>
>> This patch is based on the v20230620 version of the Vector Cryptography
>> specification. The specification is frozen and can be found here:
>>   https://github.com/riscv/riscv-crypto/releases/tag/v20230620
>>
>> Binutils support was merged upstream a few days ago.
>>
>> All extensions come with tests for the feature test macros.
>>
>> gcc/ChangeLog:
>>
>> * common/config/riscv/riscv-common.cc: Add support for zvbb,
>> zvbc, zvkg, zvkned, zvknha, zvknhb, zvksed, zvksh, zvkn,
>> zvknc, zvkng, zvks, zvksc, zvksg, zvkt and the implied subsets.
>> * config/riscv/arch-canonicalize: Add canonicalization info for
>> zvkn, zvknc, zvkng, zvks, zvksc, zvksg.
>> * config/riscv/riscv-opts.h (MASK_ZVBB): New macro.
>> (MASK_ZVBC): Likewise.
>> (TARGET_ZVBB): Likewise.
>> (TARGET_ZVBC): Likewise.
>> (MASK_ZVKG): Likewise.
>> (MASK_ZVKNED): Likewise.
>> (MASK_ZVKNHA): Likewise.
>> (MASK_ZVKNHB): Likewise.
>> (MASK_ZVKSED): Likewise.
>> (MASK_ZVKSH): Likewise.
>> (MASK_ZVKN): Likewise.
>> (MASK_ZVKNC): Likewise.
>> (MASK_ZVKNG): Likewise.
>> (MASK_ZVKS): Likewise.
>> (MASK_ZVKSC): Likewise.
>> (MASK_ZVKSG): Likewise.
>> (MASK_ZVKT): Likewise.
>> (TARGET_ZVKG): Likewise.
>> (TARGET_ZVKNED): Likewise.
>> (TARGET_ZVKNHA): Likewise.
>> (TARGET_ZVKNHB): Likewise.
>> (TARGET_ZVKSED): Likewise.
>> (TARGET_ZVKSH): Likewise.
>> (TARGET_ZVKN): Likewise.
>> (TARGET_ZVKNC): Likewise.
>> (TARGET_ZVKNG): Likewise.
>> (TARGET_ZVKS): Likewise.
>> (TARGET_ZVKSC): Likewise.
>> (TARGET_ZVKSG): Likewise.
>> (TARGET_ZVKT): Likewise.
>> * config/riscv/riscv.opt: Introduction of riscv_zv{b,k}_subext.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/riscv/zvbb.c: New test.
>> * gcc.target/riscv/zvbc.c: New test.
>> * gcc.target/riscv/zvkg.c: New test.
>> * gcc.target/riscv/zvkn-1.c: New test.
>> * gcc.target/riscv/zvkn.c: New test.
>> * gcc.target/riscv/zvknc-1.c: New test.
>> * gcc.target/riscv/zvknc-2.c: New test.
>> * gcc.target/riscv/zvknc.c: New test.
>> * gcc.target/riscv/zvkned.c: New test.
>> * gcc.target/riscv/zvkng-1.c: New test.
>> * gcc.target/riscv/zvkng-2.c: New test.
>> * gcc.target/riscv/zvkng.c: New test.
>> * gcc.target/riscv/zvknha.c: New test.
>> * gcc.target/riscv/zvknhb.c: New test.
>> * gcc.target/riscv/zvks-1.c: New test.
>> * gcc.target/riscv/zvks.c: New test.
>> * gcc.target/riscv/zvksc-1.c: New test.
>> * gcc.target/riscv/zvksc-2.c: New test.
>> * gcc.target/riscv/zvksc.c: New test.
>> * gcc.target/riscv/zvksed.c: New test.
>> * gcc.target/riscv/zvksg-1.c: New test.
>> * gcc.target/riscv/zvksg-2.c: New test.
>> * gcc.target/riscv/zvksg.c: New test.
>> * gcc.target/riscv/zvksh.c: New test.
>> * gcc.target/riscv/zvkt.c: New test.
>>
>> Signed-off-by: Christoph Müllner 
>> ---
>> Changes for v2:
>> - Update patch for specification version v20230620
>>
>>  gcc/common/config/riscv/riscv-common.cc  | 55 
>>  gcc/config/riscv/arch-canonicalize   |  7 +++
>>  gcc/config/riscv/riscv-opts.h| 34 +++
>>  gcc/config/riscv/riscv.opt   |  6 +++
>>  gcc/testsuite/gcc.target/riscv/zvbb.c| 13 ++
>>  gcc/testsuite/gcc.target/riscv/zvbc.c| 13 ++
>>  gcc/testsuite/gcc.target/riscv/zvkg.c| 13 ++
>>  gcc/testsuite/gcc.target/riscv/zvkn-1.c  | 29 +
>>  gcc/testsuite/gcc.target/riscv/zvkn.c| 29 +
>>  gcc/testsuite/gcc.target/riscv/zvknc-1.c | 37 
>>  gcc/testsuite/gcc.target/riscv/zvknc-2.c | 37 
>>  gcc/testsuite/gcc.target/riscv/zvknc.c   | 37 
>>  gcc/testsuite/gcc.target/riscv/zvkned.c  | 13 ++
>>  gcc/testsuite/gcc.target/riscv/zvkng-1.c | 37 
>>  gcc/testsuite/gcc.target/riscv/zvkng-2.c | 37 
>>  gcc/testsuite/gcc.target/riscv/zvkng.c   | 37 
>>  gcc/testsuite/gcc.target/riscv/zvknha.c  | 13 ++
>>  gcc/testsuite/gcc.target/riscv/zvknhb.c  | 13 ++
>>  gcc/testsuite/gcc.target/riscv/zvks-1.c  | 29 +
>>  gcc/testsuite/gcc.target/riscv/zvks.c| 29 +
>>  gcc/testsuite/gcc.target/riscv/zvksc-1.c | 37 

[Bug rtl-optimization/110528] selective scheduling seems to go into an infinite loop

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110528

Andrew Pinski  changed:

   What|Removed |Added

Summary|Timeout with with specific  |selective scheduling seems
   |optimizations   |to go into an infinite loop
 Target||x86_64-linux-gnu
  Known to fail||14.0
  Component|middle-end  |rtl-optimization

--- Comment #4 from Andrew Pinski  ---
#0  0x00f67ac0 in vinsn_equal_p(vinsn_def*, vinsn_def*) () at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched-ir.cc:1603
#1  0x00f68679 in av_set_lookup (set=,
sought_vinsn=0x5003ee0) at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched-ir.cc:2120
#2  0x00f78747 in compute_av_set_inside_bb(rtx_insn*, _list_node*, int,
bool) () at /home/apinski/src/upstream-gcc/gcc/gcc/vec.h:1504
#3  0x00f7dc9d in compute_av_set (unique_p=, ws=,
p=, insn=) at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched.cc:5058
#4  compute_av_set_on_boundaries (av_vliw_p=0x7fffc008,
av_vliw_p@entry=, bnds=, fence=) at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched.cc:5058
#5  fill_insns (scheduled_insns_tailpp=, seqno=-1,
fence=) at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched.cc:5523
#6  schedule_on_fences (scheduled_insns_tailpp=,
max_seqno=1, fences=) at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched.cc:7353
#7  sel_sched_region_2(int) () at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched.cc:7491
#8  0x00f7ff97 in sel_sched_region_1() () at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched.cc:7533
#9  0x00f8157c in sel_sched_region (rgn=20) at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched.cc:7634
#10 sel_sched_region(int) () at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched.cc:7619
#11 0x00f8174a in run_selective_scheduling () at
/home/apinski/src/upstream-gcc/gcc/gcc/sel-sched.cc:7720
#12 0x00f634ed in rest_of_handle_sched () at
/home/apinski/src/upstream-gcc/gcc/gcc/sched-rgn.cc:3729

[Bug middle-end/110228] [13/14 Regression] llvm-16 miscompiled due to an maybe uninitialized variable

2023-07-03 Thread slyfox at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110228

--- Comment #25 from Sergei Trofimovich  ---
Specifically this bug.c.034t.ccp1's bit looks fishy:

...
Folding statement: LookupFlags_14 = 1;
Queued stmt for removal.  Folds to: 1
Folding statement: LookupFlags_15 = 0;
Queued stmt for removal.  Folds to: 0
Folding PHI node: LookupFlags_4 = PHI <0(4), LookupFlags_5(5), 1(6)>
No folding possible
Folding statement: *p_16(D) = LookupFlags_4;
Not folded
Folding statement: i_18 = i_6 + 1;
Not folded
Removing dead stmt:i_8 = 0;
Removing dead stmt:LookupFlags_14 = 1;
Removing dead stmt:LookupFlags_15 = 0;

[Bug middle-end/110510] ggc infinite recursion

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110510

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |FIXED
   Target Milestone|--- |14.0
 Status|ASSIGNED|RESOLVED

--- Comment #6 from Andrew Pinski  ---
Fixed. Thanks for reporting this.

[Bug middle-end/110510] ggc infinite recursion

2023-07-03 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110510

--- Comment #5 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:d0a333612bfb7faf8d61210c831165388e758768

commit r14-2270-gd0a333612bfb7faf8d61210c831165388e758768
Author: Andrew Pinski 
Date:   Sat Jul 1 10:52:48 2023 -0700

Use chain_next on eh_landing_pad_d for GTY (PR middle-end/110510)

The backtrace in the bug report suggests that the stack is being
exhausted during GC collection, because of a long chain of eh_landing_pad_d.
This might fix that by adding chain_next onto eh_landing_pad_d's GTY
marker.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR middle-end/110510
* except.h (struct eh_landing_pad_d): Add chain_next GTY.

[PATCH 5/5] OpenMP: Array shaping operator and strided "target update" for C

2023-07-03 Thread Julian Brown
Following the similar support for C++ and Fortran, here is the
C implementation for the OpenMP 5.0 array-shaping operator, and for
strided and rectangular updates for "target update".

Much of the implementation is shared with the C++ support added earlier
in this patch series.  Some details of parsing necessarily differ for C,
but the general ideas are the same.

2023-07-03  Julian Brown  

gcc/c/
* c-parser.cc (c_parser_braced_init): Disallow array-shaping operator
in braced init.
(c_parser_conditional_expression): Disallow array-shaping operator in
conditional expression.
(c_parser_cast_expression): Add array-shaping operator support.
(c_parser_postfix_expression): Disallow array-shaping operator in
statement expressions.
(c_parser_postfix_expression_after_primary): Add OpenMP array section
stride support.
(c_parser_expr_list): Disallow array-shaping operator in expression
lists.
(c_array_type_nelts_top, c_array_type_nelts_total): New functions.
(c_parser_omp_variable_list): Support array-shaping operator.
(c_parser_omp_target_update): Recognize GOMP_MAP_TO_GRID and
GOMP_MAP_FROM_GRID map kinds as well as OMP_CLAUSE_TO/OMP_CLAUSE_FROM.
* c-tree.h (c_omp_array_shaping_op_p, c_omp_has_array_shape_p): New
extern declarations.
(create_omp_arrayshape_type): Add prototype.
* c-typeck.cc (c_omp_array_shaping_op_p, c_omp_has_array_shape_p): New
globals.
(build_omp_array_section): Permit integral types, not just integer
constants, when creating array types for array sections.
(create_omp_arrayshape_type): New function.
(handle_omp_array_sections_1): Add DISCONTIGUOUS parameter.  Add
strided/rectangular array section support.
(omp_array_section_low_bound): New function.
(handle_omp_array_sections): Add DISCONTIGUOUS parameter.  Add
strided/rectangular array section support.
(c_finish_omp_clauses): Update calls to handle_omp_array_sections.
Handle discontiguous updates.

gcc/testsuite/
* gcc.dg/gomp/bad-array-shaping-c-1.c: New test.
* gcc.dg/gomp/bad-array-shaping-c-2.c: New test.
* gcc.dg/gomp/bad-array-shaping-c-3.c: New test.
* gcc.dg/gomp/bad-array-shaping-c-4.c: New test.
* gcc.dg/gomp/bad-array-shaping-c-5.c: New test.
* gcc.dg/gomp/bad-array-shaping-c-6.c: New test.
* gcc.dg/gomp/bad-array-shaping-c-7.c: New test.

libgomp/
* testsuite/libgomp.c/array-shaping-1.c: New test.
* testsuite/libgomp.c/array-shaping-2.c: New test.
* testsuite/libgomp.c/array-shaping-3.c: New test.
* testsuite/libgomp.c/array-shaping-4.c: New test.
* testsuite/libgomp.c/array-shaping-5.c: New test.
* testsuite/libgomp.c/array-shaping-6.c: New test.
---
 gcc/c/c-parser.cc | 301 +-
 gcc/c/c-tree.h|   4 +
 gcc/c/c-typeck.cc | 241 --
 .../gcc.dg/gomp/bad-array-shaping-c-1.c   |  26 ++
 .../gcc.dg/gomp/bad-array-shaping-c-2.c   |  24 ++
 .../gcc.dg/gomp/bad-array-shaping-c-3.c   |  30 ++
 .../gcc.dg/gomp/bad-array-shaping-c-4.c   |  27 ++
 .../gcc.dg/gomp/bad-array-shaping-c-5.c   |  17 +
 .../gcc.dg/gomp/bad-array-shaping-c-6.c   |  26 ++
 .../gcc.dg/gomp/bad-array-shaping-c-7.c   |  15 +
 libgomp/testsuite/libgomp.c/array-shaping-1.c | 236 ++
 libgomp/testsuite/libgomp.c/array-shaping-2.c |  39 +++
 libgomp/testsuite/libgomp.c/array-shaping-3.c |  42 +++
 libgomp/testsuite/libgomp.c/array-shaping-4.c |  36 +++
 libgomp/testsuite/libgomp.c/array-shaping-5.c |  38 +++
 libgomp/testsuite/libgomp.c/array-shaping-6.c |  45 +++
 16 files changed, 1099 insertions(+), 48 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/gomp/bad-array-shaping-c-1.c
 create mode 100644 gcc/testsuite/gcc.dg/gomp/bad-array-shaping-c-2.c
 create mode 100644 gcc/testsuite/gcc.dg/gomp/bad-array-shaping-c-3.c
 create mode 100644 gcc/testsuite/gcc.dg/gomp/bad-array-shaping-c-4.c
 create mode 100644 gcc/testsuite/gcc.dg/gomp/bad-array-shaping-c-5.c
 create mode 100644 gcc/testsuite/gcc.dg/gomp/bad-array-shaping-c-6.c
 create mode 100644 gcc/testsuite/gcc.dg/gomp/bad-array-shaping-c-7.c
 create mode 100644 libgomp/testsuite/libgomp.c/array-shaping-1.c
 create mode 100644 libgomp/testsuite/libgomp.c/array-shaping-2.c
 create mode 100644 libgomp/testsuite/libgomp.c/array-shaping-3.c
 create mode 100644 libgomp/testsuite/libgomp.c/array-shaping-4.c
 create mode 100644 libgomp/testsuite/libgomp.c/array-shaping-5.c
 create mode 100644 libgomp/testsuite/libgomp.c/array-shaping-6.c

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 280426ddf10..7e895e11da2 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -5764,7 +5764,9 @@ c_parser_braced_init (c_parser *parser, 

[PATCH 4/5] OpenMP: Noncontiguous "target update" for Fortran

2023-07-03 Thread Julian Brown
This patch implements noncontiguous "target update" for Fortran.
The existing middle end/runtime bits relating to C++ support are reused,
with some small adjustments, e.g.:

  1. The node used to map the OMP "array descriptor" (from omp-low.cc
 onwards) now uses the OMP_CLAUSE_SIZE field as a bias (the difference
 between the "virtual origin" element with zero indices in each
 dimension and the first element actually stored in memory).

  2. The OMP_CLAUSE_SIZE field of a GOMP_MAP_DIM_STRIDE node may now be
 used to store a "span", which is the distance in bytes between
 two adjacent elements in an array (with unit stride) when that is
 different from the element size, as it can be in Fortran.

The implementation goes to some effort to massage Fortran array metadata
(array descriptors) into a form that can ultimately be consumed by
omp_target_memcpy_rect_worker. The method for doing this is described
in comments in the patch body.

2023-07-03  Julian Brown  

gcc/fortran/
* trans-openmp.cc (gfc_omp_deep_map_kind_p): Handle
GOMP_MAP_{TO,FROM}_GRID, GOMP_MAP_GRID_{DIM,STRIDE}.
(gfc_trans_omp_arrayshape_type, gfc_omp_calculate_gcd,
gfc_desc_to_omp_noncontig_array, gfc_omp_contiguous_update_p): New
functions.
(gfc_trans_omp_clauses): Handle noncontiguous to/from clauses for OMP
"target update" directives.

gcc/
* gimplify.cc (gimplify_adjust_omp_clauses): Don't gimplify
VIEW_CONVERT_EXPR away in GOMP_MAP_TO_GRID/GOMP_MAP_FROM_GRID clauses.
* omp-low.cc (omp_noncontig_descriptor_type): Add SPAN field.
(scan_sharing_clauses): Don't store descriptor size in its
OMP_CLAUSE_SIZE field.
(lower_omp_target): Add missing OMP_CLAUSE_MAP check.  Add special-case
string handling.  Handle span and bias.  Use low bound instead of zero
as index for trailing full dimensions.

libgomp/
* libgomp.h (omp_noncontig_array_desc): Add span field.
* target.c (omp_target_memcpy_rect_worker): Add span parameter. Update
forward declaration. Handle span != element_size.
(gomp_update): Handle bias in descriptor's size slot.  Update calls to
omp_target_memcpy_rect_worker.
* testsuite/libgomp.fortran/noncontig-updates-1.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-2.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-3.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-4.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-5.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-6.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-7.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-8.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-9.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-10.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-11.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-12.f90: New test.
* testsuite/libgomp.fortran/noncontig-updates-13.f90: New test.

gcc/testsuite/
* gfortran.dg/gomp/noncontig-updates-1.f90: New test.
* gfortran.dg/gomp/noncontig-updates-2.f90: New test.
* gfortran.dg/gomp/noncontig-updates-3.f90: New test.
* gfortran.dg/gomp/noncontig-updates-4.f90: New test.
---
 gcc/fortran/trans-openmp.cc   | 500 ++
 gcc/gimplify.cc   |  10 +
 gcc/omp-low.cc|  73 ++-
 .../gfortran.dg/gomp/noncontig-updates-1.f90  |  19 +
 .../gfortran.dg/gomp/noncontig-updates-2.f90  |  16 +
 .../gfortran.dg/gomp/noncontig-updates-3.f90  |  16 +
 .../gfortran.dg/gomp/noncontig-updates-4.f90  |  15 +
 libgomp/libgomp.h |   1 +
 libgomp/target.c  |  47 +-
 .../libgomp.fortran/noncontig-updates-1.f90   |  54 ++
 .../libgomp.fortran/noncontig-updates-10.f90  |  29 +
 .../libgomp.fortran/noncontig-updates-11.f90  |  51 ++
 .../libgomp.fortran/noncontig-updates-12.f90  |  59 +++
 .../libgomp.fortran/noncontig-updates-13.f90  |  42 ++
 .../libgomp.fortran/noncontig-updates-2.f90   | 101 
 .../libgomp.fortran/noncontig-updates-3.f90   |  47 ++
 .../libgomp.fortran/noncontig-updates-4.f90   |  78 +++
 .../libgomp.fortran/noncontig-updates-5.f90   |  55 ++
 .../libgomp.fortran/noncontig-updates-6.f90   |  34 ++
 .../libgomp.fortran/noncontig-updates-7.f90   |  36 ++
 .../libgomp.fortran/noncontig-updates-8.f90   |  39 ++
 .../libgomp.fortran/noncontig-updates-9.f90   |  34 ++
 22 files changed, 1325 insertions(+), 31 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/noncontig-updates-1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/noncontig-updates-2.f90
 create mode 100644 gcc/testsuite/gfortran.dg/gomp/noncontig-updates-3.f90
 create mode 

[PATCH 2/5] OpenMP: Allow complete replacement of clause during map/to/from expansion

2023-07-03 Thread Julian Brown
At present, map/to/from clauses on OpenMP "target" directives may be
expanded into several mapping nodes if they describe array sections with
pointer or reference bases, or similar.  This patch allows the original
clause to be replaced during that expansion, mostly by passing the list
pointer to the node to various functions rather than the node itself.

This is needed by the following patch. There shouldn't be any functional
changes introduced by this patch itself.

2023-07-03  Julian Brown  

gcc/c-family/
* c-common.h (expand_array_base, expand_component_selector,
expand_map_clause): Adjust member declarations.
* c-omp.cc (omp_expand_access_chain): Pass and return pointer to
clause.
(c_omp_address_inspector::expand_array_base): Likewise.
(c_omp_address_inspector::expand_component_selector): Likewise.
(c_omp_address_inspector::expand_map_clause): Likewise.

gcc/c/
* c-typeck.cc (handle_omp_array_sections): Pass pointer to clause to
process instead of clause.
(c_finish_omp_clauses): Update calls to handle_omp_array_sections.
Handle cases where initial clause might be replaced.

gcc/cp/
* semantics.cc (handle_omp_array_sections): Pass pointer to clause
instead of clause.  Add PNEXT return parameter for next clause in list
to process.
(finish_omp_clauses): Update calls to handle_omp_array_sections.
Handle cases where initial clause might be replaced.
---
 gcc/c-family/c-common.h | 12 +++
 gcc/c-family/c-omp.cc   | 75 +
 gcc/c/c-typeck.cc   | 32 +++---
 gcc/cp/semantics.cc | 37 +---
 4 files changed, 88 insertions(+), 68 deletions(-)

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index acd0c861a55..756358f3fd8 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1375,12 +1375,12 @@ public:
 
   bool maybe_zero_length_array_section (tree);
 
-  tree expand_array_base (tree, vec &, tree, unsigned *,
- c_omp_region_type, bool);
-  tree expand_component_selector (tree, vec &, tree,
- unsigned *);
-  tree expand_map_clause (tree, tree, vec &,
- c_omp_region_type);
+  tree * expand_array_base (tree *, vec &, tree, unsigned *,
+   c_omp_region_type, bool);
+  tree * expand_component_selector (tree *, vec &, tree,
+   unsigned *);
+  tree * expand_map_clause (tree *, tree, vec &,
+   c_omp_region_type);
 };
 
 enum c_omp_directive_kind {
diff --git a/gcc/c-family/c-omp.cc b/gcc/c-family/c-omp.cc
index 16b620fcb3d..17f3d71c655 100644
--- a/gcc/c-family/c-omp.cc
+++ b/gcc/c-family/c-omp.cc
@@ -4130,11 +4130,12 @@ 
c_omp_address_inspector::maybe_zero_length_array_section (tree clause)
expression types here, because e.g. you can't have an array of
references.  See also gimplify.cc:omp_expand_access_chain.  */
 
-static tree
-omp_expand_access_chain (tree c, tree expr, vec<omp_addr_token *> &addr_tokens,
-unsigned *idx)
+static tree *
+omp_expand_access_chain (tree *pc, tree expr,
+vec<omp_addr_token *> &addr_tokens, unsigned *idx)
 {
   using namespace omp_addr_tokenizer;
+  tree c = *pc;
   location_t loc = OMP_CLAUSE_LOCATION (c);
   unsigned i = *idx;
   tree c2 = NULL_TREE;
@@ -4172,35 +4173,36 @@ omp_expand_access_chain (tree c, tree expr, 
vec<omp_addr_token *> &addr_tokens,
   break;
 
 default:
-  return error_mark_node;
+  return NULL;
 }
 
   if (c2)
 {
   OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c);
   OMP_CLAUSE_CHAIN (c) = c2;
-  c = c2;
+  pc = &OMP_CLAUSE_CHAIN (c);
 }
 
   *idx = ++i;
 
   if (i < addr_tokens.length ()
   && addr_tokens[i]->type == ACCESS_METHOD)
-return omp_expand_access_chain (c, expr, addr_tokens, idx);
+return omp_expand_access_chain (pc, expr, addr_tokens, idx);
 
-  return c;
+  return pc;
 }
 
 /* Translate "array_base_decl access_method" to OMP mapping clauses.  */
 
-tree
-c_omp_address_inspector::expand_array_base (tree c,
+tree *
+c_omp_address_inspector::expand_array_base (tree *pc,
 vec<omp_addr_token *> &addr_tokens,
tree expr, unsigned *idx,
c_omp_region_type ort,
bool decl_p)
 {
   using namespace omp_addr_tokenizer;
+  tree c = *pc;
   location_t loc = OMP_CLAUSE_LOCATION (c);
   int i = *idx;
   tree decl = addr_tokens[i + 1]->expr;
@@ -4225,7 +4227,7 @@ c_omp_address_inspector::expand_array_base (tree c,
  || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH))
 {
   *idx = ++i;
-  return c;
+  return pc;
 }
 
   switch (addr_tokens[i + 1]->u.access_kind)
@@ -4474,7 +4476,7 @@ c_omp_address_inspector::expand_array_base (tree c,
 
 default:
   *idx = i + 

[PATCH 0/5] [og13] OpenMP: strides, rectangular updates and array-shaping operator for "target update"

2023-07-03 Thread Julian Brown
This patch series adds support for the array-shaping operator from OpenMP
5.0, and strided and rectangular transfers for "target update" directives.
The patches were previously posted for mainline here:

  https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613785.html (C++)
  https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616921.html (Fortran)
  https://gcc.gnu.org/pipermail/gcc-patches/2023-May/618738.html (C)

This time the patches have been merged to the og13 branch (mostly
straightforward, though note the conflict described in patch 3/5).

Re-tested with offloading to AMD GCN.  I will apply shortly.

Julian Brown (5):
  OpenMP: Fix "exit data" for array sections for ref-to-ptr components
  OpenMP: Allow complete replacement of clause during map/to/from
expansion
  OpenMP: Support strided and shaped-array updates for C++
  OpenMP: Noncontiguous "target update" for Fortran
  OpenMP: Array shaping operator and strided "target update" for C

 gcc/c-family/c-common.h   |  12 +-
 gcc/c-family/c-omp.cc | 277 --
 gcc/c-family/c-pretty-print.cc|   5 +
 gcc/c/c-parser.cc | 331 +++-
 gcc/c/c-tree.h|   6 +-
 gcc/c/c-typeck.cc | 287 --
 gcc/cp/cp-objcp-common.cc |   1 +
 gcc/cp/cp-tree.def|   1 +
 gcc/cp/cp-tree.h  |  13 +-
 gcc/cp/decl.cc|  75 +++
 gcc/cp/decl2.cc   |  19 +-
 gcc/cp/error.cc   |   5 +
 gcc/cp/mangle.cc  |   1 +
 gcc/cp/operators.def  |   1 +
 gcc/cp/parser.cc  | 303 ++-
 gcc/cp/parser.h   |   7 +
 gcc/cp/pt.cc  |  39 +-
 gcc/cp/semantics.cc   | 289 --
 gcc/cp/typeck.cc  |  12 +-
 gcc/fortran/trans-openmp.cc   | 500 ++
 gcc/gimplify.cc   |  84 ++-
 gcc/omp-general.cc|  47 ++
 gcc/omp-general.h |   4 +-
 gcc/omp-low.cc| 459 +++-
 gcc/testsuite/g++.dg/gomp/array-shaping-1.C   |  22 +
 gcc/testsuite/g++.dg/gomp/array-shaping-2.C   | 134 +
 .../g++.dg/gomp/bad-array-shaping-1.C |  47 ++
 .../g++.dg/gomp/bad-array-shaping-2.C |  52 ++
 .../g++.dg/gomp/bad-array-shaping-3.C |  53 ++
 .../g++.dg/gomp/bad-array-shaping-4.C |  60 +++
 .../g++.dg/gomp/bad-array-shaping-5.C |  55 ++
 .../g++.dg/gomp/bad-array-shaping-6.C |  59 +++
 .../g++.dg/gomp/bad-array-shaping-7.C |  48 ++
 .../g++.dg/gomp/bad-array-shaping-8.C |  50 ++
 .../gcc.dg/gomp/bad-array-shaping-c-1.c   |  26 +
 .../gcc.dg/gomp/bad-array-shaping-c-2.c   |  24 +
 .../gcc.dg/gomp/bad-array-shaping-c-3.c   |  30 ++
 .../gcc.dg/gomp/bad-array-shaping-c-4.c   |  27 +
 .../gcc.dg/gomp/bad-array-shaping-c-5.c   |  17 +
 .../gcc.dg/gomp/bad-array-shaping-c-6.c   |  26 +
 .../gcc.dg/gomp/bad-array-shaping-c-7.c   |  15 +
 .../gfortran.dg/gomp/noncontig-updates-1.f90  |  19 +
 .../gfortran.dg/gomp/noncontig-updates-2.f90  |  16 +
 .../gfortran.dg/gomp/noncontig-updates-3.f90  |  16 +
 .../gfortran.dg/gomp/noncontig-updates-4.f90  |  15 +
 gcc/tree-pretty-print.cc  |  17 +
 gcc/tree.def  |   2 +-
 include/gomp-constants.h  |   7 +-
 libgomp/libgomp.h |  15 +
 libgomp/target.c  | 261 ++---
 .../testsuite/libgomp.c++/array-shaping-1.C   | 469 
 .../testsuite/libgomp.c++/array-shaping-10.C  |  61 +++
 .../testsuite/libgomp.c++/array-shaping-11.C  |  63 +++
 .../testsuite/libgomp.c++/array-shaping-12.C  |  65 +++
 .../testsuite/libgomp.c++/array-shaping-13.C  |  89 
 .../testsuite/libgomp.c++/array-shaping-2.C   |  38 ++
 .../testsuite/libgomp.c++/array-shaping-3.C   |  38 ++
 .../testsuite/libgomp.c++/array-shaping-4.C   |  38 ++
 .../testsuite/libgomp.c++/array-shaping-5.C   |  38 ++
 .../testsuite/libgomp.c++/array-shaping-6.C   |  54 ++
 .../testsuite/libgomp.c++/array-shaping-7.C   |  54 ++
 .../testsuite/libgomp.c++/array-shaping-8.C   |  65 +++
 .../testsuite/libgomp.c++/array-shaping-9.C   |  95 
 libgomp/testsuite/libgomp.c/array-shaping-1.c | 236 +
 libgomp/testsuite/libgomp.c/array-shaping-2.c |  39 ++
 libgomp/testsuite/libgomp.c/array-shaping-3.c |  42 ++
 libgomp/testsuite/libgomp.c/array-shaping-4.c |  36 ++
 libgomp/testsuite/libgomp.c/array-shaping-5.c |  38 ++
 libgomp/testsuite/libgomp.c/array-shaping-6.c |  45 ++
 .../libgomp.fortran/noncontig-updates-1.f90   |  54 ++
 

[PATCH 1/5] OpenMP: Fix "exit data" for array sections for ref-to-ptr components

2023-07-03 Thread Julian Brown
This patch fixes "exit data" for (C++) reference-to-pointer struct
components with array sections, such as:

  struct S { int *&ptr; [...] };
  ...
  #pragma omp target exit data map(from: str->ptr, str->ptr[0:n])

Such exits need two "detach" operations. We need to unmap
both the pointer and the slice. That idiom is recognized by
omp_resolve_clause_dependencies, but before omp_build_struct_sibling_lists
finishes the resulting mapping nodes are represented like this:

  GOMP_MAP_FROM GOMP_MAP_DETACH GOMP_MAP_ATTACH_DETACH

And at the moment, that won't be recognized as a single mapping group
as it should be. This patch fixes that.

(This is covered by a test case added in later patches in this series,
e.g. libgomp/testsuite/libgomp.c++/array-shaping-8.C.)

2023-07-03  Julian Brown  

gcc/
* gimplify.cc (omp_get_attachment): Handle GOMP_MAP_DETACH here.
(omp_group_last): Handle *, GOMP_MAP_DETACH, GOMP_MAP_ATTACH_DETACH
groups for "exit data" of reference-to-pointer component array
sections.
(omp_group_base): Handle GOMP_MAP_DETACH.
---
 gcc/gimplify.cc | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 20aba45110f..6280eb7e028 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -9171,6 +9171,7 @@ omp_get_attachment (omp_mapping_group *grp)
 
  case GOMP_MAP_ATTACH_DETACH:
  case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
+ case GOMP_MAP_DETACH:
return OMP_CLAUSE_DECL (node);
 
  default:
@@ -9247,23 +9248,43 @@ omp_group_last (tree *start_p)
 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION)
 || (OMP_CLAUSE_MAP_KIND (nc)
 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION)
+|| OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_DETACH
 || OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_ALWAYS_POINTER
 || omp_map_clause_descriptor_p (nc)))
{
- grp_last_p = &OMP_CLAUSE_CHAIN (c);
- c = nc;
  tree nc2 = OMP_CLAUSE_CHAIN (nc);
+ if (OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_DETACH)
+   {
+ /* In the specific case we're doing "exit data" on an array
+slice of a reference-to-pointer struct component, we will see
+DETACH followed by ATTACH_DETACH here.  We want to treat that
+as a single group. In other cases DETACH might represent a
+stand-alone "detach" clause, so we don't want to consider
+that part of the group.  */
+ if (nc2
+ && OMP_CLAUSE_CODE (nc2) == OMP_CLAUSE_MAP
+ && OMP_CLAUSE_MAP_KIND (nc2) == GOMP_MAP_ATTACH_DETACH)
+   goto consume_two_nodes;
+ else
+   break;
+   }
  if (nc2
  && OMP_CLAUSE_CODE (nc2) == OMP_CLAUSE_MAP
  && (OMP_CLAUSE_MAP_KIND (nc)
  == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION)
  && OMP_CLAUSE_MAP_KIND (nc2) == GOMP_MAP_ATTACH)
{
+   consume_two_nodes:
  grp_last_p = &OMP_CLAUSE_CHAIN (nc);
  c = nc2;
- nc2 = OMP_CLAUSE_CHAIN (nc2);
+ nc = OMP_CLAUSE_CHAIN (nc2);
+   }
+ else
+   {
+ grp_last_p = &OMP_CLAUSE_CHAIN (c);
+ c = nc;
+ nc = nc2;
}
-  nc = nc2;
}
   break;
 
@@ -9416,6 +9437,7 @@ omp_group_base (omp_mapping_group *grp, unsigned int 
*chained,
case GOMP_MAP_ALWAYS_POINTER:
case GOMP_MAP_ATTACH_DETACH:
case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
+   case GOMP_MAP_DETACH:
  return *grp->grp_start;
 
default:
-- 
2.25.1



[Bug middle-end/110228] [13/14 Regression] llvm-16 miscompiled due to an maybe uninitialized variable

2023-07-03 Thread slyfox at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110228

Sergei Trofimovich  changed:

   What|Removed |Added

 CC||rguenth at gcc dot gnu.org

--- Comment #24 from Sergei Trofimovich  ---
Trying to understand the failure mode here:

In bug.c.033t.early_objsz I still see the explicit stores to LookupFlags:

   :
  LookupFlags_15 = 0;
  goto ; [INV]

   :
  if (v_13 == 1)
goto ; [INV]
  else
goto ; [INV]

   :
  LookupFlags_14 = 1;

   :
  # LookupFlags_4 = PHI 
  *p_16(D) = LookupFlags_4;

But in bug.c.034t.ccp1 I see no stores at all:

  if (v_13 == 0)
goto ; [INV]
  else
goto ; [INV]

   :
  if (v_13 == 1)
goto ; [INV]
  else
goto ; [INV]

   :

   :
  # LookupFlags_4 = PHI <0(3), LookupFlags_5(4), 1(5)>

Specifically '# LookupFlags_4 = PHI <0(3), LookupFlags_5(4), 1(5)>' claims that
somehow gets values '0' and '1' into a PHI node.

AFAIU PHI is an equivalent of a mutable variable in an otherwise immutable SSA
form. It has to be a write if the needed value is not there yet. Why was the
store of '1' removed? Is it because variable 'v_13' already happens to have 0
or 1 value?

I wonder why PHI does not look like something below:

# LookupFlags_4 = PHI 

[Bug middle-end/110534] confusing -Wuninitialized when strict aliasing is violated

2023-07-03 Thread egallager at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110534

Eric Gallager  changed:

   What|Removed |Added

 CC||egallager at gcc dot gnu.org

--- Comment #3 from Eric Gallager  ---
(In reply to Andrew Pinski from comment #1)
> There are different levels of -Wstrict-aliasing and iirc level 3 will warn.
>

Remember that -Wstrict-aliasing's numerical levels are weird compared to other
warnings with numerical levels

> Note I don't think the uninitialized warning is a bad thing here. Because it
> does point out gcc is thinking it is uninitialized due to the alias
> violation.

The wording that gets printed is still a bit confusing, though, since the
variable that gets the first caret isn't the same one that gets named in the
first line of the warning. The note helps a bit, but I can still see how it
might be confusing to some.

Re: [PATCH] Fortran: fixes for procedures with ALLOCATABLE,INTENT(OUT) arguments [PR92178]

2023-07-03 Thread Harald Anlauf via Gcc-patches

Hi Mikael,

Am 03.07.23 um 13:46 schrieb Mikael Morin:

A few things to double check below.


diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 30946ba3f63..16e8f037cfc 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc

(...)

@@ -6117,6 +6118,33 @@ gfc_conv_procedure_call (gfc_se * se,
gfc_symbol * sym,
    && UNLIMITED_POLY (sym)
    && comp && (strcmp ("_copy", comp->name) == 0);

+  /* First scan argument list for allocatable actual arguments passed to
+ allocatable dummy arguments with INTENT(OUT).  As the corresponding
+ actual arguments are deallocated before execution of the
procedure, we
+ evaluate actual argument expressions to avoid problems with
possible
+ dependencies.  */
+  bool force_eval_args = false;
+  gfc_formal_arglist *tmp_formal;
+  for (arg = args, tmp_formal = formal; arg != NULL;
+   arg = arg->next, tmp_formal = tmp_formal ? tmp_formal->next :
NULL)
+    {
+  e = arg->expr;
+  fsym = tmp_formal ? tmp_formal->sym : NULL;
+  if (e && fsym
+  && e->expr_type == EXPR_VARIABLE
+  && fsym->attr.intent == INTENT_OUT
+  && (fsym->ts.type == BT_CLASS && fsym->attr.class_ok
+  ? CLASS_DATA (fsym)->attr.allocatable
+  : fsym->attr.allocatable)
+  && e->symtree
+  && e->symtree->n.sym
+  && gfc_variable_attr (e, NULL).allocatable)
+    {
+  force_eval_args = true;
+  break;
+    }
+    }
+

The function is already big enough, would you mind outlining this to its
own function?


This can be done.  At least it is not part of the monster loop.




   /* Evaluate the arguments.  */
   for (arg = args, argc = 0; arg != NULL;
    arg = arg->next, formal = formal ? formal->next : NULL, ++argc)
@@ -6680,7 +6708,7 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol
* sym,
   else
 tmp = gfc_finish_block ();

-  gfc_add_expr_to_block (>pre, tmp);
+  gfc_add_expr_to_block (_blk, tmp);
 }

   /* A class array element needs converting back to be a
@@ -6980,7 +7008,7 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol
* sym,
 build_empty_stmt (input_location));
   }
 if (tmp != NULL_TREE)
-  gfc_add_expr_to_block (>pre, tmp);
+  gfc_add_expr_to_block (_blk, tmp);
   }

   tmp = parmse.expr;
@@ -7004,7 +7032,7 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol
* sym,
  void_type_node,
  gfc_conv_expr_present (e->symtree->n.sym),
    tmp, build_empty_stmt (input_location));
-  gfc_add_expr_to_block (>pre, tmp);
+  gfc_add_expr_to_block (_blk, tmp);
 }
 }
 }

These look good, but I'm surprised that there is no similar change at
the 6819 line.
This is the class array actual vs class array dummy case.
It seems to be checked by the "bar" subroutine in your testcase, except
that the intent(out) argument comes last there, whereas it was coming
first with the original testcases in the PR.
Can you double check?


I believe I tried that before and encountered regressions.
The change

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 16e8f037cfc..43e013fa720 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -6844,7 +6844,8 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol *
sym,
  else
tmp = gfc_finish_block ();

- gfc_add_expr_to_block (>pre, tmp);
+//   gfc_add_expr_to_block (>pre, tmp);
+ gfc_add_expr_to_block (_blk, tmp);
}

  /* The conversion does not repackage the reference to a class

regresses on:
gfortran.dg/class_array_16.f90
gfortran.dg/finalize_12.f90
gfortran.dg/optional_class_1.f90

A simplified testcase for further study:

program p
  implicit none
  class(*),  allocatable :: c(:)
  c = [3, 4]
  call bar (allocated (c), c, allocated (c))
  if (allocated (c)) stop 14
contains
  subroutine bar (alloc, x, alloc2)
logical :: alloc, alloc2
class(*), allocatable, intent(out) :: x(:)
if (allocated (x)) stop 5
if (.not. alloc)   stop 6
if (.not. alloc2)  stop 16
  end subroutine bar
end

(This fails in a different place for the posted patch and for
the above trial change.  Need to go to the drawing board...)



@@ -7101,6 +7129,21 @@ gfc_conv_procedure_call (gfc_se * se,
gfc_symbol * sym,
 }
 }

+  /* If any actual argument of the procedure is allocatable and
passed
+ to an allocatable dummy with INTENT(OUT), we conservatively
+ evaluate all actual argument expressions before deallocations are
+ performed and the procedure is executed.  This ensures we conform
+ to F2023:15.5.3, 15.5.4.  Create temporaries except for constants,
+ variables, and functions returning pointers that can appear in a
+ variable 

Re: [RFC] Bridging the gap between the Linux Kernel Memory Consistency Model (LKMM) and C11/C++11 atomics

2023-07-03 Thread Alan Stern
On Mon, Jul 03, 2023 at 03:20:31PM -0400, Olivier Dion wrote:
> Hi all,
> 
> This is a request for comments on extending the atomic builtins API to
> help avoiding redundant memory barriers.  Indeed, there are

What atomic builtins API are you talking about?  The kernel's?  That's 
what it sounded like when I first read this sentence -- why else post 
your message on a kernel mailing list?

> discrepancies between the Linux kernel consistency memory model (LKMM)
> and the C11/C++11 memory consistency model [0].  For example,

Indeed.  The kernel's usage of C differs from the standard in several 
respects, and there's no particular reason for its memory model to match 
the standard's.

> fully-ordered atomic operations like xchg and cmpxchg success in LKMM
> have implicit memory barriers before/after the operations [1-2], while
> atomic operations using the __ATOMIC_SEQ_CST memory order in C11/C++11
> do not have any ordering guarantees of an atomic thread fence
> __ATOMIC_SEQ_CST with respect to other non-SEQ_CST operations [3].

After reading what you wrote below, I realized that the API you're 
thinking of modifying is the one used by liburcu for user programs.  
It's a shame you didn't mention this in either the subject line or the 
first few paragraphs of the email; that would have made understanding 
the message a little easier.

In any case, your proposal seems reasonable to me at first glance, with 
two possible exceptions:

1.  I can see why you have special fences for before/after load, 
store, and rmw operations.  But why clear?  In what way is 
clearing an atomic variable different from storing a 0 in it?

2.  You don't have a special fence for use after initializing an 
atomic.  This operation can be treated specially, because at the 
point where an atomic is initialized, it generally has not yet 
been made visible to any other threads.  Therefore the fence 
which would normally appear after a store (or clear) generally 
need not appear after an initialization, and you might want to 
add a special API to force the generation of such a fence.

Alan Stern


[Bug target/110533] [x86-64] naked with -O0 and register-passed struct/int128 clobbers parameters/callee-saved regs

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110533

--- Comment #1 from Andrew Pinski  ---
>clobbering other parameters and callee-saved registers.


(insn 2 8 3 2 (set (reg:DI 84)
(reg:DI 5 di [ aD.2522 ])) "/app/example.cpp":3:25 -1
 (nil))
(insn 3 2 4 2 (set (reg:DI 85)
(reg:DI 4 si [ aD.2522+8 ])) "/app/example.cpp":3:25 -1
 (nil))
(insn 4 3 5 2 (set (reg:TI 83)
(subreg:TI (reg:DI 84) 0)) "/app/example.cpp":3:25 -1
 (nil))
(insn 5 4 6 2 (set (subreg:DI (reg:TI 83) 8)
(reg:DI 85)) "/app/example.cpp":3:25 -1
 (nil))
(insn 6 5 7 2 (set (reg/v:TI 82 [ aD.2522 ])
(reg:TI 83)) "/app/example.cpp":3:25 -1
 (nil))

[Bug c++/110535] Internal error when performing a surrogate call with unsatisfied constraints

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110535

Andrew Pinski  changed:

   What|Removed |Added

   Last reconfirmed||2023-07-03
  Known to fail||8.1.0, 9.3.0
   Keywords||ice-on-invalid-code
 Ever confirmed|0   |1
 Status|UNCONFIRMED |NEW

--- Comment #1 from Andrew Pinski  ---
Confirmed.

In GCC 7, GCC didn't crash (with -std=c++17 -fconcepts) but gave a wrong error
message:
:11:24: error: no match for call to '(A) (int)'
int j = A{}(0); // Crash
^
:11:24: note: candidate: int (*)(int) 
:11:24: note:   candidate expects 1 argument, 2 provided

The crash started in GCC 8.

[Bug tree-optimization/110536] Bogus -Wstringop-overflow warning in std::transform

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110536

Andrew Pinski  changed:

   What|Removed |Added

   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=106757

--- Comment #1 from Andrew Pinski  ---
Most likely a dup of bug 106757.

Expert Engagement

2023-07-03 Thread Richard Nardi via Gcc


Hello,
I hope you are having a wonderful day. I would like to engage your firm to 
prepare my tax return for the current tax year. Prior to this year, my wife had 
always been in charge of our tax returns. However, our financial situation has 
changed, and she has also taken on additional responsibilities at work. 
Currently, I have concluded that having a professional prepare my tax return 
would be the most beneficial option for me. I can say for sure that we are 
fairly organized with our tax documentation. Besides our employment income, we 
also earn income from rental properties (Airbnb), stock options, dividends, and 
interest. While I understand that it is a busy time of year for tax 
professionals, I would appreciate your consideration of my request. I can send 
you my most recent tax documents and we can jump on a call with your quote, 
reasonable I hope, and any further questions you might have.

Looking forward to your response. Happy 4th of July!!

Richard Nardi

Senior Managing Director, Investments

NNN Properties, LLC
275 Madison Avenue, 13th Floor
New York, NY 10016

Website: 

www.nnnpro.com/our-team/ http://www.nnnpro.com/our-team/

Office: (332) 345-3212

License: 10401296108


[RFC] Bridging the gap between the Linux Kernel Memory Consistency Model (LKMM) and C11/C++11 atomics

2023-07-03 Thread Olivier Dion via Gcc
Hi all,

This is a request for comments on extending the atomic builtins API to
help avoiding redundant memory barriers.  Indeed, there are
discrepancies between the Linux kernel consistency memory model (LKMM)
and the C11/C++11 memory consistency model [0].  For example,
fully-ordered atomic operations like xchg and cmpxchg success in LKMM
have implicit memory barriers before/after the operations [1-2], while
atomic operations using the __ATOMIC_SEQ_CST memory order in C11/C++11
do not have any ordering guarantees of an atomic thread fence
__ATOMIC_SEQ_CST with respect to other non-SEQ_CST operations [3].

For a little bit of context here, we are porting liburcu [4] to atomic
builtins.  Before that, liburcu was using its own implementation for
atomic operations and its CMM memory consistency model was mimicking the
LKMM.  liburcu is now extending its CMM memory consistency model to
become close to the C11/C++11 memory consistency model, with the
exception of the extra SEQ_CST_FENCE memory order that is similar to
SEQ_CST, but ensures that a thread fence is emitted.  This is necessary
for backward compatibility of the liburcu uatomic API, but also for
closing the gap between the LKMM and the C11/C++11 memory consistency
model.  For example, to make Read-Modify-Write (RMW) operations match
the Linux kernel "full barrier before/after" semantics, the liburcu's
uatomic API has to emit both a SEQ_CST RMW operation and a subsequent
thread fence SEQ_CST, which leads to duplicated barriers in some cases.

Consider for example the following Dekker-style test and the resulting
assembly generated:

  int x = 0;
  int y = 0;
  int r0, r1;

  int dummy;

  void t0(void)
  {
  __atomic_store_n(&x, 1, __ATOMIC_RELAXED);

  __atomic_exchange_n(&dummy, 1, __ATOMIC_SEQ_CST);
  __atomic_thread_fence(__ATOMIC_SEQ_CST);

  r0 = __atomic_load_n(&y, __ATOMIC_RELAXED);
  }

  void t1(void)
  {
  __atomic_store_n(&y, 1, __ATOMIC_RELAXED);
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
  r1 = __atomic_load_n(&x, __ATOMIC_RELAXED);
  }

  // BUG_ON(r0 == 0 && r1 == 0)

On x86-64 (gcc 13.1 -O2) we get:

  t0():
  movl$1, x(%rip)
  movl$1, %eax
  xchgl   dummy(%rip), %eax
  lock orq $0, (%rsp)   ;; Redundant with previous exchange.
  movly(%rip), %eax
  movl%eax, r0(%rip)
  ret
  t1():
  movl$1, y(%rip)
  lock orq $0, (%rsp)
  movlx(%rip), %eax
  movl%eax, r1(%rip)
  ret

On x86-64 (clang 16 -O2) we get:

  t0():
  movl$1, x(%rip)
  movl$1, %eax
  xchgl   %eax, dummy(%rip)
  mfence;; Redundant with previous exchange.
  movly(%rip), %eax
  movl%eax, r0(%rip)
  retq
  t1():
  movl$1, y(%rip)
  mfence
  movlx(%rip), %eax
  movl%eax, r1(%rip)
  retq

On armv8-a (gcc 13.1 -O2) we get:

  t0():
  adrpx0, .LANCHOR0
  mov w1, 1
  add x0, x0, :lo12:.LANCHOR0
  str w1, [x0]
  add x1, x0, 4
  mov w2, 1
  .L3:
  ldaxr   w3, [x1]
  stlxr   w4, w2, [x1]
  cbnzw4, .L3
  dmb ish   ;; Okay!
  add x1, x0, 8
  ldr w1, [x1]
  str w1, [x0, 12]
  ret
  t1():
  adrpx0, .LANCHOR0
  add x0, x0, :lo12:.LANCHOR0
  add x1, x0, 8
  mov w2, 1
  str w2, [x1]
  dmb ish
  ldr w1, [x0]
  str w1, [x0, 16]
  ret

On armv8.1-a (gcc 13.1 -O2) we get:

  t0():
  adrpx0, .LANCHOR0
  mov w1, 1
  add x0, x0, :lo12:.LANCHOR0
  str w1, [x0]
  add x2, x0, 4
  swpal   w1, w1, [x2]
  dmb ish   ;; Okay!
  add x1, x0, 8
  ldr w1, [x1]
  str w1, [x0, 12]
  ret
  t1():
  adrpx0, .LANCHOR0
  add x0, x0, :lo12:.LANCHOR0
  add x1, x0, 8
  mov w2, 1
  str w2, [x1]
  dmb ish
  ldr w1, [x0]
  str w1, [x0, 16]
  ret

For the initial transition to the atomic builtins in liburcu, we plan on
emitting memory barriers to ensure correctness at the expense of
performance.  However, new primitives in the atomic builtins API would
help avoid the redundant thread fences.

Indeed, eliminating redundant memory fences is often done in the Linux
kernel.  For example in kernel/sched/core.c:try_to_wake_up():

  /*
   * smp_mb__after_spinlock() provides the equivalent of a full memory
   * barrier between program-order earlier lock acquisitions and
   * program-order later memory accesses.
   * ...
   * Since most load-store architectures implement ACQUIRE with an
   * smp_mb() after the LL/SC loop, 

[pushed] testsuite, Darwin: Remove an unnecessary flags addition.

2023-07-03 Thread Iain Sandoe via Gcc-patches
This has been in use for some time in the Darwin branches that are used
by downstream distributions. Re-tested on x86_64-darwin, pushed to trunk,
thanks,
Iain

--- 8< ---

The addition of the multiply_defined suppress flag has been handled for some
considerable time now in the Darwin specs; remove it from the testsuite libs.
Avoid duplicates in the specs.

Signed-off-by: Iain Sandoe 

gcc/ChangeLog:

* config/darwin.h: Avoid duplicate multiply_defined specs on
earlier Darwin versions with shared libgcc.

libstdc++-v3/ChangeLog:

* testsuite/lib/libstdc++.exp: Remove additional flag handled
by Darwin specs.

gcc/testsuite/ChangeLog:

* lib/g++.exp: Remove additional flag handled by Darwin specs.
* lib/obj-c++.exp: Likewise.
---
 gcc/config/darwin.h  | 5 ++---
 gcc/testsuite/lib/g++.exp| 4 
 gcc/testsuite/lib/obj-c++.exp| 4 
 libstdc++-v3/testsuite/lib/libstdc++.exp | 3 ---
 4 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index e6f76e598e6..714d3d5cc0d 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -217,8 +217,7 @@ extern GTY(()) int darwin_ms_struct;
   "%{image_base*:-Xlinker -image_base -Xlinker %*} %= 10.7 mmacosx-version-min= -no_pie) }"
 
 #define DARWIN_CC1_SPEC
\
-  "%

[Bug driver/93019] memory leak in gcc -O2 reported by Valgrind

2023-07-03 Thread costas.argyris at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93019

--- Comment #6 from Costas Argyris  ---
Part of this may be because the driver::finalize function introduced here:

https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=9376dd63e6a2d94823f6faf8212c9f37bef5a656

is not called from main:

 int
 main (int argc, char **argv)
 {
-  driver d;
+  driver d (false, /* can_finalize */
+   false); /* debug */

   return d.main (argc, argv);
 }

It only gets called from the jit code it was designed to serve:

+void
+playback::context::
+invoke_embedded_driver (const vec  *argvec)
+{
+  JIT_LOG_SCOPE (get_logger ());
+  driver d (true, /* can_finalize */
+   false); /* debug */
+  int result = d.main (argvec->length (),
+  const_cast  (argvec->address ()));
+  d.finalize ();
+  if (result)
+add_error (NULL, "error invoking gcc driver");
+}

What is confusing to me though is that the can_finalize argument to the driver
constructor really seems to be referring only to the environment manager
env_manager component, not the entire driver.  driver::finalize does a lot
more than just call env.restore (), including freeing allocated memory.

I don't see why the inability to call env_manager::restore () should block
anyone from calling driver::finalize (), as the latter starts from env.restore
() but does a lot more later.  In other words, what does environment variable
management have to do with memory management and why is it allowed to block it?
   Can't these two be done independently?

Simply adding a call to driver::finalize in main:

 int
 main (int argc, char **argv)
 {
-  driver d (false, /* can_finalize */
+  driver d (true, /* can_finalize */
false); /* debug */

-  return d.main (argc, argv);
+  int result = d.main (argc, argv);
+  d.finalize ();
+  return result;
 }


decreases the number of valgrind loss records from 95 to 62.

If can_finalize must stay false in main for whatever reason, then
driver::finalize could simply check if it can call env.restore such that it is
always possible to call driver::finalize regardless of what was passed for
can_finalize, and leave only env_manager::restore depend on that argument, as
it seems to be the only thing that is really relying on it (that would only
require an extra public method on env_manager to get the can_finalize value
that was passed - it is called m_can_restore in the class).

[Bug c++/110536] New: Bogus -Wstringop-overflow warning in std::transform

2023-07-03 Thread eric.niebler at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110536

Bug ID: 110536
   Summary: Bogus -Wstringop-overflow warning in std::transform
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: eric.niebler at gmail dot com
  Target Milestone: ---

Compile the following with -O3 -std=c++17 -Wall

<<<<<
#include 
#include 

template 
std::vector
make_type_param_vector(std::initializer_list const& init_list) {
  // std::vector input{init_list}; //uncomment to remove warning
  std::vector vec(init_list.size());
  std::transform(std::cbegin(init_list), std::cend(init_list), std::begin(vec),
[](auto const& e) {
if constexpr (std::is_unsigned_v) { return
static_cast(std::abs(e)); }
return static_cast(e);
  });
  return vec;
}

template 
void validate_A() {
  auto const input_column_valid_a = make_type_param_vector({1, 0});
  auto const input_column_valid_b = make_type_param_vector({0, 0});
  auto const input_column_valid_c = make_type_param_vector({15, 16});
}

int main() {
  validate_A();
  validate_A();
  validate_A();
  validate_A();
  validate_A();
  validate_A();
  validate_A();
  validate_A();
  validate_A();
}

<<<<<:1:
In function '_OIter std::transform(_IIter, _IIter, _OIter, _UnaryOperation)
[with _IIter = const int*; _OIter = __gnu_cxx::__normal_iterator > >; _UnaryOperation =
make_type_param_vector(const
std::initializer_list&)::]',
inlined from 'std::vector make_type_param_vector(const
std::initializer_list&) [with TypeParam = unsigned char; T = int]' at
:10:17:
/opt/compiler-explorer/gcc-trunk-20230703/include/c++/14.0.0/bits/stl_algo.h:4216:19:
warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
 4216 | *__result = __unary_op(*__first);
  | ~~^~


Demo:
https://godbolt.org/z/PKqfjr9cb

Re: wishlist: support for shorter pointers

2023-07-03 Thread Rafał Pietrak via Gcc




W dniu 3.07.2023 o 18:57, Richard Earnshaw (lists) pisze:

On 03/07/2023 17:42, Rafał Pietrak via Gcc wrote:

Hi Ian,

[-]
And WiKi reporting up to 40% performance improvements in some corner 
cases is impressive and encouraging. I believe, that the reported 
average of 5-8% improvement would be significantly better within MCU 
tiny resources environment. In MCU world, such improvement could mean 
fit-nofit of a project into a particular device.


-R


I think you need to be very careful when reading benchmarketing (sic) 
numbers like this.  Firstly, this is a 32-bit vs 64-bit measurement; 
secondly, the benchmark (spec 2000) is very old now and IIRC was not 
fully optimized for 64-bit processors (it predates the 64-bit version of 
the x86 instruction set); thirdly, there are benchmarks in SPEC which 
are very sensitive to cache size and the 32-bit ABI just happened to 
allow them to fit enough data in the caches to make the numbers leap.


Yes. Sure. I am. I thought I've expressed it clearly, that the 
"fantastic 40%" I regard as just "corner case" - those don't usually 
reflect ordinary usage.


I was only highlighting the fact that a mere 5-8% improvement can decide 
the fit-nofit of a particular design into a particular device ... in 
consequence requiring the use of a 4k-RAM device instead of a 2k-RAM one.


Tiny improvements of performance of x64 workhorses can become relatively 
huge in micros like stm32. That's all.


-R


Re: wishlist: support for shorter pointers

2023-07-03 Thread Richard Earnshaw (lists) via Gcc

On 03/07/2023 17:42, Rafał Pietrak via Gcc wrote:

Hi Ian,

W dniu 3.07.2023 o 17:07, Ian Lance Taylor pisze:
On Wed, Jun 28, 2023 at 11:21 PM Rafał Pietrak via Gcc 
 wrote:

[]

I was thinking about that, and it doesn't look as requiring that deep
rewrites. ABI spec, that  could accommodate the functionality could be as
little as one additional attribute to linker segments.


If I understand correctly, you are looking for something like the x32
mode that was available for a while on x86_64 processors:
https://en.wikipedia.org/wiki/X32_ABI .  That was a substantial amount
of work including changes to the compiler, assembler, linker, standard
library, and kernel.  And at least to me it's never seemed
particularly popular.


Yes.

And WiKi reporting up to 40% performance improvements in some corner 
cases is impressive and encouraging. I believe, that the reported 
average of 5-8% improvement would be significantly better within MCU 
tiny resources environment. In MCU world, such improvement could mean 
fit-nofit of a project into a particular device.


-R


I think you need to be very careful when reading benchmarketing (sic) 
numbers like this.  Firstly, this is a 32-bit vs 64-bit measurement; 
secondly, the benchmark (spec 2000) is very old now and IIRC was not 
fully optimized for 64-bit processors (it predates the 64-bit version of 
the x86 instruction set); thirdly, there are benchmarks in SPEC which 
are very sensitive to cache size and the 32-bit ABI just happened to 
allow them to fit enough data in the caches to make the numbers leap.


R.


[Bug target/106895] powerpc64 unable to specify even/odd register pairs in extended inline asm

2023-07-03 Thread schwab--- via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106895

--- Comment #7 from Andreas Schwab  ---
You are probably looking for a constraint that mirrors the quad_int_reg_operand
predicate.

[PATCH] vect: Treat vector widening IFN calls as 'simple' [PR110436]

2023-07-03 Thread Andre Vieira (lists) via Gcc-patches

Hi,

This patch makes the vectorizer treat any vector widening IFN as simple,
like it did with the tree codes VEC_WIDEN_*.

I wasn't sure whether I should make all IFN's simple and then exclude 
some (like GOMP_ ones), or include more than just the new widening IFNs. 
But since this is the only behaviour that changed with the ifn patch, I 
decided to only special case the widening IFNs for now. Let me know if 
you have different thoughts on this.


Bootstrapped and regression tested on aarch64-unknown-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/110436
* tree-vect-stmts.cc (is_simple_and_all_uses_invariant): Treat widening
IFN's as simple.

gcc/testsuite/ChangeLog:

* gcc.dg/pr110436.c: New test.

diff --git a/gcc/testsuite/gcc.dg/pr110436.c b/gcc/testsuite/gcc.dg/pr110436.c
new file mode 100644
index 
..c146f99fac9f0524eaa3b1230b56e9f94eed5bda
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr110436.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include "pr83089.c"
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 
d642d3c257f8d540a8562eedbcd40372b9550959..706055e9af94f0c1500c25faf4bd74fc08bf3cd6
 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -296,8 +296,11 @@ is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
   tree op;
   ssa_op_iter iter;
 
-  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
-  if (!stmt)
+  gimple *stmt = stmt_info->stmt;
+  if (!is_gimple_assign (stmt)
+  && !(is_gimple_call (stmt)
+  && gimple_call_internal_p (stmt)
+  && widening_fn_p (gimple_call_combined_fn (stmt))))
 return false;
 
   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)


Re: wishlist: support for shorter pointers

2023-07-03 Thread Rafał Pietrak via Gcc

Hi Ian,

W dniu 3.07.2023 o 17:07, Ian Lance Taylor pisze:

On Wed, Jun 28, 2023 at 11:21 PM Rafał Pietrak via Gcc  wrote:

[]

I was thinking about that, and it doesn't look as requiring that deep
rewrites. ABI spec, that  could accommodate the functionality could be as
little as one additional attribute to linker segments.


If I understand correctly, you are looking for something like the x32
mode that was available for a while on x86_64 processors:
https://en.wikipedia.org/wiki/X32_ABI .  That was a substantial amount
of work including changes to the compiler, assembler, linker, standard
library, and kernel.  And at least to me it's never seemed
particularly popular.


Yes.

And WiKi reporting up to 40% performance improvements in some corner 
cases is impressive and encouraging. I believe, that the reported 
average of 5-8% improvement would be significantly better within MCU 
tiny resources environment. In MCU world, such improvement could mean 
fit-nofit of a project into a particular device.


-R


[Bug c++/110535] New: Internal error when performing a surrogate call with unsatisfied constraints

2023-07-03 Thread corentinjabot at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110535

Bug ID: 110535
   Summary: Internal error when performing a surrogate call with
unsatisfied constraints
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: corentinjabot at gmail dot com
  Target Milestone: ---

Consider 

   int f1(int);

   template 
   struct A {
using F = int(int);
operator F*() requires N { return f1; }
   };

   int i = A{}(0);  // Fine
   int j = A{}(0); // Crash


This affects tip of trunk on CE https://godbolt.org/z/7vahavxv6

Re: wishlist: support for shorter pointers

2023-07-03 Thread Rafał Pietrak via Gcc

Hi David,

W dniu 3.07.2023 o 16:52, David Brown pisze:
[]


But, before I dive into learning C++ (forgive the naive question) 
isn't it so, that C++ comes with a heavy runtime? One that will bloat 
my tiny project? Or the bloat comes only when one uses particular 
elaborated class/inheritance scenarios, and this particular case ( for 
(...; ...; x = x->next) {} ) will not draw any of that into this project?





Let me make a few points (in no particular order) :

1. For some RISC targets, such as PowerPC, it is common to have a 
section of memory called the "small data section".  One of the registers 
is dedicated as an anchor to this section, and data within it is 
addressed as Rx + 16-bit offset.  But this is primarily for data at 
fixed (statically allocated) addresses, since reads and writes using 
this address mode are smaller and faster than full 32-bit addresses. 
Normal pointers are still 32-bit.  It also requires a dedicated register 
- not a big cost when you have 31 GPRs, but much more costly when you 
have only 13.


I don't have any experience with PowerPC, all you say here is new to me. 
And PPC architecture today is "kind of exotic", but I appreciate the 
info and I may look it up for insight how "short pointers" influence 
performance. Thenx.


2. C++ is only costly if you use costly features.  On small embedded 
systems, you want "-fno-exceptions -fno-rtti", and you will get as good 
(or bad!) results for C++ as for C.  Many standard library features 
will, however, result in a great deal of code - it is usually fairly 
obvious which classes and functions are appropriate.


OK. I become aware, that I will no longer be able to turn a blind eye on 
C++. :(




3. In C, you could make a type such as :

 typedef struct {
     uint16_t p;
 } small_pointer_t;

and conversion functions :

 static const uintptr_t ram_base = 0x2000;

 static inline void * sp_to_voidp(small_pointer_t sp) {
     return (void *)(ram_base + sp);
 }

 static inline small_pointer_t voidp_to_sp(void * p) {
     small_pointer_t sp;
     sp.p = (uintptr_t) p - ram_base;
     return sp;
 }

Then you would use these access functions to turn your "small pointers" 
into normal pointers.  The source code would become significantly harder 
to read and write, and less type-safe, but could be quite efficient.


That actually is a problem. I really can make a lot of the code in 
question into an assembler, and have it behave precisely as I desire, 
but that'll make the project not portable - that's why I thought of 
casting the use case onto this list here. This way (I hoped) it may 
inspire "the world" and have it supported at compiler level some time in 
the future. Should it not be the case, I'd rather stay with "plain C" 
and keep the code portable and readable (rather than obfuscate it  ... 
even by merely too "talkative sources").


[]
to ram and to peripheral groups.  This mailing list is not really the 
place to work through an implementation of such class templates - but it 
certainly could be done.


OK. I fully agree.

FYI: it was never my intention to inquire for advice of how to cook such 
"short/funny" pointers by special constructs / technic in c-programming. 
Actually I was a little set back reading such advice as first responses 
to my email. It was nice, but surprising.


I hoped to get a discussion more towards "how to let compiler know", 
that a particular segment/section of a program-data will be emitted into 
an executable in a "constraint output section", so that compiler could 
"automagicly" know, that using "short" pointers for that data would 
suffice, and in consequence would generate such instructions without 
any change to the source code.


It's sort of obvious, that this would also require support from libc 
(like a specific "malloc()" and friends), but application sources could 
stay untouched, and that's IMHO key point here.


4. It is worth taking a step back, and thinking about how you would like 
to use these pointers.  It is likely that you would be better thinking 
in terms of an array, rather than pointers - after all, you don't want 
to be using dynamically allocated memory here if you can avoid it, and 
certainly not generic malloc().  If you can use an array, then your 
index type can be as small as you like - maybe uint8_t is enough.


I did that trip ... some time ago. May be I discarded the idea 
prematurely, but I dropped it because I was afraid of cost of 
multiplication (index calculation) in micros. That my "assumption" may 
actually not be true, since today even the mini-minis often have integer 
multiplication units, so my reasoning became false.


But. Even if I turn pointers into indices for tiny micros ... that'd 
make the code not portable. I'm not too eager to do that.


Still, thank you very much for sharing those concepts.

With best regards,

-R


[Bug middle-end/24639] [meta-bug] bug to track all Wuninitialized issues

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=24639
Bug 24639 depends on bug 110534, which changed state.

Bug 110534 Summary: confusing -Wuninitialized when strict aliasing is violated
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110534

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |DUPLICATE

[Bug middle-end/99768] Unhelpful -Wuninitialized diagnostic with type punning

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99768

Andrew Pinski  changed:

   What|Removed |Added

 CC||vanyacpp at gmail dot com

--- Comment #4 from Andrew Pinski  ---
*** Bug 110534 has been marked as a duplicate of this bug. ***

[Bug middle-end/110534] confusing -Wuninitialized when strict aliasing is violated

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110534

Andrew Pinski  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |DUPLICATE

--- Comment #2 from Andrew Pinski  ---
Oh and there is already a bug recording this, PR 99768 (still open too).

*** This bug has been marked as a duplicate of bug 99768 ***

[PATCH v2] libstdc++: PSTL dispatch for C++20 range random access iterators [PR110512]

2023-07-03 Thread Gonzalo Brito Gadeschi via Gcc-patches
libstdc++: Recognize C++ random access iterators as random access in PSTL
[PR110512]

The check for random access iterators in the PSTL only checks whether the
iterator inherits from the random_access_iterator_tag, failing to recognize
random access iterators originating in C++20 ranges and views.

This patch extends the check to also recognize types that model the C++20
random_access_iterator concept as providing random access.

This is allowed by C++23's P2408, which is safe to backport to C++20,
because
any application that would break already exhibits undefined
behavior due to precondition violation.

libstdc++-v3/ChangeLog:
PR libstdc++/110512
* include/pstl/execution_impl.h: Recognize C++20 random access iterators as
random access.

Bootstrapping and testing
* Tested with x86_64-pc-linux-gnu.

---
 libstdc++-v3/include/pstl/execution_impl.h | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/pstl/execution_impl.h
b/libstdc++-v3/include/pstl/execution_impl.h
index 64f6cc4357a..c17da29141e 100644
--- a/libstdc++-v3/include/pstl/execution_impl.h
+++ b/libstdc++-v3/include/pstl/execution_impl.h
@@ -22,7 +22,15 @@ namespace __internal

 template 
 using __are_iterators_of = std::conjunction<
-std::is_base_of<_IteratorTag, typename
std::iterator_traits>::iterator_category>...>;
+#if __cplusplus >= 202002L
+std::disjunction<
+std::is_base_of<_IteratorTag, typename
std::iterator_traits>::iterator_category>,
+std::integral_constant>
+>...
+#else   // __cplusplus
+std::is_base_of<_IteratorTag, typename
std::iterator_traits>::iterator_category>...
+#endif  // __cplusplus
+>;

 template 
 using __are_random_access_iterators =
__are_iterators_of;
-- 
2.17.1


[Bug middle-end/110534] confusing -Wuninitialized when strict aliasing is violated

2023-07-03 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110534

--- Comment #1 from Andrew Pinski  ---
There are different levels of -Wstrict-aliasing and iirc level 3 will warn.

Note I don't think the uninitialized warning is a bad thing here. Because it
does point out gcc is thinking it is uninitialized due to the alias violation.

Re: [PATCH] rs6000: Update the vsx-vector-6.* tests.

2023-07-03 Thread Carl Love via Gcc-patches
Kewen:

On Fri, 2023-06-30 at 15:20 -0700, Carl Love wrote:
> Segher never liked the above way of looking at the assembly.  He
> prefers:
>   gcc -S -g -mcpu=power8 -o vsx-vector-6-func-2lop.s vsx-vector-6-
> func-
> 2lop.c
> 
>   grep xxlor vsx-vector-6-func-2lop.s | wc
>  34  68 516
> 
> So, again, I get the same count of 34 on both makalu and genoa.  But
> again, that doesn't agree with what make script/scan-assembler thinks
> the counts should be.
> 
> When I looked at the vsx-vector-6-func-2lop.s I see on BE:
> 
>  
> lxvd2x 0,10,9
> xxlor 0,12,0
> xxlnor 0,0,0
>  ...
> 
> I was guessing that it was adjusting the data layout from the load. 
> But looking again more carefully versus LE:
> 
> 
> lxvd2x 0,31,9 
>xxpermdi 0,0,0,2 
>xxlor 0,12,0  
>xxlnor 0,0,0  
>xxpermdi 0,0,0,2 
> 
> 
> the xxpermdi is probably what is really doing the data layout change.
> 
> So, we have the issue that looking at the assembly gives different
> instruction counts than what
> 
>dg-final { scan-assembler-times {\mxxlor\M} }
> 
> comes up with???  Now I am really confused.  I don't know how the
> scan-
> assembler-times works but I will go see if I can find it and see if I
> can figure out what the issue is.  I would expect that the scan-
> assembler is working off the --save-temp files, which get deleted as
> part of the run.  I would guess that scan-assembler does a grep to
> find
> the instructions and then maybe uses wc to count them??? I will go
> see
> if I can figure out how scan-assembler-times works.

OK, I figured out why I was getting 34 xxlor instructions instead of
the 22 that the scan-assembler-times was getting.  The difference was
when I compiled the program I forgot to use -O2.  So with -O2 I get the
same number of xxlor instructions as scan-assembler-times.  I get
34 if I do not specify optimization.

So, I think the scan-assembler-times are all correct.

As Peter says, counting xxlor is a bit problematic in general.  We
could just drop counting xxlor or have the LE/BE count qualifier for
the instructions.  Your call.

 Carl 



[Bug target/106895] powerpc64 unable to specify even/odd register pairs in extended inline asm

2023-07-03 Thread bergner at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106895

--- Comment #6 from Peter Bergner  ---
(In reply to Segher Boessenkool from comment #5)
> Constraints are completely the wrong tool for this.  Just use modes, which
> *are* the right tool?

Well you cannot specify modes in the asm, so I think you're saying we need use
the correct type that maps to a internal to GCC mode that has the even/odd
register behavior, so something like:

  unsigned int foo __attribute__ ((mode (XX)));

...where XXmode is the new integer mode that gives us even/odd register pairs? 
Of course we have to be careful about how this all works wrt -m32 versus -m64.

Re: wishlist: support for shorter pointers

2023-07-03 Thread Ian Lance Taylor via Gcc
On Wed, Jun 28, 2023 at 11:21 PM Rafał Pietrak via Gcc  wrote:
>
> W dniu 28.06.2023 o 17:44, Richard Earnshaw (lists) pisze:
> [---]
> > I think I understand what you're asking for but:
> > 1) You'd need a new ABI specification to handle this, probably involving
> > register assignments (for the 'segment' addresses), the initialization
> > of those at startup, assembler and linker extensions to allow for
> > relocations describing the symbols, etc.
>
> I was thinking about that, and it doesn't look as requiring that deep
> rewrites. ABI spec, that  could accommodate the functionality could be as
> little as one additional attribute to linker segments.

If I understand correctly, you are looking for something like the x32
mode that was available for a while on x86_64 processors:
https://en.wikipedia.org/wiki/X32_ABI .  That was a substantial amount
of work including changes to the compiler, assembler, linker, standard
library, and kernel.  And at least to me it's never seemed
particularly popular.

Ian


RE: [PATCH v1] RISC-V: Fix one typo of FRM dynamic definition

2023-07-03 Thread Li, Pan2 via Gcc-patches
Sure, every change needs testing, and I will pay attention to this in the future.

Pan

-Original Message-
From: Robin Dapp  
Sent: Monday, July 3, 2023 10:57 PM
To: Li, Pan2 ; juzhe.zh...@rivai.ai; gcc-patches 

Cc: rdapp@gmail.com; jeffreyalaw ; Wang, Yanzhang 
; kito.cheng 
Subject: Re: [PATCH v1] RISC-V: Fix one typo of FRM dynamic definition

> Sorry for the inconvenience, I am still working on fixing it. If it is
> urgent I can revert this change to unblock your work ASAP.

I'm not blocked by this, thanks, just wanted to document it here.
I was testing another patch and needed to dig for a while until
I realized the FAILs come from this one.  In general I would
assume that even obvious patches are tested before (I have
introduced bugs by obvious ones before so I make sure to).

Regards
 Robin


Re: [PATCH v1] RISC-V: Fix one typo of FRM dynamic definition

2023-07-03 Thread Robin Dapp via Gcc-patches
> Sorry for the inconvenience, I am still working on fixing it. If it is
> urgent I can revert this change to unblock your work ASAP.

I'm not blocked by this, thanks, just wanted to document it here.
I was testing another patch and needed to dig for a while until
I realized the FAILs come from this one.  In general I would
assume that even obvious patches are tested before (I have
introduced bugs by obvious ones before so I make sure to).

Regards
 Robin


RE: [PATCH v1] RISC-V: Fix one typo of FRM dynamic definition

2023-07-03 Thread Li, Pan2 via Gcc-patches
Sorry for the inconvenience, I am still working on fixing it. If it is urgent
I can revert this change to unblock your work ASAP.

Pan

-Original Message-
From: Robin Dapp  
Sent: Monday, July 3, 2023 10:49 PM
To: Li, Pan2 ; juzhe.zh...@rivai.ai; gcc-patches 

Cc: rdapp@gmail.com; jeffreyalaw ; Wang, Yanzhang 
; kito.cheng 
Subject: Re: [PATCH v1] RISC-V: Fix one typo of FRM dynamic definition

Hmm, looks like it wasn't simple enough...

I'm seeing execution fails for various floating point test cases.
This is due to a mismatch between the FRM_DYN definition (0b111 == 7)
and the attribute value (== 5).  Therefore we set the rounding mode
to 5 instead of 7.

Regards
 Robin



Re: wishlist: support for shorter pointers

2023-07-03 Thread David Brown via Gcc

On 28/06/2023 10:35, Rafał Pietrak via Gcc wrote:

Hi Jonathan,

W dniu 28.06.2023 o 09:31, Jonathan Wakely pisze:




If you use a C++ library type for your pointers the syntax above 
doesn't need to change, and the fancy pointer type can be implemented 
portable, with customisation for targets where you could use 16 bits 
for the pointers.


As you can expect from the problem I've stated - I don't know C++, so 
I'll need some more advice there.


But, before I dive into learning C++ (forgive the naive question) 
isn't it so, that C++ comes with a heavy runtime? One that will bloat my 
tiny project? Or the bloat comes only when one uses particular 
elaborated class/inheritance scenarios, and this particular case ( for 
(...; ...; x = x->next) {} ) will not draw any of that into this project?





Let me make a few points (in no particular order) :

1. For some RISC targets, such as PowerPC, it is common to have a 
section of memory called the "small data section".  One of the registers 
is dedicated as an anchor to this section, and data within it is 
addressed as Rx + 16-bit offset.  But this is primarily for data at 
fixed (statically allocated) addresses, since reads and writes using 
this address mode are smaller and faster than full 32-bit addresses. 
Normal pointers are still 32-bit.  It also requires a dedicated register 
- not a big cost when you have 31 GPRs, but much more costly when you 
have only 13.


2. C++ is only costly if you use costly features.  On small embedded 
systems, you want "-fno-exceptions -fno-rtti", and you will get as good 
(or bad!) results for C++ as for C.  Many standard library features 
will, however, result in a great deal of code - it is usually fairly 
obvious which classes and functions are appropriate.


3. In C, you could make a type such as :

typedef struct {
uint16_t p;
} small_pointer_t;

and conversion functions :

static const uintptr_t ram_base = 0x2000;

static inline void * sp_to_voidp(small_pointer_t sp) {
return (void *)(ram_base + sp);
}

static inline small_pointer_t voidp_to_sp(void * p) {
small_pointer_t sp;
sp.p = (uintptr_t) p - ram_base;
return sp;
}

Then you would use these access functions to turn your "small pointers" 
into normal pointers.  The source code would become significantly harder 
to read and write, and less type-safe, but could be quite efficient.


In C++, you'd use the same kinds of functions.  But they would now be 
methods in a class template, and tied to overloaded operators and/or 
conversion functions.  The result would be type-safe and let you 
continue to use a normal pointer-like syntax, and with equally efficient 
generated code.  You could also equally conveniently have small pointers 
to ram and to peripheral groups.  This mailing list is not really the 
place to work through an implementation of such class templates - but it 
certainly could be done.



4. It is worth taking a step back, and thinking about how you would like 
to use these pointers.  It is likely that you would be better thinking 
in terms of an array, rather than pointers - after all, you don't want 
to be using dynamically allocated memory here if you can avoid it, and 
certainly not generic malloc().  If you can use an array, then your 
index type can be as small as you like - maybe uint8_t is enough.



David





Re: [PATCH v1] RISC-V: Fix one typo of FRM dynamic definition

2023-07-03 Thread Robin Dapp via Gcc-patches
Hmm, looks like it wasn't simple enough...

I'm seeing execution fails for various floating point test cases.
This is due to a mismatch between the FRM_DYN definition (0b111 == 7)
and the attribute value (== 5).  Therefore we set the rounding mode
to 5 instead of 7.

Regards
 Robin



Re: gcc tricore porting

2023-07-03 Thread Richard Earnshaw (lists) via Gcc

On 03/07/2023 15:34, Joel Sherrill wrote:

On Mon, Jul 3, 2023, 4:33 AM Claudio Eterno 
wrote:


Hi Joel, I'll give an answer ASAP on the newlib and libgloss...
I supposed your question were about the licences question on newlib,
instead you were really asking what changed on the repo libs...



It was a bit of both. If they put the right licenses on the newlib and
libgloss ports, you should be able to use them and eventually submit them.
But GCC, binutils, and gdb would be gpl and require an assignment to the
FSF. That is all I meant.


It's not quite as restricted as that.  For GCC, I suggest reading 
https://gcc.gnu.org/contribute.html#legal for more details.


I think there are similar processes in place for binutils as well.  (I'm 
not quite so sure for GDB).


R.



An option here is to reach out to the authors and ask if they are willing
to do the FSF assignment. If they are, then any GPL licensed code from them
might be a baseline.

It looks like their current products may be based on LLVM.

--joel


C.



Il giorno dom 2 lug 2023 alle ore 19:53 Claudio Eterno <
eterno.clau...@gmail.com> ha scritto:


Hi Joel, can you give me more info regarding newlib or libgloss cases?
Unfortunately I'm a newbie in this world...
Thank you,
Claudio

Il giorno dom 2 lug 2023 alle ore 17:38 Joel Sherrill 
ha scritto:




On Sun, Jul 2, 2023, 3:29 AM Claudio Eterno 
wrote:


Hi, Joel and Mikael
taking a look at the code it seems that the repo owner is HighTec
 but we have no confirmations.
In fact, after a comparison with gcc 9.4.0 original files i see this on
a lot of ("WITH_HIGHTEC") [intl.c]:
[image: image.png]
Probably this version of gcc is a basic version of their tricore-gcc
and probably works fine but that repo doesn't show any extra info.
Seems also impossible to contact the owner (that account doesn't show
any email or other info)..
Honestly with these conditions, from gcc development point of view,
that repo has no value.



Without an assignment, you can't submit that code. That's a blocker on
using it if there isn't one.

But you can file an issue against the repo asking questions.


Anyway this is a good starting point...




Maybe not if you can't submit it. Anything that needs to be GPL licensed
and owned by the FSF is off limits.

But areas with permissive licenses might be ok if they stuck with those.
Look at what they did with newlib and libgloss.

--joel



C.



Il giorno lun 19 giu 2023 alle ore 18:55 Joel Sherrill 
ha scritto:




On Mon, Jun 19, 2023, 10:36 AM Mikael Pettersson via Gcc <
gcc@gcc.gnu.org> wrote:


(Note I'm reading the gcc mailing list via the Web archives, which
doesn't let me
create "proper" replies. Oh well.)

On Sun Jun 18 09:58:56 GMT 2023,  wrote:

Hi, this is my first time with open source development. I worked in
automotive for 22 years and we (generally) were using tricore

series for

these products. GCC doesn't compile on that platform. I left my

work some

days ago and so I'll have some spare time in the next few months. I

would

like to know how difficult it is to port the tricore platform on

gcc and if

during this process somebody can support me as tutor and... also if

the gcc

team is interested in this item...


https://github.com/volumit has a port of gcc + binutils + newlib +
gdb
to Tricore,
and it's not _that_ ancient. I have no idea where it originates from
or how complete
it is, but I do know the gcc-4.9.4 based one builds with some tweaks.




https://github.com/volumit/package_494 says there is a port in

process to gcc 9. Perhaps digging in and assessing that would be a good
start.



One question is whether that code has proper assignments on file for
ultimate inclusion. That should be part of your assessment.

--joel





I don't know anything more about it, I'm just a collector of

cross-compilers for
obscure / lost / forgotten / abandoned targets.

/Mikael





--
Claudio Eterno
via colle dell'Assietta 17
10036 Settimo Torinese (TO)





--
Claudio Eterno
via colle dell'Assietta 17
10036 Settimo Torinese (TO)




--
Claudio Eterno
via colle dell'Assietta 17
10036 Settimo Torinese (TO)







[committed] tree+ggc: Change return type of predicate functions from int to bool

2023-07-03 Thread Uros Bizjak via Gcc-patches
Also change internal variable from int to bool.

gcc/ChangeLog:

* tree.h (tree_int_cst_equal): Change return type from int to bool.
(operand_equal_for_phi_arg_p): Ditto.
(tree_map_base_marked_p): Ditto.
* tree.cc (contains_placeholder_p): Update function body
for bool return type.
(type_cache_hasher::equal): Ditto.
(tree_map_base_hash): Change return type
from int to void and adjust function body accordingly.
(tree_int_cst_equal): Ditto.
(operand_equal_for_phi_arg_p): Ditto.
(get_narrower): Change "first" variable to bool.
(cl_option_hasher::equal): Update function body for bool return type.
* ggc.h (ggc_set_mark): Change return type from int to bool.
(ggc_marked_p): Ditto.
* ggc-page.cc (gt_ggc_mx): Change return type
from int to void and adjust function body accordingly.
(ggc_set_mark): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/ggc-page.cc b/gcc/ggc-page.cc
index c25218d7415..2f0b72e1b22 100644
--- a/gcc/ggc-page.cc
+++ b/gcc/ggc-page.cc
@@ -1538,7 +1538,7 @@ gt_ggc_mx (unsigned char& x ATTRIBUTE_UNUSED)
P must have been allocated by the GC allocator; it mustn't point to
static objects, stack variables, or memory allocated with malloc.  */
 
-int
+bool
 ggc_set_mark (const void *p)
 {
   page_entry *entry;
@@ -1558,7 +1558,7 @@ ggc_set_mark (const void *p)
 
   /* If the bit was previously set, skip it.  */
   if (entry->in_use_p[word] & mask)
-return 1;
+return true;
 
   /* Otherwise set it, and decrement the free object count.  */
   entry->in_use_p[word] |= mask;
@@ -1567,14 +1567,14 @@ ggc_set_mark (const void *p)
   if (GGC_DEBUG_LEVEL >= 4)
 fprintf (G.debug_file, "Marking %p\n", p);
 
-  return 0;
+  return false;
 }
 
-/* Return 1 if P has been marked, zero otherwise.
+/* Return true if P has been marked, zero otherwise.
P must have been allocated by the GC allocator; it mustn't point to
static objects, stack variables, or memory allocated with malloc.  */
 
-int
+bool
 ggc_marked_p (const void *p)
 {
   page_entry *entry;
diff --git a/gcc/ggc.h b/gcc/ggc.h
index 78eab7eaba6..34108e2f006 100644
--- a/gcc/ggc.h
+++ b/gcc/ggc.h
@@ -90,15 +90,15 @@ extern const struct ggc_root_tab * const 
gt_pch_scalar_rtab[];
 
 /* Actually set the mark on a particular region of memory, but don't
follow pointers.  This function is called by ggc_mark_*.  It
-   returns zero if the object was not previously marked; nonzero if
+   returns false if the object was not previously marked; true if
the object was already marked, or if, for any other reason,
pointers in this data structure should not be traversed.  */
-extern int ggc_set_mark(const void *);
+extern bool ggc_set_mark (const void *);
 
-/* Return 1 if P has been marked, zero otherwise.
+/* Return true if P has been marked, zero otherwise.
P must have been allocated by the GC allocator; it mustn't point to
static objects, stack variables, or memory allocated with malloc.  */
-extern int ggc_marked_p(const void *);
+extern bool ggc_marked_p (const void *);
 
 /* PCH and GGC handling for strings, mostly trivial.  */
 extern void gt_pch_n_S (const void *);
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 58288efa2e2..bd500ec72a5 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -2839,7 +2839,7 @@ grow_tree_vec (tree v, int len MEM_STAT_DECL)
   return v;
 }
 
-/* Return 1 if EXPR is the constant zero, whether it is integral, float or
+/* Return true if EXPR is the constant zero, whether it is integral, float or
fixed, and scalar, complex or vector.  */
 
 bool
@@ -2850,7 +2850,7 @@ zerop (const_tree expr)
  || fixed_zerop (expr));
 }
 
-/* Return 1 if EXPR is the integer constant zero or a complex constant
+/* Return true if EXPR is the integer constant zero or a complex constant
of zero, or a location wrapper for such a constant.  */
 
 bool
@@ -2874,7 +2874,7 @@ integer_zerop (const_tree expr)
 }
 }
 
-/* Return 1 if EXPR is the integer constant one or the corresponding
+/* Return true if EXPR is the integer constant one or the corresponding
complex constant, or a location wrapper for such a constant.  */
 
 bool
@@ -2898,9 +2898,9 @@ integer_onep (const_tree expr)
 }
 }
 
-/* Return 1 if EXPR is the integer constant one.  For complex and vector,
-   return 1 if every piece is the integer constant one.
-   Also return 1 for location wrappers for such a constant.  */
+/* Return true if EXPR is the integer constant one.  For complex and vector,
+   return true if every piece is the integer constant one.
+   Also return true for location wrappers for such a constant.  */
 
 bool
 integer_each_onep (const_tree expr)
@@ -2914,8 +2914,8 @@ integer_each_onep (const_tree expr)
 return integer_onep (expr);
 }
 
-/* Return 1 if EXPR is an integer containing all 1's in as much precision as
-   it contains, or a complex or vector whose subparts are such integers,

RE: [PATCH V2] Middle-end: Change order of LEN_MASK_LOAD/LEN_MASK_STORE arguments

2023-07-03 Thread Li, Pan2 via Gcc-patches
Committed as passed both the bootstrap and regression test, thanks Richard.

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Richard Sandiford via Gcc-patches
Sent: Monday, July 3, 2023 5:27 PM
To: juzhe.zh...@rivai.ai
Cc: gcc-patches@gcc.gnu.org; rguent...@suse.de
Subject: Re: [PATCH V2] Middle-end: Change order of 
LEN_MASK_LOAD/LEN_MASK_STORE arguments

juzhe.zh...@rivai.ai writes:
> From: Ju-Zhe Zhong 
>
> Hi, Richard. I fixed the order as you suggested.
>
> Before this patch, the order is {len,mask,bias}.
>
> Now, after this patch, the order becomes {len,bias,mask}.
>
> Since you said we should not need 'internal_fn_bias_index', the bias index 
> should always be the len index + 1.
> I notice LEN_STORE order is {len,vector,bias}, to make them consistent, I 
> reorder into LEN_STORE {len,bias,vector}.
> Just like MASK_STORE {mask,vector}.
>
> Ok for trunk ?
>
> gcc/ChangeLog:
>
> * config/riscv/autovec.md: Change order of 
> LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments.
> * config/riscv/riscv-v.cc (expand_load_store): Ditto.
> * doc/md.texi: Ditto.
> * gimple-fold.cc (gimple_fold_partial_load_store_mem_ref): Ditto.
> * internal-fn.cc (len_maskload_direct): Ditto.
> (len_maskstore_direct): Ditto.
> (add_len_and_mask_args): New function.
> (expand_partial_load_optab_fn): Change order of 
> LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments.
> (expand_partial_store_optab_fn): Ditto.
> (internal_fn_len_index): New function.
> (internal_fn_mask_index): Change order of 
> LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments.
> (internal_fn_stored_value_index): Ditto.
> (internal_len_load_store_bias): Ditto.
> * internal-fn.h (internal_fn_len_index): New function.
> * tree-ssa-dse.cc (initialize_ao_ref_for_dse): Change order of 
> LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments.
> * tree-vect-stmts.cc (vectorizable_store): Ditto.
> (vectorizable_load): Ditto.

OK, thanks.

Richard

> ---
>  gcc/config/riscv/autovec.md |   8 +-
>  gcc/config/riscv/riscv-v.cc |   2 +-
>  gcc/doc/md.texi |  16 ++--
>  gcc/gimple-fold.cc  |   8 +-
>  gcc/internal-fn.cc  | 156 ++--
>  gcc/internal-fn.h   |   1 +
>  gcc/tree-ssa-dse.cc |  11 +--
>  gcc/tree-vect-stmts.cc  |  11 +--
>  8 files changed, 107 insertions(+), 106 deletions(-)
>
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index 1488f2be1be..4ab0e9f99eb 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -26,8 +26,8 @@
>[(match_operand:V 0 "register_operand")
> (match_operand:V 1 "memory_operand")
> (match_operand 2 "autovec_length_operand")
> -   (match_operand: 3 "vector_mask_operand")
> -   (match_operand 4 "const_0_operand")]
> +   (match_operand 3 "const_0_operand")
> +   (match_operand: 4 "vector_mask_operand")]
>"TARGET_VECTOR"
>  {
>riscv_vector::expand_load_store (operands, true);
> @@ -38,8 +38,8 @@
>[(match_operand:V 0 "memory_operand")
> (match_operand:V 1 "register_operand")
> (match_operand 2 "autovec_length_operand")
> -   (match_operand: 3 "vector_mask_operand")
> -   (match_operand 4 "const_0_operand")]
> +   (match_operand 3 "const_0_operand")
> +   (match_operand: 4 "vector_mask_operand")]
>"TARGET_VECTOR"
>  {
>riscv_vector::expand_load_store (operands, false);
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index adb8d7d36a5..8d5bed7ebe4 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2777,7 +2777,7 @@ expand_load_store (rtx *ops, bool is_load)
>  {
>poly_int64 value;
>rtx len = ops[2];
> -  rtx mask = ops[3];
> +  rtx mask = ops[4];
>machine_mode mode = GET_MODE (ops[0]);
>  
>if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS 
> (mode)))
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index cefdee84821..5e5482265cd 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5302,15 +5302,15 @@ This pattern is not allowed to @code{FAIL}.
>  @cindex @code{len_maskload@var{m}@var{n}} instruction pattern
>  @item @samp{len_maskload@var{m}@var{n}}
>  Perform a masked load from the memory location pointed to by operand 1
> -into register operand 0.  (operand 2 + operand 4) elements are loaded from
> +into register operand 0.  (operand 2 + operand 3) elements are loaded from
>  memory and other elements in operand 0 are set to undefined values.
>  This is a combination of len_load and maskload.
>  Operands 0 and 1 have mode @var{m}, which must be a vector mode.  Operand 2
>  has whichever integer mode the target prefers.  A mask is specified in
> -operand 3 which must be of type @var{n}.  The mask has lower precedence than
> +operand 4 which must be of type @var{n}.  The mask has lower precedence than

[Bug target/108743] [objective-c, NeXT runtime] -fconstant-cfstrings not supported

2023-07-03 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108743

--- Comment #11 from CVS Commits  ---
The releases/gcc-13 branch has been updated by Iain D Sandoe
:

https://gcc.gnu.org/g:12897414d309d9cf398259c212923aa7b031a3af

commit r13-7528-g12897414d309d9cf398259c212923aa7b031a3af
Author: Iain Sandoe 
Date:   Sun Jul 2 14:18:04 2023 +0100

Darwin, Objective-C: Support -fconstant-cfstrings [PR108743].

This supports the -fconstant-cfstrings option as used by clang (and
expected by some build scripts) as an alias to the target-specific
-mconstant-cfstrings.

The documentation is also updated to reflect that the 'f' option is
only available on Darwin, and to add the 'm' option to the Darwin
section of the invocation text.

Signed-off-by: Iain Sandoe 

PR target/108743

gcc/ChangeLog:

* config/darwin.opt: Add fconstant-cfstrings alias to
mconstant-cfstrings.
* doc/invoke.texi: Amend invocation descriptions to reflect
that the fconstant-cfstrings is a target-option alias and to
add the missing mconstant-cfstrings option description to the
Darwin section.

(cherry picked from commit cdd4b3c0f0f428678c24de74b1f626628450799c)

[Bug d/103944] [12/13/14 Regression] Testsuite hang due to libphobos/testsuite/libphobos.gc/forkgc2.d

2023-07-03 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103944

--- Comment #14 from CVS Commits  ---
The releases/gcc-13 branch has been updated by Iain D Sandoe
:

https://gcc.gnu.org/g:025a3f417899577710eb88527897fc571a0280ff

commit r13-7526-g025a3f417899577710eb88527897fc571a0280ff
Author: Iain Sandoe 
Date:   Sun Feb 26 13:53:52 2023 +

libphobos, testsuite: Disable forkgc2 on Darwin [PR103944]

It hangs the testsuite (requiring manual intervention to kill the
spawned processes) which breaks CI.  The reason for the hang is not
clear.  This skips the test for now (xfail does not work).

Signed-off-by: Iain Sandoe 

PR d/103944

libphobos/ChangeLog:

* testsuite/libphobos.gc/forkgc2.d: Skip for Darwin.

(cherry picked from commit fca6d9c12f5bf06469cf9f7db8c42f66ef792fd2)

[Bug testsuite/108835] gm2 tests at large -jNN numbers do not return

2023-07-03 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108835

--- Comment #8 from CVS Commits  ---
The releases/gcc-13 branch has been updated by Iain D Sandoe
:

https://gcc.gnu.org/g:e79c65331190bef99c88062c77d557d161caf380

commit r13-7525-ge79c65331190bef99c88062c77d557d161caf380
Author: Iain Sandoe 
Date:   Sat Feb 25 23:18:13 2023 +

modula-2: Amend the handling of failed select() calls in RTint [PR108835].

When we make a select() that fails, there is an attempt to (a) diagnose
why and (b) make a fallback.  These actions are causing some tests to
hang on some Darwin versions, this is because the first action that is
tried to assist in diagnosis/fallback handling is to replace the set
timeout with NIL (which causes select to wait forever, modulo other
reasons it might complete).

To fix this, call select with a zero timeout when checking for error
conditions.  Also, as we check the possible failure conditions, if we
find a change that succeeds, then stop looking for errors.

Signed-off-by: Iain Sandoe 

PR testsuite/108835

gcc/m2/ChangeLog:

* gm2-libs/RTint.mod: Do not use NIL timeout setting on select,
test failures sequentially, finishing on the first success.

(cherry picked from commit ef4ea6e08707d27674a8c5ddb4b478aac8713c03)

[Bug driver/110522] `-fdiagnostics-format=sarif-file`: file name conflicts / races

2023-07-03 Thread lebedev.ri at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110522

--- Comment #1 from Roman Lebedev  ---
To spell it out explicitly, not storing the resulting `.sarif`
next to the produced object file itself, like it's done in (all?)
other cases, very much looks like a not-a-feature,
basically making the feature borderline unusable in general.

There can be some edge-cases (`-o /dev/null`?),
but they shouldn't dictate how normal cases are handled.

Re: [PATCH V7] Machine Description: Add LEN_MASK_{GATHER_LOAD, SCATTER_STORE} pattern

2023-07-03 Thread Richard Sandiford via Gcc-patches
juzhe.zh...@rivai.ai writes:
> From: Ju-Zhe Zhong 
>
> Hi, Richi and Richard.
>
> Based on the review comments from Richard:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623405.html
>
> I change len_mask_gather_load/len_mask_scatter_store order into:
> {len,bias,mask}
>
> We adjust adding len and mask using add_len_and_mask_args,
> which is the same as partial_load/partial_store.
>
> Now the code becomes more reasonable and easier to maintain.
>
> This patch is adding LEN_MASK_{GATHER_LOAD,SCATTER_STORE} to allow targets
> handle flow control by mask and loop control by length on gather/scatter 
> memory
> operations. Consider this following case:
>
> #include <stdint.h>
> void
> f (uint8_t *restrict a,
>uint8_t *restrict b, int n,
>int base, int step,
>int *restrict cond)
> {
>   for (int i = 0; i < n; ++i)
> {
>   if (cond[i])
> a[i * step + base] = b[i * step + base];
> }
> }
>
> We hope RVV can vectorize such case into following IR:
>
> loop_len = SELECT_VL
> control_mask = comparison
> v = LEN_MASK_GATHER_LOAD (.., loop_len, bias, control_mask)
> LEN_MASK_SCATTER_STORE (... v, ..., loop_len, bias, control_mask)
>
> This patch doesn't apply such patterns into vectorizer, just add patterns
> and update the documents.
>
> Will send patch which apply such patterns into vectorizer soon after this
> patch is approved.
>
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * doc/md.texi: Add len_mask_gather_load/len_mask_scatter_store.
> * internal-fn.cc (expand_scatter_store_optab_fn): Ditto.
> (expand_gather_load_optab_fn): Ditto.
> (internal_load_fn_p): Ditto.
> (internal_store_fn_p): Ditto.
> (internal_gather_scatter_fn_p): Ditto.
> (internal_fn_len_index): Ditto.
> (internal_fn_mask_index): Ditto.
> (internal_fn_stored_value_index): Ditto.
> * internal-fn.def (LEN_MASK_GATHER_LOAD): Ditto.
> (LEN_MASK_SCATTER_STORE): Ditto.
> * optabs.def (OPTAB_CD): Ditto.

Nice!  OK, thanks.

Richard

> ---
>  gcc/doc/md.texi | 17 +
>  gcc/internal-fn.cc  | 32 +---
>  gcc/internal-fn.def |  8 ++--
>  gcc/optabs.def  |  2 ++
>  4 files changed, 42 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 5e5482265cd..f14dd32b2dc 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5040,6 +5040,15 @@ operand 5.  Bit @var{i} of the mask is set if element 
> @var{i}
>  of the result should be loaded from memory and clear if element @var{i}
>  of the result should be set to zero.
>  
> +@cindex @code{len_mask_gather_load@var{m}@var{n}} instruction pattern
> +@item @samp{len_mask_gather_load@var{m}@var{n}}
> +Like @samp{gather_load@var{m}@var{n}}, but takes an extra length operand 
> (operand 5),
> +a bias operand (operand 6) as well as a mask operand (operand 7).  Similar 
> to len_maskload,
> +the instruction loads at most (operand 5 + operand 6) elements from memory.
> +Bit @var{i} of the mask is set if element @var{i} of the result should
> +be loaded from memory and clear if element @var{i} of the result should be 
> undefined.
> +Mask elements @var{i} with @var{i} > (operand 5 + operand 6) are ignored.
> +
>  @cindex @code{scatter_store@var{m}@var{n}} instruction pattern
>  @item @samp{scatter_store@var{m}@var{n}}
>  Store a vector of mode @var{m} into several distinct memory locations.
> @@ -5069,6 +5078,14 @@ Like @samp{scatter_store@var{m}@var{n}}, but takes an 
> extra mask operand as
>  operand 5.  Bit @var{i} of the mask is set if element @var{i}
>  of the result should be stored to memory.
>  
> +@cindex @code{len_mask_scatter_store@var{m}@var{n}} instruction pattern
> +@item @samp{len_mask_scatter_store@var{m}@var{n}}
> +Like @samp{scatter_store@var{m}@var{n}}, but takes an extra length operand 
> (operand 5),
> +a bias operand (operand 6) as well as a mask operand (operand 7).  The 
> instruction stores
> +at most (operand 5 + operand 6) elements of (operand 4) to memory.
> +Bit @var{i} of the mask is set if element @var{i} of (operand 4) should be 
> stored.
> +Mask elements @var{i} with @var{i} > (operand 5 + operand 6) are ignored.
> +
>  @cindex @code{vec_set@var{m}} instruction pattern
>  @item @samp{vec_set@var{m}}
>  Set given field in the vector value.  Operand 0 is the vector to modify,
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index c1fcb38b17b..303df102d81 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -3507,7 +3507,6 @@ expand_scatter_store_optab_fn (internal_fn, gcall 
> *stmt, direct_optab optab)
>  {
>internal_fn ifn = gimple_call_internal_fn (stmt);
>int rhs_index = internal_fn_stored_value_index (ifn);
> -  int mask_index = internal_fn_mask_index (ifn);
>tree base = gimple_call_arg (stmt, 0);
>tree offset = gimple_call_arg (stmt, 1);
>tree scale = gimple_call_arg (stmt, 2);
> @@ -3518,19 +3517,14 @@ expand_scatter_store_optab_fn 

[Bug target/109973] [13 Regression] Wrong code for AVX2 since 13.1 by combining VPAND and VPTEST since r13-2006-ga56c1641e9d25e

2023-07-03 Thread jakub at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109973

--- Comment #12 from Jakub Jelinek  ---
Wrong-code issues like this shouldn't be just closed.
I think you should ping Uros on this, or another option would be to revert on
the branch the change that caused the regression.

Re: [PATCH v2] RISC-V: Add support for vector crypto extensions

2023-07-03 Thread Kito Cheng via Gcc-patches
Thanks, LGTM :)

Christoph Muellner 於 2023年7月3日 週一,19:08寫道:

> From: Christoph Müllner 
>
> This series adds basic support for the vector crypto extensions:
> * Zvbb
> * Zvbc
> * Zvkg
> * Zvkned
> * Zvkhn[a,b]
> * Zvksed
> * Zvksh
> * Zvkn
> * Zvknc
> * Zvkng
> * Zvks
> * Zvksc
> * Zvksg
> * Zvkt
>
> This patch is based on the v20230620 version of the Vector Cryptography
> specification. The specification is frozen and can be found here:
>   https://github.com/riscv/riscv-crypto/releases/tag/v20230620
>
> Binutils support has been merged upstream a few days ago.
>
> All extensions come with tests for the feature test macros.
>
> gcc/ChangeLog:
>
> * common/config/riscv/riscv-common.cc: Add support for zvbb,
> zvbc, zvkg, zvkned, zvknha, zvknhb, zvksed, zvksh, zvkn,
> zvknc, zvkng, zvks, zvksc, zvksg, zvkt and the implied subsets.
> * config/riscv/arch-canonicalize: Add canonicalization info for
> zvkn, zvknc, zvkng, zvks, zvksc, zvksg.
> * config/riscv/riscv-opts.h (MASK_ZVBB): New macro.
> (MASK_ZVBC): Likewise.
> (TARGET_ZVBB): Likewise.
> (TARGET_ZVBC): Likewise.
> (MASK_ZVKG): Likewise.
> (MASK_ZVKNED): Likewise.
> (MASK_ZVKNHA): Likewise.
> (MASK_ZVKNHB): Likewise.
> (MASK_ZVKSED): Likewise.
> (MASK_ZVKSH): Likewise.
> (MASK_ZVKN): Likewise.
> (MASK_ZVKNC): Likewise.
> (MASK_ZVKNG): Likewise.
> (MASK_ZVKS): Likewise.
> (MASK_ZVKSC): Likewise.
> (MASK_ZVKSG): Likewise.
> (MASK_ZVKT): Likewise.
> (TARGET_ZVKG): Likewise.
> (TARGET_ZVKNED): Likewise.
> (TARGET_ZVKNHA): Likewise.
> (TARGET_ZVKNHB): Likewise.
> (TARGET_ZVKSED): Likewise.
> (TARGET_ZVKSH): Likewise.
> (TARGET_ZVKN): Likewise.
> (TARGET_ZVKNC): Likewise.
> (TARGET_ZVKNG): Likewise.
> (TARGET_ZVKS): Likewise.
> (TARGET_ZVKSC): Likewise.
> (TARGET_ZVKSG): Likewise.
> (TARGET_ZVKT): Likewise.
> * config/riscv/riscv.opt: Introduction of riscv_zv{b,k}_subext.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zvbb.c: New test.
> * gcc.target/riscv/zvbc.c: New test.
> * gcc.target/riscv/zvkg.c: New test.
> * gcc.target/riscv/zvkn-1.c: New test.
> * gcc.target/riscv/zvkn.c: New test.
> * gcc.target/riscv/zvknc-1.c: New test.
> * gcc.target/riscv/zvknc-2.c: New test.
> * gcc.target/riscv/zvknc.c: New test.
> * gcc.target/riscv/zvkned.c: New test.
> * gcc.target/riscv/zvkng-1.c: New test.
> * gcc.target/riscv/zvkng-2.c: New test.
> * gcc.target/riscv/zvkng.c: New test.
> * gcc.target/riscv/zvknha.c: New test.
> * gcc.target/riscv/zvknhb.c: New test.
> * gcc.target/riscv/zvks-1.c: New test.
> * gcc.target/riscv/zvks.c: New test.
> * gcc.target/riscv/zvksc-1.c: New test.
> * gcc.target/riscv/zvksc-2.c: New test.
> * gcc.target/riscv/zvksc.c: New test.
> * gcc.target/riscv/zvksed.c: New test.
> * gcc.target/riscv/zvksg-1.c: New test.
> * gcc.target/riscv/zvksg-2.c: New test.
> * gcc.target/riscv/zvksg.c: New test.
> * gcc.target/riscv/zvksh.c: New test.
> * gcc.target/riscv/zvkt.c: New test.
>
> Signed-off-by: Christoph Müllner 
> ---
> Changes for v2:
> - Update patch for specification version v20230620
>
>  gcc/common/config/riscv/riscv-common.cc  | 55 
>  gcc/config/riscv/arch-canonicalize   |  7 +++
>  gcc/config/riscv/riscv-opts.h| 34 +++
>  gcc/config/riscv/riscv.opt   |  6 +++
>  gcc/testsuite/gcc.target/riscv/zvbb.c| 13 ++
>  gcc/testsuite/gcc.target/riscv/zvbc.c| 13 ++
>  gcc/testsuite/gcc.target/riscv/zvkg.c| 13 ++
>  gcc/testsuite/gcc.target/riscv/zvkn-1.c  | 29 +
>  gcc/testsuite/gcc.target/riscv/zvkn.c| 29 +
>  gcc/testsuite/gcc.target/riscv/zvknc-1.c | 37 
>  gcc/testsuite/gcc.target/riscv/zvknc-2.c | 37 
>  gcc/testsuite/gcc.target/riscv/zvknc.c   | 37 
>  gcc/testsuite/gcc.target/riscv/zvkned.c  | 13 ++
>  gcc/testsuite/gcc.target/riscv/zvkng-1.c | 37 
>  gcc/testsuite/gcc.target/riscv/zvkng-2.c | 37 
>  gcc/testsuite/gcc.target/riscv/zvkng.c   | 37 
>  gcc/testsuite/gcc.target/riscv/zvknha.c  | 13 ++
>  gcc/testsuite/gcc.target/riscv/zvknhb.c  | 13 ++
>  gcc/testsuite/gcc.target/riscv/zvks-1.c  | 29 +
>  gcc/testsuite/gcc.target/riscv/zvks.c| 29 +
>  gcc/testsuite/gcc.target/riscv/zvksc-1.c | 37 
>  gcc/testsuite/gcc.target/riscv/zvksc-2.c | 37 
>  gcc/testsuite/gcc.target/riscv/zvksc.c   | 37 
>  gcc/testsuite/gcc.target/riscv/zvksed.c  | 13 ++
>  

Re: [PATCH v1] RISC-V: Fix one typo for emit_mode_set.

2023-07-03 Thread Kito Cheng via Gcc-patches
Lgtm


juzhe.zh...@rivai.ai 於 2023年7月3日 週一,19:11寫道:

> LGTM
>
>
>
> juzhe.zh...@rivai.ai
>
> From: pan2.li
> Date: 2023-07-03 18:57
> To: gcc-patches
> CC: juzhe.zhong; jeffreyalaw; pan2.li; yanzhang.wang; kito.cheng
> Subject: [PATCH v1] RISC-V: Fix one typo for emit_mode_set.
> From: Pan Li 
>
> This patch would like to fix one typo for scaler[should be scalar] in
> emit_mode_set, as well as minor change for mov emit.
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv.cc (riscv_emit_mode_set): Fix typo.
> ---
> gcc/config/riscv/riscv.cc | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index e4dc8115e69..7761e946761 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -7672,11 +7672,11 @@ riscv_emit_mode_set (int entity, int mode, int
> prev_mode,
>  case RISCV_FRM:
>if (mode != FRM_MODE_NONE && mode != prev_mode)
> {
> -   rtx scaler = gen_reg_rtx (SImode);
> +   rtx scalar = gen_reg_rtx (SImode);
>   rtx imm = gen_int_mode (mode, SImode);
> -   emit_insn (gen_movsi (scaler, imm));
> -   emit_insn (gen_fsrm (scaler, scaler));
> +   emit_move_insn (scalar, imm);
> +   emit_insn (gen_fsrm (scalar, scalar));
> }
>break;
>  default:
> --
> 2.34.1
>
>
>


[Bug target/109973] [13 Regression] Wrong code for AVX2 since 13.1 by combining VPAND and VPTEST since r13-2006-ga56c1641e9d25e

2023-07-03 Thread roger at nextmovesoftware dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109973

Roger Sayle  changed:

   What|Removed |Added

 Status|ASSIGNED|NEW
Summary|[13/14 Regression] Wrong|[13 Regression] Wrong code
   |code for AVX2 since 13.1 by |for AVX2 since 13.1 by
   |combining VPAND and VPTEST  |combining VPAND and VPTEST
   |since   |since
   |r13-2006-ga56c1641e9d25e|r13-2006-ga56c1641e9d25e

--- Comment #11 from Roger Sayle  ---
This issue is now fixed on mainline for GCC 14, and a backport of the relevant
bits (minimum viable fix) for GCC 13 was proposed here
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/621256.html

[COMMITTED] ada: Fix renaming of predefined equality operator for unchecked union types

2023-07-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

The problem is that the predefined equality operator for unchecked union
types is implemented out of line by invoking a function that takes more
parameters than the two operands, which means that the renaming is not
seen as type conforming with this function and, therefore, is rejected.

The way out is to implement these additional parameters as "extra" formal
parameters, since this kind of parameter is not taken into account for
semantic checks.  The change also factors out the duplicated generation
of actuals for these additional parameters into a single procedure.

gcc/ada/

* exp_ch3.ads (Build_Variant_Record_Equality): Add Spec_Id as second
parameter.
* exp_ch3.adb (Build_Variant_Record_Equality): For unchecked union
types, build the additional parameters as extra formal parameters.
(Expand_Freeze_Record_Type.Build_Variant_Record_Equality): Pass
Empty as Spec_Id in call to Build_Variant_Record_Equality.
* exp_ch4.ads (Expand_Unchecked_Union_Equality): New procedure.
* exp_ch4.adb (Expand_Composite_Equality): In the presence of a
function implementing composite equality, do not special case the
unchecked union types, and only convert the operands if the base
types are not the same like in Build_Equality_Call.
(Build_Equality_Call): Do not special case the unchecked union types
and relocate the operands only once.
(Expand_N_Op_Eq): Do not special case the unchecked union types.
(Expand_Unchecked_Union_Equality): New procedure implementing the
specific expansion of calls to the predefined equality function.
* exp_ch6.adb (Is_Unchecked_Union_Equality): New predicate.
(Expand_Call): Call Is_Unchecked_Union_Equality to determine whether
to call Expand_Unchecked_Union_Equality or Expand_Call_Helper.
* exp_ch8.adb (Build_Body_For_Renaming): Set Has_Delayed_Freeze flag
earlier on Id and pass Id in call to Build_Variant_Record_Equality.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch3.adb |  57 +++-
 gcc/ada/exp_ch3.ads |   4 +-
 gcc/ada/exp_ch4.adb | 682 ++--
 gcc/ada/exp_ch4.ads |   8 +
 gcc/ada/exp_ch6.adb |  63 +++-
 gcc/ada/exp_ch8.adb |   3 +-
 6 files changed, 390 insertions(+), 427 deletions(-)

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
index 463b77fae67..daf27fb25e9 100644
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -4606,6 +4606,7 @@ package body Exp_Ch3 is
 
function Build_Variant_Record_Equality
  (Typ : Entity_Id;
+  Spec_Id : Entity_Id;
   Body_Id : Entity_Id;
   Param_Specs : List_Id) return Node_Id
is
@@ -4652,42 +4653,66 @@ package body Exp_Ch3 is
 
   if Is_Unchecked_Union (Typ) then
  declare
+Right_Formal : constant Entity_Id :=
+  (if Present (Spec_Id) then Last_Formal (Spec_Id) else Right);
+Scop : constant Entity_Id :=
+  (if Present (Spec_Id) then Spec_Id else Body_Id);
+
+procedure Decorate_Extra_Formal (F, F_Typ : Entity_Id);
+--  Decorate extra formal F with type F_Typ
+
+---
+-- Decorate_Extra_Formal --
+---
+
+procedure Decorate_Extra_Formal (F, F_Typ : Entity_Id) is
+begin
+   Mutate_Ekind  (F, E_In_Parameter);
+   Set_Etype (F, F_Typ);
+   Set_Scope (F, Scop);
+   Set_Mechanism (F, By_Copy);
+end Decorate_Extra_Formal;
+
 A  : Entity_Id;
 B  : Entity_Id;
 Discr  : Entity_Id;
 Discr_Type : Entity_Id;
+Last_Extra : Entity_Id := Empty;
 New_Discrs : Elist_Id;
 
  begin
+Mutate_Ekind (Body_Id, E_Subprogram_Body);
 New_Discrs := New_Elmt_List;
 
 Discr := First_Discriminant (Typ);
 while Present (Discr) loop
Discr_Type := Etype (Discr);
 
+   --  Add the new parameters as extra formals
+
A :=
  Make_Defining_Identifier (Loc,
Chars => New_External_Name (Chars (Discr), 'A'));
 
+   Decorate_Extra_Formal (A, Discr_Type);
+
+   if Present (Last_Extra) then
+  Set_Extra_Formal (Last_Extra, A);
+   else
+  Set_Extra_Formal (Right_Formal, A);
+  Set_Extra_Formals (Scop, A);
+   end if;
+
+   Append_Elmt (A, New_Discrs);
+
B :=
  Make_Defining_Identifier (Loc,
Chars => New_External_Name (Chars (Discr), 'B'));
 
-   --  Add new parameters to the parameter list
+   Decorate_Extra_Formal (B, Discr_Type);
 
-   

[COMMITTED] ada: Fix discrepancy in expansion of untagged record equality

2023-07-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

The expansion of the predefined equality operator for untagged record types
can be done either in line, i.e. into the component-wise comparison of the
operands, or out of line, i.e. into a call to a function implementing this
comparison, and the heuristics of the selection are essentially based on the
complexity of the implementation.

For discriminated record types with a variant part, which comprise unchecked
union types, the expansion is always done out of line.  For nondiscriminated
types, the expansion is done in line, unless one of the components is of a
record type for which a user-defined equality operator exists, in which case
the expansion is done out of line.

For the third case, i.e. discriminated record types without a variant part,
the expansion is always done in line.  Now given that the discriminants are
considered as mere components for the purpose of predefined equality in this
case, there does not seem to be any reason for treating it differently from
the second case above.

gcc/ada/

* exp_ch3.adb (Build_Untagged_Equality): Rename into...
(Build_Untagged_Record_Equality): ...this.
(Expand_Freeze_Record_Type): Adjust to above renaming and invoke
the procedure also for discriminated types without a variant part.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch3.adb | 41 -
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
index 7ac4680b395..463b77fae67 100644
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -139,7 +139,7 @@ package body Exp_Ch3 is
--  the code expansion for controlled components (when control actions
--  are active) can lead to very large blocks that GCC handles poorly.
 
-   procedure Build_Untagged_Equality (Typ : Entity_Id);
+   procedure Build_Untagged_Record_Equality (Typ : Entity_Id);
--  AI05-0123: Equality on untagged records composes. This procedure
--  builds the equality routine for an untagged record that has components
--  of a record type that has user-defined primitive equality operations.
@@ -4450,11 +4450,11 @@ package body Exp_Ch3 is
   Set_Is_Pure (Proc_Name);
end Build_Slice_Assignment;
 
-   -
-   -- Build_Untagged_Equality --
-   -
+   
+   -- Build_Untagged_Record_Equality --
+   
 
-   procedure Build_Untagged_Equality (Typ : Entity_Id) is
+   procedure Build_Untagged_Record_Equality (Typ : Entity_Id) is
   Build_Eq : Boolean;
   Comp : Entity_Id;
   Decl : Node_Id;
@@ -4481,7 +4481,7 @@ package body Exp_Ch3 is
  end if;
   end User_Defined_Eq;
 
-   --  Start of processing for Build_Untagged_Equality
+   --  Start of processing for Build_Untagged_Record_Equality
 
begin
   --  If a record component has a primitive equality operation, we must
@@ -4558,7 +4558,7 @@ package body Exp_Ch3 is
 Set_Is_Public (Op);
  end if;
   end if;
-   end Build_Untagged_Equality;
+   end Build_Untagged_Record_Equality;
 
---
-- Build_Variant_Record_Equality --
@@ -5803,25 +5803,18 @@ package body Exp_Ch3 is
  end if;
 
   --  In the untagged case, ever since Ada 83 an equality function must
-  --  be  provided for variant records that are not unchecked unions.
-  --  In Ada 2012 the equality function composes, and thus must be built
-  --  explicitly just as for tagged records.
+  --  be provided for variant records that are not unchecked unions.
 
   elsif Has_Discriminants (Typ)
 and then not Is_Limited_Type (Typ)
+and then Present (Component_List (Type_Definition (Typ_Decl)))
+and then
+  Present (Variant_Part (Component_List (Type_Definition (Typ_Decl
   then
- declare
-Comps : constant Node_Id :=
-  Component_List (Type_Definition (Typ_Decl));
- begin
-if Present (Comps)
-  and then Present (Variant_Part (Comps))
-then
-   Build_Variant_Record_Equality (Typ);
-end if;
- end;
+ Build_Variant_Record_Equality (Typ);
 
-  --  Otherwise create primitive equality operation (AI05-0123)
+  --  In Ada 2012 the equality function composes, and thus must be built
+  --  explicitly just as for tagged records.
 
   --  This is done unconditionally to ensure that tools can be linked
   --  properly with user programs compiled with older language versions.
@@ -5832,7 +5825,7 @@ package body Exp_Ch3 is
 and then Convention (Typ) = Convention_Ada
 and then not Is_Limited_Type (Typ)
   then
- Build_Untagged_Equality (Typ);
+ Build_Untagged_Record_Equality (Typ);
   end if;
 
   --  Before 

[COMMITTED] ada: Fix small inaccuracy in implementation of B.3.3(20/2)

2023-07-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This is the clause about inferable discriminants in unchecked unions.

gcc/ada/

* sem_util.adb (Has_Inferable_Discriminants): In the case of a
component with a per-object constraint, also return true if the
enclosing object is not of an unchecked union type.
In the default case, remove a useless call to Base_Type.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_util.adb | 35 ++-
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
index d9ea00e53cb..736751f5fae 100644
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -12272,33 +12272,26 @@ package body Sem_Util is
begin
   --  For selected components, the subtype of the selector must be a
   --  constrained Unchecked_Union. If the component is subject to a
-  --  per-object constraint, then the enclosing object must have inferable
-  --  discriminants.
+  --  per-object constraint, then the enclosing object must either be
+  --  a regular discriminated type or must have inferable discriminants.
 
   if Nkind (N) = N_Selected_Component then
- if Has_Per_Object_Constraint (Entity (Selector_Name (N))) then
-
---  A small hack. If we have a per-object constrained selected
---  component of a formal parameter, return True since we do not
---  know the actual parameter association yet.
-
-if Prefix_Is_Formal_Parameter (N) then
-   return True;
-
---  Otherwise, check the enclosing object and the selector
-
-else
-   return Has_Inferable_Discriminants (Prefix (N))
- and then Has_Inferable_Discriminants (Selector_Name (N));
-end if;
-
  --  The call to Has_Inferable_Discriminants will determine whether
  --  the selector has a constrained Unchecked_Union nominal type.
 
- else
-return Has_Inferable_Discriminants (Selector_Name (N));
+ if not Has_Inferable_Discriminants (Selector_Name (N)) then
+return False;
  end if;
 
+ --  A small hack. If we have a per-object constrained selected
+ --  component of a formal parameter, return True since we do not
+ --  know the actual parameter association yet.
+
+ return not Has_Per_Object_Constraint (Entity (Selector_Name (N)))
+   or else not Is_Unchecked_Union (Etype (Prefix (N)))
+   or else Has_Inferable_Discriminants (Prefix (N))
+   or else Prefix_Is_Formal_Parameter (N);
+
   --  A qualified expression has inferable discriminants if its subtype
   --  mark is a constrained Unchecked_Union subtype.
 
@@ -12310,7 +12303,7 @@ package body Sem_Util is
   --  Unchecked_Union nominal subtype.
 
   else
- return Is_Unchecked_Union (Base_Type (Etype (N)))
+ return Is_Unchecked_Union (Etype (N))
and then Is_Constrained (Etype (N));
   end if;
end Has_Inferable_Discriminants;
-- 
2.40.0



[Bug middle-end/110534] New: confusing -Wuninitialized when strict aliasing is violated

2023-07-03 Thread vanyacpp at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110534

Bug ID: 110534
   Summary: confusing -Wuninitialized when strict aliasing is
violated
   Product: gcc
   Version: 13.1.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: middle-end
  Assignee: unassigned at gcc dot gnu.org
  Reporter: vanyacpp at gmail dot com
  Target Milestone: ---

GCC gives -Wuninitialized on this code:

#include 
uint16_t test()
{
uint32_t foo32[4] = {0, 0, 0, 0};
uint16_t* foo16 = reinterpret_cast<uint16_t*>(&foo32[0]);
return foo16[0];
}

:7:19: warning: 'foo32' is used uninitialized [-Wuninitialized]
7 | return foo16[0];
  |   ^
:5:14: note: 'foo32' declared here
5 | uint32_t foo32[4] = {0, 0, 0, 0};
  |  ^

This issue was originally published on reddit:
https://www.reddit.com/r/cpp/comments/14lc9w9/gcc_warnings_for_uninitialized_variables_is/

The poster found the warning quite confusing and I agree with them.

I believe the ideal behavior would be to show -Wstrict-aliasing on this code
and avoid showing -Wuninitialized.

Re: [PATCH] middle-end/110495 - avoid associating constants with (VL) vectors

2023-07-03 Thread Richard Biener via Gcc-patches
On Mon, 3 Jul 2023, Richard Sandiford wrote:

> Richard Biener via Gcc-patches  writes:
> > When trying to associate (v + INT_MAX) + INT_MAX we are using
> > the TREE_OVERFLOW bit to check for correctness.  That isn't
> > working for VECTOR_CSTs and it can't in general when one considers
> > VL vectors.  It looks like it should work for COMPLEX_CSTs but
> > I didn't try to single out _Complex int in this change.
> >
> > The following makes sure that for vectors we use the fallback of
> > using unsigned arithmetic when associating the above to
> > v + (INT_MAX + INT_MAX).
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
> >
> > Thanks,
> > Richard.
> >
> > PR middle-end/110495
> > * tree.h (TREE_OVERFLOW): Do not mention VECTOR_CSTs
> > since we do not set TREE_OVERFLOW on those since the
> > introduction of VL vectors.
> > * match.pd (x +- CST +- CST): For VECTOR_CST do not look
> > at TREE_OVERFLOW to determine validity of association.
> >
> > * gcc.dg/tree-ssa/addadd-2.c: Amend.
> > * gcc.dg/tree-ssa/forwprop-27.c: Adjust.
> > ---
> >  gcc/match.pd| 9 +
> >  gcc/testsuite/gcc.dg/tree-ssa/addadd-2.c| 1 +
> >  gcc/testsuite/gcc.dg/tree-ssa/forwprop-27.c | 4 +++-
> >  gcc/tree.h  | 2 +-
> >  4 files changed, 10 insertions(+), 6 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index f09583bbcac..d193a572005 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3025,7 +3025,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > (with { tree cst = const_binop (outer_op == inner_op
> > ? PLUS_EXPR : MINUS_EXPR,
> > type, @1, @2); }
> > -(if (cst && !TREE_OVERFLOW (cst))
> > +(if (INTEGRAL_TYPE_P (type) && cst && !TREE_OVERFLOW (cst))
> >   (inner_op @0 { cst; } )
> >   /* X+INT_MAX+1 is X-INT_MIN.  */
> >   (if (INTEGRAL_TYPE_P (type) && cst
> > @@ -3037,7 +3037,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >  (view_convert (inner_op
> > (view_convert:utype @0)
> > (view_convert:utype
> > -{ drop_tree_overflow (cst); }))
> > +{ TREE_OVERFLOW (cst)
> > +  ? drop_tree_overflow (cst) : cst; }))
> 
> It looks like the whole “(with …)” expects cst to be nonnull,
> but the “last resort” doesn't check it (unless I'm misreading).
> Would it be easier to add a top-level “if (cst)”?  (Obviously
> a preexisting thing.)

Hmm, indeed.  I've added an outer if (cst).

> >  
> >/* (CST1 - A) +- CST2 -> CST3 - A  */
> >(for outer_op (plus minus)
> > @@ -3049,7 +3050,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > forever if something doesn't simplify into a constant.  */
> >   (if (!CONSTANT_CLASS_P (@0))
> >(minus (outer_op! (view_convert @1) @2) (view_convert @0)))
> > - (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> > + (if (!INTEGRAL_TYPE_P (TREE_TYPE (@0))
> >   || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> >(view_convert (minus (outer_op! @1 (view_convert @2)) @0))
> >(if (types_match (type, @0) && !TYPE_OVERFLOW_SANITIZED (type))
> > @@ -3068,7 +3069,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >forever if something doesn't simplify into a constant.  */
> >  (if (!CONSTANT_CLASS_P (@0))
> >   (plus (view_convert @0) (minus! @1 (view_convert @2
> > -(if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> > +(if (!INTEGRAL_TYPE_P (TREE_TYPE (@0))
> >  || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> >   (view_convert (plus @0 (minus! (view_convert @1) @2)))
> >   (if (types_match (type, @0) && !TYPE_OVERFLOW_SANITIZED (type))
> 
> I didn't understand this part.  Doesn't it mean that we allow
> overflow-inducing reassociations for all vector integer types,
> albeit not immediately folded away?

Oh, indeed - I thought I could circumvent the TREE_OVERFLOW check for
those (where I don't yet have a testcase) by altering the guarding
check - but that check is to guard the TYPE_OVERFLOW_* checks.

To fix this we'd have to add unsigned fallbacks like for the above
pattern.  I'm going to remove the two hunks for now.

> Also, why do we keep the:
> 
>   !ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type)
> 
> in the outer ifs?

I think this is distinct types since both patterns have conditional
conversions in the patterns they match.  Otherwise it would be
redundant checking and would have been better if placed as 'else'
branch of the inner ifs.

As said, going to fix the missing conditional on non-null 'cst'
and drop the two hunks unrelated to the PR (which also were
wrong - thanks for noticing).

Richard.


> 
> But that's just me not understanding match.pd very well.
> Feel free to ignore if it's nonsense. :)
> 
> Thanks,
> Richard
> 

-- 
Richard Biener 
SUSE Software Solutions 

[Bug driver/93019] memory leak in gcc -O2 reported by Valgrind

2023-07-03 Thread costas.argyris at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93019

Costas Argyris  changed:

   What|Removed |Added

 CC||costas.argyris at gmail dot com

--- Comment #5 from Costas Argyris  ---
Created attachment 55464
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=55464=edit
Valgrind memcheck report as of 2 July 2023

The original valgrind report is now 3.5 years old.  I ran the exact same case
with the latest source code (as of yesterday) and attach the report as a file
because it is actually too large to fit in a comment (there are 95 loss records
now, from the 69 that are seen in the original report).

[PATCH] tree-optimization/110310 - move vector epilogue disabling to analysis phase

2023-07-03 Thread Richard Biener via Gcc-patches
The following moves the late decision to elide vectorized epilogues to
the analysis phase and also avoids altering the epilogue's niters.
The costing part from vect_determine_partial_vectors_and_peeling is
moved to vect_analyze_loop_costing where we use the main loop
analysis to constrain the epilogue scalar iterations.

I have not tried to integrate this with vect_known_niters_smaller_than_vf.

It seems the for_epilogue_p parameter in
vect_determine_partial_vectors_and_peeling is largely useless and
we could compute that in the function itself.

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

I suppose testing on aarch64 would be nice-to-have - any takers?

Thanks,
Richard.

PR tree-optimization/110310
* tree-vect-loop.cc (vect_determine_partial_vectors_and_peeling):
Move costing part ...
(vect_analyze_loop_costing): ... here.  Integrate better
estimate for epilogues from ...
(vect_analyze_loop_2): Call vect_determine_partial_vectors_and_peeling
with actual epilogue status.
* tree-vect-loop-manip.cc (vect_do_peeling): ... here and
avoid cancelling epilogue vectorization.
(vect_update_epilogue_niters): Remove.  No longer update
epilogue LOOP_VINFO_NITERS.

* gcc.target/i386/pr110310.c: New testcase.
* gcc.dg/vect/slp-perm-12.c: Disable epilogue vectorization.
---
 gcc/testsuite/gcc.dg/vect/slp-perm-12.c  |   1 +
 gcc/testsuite/gcc.target/i386/pr110310.c |  13 +++
 gcc/tree-vect-loop-manip.cc  | 104 +--
 gcc/tree-vect-loop.cc|  98 ++---
 4 files changed, 102 insertions(+), 114 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110310.c

diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-12.c 
b/gcc/testsuite/gcc.dg/vect/slp-perm-12.c
index 113223ab0f9..635fca54399 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-12.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-12.c
@@ -1,5 +1,6 @@
 /* { dg-require-effective-target vect_int } */
 /* { dg-require-effective-target vect_pack_trunc } */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */
 
 #include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.target/i386/pr110310.c 
b/gcc/testsuite/gcc.target/i386/pr110310.c
new file mode 100644
index 000..dce388aeb20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110310.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=znver4 -fdump-tree-vect-optimized" } */
+
+void foo (int * __restrict a, int *b)
+{
+  for (int i = 0; i < 20; ++i)
+a[i] = b[i] + 42;
+}
+
+/* We should vectorize the main loop with AVX512 and the epilog with SSE.  */
+
+/* { dg-final { scan-tree-dump "optimized: loop vectorized using 64 byte 
vectors" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized using 16 byte 
vectors" "vect" } } */
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 20f570e4a0d..6c452e07880 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2882,34 +2882,6 @@ slpeel_update_phi_nodes_for_lcssa (class loop *epilog)
 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
 }
 
-/* EPILOGUE_VINFO is an epilogue loop that we now know would need to
-   iterate exactly CONST_NITERS times.  Make a final decision about
-   whether the epilogue loop should be used, returning true if so.  */
-
-static bool
-vect_update_epilogue_niters (loop_vec_info epilogue_vinfo,
-unsigned HOST_WIDE_INT const_niters)
-{
-  /* Avoid wrap-around when computing const_niters - 1.  Also reject
- using an epilogue loop for a single scalar iteration, even if
- we could in principle implement that using partial vectors.  */
-  unsigned int gap_niters = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
-  if (const_niters <= gap_niters + 1)
-return false;
-
-  /* Install the number of iterations.  */
-  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (epilogue_vinfo));
-  tree niters_tree = build_int_cst (niters_type, const_niters);
-  tree nitersm1_tree = build_int_cst (niters_type, const_niters - 1);
-
-  LOOP_VINFO_NITERS (epilogue_vinfo) = niters_tree;
-  LOOP_VINFO_NITERSM1 (epilogue_vinfo) = nitersm1_tree;
-
-  /* Decide what to do if the number of epilogue iterations is not
- a multiple of the epilogue loop's vectorization factor.  */
-  return vect_determine_partial_vectors_and_peeling (epilogue_vinfo, true);
-}
-
 /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
Return a value that equals:
 
@@ -3039,7 +3011,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
   int estimated_vf;
   int prolog_peeling = 0;
   bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
-  bool vect_epilogues_updated_niters = false;
   /* We currently do not support prolog peeling if 

Tiny asm

2023-07-03 Thread jacob navia
Dear Friends:

1) I have (of course) kept your copyright notice at the start of the « asm.h » 
header file of my project.

2) I have published my source code using your GPL V3 license

I am not trying to steal anything from you. And I would insist that I have great 
respect for the people working with gcc. In no way am I trying to minimize 
their accomplishments. What happens is that layers of code produced by many 
developers have accumulated across the years, like the dust in the glass shelf 
of my grand mother back home. Sometimes in spring she would clean it. 

I am doing just that.

That said, now I have some questions:

1) What kind of options does gcc pass to its assembler? Is there in the huge 
source tree of gcc a place where those options are emitted?
  This would allow me to keep only those options into tiny-asm and erase all 
others (and the associated code)

2) I have to re-engineer the output of assembler instructions. Instead of 
writing to an assembler file (or to a memory assembler file) I will have to 
convince gcc to output into a buffer, and will pass the buffer address to the 
assembler. 

So, instead of outputting several MBs worth of assembler instructions, we would 
pass only 8 bytes of a buffer address. If the buffer is small (4K, for 
instance), it would pass into the CPU cache. Since the CPU cache is 16KB some 
of it may be kept there.

3) To do that, I need to know where in the back end source code you are writing 
to disk.

Thanks for your help, and thanks to the people that posted encouraging words.

jacob



Re: [VSETVL PASS] RISC-V: Optimize local AVL propagation

2023-07-03 Thread Robin Dapp via Gcc-patches
LGTM.

Regards
 Robin



Re: [PATCH] middle-end/110495 - avoid associating constants with (VL) vectors

2023-07-03 Thread Richard Sandiford via Gcc-patches
Richard Biener via Gcc-patches  writes:
> When trying to associate (v + INT_MAX) + INT_MAX we are using
> the TREE_OVERFLOW bit to check for correctness.  That isn't
> working for VECTOR_CSTs and it can't in general when one considers
> VL vectors.  It looks like it should work for COMPLEX_CSTs but
> I didn't try to single out _Complex int in this change.
>
> The following makes sure that for vectors we use the fallback of
> using unsigned arithmetic when associating the above to
> v + (INT_MAX + INT_MAX).
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
>
> Thanks,
> Richard.
>
>   PR middle-end/110495
>   * tree.h (TREE_OVERFLOW): Do not mention VECTOR_CSTs
>   since we do not set TREE_OVERFLOW on those since the
>   introduction of VL vectors.
>   * match.pd (x +- CST +- CST): For VECTOR_CST do not look
>   at TREE_OVERFLOW to determine validity of association.
>
>   * gcc.dg/tree-ssa/addadd-2.c: Amend.
>   * gcc.dg/tree-ssa/forwprop-27.c: Adjust.
> ---
>  gcc/match.pd| 9 +
>  gcc/testsuite/gcc.dg/tree-ssa/addadd-2.c| 1 +
>  gcc/testsuite/gcc.dg/tree-ssa/forwprop-27.c | 4 +++-
>  gcc/tree.h  | 2 +-
>  4 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index f09583bbcac..d193a572005 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3025,7 +3025,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (with { tree cst = const_binop (outer_op == inner_op
>   ? PLUS_EXPR : MINUS_EXPR,
>   type, @1, @2); }
> -  (if (cst && !TREE_OVERFLOW (cst))
> +  (if (INTEGRAL_TYPE_P (type) && cst && !TREE_OVERFLOW (cst))
> (inner_op @0 { cst; } )
> /* X+INT_MAX+1 is X-INT_MIN.  */
> (if (INTEGRAL_TYPE_P (type) && cst
> @@ -3037,7 +3037,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>(view_convert (inner_op
>   (view_convert:utype @0)
>   (view_convert:utype
> -  { drop_tree_overflow (cst); }))
> +  { TREE_OVERFLOW (cst)
> +? drop_tree_overflow (cst) : cst; }))

It looks like the whole “(with …)” expects cst to be nonnull,
but the “last resort” doesn't check it (unless I'm misreading).
Would it be easier to add a top-level “if (cst)”?  (Obviously
a preexisting thing.)
>  
>/* (CST1 - A) +- CST2 -> CST3 - A  */
>(for outer_op (plus minus)
> @@ -3049,7 +3050,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   forever if something doesn't simplify into a constant.  */
>   (if (!CONSTANT_CLASS_P (@0))
>(minus (outer_op! (view_convert @1) @2) (view_convert @0)))
> - (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> + (if (!INTEGRAL_TYPE_P (TREE_TYPE (@0))
> || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>(view_convert (minus (outer_op! @1 (view_convert @2)) @0))
>(if (types_match (type, @0) && !TYPE_OVERFLOW_SANITIZED (type))
> @@ -3068,7 +3069,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>forever if something doesn't simplify into a constant.  */
>  (if (!CONSTANT_CLASS_P (@0))
>   (plus (view_convert @0) (minus! @1 (view_convert @2
> -(if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +(if (!INTEGRAL_TYPE_P (TREE_TYPE (@0))
>|| TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>   (view_convert (plus @0 (minus! (view_convert @1) @2)))
>   (if (types_match (type, @0) && !TYPE_OVERFLOW_SANITIZED (type))

I didn't understand this part.  Doesn't it mean that we allow
overflow-inducing reassociations for all vector integer types,
albeit not immediately folded away?

Also, why do we keep the:

  !ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type)

in the outer ifs?

But that's just me not understanding match.pd very well.
Feel free to ignore if it's nonsense. :)

Thanks,
Richard


[Bug c++/109680] [13 Regression] is_convertible incorrectly true

2023-07-03 Thread redi at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109680

--- Comment #12 from Jonathan Wakely  ---
13.1 has already been released, it can't be changed now.

As comment 10 says, the fix is only on trunk so far and has not been backported
to the gcc-13 branch. It will be in the next 13.x release after it's backported
to that branch.

[Bug target/110533] New: [x86-64] naked with -O0 and register-passed struct/int128 clobbers parameters/callee-saved regs

2023-07-03 Thread engelke at in dot tum.de via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110533

Bug ID: 110533
   Summary: [x86-64] naked with -O0 and register-passed
struct/int128 clobbers parameters/callee-saved regs
   Product: gcc
   Version: unknown
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: target
  Assignee: unassigned at gcc dot gnu.org
  Reporter: engelke at in dot tum.de
  Target Milestone: ---

Compiling a naked function with a parameter that is split over multiple
registers generates several mov operations with -O0, clobbering other
parameters and callee-saved registers. This does not happen with -O1. This
happens since the introduction of naked in GCC 8, at least up to GCC 13.

Example:

__attribute__((naked))
void fn(__int128 a) {
asm("ret");
}

Compiles to; note that rbx (callee-saved) is clobbered:

fn:
.LFB0:
.cfi_startproc
movq%rdi, %rdx
movq%rsi, %rax
movq%rcx, %rbx
movq%rdx, %rcx
movq%rax, %rbx
#APP
# 3 "" 1
ret
# 0 "" 2
#NO_APP
nop
ud2
.cfi_endproc

With two parameters:

__attribute__((naked))
void fn(__int128 a, __int128 b) {
asm("ret");
}

Compiles to; note that rbx and the second parameter are clobbered:

fn:
.LFB0:
.cfi_startproc
movq%rdi, %rdx
movq%rsi, %rax
movq%rcx, %rbx
movq%rdx, %rcx
movq%rax, %rbx
#APP
# 3 "" 1
ret
# 0 "" 2
#NO_APP
nop
ud2
.cfi_endproc

With a slight modification everything works as expected:

__attribute__((naked))
void fn(int x, int y, __int128 a) {
asm("ret");
}

Compiles to:

fn:
.LFB0:
.cfi_startproc
#APP
# 3 "" 1
ret
# 0 "" 2
#NO_APP
nop
ud2
.cfi_endproc

(Above examples generated with gcc 12.2.1, but many other versions are affected
as well.)

[VSETVL PASS] RISC-V: Optimize local AVL propagation

2023-07-03 Thread Juzhe-Zhong
I recently noticed that the current VSETVL pass has an unnecessary restriction
on local AVL propagation.

Consider this following case:

+  insn 1: vsetvli a5,a3,e8,mf4,ta,mu
+  insn 2: vsetvli zero,a5,e32,m1,ta,ma
+  ...
+  vle32.v v1,0(a1)
+  vsetvli a2,zero,e32,m1,ta,ma
+  vadd.vv v1,v1,v1
+  vsetvli zero,a5,e32,m1,ta,ma
+  vse32.v v1,0(a0)
+  ...
+  insn 3: sub a3,a3,a5
+  ...

We failed to elide insn 2 (vsetvl insn) since insn 3 is modifying "a3" AVL.
Actually, we don't really care about insn 3 since we should only check and make
sure there is no insn between insn 1 and insn 2 that modifies "a3" AVL. Then, we
can propagate AVL "a3" from insn 1 to insn 2. Finally, insn 2 is eliminated.

After this patch:

+  insn 1: vsetvli a5,a3,e8,mf4,ta,ma
+  ...
+  vle32.v v1,0(a1)
+  vsetvli a2,zero,e32,m1,ta,ma
+  vadd.vv v1,v1,v1
+  vsetvli zero,a5,e32,m1,ta,ma
+  vse32.v v1,0(a0)
+  ...
+  insn 3: sub a3,a3,a5
+  ...

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (vector_insn_info::parse_insn): Add 
early break.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/vsetvl/avl_prop-1.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc  | 22 +++
 .../gcc.target/riscv/rvv/vsetvl/avl_prop-1.c  | 21 ++
 2 files changed, 43 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-1.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 2d576e8d5c1..ab47901e23f 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2025,6 +2025,28 @@ vector_insn_info::parse_insn (insn_info *insn)
   real_insn_and_same_bb_p (i, get_insn ()->bb ());
   i = i->next_nondebug_insn ())
{
+ /* Consider this following sequence:
+
+  insn 1: vsetvli a5,a3,e8,mf4,ta,mu
+  insn 2: vsetvli zero,a5,e32,m1,ta,ma
+  ...
+  vle32.v v1,0(a1)
+  vsetvli a2,zero,e32,m1,ta,ma
+  vadd.vv v1,v1,v1
+  vsetvli zero,a5,e32,m1,ta,ma
+  vse32.v v1,0(a0)
+  ...
+  insn 3: sub a3,a3,a5
+  ...
+
+  We can local AVL propagate "a3" from insn 1 to insn 2
+  if no insns between insn 1 and insn 2 modify "a3 even
+  though insn 3 modifies "a3".
+  Otherwise, we can't perform local AVL propagation.
+
+  Early break if we reach the insn 2.  */
+ if (!before_p (i, insn))
+   break;
  if (find_access (i->defs (), REGNO (new_info.get_avl (
{
  modified_p = true;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-1.c
new file mode 100644
index 000..19ea0f14df5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns 
-fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void
+foo (void *a, void *b, void *c, size_t n)
+{
+  for (size_t vl; n > 0; n -= vl, a += vl, b += vl * 4, c += vl)
+{
+  vl = __riscv_vsetvl_e8mf4 (n);
+  vint32m1_t vec_b = __riscv_vle32_v_i32m1 (b, vl);
+  vint32m1_t vec_a = __riscv_vadd_vv_i32m1 (vec_b, vec_b, 
__riscv_vsetvlmax_e32m1 ());
+  __riscv_vse32_v_i32m1 (a, vec_a, vl);
+}
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" 
no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" 
} } } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 { target { 
no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts 
"-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { 
no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts 
"-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { 
no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts 
"-funroll-loops" } } } } */
-- 
2.36.3



July Community Call

2023-07-03 Thread Arthur Cohen

Hi everyone,

We will be having our next monthly community call on the 10th of July at 
9am UTC.


https://meet.jit.si/gccrs-community-call-july
https://hackmd.io/Abr_eMmrRty8fJMsZQXB7Q

Please feel free to join even if you'd just like to listen!

Kindly,

Arthur


OpenPGP_0x1B3465B044AD9C65.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature
-- 
Gcc-rust mailing list
Gcc-rust@gcc.gnu.org
https://gcc.gnu.org/mailman/listinfo/gcc-rust


[PATCH] middle-end/110495 - avoid associating constants with (VL) vectors

2023-07-03 Thread Richard Biener via Gcc-patches
When trying to associate (v + INT_MAX) + INT_MAX we are using
the TREE_OVERFLOW bit to check for correctness.  That isn't
working for VECTOR_CSTs and it can't in general when one considers
VL vectors.  It looks like it should work for COMPLEX_CSTs but
I didn't try to single out _Complex int in this change.

The following makes sure that for vectors we use the fallback of
using unsigned arithmetic when associating the above to
v + (INT_MAX + INT_MAX).

Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

PR middle-end/110495
* tree.h (TREE_OVERFLOW): Do not mention VECTOR_CSTs
since we do not set TREE_OVERFLOW on those since the
introduction of VL vectors.
* match.pd (x +- CST +- CST): For VECTOR_CST do not look
at TREE_OVERFLOW to determine validity of association.

* gcc.dg/tree-ssa/addadd-2.c: Amend.
* gcc.dg/tree-ssa/forwprop-27.c: Adjust.
---
 gcc/match.pd| 9 +
 gcc/testsuite/gcc.dg/tree-ssa/addadd-2.c| 1 +
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-27.c | 4 +++-
 gcc/tree.h  | 2 +-
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index f09583bbcac..d193a572005 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3025,7 +3025,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(with { tree cst = const_binop (outer_op == inner_op
? PLUS_EXPR : MINUS_EXPR,
type, @1, @2); }
-(if (cst && !TREE_OVERFLOW (cst))
+(if (INTEGRAL_TYPE_P (type) && cst && !TREE_OVERFLOW (cst))
  (inner_op @0 { cst; } )
  /* X+INT_MAX+1 is X-INT_MIN.  */
  (if (INTEGRAL_TYPE_P (type) && cst
@@ -3037,7 +3037,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (view_convert (inner_op
(view_convert:utype @0)
(view_convert:utype
-{ drop_tree_overflow (cst); }))
+{ TREE_OVERFLOW (cst)
+  ? drop_tree_overflow (cst) : cst; }))
 
   /* (CST1 - A) +- CST2 -> CST3 - A  */
   (for outer_op (plus minus)
@@ -3049,7 +3050,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
forever if something doesn't simplify into a constant.  */
  (if (!CONSTANT_CLASS_P (@0))
   (minus (outer_op! (view_convert @1) @2) (view_convert @0)))
- (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ (if (!INTEGRAL_TYPE_P (TREE_TYPE (@0))
  || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
   (view_convert (minus (outer_op! @1 (view_convert @2)) @0))
   (if (types_match (type, @0) && !TYPE_OVERFLOW_SANITIZED (type))
@@ -3068,7 +3069,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   forever if something doesn't simplify into a constant.  */
 (if (!CONSTANT_CLASS_P (@0))
  (plus (view_convert @0) (minus! @1 (view_convert @2
-(if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+(if (!INTEGRAL_TYPE_P (TREE_TYPE (@0))
 || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
  (view_convert (plus @0 (minus! (view_convert @1) @2)))
  (if (types_match (type, @0) && !TYPE_OVERFLOW_SANITIZED (type))
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/addadd-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/addadd-2.c
index 39aa032c9b1..8c05911f473 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/addadd-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/addadd-2.c
@@ -12,4 +12,5 @@ void k(S*x){
   *x = (S)(y + __INT_MAX__);
 }
 
+/* { dg-final { scan-tree-dump "4294967294" "optimized" { target int32plus } } 
} */
 /* { dg-final { scan-tree-dump-not "2147483647" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-27.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-27.c
index 9775a4c6367..6c71a4fc81c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-27.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-27.c
@@ -33,7 +33,9 @@ void i (V *v1, V *v2){
   *v2 = (c1-*v2)+c2;
 }
 
-/* { dg-final { scan-tree-dump-not "\\\+" "forwprop1"} } */
+/* { dg-final { scan-tree-dump-times "\\\+" 1 "forwprop1"} } */
 /* { dg-final { scan-tree-dump "{ 0, 4 }" "forwprop1"} } */
 /* { dg-final { scan-tree-dump "{ 37, -5 }" "forwprop1"} } */
+/* { dg-final { scan-tree-dump "{ 27, 23 }" "forwprop1"} } */
+/* { dg-final { scan-tree-dump "{ 37, 3 }" "forwprop1"} } */
 
diff --git a/gcc/tree.h b/gcc/tree.h
index f11c758afb9..bedbbb0bcc0 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -824,7 +824,7 @@ extern void omp_clause_range_check_failed (const_tree, 
const char *, int,
 #define TYPE_REF_CAN_ALIAS_ALL(NODE) \
   (PTR_OR_REF_CHECK (NODE)->base.static_flag)
 
-/* In an INTEGER_CST, REAL_CST, COMPLEX_CST, or VECTOR_CST, this means
+/* In an INTEGER_CST, REAL_CST, or COMPLEX_CST, this means
there was an overflow in folding.  */
 
 #define TREE_OVERFLOW(NODE) (CST_CHECK (NODE)->base.public_flag)
-- 
2.35.3


  1   2   >