date:20210811

[PATCH] [i386] Optimize vec_perm_expr to match vpmov{dw,qd,wb}.

2021-08-11 Thread liuhongt via Gcc-patches

Hi:
  This is another patch to optimize vec_perm_expr to match vpmov{dw,qd,wb}
under AVX512.
  For scenarios (like pr101846-2.c) where the upper half is not used, this patch
generates better code with only one vpmov{wb,dw,qd} instruction. For
scenarios (like pr101846-3.c) where the upper half is actually used, if the src
vector length is 256/512 bits, the patch can still generate better code, but for
128 bits, the code generation is worse.

128 bits upper half not used.

-   vpshufb .LC2(%rip), %xmm0, %xmm0
+   vpmovdw %xmm0, %xmm0

128 bits upper half used.
-   vpshufb .LC2(%rip), %xmm0, %xmm0
+   vpmovdw %xmm0, %xmm1
+   vmovq   %xmm1, %rax
+   vpinsrq $0, %rax, %xmm0, %xmm0

  Maybe expand_vec_perm_trunc_vinsert should only deal with 256/512-bit
vectors, but considering that real-world use of scenarios like the foo_*_128
functions in pr101846-3.c is relatively unlikely, I still keep this part of the code.

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
  Ok for trunk?

gcc/ChangeLog:

PR target/101846
* config/i386/i386-expand.c (expand_vec_perm_trunc_vinsert):
New function.
(ix86_vectorize_vec_perm_const): Call
expand_vec_perm_trunc_vinsert.
* config/i386/sse.md (vec_set_lo_v32hi): New define_insn.
(vec_set_lo_v64qi): Ditto.
(vec_set_lo_): Extend to no-avx512dq.

gcc/testsuite/ChangeLog:

PR target/101846
* gcc.target/i386/pr101846-2.c: New test.
* gcc.target/i386/pr101846-3.c: New test.
---
 gcc/config/i386/i386-expand.c  | 125 +
 gcc/config/i386/sse.md |  60 +-
 gcc/testsuite/gcc.target/i386/pr101846-2.c |  81 +
 gcc/testsuite/gcc.target/i386/pr101846-3.c |  95 
 4 files changed, 359 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-3.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index bd21efa9530..519caac2e15 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -18317,6 +18317,126 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
   return false;
 }
 
+/* A subroutine of ix86_expand_vec_perm_const_1.  Try to implement D
+   in terms of a pair of vpmovdw + vinserti128 instructions.  */
+static bool
+expand_vec_perm_trunc_vinsert (struct expand_vec_perm_d *d)
+{
+  unsigned i, nelt = d->nelt, mask = d->nelt - 1;
+  unsigned half = nelt / 2;
+  machine_mode half_mode, trunc_mode;
+
+  /* vpmov{wb,dw,qd} only available under AVX512.  */
+  if (!d->one_operand_p || !TARGET_AVX512F
+  || (!TARGET_AVX512VL  && GET_MODE_SIZE (d->vmode) < 64)
+  || GET_MODE_SIZE (GET_MODE_INNER (d->vmode)) > 4)
+return false;
+
+  /* TARGET_AVX512BW is needed for vpmovwb.  */
+  if (GET_MODE_INNER (d->vmode) == E_QImode && !TARGET_AVX512BW)
+return false;
+
+  for (i = 0; i < nelt; i++)
+{
+  unsigned idx = d->perm[i] & mask;
+  if (idx != i * 2 && i < half)
+   return false;
+  if (idx != i && i >= half)
+   return false;
+}
+
+  rtx (*gen_trunc) (rtx, rtx) = NULL;
+  rtx (*gen_vec_set_lo) (rtx, rtx, rtx) = NULL;
+  switch (d->vmode)
+{
+case E_V16QImode:
+  gen_trunc = gen_truncv8hiv8qi2;
+  gen_vec_set_lo = gen_vec_setv2di;
+  half_mode = V8QImode;
+  trunc_mode = V8HImode;
+  break;
+case E_V32QImode:
+  gen_trunc = gen_truncv16hiv16qi2;
+  gen_vec_set_lo = gen_vec_set_lo_v32qi;
+  half_mode = V16QImode;
+  trunc_mode = V16HImode;
+  break;
+case E_V64QImode:
+  gen_trunc = gen_truncv32hiv32qi2;
+  gen_vec_set_lo = gen_vec_set_lo_v64qi;
+  half_mode = V32QImode;
+  trunc_mode = V32HImode;
+  break;
+case E_V8HImode:
+  gen_trunc = gen_truncv4siv4hi2;
+  gen_vec_set_lo = gen_vec_setv2di;
+  half_mode = V4HImode;
+  trunc_mode = V4SImode;
+  break;
+case E_V16HImode:
+  gen_trunc = gen_truncv8siv8hi2;
+  gen_vec_set_lo = gen_vec_set_lo_v16hi;
+  half_mode = V8HImode;
+  trunc_mode = V8SImode;
+  break;
+case E_V32HImode:
+  gen_trunc = gen_truncv16siv16hi2;
+  gen_vec_set_lo = gen_vec_set_lo_v32hi;
+  half_mode = V16HImode;
+  trunc_mode = V16SImode;
+  break;
+case E_V4SImode:
+  gen_trunc = gen_truncv2div2si2;
+  gen_vec_set_lo = gen_vec_setv2di;
+  half_mode = V2SImode;
+  trunc_mode = V2DImode;
+  break;
+case E_V8SImode:
+  gen_trunc = gen_truncv4div4si2;
+  gen_vec_set_lo = gen_vec_set_lo_v8si;
+  half_mode = V4SImode;
+  trunc_mode = V4DImode;
+  break;
+case E_V16SImode:
+  gen_trunc = gen_truncv8div8si2;
+  gen_vec_set_lo = gen_vec_set_lo_v16si;
+  half_mode = V8SImode;
+  trunc_mode = V8DImode;
+  break;
+
+default:
+  break;
+}
+
+  if (gen_trunc == NULL)
+return false;
+
+  rtx op_half

Re: [PATCH] [i386] Introduce a scalar version of avx512f_vmscalef and adjust ldexp3 for it.

2021-08-11 Thread Hongtao Liu via Gcc-patches

On Thu, Aug 12, 2021 at 12:05 PM liuhongt  wrote:
>
> Hi:
>   This is the patch i'm going to checkin.
>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,};
>
>
> 2021-08-12  Uros Bizjak  
>
> gcc/ChangeLog:
>
> PR target/98309
> * config/i386/i386.md (avx512f_scalef2): New
> define_insn.
> (ldexp3): Adjust for new define_insn.
> (UNSPEC_SCALEF): Move from sse.md.
> * config/i386/sse.md (UNSPEC_SCALEF): Move to i386.md.
> ---
>  gcc/config/i386/i386.md | 27 +++
>  gcc/config/i386/sse.md  |  1 -
>  2 files changed, 19 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 56b09c566ed..4a8e8fea290 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -125,6 +125,9 @@ (define_c_enum "unspec" [
>UNSPEC_RSQRT
>UNSPEC_PSADBW
>
> +  ;; For AVX512F support
> +  UNSPEC_SCALEF
> +
>;; Generic math support
>UNSPEC_COPYSIGN
>UNSPEC_XORSIGN
> @@ -17894,6 +17897,17 @@ (define_expand "expm12"
>DONE;
>  })
>
> +(define_insn "avx512f_scalef2"
> +  [(set (match_operand:MODEF 0 "register_operand" "=v")
> +   (unspec:MODEF
> + [(match_operand:MODEF 1 "register_operand" "v")
> +  (match_operand:MODEF 2 "nonimmediate_operand" "vm")]
> + UNSPEC_SCALEF))]
> +  "TARGET_AVX512F"
> +  "vscalef\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode"  "")])
> +
>  (define_expand "ldexpxf3"
>[(match_operand:XF 0 "register_operand")
> (match_operand:XF 1 "register_operand")
> @@ -17924,15 +17938,12 @@ (define_expand "ldexp3"
>if (TARGET_AVX512F && TARGET_SSE_MATH)
> {
>   rtx op2 = gen_reg_rtx (mode);
> - emit_insn (gen_floatsi2 (op2, operands[2]));
> - operands[0] = lowpart_subreg (mode, operands[0], 
> mode);
> - if (MEM_P (operands[1]))
> +
> + if (!nonimmediate_operand (operands[1], mode))
> operands[1] = force_reg (mode, operands[1]);
> - operands[1] = lowpart_subreg (mode, operands[1], 
> mode);
> - op2 = lowpart_subreg (mode, op2, mode);
> - emit_insn (gen_avx512f_vmscalef (operands[0],
> -  operands[1],
> -  op2));
> +
> + emit_insn (gen_floatsi2 (op2, operands[2]));
> + emit_insn (gen_avx512f_scalef2 (operands[0], operands[1], op2));
> }
>else
>  {
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 3957c86c3df..9233dfc6150 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -92,7 +92,6 @@ (define_c_enum "unspec" [
>UNSPEC_RCP14
>UNSPEC_RSQRT14
>UNSPEC_FIXUPIMM
> -  UNSPEC_SCALEF
>UNSPEC_VTERNLOG
>UNSPEC_GETEXP
>UNSPEC_GETMANT
> --
> 2.18.1
>

Please ignore this, have replied in another thread.

-- 
BR,
Hongtao

Re: [PATCH] Extend ldexp{s, d}f3 to vscalefs{s, d} when TARGET_AVX512F and TARGET_SSE_MATH.

2021-08-11 Thread Hongtao Liu via Gcc-patches

On Wed, Aug 11, 2021 at 7:16 PM Uros Bizjak  wrote:
>
> On Wed, Aug 11, 2021 at 8:36 AM Uros Bizjak  wrote:
> >
> > On Tue, Aug 10, 2021 at 2:13 PM liuhongt  wrote:
> > >
> > > Hi:
> > >   AVX512F supported vscalefs{s,d} which is the same as ldexp except the 
> > > second operand should be floating point.
> > >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > >
> > > gcc/ChangeLog:
> > >
> > > PR target/98309
> > > * config/i386/i386.md (ldexp3): Extend to vscalefs[sd]
> > > when TARGET_AVX512F and TARGET_SSE_MATH.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > PR target/98309
> > > * gcc.target/i386/pr98309-1.c: New test.
> > > * gcc.target/i386/pr98309-2.c: New test.
> >
> > OK.
>
> Actually, we should introduce a scalar version of avx512f_vmscalef, so
> we can avoid all subreg conversions with the vector-merge (VM)
> version, and will also allow memory in operand 2.
>
> Please test the attached incremental patch.
>
Bootstrapped and regtested on x86_64-linux-gnu{-m32,} on CLX.
Tests are fine.
> Uros.



-- 
BR,
Hongtao

[PATCH] [i386] Introduce a scalar version of avx512f_vmscalef and adjust ldexp3 for it.

2021-08-11 Thread liuhongt via Gcc-patches

Hi:
  This is the patch I'm going to check in.
  Bootstrapped and regtested on x86_64-linux-gnu{-m32,};


2021-08-12  Uros Bizjak  

gcc/ChangeLog:

PR target/98309
* config/i386/i386.md (avx512f_scalef2): New
define_insn.
(ldexp3): Adjust for new define_insn.
(UNSPEC_SCALEF): Move from sse.md.
* config/i386/sse.md (UNSPEC_SCALEF): Move to i386.md.
---
 gcc/config/i386/i386.md | 27 +++
 gcc/config/i386/sse.md  |  1 -
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 56b09c566ed..4a8e8fea290 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -125,6 +125,9 @@ (define_c_enum "unspec" [
   UNSPEC_RSQRT
   UNSPEC_PSADBW
 
+  ;; For AVX512F support
+  UNSPEC_SCALEF
+
   ;; Generic math support
   UNSPEC_COPYSIGN
   UNSPEC_XORSIGN
@@ -17894,6 +17897,17 @@ (define_expand "expm12"
   DONE;
 })
 
+(define_insn "avx512f_scalef2"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
+   (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "v")
+  (match_operand:MODEF 2 "nonimmediate_operand" "vm")]
+ UNSPEC_SCALEF))]
+  "TARGET_AVX512F"
+  "vscalef\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode"  "")])
+
 (define_expand "ldexpxf3"
   [(match_operand:XF 0 "register_operand")
(match_operand:XF 1 "register_operand")
@@ -17924,15 +17938,12 @@ (define_expand "ldexp3"
   if (TARGET_AVX512F && TARGET_SSE_MATH)
{
  rtx op2 = gen_reg_rtx (mode);
- emit_insn (gen_floatsi2 (op2, operands[2]));
- operands[0] = lowpart_subreg (mode, operands[0], mode);
- if (MEM_P (operands[1]))
+
+ if (!nonimmediate_operand (operands[1], mode))
operands[1] = force_reg (mode, operands[1]);
- operands[1] = lowpart_subreg (mode, operands[1], mode);
- op2 = lowpart_subreg (mode, op2, mode);
- emit_insn (gen_avx512f_vmscalef (operands[0],
-  operands[1],
-  op2));
+
+ emit_insn (gen_floatsi2 (op2, operands[2]));
+ emit_insn (gen_avx512f_scalef2 (operands[0], operands[1], op2));
}
   else
 {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3957c86c3df..9233dfc6150 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -92,7 +92,6 @@ (define_c_enum "unspec" [
   UNSPEC_RCP14
   UNSPEC_RSQRT14
   UNSPEC_FIXUPIMM
-  UNSPEC_SCALEF
   UNSPEC_VTERNLOG
   UNSPEC_GETEXP
   UNSPEC_GETMANT
-- 
2.18.1

Re: [PATCH] Fix loop split incorrect count and probability

2021-08-11 Thread Xionghu Luo via Gcc-patches





On 2021/8/11 17:16, Richard Biener wrote:

On Wed, 11 Aug 2021, Xionghu Luo wrote:




On 2021/8/10 22:47, Richard Biener wrote:

On Mon, 9 Aug 2021, Xionghu Luo wrote:


Thanks,

On 2021/8/6 19:46, Richard Biener wrote:

On Tue, 3 Aug 2021, Xionghu Luo wrote:


The loop split condition is moved between loop1 and loop2, so the split bb's
count and probability should also be duplicated instead of (100% vs INV).
Secondly, the original loop1 and loop2 counts need to be proportional to the
original loop.


diff base/loop-cond-split-1.c.151t.lsplit  
patched/loop-cond-split-1.c.151t.lsplit:
...
  int prephitmp_16;
  int prephitmp_25;

   [local count: 118111600]:
  if (n_7(D) > 0)
goto ; [89.00%]
  else
goto ; [11.00%]

   [local count: 118111600]:
  return;

   [local count: 105119324]:
  pretmp_3 = ga;

-   [local count: 955630225]:
+   [local count: 315357973]:
  # i_13 = PHI 
  # prephitmp_12 = PHI 
  if (prephitmp_12 != 0)
goto ; [33.00%]
  else
goto ; [67.00%]

-   [local count: 315357972]:
+   [local count: 104068130]:
  _2 = do_something ();
  ga = _2;

-   [local count: 955630225]:
+   [local count: 315357973]:
  # prephitmp_5 = PHI 
  i_10 = inc (i_13);
  if (n_7(D) > i_10)
goto ; [89.00%]
  else
goto ; [11.00%]

   [local count: 105119324]:
  goto ; [100.00%]

-   [local count: 850510901]:
+   [local count: 280668596]:
  if (prephitmp_12 != 0)
-goto ; [100.00%]
+goto ; [33.00%]
  else
-goto ; [INV]
+goto ; [67.00%]

-   [local count: 850510901]:
+   [local count: 280668596]:
  goto ; [100.00%]

-   [count: 0]:
+   [local count: 70429947]:
  # i_23 = PHI 
  # prephitmp_25 = PHI 

-   [local count: 955630225]:
+   [local count: 640272252]:
  # i_15 = PHI 
  # prephitmp_16 = PHI 
  i_22 = inc (i_15);
  if (n_7(D) > i_22)
goto ; [89.00%]
  else
goto ; [11.00%]

-   [local count: 850510901]:
+   [local count: 569842305]:
  goto ; [100.00%]

}

gcc/ChangeLog:

* tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
(do_split_loop_on_cond): Likewise.
---
gcc/tree-ssa-loop-split.c | 16 
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3a09bbc39e5..8e5a7ded0f7 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -583,10 +583,10 @@ split_loop (class loop *loop1)
basic_block cond_bb;


if (!initial_true)
- cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
+ cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
+
+   edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
+  ? EDGE_SUCC (bbs[i], 0)
+  : EDGE_SUCC (bbs[i], 1);


	class loop *loop2 = loop_version (loop1, cond, &cond_bb,

-  profile_probability::always (),
-  profile_probability::always (),
-  profile_probability::always (),
-  profile_probability::always (),
+  true_edge->probability,
+  true_edge->probability.invert (),
+  true_edge->probability,
+  true_edge->probability.invert (),
   true);


there is no 'true_edge' variable at this point.


Sorry, I missed the above hunk when splitting the patch.




gcc_assert (loop2);

@@ -1486,10 +1486,10 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)

  initialize_original_copy_tables ();

  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,

-profile_probability::always (),
-profile_probability::never (),
-profile_probability::always (),
-profile_probability::always (),
+invar_branch->probability.invert (),
+invar_branch->probability,
+invar_branch->probability.invert (),
+invar_branch->probability,
 true);
  if (!loop2)
{


The patch introduction seems to talk about do_split_loop_on_cond only.


split_loop faces a similar issue, though it sets the two branches to 100% vs 100%
with no scaling, which also seems incorrect.


Since loop versioning inserts a condition with the passed probabilities
but in this case a 'boolean_true_node' condition the then and else
probabilities passed look correct.  It's just the scaling

Re: [patch] Make -no-pie option work for native Windows

2021-08-11 Thread Jonathan Yong via Gcc-patches


On 8/11/21 2:21 PM, Eric Botcazou wrote:

Hi,

as already mentioned on the list, binutils 2.36 generates PIE executables by
default on native Windows (because --dynamicbase is the default) so it makes
sense to have a simple way to counter that and -no-pie seems appropriate,
all the more so that it is automatically passed when building the compiler.

Bootstrapped on x86 and x86-64/Windows, w/ and w/o binutils 2.36, OK for the
mainline and 11 branch?


2021-08-11  Eric Botcazou  

* configure.ac (PE linker --disable-dynamicbase support): New check.
* configure: Regenerate.
* config.in: Likewise.
* config/i386/mingw32.h (LINK_SPEC_DISABLE_DYNAMICBASE): New define.
(LINK_SPEC): Use it.
* config/i386/mingw-w64.h (LINK_SPEC_DISABLE_DYNAMICBASE): Likewise.
(LINK_SPEC): Likewise.



Looks good to me. Do you have push permissions?


OpenPGP_0x713B5FE29C145D45.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature

Re: [PATCH] rs6000: Make some BIFs vectorized on P10

2021-08-11 Thread Kewen.Lin via Gcc-patches

Hi Bill,

Thanks for your prompt review!

on 2021/8/12 上午12:34, Bill Schmidt wrote:
> Hi Kewen,
> 
> FWIW, it's easier on reviewers if you include the patch inline instead of as 
> an attachment.
> 
> On 8/11/21 1:56 AM, Kewen.Lin wrote:
>> Hi,
>>
>> This patch is to add the support to make vectorizer able to
>> vectorize scalar version of some built-in functions with its
>> corresponding vector version with Power10 support.
>>
>> Bootstrapped & regtested on powerpc64le-linux-gnu {P9,P10}
>> and powerpc64-linux-gnu P8.
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -
>> gcc/ChangeLog:
>>
>>  * config/rs6000/rs6000.c (rs6000_builtin_md_vectorized_function): Add
>>  support for some built-in functions vectorized on Power10.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * gcc.target/powerpc/dive-vectorize-1.c: New test.
>>  * gcc.target/powerpc/dive-vectorize-1.h: New test.
>>  * gcc.target/powerpc/dive-vectorize-2.c: New test.
>>  * gcc.target/powerpc/dive-vectorize-2.h: New test.
>>  * gcc.target/powerpc/dive-vectorize-run-1.c: New test.
>>  * gcc.target/powerpc/dive-vectorize-run-2.c: New test.
>>  * gcc.target/powerpc/p10-bifs-vectorize-1.c: New test.
>>  * gcc.target/powerpc/p10-bifs-vectorize-1.h: New test.
>>  * gcc.target/powerpc/p10-bifs-vectorize-run-1.c: New test.
> 
> ---
>  gcc/config/rs6000/rs6000.c| 55 +++
>  .../gcc.target/powerpc/dive-vectorize-1.c | 11 
>  .../gcc.target/powerpc/dive-vectorize-1.h | 22 
>  .../gcc.target/powerpc/dive-vectorize-2.c | 12 
>  .../gcc.target/powerpc/dive-vectorize-2.h | 22 
>  .../gcc.target/powerpc/dive-vectorize-run-1.c | 52 ++
>  .../gcc.target/powerpc/dive-vectorize-run-2.c | 53 ++
>  .../gcc.target/powerpc/p10-bifs-vectorize-1.c | 15 +
>  .../gcc.target/powerpc/p10-bifs-vectorize-1.h | 40 ++
>  .../powerpc/p10-bifs-vectorize-run-1.c| 45 +++
>  10 files changed, 327 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 279f00cc648..3eac1d05101 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -5785,6 +5785,61 @@ rs6000_builtin_md_vectorized_function (tree fndecl, 
> tree type_out,
>  default:
>break;
>  }
> +
> +  machine_mode in_vmode = TYPE_MODE (type_in);
> +  machine_mode out_vmode = TYPE_MODE (type_out);
> +
> +  /* Power10 supported vectorized built-in functions.  */
> +  if (TARGET_POWER10
> +  && in_vmode == out_vmode
> +  && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
> +{
> +  machine_mode exp_mode = DImode;
> +  machine_mode exp_vmode = V2DImode;
> +  enum rs6000_builtins vname = RS6000_BUILTIN_COUNT;
> 
> Using this as a flag value looks unnecessary.  Is this just being done to 
> silence a warning?
> 

Good question!  I didn't check whether there is a warning or not; I'm just used to
initializing variables with a suitable value where possible.  If you don't mind,
may I still keep it?  If some future code uses vname on a path where it's not
assigned, an explicitly wrong enum (bif) seems better than a random one.  Or is
that possibility guaranteed never to happen, because the current uninitialized
variable detection and warning scheme is robust enough that we need not worry
about it at all?

> +  switch (fn)
> + {
> + case MISC_BUILTIN_DIVWE:
> + case MISC_BUILTIN_DIVWEU:
> +   exp_mode = SImode;
> +   exp_vmode = V4SImode;
> +   if (fn == MISC_BUILTIN_DIVWE)
> + vname = P10V_BUILTIN_DIVES_V4SI;
> +   else
> + vname = P10V_BUILTIN_DIVEU_V4SI;
> +   break;
> + case MISC_BUILTIN_DIVDE:
> + case MISC_BUILTIN_DIVDEU:
> +   if (fn == MISC_BUILTIN_DIVDE)
> + vname = P10V_BUILTIN_DIVES_V2DI;
> +   else
> + vname = P10V_BUILTIN_DIVEU_V2DI;
> +   break;
> + case P10_BUILTIN_CFUGED:
> +   vname = P10V_BUILTIN_VCFUGED;
> +   break;
> + case P10_BUILTIN_CNTLZDM:
> +   vname = P10V_BUILTIN_VCLZDM;
> +   break;
> + case P10_BUILTIN_CNTTZDM:
> +   vname = P10V_BUILTIN_VCTZDM;
> +   break;
> + case P10_BUILTIN_PDEPD:
> +   vname = P10V_BUILTIN_VPDEPD;
> +

Re: [PATCH] c++: constexpr std::construct_at on empty field [PR101663]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/3/21 4:04 PM, Patrick Palka wrote:

Here during constexpr evaluation of

   std::construct_at(_M_value)

we find ourselves in cxx_eval_store_expression where the target object
is 'a._M_value' and the initializer is {}.  Since _M_value is an empty
[[no_unique_address]] member we don't create a sub-CONSTRUCTOR for it,
so we end up in the early exit code path for empty stores with mismatched
types and we trip over the assert therein

   gcc_assert (is_empty_class (TREE_TYPE (init)) && !lval);

because lval is true.  The reason it's true is because the INIT_EXPR in
question is the LHS of a COMPOUND_EXPR, and evaluation of the LHS is
always performed with lval=true for some reason.  This is the case ever
since r5-5900, before which we used to do the evaluation with
lval=false.

I'm not sure why we evaluate the LHS of a COMPOUND_EXPR with lval=true


Because there's no lvalue-rvalue conversion. We could change that bool 
to be a tri-value enum that also includes discarded-value expressions 
such as this, but that hasn't seemed necessary.



(changing it to false survives bootstrap+regtest and is sufficient to
fix the PR), but regardless it's also straightforward enough to make the
relevant code path in cxx_eval_store_expression handle lval=true, which
is the approach this patch takes.

This patch also consolidates the duplicate implementations of
std::construct_at/destroy_at from some of the C++20 constexpr tests into
a common header file.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk/11?

PR c++/101663

gcc/cp/ChangeLog:

* constexpr.c (cxx_eval_store_expression): In the early exit
code path for mismatched types, handle lval=true by returning
the target instead of asserting !lval.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/construct_at.h: New convenience header that
defines minimal implementations of std::construct_at/destroy_at,
split out from ...
* g++.dg/cpp2a/constexpr-new5.C: ... here.
* g++.dg/cpp2a/constexpr-new6.C: Use the header.
* g++.dg/cpp2a/constexpr-new14.C: Likewise.
* g++.dg/cpp2a/constexpr-new20.C: New test.
---
  gcc/cp/constexpr.c   |  4 +-
  gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C | 60 +-
  gcc/testsuite/g++.dg/cpp2a/constexpr-new20.C | 18 ++
  gcc/testsuite/g++.dg/cpp2a/constexpr-new5.C  | 60 +-
  gcc/testsuite/g++.dg/cpp2a/constexpr-new6.C  | 64 +---
  gcc/testsuite/g++.dg/cpp2a/construct_at.h| 62 +++
  6 files changed, 85 insertions(+), 183 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-new20.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/construct_at.h

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 1af365d47b9..25d84a377d8 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -5588,8 +5588,8 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree 
t,
   argument, which has the derived type rather than the base type.  In
   this situation, just evaluate the initializer and return, since
   there's no actual data to store.  */
- gcc_assert (is_empty_class (TREE_TYPE (init)) && !lval);
- return init;
+ gcc_assert (is_empty_class (TREE_TYPE (init)));
+ return lval ? target : init;
}
CONSTRUCTOR_ELTS (*valp) = CONSTRUCTOR_ELTS (init);
TREE_CONSTANT (*valp) = TREE_CONSTANT (init);
diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C 
b/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C
index fd6f6075ef0..26037397b1d 100644
--- a/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C
+++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C
@@ -1,65 +1,7 @@
  // PR c++/97195
  // { dg-do compile { target c++20 } }
  
-namespace std

-{
-  typedef __SIZE_TYPE__ size_t;
-
-  template 
-  struct allocator
-  {
-constexpr allocator () noexcept {}
-
-constexpr T *allocate (size_t n)
-{ return static_cast (::operator new (n * sizeof(T))); }
-
-constexpr void
-deallocate (T *p, size_t n)
-{ ::operator delete (p); }
-  };
-
-  template 
-  U __declval (int);
-  template 
-  T __declval (long);
-  template 
-  auto declval () noexcept -> decltype (__declval (0));
-
-  template 
-  struct remove_reference
-  { typedef T type; };
-  template 
-  struct remove_reference
-  { typedef T type; };
-  template 
-  struct remove_reference
-  { typedef T type; };
-
-  template 
-  constexpr T &&
-  forward (typename std::remove_reference::type ) noexcept
-  { return static_cast (t); }
-
-  template
-  constexpr T &&
-  forward (typename std::remove_reference::type &) noexcept
-  { return static_cast (t); }
-
-  template 
-  constexpr auto
-  construct_at (T *l, A &&... a)
-  noexcept (noexcept (::new ((void *) 0) T (std::declval ()...)))
-  -> decltype (::new ((void *) 0) T (std::declval ()...))
-  { return ::new ((void *) l) T (std::forward (a)...); }
-
-

[committed] libstdc++: Fix test that fails randomly [PR101866]

2021-08-11 Thread Jonathan Wakely via Gcc-patches

This test assumes that the same sequence of three values cannot occur,
which is incorrect. It's unlikely, but not impossible.

Perform the check in a loop, so that in the unlikely event of an
identical sequence, we retry. If the library code is buggy it will keep
producing the same sequence and the test will time out. If the code is
working correctly then we will usually break out of the loop after one
iteration, or very rarely after two or three.

libstdc++-v3/ChangeLog:

PR libstdc++/101866
* testsuite/experimental/random/randint.cc: Loop and retry if
reseed() produces the same sequence.

Tested x86_64-linux. Committed to trunk. I'll backport too.

commit 93f1dbc7cdcc4b31ea4061efb4c2acf2d4f81eb8
Author: Jonathan Wakely 
Date:   Wed Aug 11 22:11:19 2021

libstdc++: Fix test that fails randomly [PR101866]

This test assumes that the same sequence of three values cannot occur,
which is incorrect. It's unlikely, but not impossible.

Perform the check in a loop, so that in the unlikely event of an
identical sequence, we retry. If the library code is buggy it will keep
producing the same sequence and the test will time out. If the code is
working correctly then we will usually break out of the loop after one
iteration, or very rarely after two or three.

libstdc++-v3/ChangeLog:

PR libstdc++/101866
* testsuite/experimental/random/randint.cc: Loop and retry if
reseed() produces the same sequence.

diff --git a/libstdc++-v3/testsuite/experimental/random/randint.cc 
b/libstdc++-v3/testsuite/experimental/random/randint.cc
index d6225eba1df..e05151e5ea0 100644
--- a/libstdc++-v3/testsuite/experimental/random/randint.cc
+++ b/libstdc++-v3/testsuite/experimental/random/randint.cc
@@ -34,7 +34,7 @@ test01()
   }
 
   std::experimental::reseed(99u);
-  const long n1[] = {
+  const int n1[] = {
 std::experimental::randint(0, 100),
 std::experimental::randint(0, 100),
 std::experimental::randint(0, 100),
@@ -42,7 +42,7 @@ test01()
 std::experimental::randint(0, 100)
   };
   std::experimental::reseed(99u);
-  const long n2[] = {
+  const int n2[] = {
 std::experimental::randint(0, 100),
 std::experimental::randint(0, 100),
 std::experimental::randint(0, 100),
@@ -52,13 +52,13 @@ test01()
   for (int i = 0; i < 5; ++i)
 VERIFY( n1[i] == n2[i] );
 
-  std::experimental::reseed();
-  const long n3[] = {
-std::experimental::randint(0, 100),
-std::experimental::randint(0, 100),
-std::experimental::randint(0, 100)
-  };
-  VERIFY( !(n3[0] == n1[0] && n3[1] == n1[1] && n3[2] == n1[2]) );
+  do
+  {
+std::experimental::reseed();
+  }
+  while (std::experimental::randint(0, 100) == n1[0]
+  && std::experimental::randint(0, 100) == n1[1]
+  && std::experimental::randint(0, 100) == n1[2]);
 }
 
 void

Re: [PATCH] c++: suppress all warnings on memper pointers to work around dICE [PR101219]

2021-08-11 Thread Sergei Trofimovich via Gcc-patches

On Wed, 11 Aug 2021 15:19:58 -0400
Jason Merrill  wrote:

> On 8/6/21 11:34 AM, Sergei Trofimovich wrote:
> > On Thu, 29 Jul 2021 11:41:39 -0400
> > Jason Merrill  wrote:
> >   
> >> On 7/22/21 7:15 PM, Sergei Trofimovich wrote:  
> >>> From: Sergei Trofimovich 
> >>>
> >>> r12-1804 ("cp: add support for per-location warning groups.") among other
> >>> things removed warning suppression from a few places including 
> >>> ptrmemfuncs.
> >>>
> >>> Currently ptrmemfuncs don't have valid BINFO attached which causes ICEs
> >>> in access checks:
> >>>
> >>>   crash_signal
> >>>   gcc/toplev.c:328
> >>>   perform_or_defer_access_check(tree_node*, tree_node*, tree_node*, 
> >>> int, access_failure_info*)
> >>>   gcc/cp/semantics.c:490
> >>>   finish_non_static_data_member(tree_node*, tree_node*, tree_node*)
> >>>   gcc/cp/semantics.c:2208
> >>>   ...
> >>>
> >>> The change suppresses warnings again until we provide BINFOs for 
> >>> ptrmemfuncs.  
> >>
> >> We don't need BINFOs for PMFs, we need to avoid paths that expect them.
> >>
> >> It looks like the problem is with tsubst_copy_and_build calling
> >> finish_non_static_data_member instead of build_ptrmemfunc_access_expr.  
> > 
> > Sounds good. I'm not sure what would be the best way to match it. Here is
> > my attempt, which seems to survive all regtests:
> > 
> > --- a/gcc/cp/pt.c
> > +++ b/gcc/cp/pt.c
> > @@ -20530,7 +20530,13 @@ tsubst_copy_and_build (tree t,
> >  if (member == error_mark_node)
> >RETURN (error_mark_node);
> > 
> > -   if (TREE_CODE (member) == FIELD_DECL)
> > +   if (object_type && TYPE_PTRMEMFUNC_P(object_type)
> > +   && TREE_CODE (member) == FIELD_DECL)
> > + {
> > +   r = build_ptrmemfunc_access_expr (object, DECL_NAME(member));
> > +   RETURN (r);
> > + }
> > +   else if (TREE_CODE (member) == FIELD_DECL)
> >{
> >  r = finish_non_static_data_member (member, object, NULL_TREE);
> >  if (TREE_CODE (r) == COMPONENT_REF)
> >   
> >>>   PR c++/101219
> >>>
> >>> gcc/cp/ChangeLog:
> >>>
> >>>   * typeck.c (build_ptrmemfunc_access_expr): Suppress all warnings
> >>>   to avoid ICE.
> >>>
> >>> gcc/testsuite/ChangeLog:
> >>>
> >>>   * g++.dg/torture/pr101219.C: New test.  
> >>
> >> This doesn't need to be in torture; it has nothing to do with 
> >> optimization.  
> > 
> > Aha, moved to gcc/testsuite/g++.dg/warn/pr101219.C.
> > 
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/warn/pr101219.C
> > @@ -0,0 +1,11 @@
> > +/* PR c++/101219 - ICE on use of uninitialized memfun pointer
> > +   { dg-do compile }
> > +   { dg-options "-Wall" } */
> > +
> > +struct S { void m(); };
> > +
> > +template  bool f() {
> > +  void (S::*mp)();
> > +
> > +  return ::m == mp; // no warning emitted here (no instantiation)
> > +}
> > 
> > Another question: Is it expected that gcc generates no warnings here?
> > It's an uninstantiated function (-1 for warn), but from what I
> > understand it's guaranteed to generate comparison with uninitialized
> > data if it ever gets instantiated. Given that we used to ICE in
> > warning code gcc could possibly flag it? (+1 for warn)  
> 
> Generally it's desirable to diagnose templates for which no valid 
> instantiation is possible.  It seems reasonable in most cases to also 
> warn about templates for which all instantiations would warn.
> 
> But uninitialized warnings rely on flow analysis that we only do on 
> instantiated functions, and in any case the ICE doesn't depend on mp 
> being uninitialized; I get the same crash if I add = 0 to the declaration.

Aha. That makes sense. Let's just fix ICE then.

> > +   if (object_type && TYPE_PTRMEMFUNC_P(object_type)  
> 
> Missing space before (.
> 
> > +   && TREE_CODE (member) == FIELD_DECL)
> > + {
> > +   r = build_ptrmemfunc_access_expr (object, DECL_NAME(member));  
> 
> And here.

Added both. Attached as v3.

-- 

  Sergei
>From dbb17a22383faa7837bdd2ea9c902bfab53fa8f2 Mon Sep 17 00:00:00 2001
From: Sergei Trofimovich 
Date: Fri, 6 Aug 2021 16:14:16 +0100
Subject: [PATCH v3] c++: fix ptrmemfunc template instantiation [PR101219]

r12-1804 ("cp: add support for per-location warning groups.") among other
things removed warning suppression from a few places including ptrmemfuncs.

This exposed a bug in warning detection code as a reference to missing
BINFO (it's intentionally missing for ptrmemfunc types):

crash_signal
gcc/toplev.c:328
perform_or_defer_access_check(tree_node*, tree_node*, tree_node*, int, access_failure_info*)
gcc/cp/semantics.c:490
finish_non_static_data_member(tree_node*, tree_node*, tree_node*)
gcc/cp/semantics.c:2208
...

The change special-cases ptrmemfuncs in template substitution by using
build_ptrmemfunc_access_expr() instead of finish_non_static_data_member().

PR c++/101219

gcc/cp/ChangeLog:

* pt.c (tsubst_copy_and_build): Use

gfortran.dg/PR82376.f90: Avoid matching a file-path.

2021-08-11 Thread Hans-Peter Nilsson via Gcc-patches

I had a file-path to sources with the substring "new" in it,
and (only) this test regressed compared to results from
another build without "new" in the name.

The test does
 ! { dg-final { scan-tree-dump-times "new" 4 "original" } }
i.e. the contents of the tree-dump-file .original needs to match
the undelimited string "new" exactly four times.  Very brittle.

In the dump-file, there are three lines with calls to new:
 D.908 = new ((integer(kind=4) *) data);
 integer(kind=4) * new (integer(kind=4) & data)
   static integer(kind=4) * new (integer(kind=4) &);

But, there's also a line, which for me and cris-elf looked like:
 _gfortran_runtime_error_at (&"At line 46 of file
  /X/xyzzynewfrob/gcc/testsuite/gfortran.dg/PR82376.f90"[1]{lb: 1 sz: 1},
  &"Pointer actual argument \'new\' is not associated"[1]{lb: 1 sz: 1});
The fourth match is obviously intended to match this line, but only
with *one* match, whereas the path can as above yield another hit.

With Tcl, the regexp for matching the " " *and* the "'"
*and* the "\" gets a bit unsightly, so I suggest just
matching the "new" calls, which according to the comment in
the test is the key point.  You can't have a file-path with
spaces and parentheses in a gcc build.  I'm also making use
of {} rather than "" needing one level of quoting; the "\("
is needed because the matched string is a regexp.

Ok to commit?

testsuite:
* gfortran.dg/PR82376.f90: Robustify match.
---
 gcc/testsuite/gfortran.dg/PR82376.f90 | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/PR82376.f90 
b/gcc/testsuite/gfortran.dg/PR82376.f90
index 07143ab7e82e..b99779ce9d8a 100644
--- a/gcc/testsuite/gfortran.dg/PR82376.f90
+++ b/gcc/testsuite/gfortran.dg/PR82376.f90
@@ -2,7 +2,8 @@
 ! { dg-options "-fdump-tree-original -fcheck=pointer" }
 !
 ! Test the fix for PR82376. The pointer check was doubling up the call
-! to new. The fix reduces the count of 'new' from 5 to 4.
+! to new. The fix reduces the count of 'new' from 5 to 4, or to 3, when
+! counting only calls.
 !
 ! Contributed by José Rui Faustino de Sousa  
 !
@@ -56,4 +57,4 @@ contains
   end subroutine set
 
 end program main_p
-! { dg-final { scan-tree-dump-times "new" 4 "original" } }
+! { dg-final { scan-tree-dump-times { new \(} 3 "original" } }
-- 
2.11.0

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches

Hi, 

I finally decided to take another approach to resolve this issue, it resolved 
all the potential issues with the “address taken” auto variable.

The basic idea is to avoid generating the temporary variable in the beginning. 
As you mentioned, "The reason is that alt_reloc is memory (because it is 
address taken) and that GIMPLE says that register typed stores 
need to use a is_gimple_val RHS which the call is not.”
In order to avoid generating the temporary variable for “address taken” auto 
variable, I updated the utility routine “is_gimple_val” as following:

diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c
index a2563a45c37d..d5ef1aef8cea 100644
--- a/gcc/gimple-expr.c
+++ b/gcc/gimple-expr.c
@@ -787,8 +787,20 @@ is_gimple_reg (tree t)
   return !DECL_NOT_GIMPLE_REG_P (t);
 }
 
+/* Return true if T is a call to .DEFERRED_INIT internal function.  */ 
+static bool
+is_deferred_init_call (tree t)
+{
+  if (TREE_CODE (t) == CALL_EXPR
+  &&  CALL_EXPR_IFN (t) == IFN_DEFERRED_INIT)
+return true;
+  return false;
+}
+
 
-/* Return true if T is a GIMPLE rvalue, i.e. an identifier or a constant.  */
+/* Return true if T is a GIMPLE rvalue, i.e. an identifier or a constant,
+   or a call to .DEFERRED_INIT internal function because the call to
+   .DEFERRED_INIT will eventually be expanded as a constant.  */
 
 bool
 is_gimple_val (tree t)
@@ -799,7 +811,8 @@ is_gimple_val (tree t)
   && !is_gimple_reg (t))
 return false;
 
-  return (is_gimple_variable (t) || is_gimple_min_invariant (t));
+  return (is_gimple_variable (t) || is_gimple_min_invariant (t)
+ || is_deferred_init_call (t));
 }
 
With this change, the temporary variable will not be created for “address 
taken” auto variable, and uninitialized analysis does not need any change. 
Everything works well.

And I believe that treating “call to .DEFERRED_INIT” as “is_gimple_val” is 
reasonable since this call actually is a constant. 

Let me know if you have any objection on this solution.

thanks.

Qing

> On Aug 11, 2021, at 3:30 PM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> Hi, 
> 
> I met another issue for “address taken” auto variable, see below for details:
> 
>  the testing case: (gcc/testsuite/gcc.dg/uninit-16.c)
> 
> int foo, bar;
> 
> static
> void decode_reloc(int reloc, int *is_alt)
> {
>  if (reloc >= 20)
>  *is_alt = 1;
>  else if (reloc >= 10)
>  *is_alt = 0;
> }
> 
> void testfunc()
> {
>  int alt_reloc;
> 
>  decode_reloc(foo, &alt_reloc);
> 
>  if (alt_reloc) /* { dg-warning "may be used uninitialized" } */
>bar = 42;
> }
> 
> When compiled with -ftrivial-auto-var-init=zero -O2 -Wuninitialized 
> -fdump-tree-all:
> 
> .*gimple dump:
> 
> void testfunc ()
> { 
>  int alt_reloc;
> 
>  try
>{
>  _1 = .DEFERRED_INIT (4, 2, 0);
>  alt_reloc = _1;
>  foo.0_2 = foo;
>  decode_reloc (foo.0_2, &alt_reloc);
>  alt_reloc.1_3 = alt_reloc;
>  if (alt_reloc.1_3 != 0) goto ; else goto ;
>  :
>  bar = 42;
>  :
>}
>  finally
>{
>  alt_reloc = {CLOBBER};
>}
> }
> 
> **fre1 dump:
> 
> void testfunc ()
> {
>  int alt_reloc;
>  int _1;
>  int foo.0_2;
> 
>   :
>  _1 = .DEFERRED_INIT (4, 2, 0);
>  foo.0_2 = foo;
>  if (foo.0_2 > 19)
>goto ; [50.00%]
>  else
>goto ; [50.00%]
> 
>   :
>  goto ; [100.00%]
> 
>   :
>  if (foo.0_2 > 9)
>goto ; [50.00%]
>  else
>goto ; [50.00%]
> 
>   :
>  goto ; [100.00%]
> 
>   :
>  if (_1 != 0)
>goto ; [INV]
>  else
>goto ; [INV]
> 
>   :
>  bar = 42;
> 
>   :
>  return;
> 
> }
> 
> From the above IR file after “FRE”, we can see that the major issue with this 
> IR is:
> 
> The address taken auto variable “alt_reloc” has been completely replaced by 
> the temporary variable “_1” in all
> the uses of the original “alt_reloc”. 
> 
> The major problem with such IR is,  during uninitialized analysis phase, the 
> original use of “alt_reloc” disappeared completely.
> So, the warning cannot be reported.
> 
> 
> My questions:
> 
> 1. Is it possible to get the original “alt_reloc” through the temporary 
> variable “_1” with some available information recorded in the IR?
> 2. If not, then we have to record the relationship between “alt_reloc” and 
> “_1” when the original “alt_reloc” is replaced by “_1” and get such 
> relationship during
>Uninitialized analysis phase.  Is this doable?
> 3. Looks like that for “address taken” auto variable, if we have to introduce 
> a new temporary variable and split the call to .DEFERRED_INIT into two:
> 
>  temp = .DEFERRED_INIT (4, 2, 0);
>  alt_reloc = temp;
> 
>   More issues might be possible.
> 
> Any comments and suggestions on this issue?
> 
> Qing
> 
> j
>> On Aug 11, 2021, at 11:55 AM, Richard Biener  wrote:
>> 
>> On August 11, 2021 6:22:00 PM GMT+02:00, Qing Zhao  
>> wrote:
>>> 
>>> 
>>>> On Aug 11, 2021, at 10:53 AM, Richard Biener  wrote:
>>>> 
>>>> On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao  
>>>> wrote:
>>>>> I modified the

Re: [PATCH] c++: parameterized requires-expr as default argument [PR101725]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/3/21 4:04 PM, Patrick Palka wrote:

Here we're rejecting the default template argument

   requires (T t) { x(t); }

because we consider the 't' in the requirement to be a local variable
(according to local_variable_p), and we generally forbid local variables
from appearing inside template arguments.  We can perhaps fix this by
giving special treatment to parameters introduced by requires-expressions,
but DR 2082 relaxed the restriction about local variables appearing inside
default arguments to permit them inside unevaluated operands thereof.
So this patch just implements DR 2082 which also fixes this PR since a
requires-expression is an unevaluated context.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps 11?


OK for both.


PR c++/101725
DR 2082

gcc/cp/ChangeLog:

* cp-tree.h (unevaluated_p): Return true for REQUIRES_EXPR.
* decl.c (local_variable_p_walkfn): Don't walk into unevaluated
operands.
* parser.c (cp_parser_primary_expression) : Never
reject uses of local variables in unevaluated contexts.
* tree.c (cp_walk_subtrees) : Increment
cp_unevaluated_operand.  Use cp_walk_tree directly instead of
WALK_SUBTREE to avoid the goto.  Use REQUIRES_EXPR_REQS instead
of TREE_OPERAND directly.

gcc/testsuite/ChangeLog:

* g++.dg/DRs/dr2082.C: New test.
* g++.dg/cpp2a/concepts-uneval4.C: New test.
* g++.dg/cpp2a/concepts-uneval5.C: New test.
---
  gcc/cp/cp-tree.h  |  3 ++-
  gcc/cp/decl.c |  8 
  gcc/cp/parser.c   |  5 -
  gcc/cp/tree.c |  4 +++-
  gcc/testsuite/g++.dg/DRs/dr2082.C | 12 
  gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C | 12 
  6 files changed, 41 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/DRs/dr2082.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 9a47a8787d6..6a8264b0c61 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -8494,7 +8494,8 @@ unevaluated_p (tree_code code)
return (code == DECLTYPE_TYPE
  || code == ALIGNOF_EXPR
  || code == SIZEOF_EXPR
- || code == NOEXCEPT_EXPR);
+ || code == NOEXCEPT_EXPR
+ || code == REQUIRES_EXPR);
  }
  
  /* RAII class to push/pop the access scope for T.  */

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 6fa6b9adc87..b0b492360af 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -14270,6 +14270,14 @@ static tree
  local_variable_p_walkfn (tree *tp, int *walk_subtrees,
 void * /*data*/)
  {
+  if (unevaluated_p (TREE_CODE (*tp)))
+{
+  /* DR 2082 permits local variables in unevaluated contexts
+within a default argument.  */
+  *walk_subtrees = 0;
+  return NULL_TREE;
+}
+
if (local_variable_p (*tp)
&& (!DECL_ARTIFICIAL (*tp) || DECL_NAME (*tp) == this_identifier))
  return *tp;
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 47bf7d9ad1f..8b551db2c8a 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -5971,7 +5971,10 @@ cp_parser_primary_expression (cp_parser *parser,
/* Check to see if DECL is a local variable in a context
   where that is forbidden.  */
if ((parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN)
-   && local_variable_p (decl))
+   && local_variable_p (decl)
+   /* DR 2082 permits local variables in unevaluated contexts
+  within a default argument.  */
+   && !cp_unevaluated_operand)
  {
const char *msg
  = (TREE_CODE (decl) == PARM_DECL
diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c
index 8345396ec33..e8831b21802 100644
--- a/gcc/cp/tree.c
+++ b/gcc/cp/tree.c
@@ -5386,7 +5386,9 @@ cp_walk_subtrees (tree *tp, int *walk_subtrees_p, 
walk_tree_fn func,
// walk the parameter list. Doing so causes false
// positives in the pack expansion checker since the
// requires parameters are introduced as pack expansions.
-  WALK_SUBTREE (TREE_OPERAND (*tp, 1));
+  ++cp_unevaluated_operand;
+  result = cp_walk_tree (&REQUIRES_EXPR_REQS (*tp), func, data, pset);
+  --cp_unevaluated_operand;
*walk_subtrees_p = 0;
break;
  
diff --git a/gcc/testsuite/g++.dg/DRs/dr2082.C b/gcc/testsuite/g++.dg/DRs/dr2082.C

new file mode 100644
index 000..84bb23f63f2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/DRs/dr2082.C
@@ -0,0 +1,12 @@
+// DR 2082
+
+void f() {
+  int i;
+  extern void h(int x = sizeof(i));
+}
+
+class A {
+  void f(A* p = this) { } // { dg-error "this" }
+};
+
+int h(int a, int b = sizeof(a));
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C
new

Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.

2021-08-11 Thread Michael Meissner via Gcc-patches

On Wed, Aug 11, 2021 at 05:55:39AM -0500, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, Aug 10, 2021 at 04:46:11PM -0600, Sandra Loosemore wrote:
> > OK.  I used your wording verbatim for the first one.  For the second 
> > one, I'm still pretty confused as I think it is at least theoretically 
> > possible on PowerPC to have a target with 64-bit long double (AIX?) that 
> 
> Some embedded and embedded-like subtargets use 64-bit long double by
> default.  You can also configure this on any Power target (not that it
> will necessarily work ;-) )

It will work on Linux LE systems with glibc 2.32 (it may work with earlier
glibcs).  I've built parallel toolchains with all 3 long double formats.  There
are some tests in the test suite that fail if you configure 64-bit long doubles.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches

Hi, 

I met another issue for “address taken” auto variable, see below for details:

 the testing case: (gcc/testsuite/gcc.dg/uninit-16.c)

int foo, bar;

static
void decode_reloc(int reloc, int *is_alt)
{
  if (reloc >= 20)
  *is_alt = 1;
  else if (reloc >= 10)
  *is_alt = 0;
}

void testfunc()
{
  int alt_reloc;

  decode_reloc(foo, &alt_reloc);

  if (alt_reloc) /* { dg-warning "may be used uninitialized" } */
bar = 42;
}

When compiled with -ftrivial-auto-var-init=zero -O2 -Wuninitialized 
-fdump-tree-all:

.*gimple dump:

void testfunc ()
{ 
  int alt_reloc;

  try
{
  _1 = .DEFERRED_INIT (4, 2, 0);
  alt_reloc = _1;
  foo.0_2 = foo;
  decode_reloc (foo.0_2, &alt_reloc);
  alt_reloc.1_3 = alt_reloc;
  if (alt_reloc.1_3 != 0) goto ; else goto ;
  :
  bar = 42;
  :
}
  finally
{
  alt_reloc = {CLOBBER};
}
}

**fre1 dump:

void testfunc ()
{
  int alt_reloc;
  int _1;
  int foo.0_2;

   :
  _1 = .DEFERRED_INIT (4, 2, 0);
  foo.0_2 = foo;
  if (foo.0_2 > 19)
goto ; [50.00%]
  else
goto ; [50.00%]

   :
  goto ; [100.00%]

   :
  if (foo.0_2 > 9)
goto ; [50.00%]
  else
goto ; [50.00%]

   :
  goto ; [100.00%]

   :
  if (_1 != 0)
goto ; [INV]
  else
goto ; [INV]

   :
  bar = 42;

   :
  return;

}

From the above IR file after “FRE”, we can see that the major issue with this 
IR is:

The address taken auto variable “alt_reloc” has been completely replaced by the 
temporary variable “_1” in all
the uses of the original “alt_reloc”. 

The major problem with such IR is,  during uninitialized analysis phase, the 
original use of “alt_reloc” disappeared completely.
So, the warning cannot be reported.

My questions:

1. Is it possible to get the original “alt_reloc” through the temporary 
variable “_1” with some available information recorded in the IR?
2. If not, then we have to record the relationship between “alt_reloc” and “_1” 
when the original “alt_reloc” is replaced by “_1” and get such relationship 
during
Uninitialized analysis phase.  Is this doable?
3. Looks like that for “address taken” auto variable, if we have to introduce a 
new temporary variable and split the call to .DEFERRED_INIT into two:

  temp = .DEFERRED_INIT (4, 2, 0);
  alt_reloc = temp;

   More issues might be possible.

Any comments and suggestions on this issue?

Qing

j
> On Aug 11, 2021, at 11:55 AM, Richard Biener  wrote:
> 
> On August 11, 2021 6:22:00 PM GMT+02:00, Qing Zhao  
> wrote:
>> 
>> 
>>> On Aug 11, 2021, at 10:53 AM, Richard Biener  wrote:
>>> 
>>> On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao  
>>> wrote:
 I modified the routine “gimple_add_init_for_auto_var” as the following:

 /* Generate initialization to automatic variable DECL based on INIT_TYPE.
 Build a call to internal const function DEFERRED_INIT:
 1st argument: SIZE of the DECL;
 2nd argument: INIT_TYPE;
 3rd argument: IS_VLA, 0 NO, 1 YES;

 as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA).  */
 static void
 gimple_add_init_for_auto_var (tree decl,
enum auto_init_type init_type,
bool is_vla,
gimple_seq *seq_p)
 {
 gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl));
 gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
 tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl));

 tree init_type_node
  = build_int_cst (integer_type_node, (int) init_type);
 tree is_vla_node
  = build_int_cst (integer_type_node, (int) is_vla);

 tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, 
 IFN_DEFERRED_INIT,
  TREE_TYPE (decl), 3,
  decl_size, init_type_node,
  is_vla_node);

 /* If this DECL is a VLA, a temporary address variable for it has been
   created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl),
   we should use it as the LHS of the call.  */

 tree lhs_call
  = is_vla ? DECL_VALUE_EXPR (decl) : decl;
 gimplify_assign (lhs_call, call, seq_p);
 }

 With this change, the current issue is resolved, the gimple dump now is:

 (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1);

 However, there is another new issue:

 For the following testing case:

 ==
 [opc@qinzhao-ol8u3-x86 gcc]$ cat t.c
 int bar;

 extern void decode_reloc(int *);

 void testfunc()
 {
 int alt_reloc;

 decode_reloc(&alt_reloc);

 if (alt_reloc) /* { dg-warning "may be used uninitialized" } */
  bar = 42; 
 }
 =

 In the above, the auto var “alt_reloc” is address taken, then the gimple 
 dump for it when compiled with -ftrivial-auto-var-init=zero

Re: [PATCH] c++: parameterized requires-expr as default argument [PR101725]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/3/21 4:04 PM, Patrick Palka wrote:

Here we're rejecting the default template argument

   requires (T t) { x(t); }

because we consider the 't' in the requirement to be a local variable
(according to local_variable_p), and we generally forbid local variables
from appearing inside template arguments.  We can perhaps fix this by
giving special treatment to parameters introduced by requires-expressions,
but DR 2082 relaxed the restriction about local variables appearing inside
default arguments to permit them inside unevaluated operands thereof.
So this patch just implements DR 2082 which also fixes this PR since a
requires-expression is an unevaluated context.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps 11?


OK for both.


PR c++/101725
DR 2082

gcc/cp/ChangeLog:

* cp-tree.h (unevaluated_p): Return true for REQUIRES_EXPR.
* decl.c (local_variable_p_walkfn): Don't walk into unevaluated
operands.
* parser.c (cp_parser_primary_expression) : Never
reject uses of local variables in unevaluated contexts.
* tree.c (cp_walk_subtrees) : Increment
cp_unevaluated_operand.  Use cp_walk_tree directly instead of
WALK_SUBTREE to avoid the goto.  Use REQUIRES_EXPR_REQS instead
of TREE_OPERAND directly.

gcc/testsuite/ChangeLog:

* g++.dg/DRs/dr2082.C: New test.
* g++.dg/cpp2a/concepts-uneval4.C: New test.
* g++.dg/cpp2a/concepts-uneval5.C: New test.
---
  gcc/cp/cp-tree.h  |  3 ++-
  gcc/cp/decl.c |  8 
  gcc/cp/parser.c   |  5 -
  gcc/cp/tree.c |  4 +++-
  gcc/testsuite/g++.dg/DRs/dr2082.C | 12 
  gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C | 12 
  6 files changed, 41 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/DRs/dr2082.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 9a47a8787d6..6a8264b0c61 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -8494,7 +8494,8 @@ unevaluated_p (tree_code code)
return (code == DECLTYPE_TYPE
  || code == ALIGNOF_EXPR
  || code == SIZEOF_EXPR
- || code == NOEXCEPT_EXPR);
+ || code == NOEXCEPT_EXPR
+ || code == REQUIRES_EXPR);
  }
  
  /* RAII class to push/pop the access scope for T.  */

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 6fa6b9adc87..b0b492360af 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -14270,6 +14270,14 @@ static tree
  local_variable_p_walkfn (tree *tp, int *walk_subtrees,
 void * /*data*/)
  {
+  if (unevaluated_p (TREE_CODE (*tp)))
+{
+  /* DR 2082 permits local variables in unevaluated contexts
+within a default argument.  */
+  *walk_subtrees = 0;
+  return NULL_TREE;
+}
+
if (local_variable_p (*tp)
&& (!DECL_ARTIFICIAL (*tp) || DECL_NAME (*tp) == this_identifier))
  return *tp;
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 47bf7d9ad1f..8b551db2c8a 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -5971,7 +5971,10 @@ cp_parser_primary_expression (cp_parser *parser,
/* Check to see if DECL is a local variable in a context
   where that is forbidden.  */
if ((parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN)
-   && local_variable_p (decl))
+   && local_variable_p (decl)
+   /* DR 2082 permits local variables in unevaluated contexts
+  within a default argument.  */
+   && !cp_unevaluated_operand)
  {
const char *msg
  = (TREE_CODE (decl) == PARM_DECL
diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c
index 8345396ec33..e8831b21802 100644
--- a/gcc/cp/tree.c
+++ b/gcc/cp/tree.c
@@ -5386,7 +5386,9 @@ cp_walk_subtrees (tree *tp, int *walk_subtrees_p, 
walk_tree_fn func,
// walk the parameter list. Doing so causes false
// positives in the pack expansion checker since the
// requires parameters are introduced as pack expansions.
-  WALK_SUBTREE (TREE_OPERAND (*tp, 1));
+  ++cp_unevaluated_operand;
+  result = cp_walk_tree (&REQUIRES_EXPR_REQS (*tp), func, data, pset);
+  --cp_unevaluated_operand;
*walk_subtrees_p = 0;
break;
  
diff --git a/gcc/testsuite/g++.dg/DRs/dr2082.C b/gcc/testsuite/g++.dg/DRs/dr2082.C

new file mode 100644
index 000..84bb23f63f2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/DRs/dr2082.C
@@ -0,0 +1,12 @@
+// DR 2082
+
+void f() {
+  int i;
+  extern void h(int x = sizeof(i));
+}
+
+class A {
+  void f(A* p = this) { } // { dg-error "this" }
+};
+
+int h(int a, int b = sizeof(a));
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C
new

Re: [PATCH] c++: Fix up parsing of attributes for using-directive

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/4/21 6:05 AM, Jakub Jelinek wrote:

Hi!

As I've said earlier and added xfails in gen-attrs-76.C test,
https://eel.is/c++draft/namespace.udir#nt:using-directive
has attribute-specifier-seq[opt] at the start, not at the end before ;
as gcc is expecting.
IMHO we should continue parsing at the end the GNU attributes
because using namespace N __attribute__((strong));, while not supported
anymore, used to be supported in the past, but my code searches for
using namespace N [[gnu::strong]]; didn't reveal anything at all.



Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-08-04  Jakub Jelinek  

* parser.c (cp_parser_block_declaration): Call
cp_parser_using_directive for C++11 attributes followed by
using namespace tokens.
(cp_parser_using_directive): Parse C++11 attributes at the start
of the directive rather than at the end, only parse GNU attributes
at the end.

* g++.dg/lookup/strong-using.C: Add test using [[gnu::strong]]
as well.
* g++.dg/lookup/strong-using2.C: Likewise.
* g++.dg/cpp0x/gen-attrs-58.C: Move alignas(int) before
using namespace.
* g++.dg/cpp0x/gen-attrs-59.C: Move alignas(X) before
using namespace, add tests for alignas before semicolon.
* g++.dg/cpp0x/gen-attrs-76.C: Remove xfails.  Add test for
C++11 attributes on using directive before semicolon.

--- gcc/cp/parser.c.jj  2021-08-03 00:44:32.890492433 +0200
+++ gcc/cp/parser.c 2021-08-03 17:38:07.541725977 +0200
@@ -14655,6 +14655,7 @@ cp_parser_block_declaration (cp_parser *
/* Peek at the next token to figure out which kind of declaration is
   present.  */
cp_token *token1 = cp_lexer_peek_token (parser->lexer);
+  size_t attr_idx;
  
/* If the next keyword is `asm', we have an asm-definition.  */

if (token1->keyword == RID_ASM)
@@ -14708,6 +14709,18 @@ cp_parser_block_declaration (cp_parser *
/* If the next token is `static_assert' we have a static assertion.  */
else if (token1->keyword == RID_STATIC_ASSERT)
  cp_parser_static_assert (parser, /*member_p=*/false);
+  /* If the next tokens after attributes is `using namespace', then we have
+ a using-directive.  */
+  else if ((attr_idx = cp_parser_skip_std_attribute_spec_seq (parser, 1)) != 1
+  && cp_lexer_peek_nth_token (parser->lexer,
+  attr_idx)->keyword == RID_USING
+  && cp_lexer_peek_nth_token (parser->lexer,
+  attr_idx + 1)->keyword == RID_NAMESPACE)


Let's use cp_lexer_nth_token_is_keyword here.  OK with that change.


+{
+  if (statement_p)
+   cp_parser_commit_to_tentative_parse (parser);
+  cp_parser_using_directive (parser);
+}
/* Anything else must be a simple-declaration.  */
else
  cp_parser_simple_declaration (parser, !statement_p,
@@ -21394,14 +21407,21 @@ cp_parser_alias_declaration (cp_parser*
  /* Parse a using-directive.
  
 using-directive:

- using namespace :: [opt] nested-name-specifier [opt]
-   namespace-name ;  */
+ attribute-specifier-seq [opt] using namespace :: [opt]
+   nested-name-specifier [opt] namespace-name ;  */
  
  static void

  cp_parser_using_directive (cp_parser* parser)
  {
tree namespace_decl;
-  tree attribs;
+  tree attribs = cp_parser_std_attribute_spec_seq (parser);
+  if (cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON))
+{
+  /* Error during attribute parsing that resulted in skipping
+to next semicolon.  */
+  cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
+  return;
+}
  
/* Look for the `using' keyword.  */

cp_parser_require_keyword (parser, RID_USING, RT_USING);
@@ -21418,8 +21438,9 @@ cp_parser_using_directive (cp_parser* pa
/* Get the namespace being used.  */
namespace_decl = cp_parser_namespace_name (parser);
cp_warn_deprecated_use_scopes (namespace_decl);
-  /* And any specified attributes.  */
-  attribs = cp_parser_attributes_opt (parser);
+  /* And any specified GNU attributes.  */
+  if (cp_next_tokens_can_be_gnu_attribute_p (parser))
+attribs = chainon (attribs, cp_parser_gnu_attributes_opt (parser));
  
/* Update the symbol table.  */

finish_using_directive (namespace_decl, attribs);
--- gcc/testsuite/g++.dg/lookup/strong-using.C.jj   2020-01-12 
11:54:37.197401580 +0100
+++ gcc/testsuite/g++.dg/lookup/strong-using.C  2021-08-03 17:12:05.872281490 
+0200
@@ -8,3 +8,12 @@ namespace A
  
using namespace B __attribute__ ((strong)); // { dg-warning "no longer supported" "" }

  }
+
+namespace C
+{
+  namespace D // { dg-message "inline namespace" }
+  {
+  }
+
+  [[gnu::strong]] using namespace D; // { dg-warning "no longer supported" "" }
+}
--- gcc/testsuite/g++.dg/lookup/strong-using2.C.jj  2020-01-12 
11:54:37.197401580 +0100
+++ gcc/testsuite/g++.dg/lookup/strong-using2.C 2021-08-03

PING [PATCH, part2] PR fortran/98411 [10/11/12 Regression] Pointless: Array larger than ‘-fmax-stack-var-size=’, ...

2021-08-11 Thread Harald Anlauf via Gcc-patches

*Ping*

> Gesendet: Mittwoch, 04. August 2021 um 23:09 Uhr
> Von: "Harald Anlauf" 
> An: "fortran" , "gcc-patches" 
> Betreff: [PATCH, part2] PR fortran/98411 [10/11/12 Regression] Pointless: 
> Array larger than ‘-fmax-stack-var-size=’, ...
>
> Dear all,
> 
> here's the second part that should fix this regression for good.
> The patch also adjusts the warning message to make it easier to
> understand, using the suggestion by Tobias (see PR).
> 
> Since F2018 in principle makes RECURSIVE the default, which might
> conflict with the purpose of the testcase, I chose to change the
> options to include -std=f2008, and to verify that implicit SAVE
> works the same as explicit SAVE.
> 
> Regtested on x86_64-pc-linux-gnu.  OK for affected branches?
> 
> Thanks,
> Harald
> 
> 
> Fortran: fix pointless warning for static variables
> 
> gcc/fortran/ChangeLog:
> 
>   PR fortran/98411
>   * trans-decl.c (gfc_finish_var_decl): Adjust check to handle
>   implicit SAVE as well as variables in the main program.  Improve
>   warning message text.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR fortran/98411
>   * gfortran.dg/pr98411.f90: Adjust testcase options to restrict to
>   F2008, and verify case of implicit SAVE.
> 
>

PING Re: [PATCH] PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514

2021-08-11 Thread Harald Anlauf via Gcc-patches

*Ping*

> Gesendet: Dienstag, 03. August 2021 um 23:17 Uhr
> Von: "Harald Anlauf" 
> An: "Harald Anlauf" 
> Cc: "Tobias Burnus" , "Bernhard Reutner-Fischer" 
> , "Harald Anlauf via Gcc-patches" 
> , "fortran" 
> Betreff: Re: [PATCH] PR fortran/100950 - ICE in 
> output_constructor_regular_field, at varasm.c:5514
>
> Here's now my third attempt to fix this PR, taking into account
> the comments by Tobias and Bernhard.
> 
> > > On 10.06.21 20:52, Harald Anlauf via Fortran wrote:
> > > > +static bool
> > > > +substring_has_constant_len (gfc_expr *e)
> > > > +{
> > > > +  ptrdiff_t istart, iend;
> > > > +  size_t length;
> > > > +  bool equal_length = false;
> > > > +
> > > > +  if (e->ts.type != BT_CHARACTER
> > > > +  || !e->ref
> > > > +  || e->ref->type != REF_SUBSTRING
> > > 
> > > Is there a reason why you do not handle:
> > > 
> > > type t
> > >character(len=5) :: str1
> > >character(len=:), allocatable :: str2
> > > end type
> > > type(t) :: x
> > > 
> > > allocate(x%str2, source="abd")
> > > if (len (x%str)) /= 1) ...
> > > if (len (x%str2(1:2) /= 2) ...
> > > etc.
> > > 
> > > Namely: Search the last_ref = expr->ref->next->next ...?
> > > and then check that lastref?
> 
> The mentioned search is now implemented.
> 
> Note, however, that gfc_simplify_len still won't handle neither
> deferred strings nor their substrings.
> 
> I think there is nothing to simplify at compile time here.  Otherwise
> there would be a conflict/inconsistency with type parameter inquiry,
> see F2018:9.4.5(2):
> 
> "A deferred type parameter of a pointer that is not associated or
> of an unallocated allocatable variable shall not be inquired about."
> 
> > >* * *
> > > 
> > > Slightly unrelated: I think the following does not violate
> > > F2018's R916 / C923 – but is rejected, namely:
> > >R916  type-param-inquiry  is  designator % type-param-name
> > > the latter is 'len' or 'kind' for intrinsic types. And:
> > >R901  designator is ...
> > > or substring
> > > But
> > > 
> > > character(len=5) :: str
> > > print *, str(1:3)%len
> > > end
> > > 
> > > fails with
> > > 
> > >  2 | print *, str(1:3)%len
> > >|  1
> > > Error: Syntax error in PRINT statement at (1)
> > > 
> > > 
> > > Assuming you don't want to handle it, can you open a new PR?
> > > Thanks!
> 
> I tried to look into this, but there appear to be several unrelated
> issues requiring a separate treatment.  I therefore opened:
> 
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735
> 
> > > > +  istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer);
> > > > +  iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer);
> > > > +  length = gfc_mpz_get_hwi 
> > > > (e->ref->u.ss.length->length->value.integer);
> > > > +
> > > > +  if (istart <= iend)
> > > > +{
> > > > +  if (istart < 1)
> > > > + {
> > > > +   gfc_error ("Substring start index (%ld) at %L below 1",
> > > > > +  (long) istart, &e->ref->u.ss.start->where);
> > > 
> > > As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC.
> > > 
> > > (It probably only matters on Windows which uses long == int = 32bit for
> > > strings longer than INT_MAX.)
> 
> Done.
> 
> The updated patch regtests fine.  OK?
> 
> Thanks,
> Harald
> 
> 
> Fortran - simplify length of substring with constant bounds
> 
> gcc/fortran/ChangeLog:
> 
>   PR fortran/100950
>   * simplify.c (substring_has_constant_len): New.
>   (gfc_simplify_len): Handle case of substrings with constant
>   bounds.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR fortran/100950
>   * gfortran.dg/pr100950.f90: New test.
> 
>

[PATCH] PR fortran/99351 - ICE in gfc_finish_var_decl, at fortran/trans-decl.c:695

2021-08-11 Thread Harald Anlauf via Gcc-patches

Dear all,

the checks for the STAT= and ERRMSG= arguments to the coarray SYNC statements
did not properly handle several cases, such as named constants (parameters).
While fixing this, I adjusted the code similarly to what was recently done
for (DE)ALLOCATE.  We now also accept function references with data pointer
result.  (See also PR101652).

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald


Fortran: fix checks for STAT= and ERRMSG= arguments of SYNC ALL/SYNC IMAGES

gcc/fortran/ChangeLog:

PR fortran/99351
* match.c (sync_statement): Replace %v code by %e in gfc_match to
allow for function references as STAT and ERRMSG arguments.
* resolve.c (resolve_sync): Adjust checks of STAT= and ERRMSG= to
being definable arguments.  Function references with a data
pointer result are accepted.
* trans-stmt.c (gfc_trans_sync): Adjust assertion.

gcc/testsuite/ChangeLog:

PR fortran/99351
* gfortran.dg/coarray_sync.f90: New test.
* gfortran.dg/coarray_3.f90: Adjust to change error messages.

diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c
index b1105481099..16502da001d 100644
--- a/gcc/fortran/match.c
+++ b/gcc/fortran/match.c
@@ -3855,7 +3855,7 @@ sync_statement (gfc_statement st)

   for (;;)
 {
-  m = gfc_match (" stat = %v", &stat);
+  m = gfc_match (" stat = %e", &stat);
   if (m == MATCH_ERROR)
 	goto syntax;
   if (m == MATCH_YES)
@@ -3875,7 +3875,7 @@ sync_statement (gfc_statement st)
 	  break;
 	}

-  m = gfc_match (" errmsg = %v", &errmsg);
+  m = gfc_match (" errmsg = %e", &errmsg);
   if (m == MATCH_ERROR)
 	goto syntax;
   if (m == MATCH_YES)
diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index 592364689f9..959f0bed4fb 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -10236,19 +10236,27 @@ resolve_sync (gfc_code *code)

   /* Check STAT.  */
   gfc_resolve_expr (code->expr2);
-  if (code->expr2
-  && (code->expr2->ts.type != BT_INTEGER || code->expr2->rank != 0
-	  || code->expr2->expr_type != EXPR_VARIABLE))
-gfc_error ("STAT= argument at %L must be a scalar INTEGER variable",
-	   &code->expr2->where);
+  if (code->expr2)
+{
+  if (code->expr2->ts.type != BT_INTEGER || code->expr2->rank != 0)
+	gfc_error ("STAT= argument at %L must be a scalar INTEGER variable",
+		   &code->expr2->where);
+  else
+	gfc_check_vardef_context (code->expr2, false, false, false,
+  _("STAT variable"));
+}

   /* Check ERRMSG.  */
   gfc_resolve_expr (code->expr3);
-  if (code->expr3
-  && (code->expr3->ts.type != BT_CHARACTER || code->expr3->rank != 0
-	  || code->expr3->expr_type != EXPR_VARIABLE))
-gfc_error ("ERRMSG= argument at %L must be a scalar CHARACTER variable",
-	   &code->expr3->where);
+  if (code->expr3)
+{
+  if (code->expr3->ts.type != BT_CHARACTER || code->expr3->rank != 0)
+	gfc_error ("ERRMSG= argument at %L must be a scalar CHARACTER variable",
+		   &code->expr3->where);
+  else
+	gfc_check_vardef_context (code->expr3, false, false, false,
+  _("ERRMSG variable"));
+}
 }


diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index 7cbdef7a304..11df1863bad 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -1226,7 +1226,8 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)

   if (code->expr2)
 {
-  gcc_assert (code->expr2->expr_type == EXPR_VARIABLE);
+  gcc_assert (code->expr2->expr_type == EXPR_VARIABLE
+		  || code->expr2->expr_type == EXPR_FUNCTION);
   gfc_init_se (&argse, NULL);
   gfc_conv_expr_val (&argse, code->expr2);
   stat = argse.expr;
@@ -1236,7 +1237,8 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)

   if (code->expr3 && flag_coarray == GFC_FCOARRAY_LIB)
 {
-  gcc_assert (code->expr3->expr_type == EXPR_VARIABLE);
+  gcc_assert (code->expr3->expr_type == EXPR_VARIABLE
+		  || code->expr3->expr_type == EXPR_FUNCTION);
   gfc_init_se (&argse, NULL);
   argse.want_pointer = 1;
   gfc_conv_expr (&argse, code->expr3);
diff --git a/gcc/testsuite/gfortran.dg/coarray_3.f90 b/gcc/testsuite/gfortran.dg/coarray_3.f90
index d152ce1b2bd..1c294cd0189 100644
--- a/gcc/testsuite/gfortran.dg/coarray_3.f90
+++ b/gcc/testsuite/gfortran.dg/coarray_3.f90
@@ -11,11 +11,11 @@ character(len=30) :: str(2)
 critical fkl ! { dg-error "Syntax error in CRITICAL" }
 end critical fkl ! { dg-error "Expecting END PROGRAM" }

-sync all (stat=1) ! { dg-error "Syntax error in SYNC ALL" }
+sync all (stat=1) ! { dg-error "Non-variable expression" }
 sync all ( stat = n,stat=k) ! { dg-error "Redundant STAT" }
 sync memory (errmsg=str) ! { dg-error "must be a scalar CHARACTER variable" }
 sync memory (errmsg=n) ! { dg-error "must be a scalar CHARACTER variable" }
-sync images (*, stat=1.0) ! { dg-error "Syntax error in SYNC IMAGES" }
+sync images (*, stat=1.0) ! { dg-error "must be a scalar INTEGER variable" }
 sync images (-1) ! { dg-error "must between 1 and num_images" }

Re: [PATCH] c++: Optimize constinit thread_local vars [PR101786]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/6/21 4:16 AM, Jakub Jelinek wrote:

Hi!

The paper that introduced constinit mentioned in rationale that constinit
can be used on externs as well and that it can be used to avoid the
thread_local initialization wrappers, because the standard requires that
if constinit is present on any declaration, it is also present on the
initialization declaration, even if it is in some other TU etc.

There is a small problem though, we use the tls wrappers not just if
the thread_local variable needs dynamic initialization, but also when
it has static initialization, but non-trivial destructor, as the
"dynamic initialization" in that case needs to register the destructor.

So, the following patch optimizes constinit thread_local vars only
if we can prove they will not have non-trivial destructors.  That includes
the case where we have incomplete type where we don't know and need to
conservatively assume the type will have non-trivial destructor at the
initializing declaration side.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK.


2021-08-06  Jakub Jelinek  

PR c++/101786
* decl2.c (var_defined_without_dynamic_init): Return true for
DECL_DECLARED_CONSTINIT_P with complete type and trivial destructor.

* g++.dg/cpp2a/constinit16.C: New test.

--- gcc/cp/decl2.c.jj   2021-07-02 21:59:12.359171627 +0200
+++ gcc/cp/decl2.c  2021-08-05 16:09:39.833599188 +0200
@@ -3447,6 +3447,12 @@ set_guard (tree guard)
  static bool
  var_defined_without_dynamic_init (tree var)
  {
+  /* constinit vars are guaranteed to not have dynamic initializer,
+ but still registering the destructor counts as dynamic initialization.  */
+  if (DECL_DECLARED_CONSTINIT_P (var)
+  && COMPLETE_TYPE_P (TREE_TYPE (var))
+  && !TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (var)))
+return true;
/* If it's defined in another TU, we can't tell.  */
if (DECL_EXTERNAL (var))
  return false;
--- gcc/testsuite/g++.dg/cpp2a/constinit16.C.jj 2021-08-05 15:50:49.702463664 
+0200
+++ gcc/testsuite/g++.dg/cpp2a/constinit16.C2021-08-05 16:14:52.893202685 
+0200
@@ -0,0 +1,21 @@
+// PR c++/101786
+// { dg-do compile { target c++20 } }
+// { dg-add-options tls }
+// { dg-require-alias "" }
+// { dg-require-effective-target tls_runtime }
+// { dg-final { scan-assembler-not "_ZTH17mythreadlocalvar1" } }
+// { dg-final { scan-assembler "_ZTH17mythreadlocalvar2" } }
+// { dg-final { scan-assembler-not "_ZTH17mythreadlocalvar3" } }
+// { dg-final { scan-assembler "_ZTH17mythreadlocalvar4" } }
+
+extern thread_local constinit int mythreadlocalvar1;
+struct S;
+extern thread_local constinit S mythreadlocalvar2;
+struct T { int t; };
+extern thread_local constinit T mythreadlocalvar3;
+struct U { int u; ~U (); };
+extern thread_local constinit U mythreadlocalvar4;
+int foo () { return mythreadlocalvar1; }
+S *bar () { return &mythreadlocalvar2; }
+T *baz () { return &mythreadlocalvar3; }
+U *qux () { return &mythreadlocalvar4; }

Jakub

Re: [PATCH] c++: suppress all warnings on member pointers to work around ICE [PR101219]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/6/21 11:34 AM, Sergei Trofimovich wrote:

On Thu, 29 Jul 2021 11:41:39 -0400
Jason Merrill  wrote:


On 7/22/21 7:15 PM, Sergei Trofimovich wrote:

From: Sergei Trofimovich 

r12-1804 ("cp: add support for per-location warning groups.") among other
things removed warning suppression from a few places including ptrmemfuncs.

Currently ptrmemfuncs don't have valid BINFO attached which causes ICEs
in access checks:

  crash_signal
  gcc/toplev.c:328
  perform_or_defer_access_check(tree_node*, tree_node*, tree_node*, int, 
access_failure_info*)
  gcc/cp/semantics.c:490
  finish_non_static_data_member(tree_node*, tree_node*, tree_node*)
  gcc/cp/semantics.c:2208
  ...

The change suppresses warnings again until we provide BINFOs for ptrmemfuncs.


We don't need BINFOs for PMFs, we need to avoid paths that expect them.

It looks like the problem is with tsubst_copy_and_build calling
finish_non_static_data_member instead of build_ptrmemfunc_access_expr.


Sounds good. I'm not sure what would be the best way to match it. Here is
my attempt, which seems to survive all regtests:

--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -20530,7 +20530,13 @@ tsubst_copy_and_build (tree t,
 if (member == error_mark_node)
   RETURN (error_mark_node);

-   if (TREE_CODE (member) == FIELD_DECL)
+   if (object_type && TYPE_PTRMEMFUNC_P(object_type)
+   && TREE_CODE (member) == FIELD_DECL)
+ {
+   r = build_ptrmemfunc_access_expr (object, DECL_NAME(member));
+   RETURN (r);
+ }
+   else if (TREE_CODE (member) == FIELD_DECL)
   {
 r = finish_non_static_data_member (member, object, NULL_TREE);
 if (TREE_CODE (r) == COMPONENT_REF)


PR c++/101219

gcc/cp/ChangeLog:

* typeck.c (build_ptrmemfunc_access_expr): Suppress all warnings
to avoid ICE.

gcc/testsuite/ChangeLog:

* g++.dg/torture/pr101219.C: New test.


This doesn't need to be in torture; it has nothing to do with optimization.


Aha, moved to gcc/testsuite/g++.dg/warn/pr101219.C.

--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/pr101219.C
@@ -0,0 +1,11 @@
+/* PR c++/101219 - ICE on use of uninitialized memfun pointer
+   { dg-do compile }
+   { dg-options "-Wall" } */
+
+struct S { void m(); };
+
+template  bool f() {
+  void (S::*mp)();
+
+  return &S::m == mp; // no warning emitted here (no instantiation)
+}

Another question: Is it expected that gcc generates no warnings here?
It's an uninstantiated function (-1 for warn), but from what I
understand it's guaranteed to generate comparison with uninitialized
data if it ever gets instantiated. Given that we used to ICE in
warning code gcc could possibly flag it? (+1 for warn)


Generally it's desirable to diagnose templates for which no valid 
instantiation is possible.  It seems reasonable in most cases to also 
warn about templates for which all instantiations would warn.


But uninitialized warnings rely on flow analysis that we only do on 
instantiated functions, and in any case the ICE doesn't depend on mp 
being uninitialized; I get the same crash if I add = 0 to the declaration.



+   if (object_type && TYPE_PTRMEMFUNC_P(object_type)


Missing space before (.


+   && TREE_CODE (member) == FIELD_DECL)
+ {
+   r = build_ptrmemfunc_access_expr (object, DECL_NAME(member));


And here.

Jason

Re: [PATCH] c++: recognize class-scope non-template dguides [PR79501]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/11/21 2:29 PM, Patrick Palka wrote:

On Wed, 11 Aug 2021, Jason Merrill wrote:


On 8/9/21 1:16 PM, Patrick Palka wrote:

It looks like we still don't recognize class-scope non-template
deduction guides even after r12-2260.   This is because deduction guides
are handled in cp_parser_init_declarator after calling
cp_parser_declarator, but in the class-scope non-template case we call
cp_parser_declarator directly from cp_parser_member_declaration.

This patch makes us handle deduction guides in cp_parser_member_declaration
as well.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/79501

gcc/cp/ChangeLog:

* parser.c (cp_parser_maybe_adjust_declarator_for_dguide): New,
split out from ...
(cp_parser_init_declarator): ... here.
(cp_parser_member_declaration): Use it.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/class-deduction98.C: New test.
---
   gcc/cp/parser.c   | 54 +--
   .../g++.dg/cpp1z/class-deduction98.C  | 10 
   2 files changed, 49 insertions(+), 15 deletions(-)
   create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction98.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index d4da25ca703..04fdeec32ab 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -21869,6 +21869,30 @@ warn_about_ambiguous_parse (const
cp_decl_specifier_seq *decl_specifiers,
   }
   }
   +/* If the function declarator DECLARATOR names a class template, adjust
+   it to name its deduction guides and return true.  Otherwise return
false.  */
+
+static bool
+cp_parser_maybe_adjust_declarator_for_dguide (cp_parser *parser,
+ cp_declarator *declarator)
+{
+  gcc_assert (function_declarator_p (declarator));
+
+  cp_declarator *id = get_id_declarator (declarator);
+  tree name = id->u.id.unqualified_name;
+  parser->scope = id->u.id.qualifying_scope;
+  tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc);
+  if (tmpl
+  && (DECL_CLASS_TEMPLATE_P (tmpl)
+ || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl)))
+{
+  id->u.id.unqualified_name = dguide_name (tmpl);
+  id->u.id.sfk = sfk_deduction_guide;
+  return true;
+}
+  return false;
+}
+
   /* Declarators [gram.dcl.decl] */
 /* Parse an init-declarator.
@@ -22045,25 +22069,16 @@ cp_parser_init_declarator (cp_parser* parser,
   if (function_declarator_p (declarator))
   {
-  /* Handle C++17 deduction guides.  */
+  /* Handle C++17 deduction guides.  Note that class-scope
+non-template deduction guides are handled in
+cp_parser_member_declaration.  */
 if (!decl_specifiers->type
  && !decl_specifiers->any_type_specifiers_p
  && ctor_dtor_or_conv_p <= 0
  && cxx_dialect >= cxx17)
-   {
- cp_declarator *id = get_id_declarator (declarator);
- tree name = id->u.id.unqualified_name;
- parser->scope = id->u.id.qualifying_scope;
- tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc);
- if (tmpl
- && (DECL_CLASS_TEMPLATE_P (tmpl)
- || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl)))
-   {
- id->u.id.unqualified_name = dguide_name (tmpl);
- id->u.id.sfk = sfk_deduction_guide;
- ctor_dtor_or_conv_p = 1;
-   }
-   }
+   if (cp_parser_maybe_adjust_declarator_for_dguide (parser,
+ declarator))
+ ctor_dtor_or_conv_p = 1;
   if (!member_p && !cp_parser_error_occurred (parser))
warn_about_ambiguous_parse (decl_specifiers, declarator);
@@ -26719,6 +26734,15 @@ cp_parser_member_declaration (cp_parser* parser)
cp_lexer_consume_token (parser->lexer);
  goto out;
}
+ /* Handle class-scope non-template C++17 deduction guides.  */
+ if (function_declarator_p (declarator)
+ && !decl_specifiers.type
+ && !decl_specifiers.any_type_specifiers_p
+ && ctor_dtor_or_conv_p <= 0
+ && cxx_dialect >= cxx17)


Looks like you could factor more of the tests into the new function.


That works nicely, like so?  Bootstrap and regtesting in progress.


OK.


-- >8 --

Subject: [PATCH] c++: recognize class-scope non-template dguides [PR79501]

It looks like we still don't recognize class-scope non-template
deduction guides even after r12-2260.   This is because deduction guides
are tagged as such in cp_parser_init_declarator after calling
cp_parser_declarator, but in cp_parser_member_declaration we call
cp_parser_declarator directly.

This patch makes us handle deduction guides in cp_parser_member_declaration
as well.

PR c++/79501

gcc/cp/ChangeLog:

* parser.c (maybe_adjust_declarator_for_dguide): New, split out
from ...

Re: [PATCH] c++: most vexing parse and braced CTAD [PR89062]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/9/21 1:15 PM, Patrick Palka wrote:

Here grokdeclarator is emitting the error

   error: class template placeholder ‘Foo’ not permitted in this context

during the tentative (and ultimately futile) parse of 'x' as a function
declaration.  This happens because when parsing 'Foo{1}',
cp_parser_parameter_declaration yields a parameter declaration
with no declarator and whose type is a CTAD placeholder, and stops
short of consuming the '{'.  The caller cp_parser_parameter_declaration_list
then calls grokdeclarator on this declarator, hence the error, and soon
thereafter we abort this tentative parse since the next token '{' doesn't
make sense in the context of a parameter list.

Note that we don't have this issue when using only parentheses

   Foo x(Foo(1));

because in this case cp_parser_direct_declarator (called indirectly from
c_p_p_d) instead consumes the '(' and returns a cp_error_declarator
rather than a NULL declarator (and also simulates a parse error), and
grokdeclarator exits early for this declarator without emitting any error.

Since grokdeclarator doesn't take a 'complain' parameter, to fix this we
need to avoid calling grokdeclarator in this situation.  To that end
this patch makes c_p_p_d simulate an error when a construct is a CTAD
expression and definitely not a parameter declaration, so that c_p_p_d_l
can avoid calling grokdeclarator by checking if an error has been
simulated.  Alternatively we could keep all this logic inside c_p_p_d_l
and not touch c_p_p_d at all, but this approach seems slightly less ad hoc.



On the other hand, it seems weird that cp_parser_direct_declarator (with
flags=CP_PARSER_DECLARATOR_EITHER) returns cp_error_declarator for
'Foo(1)' (and consumes the '(') but NULL for 'Foo{1}' (and doesn't
consume the '{'), and perhaps this issue could be fixed by returning
cp_error_declarator in the latter case as well, but I didn't try this
approach.


From the comment, this seems to be because ObjC++ allows { at the end 
of the parameter-declaration-list.  No idea what that would mean.



Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk?


OK.


PR c++/89062

gcc/cp/ChangeLog:

* parser.c (cp_parser_parameter_declaration_list): Don't call
grokdeclarator if cp_parser_error_occurred.
(cp_parser_parameter_declaration): Simulate an error if

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/class-deduction97.C: New test.
---
  gcc/cp/parser.c| 17 +
  gcc/testsuite/g++.dg/cpp1z/class-deduction97.C |  6 ++
  2 files changed, 19 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction97.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 8b551db2c8a..d4da25ca703 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -24061,7 +24061,7 @@ cp_parser_parameter_declaration_list (cp_parser* 
parser, cp_parser_flags flags)
 and warn in grokparms if appropriate.  */
deprecated_state = DEPRECATED_SUPPRESS;
  
-  if (parameter)

+  if (parameter && !cp_parser_error_occurred (parser))
{
  decl = grokdeclarator (parameter->declarator,
                                 &parameter->decl_specifiers,
@@ -24276,7 +24276,7 @@ cp_parser_parameter_declaration (cp_parser *parser,
parser->default_arg_ok_p = false;
  
/* After seeing a decl-specifier-seq, if the next token is not a

-"(", there is no possibility that the code is a valid
+"(" or "{", there is no possibility that the code is a valid
 expression.  Therefore, if parsing tentatively, we commit at
 this point.  */
if (!parser->in_template_argument_list_p
@@ -24289,9 +24289,18 @@ cp_parser_parameter_declaration (cp_parser *parser,
 of some object of type "char" to "int".  */
  && !parser->in_type_id_in_expr_p
  && cp_parser_uncommitted_to_tentative_parse_p (parser)
- && cp_lexer_next_token_is_not (parser->lexer, CPP_OPEN_BRACE)
  && cp_lexer_next_token_is_not (parser->lexer, CPP_OPEN_PAREN))
-   cp_parser_commit_to_tentative_parse (parser);
+   {
+ if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
+   {
+ if (decl_specifiers.type
+ && template_placeholder_p (decl_specifiers.type))
+   /* This is a CTAD expression, not a parameter declaration.  */
+   cp_parser_simulate_error (parser);
+   }
+ else
+   cp_parser_commit_to_tentative_parse (parser);
+   }
/* Parse the declarator.  */
declarator_token_start = token;
declarator = cp_parser_declarator (parser,
diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction97.C 
b/gcc/testsuite/g++.dg/cpp1z/class-deduction97.C
new file mode 100644
index 000..32818681d8f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction97.C
@@ -0,0 +1,6 @@
+// PR c++/89062
+// { dg-do

Re: [PATCH] c++: recognize class-scope non-template dguides [PR79501]

2021-08-11 Thread Patrick Palka via Gcc-patches

On Wed, 11 Aug 2021, Jason Merrill wrote:

> On 8/9/21 1:16 PM, Patrick Palka wrote:
> > It looks like we still don't recognize class-scope non-template
> > deduction guides even after r12-2260.   This is because deduction guides
> > are handled in cp_parser_init_declarator after calling
> > cp_parser_declarator, but in the class-scope non-template case we call
> > cp_parser_declarator directly from cp_parser_member_declaration.
> > 
> > This patch makes us handle deduction guides in cp_parser_member_declaration
> > as well.
> > 
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
> > trunk?
> > 
> > PR c++/79501
> > 
> > gcc/cp/ChangeLog:
> > 
> > * parser.c (cp_parser_maybe_adjust_declarator_for_dguide): New,
> > split out from ...
> > (cp_parser_init_declarator): ... here.
> > (cp_parser_member_declaration): Use it.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp1z/class-deduction98.C: New test.
> > ---
> >   gcc/cp/parser.c   | 54 +--
> >   .../g++.dg/cpp1z/class-deduction98.C  | 10 
> >   2 files changed, 49 insertions(+), 15 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction98.C
> > 
> > diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
> > index d4da25ca703..04fdeec32ab 100644
> > --- a/gcc/cp/parser.c
> > +++ b/gcc/cp/parser.c
> > @@ -21869,6 +21869,30 @@ warn_about_ambiguous_parse (const
> > cp_decl_specifier_seq *decl_specifiers,
> >   }
> >   }
> >   +/* If the function declarator DECLARATOR names a class template, adjust
> > +   it to name its deduction guides and return true.  Otherwise return
> > false.  */
> > +
> > +static bool
> > +cp_parser_maybe_adjust_declarator_for_dguide (cp_parser *parser,
> > + cp_declarator *declarator)
> > +{
> > +  gcc_assert (function_declarator_p (declarator));
> > +
> > +  cp_declarator *id = get_id_declarator (declarator);
> > +  tree name = id->u.id.unqualified_name;
> > +  parser->scope = id->u.id.qualifying_scope;
> > +  tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc);
> > +  if (tmpl
> > +  && (DECL_CLASS_TEMPLATE_P (tmpl)
> > + || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl)))
> > +{
> > +  id->u.id.unqualified_name = dguide_name (tmpl);
> > +  id->u.id.sfk = sfk_deduction_guide;
> > +  return true;
> > +}
> > +  return false;
> > +}
> > +
> >   /* Declarators [gram.dcl.decl] */
> > /* Parse an init-declarator.
> > @@ -22045,25 +22069,16 @@ cp_parser_init_declarator (cp_parser* parser,
> >   if (function_declarator_p (declarator))
> >   {
> > -  /* Handle C++17 deduction guides.  */
> > +  /* Handle C++17 deduction guides.  Note that class-scope
> > +non-template deduction guides are handled in
> > +cp_parser_member_declaration.  */
> > if (!decl_specifiers->type
> >   && !decl_specifiers->any_type_specifiers_p
> >   && ctor_dtor_or_conv_p <= 0
> >   && cxx_dialect >= cxx17)
> > -   {
> > - cp_declarator *id = get_id_declarator (declarator);
> > - tree name = id->u.id.unqualified_name;
> > - parser->scope = id->u.id.qualifying_scope;
> > - tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc);
> > - if (tmpl
> > - && (DECL_CLASS_TEMPLATE_P (tmpl)
> > - || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl)))
> > -   {
> > - id->u.id.unqualified_name = dguide_name (tmpl);
> > - id->u.id.sfk = sfk_deduction_guide;
> > - ctor_dtor_or_conv_p = 1;
> > -   }
> > -   }
> > +   if (cp_parser_maybe_adjust_declarator_for_dguide (parser,
> > + declarator))
> > + ctor_dtor_or_conv_p = 1;
> >   if (!member_p && !cp_parser_error_occurred (parser))
> > warn_about_ambiguous_parse (decl_specifiers, declarator);
> > @@ -26719,6 +26734,15 @@ cp_parser_member_declaration (cp_parser* parser)
> > cp_lexer_consume_token (parser->lexer);
> >   goto out;
> > }
> > + /* Handle class-scope non-template C++17 deduction guides.  */
> > + if (function_declarator_p (declarator)
> > + && !decl_specifiers.type
> > + && !decl_specifiers.any_type_specifiers_p
> > + && ctor_dtor_or_conv_p <= 0
> > + && cxx_dialect >= cxx17)
> 
> Looks like you could factor more of the tests into the new function.

That works nicely, like so?  Bootstrap and regtesting in progress.

-- >8 --

Subject: [PATCH] c++: recognize class-scope non-template dguides [PR79501]

It looks like we still don't recognize class-scope non-template
deduction guides even after r12-2260.   This is because deduction guides
are tagged as such in cp_parser_init_declarator after calling
cp_parser_declarator, but in cp_parser_member_declaration we call
cp_parser_declarator directly.

This patch makes us handle deduction guides

Re: [PATCH] c++: Fix ICE on defaulted spaceship with pointer return type [PR94162]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/11/21 10:01 AM, Jakub Jelinek wrote:

On Wed, Aug 11, 2021 at 09:42:56AM -0400, Jason Merrill wrote:

Checking CLASS_TYPE_P && decl_in_std_namespace (TYPE_MAIN_DECL) before
looking at the name makes sense to me.


CLASS_TYPE_P is cheap, but isn't decl_in_std_namespace, especially when
it needs to walk inline namespaces, better done only if we get a match, so
like below?

Though I can do it even in the first if if you think it is better...


Let's do it first; no need to micro-optimize this function.

OK with that change.


2021-08-11  Jakub Jelinek  

gcc/cp/
PR c++/94162
* method.c (cat_tag_for): Return cc_last for !CLASS_TYPE_P
or for classes not in std namespace.
gcc/testsuite/
PR c++/99429
* g++.dg/cpp2a/spaceship-synth11.C: New test.

PR c++/94162
* g++.dg/cpp2a/spaceship-synth-neg6.C: New test.

--- gcc/cp/method.c.jj  2021-08-09 15:03:00.923206463 +0200
+++ gcc/cp/method.c 2021-08-11 15:52:27.157437691 +0200
@@ -1029,10 +1029,13 @@ is_cat (tree type, comp_cat_tag tag)
  static comp_cat_tag
  cat_tag_for (tree type)
  {
+  if (!CLASS_TYPE_P (type))
+return cc_last;
for (int i = 0; i < cc_last; ++i)
  {
comp_cat_tag tag = (comp_cat_tag)i;
-  if (is_cat (type, tag))
+  if (is_cat (type, tag)
+ && decl_in_std_namespace_p (TYPE_MAIN_DECL (type)))
return tag;
  }
return cc_last;
--- gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C.jj   2021-08-11 
15:49:05.267204333 +0200
+++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C  2021-08-11 
15:49:05.267204333 +0200
@@ -0,0 +1,29 @@
+// PR c++/99429
+// { dg-do compile { target c++20 } }
+
+namespace std {
+struct strong_ordering {
+  int _v;
+  constexpr strong_ordering (int v) :_v(v) {}
+  constexpr operator int (void) const { return _v; }
+  static const strong_ordering less;
+  static const strong_ordering equal;
+  static const strong_ordering greater;
+};
+constexpr strong_ordering strong_ordering::less = -1;
+constexpr strong_ordering strong_ordering::equal = 0;
+constexpr strong_ordering strong_ordering::greater = 1;
+}
+
+template 
+struct duration {
+  static constexpr const long period = N;
+  constexpr duration (void) = default;
+  constexpr duration (const duration& d) = default;
+  constexpr bool operator== (const duration& d) const = default;
+  constexpr bool operator<=> (const duration& d) const = default;
+  long _d;
+};
+
+using nanoseconds = duration<1>;
+using microseconds = duration;
--- gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C.jj2021-08-11 
15:49:05.268204320 +0200
+++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C   2021-08-11 
15:49:05.268204320 +0200
@@ -0,0 +1,11 @@
+// PR c++/94162
+// { dg-do compile { target c++20 } }
+
+#include <compare>
+
+struct S {
+  int a;   // { dg-error "three-way comparison of 'S::a' has 
type 'std::strong_ordering', which does not convert to 'int\\*'" }
+  int *operator<=>(const S&) const = default;
+};
+
+bool b = S{} < S{}; // { dg-error "use of deleted function 'constexpr int\\* 
S::operator<=>\\\(const S&\\\) const'" }

Jakub

Re: [PATCH] c++: Improve memory usage of subsumption [PR100828]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/11/21 10:53 AM, Patrick Palka wrote:

On Wed, 11 Aug 2021, Jason Merrill wrote:


On 8/9/21 5:07 PM, Patrick Palka wrote:

On Wed, Jul 28, 2021 at 4:42 PM Jason Merrill  wrote:


On 7/19/21 6:05 PM, Patrick Palka wrote:

Constraint subsumption is implemented in two steps.  The first step
computes the disjunctive (or conjunctive) normal form of one of the
constraints, and the second step verifies that each clause in the
decomposed form implies the other constraint.   Performing these two
steps separately is problematic because in the first step the
disjunctive normal form can be exponentially larger than the original
constraint, and by computing it ahead of time we'd have to keep all of
it in memory.

This patch fixes this exponential blowup in memory usage by interleaving
these two steps, so that as soon as we decompose one clause we check
implication for it.  In turn, memory usage during subsumption is now
worst case linear in the size of the constraints rather than
exponential, and so we can safely remove the hard limit of 16 clauses
without introducing runaway memory usage on some inputs.  (Note the
_time_ complexity of subsumption is still exponential in the worst
case.)

In order for this to work we need formula::branch to prepend the copy
of the current clause directly after the current clause rather than
at the end of the list, so that we fully decompose a clause shortly
after creating it.  Otherwise we'd end up accumulating exponentially
many (partially decomposed) clauses in memory anyway.

Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on
range-v3 and cmcstl2.  Does this look OK for trunk and perhaps 11?


OK for trunk.


Thanks a lot, patch committed to trunk as r12-2658.  Since this low
complexity limit was introduced in GCC 10, what do you think about
increasing the limit from 16 to say 128 in the 10/11 release branches
as a relatively safe stopgap?


Now that 11.2 is out, go ahead and apply this patch to the 11 branch.


Ah great, will do.



Won't a limit of 128 in GCC 10 lead to extremely long compile times for
affected code?  Is that more desirable than an error?


Potentially, though I think that'd be the case only if the original
(normalized) constraint is huge to begin with.  The comment for
max_problem_size says

  /* The largest number of clauses in CNF or DNF we accept as input
 for subsumption. This an upper bound of 2^16 expressions.  */
  static int max_problem_size = 16;

which implies increasing it to 128 would allow for at most 2^128
expressions (clearly unacceptable), but I'm not sure how this upper
bound was obtained.

FWIW I think another upper bound for the number of expressions in the
CNF/DNF is roughly 'max_problem_size * size_of_original_constraint',
since we allow at most 'max_problem_size' clauses in the decomposed form
and each clause is definitely no larger than the original constraint.
So according to this upper bound the dependence on max_problem_size as
it relates to worst-case compile time/memory usage of subsumption is
linear rather than exponential, contrary to the comment.  In that case
increasing the limit from 16 to 128 doesn't seem to be too bad.


Fair, though I would expect anyone writing new concepts code to use GCC 
11.  Up to you.



PR c++/100828

gcc/cp/ChangeLog:

* logic.cc (formula::formula): Use emplace_back.
(formula::branch): Insert a copy of m_current in front of
m_current instead of at the end of the list.
(formula::erase): Define.
(decompose_formula): Remove.
(decompose_antecedents): Remove.
(decompose_consequents): Remove.
(derive_proofs): Remove.
(max_problem_size): Remove.
(diagnose_constraint_size): Remove.
(subsumes_constraints_nonnull): Rewrite directly in terms of
decompose_clause and derive_proof, interleaving decomposition
with implication checking.  Use formula::erase to free the
current clause before moving on to the next one.
---
gcc/cp/logic.cc | 118
++--
1 file changed, 35 insertions(+), 83 deletions(-)

diff --git a/gcc/cp/logic.cc b/gcc/cp/logic.cc
index 142457e408a..3f872c11fe2 100644
--- a/gcc/cp/logic.cc
+++ b/gcc/cp/logic.cc
@@ -223,9 +223,7 @@ struct formula

  formula (tree t)
  {
-/* This should call emplace_back(). There's an extra copy being
-   invoked by using push_back().  */
-m_clauses.push_back (t);
+m_clauses.emplace_back (t);
m_current = m_clauses.begin ();
  }

@@ -248,8 +246,7 @@ struct formula
  clause& branch ()
  {
gcc_assert (!done ());
-m_clauses.push_back (*m_current);
-return m_clauses.back ();
+return *m_clauses.insert (std::next (m_current), *m_current);
  }

  /* Returns the position of the current clause.  */
@@ -287,6 +284,14 @@ struct formula
return m_clauses.end ();
  }

+  /* Remove the specified clause.  */
+
+

Re: [PATCH] c++: recognize class-scope non-template dguides [PR79501]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/9/21 1:16 PM, Patrick Palka wrote:

It looks like we still don't recognize class-scope non-template
deduction guides even after r12-2260.   This is because deduction guides
are handled in cp_parser_init_declarator after calling
cp_parser_declarator, but in the class-scope non-template case we call
cp_parser_declarator directly from cp_parser_member_declaration.

This patch makes us handle deduction guides in cp_parser_member_declaration
as well.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/79501

gcc/cp/ChangeLog:

* parser.c (cp_parser_maybe_adjust_declarator_for_dguide): New,
split out from ...
(cp_parser_init_declarator): ... here.
(cp_parser_member_declaration): Use it.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/class-deduction98.C: New test.
---
  gcc/cp/parser.c   | 54 +--
  .../g++.dg/cpp1z/class-deduction98.C  | 10 
  2 files changed, 49 insertions(+), 15 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction98.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index d4da25ca703..04fdeec32ab 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -21869,6 +21869,30 @@ warn_about_ambiguous_parse (const 
cp_decl_specifier_seq *decl_specifiers,
  }
  }
  
+/* If the function declarator DECLARATOR names a class template, adjust

+   it to name its deduction guides and return true.  Otherwise return false.  
*/
+
+static bool
+cp_parser_maybe_adjust_declarator_for_dguide (cp_parser *parser,
+ cp_declarator *declarator)
+{
+  gcc_assert (function_declarator_p (declarator));
+
+  cp_declarator *id = get_id_declarator (declarator);
+  tree name = id->u.id.unqualified_name;
+  parser->scope = id->u.id.qualifying_scope;
+  tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc);
+  if (tmpl
+  && (DECL_CLASS_TEMPLATE_P (tmpl)
+ || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl)))
+{
+  id->u.id.unqualified_name = dguide_name (tmpl);
+  id->u.id.sfk = sfk_deduction_guide;
+  return true;
+}
+  return false;
+}
+
  /* Declarators [gram.dcl.decl] */
  
  /* Parse an init-declarator.

@@ -22045,25 +22069,16 @@ cp_parser_init_declarator (cp_parser* parser,
  
if (function_declarator_p (declarator))

  {
-  /* Handle C++17 deduction guides.  */
+  /* Handle C++17 deduction guides.  Note that class-scope
+non-template deduction guides are handled in
+cp_parser_member_declaration.  */
if (!decl_specifiers->type
  && !decl_specifiers->any_type_specifiers_p
  && ctor_dtor_or_conv_p <= 0
  && cxx_dialect >= cxx17)
-   {
- cp_declarator *id = get_id_declarator (declarator);
- tree name = id->u.id.unqualified_name;
- parser->scope = id->u.id.qualifying_scope;
- tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc);
- if (tmpl
- && (DECL_CLASS_TEMPLATE_P (tmpl)
- || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl)))
-   {
- id->u.id.unqualified_name = dguide_name (tmpl);
- id->u.id.sfk = sfk_deduction_guide;
- ctor_dtor_or_conv_p = 1;
-   }
-   }
+   if (cp_parser_maybe_adjust_declarator_for_dguide (parser,
+ declarator))
+ ctor_dtor_or_conv_p = 1;
  
if (!member_p && !cp_parser_error_occurred (parser))

warn_about_ambiguous_parse (decl_specifiers, declarator);
@@ -26719,6 +26734,15 @@ cp_parser_member_declaration (cp_parser* parser)
cp_lexer_consume_token (parser->lexer);
  goto out;
}
+ /* Handle class-scope non-template C++17 deduction guides.  */
+ if (function_declarator_p (declarator)
+ && !decl_specifiers.type
+ && !decl_specifiers.any_type_specifiers_p
+ && ctor_dtor_or_conv_p <= 0
+ && cxx_dialect >= cxx17)


Looks like you could factor more of the tests into the new function.


+   if (cp_parser_maybe_adjust_declarator_for_dguide (parser,
+ declarator))
+ ctor_dtor_or_conv_p = 1;
  
  	  if (declares_class_or_enum & 2)

cp_parser_check_for_definition_in_return_type
diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction98.C 
b/gcc/testsuite/g++.dg/cpp1z/class-deduction98.C
new file mode 100644
index 000..bee0ce433ee
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction98.C
@@ -0,0 +1,10 @@
+// PR c++/79501
+// { dg-do compile { target c++17 } }
+
+template
+struct A {
+  template struct B { template B(V); };
+  B(T) -> B;
+};
+
+A::B b(0);

[PATCH 8/8] OpenMP 5.0: [WIP, RFC] Clause ordering for OpenMP 5.0 (topological sorting by base pointer)

2021-08-11 Thread Julian Brown

This patch reimplements the omp_target_reorder_clauses function in
anticipation of supporting "deeper" struct mappings (that is, with
several structure dereference operators, or similar).

The idea is that in place of the (possibly quadratic) algorithm in
omp_target_reorder_clauses that greedily moves clauses containing
addresses that are subexpressions of other addresses before those other
addresses, we employ a topological sort algorithm to calculate a proper
order for map clauses. This should run in linear time, and hopefully
handles degenerate cases where multiple "levels" of indirect accesses
are present on a given directive.

The new method also takes care to keep clause groups together, addressing
the concerns raised in:

  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570501.html

To figure out if some given clause depends on a base pointer in another
clause, we strip off the outer layers of the address expression, and check
(via a tree_operand_hash hash table we have built) if the result is a
"base pointer" as defined in OpenMP 5.0 (1.2.6 Data Terminology). There
are some subtleties involved, however:

 - We must treat MEM_REF with zero offset the same as INDIRECT_REF.
   This should probably be fixed in the front ends instead so we always
   use a canonical form (probably INDIRECT_REF). The following patch
   shows one instance of the problem, but there may be others:

   https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571382.html

 - Mapping a whole struct implies mapping each of that struct's
   elements, which may be base pointers. Because those base pointers
   aren't necessarily explicitly referenced in the directive in question,
   we treat the whole-struct mapping as a dependency instead.

 - We also need to special-case handling for "*struct_var" (including
   "*this"), so the un-dereferenced variable is treated as the dependency
   -- which feels a bit wrong. (A subsequent GOMP_MAP_POINTER handles the
   pointer itself for those types of mapping, but the current approach
   only processes the first node in each group.)

Jakub, Chung-Lin -- does this approach seem reasonable? Any comments at
this stage?

2021-08-10  Julian Brown  

gcc/
* gimplify.c (is_or_contains_p, omp_target_reorder_clauses): Delete
functions.
(omp_tsort_mark): Add enum.
(omp_mapping_group): Add struct.
(omp_get_base_pointer, omp_gather_mapping_groups,
omp_index_mapping_groups, omp_tsort_mapping_groups_1,
omp_tsort_mapping_groups, omp_segregate_mapping_groups,
omp_reorder_mapping_groups): New functions.
(gimplify_scan_omp_clauses): Call above functions instead of
omp_target_reorder_clauses.

gcc/testsuite/
* g++.dg/gomp/target-this-3.C: Adjust expected output.
* g++.dg/gomp/target-this-4.C: Likewise.
---
 gcc/gimplify.c| 598 +++---
 gcc/testsuite/g++.dg/gomp/target-this-3.C |   2 +-
 gcc/testsuite/g++.dg/gomp/target-this-4.C |   2 +-
 3 files changed, 411 insertions(+), 191 deletions(-)

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 0ef2dbde710..ca106ef7acf 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -8534,29 +8534,6 @@ extract_base_bit_offset (tree base, tree *base_ind, tree 
*base_ref,
   return base;
 }
 
-/* Returns true if EXPR is or contains (as a sub-component) BASE_PTR.  */
-
-static bool
-is_or_contains_p (tree expr, tree base_ptr)
-{
-  if ((TREE_CODE (expr) == INDIRECT_REF && TREE_CODE (base_ptr) == MEM_REF)
-  || (TREE_CODE (expr) == MEM_REF && TREE_CODE (base_ptr) == INDIRECT_REF))
-return operand_equal_p (TREE_OPERAND (expr, 0),
-   TREE_OPERAND (base_ptr, 0));
-  while (!operand_equal_p (expr, base_ptr))
-{
-  if (TREE_CODE (base_ptr) == COMPOUND_EXPR)
-   base_ptr = TREE_OPERAND (base_ptr, 1);
-  if (TREE_CODE (base_ptr) == COMPONENT_REF
- || TREE_CODE (base_ptr) == POINTER_PLUS_EXPR
- || TREE_CODE (base_ptr) == SAVE_EXPR)
-   base_ptr = TREE_OPERAND (base_ptr, 0);
-  else
-   break;
-}
-  return operand_equal_p (expr, base_ptr);
-}
-
 /* Remove COMPONENT_REFS and indirections from EXPR.  */
 
 static tree
@@ -8599,184 +8576,413 @@ aggregate_base_p (tree expr)
   return false;
 }
 
-/* Implement OpenMP 5.x map ordering rules for target directives. There are
-   several rules, and with some level of ambiguity, hopefully we can at least
-   collect the complexity here in one place.  */
+enum omp_tsort_mark {
+  UNVISITED,
+  TEMPORARY,
+  PERMANENT
+};
+
+struct omp_mapping_group {
+  tree *grp_start;
+  tree grp_end;
+  omp_tsort_mark mark;
+  struct omp_mapping_group *sibling;
+  struct omp_mapping_group *next;
+};
+
+/* Return the OpenMP "base pointer" of an expression EXPR, or NULL if there
+   isn't one.  This needs improvement.  */
+
+static tree
+omp_get_base_pointer (tree expr)
+{
+  while (TREE_CODE (expr) == COMPONENT_REF
+&& (DECL_P (TREE_OPERAND

[PATCH 7/8] OpenACC: Rework indirect struct handling in gimplify.c

2021-08-11 Thread Julian Brown

(Previously posted here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570400.html)

This patch reworks indirect struct handling in gimplify.c (i.e. for
struct components mapped with "mystruct->a[0:n]", "mystruct->b", etc.),
for OpenACC.  The key observation leading to these changes was that
component mapping of references-to-structures is already implemented
and working, and indirect struct component handling via a pointer can
work quite similarly.  That lets us remove some earlier, special-case
handling for mapping indirect struct component accesses for OpenACC,
which required the pointed-to struct to be manually mapped before the
indirect component mapping.

With this patch, you can map struct components directly (e.g. an array
slice "mystruct->a[0:n]") just like you can map a non-indirect struct
component slice ("mystruct.a[0:n]"). Both references-to-pointers (with
the former syntax) and references to structs (with the latter syntax)
work now.

For Fortran class pointers, we no longer re-use GOMP_MAP_TO_PSET for the
class metadata (the structure that points to the class data and vptr)
-- it is instead treated as any other struct.

For C++, the struct handling also works for class members ("this->foo"),
without having to explicitly map "this[:1]" first.

For OpenACC, we permit chained indirect component references
("mystruct->a->b[0:n]"), though only the last part of such mappings will
trigger an attach/detach operation.  To properly use such a construct
on the target, you must still manually map "mystruct->a[:1]" first --
but there's no need to map "mystruct[:1]" explicitly before that.

This version of the patch avoids altering code paths for OpenMP,
where possible.

2021-06-02  Julian Brown  

gcc/fortran/
* trans-openmp.c (gfc_trans_omp_clauses): Don't create GOMP_MAP_TO_PSET
mappings for class metadata, nor GOMP_MAP_POINTER mappings for
POINTER_TYPE_P decls.

gcc/
* gimplify.c (extract_base_bit_offset): Add BASE_IND and OPENMP
parameters.  Handle pointer-typed indirect references for OpenACC
alongside reference-typed ones.
(strip_components_and_deref, aggregate_base_p): New functions.
(build_struct_group): Add pointer type indirect ref handling,
including chained references, for OpenACC.  Also handle references to
structs for OpenACC.  Conditionalise bits for OpenMP only where
appropriate.
(gimplify_scan_omp_clauses): Rework pointer-type indirect structure
access handling to work more like the reference-typed handling for
OpenACC only.
* omp-low.c (scan_sharing_clauses): Handle pointer-type indirect struct
references, and references to pointers to structs also.

gcc/testsuite/
* g++.dg/goacc/member-array-acc.C: New test.
* g++.dg/gomp/member-array-omp.C: New test.

libgomp/
* testsuite/libgomp.oacc-c-c++-common/deep-copy-15.c: New test.
* testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c: New test.
* testsuite/libgomp.oacc-c++/deep-copy-17.C: New test.
---
 gcc/fortran/trans-openmp.c|  20 +-
 gcc/gimplify.c| 214 +---
 gcc/omp-low.c |  16 +-
 gcc/testsuite/g++.dg/goacc/member-array-acc.C |  13 +
 gcc/testsuite/g++.dg/gomp/member-array-omp.C  |  13 +
 .../testsuite/libgomp.oacc-c++/deep-copy-17.C | 101 
 .../libgomp.oacc-c-c++-common/deep-copy-15.c  |  68 ++
 .../libgomp.oacc-c-c++-common/deep-copy-16.c  | 231 ++
 8 files changed, 618 insertions(+), 58 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/goacc/member-array-acc.C
 create mode 100644 gcc/testsuite/g++.dg/gomp/member-array-omp.C
 create mode 100644 libgomp/testsuite/libgomp.oacc-c++/deep-copy-17.C
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-15.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 9dc2b6fc6a5..ff2058e9571 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -3032,30 +3032,16 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
  tree present = gfc_omp_check_optional_argument (decl, true);
  if (openacc && n->sym->ts.type == BT_CLASS)
{
- tree type = TREE_TYPE (decl);
  if (n->sym->attr.optional)
sorry ("optional class parameter");
- if (POINTER_TYPE_P (type))
-   {
- node4 = build_omp_clause (input_location,
-   OMP_CLAUSE_MAP);
- OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER);
- OMP_CLAUSE_DECL (node4) = decl;
- OMP_CLAUSE_SIZE (node4) = size_int (0);
-

[PATCH 6/8] OpenACC/OpenMP: Refactor struct lowering in gimplify.c

2021-08-11 Thread Julian Brown

(Previously submitted here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570398.html)

This patch is a second attempt at refactoring struct component mapping
handling for OpenACC/OpenMP during gimplification, after the patch I
posted here:

  https://gcc.gnu.org/pipermail/gcc-patches/2018-November/510503.html

And improved here, post-review:

  https://gcc.gnu.org/pipermail/gcc-patches/2019-November/533394.html

This patch goes further, in that the struct-handling code is outlined
into its own function (to create the "GOMP_MAP_STRUCT" node and the
sorted list of nodes immediately following it, from a set of mappings
of components of a given struct or derived type). I've also gone through
the list-handling code and attempted to add comments documenting how it
works to the best of my understanding, and broken out a couple of helper
functions in order to (hopefully) have the code self-document better also.

2021-06-02  Julian Brown  

gcc/
* gimplify.c (insert_struct_comp_map): Refactor function into...
(build_struct_comp_nodes): This new function.  Remove list handling
and improve self-documentation.
(insert_node_after, move_node_after, move_nodes_after,
move_concat_nodes_after): New helper functions.
(build_struct_group): New function to build up GOMP_MAP_STRUCT node
groups to map struct components. Outlined from...
(gimplify_scan_omp_clauses): Here.  Call above function.
---
 gcc/gimplify.c | 975 +++--
 1 file changed, 611 insertions(+), 364 deletions(-)

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 974d25b2d05..8558dda079f 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -8403,73 +8403,66 @@ gimplify_omp_depend (tree *list_p, gimple_seq *pre_p)
   return 1;
 }
 
-/* Insert a GOMP_MAP_ALLOC or GOMP_MAP_RELEASE node following a
-   GOMP_MAP_STRUCT mapping.  C is an always_pointer mapping.  STRUCT_NODE is
-   the struct node to insert the new mapping after (when the struct node is
-   initially created).  PREV_NODE is the first of two or three mappings for a
-   pointer, and is either:
- - the node before C, when a pair of mappings is used, e.g. for a C/C++
-   array section.
- - not the node before C.  This is true when we have a reference-to-pointer
-   type (with a mapping for the reference and for the pointer), or for
-   Fortran derived-type mappings with a GOMP_MAP_TO_PSET.
-   If SCP is non-null, the new node is inserted before *SCP.
-   if SCP is null, the new node is inserted before PREV_NODE.
-   The return type is:
- - PREV_NODE, if SCP is non-null.
- - The newly-created ALLOC or RELEASE node, if SCP is null.
- - The second newly-created ALLOC or RELEASE node, if we are mapping a
-   reference to a pointer.  */
+/* For a set of mappings describing an array section pointed to by a struct
+   (or derived type, etc.) component, create an "alloc" or "release" node to
+   insert into a list following a GOMP_MAP_STRUCT node.  For some types of
+   mapping (e.g. Fortran arrays with descriptors), an additional mapping may
+   be created that is inserted into the list of mapping nodes attached to the
+   directive being processed -- not part of the sorted list of nodes after
+   GOMP_MAP_STRUCT.
+
+   CODE is the code of the directive being processed.  GRP_START and GRP_END
+   are the first and last of two or three nodes representing this array section
+   mapping (e.g. a data movement node like GOMP_MAP_{TO,FROM}, optionally a
+   GOMP_MAP_TO_PSET, and finally a GOMP_MAP_ALWAYS_POINTER).  EXTRA_NODE is
+   filled with the additional node described above, if needed.
+
+   This function does not add the new nodes to any lists itself.  It is the
+   responsibility of the caller to do that.  */
 
 static tree
-insert_struct_comp_map (enum tree_code code, tree c, tree struct_node,
-   tree prev_node, tree *scp)
+build_struct_comp_nodes (enum tree_code code, tree grp_start, tree grp_end,
+tree *extra_node)
 {
   enum gomp_map_kind mkind
 = (code == OMP_TARGET_EXIT_DATA || code == OACC_EXIT_DATA)
   ? GOMP_MAP_RELEASE : GOMP_MAP_ALLOC;
 
-  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
-  tree cl = scp ? prev_node : c2;
+  gcc_assert (grp_start != grp_end);
+
+  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), OMP_CLAUSE_MAP);
   OMP_CLAUSE_SET_MAP_KIND (c2, mkind);
-  OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (c));
-  OMP_CLAUSE_CHAIN (c2) = scp ? *scp : prev_node;
-  if (OMP_CLAUSE_CHAIN (prev_node) != c
-  && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (prev_node)) == OMP_CLAUSE_MAP
-  && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (prev_node))
- == GOMP_MAP_TO_PSET))
-OMP_CLAUSE_SIZE (c2) = OMP_CLAUSE_SIZE (OMP_CLAUSE_CHAIN (prev_node));
+  OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (grp_end));
+  OMP_CLAUSE_CHAIN (c2) = NULL_TREE;
+  tree

[PATCH 5/8] OpenMP/OpenACC: Move array_ref/indirect_ref handling code out of extract_base_bit_offset

2021-08-11 Thread Julian Brown

This patch slightly cleans up the semantics of extract_base_bit_offset,
in that the stripping of ARRAY_REFS/INDIRECT_REFS out of
extract_base_bit_offset is moved back into the (two) call sites of the
function. This is done in preparation for follow-on patches that extend
the function.

Previously posted for the og11 branch here (patch & reversion/rework):

  https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571712.html
  https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571884.html

2021-06-03  Julian Brown  

gcc/
* gimplify.c (extract_base_bit_offset): Don't look through ARRAY_REFs or
INDIRECT_REFs here.
(build_struct_group): Reinstate previous behaviour for handling
ARRAY_REFs/INDIRECT_REFs.
---
 gcc/gimplify.c | 59 +-
 1 file changed, 29 insertions(+), 30 deletions(-)

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 141ef6b2b1e..974d25b2d05 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -8490,31 +8490,7 @@ extract_base_bit_offset (tree base, tree *base_ref, 
poly_int64 *bitposp,
   poly_offset_int poffset;
 
   if (base_ref)
-{
-  *base_ref = NULL_TREE;
-
-  while (TREE_CODE (base) == ARRAY_REF)
-   base = TREE_OPERAND (base, 0);
-
-  if (TREE_CODE (base) == INDIRECT_REF)
-   base = TREE_OPERAND (base, 0);
-}
-  else
-{
-  if (TREE_CODE (base) == ARRAY_REF)
-   {
- while (TREE_CODE (base) == ARRAY_REF)
-   base = TREE_OPERAND (base, 0);
- if (TREE_CODE (base) != COMPONENT_REF
- || TREE_CODE (TREE_TYPE (base)) != ARRAY_TYPE)
-   return NULL_TREE;
-   }
-  else if (TREE_CODE (base) == INDIRECT_REF
-  && TREE_CODE (TREE_OPERAND (base, 0)) == COMPONENT_REF
-  && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0)))
-  == REFERENCE_TYPE))
-   base = TREE_OPERAND (base, 0);
-}
+*base_ref = NULL_TREE;
 
   base = get_inner_reference (base, , , , ,
  , , );
@@ -9482,12 +9458,17 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
  poly_offset_int offset1;
  poly_int64 bitpos1;
  tree tree_offset1;
- tree base_ref;
+ tree base_ref, ocd = OMP_CLAUSE_DECL (c);
 
- tree base
-   = extract_base_bit_offset (OMP_CLAUSE_DECL (c), _ref,
-  , ,
-  _offset1);
+ while (TREE_CODE (ocd) == ARRAY_REF)
+   ocd = TREE_OPERAND (ocd, 0);
+
+ if (TREE_CODE (ocd) == INDIRECT_REF)
+   ocd = TREE_OPERAND (ocd, 0);
+
+ tree base = extract_base_bit_offset (ocd, _ref,
+  , ,
+  _offset1);
 
  bool do_map_struct = (base == decl && !tree_offset1);
 
@@ -9679,6 +9660,24 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
poly_offset_int offsetn;
poly_int64 bitposn;
tree tree_offsetn;
+
+   if (TREE_CODE (sc_decl) == ARRAY_REF)
+ {
+   while (TREE_CODE (sc_decl) == ARRAY_REF)
+ sc_decl = TREE_OPERAND (sc_decl, 0);
+   if (TREE_CODE (sc_decl) != COMPONENT_REF
+   || (TREE_CODE (TREE_TYPE (sc_decl))
+   != ARRAY_TYPE))
+ break;
+ }
+   else if (TREE_CODE (sc_decl) == INDIRECT_REF
+&& (TREE_CODE (TREE_OPERAND (sc_decl, 0))
+== COMPONENT_REF)
+&& (TREE_CODE (TREE_TYPE
+ (TREE_OPERAND (sc_decl, 0)))
+== REFERENCE_TYPE))
+ sc_decl = TREE_OPERAND (sc_decl, 0);
+
tree base
  = extract_base_bit_offset (sc_decl, NULL,
 , ,
-- 
2.29.2

[PATCH 3/8] Remove array section base-pointer mapping semantics, and other front-end adjustments (mainline trunk)

2021-08-11 Thread Julian Brown

From: Chung-Lin Tang 

This is a version of a patch by Chung-Lin, merged to current mainline. Any
errors introduced are my own! It was previously posted here:

https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571195.html

Chung-Lin's description from the previous submission follows (edited a
little for formatting).

This is a version of this patch:
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570075.html
for mainline trunk.

This patch largely implements three pieces of functionality:

(1) Per discussion and clarification on the omp-lang mailing list,
standards conforming behavior for mapping array sections should *NOT*
also map the base-pointer, i.e. for this code:

 struct S { int *ptr; ... };
 struct S s;
 #pragma omp target enter data map(to: s.ptr[:100])

Currently we generate after gimplify:
  map(to:*_1 [len: 400]) map(attach:s.ptr [bias: 0])

which is deemed incorrect. After this patch, the gimplify results are
now adjusted to:
(the attach operation is still generated, and if s.ptr is already mapped
prior, attachment will happen)

The correct way of achieving the base-pointer-also-mapped behavior would
be to use:

This adjustment in behavior required a number of small adjustments here
and there in gimplify, including to accommodate map sequences for C++
references.

There is also a small Fortran front-end patch involved (hence
CCing Tobias and fortran@).  The new gimplify processing changed
behavior in handling GOMP_MAP_ALWAYS_POINTER maps such that the
libgomp.fortran/struct-elem-map-1.f90 test regressed. It appeared
that the Fortran FE was generating a GOMP_MAP_ALWAYS_POINTER for
array types, which didn't seem quite correct, and the pre-patch
behavior was removing this map anyways. I have a small change in
trans-openmp.c:gfc_trans_omp_array_section to not generate the map in
this case, and so far no bad test results.

(2) The second part (though kind of related to the first above) consists of fixes
in libgomp/target.c to not overwrite attached pointers when handling
device<->host copies, mainly for the "always" case.  This behavior is
also noted in the 5.0 spec, but not yet properly coded before.

(3) The third is a set of changes to the C/C++ front-ends to extend the
allowed component access syntax in map clauses. This is actually mainly
an effort to allow SPEC HPC to compile, so although in the long term
the entire map clause syntax parsing is probably going to be revamped,
we're still adding this in for now. These changes are enabled for both
OpenACC and OpenMP.

Tested on x86_64-linux with nvptx offloading with no regressions. This
patch was merged and tested atop of the prior submitted patches:

  (a) https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570886.html
  "[PATCH, OpenMP 5.0] Improve OpenMP target support for C++ (includes 
PR92120 v3)"
  (b) https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570365.html
  "[PATCH, OpenMP 5.0] Implement relaxation of implicit map vs. existing 
device mappings (for mainline trunk)"

so you might queue this one later than those for review.

Thanks,
Chung-Lin

2021-05-25  Chung-Lin Tang  

gcc/c/ChangeLog:

* c-parser.c (struct omp_dim): New struct type for use inside
c_parser_omp_variable_list.
(c_parser_omp_variable_list): Allow multiple levels of array and
component accesses in array section base-pointer expression.
(c_parser_omp_clause_to): Set 'allow_deref' to true in call to
c_parser_omp_var_list_parens.
(c_parser_omp_clause_from): Likewise.
* c-typeck.c (handle_omp_array_sections_1): Extend allowed range
of base-pointer expressions involving INDIRECT/MEM/ARRAY_REF and
POINTER_PLUS_EXPR.
(c_finish_omp_clauses): Extend allowed range of expressions
involving INDIRECT/MEM/ARRAY_REF and POINTER_PLUS_EXPR.

gcc/cp/ChangeLog:

* parser.c (struct omp_dim): New struct type for use inside
cp_parser_omp_var_list_no_open.
(cp_parser_omp_var_list_no_open): Allow multiple levels of array and
component accesses in array section base-pointer expression.
(cp_parser_omp_all_clauses): Set 'allow_deref' to true in call to
cp_parser_omp_var_list for to/from clauses.
* semantics.c (handle_omp_array_sections_1): Extend allowed range
of base-pointer expressions involving INDIRECT/MEM/ARRAY_REF and
POINTER_PLUS_EXPR.
(handle_omp_array_sections): Adjust pointer map generation of
references.
(finish_omp_clauses): Extend allowed range of expressions
involving INDIRECT/MEM/ARRAY_REF and POINTER_PLUS_EXPR.

gcc/fortran/ChangeLog:

* trans-openmp.c (gfc_trans_omp_array_section): Do not generate
GOMP_MAP_ALWAYS_POINTER map for main array maps of ARRAY_TYPE type.

gcc/ChangeLog:

* gimplify.c (extract_base_bit_offset): Add 'tree *offsetp' parameter,
accommodate case where 'offset' return of

[PATCH 4/8] Rewrite GOMP_MAP_ATTACH_DETACH mappings unconditionally

2021-08-11 Thread Julian Brown

It never makes sense for a GOMP_MAP_ATTACH_DETACH mapping to survive
beyond gimplify.c, so this patch rewrites such mappings to GOMP_MAP_ATTACH
or GOMP_MAP_DETACH unconditionally (rather than checking for a list
of types of OpenACC or OpenMP constructs), in cases where it hasn't
otherwise been done already in the preceding code.

Previously posted here:

  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570399.html
  https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571711.html (og11)

2021-06-02  Julian Brown  

gcc/
* gimplify.c (gimplify_scan_omp_clauses): Simplify condition
for changing GOMP_MAP_ATTACH_DETACH to GOMP_MAP_ATTACH or
GOMP_MAP_DETACH.
---
 gcc/gimplify.c | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index fb35d240b34..141ef6b2b1e 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -9773,15 +9773,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
skip_map_struct:
  ;
}
- else if ((code == OACC_ENTER_DATA
-   || code == OACC_EXIT_DATA
-   || code == OACC_DATA
-   || code == OACC_PARALLEL
-   || code == OACC_KERNELS
-   || code == OACC_SERIAL
-   || code == OMP_TARGET_ENTER_DATA
-   || code == OMP_TARGET_EXIT_DATA)
-  && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH)
+ else if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH)
{
  gomp_map_kind k = ((code == OACC_EXIT_DATA
  || code == OMP_TARGET_EXIT_DATA)
-- 
2.29.2

[PATCH 1/8] Improve OpenMP target support for C++ [PR92120 v4b]

2021-08-11 Thread Julian Brown

From: Chung-Lin Tang 

This is a version "v4b" of a patch by Chung-Lin, merged to current
mainline. All errors introduced are my own! Previously posted here:

https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573166.html

Chung-Lin's description from the last submission follows.

This patch is the "v4" version of my PR92120 patch, v3 was here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570886.html

(there I listed the various patches from the devel/omp/gcc-10 branch that
were combined, which I won't repeat here).

Basically this v4 adds fixes for lambda capture, which was already
pushed to devel/omp/gcc-11 yesterday:

https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572988.html

Thanks,
Chung-Lin

gcc/cp/
* cp-tree.h (finish_omp_target): New declaration.
(finish_omp_target_clauses): Likewise.
* parser.c (cp_parser_omp_clause_map): Adjust call to
cp_parser_omp_var_list_no_open to set 'allow_deref' argument to true.
(cp_parser_omp_target): Factor out code, adjust into calls to new
function finish_omp_target.
* pt.c (tsubst_expr): Add call to finish_omp_target_clauses for
OMP_TARGET case.
* semantics.c (handle_omp_array_sections_1): Add handling to create
'this->member' from 'member' FIELD_DECL.
(handle_omp_array_sections): Likewise.
(finish_omp_clauses): Likewise. Adjust to allow 'this[]' in OpenMP
map clauses. Handle 'A->member' case in map clauses.
(struct omp_target_walk_data): New struct for walking over
target-directive tree body.
(finish_omp_target_clauses_r): New function for tree walk.
(finish_omp_target_clauses): New function.
(finish_omp_target): New function.

gcc/c/
* c-parser.c (c_parser_omp_clause_map): Set 'allow_deref' argument in
call to c_parser_omp_variable_list to 'true'.
* c-typeck.c (handle_omp_array_sections_1): Add strip of MEM_REF in
array base handling.
(c_finish_omp_clauses): Handle 'A->member' case in map clauses.

gcc/
* gimplify.c ("tree-hash-traits.h"): Add include.
(gimplify_scan_omp_clauses): Change struct_map_to_clause to type
hash_map<tree_operand_hash, tree> *. Adjust struct map handling to handle
cases of *A and A->B expressions. Under !DECL_P case of
GOMP_CLAUSE_MAP handling, add STRIP_NOPS for indir_p case, add to
struct_deref_set for map(*ptr_to_struct) cases. Add MEM_REF case when
handling component_ref_p case. Add unshare_expr and gimplification
when created GOMP_MAP_STRUCT is not a DECL. Add code to add
firstprivate pointer for *pointer-to-struct case.
(gimplify_adjust_omp_clauses): Move GOMP_MAP_STRUCT removal code for
exit data directives code to earlier position.
* omp-low.c (lower_omp_target):
Handle GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION, and
GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION map kinds.
* tree-pretty-print.c (dump_omp_clause): Likewise.

gcc/testsuite/
* gcc.dg/gomp/target-3.c: New testcase.
* g++.dg/gomp/target-3.C: New testcase.
* g++.dg/gomp/target-lambda-1.C: New testcase.
* g++.dg/gomp/target-lambda-2.C: New testcase.
* g++.dg/gomp/target-this-1.C: New testcase.
* g++.dg/gomp/target-this-2.C: New testcase.
* g++.dg/gomp/target-this-3.C: New testcase.
* g++.dg/gomp/target-this-4.C: New testcase.
* g++.dg/gomp/target-this-5.C: New testcase.
* g++.dg/gomp/this-2.C: Adjust testcase.

include/
* gomp-constants.h (enum gomp_map_kind):
Add GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION, and
GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION map kinds.
(GOMP_MAP_POINTER_P):
Include GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION.

libgomp/
* libgomp.h (gomp_attach_pointer): Add bool parameter.
* oacc-mem.c (acc_attach_async): Update call to gomp_attach_pointer.
(goacc_enter_data_internal): Likewise.
* target.c (gomp_map_vars_existing): Update assert condition to
include GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION.
(gomp_map_pointer): Add 'bool allow_zero_length_array_sections'
parameter, add support for mapping a pointer with NULL target.
(gomp_attach_pointer): Add 'bool allow_zero_length_array_sections'
parameter, add support for attaching a pointer with NULL target.
(gomp_map_vars_internal): Update calls to gomp_map_pointer and
gomp_attach_pointer, add handling for
GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION, and
GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION cases.
* testsuite/libgomp.c++/target-23.C: New testcase.
* testsuite/libgomp.c++/target-lambda-1.C: New testcase.
* testsuite/libgomp.c++/target-lambda-2.C: New testcase.
* testsuite/libgomp.c++/target-this-1.C: New testcase.

[PATCH 2/8] OpenMP 5.0: Implement relaxation of implicit map vs. existing device mappings

2021-08-11 Thread Julian Brown

From: Chung-Lin Tang 

This is a version of a patch by Chung-Lin, merged to current mainline. Any
errors introduced are my own! It was previously posted here:

https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570365.html

Chung-Lin's description from the previous submission follows. (Edit:
it seems that I may have picked up the "non-mainline" version of this
patch to merge, but I think the only relevant differences are in the
included tests.)

This patch implements relaxing the requirements when a map with the implicit
attribute encounters an overlapping existing map. As the OpenMP 5.0 spec
describes on page 320, lines 18-27 (and 5.1 spec, page 352, lines 13-22):

"If a single contiguous part of the original storage of a list item with an
 implicit data-mapping attribute has corresponding storage in the device data
 environment prior to a task encountering the construct that is associated with
 the map clause, only that part of the original storage will have corresponding
 storage in the device data environment as a result of the map clause."

Also tracked in the OpenMP spec context as issue #1463:
https://github.com/OpenMP/spec/issues/1463

2021-05-05  Chung-Lin Tang  

include/ChangeLog:

* gomp-constants.h (GOMP_MAP_IMPLICIT): New special map kind bits value.
(GOMP_MAP_FLAG_SPECIAL_BITS): Define helper mask for whole set of
special map kind bits.
(GOMP_MAP_NONCONTIG_ARRAY_P): Adjust test for non-contiguous array map
kind bits to be more specific.
(GOMP_MAP_IMPLICIT_P): New predicate macro for implicit map kinds.

gcc/ChangeLog:

* tree.h (OMP_CLAUSE_MAP_IMPLICIT_P): New access macro for 'implicit'
bit, using 'base.deprecated_flag' field of tree_node.
* tree-pretty-print.c (dump_omp_clause): Add support for printing
implicit attribute in tree dumping.
* gimplify.c (gimplify_adjust_omp_clauses_1):
Set OMP_CLAUSE_MAP_IMPLICIT_P to 1 if map clause is implicitly created.
(gimplify_adjust_omp_clauses): Adjust place of adding implicitly created
clauses, from simple append, to starting of list, after non-map clauses.
* omp-low.c (lower_omp_target): Add GOMP_MAP_IMPLICIT bits into kind
values passed to libgomp for implicit maps.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/target-implicit-map-1.c: New test.
* c-c++-common/goacc/combined-reduction.c: Adjust scan test pattern.
* c-c++-common/goacc/firstprivate-mappings-1.c: Likewise.
* c-c++-common/goacc/mdc-1.c: Likewise.
* c-c++-common/goacc/reduction-1.c: Likewise.
* c-c++-common/goacc/reduction-2.c: Likewise.
* c-c++-common/goacc/reduction-3.c: Likewise.
* c-c++-common/goacc/reduction-4.c: Likewise.
* c-c++-common/goacc/reduction-8.c: Likewise.
* g++.dg/goacc/firstprivate-mappings-1.C: Likewise.
* g++.dg/gomp/target-lambda-1.C: Likewise.
* g++.dg/gomp/target-this-3.C: Likewise.
* g++.dg/gomp/target-this-4.C: Likewise.
* gfortran.dg/goacc/common-block-3.f90: Likewise.
* gfortran.dg/goacc/loop-tree-1.f90: Likewise.
* gfortran.dg/goacc/private-explicit-kernels-1.f95: Likewise.
* gfortran.dg/goacc/private-predetermined-kernels-1.f95: Likewise.

libgomp/ChangeLog:

* target.c (gomp_map_vars_existing): Add 'bool implicit' parameter, add
implicit map handling to allow a "superset" existing map as valid case.
(get_kind): Adjust to filter out GOMP_MAP_IMPLICIT bits in return value.
(get_implicit): New function to extract implicit status.
(gomp_map_fields_existing): Adjust arguments in calls to
gomp_map_vars_existing, and add uses of get_implicit.
(gomp_map_vars_internal): Likewise.

* testsuite/libgomp.c-c++-common/target-implicit-map-1.c: New test.
---
 gcc/gimplify.c| 11 ++-
 gcc/omp-low.c | 13 
 .../c-c++-common/goacc/combined-reduction.c   |  4 +-
 .../goacc/firstprivate-mappings-1.c   |  6 +-
 gcc/testsuite/c-c++-common/goacc/mdc-1.c  |  2 +-
 .../c-c++-common/goacc/reduction-1.c  |  4 +-
 .../c-c++-common/goacc/reduction-2.c  |  4 +-
 .../c-c++-common/goacc/reduction-3.c  |  4 +-
 .../c-c++-common/goacc/reduction-4.c  |  4 +-
 .../c-c++-common/gomp/target-implicit-map-1.c | 39 ++
 .../g++.dg/goacc/firstprivate-mappings-1.C|  2 +-
 gcc/testsuite/g++.dg/gomp/target-lambda-1.C   |  6 +-
 gcc/testsuite/g++.dg/gomp/target-this-3.C |  4 +-
 gcc/testsuite/g++.dg/gomp/target-this-4.C |  4 +-
 .../gfortran.dg/goacc/common-block-3.f90  |  8 +-
 .../gfortran.dg/goacc/loop-tree-1.f90 |  2 +-
 .../goacc/private-explicit-kernels-1.f95  |  4 +-
 .../goacc/private-predetermined-kernels-1.f95 |  4 +-
 gcc/tree-pretty-print.c   |  3 +
 gcc/tree.h|  5 ++

[PATCH 0/8] OpenMP 5.0: Topological sort for OpenMP 5.0 base pointers

2021-08-11 Thread Julian Brown

Hi,

This patch series contains a reimplementation of the clause-ordering code
in gimplify.c (omp_target_reorder_clauses), in anticipation of extending
support for lvalues in mapping clauses for OpenMP 5.0. This builds (or
will build) on a series of patches by Chung-Lin and myself that haven't
completed review for mainline yet: I've included these for context,
but I'm not proposing "taking ownership" of Chung-Lin's patches -- my
assumption is that those patches will make it upstream without fundamental
changes, so the work built on top of them will still be largely valid.

(The merges of Chung-Lin's patches on top of current
mainline aren't entirely trivial because of a clash with
https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573600.html.
Hopefully my merge attempt helps, though I've probably made mistakes in
parts of the code I don't fully understand!).

Further commentary on individual patches -- particularly the 8th, which
is intended as an RFC, and is the main reason for posting this series.

Tested with offloading to NVPTX, and bootstrapped.

Thanks,

Julian

Chung-Lin Tang (3):
  Improve OpenMP target support for C++ [PR92120 v4b]
  OpenMP 5.0: Implement relaxation of implicit map vs. existing device
mappings
  Remove array section base-pointer mapping semantics, and other
front-end adjustments (mainline trunk)

Julian Brown (5):
  Rewrite GOMP_MAP_ATTACH_DETACH mappings unconditionally
  OpenMP/OpenACC: Move array_ref/indirect_ref handling code out of
extract_base_bit_offset
  OpenACC/OpenMP: Refactor struct lowering in gimplify.c
  OpenACC: Rework indirect struct handling in gimplify.c
  OpenMP 5.0: [WIP, RFC] Clause ordering for OpenMP 5.0 (topological
sorting by base pointer)

 gcc/c/c-parser.c  |   57 +-
 gcc/c/c-typeck.c  |  103 +-
 gcc/cp/cp-tree.h  |2 +
 gcc/cp/parser.c   |  126 +-
 gcc/cp/pt.c   |5 +
 gcc/cp/semantics.c|  677 ++-
 gcc/fortran/trans-openmp.c|   23 +-
 gcc/gimplify.c| 1665 +
 gcc/omp-low.c |   31 +-
 .../c-c++-common/goacc/combined-reduction.c   |4 +-
 .../goacc/deep-copy-arrayofstruct.c   |5 +-
 .../goacc/firstprivate-mappings-1.c   |6 +-
 gcc/testsuite/c-c++-common/goacc/mdc-1.c  |2 +-
 .../c-c++-common/goacc/reduction-1.c  |4 +-
 .../c-c++-common/goacc/reduction-2.c  |4 +-
 .../c-c++-common/goacc/reduction-3.c  |4 +-
 .../c-c++-common/goacc/reduction-4.c  |4 +-
 .../c-c++-common/gomp/target-enter-data-1.c   |   24 +
 .../c-c++-common/gomp/target-implicit-map-1.c |   39 +
 .../c-c++-common/gomp/target-implicit-map-2.c |   52 +
 .../g++.dg/goacc/firstprivate-mappings-1.C|2 +-
 gcc/testsuite/g++.dg/goacc/member-array-acc.C |   13 +
 gcc/testsuite/g++.dg/gomp/member-array-omp.C  |   13 +
 gcc/testsuite/g++.dg/gomp/target-3.C  |   36 +
 gcc/testsuite/g++.dg/gomp/target-lambda-1.C   |   94 +
 gcc/testsuite/g++.dg/gomp/target-lambda-2.C   |   35 +
 gcc/testsuite/g++.dg/gomp/target-this-1.C |   33 +
 gcc/testsuite/g++.dg/gomp/target-this-2.C |   49 +
 gcc/testsuite/g++.dg/gomp/target-this-3.C |  105 ++
 gcc/testsuite/g++.dg/gomp/target-this-4.C |  107 ++
 gcc/testsuite/g++.dg/gomp/target-this-5.C |   34 +
 gcc/testsuite/g++.dg/gomp/this-2.C|   24 +-
 gcc/testsuite/gcc.dg/gomp/target-3.c  |   16 +
 .../gfortran.dg/goacc/common-block-3.f90  |8 +-
 .../gfortran.dg/goacc/loop-tree-1.f90 |2 +-
 .../goacc/private-explicit-kernels-1.f95  |4 +-
 .../goacc/private-predetermined-kernels-1.f95 |4 +-
 gcc/tree-pretty-print.c   |   11 +
 gcc/tree.h|5 +
 include/gomp-constants.h  |   27 +-
 libgomp/libgomp.h |2 +-
 libgomp/oacc-mem.c|7 +-
 libgomp/target.c  |  246 ++-
 libgomp/testsuite/libgomp.c++/target-11.C |   14 +-
 libgomp/testsuite/libgomp.c++/target-12.C |2 +-
 libgomp/testsuite/libgomp.c++/target-15.C |   20 +-
 libgomp/testsuite/libgomp.c++/target-16.C |   20 +-
 libgomp/testsuite/libgomp.c++/target-17.C |   20 +-
 libgomp/testsuite/libgomp.c++/target-21.C |8 +-
 libgomp/testsuite/libgomp.c++/target-23.C |   34 +
 .../testsuite/libgomp.c++/target-lambda-1.C   |   86 +
 .../testsuite/libgomp.c++/target-lambda-2.C   |   30 +
 libgomp/testsuite/libgomp.c++/target-this-1.C |   29 +
 libgomp/testsuite/libgomp.c++/target-this-2.C |   47 +
 libgomp/testsuite/libgomp.c++/target-this-3.C |   99 +
 libgomp/testsuite/libgomp.c++/target-this-4.C |  104 +
 libgomp/testsuite/libgomp.c++/target-this-5.C |   30 +

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches



> On Aug 11, 2021, at 11:55 AM, Richard Biener  wrote:
> 
> On August 11, 2021 6:22:00 PM GMT+02:00, Qing Zhao  
> wrote:
>> 
>> 
>>> On Aug 11, 2021, at 10:53 AM, Richard Biener  wrote:
>>> 
>>> On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao  
>>> wrote:
 I modified the routine “gimple_add_init_for_auto_var” as the following:
 
 /* Generate initialization to automatic variable DECL based on INIT_TYPE.
 Build a call to internal const function DEFERRED_INIT:
 1st argument: SIZE of the DECL;
 2nd argument: INIT_TYPE;
 3rd argument: IS_VLA, 0 NO, 1 YES;
 
 as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA).  */
 static void
 gimple_add_init_for_auto_var (tree decl,
enum auto_init_type init_type,
bool is_vla,
gimple_seq *seq_p)
 {
 gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl));
 gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
 tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl));
 
 tree init_type_node
  = build_int_cst (integer_type_node, (int) init_type);
 tree is_vla_node
  = build_int_cst (integer_type_node, (int) is_vla);
 
 tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, 
 IFN_DEFERRED_INIT,
  TREE_TYPE (decl), 3,
  decl_size, init_type_node,
  is_vla_node);
 
 /* If this DECL is a VLA, a temporary address variable for it has been
   created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl),
   we should use it as the LHS of the call.  */
 
 tree lhs_call
  = is_vla ? DECL_VALUE_EXPR (decl) : decl;
 gimplify_assign (lhs_call, call, seq_p);
 }
 
 With this change, the current issue is resolved, the gimple dump now is:
 
 (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1);
 
 However, there is another new issue:
 
 For the following testing case:
 
 ==
 [opc@qinzhao-ol8u3-x86 gcc]$ cat t.c
 int bar;
 
 extern void decode_reloc(int *);
 
 void testfunc()
 {
 int alt_reloc;
 
 decode_reloc(&alt_reloc);
 
 if (alt_reloc) /* { dg-warning "may be used uninitialized" } */
  bar = 42; 
 }
 =
 
 In the above, the auto var “alt_reloc” is address taken, then the gimple 
 dump for it when compiled with -ftrivial-auto-var-init=zero is:
 
 void testfunc ()
 {
 int alt_reloc;
 
 try
  {
_1 = .DEFERRED_INIT (4, 2, 0);
alt_reloc = _1;
decode_reloc (&alt_reloc);
alt_reloc.0_2 = alt_reloc;
if (alt_reloc.0_2 != 0) goto ; else goto ;
:
bar = 42;
:
  }
 finally
  {
alt_reloc = {CLOBBER};
  }
 }
 
 I.e, instead of the expected IR:
 
 alt_reloc = .DEFERRED_INIT (4, 2, 0);
 
 We got the following:
 
 _1 = .DEFERRED_INIT (4, 2, 0);
alt_reloc = _1;
 
 I guess the temp “_1” is created because “alt_reloc” is address taken. 
>>> 
>>> Yes and no. The reason is that alt_reloc is memory (because it is address 
>>> taken) and that GIMPLE says that register typed stores need to use a 
>>> is_gimple_val RHS which the call is not.
>> 
>> Okay.
>>> 
 My questions:
 
 Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is 
 address taken? 
>>> 
>>> I think so. Note it doesn't necessarily need address taking but any other 
>>> reason that prevents SSA rewriting the variable suffices. 
>> 
>> You mean, in addition to “address taken”, there are other situations that 
>> will introduce such IR:
>> 
>> temp = .DEFERRED_INIT();
>> auto_var = temp;
>> 
>> So, such IR is unavoidable and we have to handle it?
> 
> Yes. 
> 
>> If we have to handle it,  what’s the best way to do it?
>> 
>> The solution in my mind is:
>> 1. During uninitialized analysis phase, following the data flow to connect 
>> .DEFERRED_INIT to “auto_var”, and then decide that “auto_var” is 
>> uninitialized.
> 
> Yes. Basically if there's an artificial variable auto initialized you have to 
> look at its uses. 
Okay. 

> 
>> 2. During RTL expansion, following the data flow to connect .DEFERRED_INIT 
>> to “auto_var”, and then delete “temp”, and then expand .DEFERRED_INIT to 
>> auto_var.
> 
> That shouldn't be necessary. You'd initialize a temporary register which is 
> then copied to the real variable. That's good enough and should be optimized 
> by the RTL pipeline. 

Okay, I see. 

I will try to update the code to see whether all the issues can be resolved.

Thanks a lot for your help.

Qing
> 
>> Let me know your comments and suggestions on this.
>> 
>> 
>>> 
>>> The only other option is to force .DEFERRED_INIT making the LHS address 
>>> taken which I think could

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Richard Biener via Gcc-patches

On August 11, 2021 6:22:00 PM GMT+02:00, Qing Zhao  wrote:
>
>
>> On Aug 11, 2021, at 10:53 AM, Richard Biener  wrote:
>> 
>> On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao  
>> wrote:
>>> I modified the routine “gimple_add_init_for_auto_var” as the following:
>>> 
>>> /* Generate initialization to automatic variable DECL based on INIT_TYPE.
>>>  Build a call to internal const function DEFERRED_INIT:
>>>  1st argument: SIZE of the DECL;
>>>  2nd argument: INIT_TYPE;
>>>  3rd argument: IS_VLA, 0 NO, 1 YES;
>>> 
>>>  as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA).  */
>>> static void
>>> gimple_add_init_for_auto_var (tree decl,
>>> enum auto_init_type init_type,
>>> bool is_vla,
>>> gimple_seq *seq_p)
>>> {
>>> gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl));
>>> gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
>>> tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl));
>>> 
>>> tree init_type_node
>>>   = build_int_cst (integer_type_node, (int) init_type);
>>> tree is_vla_node
>>>   = build_int_cst (integer_type_node, (int) is_vla);
>>> 
>>> tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, 
>>> IFN_DEFERRED_INIT,
>>>   TREE_TYPE (decl), 3,
>>>   decl_size, init_type_node,
>>>   is_vla_node);
>>> 
>>> /* If this DECL is a VLA, a temporary address variable for it has been
>>>created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl),
>>>we should use it as the LHS of the call.  */
>>> 
>>> tree lhs_call
>>>   = is_vla ? DECL_VALUE_EXPR (decl) : decl;
>>> gimplify_assign (lhs_call, call, seq_p);
>>> }
>>> 
>>> With this change, the current issue is resolved, the gimple dump now is:
>>> 
>>> (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1);
>>> 
>>> However, there is another new issue:
>>> 
>>> For the following testing case:
>>> 
>>> ==
>>> [opc@qinzhao-ol8u3-x86 gcc]$ cat t.c
>>> int bar;
>>> 
>>> extern void decode_reloc(int *);
>>> 
>>> void testfunc()
>>> {
>>> int alt_reloc;
>>> 
>>> decode_reloc(&alt_reloc);
>>> 
>>> if (alt_reloc) /* { dg-warning "may be used uninitialized" } */
>>>   bar = 42; 
>>> }
>>> =
>>> 
>>> In the above, the auto var “alt_reloc” is address taken, then the gimple 
>>> dump for it when compiled with -ftrivial-auto-var-init=zero is:
>>> 
>>> void testfunc ()
>>> {
>>> int alt_reloc;
>>> 
>>> try
>>>   {
>>> _1 = .DEFERRED_INIT (4, 2, 0);
>>> alt_reloc = _1;
>>> decode_reloc (&alt_reloc);
>>> alt_reloc.0_2 = alt_reloc;
>>> if (alt_reloc.0_2 != 0) goto ; else goto ;
>>> :
>>> bar = 42;
>>> :
>>>   }
>>> finally
>>>   {
>>> alt_reloc = {CLOBBER};
>>>   }
>>> }
>>> 
>>> I.e, instead of the expected IR:
>>> 
>>> alt_reloc = .DEFERRED_INIT (4, 2, 0);
>>> 
>>> We got the following:
>>> 
>>> _1 = .DEFERRED_INIT (4, 2, 0);
>>> alt_reloc = _1;
>>> 
>>> I guess the temp “_1” is created because “alt_reloc” is address taken. 
>> 
>> Yes and no. The reason is that alt_reloc is memory (because it is address 
>> taken) and that GIMPLE says that register typed stores need to use a 
>> is_gimple_val RHS which the call is not.
>
>Okay.
>> 
>>> My questions:
>>> 
>>> Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is 
>>> address taken? 
>> 
>> I think so. Note it doesn't necessarily need address taking but any other 
>> reason that prevents SSA rewriting the variable suffices. 
>
>You mean, in addition to “address taken”, there are other situations that will 
>introduce such IR:
>
>temp = .DEFERRED_INIT();
>auto_var = temp;
>
>So, such IR is unavoidable and we have to handle it?

Yes. 

>If we have to handle it,  what’s the best way to do it?
>
>The solution in my mind is:
>1. During uninitialized analysis phase, following the data flow to connect 
>.DEFERRED_INIT to “auto_var”, and then decide that “auto_var” is uninitialized.

Yes. Basically if there's an artificial variable auto initialized you have to 
look at its uses. 

>2. During RTL expansion, following the data flow to connect .DEFERRED_INIT to 
>“auto_var”, and then delete “temp”, and then expand .DEFERRED_INIT to auto_var.

That shouldn't be necessary. You'd initialize a temporary register which is 
then copied to the real variable. That's good enough and should be optimized by 
the RTL pipeline. 

>Let me know your comments and suggestions on this.
>
>
>> 
>> The only other option is to force .DEFERRED_INIT making the LHS address taken 
>> which I think could be achieved by passing it the address as argument 
>> instead of having a LHS. But let's not go down this route - it will have 
>> quite bad behavior on alias analysis and optimization. 
>
>Okay.
>
>Qing
>> 
>>> If so, the “uninitialized analysis” phase needs to be further adjusted to 
>>> specially handle such IR. 
>>> 
>>> If not, what should we do when the auto

Re: [ARM] PR66791: Replace builtins for vdup_n and vmov_n intrinsics

2021-08-11 Thread Christophe Lyon via Gcc-patches

On Thu, Jun 24, 2021 at 6:29 PM Kyrylo Tkachov via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

>
>
> > -Original Message-
> > From: Prathamesh Kulkarni 
> > Sent: 24 June 2021 12:11
> > To: gcc Patches ; Kyrylo Tkachov
> > 
> > Subject: [ARM] PR66791: Replace builtins for vdup_n and vmov_n intrinsics
> >
> > Hi,
> > This patch replaces builtins for vdup_n and vmov_n.
> > The patch results in regression for pr51534.c.
> > Consider following function:
> >
> > uint8x8_t f1 (uint8x8_t a) {
> >   return vcgt_u8(a, vdup_n_u8(0));
> > }
> >
> > code-gen before patch:
> > f1:
> > vmov.i32  d16, #0  @ v8qi
> > vcgt.u8 d0, d0, d16
> > bx lr
> >
> > code-gen after patch:
> > f1:
> > vceq.i8 d0, d0, #0
> > vmvn    d0, d0
> > bx lr
> >
> > I am not sure which one is better tho ?
>
>
Hi Prathamesh,

This patch introduces a regression on non-hardfp configs (eg
arm-linux-gnueabi or arm-eabi):
FAIL:  gcc:gcc.target/arm/arm.exp=gcc.target/arm/pr51534.c
scan-assembler-times vmov.i32[ \t]+[dD][0-9]+, #0xffffffff 3
FAIL:  gcc:gcc.target/arm/arm.exp=gcc.target/arm/pr51534.c
scan-assembler-times vmov.i32[ \t]+[qQ][0-9]+, #4294967295 3

Can you fix this?

Thanks

Christophe



> I think they're equivalent in practice, in any case the patch itself is
> good (move away from RTL builtins).
> Ok.
> Thanks,
> Kyrill
>
> >
> > Also, this patch regressed bf16_dup.c on arm-linux-gnueabi,
> > which is due to a missed opt in lowering. I had filed it as
> > PR98435, and posted a fix for it here:
> > https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572648.html
> >
> > Thanks,
> > Prathamesh
>

Re: [PATCH] rs6000: Fix ICE expanding lxvp and stxvp gimple built-ins [PR101849]

2021-08-11 Thread Bill Schmidt via Gcc-patches


Hi Peter,

LGTM.  Still needs maintainer review, of course. :)

Bill

On 8/10/21 6:37 PM, Peter Bergner wrote:

PR101849 shows we ICE on a test case when we pass a non __vector_pair *
pointer to the __builtin_vsx_lxvp and __builtin_vsx_stxvp built-ins
that is cast to __vector_pair *.  The problem is that when we expand
the built-in, the cast has already been removed from gimple and we are
only given the base pointer.  The solution used here (which fixes the ICE)
is to catch this case and convert the pointer to a __vector_pair * pointer
when expanding the built-in.

This passed bootstrap and regression testing on powerpc64le-linux with
no regressions.  Ok for mainline?  This also affects GCC 11 and 10, so
ok there too after it has baked on trunk for a few days?

Peter


gcc/
PR target/101849
* config/rs6000/rs6000-call.c (rs6000_gimple_fold_mma_builtin): Cast
pointer to __vector_pair *.

gcc/testsuite/
PR target/101849
* gcc.target/powerpc/pr101849.c: New test.


diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 904e104c058..d04011c0489 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -11919,6 +11919,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator 
*gsi)
tree offset = gimple_call_arg (stmt, 0);
tree ptr = gimple_call_arg (stmt, 1);
tree lhs = gimple_call_lhs (stmt);
+  if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node)
+   ptr = build1 (VIEW_CONVERT_EXPR,
+ build_pointer_type (vector_pair_type_node), ptr);
tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
   TREE_TYPE (ptr), ptr, offset));
gimplify_assign (lhs, mem, &new_seq);
@@ -11932,6 +11935,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator 
*gsi)
tree src = gimple_call_arg (stmt, 0);
tree offset = gimple_call_arg (stmt, 1);
tree ptr = gimple_call_arg (stmt, 2);
+  if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node)
+   ptr = build1 (VIEW_CONVERT_EXPR,
+ build_pointer_type (vector_pair_type_node), ptr);
tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR,
   TREE_TYPE (ptr), ptr, offset));
gimplify_assign (mem, src, &new_seq);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr101849.c 
b/gcc/testsuite/gcc.target/powerpc/pr101849.c
new file mode 100644
index 00000000000..6d2e3b79282
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr101849.c
@@ -0,0 +1,19 @@
+/* PR target/101849 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+/* Verify we do not ICE on the tests below.  */
+
+__vector_pair vp;
+void
+foo (double *x)
+{
+   vp = __builtin_vsx_lxvp (0, (__vector_pair *)(void *)x);
+}
+
+void
+bar (__vector_pair *src, double *x)
+{
+  __builtin_vsx_stxvp (*src, 0, (__vector_pair *)(void *)x);
+}

Re: [PATCH] rs6000: Make some BIFs vectorized on P10

2021-08-11 Thread Bill Schmidt via Gcc-patches


Hi Kewen,

FWIW, it's easier on reviewers if you include the patch inline instead 
of as an attachment.


On 8/11/21 1:56 AM, Kewen.Lin wrote:

Hi,

This patch adds support that lets the vectorizer replace the
scalar version of some built-in functions with the
corresponding vector version on Power10.

Bootstrapped & regtested on powerpc64le-linux-gnu {P9,P10}
and powerpc64-linux-gnu P8.

Is it ok for trunk?

BR,
Kewen
-
gcc/ChangeLog:

* config/rs6000/rs6000.c (rs6000_builtin_md_vectorized_function): Add
support for some built-in functions vectorized on Power10.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/dive-vectorize-1.c: New test.
* gcc.target/powerpc/dive-vectorize-1.h: New test.
* gcc.target/powerpc/dive-vectorize-2.c: New test.
* gcc.target/powerpc/dive-vectorize-2.h: New test.
* gcc.target/powerpc/dive-vectorize-run-1.c: New test.
* gcc.target/powerpc/dive-vectorize-run-2.c: New test.
* gcc.target/powerpc/p10-bifs-vectorize-1.c: New test.
* gcc.target/powerpc/p10-bifs-vectorize-1.h: New test.
* gcc.target/powerpc/p10-bifs-vectorize-run-1.c: New test.


---
 gcc/config/rs6000/rs6000.c| 55 +++
 .../gcc.target/powerpc/dive-vectorize-1.c | 11 
 .../gcc.target/powerpc/dive-vectorize-1.h | 22 
 .../gcc.target/powerpc/dive-vectorize-2.c | 12 
 .../gcc.target/powerpc/dive-vectorize-2.h | 22 
 .../gcc.target/powerpc/dive-vectorize-run-1.c | 52 ++
 .../gcc.target/powerpc/dive-vectorize-run-2.c | 53 ++
 .../gcc.target/powerpc/p10-bifs-vectorize-1.c | 15 +
 .../gcc.target/powerpc/p10-bifs-vectorize-1.h | 40 ++
 .../powerpc/p10-bifs-vectorize-run-1.c| 45 +++
 10 files changed, 327 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 279f00cc648..3eac1d05101 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5785,6 +5785,61 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree 
type_out,
 default:
   break;
 }
+
+  machine_mode in_vmode = TYPE_MODE (type_in);
+  machine_mode out_vmode = TYPE_MODE (type_out);
+
+  /* Power10 supported vectorized built-in functions.  */
+  if (TARGET_POWER10
+  && in_vmode == out_vmode
+  && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
+{
+  machine_mode exp_mode = DImode;
+  machine_mode exp_vmode = V2DImode;
+  enum rs6000_builtins vname = RS6000_BUILTIN_COUNT;

Using this as a flag value looks unnecessary.  Is this just being done to 
silence a warning?

+  switch (fn)
+   {
+   case MISC_BUILTIN_DIVWE:
+   case MISC_BUILTIN_DIVWEU:
+ exp_mode = SImode;
+ exp_vmode = V4SImode;
+ if (fn == MISC_BUILTIN_DIVWE)
+   vname = P10V_BUILTIN_DIVES_V4SI;
+ else
+   vname = P10V_BUILTIN_DIVEU_V4SI;
+ break;
+   case MISC_BUILTIN_DIVDE:
+   case MISC_BUILTIN_DIVDEU:
+ if (fn == MISC_BUILTIN_DIVDE)
+   vname = P10V_BUILTIN_DIVES_V2DI;
+ else
+   vname = P10V_BUILTIN_DIVEU_V2DI;
+ break;
+   case P10_BUILTIN_CFUGED:
+ vname = P10V_BUILTIN_VCFUGED;
+ break;
+   case P10_BUILTIN_CNTLZDM:
+ vname = P10V_BUILTIN_VCLZDM;
+ break;
+   case P10_BUILTIN_CNTTZDM:
+ vname = P10V_BUILTIN_VCTZDM;
+ break;
+   case P10_BUILTIN_PDEPD:
+ vname = P10V_BUILTIN_VPDEPD;
+ break;
+   case P10_BUILTIN_PEXTD:
+ vname = P10V_BUILTIN_VPEXTD;
+ break;
+   default:
+ return NULL_TREE;
+   }
+
+  if (vname != RS6000_BUILTIN_COUNT

Check is not necessary, as you will have returned by now in that case.

Otherwise this patch LGTM.  Thanks!  Still needs maintainer approval, of course.

Bill

+ && in_mode == exp_mode
+ && in_vmode == exp_vmode)
+   return rs6000_builtin_decls[vname];
+}
+
   return NULL_TREE;
 }
 
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c 
b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
new file mode 100644
index 000..84f1b0a88f2
--- /dev/null
+++

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches



> On Aug 11, 2021, at 11:15 AM, Richard Sandiford  
> wrote:
> 
> Qing Zhao  writes:
>>> On Aug 11, 2021, at 4:02 AM, Richard Sandiford  
>>> wrote:
 I came up with the following solution:
 
 Define the IFN_DEFERRED_INIT function as:
 
  LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA);
 
  if IS_VLA is false, the LHS is the DECL itself,
  if IS_VLA is true, the LHS is the pointer to this DECL that created by
  gimplify_vla_decl.
 
 
 The benefit of this solution are:
 
 1. Resolved the invalid IR issue;
 2. The call stmt carries the address of the VLA naturally;
 
 The issue with this solution is:
 
 For VLA and non-VLA, the LHS will be different, 
 
 Do you see any other potential issues with this solution?
>>> 
>>> The idea behind the DECL version of the .DEFERRED_INIT semantics was
>>> that .DEFERRED_INIT just returns a SIZE-byte value that the caller
>>> then assigns to a SIZE-byte lhs (with the caller choosing the lhs).
>>> .DEFERRED_INIT itself doesn't read or write memory and so can be const,
>>> which in turn allows alias analysis to be more precise.
>> Yes. That’s right.
>> 
>>> 
>>> If we want to handle the VLA case using pointers instead then I think
>>> that needs to be a different IFN.
>>> 
>>> If we did handle the VLA case using pointers (not expressing an opinion
>>> on that), then it would be the caller's job to allocate the VLA and work
>>> out the address of the VLA;
>> 
>> the current routine “gimplify_vla_decl” has done this already:
>> 
>> It created a temporary variable for the address of the VLA, and created a 
>> call to “alloca” to allocate the VLA.
> 
> Right, that's what I mean.  It's this alloca that allocates the VLA
> and determines its address.  This address is therefore logically an
> input rather than an output to the following zero/pattern initialisation.
> 
> In C you wouldn't write:
> 
>  addr = alloca(size);
>  addr = initialise(size);
> 
> to allocate and initialise a size-byte buffer, because initialise()
> would need to know the address of the memory it's supposed to initialise.
> The same is true for this gimple code.

This really makes good sense to me.  :-)

> 
>> My -ftrivial-auto-var-init work just try to use the “address variable of the 
>> VLA” in the new .DEFERRED_INIT call to carry it to RTL expansion phase.
>> 
>> 
>>> this isn't something that .DEFERRED_INIT
>>> would work out on the caller's behalf.  The address of the VLA should
>>> therefore be an argument to the new IFN, rather than something that
>>> the IFN returns.
>> 
>> Then what’s the LHS of this call? Currently the major issue is the LHS is 
>> invalid gimple.
> 
> For this (different, address-taking, VLA-only) IFN, there would be no lhs.
> The IFN would be similar to a memset.

I see. 
> 
> Like I say, this is all hypothetical, based on “if we did handle the VLA
> case using pointers”.  As discussed, it would make alias analysis less
> precise.  I was just answering the question about whether there were
> potential issues.

Okay,  understood.

I will not handle the VLA case using pointers at this time. 

Per discussion with Richard Biener in the other emails, I might go the other 
route to specially handle the 

_1 = .DEFERRED_INIT (4, 2, 0);
  alt_reloc = _1;

To see whether that can resolve the issues.

Let me know your opinion.

Thanks a lot.

Qing

> 
> Thanks,
> Richard

Re: [PATCH] rs6000: Add missing unsigned info for some P10 bifs

2021-08-11 Thread Bill Schmidt via Gcc-patches


Hi Kewen,

On 8/11/21 12:44 AM, Kewen.Lin wrote:

Hi,

This patch is to make prototypes of some Power10 built-in
functions consistent with what's in the documentation, as
well as the vector version.  Otherwise, useless conversions
can be generated in gimple IR, and the vectorized versions
will have inconsistent types.

Bootstrapped & regtested on powerpc64le-linux-gnu P9 and
powerpc64-linux-gnu P8.

Is it ok for trunk?


LGTM.  Maintainers, this is necessary in the short term for the old 
builtins support, but this fragile thing that people always forget will 
go away with the new support.  What Kewen is proposing here is correct 
for now.


Thanks,
Bill



BR,
Kewen
-
gcc/ChangeLog:

* config/rs6000/rs6000-call.c (builtin_function_type): Add unsigned
signedness for some Power10 bifs.

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches



> On Aug 11, 2021, at 10:53 AM, Richard Biener  wrote:
> 
> On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao  
> wrote:
>> I modified the routine “gimple_add_init_for_auto_var” as the following:
>> 
>> /* Generate initialization to automatic variable DECL based on INIT_TYPE.
>>  Build a call to internal const function DEFERRED_INIT:
>>  1st argument: SIZE of the DECL;
>>  2nd argument: INIT_TYPE;
>>  3rd argument: IS_VLA, 0 NO, 1 YES;
>> 
>>  as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA).  */
>> static void
>> gimple_add_init_for_auto_var (tree decl,
>> enum auto_init_type init_type,
>> bool is_vla,
>> gimple_seq *seq_p)
>> {
>> gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl));
>> gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
>> tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl));
>> 
>> tree init_type_node
>>   = build_int_cst (integer_type_node, (int) init_type);
>> tree is_vla_node
>>   = build_int_cst (integer_type_node, (int) is_vla);
>> 
>> tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, 
>> IFN_DEFERRED_INIT,
>>   TREE_TYPE (decl), 3,
>>   decl_size, init_type_node,
>>   is_vla_node);
>> 
>> /* If this DECL is a VLA, a temporary address variable for it has been
>>created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl),
>>we should use it as the LHS of the call.  */
>> 
>> tree lhs_call
>>   = is_vla ? DECL_VALUE_EXPR (decl) : decl;
>> gimplify_assign (lhs_call, call, seq_p);
>> }
>> 
>> With this change, the current issue is resolved, the gimple dump now is:
>> 
>> (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1);
>> 
>> However, there is another new issue:
>> 
>> For the following testing case:
>> 
>> ==
>> [opc@qinzhao-ol8u3-x86 gcc]$ cat t.c
>> int bar;
>> 
>> extern void decode_reloc(int *);
>> 
>> void testfunc()
>> {
>> int alt_reloc;
>> 
>> decode_reloc(&alt_reloc);
>> 
>> if (alt_reloc) /* { dg-warning "may be used uninitialized" } */
>>   bar = 42; 
>> }
>> =
>> 
>> In the above, the auto var “alt_reloc” is address taken, then the gimple 
>> dump for it when compiled with -ftrivial-auto-var-init=zero is:
>> 
>> void testfunc ()
>> {
>> int alt_reloc;
>> 
>> try
>>   {
>> _1 = .DEFERRED_INIT (4, 2, 0);
>> alt_reloc = _1;
>> decode_reloc (&alt_reloc);
>> alt_reloc.0_2 = alt_reloc;
>> if (alt_reloc.0_2 != 0) goto ; else goto ;
>> :
>> bar = 42;
>> :
>>   }
>> finally
>>   {
>> alt_reloc = {CLOBBER};
>>   }
>> }
>> 
>> I.e, instead of the expected IR:
>> 
>> alt_reloc = .DEFERRED_INIT (4, 2, 0);
>> 
>> We got the following:
>> 
>> _1 = .DEFERRED_INIT (4, 2, 0);
>> alt_reloc = _1;
>> 
>> I guess the temp “_1” is created because “alt_reloc” is address taken. 
> 
> Yes and no. The reason is that alt_reloc is memory (because it is address 
> taken) and that GIMPLE says that register typed stores need to use a 
> is_gimple_val RHS which the call is not.

Okay.
> 
>> My questions:
>> 
>> Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is 
>> address taken? 
> 
> I think so. Note it doesn't necessarily need address taking but any other 
> reason that prevents SSA rewriting the variable suffices. 

You mean, in addition to “address taken”, there are other situations that will 
introduce such IR:

temp = .DEFERRED_INIT();
auto_var = temp;

So, such IR is unavoidable and we have to handle it?

If we have to handle it,  what’s the best way to do it?

The solution in my mind is:
1. During uninitialized analysis phase, following the data flow to connect 
.DEFERRED_INIT to “auto_var”, and then decide that “auto_var” is uninitialized.

2. During RTL expansion, following the data flow to connect .DEFERRED_INIT to 
“auto_var”, and then delete “temp”, and then expand .DEFERRED_INIT to auto_var.

Let me know your comments and suggestions on this.


> 
> The only other option is to force .DEFERRED_INIT making the LHS address taken 
> which I think could be achieved by passing it the address as argument instead 
> of having a LHS. But let's not go down this route - it will have quite bad 
> behavior on alias analysis and optimization. 

Okay.

Qing
> 
>> If so, “uninitialized analysis” phase need to be further adjusted to 
>> specially handle such IR. 
>> 
>> If not, what should we do when the auto var is address taken?
>> 
>> Thanks a lot.
>> 
>> Qing
>> 
>> 
>>> On Aug 11, 2021, at 8:58 AM, Richard Biener  wrote:
>>> 
>>> On Wed, 11 Aug 2021, Qing Zhao wrote:
>>> 
 
 
> On Aug 11, 2021, at 8:37 AM, Richard Biener  wrote:
> 
> On Wed, 11 Aug 2021, Qing Zhao wrote:
> 
>> 
>> 
>>> On Aug 11, 2021, at 2:02 AM, Richard Biener  wrote:
>>> 
>>> On Tue, 10 Aug 2021, Qing Zhao wrote:
>>> 
 
 
> On Aug 10, 2021, at 3:16

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Richard Sandiford via Gcc-patches

Qing Zhao  writes:
>> On Aug 11, 2021, at 4:02 AM, Richard Sandiford  
>> wrote:
>>> I came up with the following solution:
>>> 
>>> Define the IFN_DEFERRED_INIT function as:
>>> 
>>>   LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA);
>>> 
>>>   if IS_VLA is false, the LHS is the DECL itself,
>>>   if IS_VLA is true, the LHS is the pointer to this DECL that created by
>>>   gimplify_vla_decl.
>>> 
>>> 
>>> The benefit of this solution are:
>>> 
>>> 1. Resolved the invalid IR issue;
>>> 2. The call stmt carries the address of the VLA naturally;
>>> 
>>> The issue with this solution is:
>>> 
>>> For VLA and non-VLA, the LHS will be different, 
>>> 
>>> Do you see any other potential issues with this solution?
>> 
>> The idea behind the DECL version of the .DEFERRED_INIT semantics was
>> that .DEFERRED_INIT just returns a SIZE-byte value that the caller
>> then assigns to a SIZE-byte lhs (with the caller choosing the lhs).
>> .DEFERRED_INIT itself doesn't read or write memory and so can be const,
>> which in turn allows alias analysis to be more precise.
> Yes. That’s right.
>
>> 
>> If we want to handle the VLA case using pointers instead then I think
>> that needs to be a different IFN.
>> 
>> If we did handle the VLA case using pointers (not expressing an opinion
>> on that), then it would be the caller's job to allocate the VLA and work
>> out the address of the VLA;
>
> the current routine “gimplify_vla_decl” has done this already:
>
> It created a temporary variable for the address of the VLA, and created a 
> call to “alloca” to allocate the VLA.

Right, that's what I mean.  It's this alloca that allocates the VLA
and determines its address.  This address is therefore logically an
input rather than an output to the following zero/pattern initialisation.

In C you wouldn't write:

  addr = alloca(size);
  addr = initialise(size);

to allocate and initialise a size-byte buffer, because initialise()
would need to know the address of the memory it's supposed to initialise.
The same is true for this gimple code.

> My -ftrivial-auto-var-init work just try to use the “address variable of the 
> VLA” in the new .DEFERRED_INIT call to carry it to RTL expansion phase.
>
>
>> this isn't something that .DEFERRED_INIT
>> would work out on the caller's behalf.  The address of the VLA should
>> therefore be an argument to the new IFN, rather than something that
>> the IFN returns.
>
> Then what’s the LHS of this call? Currently the major issue is the LHS is 
> invalid gimple.

For this (different, address-taking, VLA-only) IFN, there would be no lhs.
The IFN would be similar to a memset.

Like I say, this is all hypothetical, based on “if we did handle the VLA
case using pointers”.  As discussed, it would make alias analysis less
precise.  I was just answering the question about whether there were
potential issues.

Thanks,
Richard

Re: [PATCH v3] gcov: Add TARGET_GCOV_TYPE_SIZE target macro

2021-08-11 Thread Joseph Myers

On Wed, 11 Aug 2021, Sebastian Huber wrote:

> 64-bit atomic operations.  Allow targets to override the default type
> size with the new TARGET_GCOV_TYPE_SIZE target macro.

Hook, not macro.

> diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
> index f79f939bd10f..e85b60c79f49 100644
> --- a/gcc/c-family/c-cppbuiltin.c
> +++ b/gcc/c-family/c-cppbuiltin.c
> @@ -1450,6 +1450,8 @@ c_cpp_builtins (cpp_reader *pfile)
>/* For libgcov.  */
>builtin_define_with_int_value ("__LIBGCC_VTABLE_USES_DESCRIPTORS__",
>TARGET_VTABLE_USES_DESCRIPTORS);
> +  builtin_define_with_int_value ("__LIBGCC_GCOV_TYPE_SIZE",
> +  TARGET_GCOV_TYPE_SIZE);

The TARGET_* macros used to initialize targetm may only be defined to 
their final values in the architecture-specific .c file that actually 
defines targetm.  All other files should access the hook via targetm, not 
the TARGET_* macros.  (TARGET_VTABLE_USES_DESCRIPTORS in the diff context 
is a target macro, not a hook at all.)

> +DEFHOOKPOD
> +(gcov_type_size,
> + "The gcov type size in bits.  This type is used for example for counters\n\
> +incremented by profiling and code-coverage events.  The default value is 
> 64,\n\
> +if the type size of long long is greater than 32, otherwise the default\n\
> +value is 32.  A 64-bit type is recommended to avoid overflows of the\n\
> +counters.  If the @option{-fprofile-update=atomic} is used, then the\n\
> +counters are incremented using atomic operations.  Targets not supporting\n\
> +64-bit atomic operations may override the default value and request a 
> 32-bit\n\
> +type.",
> + HOST_WIDE_INT, (LONG_LONG_TYPE_SIZE > 32 ? 64 : 32))

LONG_LONG_TYPE_SIZE may depend on command-line options passed to the 
compiler (it does for AVR).  The hook thus needs to be a function 
returning the desired size.

-- 
Joseph S. Myers
jos...@codesourcery.com

[PATCH, rs6000 V2] Add store fusion support for Power10

2021-08-11 Thread Pat Haugen via Gcc-patches

Enable store fusion on Power10.

Use the SCHED_REORDER hook to implement Power10 specific ready list reordering.
As of now this is just store fusion.

Things changed in this version of the patch
- Separate patch for additional load/store checks
- Move option check from is_fusable_store() to caller
- Misc coding style changes pointed out in review (parens/braces)
- Add testcases

Bootstrap/regtest on powerpc64(32/64) and powerpc64le(Power10) with no new 
regressions.
Ok for master?

-Pat


2021-08-11  Pat Haugen  

gcc/ChangeLog:

* config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add new flag.
(POWERPC_MASKS): Likewise.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Enable
store fusion for Power10.
(is_fusable_store): New.
(power10_sched_reorder): Likewise.
(rs6000_sched_reorder): Do Power10 specific reordering.
(rs6000_sched_reorder2): Likewise.
* config/rs6000/rs6000.opt: Add new option.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/fusion-p10-stst.c: New test.
* gcc.target/powerpc/fusion-p10-stst2.c: New test.



diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index 6758296c0fd..f5812da0184 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -90,7 +90,8 @@
 | OPTION_MASK_P10_FUSION_2LOGICAL  \
 | OPTION_MASK_P10_FUSION_LOGADD\
 | OPTION_MASK_P10_FUSION_ADDLOG\
-| OPTION_MASK_P10_FUSION_2ADD)
+| OPTION_MASK_P10_FUSION_2ADD  \
+| OPTION_MASK_P10_FUSION_2STORE)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS  (OPTION_MASK_FLOAT128_HW\
@@ -143,6 +144,7 @@
 | OPTION_MASK_P10_FUSION_LOGADD\
 | OPTION_MASK_P10_FUSION_ADDLOG\
 | OPTION_MASK_P10_FUSION_2ADD  \
+| OPTION_MASK_P10_FUSION_2STORE\
 | OPTION_MASK_HTM  \
 | OPTION_MASK_ISEL \
 | OPTION_MASK_MFCRF\
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 60f406a4ff6..402cc924e3f 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4495,6 +4495,10 @@ rs6000_option_override_internal (bool global_init_p)
   && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
 
+  if (TARGET_POWER10
+  && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
+rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
+
   /* Turn off vector pair/mma options on non-power10 systems.  */
   else if (!TARGET_POWER10 && TARGET_MMA)
 {
@@ -18874,6 +18878,91 @@ power9_sched_reorder2 (rtx_insn **ready, int lastpos)
   return cached_can_issue_more;
 }
 
+/* Determine if INSN is a store to memory that can be fused with a similar
+   adjacent store.  */
+
+static bool
+is_fusable_store (rtx_insn *insn, rtx *str_mem)
+{
+  /* Insn must be a non-prefixed base+disp form store.  */
+  if (is_store_insn (insn, str_mem)
+  && get_attr_prefixed (insn) == PREFIXED_NO
+  && get_attr_update (insn) == UPDATE_NO
+  && get_attr_indexed (insn) == INDEXED_NO)
+{
+  /* Further restrictions by mode and size.  */
+  machine_mode mode = GET_MODE (*str_mem);
+  HOST_WIDE_INT size;
+  if (MEM_SIZE_KNOWN_P (*str_mem))
+   size = MEM_SIZE (*str_mem);
+  else
+   return false;
+
+  if (INTEGRAL_MODE_P (mode))
+   /* Must be word or dword size.  */
+   return (size == 4 || size == 8);
+  else if (FLOAT_MODE_P (mode))
+   /* Must be dword size.  */
+   return (size == 8);
+}
+
+  return false;
+}
+
+/* Do Power10 specific reordering of the ready list.  */
+
+static int
+power10_sched_reorder (rtx_insn **ready, int lastpos)
+{
+  rtx mem1;
+
+  /* Do store fusion during sched2 only.  */
+  if (!reload_completed)
+return cached_can_issue_more;
+
+  /* If the prior insn finished off a store fusion pair then simply
+ reset the counter and return, nothing more to do.  */
+  if (load_store_pendulum != 0)
+{
+  load_store_pendulum = 0;
+  return cached_can_issue_more;
+}
+
+  /* Try to pair certain store insns to adjacent memory locations
+ so that the hardware will fuse them to a single operation.  */
+  if (TARGET_P10_FUSION && TARGET_P10_FUSION_2STORE
+  && is_fusable_store (last_scheduled_insn, &mem1))
+{
+  int pos;
+  rtx mem2;
+
+  /* A fusable store was just scheduled.  Scan the ready list for

[committed] libstdc++: Define std::is_pointer_interconvertible_base_of for C++20

2021-08-11 Thread Jonathan Wakely via Gcc-patches

Implement these traits using the new built-ins that Jakub added
recently.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* include/std/type_traits (__cpp_lib_is_pointer_interconvertible)
(is_pointer_interconvertible_base_of_v)
(is_pointer_interconvertible_base_of): Define for C++20.
* include/std/version (__cpp_lib_is_pointer_interconvertible):
Define.
* testsuite/23_containers/span/layout_compat.cc: Use correct
feature test macro for std::is_layout_compatible_v.
* testsuite/20_util/is_pointer_interconvertible/value.cc: New test.
* testsuite/20_util/is_pointer_interconvertible/version.cc: New test.

Tested powerpc64le-linux. Committed to trunk.

commit 4fa6c0ec350bb4a8dd52a7c6f9881ab427151588
Author: Jonathan Wakely 
Date:   Tue Aug 10 15:37:23 2021

libstdc++: Define std::is_pointer_interconvertible_base_of for C++20

Implement these traits using the new built-ins that Jakub added
recently.

Signed-off-by: Jonathan Wakely 

libstdc++-v3/ChangeLog:

* include/std/type_traits (__cpp_lib_is_pointer_interconvertible)
(is_pointer_interconvertible_base_of_v)
(is_pointer_interconvertible_base_of): Define for C++20.
* include/std/version (__cpp_lib_is_pointer_interconvertible):
Define.
* testsuite/23_containers/span/layout_compat.cc: Use correct
feature test macro for std::is_layout_compatible_v.
* testsuite/20_util/is_pointer_interconvertible/value.cc: New test.
* testsuite/20_util/is_pointer_interconvertible/version.cc: New 
test.

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 46edde905f8..2be4944e2a6 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -3389,6 +3389,33 @@ template
 inline constexpr bool is_unbounded_array_v
   = is_unbounded_array<_Tp>::value;
 
+#if __has_builtin(__is_pointer_interconvertible_base_of)
+  /// True if `_Derived` is standard-layout and has a base class of type 
`_Base`
+  /// @since C++20
+  template<typename _Base, typename _Derived>
+struct is_pointer_interconvertible_base_of
+: bool_constant<__is_pointer_interconvertible_base_of(_Base, _Derived)>
+{ };
+
+  /// @ingroup variable_templates
+  /// @since C++20
+  template<typename _Base, typename _Derived>
+constexpr bool is_pointer_interconvertible_base_of_v
+  = __is_pointer_interconvertible_base_of(_Base, _Derived);
+
+#if __has_builtin(__builtin_is_pointer_interconvertible_with_class)
+#define __cpp_lib_is_pointer_interconvertible 201907L
+
+  /// True if `__mp` points to the first member of a standard-layout type
+  /// @returns true if `s.*__mp` is pointer-interconvertible with `s`
+  /// @since C++20
+  template<typename _Tp, typename _Mem>
+constexpr bool
+is_pointer_interconvertible_with_class(_Mem _Tp::*__mp) noexcept
+{ return __builtin_is_pointer_interconvertible_with_class(__mp); }
+#endif
+#endif
+
 #if __cplusplus > 202002L
 #define __cpp_lib_is_scoped_enum 202011L
 
diff --git a/libstdc++-v3/include/std/version b/libstdc++-v3/include/std/version
index d5fa38d7786..925f27704c4 100644
--- a/libstdc++-v3/include/std/version
+++ b/libstdc++-v3/include/std/version
@@ -236,6 +236,10 @@
 #ifdef _GLIBCXX_HAS_GTHREADS
 # define __cpp_lib_jthread 201911L
 #endif
+#if __has_builtin(__is_pointer_interconvertible_base_of) \
+ && __has_builtin(__builtin_is_pointer_interconvertible_with_class)
+# define __cpp_lib_is_pointer_interconvertible 201907L
+#endif
 #if __cpp_lib_atomic_wait
 # define __cpp_lib_latch 201907L
 #endif
diff --git 
a/libstdc++-v3/testsuite/20_util/is_pointer_interconvertible/value.cc 
b/libstdc++-v3/testsuite/20_util/is_pointer_interconvertible/value.cc
new file mode 100644
index 000..471571cac58
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/is_pointer_interconvertible/value.cc
@@ -0,0 +1,51 @@
+// { dg-options "-std=gnu++20" }
+// { dg-do compile { target c++20 } }
+#include <type_traits>
+
+#ifndef __cpp_lib_is_pointer_interconvertible
+# error "Feature test macro for is_pointer_interconvertible is missing in 
<type_traits>"
+#elif __cpp_lib_is_pointer_interconvertible < 201907L
+# error "Feature test macro for is_pointer_interconvertible has wrong value in 
<type_traits>"
+#endif
+
+static_assert( std::is_pointer_interconvertible_base_of::value
+   == std::is_pointer_interconvertible_base_of_v );
+
+struct B { };
+
+static_assert( std::is_pointer_interconvertible_base_of::value
+   == std::is_pointer_interconvertible_base_of_v );
+
+static_assert( std::is_pointer_interconvertible_base_of_v );
+static_assert( std::is_pointer_interconvertible_base_of_v );
+static_assert( std::is_pointer_interconvertible_base_of_v );
+static_assert( std::is_pointer_interconvertible_base_of_v );
+
+struct D : B { int i; };
+
+static_assert( std::is_pointer_interconvertible_base_of_v );
+
+static_assert( std::is_pointer_interconvertible_base_of_v );
+static_assert(

Re: [PATCH] Adding target hook allows to reject initialization of register

2021-08-11 Thread Richard Sandiford via Gcc-patches

Richard Biener  writes:
> On Wed, Aug 11, 2021 at 11:28 AM Richard Sandiford
>  wrote:
>>
>> Richard Biener  writes:
>> > On Tue, Aug 10, 2021 at 10:33 AM Jojo R via Gcc-patches
>> >  wrote:
>> >>
>> >> Some target like RISC-V allow to group vector register as a whole,
>> >> and only operate part of it in fact, but the 'init-regs' pass will add 
>> >> initialization
>> >> for uninitialized registers. Add this hook to reject this action for 
>> >> reducing instruction.
>> >
>> > Are these groups "visible"?  That is, are the pseudos multi-reg
>> > pseudos?  I wonder
>> > if there's a more generic way to tame down initregs w/o introducing a new 
>> > target
>> > hook.
>> >
>> > Btw, initregs is a red herring - it ideally should go away.  See PR61810.
>> >
>> > So instead of adding to it can you see whether disabling the pass for 
>> > RISC-V
>> > works w/o fallout (and add a comment to the PR)?  Maybe some more RTL
>> > literate (in particular DF literate) can look at the remaining issue.
>> > Richard, did you
>> > ever have a look into the "issue" that initregs covers up (whatever
>> > that exactly is)?
>>
>> No, sorry.  I don't really understand what it would be from the comment
>> in the code:
>>
>>[...] papers over some problems on the arm and other
>>processors where certain isa constraints cannot be handled by gcc.
>>These are of the form where two operands to an insn may not be the
>>same.  The ra will only make them the same if they do not
>>interfere, and this can only happen if one is not initialized.
>>
>> That would definitely be an RA bug if true, since the constraints need
>> to be applied independently of dataflow information.  But the comment
>> and code predate LRA and maybe no-one fancied poking around in reload
>> (hard to believe).
>>
>> I'd be very surprised if LRA gets this wrong.
>
> OK, we're wondering since quite some time - how about changing the
> gate of initregs to optimize > 0 && !targetm.lra_p ()?  We'll hopefully
> figure out the "real" issue the pass is papering over.  At the same time
> we're leaving old reload (and likely unmaintained) targets unaffected.

Sounds good to me.

Thanks,
Richard

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Richard Biener via Gcc-patches

On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao  wrote:
>I modified the routine “gimple_add_init_for_auto_var” as the following:
>
>/* Generate initialization to automatic variable DECL based on INIT_TYPE.
>   Build a call to internal const function DEFERRED_INIT:
>   1st argument: SIZE of the DECL;
>   2nd argument: INIT_TYPE;
>   3rd argument: IS_VLA, 0 NO, 1 YES;
>
>   as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA).  */
>static void
>gimple_add_init_for_auto_var (tree decl,
>  enum auto_init_type init_type,
>  bool is_vla,
>  gimple_seq *seq_p)
>{
>  gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl));
>  gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
>  tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl));
>
>  tree init_type_node
>= build_int_cst (integer_type_node, (int) init_type);
>  tree is_vla_node
>= build_int_cst (integer_type_node, (int) is_vla);
>
>  tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, 
> IFN_DEFERRED_INIT,
>TREE_TYPE (decl), 3,
>decl_size, init_type_node,
>is_vla_node);
>
>  /* If this DECL is a VLA, a temporary address variable for it has been
> created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl),
> we should use it as the LHS of the call.  */
>
>  tree lhs_call
>= is_vla ? DECL_VALUE_EXPR (decl) : decl;
>  gimplify_assign (lhs_call, call, seq_p);
>}
>
>With this change, the current issue is resolved, the gimple dump now is:
>
> (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1);
>
>However, there is another new issue:
>
>For the following testing case:
>
>==
>[opc@qinzhao-ol8u3-x86 gcc]$ cat t.c
>int bar;
>
>extern void decode_reloc(int *);
>
>void testfunc()
>{
>  int alt_reloc;
>
>  decode_reloc(&alt_reloc);
>
>  if (alt_reloc) /* { dg-warning "may be used uninitialized" } */
>bar = 42; 
>}
>=
>
>In the above, the auto var “alt_reloc” is address taken, then the gimple dump 
>for it when compiled with -ftrivial-auto-var-init=zero is:
>
>void testfunc ()
>{
>  int alt_reloc;
>
>  try
>{
>  _1 = .DEFERRED_INIT (4, 2, 0);
>  alt_reloc = _1;
>  decode_reloc (&alt_reloc);
>  alt_reloc.0_2 = alt_reloc;
>  if (alt_reloc.0_2 != 0) goto ; else goto ;
>  :
>  bar = 42;
>  :
>}
>  finally
>{
>  alt_reloc = {CLOBBER};
>}
>}
>
>I.e, instead of the expected IR:
>
>alt_reloc = .DEFERRED_INIT (4, 2, 0);
>
>We got the following:
>
> _1 = .DEFERRED_INIT (4, 2, 0);
>  alt_reloc = _1;
>
>I guess the temp “_1” is created because “alt_reloc” is address taken. 

Yes and no. The reason is that alt_reloc is memory (because it is address 
taken) and that GIMPLE says that register typed stores need to use a 
is_gimple_val RHS which the call is not.

>My questions:
>
>Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is 
>address taken? 

I think so. Note it doesn't necessarily need address taking but any other 
reason that prevents SSA rewriting the variable suffices. 

The only other option is to force .DEFERRED_INIT making the LHS address taken 
which I think could be achieved by passing it the address as argument instead 
of having a LHS. But let's not go down this route - it will have quite bad 
behavior on alias analysis and optimization. 

>If so, “uninitialized analysis” phase need to be further adjusted to specially 
>handle such IR. 
>
>If not, what should we do when the auto var is address taken?
>
>Thanks a lot.
>
>Qing
>
>
>> On Aug 11, 2021, at 8:58 AM, Richard Biener  wrote:
>> 
>> On Wed, 11 Aug 2021, Qing Zhao wrote:
>> 
>>> 
>>> 
 On Aug 11, 2021, at 8:37 AM, Richard Biener  wrote:
 
 On Wed, 11 Aug 2021, Qing Zhao wrote:
 
> 
> 
>> On Aug 11, 2021, at 2:02 AM, Richard Biener  wrote:
>> 
>> On Tue, 10 Aug 2021, Qing Zhao wrote:
>> 
>>> 
>>> 
 On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
  wrote:
 
 Hi, Richard,
 
> On Aug 10, 2021, at 10:22 AM, Richard Biener  
> wrote:
>>> 
>>> Especially in the VLA case but likely also in general (though 
>>> unlikely
>>> since usually the receiver of initializations are simple enough).  
>>> I'd
>>> expect the VLA case end up as
>>> 
>>> *ptr_to_decl = .DEFERRED_INIT (...);
>>> 
>>> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
>> 
>> So, for the following small testing case:
>> 
>> 
>> extern void bar (int);
>> 
>> void foo(int n)
>> {
>> int arr[n];
>> bar (arr[2]);
>> return;
>> }
>> =
>> 
>> If I compile it with -ftrivial-auto-var-init=zero

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches

I modified the routine “gimple_add_init_for_auto_var” as the following:

/* Generate initialization to automatic variable DECL based on INIT_TYPE.
   Build a call to internal const function DEFERRED_INIT:
   1st argument: SIZE of the DECL;
   2nd argument: INIT_TYPE;
   3rd argument: IS_VLA, 0 NO, 1 YES;

   as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA).  */
static void
gimple_add_init_for_auto_var (tree decl,
  enum auto_init_type init_type,
  bool is_vla,
  gimple_seq *seq_p)
{
  gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl));
  gcc_assert (init_type > AUTO_INIT_UNINITIALIZED);
  tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl));

  tree init_type_node
= build_int_cst (integer_type_node, (int) init_type);
  tree is_vla_node
= build_int_cst (integer_type_node, (int) is_vla);

  tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_DEFERRED_INIT,
TREE_TYPE (decl), 3,
decl_size, init_type_node,
is_vla_node);

  /* If this DECL is a VLA, a temporary address variable for it has been
 created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl),
 we should use it as the LHS of the call.  */

  tree lhs_call
= is_vla ? DECL_VALUE_EXPR (decl) : decl;
  gimplify_assign (lhs_call, call, seq_p);
}

With this change, the current issue is resolved, the gimple dump now is:

 (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1);

However, there is another new issue:

For the following testing case:

==
[opc@qinzhao-ol8u3-x86 gcc]$ cat t.c
int bar;

extern void decode_reloc(int *);

void testfunc()
{
  int alt_reloc;

  decode_reloc(&alt_reloc);

  if (alt_reloc) /* { dg-warning "may be used uninitialized" } */
bar = 42; 
}
=

In the above, the auto var “alt_reloc” is address taken, then the gimple dump 
for it when compiled with -ftrivial-auto-var-init=zero is:

void testfunc ()
{
  int alt_reloc;

  try
{
  _1 = .DEFERRED_INIT (4, 2, 0);
  alt_reloc = _1;
  decode_reloc (&alt_reloc);
  alt_reloc.0_2 = alt_reloc;
  if (alt_reloc.0_2 != 0) goto ; else goto ;
  :
  bar = 42;
  :
}
  finally
{
  alt_reloc = {CLOBBER};
}
}

I.e, instead of the expected IR:

alt_reloc = .DEFERRED_INIT (4, 2, 0);

We got the following:

 _1 = .DEFERRED_INIT (4, 2, 0);
  alt_reloc = _1;

I guess the temp “_1” is created because “alt_reloc” is address taken. 

My questions:

Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is address 
taken? 
If so, “uninitialized analysis” phase need to be further adjusted to specially 
handle such IR. 

If not, what should we do when the auto var is address taken?

Thanks a lot.

Qing

> On Aug 11, 2021, at 8:58 AM, Richard Biener  wrote:
> 
> On Wed, 11 Aug 2021, Qing Zhao wrote:
> 
>> 
>> 
>>> On Aug 11, 2021, at 8:37 AM, Richard Biener  wrote:
>>> 
>>> On Wed, 11 Aug 2021, Qing Zhao wrote:
>>> 

> On Aug 11, 2021, at 2:02 AM, Richard Biener  wrote:
> 
> On Tue, 10 Aug 2021, Qing Zhao wrote:
> 
>> 
>> 
>>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
>>>  wrote:
>>> 
>>> Hi, Richard,
>>> 
 On Aug 10, 2021, at 10:22 AM, Richard Biener  wrote:
>> 
>> Especially in the VLA case but likely also in general (though 
>> unlikely
>> since usually the receiver of initializations are simple enough).  
>> I'd
>> expect the VLA case end up as
>> 
>> *ptr_to_decl = .DEFERRED_INIT (...);
>> 
>> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
> 
> So, for the following small testing case:
> 
> 
> extern void bar (int);
> 
> void foo(int n)
> {
> int arr[n];
> bar (arr[2]);
> return;
> }
> =
> 
> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple 
> -S -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is:
> 
> =
> void foo (int n)
> {
> int n.0;
> sizetype D.1950;
> bitsizetype D.1951;
> sizetype D.1952;
> bitsizetype D.1953;
> sizetype D.1954;
> int[0:D.1950] * arr.1;
> void * saved_stack.2;
> int arr[0:D.1950] [value-expr: *arr.1];
> 
> saved_stack.2 = __builtin_stack_save ();
> try
> {
>  n.0 = n;
>  _1 = (long int) n.0;
>  _2 = _1 + -1;
>  _3 = (sizetype) _2;
>  D.1950 = _3;
>  _4 = (sizetype) n.0;
>  _5 = (bitsizetype) _4;
>  _6 = _5 * 32;
>  D.1951 = _6;
>  _7 = (sizetype) n.0;
>  _8 = _7 * 4;
>  D.1952 = _8;

[PATCH] aarch64: Replace some uses of GET_CODE with RTL predicate macros

2021-08-11 Thread Alistair Lee via Gcc-patches


Hi all, this patch changed some RTL nodes to appropriate macros in Aarch64.c
backend using a script. Would this be okay for trunk?


Also, this is my first contribution, and I do not yet have commit 
rights, so if everything is okay  could someone commit this for me?


Thank you and I would appreciate any feedback/advice!


gcc/ChangeLog:


2021-07-21  Alistair_Lee  alistair@arm.com


 * rtl.h (CONST_VECTOR_P): New macro.
 (CONST_STRING_P): New macro.

 * config/aarch64/aarch64.c (aarch64_get_sve_pred_bits): Use RTL 
code testing macros.

 (aarch64_ptrue_all_mode): Likewise.
 (aarch64_expand_mov_immediate): Likewise.
 (aarch64_const_vec_all_in_range_p): Likewise.
 (aarch64_rtx_costs): Likewise.
 (aarch64_legitimate_constant_p): Likewise.
 (aarch64_simd_valid_immediate): Likewise.
 (aarch64_simd_make_constant): Likewise.
 (aarch64_convert_mult_to_shift): Likewise.
 (aarch64_expand_sve_vec_perm): Likewise.
 (aarch64_vec_fpconst_pow_of_2): Likewise.

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
2753c852abdfab96ec6016075aa386eee73ad85d..d1408a6ab371223cd7d042012a32a4b0a76d6885
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4035,7 +4035,7 @@ aarch64_force_temporary (machine_mode mode, rtx x, rtx 
value)
 static bool
 aarch64_get_sve_pred_bits (rtx_vector_builder &builder, rtx x)
 {
-  if (GET_CODE (x) != CONST_VECTOR)
+  if (!CONST_VECTOR_P (x))
 return false;
 
   unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode),
@@ -4091,7 +4091,7 @@ opt_machine_mode
 aarch64_ptrue_all_mode (rtx x)
 {
   gcc_assert (GET_MODE (x) == VNx16BImode);
-  if (GET_CODE (x) != CONST_VECTOR
+  if (!CONST_VECTOR_P (x)
   || !CONST_VECTOR_DUPLICATE_P (x)
   || !CONST_INT_P (CONST_VECTOR_ENCODED_ELT (x, 0))
   || INTVAL (CONST_VECTOR_ENCODED_ELT (x, 0)) == 0)
@@ -5791,7 +5791,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
  return;
}
 
-  if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode))
+  if (CONST_VECTOR_P (imm) && aarch64_sve_data_mode_p (mode))
if (rtx res = aarch64_expand_sve_const_vector (dest, imm))
  {
if (dest != res)
@@ -10495,7 +10495,7 @@ aarch64_const_vec_all_in_range_p (rtx vec,
  HOST_WIDE_INT minval,
  HOST_WIDE_INT maxval)
 {
-  if (GET_CODE (vec) != CONST_VECTOR
+  if (!CONST_VECTOR_P (vec)
   || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
 return false;
 
@@ -12595,7 +12595,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer 
ATTRIBUTE_UNUSED,
case SIGN_EXTRACT:
  /* Bit-field insertion.  Strip any redundant widening of
 the RHS to meet the width of the target.  */
- if (GET_CODE (op1) == SUBREG)
+ if (SUBREG_P (op1))
op1 = SUBREG_REG (op1);
  if ((GET_CODE (op1) == ZERO_EXTEND
   || GET_CODE (op1) == SIGN_EXTEND)
@@ -12868,7 +12868,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer 
ATTRIBUTE_UNUSED,
  But the integer MINUS logic expects the shift/extend
  operation in op1.  */
   if (! (REG_P (op0)
- || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)
+ || (SUBREG_P (op0) && REG_P (SUBREG_REG (op0)
   {
 op0 = XEXP (x, 1);
 op1 = XEXP (x, 0);
@@ -17997,7 +17997,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
 
   /* Otherwise, accept any CONST_VECTOR that, if all else fails, can at
  least be forced to memory and loaded from there.  */
-  if (GET_CODE (x) == CONST_VECTOR)
+  if (CONST_VECTOR_P (x))
 return !targetm.cannot_force_const_mem (mode, x);
 
   /* Do not allow vector struct mode constants for Advanced SIMD.
@@ -19804,7 +19804,7 @@ aarch64_simd_valid_immediate (rtx op, 
simd_immediate_info *info,
   scalar_mode elt_mode = GET_MODE_INNER (mode);
   rtx base, step;
   unsigned int n_elts;
-  if (GET_CODE (op) == CONST_VECTOR
+  if (CONST_VECTOR_P (op)
   && CONST_VECTOR_DUPLICATE_P (op))
 n_elts = CONST_VECTOR_NPATTERNS (op);
   else if ((vec_flags & VEC_SVE_DATA)
@@ -19826,7 +19826,7 @@ aarch64_simd_valid_immediate (rtx op, 
simd_immediate_info *info,
}
   return true;
 }
-  else if (GET_CODE (op) == CONST_VECTOR
+  else if (CONST_VECTOR_P (op)
   && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
 /* N_ELTS set above.  */;
   else
@@ -20425,7 +20425,7 @@ aarch64_simd_make_constant (rtx vals)
   int n_const = 0;
   int i;
 
-  if (GET_CODE (vals) == CONST_VECTOR)
+  if (CONST_VECTOR_P (vals))
 const_vec = vals;
   else if (GET_CODE (vals) == PARALLEL)
 {
@@ -20966,7 +20966,7 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals)
 static rtx
 aarch64_convert_mult_to_shift (rtx value, rtx_code &code)
 {
-  if (GET_CODE (value) !=

Re: [PATCH] c++: Improve memory usage of subsumption [PR100828]

2021-08-11 Thread Patrick Palka via Gcc-patches

On Wed, 11 Aug 2021, Jason Merrill wrote:

> On 8/9/21 5:07 PM, Patrick Palka wrote:
> > On Wed, Jul 28, 2021 at 4:42 PM Jason Merrill  wrote:
> > > 
> > > On 7/19/21 6:05 PM, Patrick Palka wrote:
> > > > Constraint subsumption is implemented in two steps.  The first step
> > > > computes the disjunctive (or conjunctive) normal form of one of the
> > > > constraints, and the second step verifies that each clause in the
> > > > decomposed form implies the other constraint.   Performing these two
> > > > steps separately is problematic because in the first step the
> > > > disjunctive normal form can be exponentially larger than the original
> > > > constraint, and by computing it ahead of time we'd have to keep all of
> > > > it in memory.
> > > > 
> > > > This patch fixes this exponential blowup in memory usage by interleaving
> > > > these two steps, so that as soon as we decompose one clause we check
> > > > implication for it.  In turn, memory usage during subsumption is now
> > > > worst case linear in the size of the constraints rather than
> > > > exponential, and so we can safely remove the hard limit of 16 clauses
> > > > without introducing runaway memory usage on some inputs.  (Note the
> > > > _time_ complexity of subsumption is still exponential in the worst
> > > > case.)
> > > > 
> > > > In order for this to work we need formula::branch to prepend the copy
> > > > of the current clause directly after the current clause rather than
> > > > at the end of the list, so that we fully decompose a clause shortly
> > > > after creating it.  Otherwise we'd end up accumulating exponentially
> > > > many (partially decomposed) clauses in memory anyway.
> > > > 
> > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on
> > > > range-v3 and cmcstl2.  Does this look OK for trunk and perhaps 11?
> > > 
> > > OK for trunk.
> > 
> > Thanks a lot, patch committed to trunk as r12-2658.  Since this low
> > complexity limit was introduced in GCC 10, what do you think about
> > increasing the limit from 16 to say 128 in the 10/11 release branches
> > as a relatively safe stopgap?
> 
> Now that 11.2 is out, go ahead and apply this patch to the 11 branch.

Ah great, will do.

> 
> Won't a limit of 128 in GCC 10 lead to extremely long compile times for
> affected code?  Is that more desirable than an error?

Potentially, though I think that'd be the case only if the original
(normalized) constraint is huge to begin with.  The comment for
max_problem_size says

 /* The largest number of clauses in CNF or DNF we accept as input
for subsumption. This an upper bound of 2^16 expressions.  */
 static int max_problem_size = 16;

which implies increasing it to 128 would allow for at most 2^128
expressions (clearly unacceptable), but I'm not sure how this upper
bound was obtained.

FWIW I think another upper bound for the number of expressions in the
CNF/DNF is roughly 'max_problem_size * size_of_original_constraint',
since we allow at most 'max_problem_size' clauses in the decomposed form
and each clause is definitely no larger than the original constraint.
So according to this upper bound the dependence on max_problem_size as
it relates to worst-case compile time/memory usage of subsumption is
linear rather than exponential, contrary to the comment.  In that case
increasing the limit from 16 to 128 doesn't seem to be too bad.

> 
> > > >PR c++/100828
> > > > 
> > > > gcc/cp/ChangeLog:
> > > > 
> > > >* logic.cc (formula::formula): Use emplace_back.
> > > >(formula::branch): Insert a copy of m_current in front of
> > > >m_current instead of at the end of the list.
> > > >(formula::erase): Define.
> > > >(decompose_formula): Remove.
> > > >(decompose_antecedents): Remove.
> > > >(decompose_consequents): Remove.
> > > >(derive_proofs): Remove.
> > > >(max_problem_size): Remove.
> > > >(diagnose_constraint_size): Remove.
> > > >(subsumes_constraints_nonnull): Rewrite directly in terms of
> > > >decompose_clause and derive_proof, interleaving decomposition
> > > >with implication checking.  Use formula::erase to free the
> > > >current clause before moving on to the next one.
> > > > ---
> > > >gcc/cp/logic.cc | 118
> > > > ++--
> > > >1 file changed, 35 insertions(+), 83 deletions(-)
> > > > 
> > > > diff --git a/gcc/cp/logic.cc b/gcc/cp/logic.cc
> > > > index 142457e408a..3f872c11fe2 100644
> > > > --- a/gcc/cp/logic.cc
> > > > +++ b/gcc/cp/logic.cc
> > > > @@ -223,9 +223,7 @@ struct formula
> > > > 
> > > >  formula (tree t)
> > > >  {
> > > > -/* This should call emplace_back(). There's an extra copy being
> > > > -   invoked by using push_back().  */
> > > > -m_clauses.push_back (t);
> > > > +m_clauses.emplace_back (t);
> > > >m_current = m_clauses.begin ();
> > > >  }
> > > > 
> > > > @@ -248,8

Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.

2021-08-11 Thread Sandra Loosemore


On 8/11/21 2:05 AM, Tobias Burnus wrote:

On 11.08.21 00:46, Sandra Loosemore wrote:

On 8/10/21 2:29 AM, Tobias Burnus wrote:


[snip]

To conclude: I like the code changes (LGTM); the
'__float128' -> 'TFmode' comment change also matches the code.

However, I think both longer comments need to be updated.


OK.  I used your wording verbatim for the first one.  For the second 
one, I'm still pretty confused as I think it is at least theoretically 
possible on PowerPC to have a target with 64-bit long double (AIX?) 
that also supports the __ibm128 format, and it would be wrong to 
assume that *any* 128-bit mode that's not long double is IEEE.  So I 
decided the best thing is just to replace the FIXME with a pointer to 
the issue I opened yesterday


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101835


LGTM – but ...


+  /* See PR101835.  */


... I wonder whether your PR reference should have a TODO or FIXME 
prefix – or a "for some issue" suffix. Currently, it can be read as if 
the PR describes why the code was added – and not for questioning the code.


OK, thank you.  I've pushed the patch with the addition of "TODO" to 
that comment.


-Sandra

[PATCH][RFC] target/61810 - disable init-regs pass on targets using LRA

2021-08-11 Thread Richard Biener via Gcc-patches

This patch disables the init-regs pass on targets using LRA.
It's not clear as to what issue the init-regs papers over and
most definitely LRA should get it right.  This leaves targets
still using reload unaffected but will expose any such LRA
bug if it exists, allowing it to be identified and fixed.

The change can cause testsuite fallout since the explicit
initializations can have an effect on passes like combine.
For example on x86_64 this results in

+FAIL: gcc.target/i386/extract-insert-combining.c scan-assembler-times (?:vmovd|
movd)[ t]+[^{\\n]*%xmm[0-9] 3
+FAIL: gcc.target/i386/extract-insert-combining.c scan-assembler-times (?:vpinsr
d|pinsrd)[ t]+[^{\\n]*%xmm[0-9] 1

it also results in yet unanalyzed

+FAIL: gnat.dg/sso8.adb execution test

I welcome trying this change on other architectures, like
for example 'arm' that's singled out in the init-regs comments
as affected architecture.

Bootstrapped and tested on x86_64-unknown-linux-gnu with all languages
and {,-m32} with the above remaining fallout (I fixed another
testism already).

2021-08-11  Richard Biener  

PR target/61810
* init-regs.c (pass_initialize_regs::gate): Gate on
targetm.lra_p ().
---
 gcc/init-regs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/init-regs.c b/gcc/init-regs.c
index 72e898f3e33..8f038f54fff 100644
--- a/gcc/init-regs.c
+++ b/gcc/init-regs.c
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "emit-rtl.h"
 #include "expr.h"
+#include "target.h"
 #include "tree-pass.h"
 
 /* Check all of the uses of pseudo variables.  If any use that is MUST
@@ -153,7 +154,7 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *) { return optimize > 0; }
+  virtual bool gate (function *) { return optimize > 0 && !targetm.lra_p (); }
   virtual unsigned int execute (function *)
 {
   initialize_uninitialized_regs ();
-- 
2.31.1

[PATCH] Fix gcc.dg/lto/pr48622_0.c testcase

2021-08-11 Thread Richard Biener via Gcc-patches

This fixes the testcase to not rely on the reference to
ashift_qi_1 being optimized out by RTL optimization via
help of the initregs pass that changes comparisons of
uninitialized data with a comparison that is always false.

Tested on x86_64-unknown-linux-gnu, pushed.

2021-08-11  Richard Biener  

* gcc.dg/lto/pr48622_1.c: Provide non-LTO definition
of ashift_qi_1.
---
 gcc/testsuite/gcc.dg/lto/pr48622_1.c | 6 ++
 1 file changed, 6 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/lto/pr48622_1.c

diff --git a/gcc/testsuite/gcc.dg/lto/pr48622_1.c 
b/gcc/testsuite/gcc.dg/lto/pr48622_1.c
new file mode 100644
index 000..4d05bae2114
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr48622_1.c
@@ -0,0 +1,6 @@
+/* { dg-options "-fno-lto" } */
+
+typedef unsigned int u8 __attribute__ ((mode (QI)));
+u8 ashift_qi_1 (u8)
+{
+}
-- 
2.31.1

Re: [PATCH][v2] Adjust volatile handling of the operand scanner

2021-08-11 Thread Richard Biener via Gcc-patches

On Wed, 11 Aug 2021, Eric Botcazou wrote:

> > So I'm leaning towards leaving build3 alone and fixing up frontends
> > as issues pop up.
> 
> FWIW fine with me.

OK, so I pushed the original change (reposted below).

Bootstrapped / tested on x86_64-unknown-linux-gnu.

Richard.

>From e5a23d54d189f3d160c82f770683288a15c3645e Mon Sep 17 00:00:00 2001
From: Richard Biener 
Date: Mon, 9 Aug 2021 13:12:08 +0200
Subject: [PATCH] Adjust volatile handling of the operand scanner
To: gcc-patches@gcc.gnu.org

The GIMPLE SSA operand scanner handles COMPONENT_REFs that are
not marked TREE_THIS_VOLATILE but have a TREE_THIS_VOLATILE
FIELD_DECL as volatile.  That's inconsistent in how TREE_THIS_VOLATILE
testing on GENERIC refs works which requires operand zero of
component references to mirror TREE_THIS_VOLATILE to the ref
so that testing TREE_THIS_VOLATILE on the outermost reference
is enough to determine the volatileness.

The following patch thus removes FIELD_DECL scanning from
the GIMPLE SSA operand scanner, possibly leaving fewer stmts
marked as gimple_has_volatile_ops.

It shows we miss at least one case in the fortran frontend, though
there's a suspicious amount of COMPONENT_REF creation compared
to little setting of TREE_THIS_VOLATILE.  This fixes the FAIL
of gfortran.dg/volatile11.f90 that would otherwise occur.

Visually inspecting fortran/ reveals a bunch of likely to fix
cases but I don't know the constraints of 'volatile' uses in
the fortran language to assess whether some of these are not
necessary.

2021-08-09  Richard Biener  

gcc/
* tree-ssa-operands.c (operands_scanner::get_expr_operands):
Do not look at COMPONENT_REF FIELD_DECLs TREE_THIS_VOLATILE
to determine has_volatile_ops.

gcc/fortran/
* trans-common.c (create_common): Set TREE_THIS_VOLATILE on the
COMPONENT_REF if the field is volatile.
---
 gcc/fortran/trans-common.c | 9 +
 gcc/tree-ssa-operands.c| 7 +--
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/gcc/fortran/trans-common.c b/gcc/fortran/trans-common.c
index a11cf4c839e..7bcf18dc475 100644
--- a/gcc/fortran/trans-common.c
+++ b/gcc/fortran/trans-common.c
@@ -759,10 +759,11 @@ create_common (gfc_common_head *com, segment_info *head, 
bool saw_equiv)
   else
gfc_add_decl_to_function (var_decl);

-  SET_DECL_VALUE_EXPR (var_decl,
-  fold_build3_loc (input_location, COMPONENT_REF,
-   TREE_TYPE (s->field),
-   decl, s->field, NULL_TREE));
+  tree comp = build3_loc (input_location, COMPONENT_REF,
+ TREE_TYPE (s->field), decl, s->field, NULL_TREE);
+  if (TREE_THIS_VOLATILE (s->field))
+   TREE_THIS_VOLATILE (comp) = 1;
+  SET_DECL_VALUE_EXPR (var_decl, comp);
   DECL_HAS_VALUE_EXPR_P (var_decl) = 1;
   GFC_DECL_COMMON_OR_EQUIV (var_decl) = 1;

diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c
index c15575416dd..ebf7eea3b04 100644
--- a/gcc/tree-ssa-operands.c
+++ b/gcc/tree-ssa-operands.c
@@ -834,12 +834,7 @@ operands_scanner::get_expr_operands (tree *expr_p, int 
flags)
get_expr_operands (&TREE_OPERAND (expr, 0), flags);

if (code == COMPONENT_REF)
- {
-   if (!(flags & opf_no_vops)
-   && TREE_THIS_VOLATILE (TREE_OPERAND (expr, 1)))
- gimple_set_has_volatile_ops (stmt, true);
-   get_expr_operands (&TREE_OPERAND (expr, 2), uflags);
- }
+ get_expr_operands (&TREE_OPERAND (expr, 2), uflags);
else if (code == ARRAY_REF || code == ARRAY_RANGE_REF)
  {
get_expr_operands (&TREE_OPERAND (expr, 1), uflags);
-- 
2.31.1

[PATCH] target/101788 - avoid decomposing hard-register "loads"

2021-08-11 Thread Richard Biener via Gcc-patches

This avoids decomposing hard-register accesses that masquerade as
loads.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-08-11  Richard Biener  

PR target/101877
* tree-ssa-forwprop.c (pass_forwprop::execute): Do not decompose
hard-register accesses.
---
 gcc/tree-ssa-forwprop.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index bd64b8e46bc..5b30d4c1a76 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3190,7 +3190,8 @@ pass_forwprop::execute (function *fun)
   || (fun->curr_properties & PROP_gimple_lvec))
   && gimple_assign_load_p (stmt)
   && !gimple_has_volatile_ops (stmt)
-  && !stmt_can_throw_internal (cfun, stmt))
+  && !stmt_can_throw_internal (cfun, stmt)
+  && (!VAR_P (rhs) || !DECL_HARD_REGISTER (rhs)))
optimize_vector_load (&gsi);
 
  else if (code == COMPLEX_EXPR)
-- 
2.31.1

[patch] Make -no-pie option work for native Windows

2021-08-11 Thread Eric Botcazou

Hi,

as already mentioned on the list, binutils 2.36 generates PIE executables by
default on native Windows (because --dynamicbase is the default) so it makes
sense to have a simple way to counter that and -no-pie seems appropriate,
all the more so that it is automatically passed when building the compiler.

Bootstrapped on x86 and x86-64/Windows, w/ and w/o binutils 2.36, OK for the
mainline and 11 branch?


2021-08-11  Eric Botcazou  

* configure.ac (PE linker --disable-dynamicbase support): New check.
* configure: Regenerate.
* config.in: Likewise.
* config/i386/mingw32.h (LINK_SPEC_DISABLE_DYNAMICBASE): New define.
(LINK_SPEC): Use it.
* config/i386/mingw-w64.h (LINK_SPEC_DISABLE_DYNAMICBASE): Likewise.
(LINK_SPEC): Likewise.

-- 
Eric Botcazou

diff --git a/gcc/config/i386/mingw-w64.h b/gcc/config/i386/mingw-w64.h
index 0cec6b02787..6cc7ac54fdd 100644
--- a/gcc/config/i386/mingw-w64.h
+++ b/gcc/config/i386/mingw-w64.h
@@ -89,6 +89,14 @@ along with GCC; see the file COPYING3.  If not see
 # define LINK_SPEC_LARGE_ADDR_AWARE ""
 #endif
 
+#undef LINK_SPEC_DISABLE_DYNAMICBASE
+#if HAVE_LD_PE_DISABLE_DYNAMICBASE
+# define LINK_SPEC_DISABLE_DYNAMICBASE \
+  "%{!shared:%{!mdll:%{no-pie:--disable-dynamicbase}}}"
+#else
+# define LINK_SPEC_DISABLE_DYNAMICBASE ""
+#endif
+
 #undef LINK_SPEC
 #define LINK_SPEC SUB_LINK_SPEC " %{mwindows:--subsystem windows} \
   %{mconsole:--subsystem console} \
@@ -97,6 +105,7 @@ along with GCC; see the file COPYING3.  If not see
   %{static:-Bstatic} %{!static:-Bdynamic} \
   %{shared|mdll: " SUB_LINK_ENTRY " --enable-auto-image-base} \
   " LINK_SPEC_LARGE_ADDR_AWARE "\
+  " LINK_SPEC_DISABLE_DYNAMICBASE "\
   %(shared_libgcc_undefs)"
 
 /* Enable sincos optimization, overriding cygming.h.  sincos, sincosf
diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
index 36e7bae5e1b..779c9335711 100644
--- a/gcc/config/i386/mingw32.h
+++ b/gcc/config/i386/mingw32.h
@@ -148,6 +148,13 @@ along with GCC; see the file COPYING3.  If not see
   "%{!shared:%{!mdll:%{!m64:--large-address-aware}}}"
 #endif
 
+#if HAVE_LD_PE_DISABLE_DYNAMICBASE
+# define LINK_SPEC_DISABLE_DYNAMICBASE \
+  "%{!shared:%{!mdll:%{no-pie:--disable-dynamicbase}}}"
+#else
+# define LINK_SPEC_DISABLE_DYNAMICBASE ""
+#endif
+
 #define LINK_SPEC "%{mwindows:--subsystem windows} \
   %{mconsole:--subsystem console} \
   %{shared: %{mdll: %eshared and mdll are not compatible}} \
@@ -155,6 +162,7 @@ along with GCC; see the file COPYING3.  If not see
   %{static:-Bstatic} %{!static:-Bdynamic} \
   %{shared|mdll: " SUB_LINK_ENTRY " --enable-auto-image-base} \
   " LINK_SPEC_LARGE_ADDR_AWARE "\
+  " LINK_SPEC_DISABLE_DYNAMICBASE "\
   %(shared_libgcc_undefs)"
 
 /* Include in the mingw32 libraries with libgcc */
diff --git a/gcc/configure.ac b/gcc/configure.ac
index c8e0d63fe70..653a1cc561d 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -6383,6 +6383,23 @@ case $target_os in
 	[Define if the PE linker has broken DWARF 5 support.])
 fi
 AC_MSG_RESULT($gcc_cv_ld_broken_pe_dwarf5)
+
+AC_MSG_CHECKING(PE linker --disable-dynamicbase support)
+gcc_cv_ld_disable_dynamicbase=no
+if test $in_tree_ld = yes; then
+  if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 36 -o "$gcc_cv_gld_major_version" -gt 2; then \
+gcc_cv_ld_disable_dynamicbase=yes
+  fi
+else
+  if $gcc_cv_ld --help 2>&1 | grep -q 'disable\-]dynamicbase' > /dev/null; then
+gcc_cv_ld_disable_dynamicbase=yes
+  fi
+fi
+if test x"$gcc_cv_ld_disable_dynamicbase" = xyes; then
+  AC_DEFINE(HAVE_LD_PE_DISABLE_DYNAMICBASE, 1,
+[Define if the PE linker supports --disable-dynamicbase option.])
+fi
+AC_MSG_RESULT($gcc_cv_ld_disable_dynamicbase)
 ;;
 esac

Re: [PATCH] arm: Fix multilib mapping for CDE extensions [PR100856]

2021-08-11 Thread Christophe Lyon via Gcc-patches

ping?
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575310.html


On Wed, Aug 4, 2021 at 11:13 AM Christophe Lyon via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

> ping?
>
> On Thu, 15 Jul 2021 at 15:07, Christophe LYON via Gcc-patches
>  wrote:
> >
> > This is a followup to Srinath's recent patch: the newly added test is
> > failing e.g. on arm-linux-gnueabihf without R/M profile multilibs.
> >
> > It is also failing on arm-eabi with R/M profile multilibs if the
> > execution engine does not support v8.1-M instructions.
> >
> > The patch avoids this by adding check_effective_target_FUNC_multilib
> > in target-supports.exp which effectively checks whether the target
> > supports linking and execution, like what is already done for other
> > ARM effective targets.  pr100856.c is updated to use it instead of
> > arm_v8_1m_main_cde_mve_ok (which makes the testcase a bit of a
> > duplicate with check_effective_target_FUNC_multilib).
> >
> > In addition, I noticed that requiring MVE does not seem necessary and
> > this enables the test to pass even when targeting a CPU without MVE:
> > since the test does not involve actual CDE instructions, it can pass
> > on other architecture versions.  For instance, when requiring MVE, we
> > have to use cortex-m55 under QEMU for the test to pass because the
> > memset() that comes from v8.1-m.main+mve multilib uses LOB
> > instructions (DLS) (memset is used during startup).  Keeping
> > arm_v8_1m_main_cde_mve_ok would mean we would enable the test provided
> > we have the right multilibs, causing a runtime error if the simulator
> > does not support LOB instructions (e.g. when targeting cortex-m7).
> >
> > I do not update sourcebuild.texi since the CDE effective targets are
> > already collectively documented.
> >
> > Finally, the patch fixes two typos in comments.
> >
> > 2021-07-15  Christophe Lyon  
> >
> >  PR target/100856
> >  gcc/
> >  * config/arm/arm.opt: Fix typo.
> >  * config/arm/t-rmprofile: Fix typo.
> >
> >  gcc/testsuite/
> >  * gcc.target/arm/acle/pr100856.c: Use arm_v8m_main_cde_multilib
> >  and arm_v8m_main_cde.
> >  * lib/target-supports.exp: Add
> > check_effective_target_FUNC_multilib for ARM CDE.
> >
> >
>

Re: [PATCH] c++: Fix ICE on defaulted spaceship with pointer return type [PR94162]

2021-08-11 Thread Jakub Jelinek via Gcc-patches

On Wed, Aug 11, 2021 at 09:42:56AM -0400, Jason Merrill wrote:
> Checking CLASS_TYPE_P && decl_in_std_namespace (TYPE_MAIN_DECL) before
> looking at the name makes sense to me.

CLASS_TYPE_P is cheap, but isn't decl_in_std_namespace, especially when
it needs to walk inline namespaces, better done only if we get a match, so
like below?

Though I can do it even in the first if if you think it is better...

2021-08-11  Jakub Jelinek  

gcc/cp/
PR c++/94162
* method.c (cat_tag_for): Return cc_last for !CLASS_TYPE_P
or for classes not in std namespace.
gcc/testsuite/
PR c++/99429
* g++.dg/cpp2a/spaceship-synth11.C: New test.

PR c++/94162
* g++.dg/cpp2a/spaceship-synth-neg6.C: New test.

--- gcc/cp/method.c.jj  2021-08-09 15:03:00.923206463 +0200
+++ gcc/cp/method.c 2021-08-11 15:52:27.157437691 +0200
@@ -1029,10 +1029,13 @@ is_cat (tree type, comp_cat_tag tag)
 static comp_cat_tag
 cat_tag_for (tree type)
 {
+  if (!CLASS_TYPE_P (type))
+return cc_last;
   for (int i = 0; i < cc_last; ++i)
 {
   comp_cat_tag tag = (comp_cat_tag)i;
-  if (is_cat (type, tag))
+  if (is_cat (type, tag)
+ && decl_in_std_namespace_p (TYPE_MAIN_DECL (type)))
return tag;
 }
   return cc_last;
--- gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C.jj   2021-08-11 
15:49:05.267204333 +0200
+++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C  2021-08-11 
15:49:05.267204333 +0200
@@ -0,0 +1,29 @@
+// PR c++/99429
+// { dg-do compile { target c++20 } }
+
+namespace std {
+struct strong_ordering {
+  int _v;
+  constexpr strong_ordering (int v) :_v(v) {}
+  constexpr operator int (void) const { return _v; }
+  static const strong_ordering less;
+  static const strong_ordering equal;
+  static const strong_ordering greater;
+};
+constexpr strong_ordering strong_ordering::less = -1;
+constexpr strong_ordering strong_ordering::equal = 0;
+constexpr strong_ordering strong_ordering::greater = 1;
+}
+
+template 
+struct duration {
+  static constexpr const long period = N;
+  constexpr duration (void) = default;
+  constexpr duration (const duration& d) = default;
+  constexpr bool operator== (const duration& d) const = default;
+  constexpr bool operator<=> (const duration& d) const = default;
+  long _d;
+};
+
+using nanoseconds = duration<1>;
+using microseconds = duration;
--- gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C.jj2021-08-11 
15:49:05.268204320 +0200
+++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C   2021-08-11 
15:49:05.268204320 +0200
@@ -0,0 +1,11 @@
+// PR c++/94162
+// { dg-do compile { target c++20 } }
+
+#include <compare>
+
+struct S {
+  int a;   // { dg-error "three-way comparison of 'S::a' 
has type 'std::strong_ordering', which does not convert to 'int\\*'" }
+  int *operator<=>(const S&) const = default;
+};
+
+bool b = S{} < S{};// { dg-error "use of deleted function 
'constexpr int\\* S::operator<=>\\\(const S&\\\) const'" }

Jakub

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches



> On Aug 11, 2021, at 8:58 AM, Richard Biener  wrote:
> 
> On Wed, 11 Aug 2021, Qing Zhao wrote:
> 
>> 
>> 
>>> On Aug 11, 2021, at 8:37 AM, Richard Biener  wrote:
>>> 
>>> On Wed, 11 Aug 2021, Qing Zhao wrote:
>>> 
 
 
> On Aug 11, 2021, at 2:02 AM, Richard Biener  wrote:
> 
> On Tue, 10 Aug 2021, Qing Zhao wrote:
> 
>> 
>> 
>>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
>>>  wrote:
>>> 
>>> Hi, Richard,
>>> 
 On Aug 10, 2021, at 10:22 AM, Richard Biener  wrote:
>> 
>> Especially in the VLA case but likely also in general (though 
>> unlikely
>> since usually the receiver of initializations are simple enough).  
>> I'd
>> expect the VLA case end up as
>> 
>> *ptr_to_decl = .DEFERRED_INIT (...);
>> 
>> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
> 
> So, for the following small testing case:
> 
> 
> extern void bar (int);
> 
> void foo(int n)
> {
> int arr[n];
> bar (arr[2]);
> return;
> }
> =
> 
> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple 
> -S -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is:
> 
> =
> void foo (int n)
> {
> int n.0;
> sizetype D.1950;
> bitsizetype D.1951;
> sizetype D.1952;
> bitsizetype D.1953;
> sizetype D.1954;
> int[0:D.1950] * arr.1;
> void * saved_stack.2;
> int arr[0:D.1950] [value-expr: *arr.1];
> 
> saved_stack.2 = __builtin_stack_save ();
> try
> {
>  n.0 = n;
>  _1 = (long int) n.0;
>  _2 = _1 + -1;
>  _3 = (sizetype) _2;
>  D.1950 = _3;
>  _4 = (sizetype) n.0;
>  _5 = (bitsizetype) _4;
>  _6 = _5 * 32;
>  D.1951 = _6;
>  _7 = (sizetype) n.0;
>  _8 = _7 * 4;
>  D.1952 = _8;
>  _9 = (sizetype) n.0;
>  _10 = (bitsizetype) _9;
>  _11 = _10 * 32;
>  D.1953 = _11;
>  _12 = (sizetype) n.0;
>  _13 = _12 * 4;
>  D.1954 = _13;
>  arr.1 = __builtin_alloca_with_align (D.1954, 32);
>  arr = .DEFERRED_INIT (D.1952, 2, 1);
>  _14 = (*arr.1)[2];
>  bar (_14);
>  return;
> }
> finally
> {
>  __builtin_stack_restore (saved_stack.2);
> }
> }
> 
> 
> 
> You think that the above .DEFERRED_INIT is not correct?
> It should be:
> 
> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> 
> ?
 
 Yes.
 
>>> 
>>> I updated gimplify.c for VLA and now it emits the call to 
>>> .DEFERRED_INIT as:
>>> 
>>>   arr.1 = __builtin_alloca_with_align (D.1954, 32);
>>>   *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
>>> 
>>> However, this call triggered the assertion failure in 
>>> verify_gimple_call of tree-cfg.c because the LHS is not a valid LHS. 
>>> Then I modify tree-cfg.c as:
>>> 
>>> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
>>> index 330eb7dd89bf..180d4f1f9e32 100644
>>> --- a/gcc/tree-cfg.c
>>> +++ b/gcc/tree-cfg.c
>>> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt)
>>>   }
>>> 
>>> tree lhs = gimple_call_lhs (stmt);
>>> +  /* For .DEFERRED_INIT call, the LHS might be an indirection of
>>> + a pointer for the VLA variable, which is not a valid LHS of
>>> + a gimple call, we ignore the assertion on this.  */
>>> if (lhs
>>> +  && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT))
>>>&& (!is_gimple_reg (lhs)
>>>   && (!is_gimple_lvalue (lhs)
>>>   || verify_types_in_gimple_reference
>>> 
>>> The assertion failure in tree-cfg.c got resolved, but I got another 
>>> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, 
>>> int flags), line 945:
>>> 
>>> 939   /* If we get here, something has gone wrong.  */
>>> 940   if (flag_checking)
>>> 941 {
>>> 942   fprintf (stderr, "unhandled expression in 
>>> get_expr_operands():\n");
>>> 943   debug_tree (expr);
>>> 944   fputs ("\n", stderr);
>>> 945   gcc_unreachable ();
>>> 946 }
>>> 
>>> Looks like that  the gimple statement:
>>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
>>> 
>>> Is not valid.  i.e, the LHS should not be an indirection to a pointer. 
>>> 
>>> How to resolve this issue?
> 
> It sounds like the LHS is an INDIRECT_REF maybe?  That means it's
> still not properly gimplified because it should end up as a MEM_REF
> instead.
> 
> But I'm just guessing here ... if you are

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Richard Biener via Gcc-patches

On Wed, 11 Aug 2021, Qing Zhao wrote:

> 
> 
> > On Aug 11, 2021, at 8:37 AM, Richard Biener  wrote:
> > 
> > On Wed, 11 Aug 2021, Qing Zhao wrote:
> > 
> >> 
> >> 
> >>> On Aug 11, 2021, at 2:02 AM, Richard Biener  wrote:
> >>> 
> >>> On Tue, 10 Aug 2021, Qing Zhao wrote:
> >>> 
>  
>  
> > On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
> >  wrote:
> > 
> > Hi, Richard,
> > 
> >> On Aug 10, 2021, at 10:22 AM, Richard Biener  wrote:
>  
>  Especially in the VLA case but likely also in general (though 
>  unlikely
>  since usually the receiver of initializations are simple enough).  
>  I'd
>  expect the VLA case end up as
>  
>  *ptr_to_decl = .DEFERRED_INIT (...);
>  
>  where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
> >>> 
> >>> So, for the following small testing case:
> >>> 
> >>> 
> >>> extern void bar (int);
> >>> 
> >>> void foo(int n)
> >>> {
> >>> int arr[n];
> >>> bar (arr[2]);
> >>> return;
> >>> }
> >>> =
> >>> 
> >>> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple 
> >>> -S -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is:
> >>> 
> >>> =
> >>> void foo (int n)
> >>> {
> >>> int n.0;
> >>> sizetype D.1950;
> >>> bitsizetype D.1951;
> >>> sizetype D.1952;
> >>> bitsizetype D.1953;
> >>> sizetype D.1954;
> >>> int[0:D.1950] * arr.1;
> >>> void * saved_stack.2;
> >>> int arr[0:D.1950] [value-expr: *arr.1];
> >>> 
> >>> saved_stack.2 = __builtin_stack_save ();
> >>> try
> >>> {
> >>>   n.0 = n;
> >>>   _1 = (long int) n.0;
> >>>   _2 = _1 + -1;
> >>>   _3 = (sizetype) _2;
> >>>   D.1950 = _3;
> >>>   _4 = (sizetype) n.0;
> >>>   _5 = (bitsizetype) _4;
> >>>   _6 = _5 * 32;
> >>>   D.1951 = _6;
> >>>   _7 = (sizetype) n.0;
> >>>   _8 = _7 * 4;
> >>>   D.1952 = _8;
> >>>   _9 = (sizetype) n.0;
> >>>   _10 = (bitsizetype) _9;
> >>>   _11 = _10 * 32;
> >>>   D.1953 = _11;
> >>>   _12 = (sizetype) n.0;
> >>>   _13 = _12 * 4;
> >>>   D.1954 = _13;
> >>>   arr.1 = __builtin_alloca_with_align (D.1954, 32);
> >>>   arr = .DEFERRED_INIT (D.1952, 2, 1);
> >>>   _14 = (*arr.1)[2];
> >>>   bar (_14);
> >>>   return;
> >>> }
> >>> finally
> >>> {
> >>>   __builtin_stack_restore (saved_stack.2);
> >>> }
> >>> }
> >>> 
> >>> 
> >>> 
> >>> You think that the above .DEFERRED_INIT is not correct?
> >>> It should be:
> >>> 
> >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> >>> 
> >>> ?
> >> 
> >> Yes.
> >> 
> > 
> > I updated gimplify.c for VLA and now it emits the call to 
> > .DEFERRED_INIT as:
> > 
> >arr.1 = __builtin_alloca_with_align (D.1954, 32);
> >*arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> > 
> > However, this call triggered the assertion failure in 
> > verify_gimple_call of tree-cfg.c because the LHS is not a valid LHS. 
> > Then I modify tree-cfg.c as:
> > 
> > diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
> > index 330eb7dd89bf..180d4f1f9e32 100644
> > --- a/gcc/tree-cfg.c
> > +++ b/gcc/tree-cfg.c
> > @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt)
> >}
> > 
> > tree lhs = gimple_call_lhs (stmt);
> > +  /* For .DEFERRED_INIT call, the LHS might be an indirection of
> > + a pointer for the VLA variable, which is not a valid LHS of
> > + a gimple call, we ignore the assertion on this.  */
> > if (lhs
> > +  && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT))
> > && (!is_gimple_reg (lhs)
> >&& (!is_gimple_lvalue (lhs)
> >|| verify_types_in_gimple_reference
> > 
> > The assertion failure in tree-cfg.c got resolved, but I got another 
> > assertion failure in operands_scanner::get_expr_operands (tree *expr_p, 
> > int flags), line 945:
> > 
> > 939   /* If we get here, something has gone wrong.  */
> > 940   if (flag_checking)
> > 941 {
> > 942   fprintf (stderr, "unhandled expression in 
> > get_expr_operands():\n");
> > 943   debug_tree (expr);
> > 944   fputs ("\n", stderr);
> > 945   gcc_unreachable ();
> > 946 }
> > 
> > Looks like that  the gimple statement:
> >  *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> > 
> > Is not valid.  i.e, the LHS should not be an indirection to a pointer. 
> > 
> > How to resolve this issue?
> >>> 
> >>> It sounds like the LHS is an INDIRECT_REF maybe?  That means it's
> >>> still not properly gimplified because it should end up as a MEM_REF
> >>> instead.
> >>> 
> >>> But I'm just guessing here ... if you are in a debugger then you can
> >>>

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches



> On Aug 11, 2021, at 8:37 AM, Richard Biener  wrote:
> 
> On Wed, 11 Aug 2021, Qing Zhao wrote:
> 
>> 
>> 
>>> On Aug 11, 2021, at 2:02 AM, Richard Biener  wrote:
>>> 
>>> On Tue, 10 Aug 2021, Qing Zhao wrote:
>>> 
 
 
> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> Hi, Richard,
> 
>> On Aug 10, 2021, at 10:22 AM, Richard Biener  wrote:
 
 Especially in the VLA case but likely also in general (though unlikely
 since usually the receiver of initializations are simple enough).  I'd
 expect the VLA case end up as
 
 *ptr_to_decl = .DEFERRED_INIT (...);
 
 where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
>>> 
>>> So, for the following small testing case:
>>> 
>>> 
>>> extern void bar (int);
>>> 
>>> void foo(int n)
>>> {
>>> int arr[n];
>>> bar (arr[2]);
>>> return;
>>> }
>>> =
>>> 
>>> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S 
>>> -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is:
>>> 
>>> =
>>> void foo (int n)
>>> {
>>> int n.0;
>>> sizetype D.1950;
>>> bitsizetype D.1951;
>>> sizetype D.1952;
>>> bitsizetype D.1953;
>>> sizetype D.1954;
>>> int[0:D.1950] * arr.1;
>>> void * saved_stack.2;
>>> int arr[0:D.1950] [value-expr: *arr.1];
>>> 
>>> saved_stack.2 = __builtin_stack_save ();
>>> try
>>> {
>>>   n.0 = n;
>>>   _1 = (long int) n.0;
>>>   _2 = _1 + -1;
>>>   _3 = (sizetype) _2;
>>>   D.1950 = _3;
>>>   _4 = (sizetype) n.0;
>>>   _5 = (bitsizetype) _4;
>>>   _6 = _5 * 32;
>>>   D.1951 = _6;
>>>   _7 = (sizetype) n.0;
>>>   _8 = _7 * 4;
>>>   D.1952 = _8;
>>>   _9 = (sizetype) n.0;
>>>   _10 = (bitsizetype) _9;
>>>   _11 = _10 * 32;
>>>   D.1953 = _11;
>>>   _12 = (sizetype) n.0;
>>>   _13 = _12 * 4;
>>>   D.1954 = _13;
>>>   arr.1 = __builtin_alloca_with_align (D.1954, 32);
>>>   arr = .DEFERRED_INIT (D.1952, 2, 1);
>>>   _14 = (*arr.1)[2];
>>>   bar (_14);
>>>   return;
>>> }
>>> finally
>>> {
>>>   __builtin_stack_restore (saved_stack.2);
>>> }
>>> }
>>> 
>>> 
>>> 
>>> You think that the above .DEFERRED_INIT is not correct?
>>> It should be:
>>> 
>>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
>>> 
>>> ?
>> 
>> Yes.
>> 
> 
> I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT 
> as:
> 
>arr.1 = __builtin_alloca_with_align (D.1954, 32);
>*arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> 
> However, this call triggered the assertion failure in verify_gimple_call 
> of tree-cfg.c because the LHS is not a valid LHS. 
> Then I modify tree-cfg.c as:
> 
> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
> index 330eb7dd89bf..180d4f1f9e32 100644
> --- a/gcc/tree-cfg.c
> +++ b/gcc/tree-cfg.c
> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt)
>}
> 
> tree lhs = gimple_call_lhs (stmt);
> +  /* For .DEFERRED_INIT call, the LHS might be an indirection of
> + a pointer for the VLA variable, which is not a valid LHS of
> + a gimple call, we ignore the assertion on this.  */
> if (lhs
> +  && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT))
> && (!is_gimple_reg (lhs)
>&& (!is_gimple_lvalue (lhs)
>|| verify_types_in_gimple_reference
> 
> The assertion failure in tree-cfg.c got resolved, but I got another 
> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, 
> int flags), line 945:
> 
> 939   /* If we get here, something has gone wrong.  */
> 940   if (flag_checking)
> 941 {
> 942   fprintf (stderr, "unhandled expression in 
> get_expr_operands():\n");
> 943   debug_tree (expr);
> 944   fputs ("\n", stderr);
> 945   gcc_unreachable ();
> 946 }
> 
> Looks like that  the gimple statement:
>  *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> 
> Is not valid.  i.e, the LHS should not be an indirection to a pointer. 
> 
> How to resolve this issue?
>>> 
>>> It sounds like the LHS is an INDIRECT_REF maybe?  That means it's
>>> still not properly gimplified because it should end up as a MEM_REF
>>> instead.
>>> 
>>> But I'm just guessing here ... if you are in a debugger then you can
>>> invoke debug_tree (lhs) in the inferior to see what it exactly is
>>> at the point of the failure.
>> 
>> Yes, it’s an INDIRECT_REF at the point of the failure even though I added a 
>> 
>> gimplify_var_or_parm_decl  (lhs) 
> 
> I think the easiest is to build the .DEFERRED_INIT as GENERIC
> and use gimplify_assign () to gimplify and add the result
> to the sequence.  Thus,

Re: [PATCH] c++: Improve memory usage of subsumption [PR100828]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/9/21 5:07 PM, Patrick Palka wrote:

On Wed, Jul 28, 2021 at 4:42 PM Jason Merrill  wrote:


On 7/19/21 6:05 PM, Patrick Palka wrote:

Constraint subsumption is implemented in two steps.  The first step
computes the disjunctive (or conjunctive) normal form of one of the
constraints, and the second step verifies that each clause in the
decomposed form implies the other constraint.   Performing these two
steps separately is problematic because in the first step the
disjunctive normal form can be exponentially larger than the original
constraint, and by computing it ahead of time we'd have to keep all of
it in memory.

This patch fixes this exponential blowup in memory usage by interleaving
these two steps, so that as soon as we decompose one clause we check
implication for it.  In turn, memory usage during subsumption is now
worst case linear in the size of the constraints rather than
exponential, and so we can safely remove the hard limit of 16 clauses
without introducing runaway memory usage on some inputs.  (Note the
_time_ complexity of subsumption is still exponential in the worst case.)

In order for this to work we need formula::branch to insert the copy
of the current clause directly after the current clause rather than
at the end of the list, so that we fully decompose a clause shortly
after creating it.  Otherwise we'd end up accumulating exponentially
many (partially decomposed) clauses in memory anyway.

Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on
range-v3 and cmcstl2.  Does this look OK for trunk and perhaps 11?


OK for trunk.


Thanks a lot, patch committed to trunk as r12-2658.  Since this low
complexity limit was introduced in GCC 10, what do you think about
increasing the limit from 16 to say 128 in the 10/11 release branches
as a relatively safe stopgap?


Now that 11.2 is out, go ahead and apply this patch to the 11 branch.

Won't a limit of 128 in GCC 10 lead to extremely long compile times for 
affected code?  Is that more desirable than an error?



   PR c++/100828

gcc/cp/ChangeLog:

   * logic.cc (formula::formula): Use emplace_back.
   (formula::branch): Insert a copy of m_current directly after
   m_current instead of at the end of the list.
   (formula::erase): Define.
   (decompose_formula): Remove.
   (decompose_antecedents): Remove.
   (decompose_consequents): Remove.
   (derive_proofs): Remove.
   (max_problem_size): Remove.
   (diagnose_constraint_size): Remove.
   (subsumes_constraints_nonnull): Rewrite directly in terms of
   decompose_clause and derive_proof, interleaving decomposition
   with implication checking.  Use formula::erase to free the
   current clause before moving on to the next one.
---
   gcc/cp/logic.cc | 118 ++--
   1 file changed, 35 insertions(+), 83 deletions(-)

diff --git a/gcc/cp/logic.cc b/gcc/cp/logic.cc
index 142457e408a..3f872c11fe2 100644
--- a/gcc/cp/logic.cc
+++ b/gcc/cp/logic.cc
@@ -223,9 +223,7 @@ struct formula

 formula (tree t)
 {
-/* This should call emplace_back(). There's an extra copy being
-   invoked by using push_back().  */
-m_clauses.push_back (t);
+m_clauses.emplace_back (t);
   m_current = m_clauses.begin ();
 }

@@ -248,8 +246,7 @@ struct formula
 clause& branch ()
 {
   gcc_assert (!done ());
-m_clauses.push_back (*m_current);
-return m_clauses.back ();
+return *m_clauses.insert (std::next (m_current), *m_current);
 }

 /* Returns the position of the current clause.  */
@@ -287,6 +284,14 @@ struct formula
   return m_clauses.end ();
 }

+  /* Remove the specified clause.  */
+
+  void erase (iterator i)
+  {
+gcc_assert (i != m_current);
+m_clauses.erase (i);
+  }
+
 std::list<clause> m_clauses; /* The list of clauses.  */
 iterator m_current; /* The current clause.  */
   };
@@ -659,39 +664,6 @@ decompose_clause (formula& f, clause& c, rules r)
 f.advance ();
   }

-/* Decompose the logical formula F according to the logical
-   rules determined by R.  The result is a formula containing
-   clauses that contain only atomic terms.  */
-
-void
-decompose_formula (formula& f, rules r)
-{
-  while (!f.done ())
-decompose_clause (f, *f.current (), r);
-}
-
-/* Fully decomposing T into a list of sequents, each comprised of
-   a list of atomic constraints, as if T were an antecedent.  */
-
-static formula
-decompose_antecedents (tree t)
-{
-  formula f (t);
-  decompose_formula (f, left);
-  return f;
-}
-
-/* Fully decomposing T into a list of sequents, each comprised of
-   a list of atomic constraints, as if T were a consequent.  */
-
-static formula
-decompose_consequents (tree t)
-{
-  formula f (t);
-  decompose_formula (f, right);
-  return f;
-}
-
   static bool derive_proof (clause&, tree, rules);

   /* Derive a proof of both operands of T.  */
@@ -744,28 +716,6 @@ derive_proof (clause& c, tree t,

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches



> On Aug 11, 2021, at 4:02 AM, Richard Sandiford  
> wrote:
> 
> Qing Zhao via Gcc-patches  writes:
>>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
>>>  wrote:
>>> 
>>> Hi, Richard,
>>> 
 On Aug 10, 2021, at 10:22 AM, Richard Biener  wrote:
>> 
>> Especially in the VLA case but likely also in general (though unlikely
>> since usually the receiver of initializations are simple enough).  I'd
>> expect the VLA case end up as
>> 
>> *ptr_to_decl = .DEFERRED_INIT (...);
>> 
>> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
> 
> So, for the following small testing case:
> 
> 
> extern void bar (int);
> 
> void foo(int n)
> {
> int arr[n];
> bar (arr[2]);
> return;
> }
> =
> 
> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S 
> -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is:
> 
> =
> void foo (int n)
> {
> int n.0;
> sizetype D.1950;
> bitsizetype D.1951;
> sizetype D.1952;
> bitsizetype D.1953;
> sizetype D.1954;
> int[0:D.1950] * arr.1;
> void * saved_stack.2;
> int arr[0:D.1950] [value-expr: *arr.1];
> 
> saved_stack.2 = __builtin_stack_save ();
> try
>  {
>n.0 = n;
>_1 = (long int) n.0;
>_2 = _1 + -1;
>_3 = (sizetype) _2;
>D.1950 = _3;
>_4 = (sizetype) n.0;
>_5 = (bitsizetype) _4;
>_6 = _5 * 32;
>D.1951 = _6;
>_7 = (sizetype) n.0;
>_8 = _7 * 4;
>D.1952 = _8;
>_9 = (sizetype) n.0;
>_10 = (bitsizetype) _9;
>_11 = _10 * 32;
>D.1953 = _11;
>_12 = (sizetype) n.0;
>_13 = _12 * 4;
>D.1954 = _13;
>arr.1 = __builtin_alloca_with_align (D.1954, 32);
>arr = .DEFERRED_INIT (D.1952, 2, 1);
>_14 = (*arr.1)[2];
>bar (_14);
>return;
>  }
> finally
>  {
>__builtin_stack_restore (saved_stack.2);
>  }
> }
> 
> 
> 
> You think that the above .DEFERRED_INIT is not correct?
> It should be:
> 
> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> 
> ?
 
 Yes.
 
>>> 
>>> I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT as:
>>> 
>>> arr.1 = __builtin_alloca_with_align (D.1954, 32);
>>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
>>> 
>>> However, this call triggered the assertion failure in verify_gimple_call of 
>>> tree-cfg.c because the LHS is not a valid LHS. 
>>> Then I modify tree-cfg.c as:
>>> 
>>> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
>>> index 330eb7dd89bf..180d4f1f9e32 100644
>>> --- a/gcc/tree-cfg.c
>>> +++ b/gcc/tree-cfg.c
>>> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt)
>>> }
>>> 
>>>  tree lhs = gimple_call_lhs (stmt);
>>> +  /* For .DEFERRED_INIT call, the LHS might be an indirection of
>>> + a pointer for the VLA variable, which is not a valid LHS of
>>> + a gimple call, we ignore the assertion on this.  */
>>>  if (lhs
>>> +  && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT))
>>>  && (!is_gimple_reg (lhs)
>>> && (!is_gimple_lvalue (lhs)
>>> || verify_types_in_gimple_reference
>>> 
>>> The assertion failure in tree-cfg.c got resolved, but I got another 
>>> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, int 
>>> flags), line 945:
>>> 
>>> 939   /* If we get here, something has gone wrong.  */
>>> 940   if (flag_checking)
>>> 941 {
>>> 942   fprintf (stderr, "unhandled expression in 
>>> get_expr_operands():\n");
>>> 943   debug_tree (expr);
>>> 944   fputs ("\n", stderr);
>>> 945   gcc_unreachable ();
>>> 946 }
>>> 
>>> Looks like that  the gimple statement:
>>>   *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
>>> 
>>> Is not valid.  i.e, the LHS should not be an indirection to a pointer. 
>>> 
>>> How to resolve this issue?
>> 
>> I came up with the following solution:
>> 
>> Define the IFN_DEFERRED_INIT function as:
>> 
>>   LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA);
>> 
>>   if IS_VLA is false, the LHS is the DECL itself,
>>   if IS_VLA is true, the LHS is the pointer to this DECL that is created by
>>   gimplify_vla_decl.
>> 
>> 
>> The benefits of this solution are:
>> 
>> 1. Resolved the invalid IR issue;
>> 2. The call stmt carries the address of the VLA naturally;
>> 
>> The issue with this solution is:
>> 
>> For VLA and non-VLA, the LHS will be different, 
>> 
>> Do you see any other potential issues with this solution?
> 
> The idea behind the DECL version of the .DEFERRED_INIT semantics was
> that .DEFERRED_INIT just returns a SIZE-byte value that the caller
> then assigns to a SIZE-byte lhs (with the caller choosing the lhs).
> .DEFERRED_INIT itself doesn't read or write memory and so can be const,
> which in turn allows alias analysis to be more precise.
Yes. That’s right.

Re: [PATCH] c++: Fix ICE on defaulted spaceship with pointer return type [PR94162]

2021-08-11 Thread Jason Merrill via Gcc-patches


On 8/10/21 4:39 AM, Jakub Jelinek wrote:

Hi!

The spaceship-synth-neg6.C testcase ICEs because we call cat_tag_for
on the explicit return type, but pointer types don't have
TYPE_LINKAGE_IDENTIFIER.  The patch fixes that.
Or should I be checking for if (!CLASS_TYPE_P (type)) return cc_last;
instead (are class types guaranteed to have TYPE_LINKAGE_IDENTIFIER?)?



I also wonder if after finding a match we shouldn't verify if it is
a class type in std namespace (i.e. that
TYPE_NAME (TYPE_MAIN_VARIANT (type)) is a TYPE_DECL
and
decl_in_std_namespace_p (TYPE_NAME (TYPE_MAIN_VARIANT (type)))
because it seems nothing prevents it from returning non-cc_last say on
namespace N {
   struct partial_ordering {};
}
etc.


Checking CLASS_TYPE_P && decl_in_std_namespace_p (TYPE_MAIN_DECL) before 
looking at the name makes sense to me.



The g++.dg/cpp2a/spaceship-synth11.C testcase is from a PR that has been
fixed with r12-619-gfc178519771db508c03611cff4a1466cf67fce1d (but
not backported to 11).

Bootstrapped/regtested on x86_64-linux and i686-linux.

2021-08-10  Jakub Jelinek  

gcc/cp/
PR c++/94162
* method.c (cat_tag_for): Return cc_last for types with no
linkage identifier.
gcc/testsuite/
PR c++/99429
* g++.dg/cpp2a/spaceship-synth11.C: New test.

PR c++/94162
* g++.dg/cpp2a/spaceship-synth-neg6.C: New test.

--- gcc/cp/method.c.jj  2021-06-25 10:36:22.169019953 +0200
+++ gcc/cp/method.c 2021-08-09 12:26:38.590166006 +0200
@@ -1029,6 +1029,8 @@ is_cat (tree type, comp_cat_tag tag)
  static comp_cat_tag
  cat_tag_for (tree type)
  {
+  if (!TYPE_LINKAGE_IDENTIFIER (type))
+return cc_last;
for (int i = 0; i < cc_last; ++i)
  {
comp_cat_tag tag = (comp_cat_tag)i;
--- gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C.jj   2021-08-09 
12:28:58.748240310 +0200
+++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C  2021-08-09 
12:29:44.023618250 +0200
@@ -0,0 +1,29 @@
+// PR c++/99429
+// { dg-do compile { target c++20 } }
+
+namespace std {
+struct strong_ordering {
+  int _v;
+  constexpr strong_ordering (int v) :_v(v) {}
+  constexpr operator int (void) const { return _v; }
+  static const strong_ordering less;
+  static const strong_ordering equal;
+  static const strong_ordering greater;
+};
+constexpr strong_ordering strong_ordering::less = -1;
+constexpr strong_ordering strong_ordering::equal = 0;
+constexpr strong_ordering strong_ordering::greater = 1;
+}
+
+template 
+struct duration {
+  static constexpr const long period = N;
+  constexpr duration (void) = default;
+  constexpr duration (const duration& d) = default;
+  constexpr bool operator== (const duration& d) const = default;
+  constexpr bool operator<=> (const duration& d) const = default;
+  long _d;
+};
+
+using nanoseconds = duration<1>;
+using microseconds = duration;
--- gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C.jj2021-08-09 
12:31:47.411922957 +0200
+++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C   2021-08-09 
12:35:26.995906403 +0200
@@ -0,0 +1,11 @@
+// PR c++/94162
+// { dg-do compile { target c++20 } }
+
+#include <compare>
+
+struct S {
+  int a;   // { dg-error "three-way comparison of 'S::a' has 
type 'std::strong_ordering', which does not convert to 'int\\*'" }
+  int *operator<=>(const S&) const = default;
+};
+
+bool b = S{} < S{}; // { dg-error "use of deleted function 'constexpr int\\* 
S::operator<=>\\\(const S&\\\) const'" }

Jakub

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Richard Biener via Gcc-patches

On Wed, 11 Aug 2021, Qing Zhao wrote:

> 
> 
> > On Aug 11, 2021, at 2:02 AM, Richard Biener  wrote:
> > 
> > On Tue, 10 Aug 2021, Qing Zhao wrote:
> > 
> >> 
> >> 
> >>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
> >>>  wrote:
> >>> 
> >>> Hi, Richard,
> >>> 
>  On Aug 10, 2021, at 10:22 AM, Richard Biener  wrote:
> >> 
> >> Especially in the VLA case but likely also in general (though unlikely
> >> since usually the receiver of initializations are simple enough).  I'd
> >> expect the VLA case end up as
> >> 
> >> *ptr_to_decl = .DEFERRED_INIT (...);
> >> 
> >> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
> > 
> > So, for the following small testing case:
> > 
> > 
> > extern void bar (int);
> > 
> > void foo(int n)
> > {
> > int arr[n];
> > bar (arr[2]);
> > return;
> > }
> > =
> > 
> > If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S 
> > -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is:
> > 
> > =
> > void foo (int n)
> > {
> > int n.0;
> > sizetype D.1950;
> > bitsizetype D.1951;
> > sizetype D.1952;
> > bitsizetype D.1953;
> > sizetype D.1954;
> > int[0:D.1950] * arr.1;
> > void * saved_stack.2;
> > int arr[0:D.1950] [value-expr: *arr.1];
> > 
> > saved_stack.2 = __builtin_stack_save ();
> > try
> >  {
> >n.0 = n;
> >_1 = (long int) n.0;
> >_2 = _1 + -1;
> >_3 = (sizetype) _2;
> >D.1950 = _3;
> >_4 = (sizetype) n.0;
> >_5 = (bitsizetype) _4;
> >_6 = _5 * 32;
> >D.1951 = _6;
> >_7 = (sizetype) n.0;
> >_8 = _7 * 4;
> >D.1952 = _8;
> >_9 = (sizetype) n.0;
> >_10 = (bitsizetype) _9;
> >_11 = _10 * 32;
> >D.1953 = _11;
> >_12 = (sizetype) n.0;
> >_13 = _12 * 4;
> >D.1954 = _13;
> >arr.1 = __builtin_alloca_with_align (D.1954, 32);
> >arr = .DEFERRED_INIT (D.1952, 2, 1);
> >_14 = (*arr.1)[2];
> >bar (_14);
> >return;
> >  }
> > finally
> >  {
> >__builtin_stack_restore (saved_stack.2);
> >  }
> > }
> > 
> > 
> > 
> > You think that the above .DEFERRED_INIT is not correct?
> > It should be:
> > 
> > *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> > 
> > ?
>  
>  Yes.
>  
> >>> 
> >>> I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT 
> >>> as:
> >>> 
> >>> arr.1 = __builtin_alloca_with_align (D.1954, 32);
> >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> >>> 
> >>> However, this call triggered the assertion failure in verify_gimple_call 
> >>> of tree-cfg.c because the LHS is not a valid LHS. 
> >>> Then I modify tree-cfg.c as:
> >>> 
> >>> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
> >>> index 330eb7dd89bf..180d4f1f9e32 100644
> >>> --- a/gcc/tree-cfg.c
> >>> +++ b/gcc/tree-cfg.c
> >>> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt)
> >>> }
> >>> 
> >>>  tree lhs = gimple_call_lhs (stmt);
> >>> +  /* For .DEFERRED_INIT call, the LHS might be an indirection of
> >>> + a pointer for the VLA variable, which is not a valid LHS of
> >>> + a gimple call, we ignore the assertion on this.  */
> >>>  if (lhs
> >>> +  && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT))
> >>>  && (!is_gimple_reg (lhs)
> >>> && (!is_gimple_lvalue (lhs)
> >>> || verify_types_in_gimple_reference
> >>> 
> >>> The assertion failure in tree-cfg.c got resolved, but I got another 
> >>> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, 
> >>> int flags), line 945:
> >>> 
> >>> 939   /* If we get here, something has gone wrong.  */
> >>> 940   if (flag_checking)
> >>> 941 {
> >>> 942   fprintf (stderr, "unhandled expression in 
> >>> get_expr_operands():\n");
> >>> 943   debug_tree (expr);
> >>> 944   fputs ("\n", stderr);
> >>> 945   gcc_unreachable ();
> >>> 946 }
> >>> 
> >>> Looks like that  the gimple statement:
> >>>   *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> >>> 
> >>> Is not valid.  i.e, the LHS should not be an indirection to a pointer. 
> >>> 
> >>> How to resolve this issue?
> > 
> > It sounds like the LHS is an INDIRECT_REF maybe?  That means it's
> > still not properly gimplified because it should end up as a MEM_REF
> > instead.
> > 
> > But I'm just guessing here ... if you are in a debugger then you can
> > invoke debug_tree (lhs) in the inferior to see what it exactly is
> > at the point of the failure.
> 
> Yes, it’s an INDIRECT_REF at the point of the failure even though I added a 
> 
> gimplify_var_or_parm_decl  (lhs) 

I think the easiest is to build the .DEFERRED_INIT as GENERIC
and use gimplify_assign () to gimplify and add the result
to the sequence.  Thus, build a GENERIC CALL_EXPR and then

Small tweak to expand_used_vars

2021-08-11 Thread Eric Botcazou

This completes the replacement of DECL_ATTRIBUTES (current_function_decl) with 
the attribs local variable.

Tested on x86-64/Linux, applied on the mainline as obvious.


2021-08-11  Eric Botcazou

* cfgexpand.c (expand_used_vars): Reuse attribs local variable.

-- 
Eric Botcazou
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 818328071db..03260b019e5 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2294,22 +2294,19 @@ expand_used_vars (bitmap forced_stack_vars)
 	if (gen_stack_protect_signal
 	|| cfun->calls_alloca
 	|| has_protected_decls
-	|| lookup_attribute ("stack_protect",
- DECL_ATTRIBUTES (current_function_decl)))
+	|| lookup_attribute ("stack_protect", attribs))
 	  create_stack_guard ();
 	break;
 
   case SPCT_FLAG_DEFAULT:
 	if (cfun->calls_alloca
 	|| has_protected_decls
-	|| lookup_attribute ("stack_protect",
- DECL_ATTRIBUTES (current_function_decl)))
+	|| lookup_attribute ("stack_protect", attribs))
 	  create_stack_guard ();
 	break;
 
   case SPCT_FLAG_EXPLICIT:
-	if (lookup_attribute ("stack_protect",
-			  DECL_ATTRIBUTES (current_function_decl)))
+	if (lookup_attribute ("stack_protect", attribs))
 	  create_stack_guard ();
 	break;

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Qing Zhao via Gcc-patches



> On Aug 11, 2021, at 2:02 AM, Richard Biener  wrote:
> 
> On Tue, 10 Aug 2021, Qing Zhao wrote:
> 
>> 
>> 
>>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
>>>  wrote:
>>> 
>>> Hi, Richard,
>>> 
 On Aug 10, 2021, at 10:22 AM, Richard Biener  wrote:
>> 
>> Especially in the VLA case but likely also in general (though unlikely
>> since usually the receiver of initializations are simple enough).  I'd
>> expect the VLA case end up as
>> 
>> *ptr_to_decl = .DEFERRED_INIT (...);
>> 
>> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
> 
> So, for the following small testing case:
> 
> 
> extern void bar (int);
> 
> void foo(int n)
> {
> int arr[n];
> bar (arr[2]);
> return;
> }
> =
> 
> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S 
> -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is:
> 
> =
> void foo (int n)
> {
> int n.0;
> sizetype D.1950;
> bitsizetype D.1951;
> sizetype D.1952;
> bitsizetype D.1953;
> sizetype D.1954;
> int[0:D.1950] * arr.1;
> void * saved_stack.2;
> int arr[0:D.1950] [value-expr: *arr.1];
> 
> saved_stack.2 = __builtin_stack_save ();
> try
>  {
>n.0 = n;
>_1 = (long int) n.0;
>_2 = _1 + -1;
>_3 = (sizetype) _2;
>D.1950 = _3;
>_4 = (sizetype) n.0;
>_5 = (bitsizetype) _4;
>_6 = _5 * 32;
>D.1951 = _6;
>_7 = (sizetype) n.0;
>_8 = _7 * 4;
>D.1952 = _8;
>_9 = (sizetype) n.0;
>_10 = (bitsizetype) _9;
>_11 = _10 * 32;
>D.1953 = _11;
>_12 = (sizetype) n.0;
>_13 = _12 * 4;
>D.1954 = _13;
>arr.1 = __builtin_alloca_with_align (D.1954, 32);
>arr = .DEFERRED_INIT (D.1952, 2, 1);
>_14 = (*arr.1)[2];
>bar (_14);
>return;
>  }
> finally
>  {
>__builtin_stack_restore (saved_stack.2);
>  }
> }
> 
> 
> 
> You think that the above .DEFERRED_INIT is not correct?
> It should be:
> 
> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> 
> ?
 
 Yes.
 
>>> 
>>> I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT as:
>>> 
>>> arr.1 = __builtin_alloca_with_align (D.1954, 32);
>>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
>>> 
>>> However, this call triggered the assertion failure in verify_gimple_call of 
>>> tree-cfg.c because the LHS is not a valid LHS. 
>>> Then I modify tree-cfg.c as:
>>> 
>>> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
>>> index 330eb7dd89bf..180d4f1f9e32 100644
>>> --- a/gcc/tree-cfg.c
>>> +++ b/gcc/tree-cfg.c
>>> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt)
>>> }
>>> 
>>>  tree lhs = gimple_call_lhs (stmt);
>>> +  /* For .DEFERRED_INIT call, the LHS might be an indirection of
>>> + a pointer for the VLA variable, which is not a valid LHS of
>>> + a gimple call, we ignore the assertion on this.  */ 
>>>  if (lhs
>>> +  && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT))
>>>  && (!is_gimple_reg (lhs)
>>> && (!is_gimple_lvalue (lhs)
>>> || verify_types_in_gimple_reference
>>> 
>>> The assertion failure in tree-cfg.c got resolved, but I got another 
>>> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, int 
>>> flags), line 945:
>>> 
>>> 939   /* If we get here, something has gone wrong.  */
>>> 940   if (flag_checking)
>>> 941 {
>>> 942   fprintf (stderr, "unhandled expression in 
>>> get_expr_operands():\n");
>>> 943   debug_tree (expr);
>>> 944   fputs ("\n", stderr);
>>> 945   gcc_unreachable ();
>>> 946 }
>>> 
>>> It looks like the gimple statement:
>>>   *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
>>> 
>>> is not valid, i.e., the LHS should not be an indirection to a pointer. 
>>> 
>>> How to resolve this issue?
> 
> It sounds like the LHS is an INDIRECT_REF maybe?  That means it's
> still not properly gimplified because it should end up as a MEM_REF
> instead.
> 
> But I'm just guessing here ... if you are in a debugger then you can
> invoke debug_tree (lhs) in the inferior to see what it exactly is
> at the point of the failure.

Yes, it’s an INDIRECT_REF at the point of the failure even though I added a 

gimplify_var_or_parm_decl  (lhs) 

Qing

> 
>> I came up with the following solution:
>> 
>> Define the IFN_DEFERRED_INIT function as:
>> 
>>   LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA);
>> 
>>   if IS_VLA is false, the LHS is the DECL itself,
>>   if IS_VLA is true, the LHS is the pointer to this DECL that created by
>>   gimplify_vla_decl.
>> 
>> 
>> The benefits of this solution are:
>> 
>> 1. Resolved the invalid IR issue;
>> 2. The call stmt carries the address of the VLA naturally;
>> 
>> The issue with this solution is:
>> 
>> For VLA and non-VLA, the

Re: [PATCH] Do not use tuple-like interface for pair in unordered containers

2021-08-11 Thread François Dumont via Gcc-patches


Hi

    Sorry for the delay, I had just missed this message.

    I think you are clearly more expert than me for the changes you 
propose. I had a look at the patch and it seems just fine as it keeps 
the forwarding as expected. Nice simplification in 
_NodeBuilder<_Select1st>, we indeed only need to deal with std::pair 
type in this case.


François


On 26/07/21 7:25 pm, Jonathan Wakely wrote:

On 23/07/21 19:21 +0100, Jonathan Wakely wrote:

I've been experimenting with this patch, which removes the need to use
std::tuple_element and std::get to access the members of a std::pair
in unordered_{map,multimap}.

I'm in the process of refactoring the <utility> header to reduce
header dependencies throughout the library, and this is the only use
of the tuple-like interface for std::pair in the library.

Using tuple_element and std::get resolved PR 53339 by allowing the
std::pair type to be incomplete, however that is no longer supported
anyway (the 23_containers/unordered_map/requirements/53339.cc test
case is XFAILed). That means we could just define _Select1st as:

 struct _Select1st
 {
   template<typename _Tp>
 auto
 operator()(_Tp&& __x) const noexcept
 -> decltype(std::forward<_Tp>(__x).first)
 { return std::forward<_Tp>(__x).first; }
 };

But the approach in the patch seems OK too.


Actually I have a fix for PR 53339 so that we can support incomplete
types again. So we don't want to access the .first member in the
return type, as that requires a complete type.

Re: ipa-modref: merge flags when adding escape

2021-08-11 Thread Jan Hubicka

> While working on some function splitting changes, I've got a
> miscompilation in stagefeedback that I've tracked down to a
> complicated scenario:
> 
> - ipa-modref miscomputes a function parameter as having EAF_DIRECT,
>   because it's dereferenced and passed on to another function, but
>   add_escape_point does not update the flags for the dereferenced
>   SSA_NAME passed as a parameter, and the EAF_UNUSED in the value that
>   first initializes it, that remains unchanged throughout, causes
>   deref_flags to set EAF_DIRECT, among other flags.
> 
> - structalias, seeing the EAF_DIRECT in the parameter for that
>   function, refrains from mak[ing]_transitive_closure_constraints for
>   a pointer passed in that parameter.
> 
> - tree dse2 concludes the initializer of the pointed-to variable is a
>   dead store and removes it.
> 
> The test depends on gimple passes's processing of functions in a
> certain order to expose parm flag miscomputed by ipa-modref.  A
> different order may enable the non-ipa modref2 pass to compute flags
> differently and avoid the problem.
> 
> I've arranged for add_escape_point to merge flags, as the non-ipa path
> does.  I've also caught and fixed an error in the dumping of escaping
> flags.
> 
> The problem affects at least trunk and gcc-11.  I've so far bootstrapped
> GCC 11, and I'm now regstrapping trunk.  Ok to install if it passes?
> 
> 
> for  gcc/ChangeLog
> 
>   * ipa-modref.c (modref_lattice::add_escape_point): Merge
>   min_flags into flags.
>   (modref_lattice::dump): Fix escape_point's min_flags dumping.
> 
> for  gcc/testsuite/ChangeLog
> 
>   * c-c++-common/modref-dse.c: New.

Hi,
thank you for looking into the bug and sorry for taking so long to
respond.  The fix you propose is a bit too generous, since it essentially
disables the IPA bits of ipa-modref (it will resort to the worst-case
solution w/o any IPA propagation).  

In IPA mode the proper flags are supposed to be determined by
propagation via "escape points".  The bug is a bit subtle; it is caused by
an optimization that avoids recording flags for escape points where
we know that we do not care.  This is tested by comparing min_flags
(which is the known conservative estimation used by local analysis) with
flags of the value being determined.  If these flags are subset of
min_flags there is nothing to gain.

While merging lattices there is a case where a direct escape becomes
indirect, and in this case I forgot to update min_flags to the dereferenced
version, which in turn causes the escape point to be skipped.

This is the improved patch, which I have bootstrapped/regtested on
x86_64-linux and am collecting stats for (it should have minimal effect on
the overall effectiveness of modref).

Honza

gcc/ChangeLog:

2021-08-11  Jan Hubicka  
Alexandre Oliva  

* ipa-modref.c (modref_lattice::dump): Fix escape_point's min_flags
dumping.
(modref_lattice::merge_deref): Fix handling of indirect escape points.
(update_escape_summary_1): Likewise.
(update_escape_summary): Likewise.
(ipa_merge_modref_summary_after_inlining): Likewise.

gcc/testsuite/ChangeLog:

2021-08-11  Alexandre Oliva  

* c-c++-common/modref-dse.c: New test.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index ef5e62beb0e..dccaf658720 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -1392,7 +1392,7 @@ modref_lattice::dump (FILE *out, int indent) const
  fprintf (out, "%*s  Arg %i (%s) min flags", indent, "",
   escape_points[i].arg,
   escape_points[i].direct ? "direct" : "indirect");
- dump_eaf_flags (out, flags, false);
+ dump_eaf_flags (out, escape_points[i].min_flags, false);
  fprintf (out, " in call ");
  print_gimple_stmt (out, escape_points[i].call, 0);
}
@@ -1489,10 +1489,18 @@ modref_lattice::merge_deref (const modref_lattice &with, bool ignore_stores)
   if (!flags)
 return changed;
   for (unsigned int i = 0; i < with.escape_points.length (); i++)
-changed |= add_escape_point (with.escape_points[i].call,
-with.escape_points[i].arg,
-with.escape_points[i].min_flags,
-false);
+{
+  int min_flags = with.escape_points[i].min_flags;
+
+  if (with.escape_points[i].direct)
+   min_flags = deref_flags (min_flags, ignore_stores);
+  else if (ignore_stores)
+   min_flags |= EAF_NOCLOBBER | EAF_NOESCAPE | EAF_NODIRECTESCAPE;
+  changed |= add_escape_point (with.escape_points[i].call,
+  with.escape_points[i].arg,
+  min_flags,
+  false);
+}
   return changed;
 }
 
@@ -2992,7 +3000,8 @@ struct escape_map
 
 static void
 update_escape_summary_1 (cgraph_edge *e,
-vec <vec <escape_map>> &map)
+vec <vec <escape_map>> &map,
+bool ignore_stores)
 {
   escape_summary *sum =

Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.

2021-08-11 Thread Segher Boessenkool

Hi!

On Wed, Aug 11, 2021 at 12:29:06PM +0100, Iain Sandoe wrote:
> > On 11 Aug 2021, at 11:55, Segher Boessenkool  
> > wrote:
> > On Tue, Aug 10, 2021 at 04:46:11PM -0600, Sandra Loosemore wrote:
> >> OK.  I used your wording verbatim for the first one.  For the second 
> >> one, I'm still pretty confused as I think it is at least theoretically 
> >> possible on PowerPC to have a target with 64-bit long double (AIX?) that 
> > 
> > Some embedded and embedded-like subtargets use 64-bit long double by
> > default.  You can also configure this on any Power target (not that it
> > will necessarily work ;-) )
> > 
> > I don't know if any subtarget with default 64-bit long double supports
> > Fortran.
> 
> I realize that this is not directly relevant to unscrambling the PPC 128bit 
> stuff,
> but aarch64-apple-darwin2x has only 64b long double and supports gfortran.
> (on both the new M1 desktop macOS and embedded iOS)

Yes, but aarch64-apple-darwin2x is not an rs6000 subtarget :-)  There
certainly are many targets with a 64b long double.

>  - it is not clear to me yet if there will at some point be a transition to a 
> 128b
>long double for at least the desktop version.

Yeah.  I recommend any new target (or target for which this is new) to
use an IEEE QP float as long double, even if just as soft float -- the
advantages are just too great.

> So the permutation definitely exists for at least one non-legacy, non-embedded
> platform (and gfortran is very much in demand from the new M1 users).

M1 is not embedded?  :-)


Segher

Re: [PATCH] aix: handle 64bit inodes for include directories

2021-08-11 Thread CHIGOT, CLEMENT via Gcc-patches

Hi guys,

Did you have a chance to take a look at the new version of this patch ?

Thanks,
Clément

From: CHIGOT, CLEMENT 
Sent: Monday, June 28, 2021 9:16 AM
To: Jeff Law ; David Malcolm 
Cc: gcc-patches@gcc.gnu.org ; David Edelsohn 

Subject: Re: [PATCH] aix: handle 64bit inodes for include directories

>On 6/23/2021 12:53 AM, CHIGOT, CLEMENT via Gcc-patches wrote:
>> Hi David,
>>
>> Did you have a chance to take a look at this patch ?
>>
>> Thanks,
>> Clément
>>
>>
>>> +DavidMalcolm
>>>
>>> Can you review this patch when you have a moment?
>>>
>>> Thanks, David
>>>
>>> On Mon, May 17, 2021 at 3:05 PM David Edelsohn  wrote:
 The aix.h change is okay with me, but you need to get approval for the
 incpath.c and cpplib.h parts of the patch from the appropriate
 maintainers.

 Thanks, David

 On Mon, May 17, 2021 at 7:44 AM CHIGOT, CLEMENT  
 wrote:
> On AIX, stat will store inodes in 32bit even when using LARGE_FILES.
> If the inode is larger, it will return -1 in st_ino.
> Thus, in incpath.c when comparing include directories, if several
> of them have 64bit inodes, they will be considered as duplicated.
>
> gcc/ChangeLog:
> 2021-05-06  Clément Chigot  
>
>  * configure.ac: Check sizeof ino_t and dev_t.
>  * config.in: Regenerate.
>  * configure: Regenerate.
>  * config/rs6000/aix.h (HOST_STAT_FOR_64BIT_INODES): New define.
>  * incpath.c (HOST_STAT_FOR_64BIT_INODES): New define.
>  (remove_duplicates): Use it.
>
> libcpp/ChangeLog:
> 2021-05-06  Clément Chigot  
>
>  * configure.ac: Check sizeof ino_t and dev_t.
>  * config.in: Regenerate.
>  * configure: Regenerate.
>  * include/cpplib.h (INO_T_CPP): Change for AIX.
>  (DEV_T_CPP): New macro.
>  (struct cpp_dir): Use it.
> So my worry here is this is really a host property -- ie, this is
> behavior of where GCC runs, not the target for which GCC is generating code.
>
> That implies that the change in aix.h is wrong.  aix.h is for the
> target, not the host -- you don't want to define something like
> HOST_STAT_FOR_64BIT_INODES there.
>
> You'd want to be triggering this behavior via a host fragment, x-aix, or
> better yet via an autoconf test.

Indeed, would this version be better ? I'm not sure about the configure test.
But as we are retrieving the size of dev_t and ino_t just above, I'm assuming
that these are the ones being used in stat directly. At least, that's the case on AIX, and
this test is only made for AIX.

Clément

Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.

2021-08-11 Thread Iain Sandoe via Gcc-patches

Hi Folks

> On 11 Aug 2021, at 11:55, Segher Boessenkool  
> wrote:

> On Tue, Aug 10, 2021 at 04:46:11PM -0600, Sandra Loosemore wrote:
>> OK.  I used your wording verbatim for the first one.  For the second 
>> one, I'm still pretty confused as I think it is at least theoretically 
>> possible on PowerPC to have a target with 64-bit long double (AIX?) that 
> 
> Some embedded and embedded-like subtargets use 64-bit long double by
> default.  You can also configure this on any Power target (not that it
> will necessarily work ;-) )
> 
> I don't know if any subtarget with default 64-bit long double supports
> Fortran.

I realize that this is not directly relevant to unscrambling the PPC 128bit 
stuff,
but aarch64-apple-darwin2x has only 64b long double and supports gfortran.
(on both the new M1 desktop macOS and embedded iOS)

 - it is not clear to me yet if there will at some point be a transition to a 
128b
   long double for at least the desktop version.

So the permutation definitely exists for at least one non-legacy, non-embedded
platform (and gfortran is very much in demand from the new M1 users).

Iain

>> also supports the __ibm128 format, and it would be wrong to assume that 
>> *any* 128-bit mode that's not long double is IEEE.
> 
> Absolutely.  Modes are not types, and types are not modes.  There are
> 128-bit floating point modes that are not IEEE, there are that are, and
> either can be used for long double, or neither.
> 
> 
> Segher

Re: [PATCH] Extend ldexp{s, d}f3 to vscalefs{s, d} when TARGET_AVX512F and TARGET_SSE_MATH.

2021-08-11 Thread Uros Bizjak via Gcc-patches

On Wed, Aug 11, 2021 at 8:36 AM Uros Bizjak  wrote:
>
> On Tue, Aug 10, 2021 at 2:13 PM liuhongt  wrote:
> >
> > Hi:
> >   AVX512F supported vscalefs{s,d} which is the same as ldexp except the 
> > second operand should be floating point.
> >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >
> > gcc/ChangeLog:
> >
> > PR target/98309
> > * config/i386/i386.md (ldexp3): Extend to vscalefs[sd]
> > when TARGET_AVX512F and TARGET_SSE_MATH.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR target/98309
> > * gcc.target/i386/pr98309-1.c: New test.
> > * gcc.target/i386/pr98309-2.c: New test.
>
> OK.

Actually, we should introduce a scalar version of avx512f_vmscalef, so
we can avoid all subreg conversions with the vector-merge (VM)
version, and will also allow memory in operand 2.

Please test the attached incremental patch.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 56b09c566ed..4a8e8fea290 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -125,6 +125,9 @@
   UNSPEC_RSQRT
   UNSPEC_PSADBW
 
+  ;; For AVX512F support
+  UNSPEC_SCALEF
+
   ;; Generic math support
   UNSPEC_COPYSIGN
   UNSPEC_XORSIGN
@@ -17894,6 +17897,17 @@
   DONE;
 })
 
+(define_insn "avx512f_scalef2"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
+   (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "v")
+  (match_operand:MODEF 2 "nonimmediate_operand" "vm")]
+ UNSPEC_SCALEF))]
+  "TARGET_AVX512F"
+  "vscalef\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode"  "")])
+
 (define_expand "ldexpxf3"
   [(match_operand:XF 0 "register_operand")
(match_operand:XF 1 "register_operand")
@@ -17924,15 +17938,12 @@
   if (TARGET_AVX512F && TARGET_SSE_MATH)
{
  rtx op2 = gen_reg_rtx (mode);
- emit_insn (gen_floatsi2 (op2, operands[2]));
- operands[0] = lowpart_subreg (mode, operands[0], mode);
- if (MEM_P (operands[1]))
+
+ if (!nonimmediate_operand (operands[1], mode))
operands[1] = force_reg (mode, operands[1]);
- operands[1] = lowpart_subreg (mode, operands[1], mode);
- op2 = lowpart_subreg (mode, op2, mode);
- emit_insn (gen_avx512f_vmscalef (operands[0],
-  operands[1],
-  op2));
+
+ emit_insn (gen_floatsi2 (op2, operands[2]));
+ emit_insn (gen_avx512f_scalef2 (operands[0], operands[1], op2));
}
   else
 {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3957c86c3df..9233dfc6150 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -92,7 +92,6 @@
   UNSPEC_RCP14
   UNSPEC_RSQRT14
   UNSPEC_FIXUPIMM
-  UNSPEC_SCALEF
   UNSPEC_VTERNLOG
   UNSPEC_GETEXP
   UNSPEC_GETMANT

Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.

2021-08-11 Thread Segher Boessenkool

Hi!

On Tue, Aug 10, 2021 at 04:46:11PM -0600, Sandra Loosemore wrote:
> OK.  I used your wording verbatim for the first one.  For the second 
> one, I'm still pretty confused as I think it is at least theoretically 
> possible on PowerPC to have a target with 64-bit long double (AIX?) that 

Some embedded and embedded-like subtargets use 64-bit long double by
default.  You can also configure this on any Power target (not that it
will necessarily work ;-) )

I don't know if any subtarget with default 64-bit long double supports
Fortran.

> also supports the __ibm128 format, and it would be wrong to assume that 
> *any* 128-bit mode that's not long double is IEEE.

Absolutely.  Modes are not types, and types are not modes.  There are
128-bit floating point modes that are not IEEE, there are that are, and
either can be used for long double, or neither.

Segher

Re: [PATCH] Adding target hook allows to reject initialization of register

2021-08-11 Thread Richard Biener via Gcc-patches

On Wed, Aug 11, 2021 at 11:28 AM Richard Sandiford
 wrote:
>
> Richard Biener  writes:
> > On Tue, Aug 10, 2021 at 10:33 AM Jojo R via Gcc-patches
> >  wrote:
> >>
> >> Some target like RISC-V allow to group vector register as a whole,
> >> and only operate part of it in fact, but the 'init-regs' pass will add 
> >> initialization
> >> for uninitialized registers. Add this hook to reject this action for 
> >> reducing instruction.
> >
> > Are these groups "visible"?  That is, are the pseudos multi-reg
> > pseudos?  I wonder
> > if there's a more generic way to tame down initregs w/o introducing a new 
> > target
> > hook.
> >
> > Btw, initregs is a red herring - it ideally should go away.  See PR61810.
> >
> > So instead of adding to it can you see whether disabling the pass for RISC-V
> > works w/o fallout (and add a comment to the PR)?  Maybe some more RTL
> > literate (in particular DF literate) can look at the remaining issue.
> > Richard, did you
> > ever have a look into the "issue" that initregs covers up (whatever
> > that exactly is)?
>
> No, sorry.  I don't really understand what it would be from the comment
> in the code:
>
>[...] papers over some problems on the arm and other
>processors where certain isa constraints cannot be handled by gcc.
>These are of the form where two operands to an insn my not be the
>same.  The ra will only make them the same if they do not
>interfere, and this can only happen if one is not initialized.
>
> That would definitely be an RA bug if true, since the constraints need
> to be applied independently of dataflow information.  But the comment
> and code predate LRA and maybe no-one fancied poking around in reload
> (hard to believe).
>
> I'd be very surprised if LRA gets this wrong.

OK, we've been wondering for quite some time - how about changing the
gate of initregs to optimize > 0 && !targetm.lra_p ()?  We'll hopefully
figure out the "real" issue the pass is papering over.  At the same time
we're leaving old reload (and likely unmaintained) targets unaffected.

Richard.

> Thanks,
> Richard

Re: [PATCH 05/34] rs6000: Add available-everywhere and ancient builtins

2021-08-11 Thread Segher Boessenkool

On Tue, Aug 10, 2021 at 04:29:10PM -0500, Bill Schmidt wrote:
> On 8/10/21 12:34 PM, Segher Boessenkool wrote:
> >On Tue, Aug 10, 2021 at 11:17:05AM -0500, will schmidt wrote:
> >>On Thu, 2021-07-29 at 08:30 -0500, Bill Schmidt wrote:
> >>>+; This will break for long double == _Float128.  libgcc history.
> >>>+  const long double __builtin_pack_longdouble (double, double);
> >>>+PACK_TF packtf {}
> >>Add a few more words to provide bigger hints for future archeological
> >>digs?  (This is perhaps an obvious issue, but I'd need to do some
> >>spelunking)
> >It is for __ibm128 only, not for other long double formats (we have
> >three: plain double, double double, IEEE QP).  So maybe the return type
> >should be changed?  The name of the builtin of course is unfortunate,
> >but it is too late to change :-)
> 
> Yeah...I'm not sure how much flexibility we have here to avoid breaking 
> code in the field, but it's not a big break because whoever may be using 
> it has to be assuming long double = __ibm128, and probably has work to 
> do anyway.

We do have an
  __ibm128 __builtin_pack_ibm128 (double, double);
already, so we just should get people to use that one, make it more
prominent in the documentation?  Or we can also make
__builtin_pack_longdouble warn (or even error) if used when long double
is not double-double.  Maybe an attribute (or what is it called, a
{thing} I mean) in the new description files to say "warn (or error) if
long double is not ibm128"?

> Perhaps I should commit as is for now, and then prepare a separate patch 
> to change this builtin?  There may be test suite fallout, not sure offhand.

Yes, I did approve it already, right?  Reviewing these patches I notice
things that should be improved, but that does not have to be done *now*,
or by you for that matter :-)

Cheers,

Segher

Re: [PATCH][v2] Adjust volatile handling of the operand scanner

2021-08-11 Thread Eric Botcazou

> So I'm leaning towards leaving build3 alone and fixing up frontends
> as issues pop up.

FWIW fine with me.

-- 
Eric Botcazou

[PATCH] tree-optimization/101861 - fix gather use for non-gather refs

2021-08-11 Thread Richard Biener via Gcc-patches

My previous change broke the usage of gather for strided loads.
The following fixes it.

Bootstrapped on x86_64-unknown-linux-gnu, will push as obvious.

2021-08-11  Richard Biener  

PR tree-optimization/101861
* tree-vect-stmts.c (vectorizable_load): Fix error in
previous change with regard to gather vectorization.
---
 gcc/tree-vect-stmts.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index ab402b57fb4..cc6c091e41e 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9492,7 +9492,8 @@ vectorizable_load (vec_info *vinfo,
if (memory_access_type == VMAT_GATHER_SCATTER
&& gs_info.ifn != IFN_LAST)
  {
-   vec_offset = vec_offsets[j];
+   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ vec_offset = vec_offsets[j];
tree zero = build_zero_cst (vectype);
tree scale = size_int (gs_info.scale);
gcall *call;
-- 
2.31.1

Re: [PATCH] Adding target hook allows to reject initialization of register

2021-08-11 Thread Richard Sandiford via Gcc-patches

Richard Biener  writes:
> On Tue, Aug 10, 2021 at 10:33 AM Jojo R via Gcc-patches
>  wrote:
>>
>> Some target like RISC-V allow to group vector register as a whole,
>> and only operate part of it in fact, but the 'init-regs' pass will add 
>> initialization
>> for uninitialized registers. Add this hook to reject this action for 
>> reducing instruction.
>
> Are these groups "visible"?  That is, are the pseudos multi-reg
> pseudos?  I wonder
> if there's a more generic way to tame down initregs w/o introducing a new 
> target
> hook.
>
> Btw, initregs is a red herring - it ideally should go away.  See PR61810.
>
> So instead of adding to it can you see whether disabling the pass for RISC-V
> works w/o fallout (and add a comment to the PR)?  Maybe some more RTL
> literate (in particular DF literate) can look at the remaining issue.
> Richard, did you
> ever have a look into the "issue" that initregs covers up (whatever
> that exactly is)?

No, sorry.  I don't really understand what it would be from the comment
in the code:

   [...] papers over some problems on the arm and other
   processors where certain isa constraints cannot be handled by gcc.
   These are of the form where two operands to an insn may not be the
   same.  The ra will only make them the same if they do not
   interfere, and this can only happen if one is not initialized.

That would definitely be an RA bug if true, since the constraints need
to be applied independently of dataflow information.  But the comment
and code predate LRA and maybe no-one fancied poking around in reload
(hard to believe).

I'd be very surprised if LRA gets this wrong.

Thanks,
Richard

Re: [PATCH] [i386] Combine avx_vec_concatv16si and avx512f_zero_extendv16hiv16si2_1 to avx512f_zero_extendv16hiv16si2_2.

2021-08-11 Thread Hongtao Liu via Gcc-patches

On Wed, Aug 11, 2021 at 3:58 PM Jakub Jelinek  wrote:
>
> On Wed, Aug 11, 2021 at 02:43:06PM +0800, liuhongt wrote:
> >   Add define_insn_and_split to combine avx_vec_concatv16si/2 and
> > avx512f_zero_extendv16hiv16si2_1 since the latter already zero_extend
> > the upper bits, similar for other patterns which are related to
> > pmovzx{bw,wd,dq}.
> >
> > It will do optimization like
> >
> > -   vmovdqa %ymm0, %ymm0# 7 [c=4 l=6]  avx_vec_concatv16si/2
> > vpmovzxwd   %ymm0, %zmm0# 22[c=4 l=6]  
> > avx512f_zero_extendv16hiv16si2
> > ret # 25[c=0 l=1]  simple_return_internal
> >
> >   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >   Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> >   PR target/101846
> >   * config/i386/sse.md (*avx2_zero_extendv16qiv16hi2_2): New
> >   post_reload define_insn_and_split.
>
> The ChangeLog doesn't mention the newly added mode iterators, perhaps it
> should.
>
> >   (*avx512bw_zero_extendv32qiv32hi2_2): Ditto.
> >   (*sse4_1_zero_extendv8qiv8hi2_4): Ditto.
> >   (*avx512f_zero_extendv16hiv16si2_2): Ditto.
> >   (*avx2_zero_extendv8hiv8si2_2): Ditto.
> >   (*sse4_1_zero_extendv4hiv4si2_4): Ditto.
> >   (*avx512f_zero_extendv8siv8di2_2): Ditto.
> >   (*avx2_zero_extendv4siv4di2_2): Ditto.
> >   (*sse4_1_zero_extendv2siv2di2_4): Ditto.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   PR target/101846
> >   * gcc.target/i386/pr101846-1.c: New test.
> > ---
> >  gcc/config/i386/sse.md | 220 +
> >  gcc/testsuite/gcc.target/i386/pr101846-1.c |  95 +
> >  2 files changed, 315 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-1.c
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index a46a2373547..6450c058458 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -673,8 +673,14 @@ (define_mode_iterator VI12_128 [V16QI V8HI])
> >  (define_mode_iterator VI14_128 [V16QI V4SI])
> >  (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
> >  (define_mode_iterator VI24_128 [V8HI V4SI])
> > +(define_mode_iterator VI128_128 [V16QI V8HI V2DI])
>
> And this mode iterator isn't used anywhere in the patch it seems.
>
> Otherwise LGTM, although it fixes just particular, though perhaps very
> important, cases, for detecting generally that some operations on
> a vector aren't needed because following permutation that uses it never
> reads those elements is something that would need to be done on gimple.
>
> Would it be possible to handle also the similar pmovzx{bd,wq,bq} cases?
Yes, regarding testcase bar, vec_perm can be implemented as vpmovdw
and vinserti64x4, and the latter instructions will be optimized off
since the upper bits are never used.
I'm working on a patch.
>
> Jakub
>


-- 
BR,
Hongtao

Re: [PATCH][v2] Adjust volatile handling of the operand scanner

2021-08-11 Thread Richard Biener via Gcc-patches

On Wed, 11 Aug 2021, Richard Biener wrote:

> On Tue, 10 Aug 2021, Eric Botcazou wrote:
> 
> > > The question is whether we instead want to amend build3 to
> > > set TREE_THIS_VOLATILE automatically when the FIELD_DECL has
> > > it set.  At least for the Fortran FE cases the gimplifier
> > > fails to see some volatile references and thus can generate
> > > wrong code which is a latent issue.
> > 
> > What do we do for other similar flags, e.g. TREE_READONLY?
> 
> build3 currently does no special processing for the FIELD_DECL operand,
> it just sets TREE_THIS_VOLATILE from operand zero for tcc_references.
> 
> The C and C++ frontends have repeated patterns like
> 
>   ref = build3 (COMPONENT_REF, subtype, datum, subdatum,
> NULL_TREE);
>   SET_EXPR_LOCATION (ref, loc);
>   if (TREE_READONLY (subdatum)
>   || (use_datum_quals && TREE_READONLY (datum)))
> TREE_READONLY (ref) = 1;
>   if (TREE_THIS_VOLATILE (subdatum)
>   || (use_datum_quals && TREE_THIS_VOLATILE (datum)))
> TREE_THIS_VOLATILE (ref) = 1;
> 
> Leaving out TREE_READONLY shouldn't have any correctness issue.  It's
> just that when adjusting the SSA operand scanner to correctly interpret
> GENERIC that this uncovers pre-existing issues in the Fortran frontend
> (one manifests in a testsuite FAIL - otherwise I wouldn't have noticed).
> 
> I'm fine with just plugging the Fortran FE holes as we discover them
> but I did not check other frontends and testsuite coverage is weak.
> 
> Now - I wonder if there's a reason a frontend might _not_ want to
> set TREE_THIS_VOLATILE on a COMPONENT_REF when the FIELD_DECL has
> TREE_THIS_VOLATILE set.
> 
> I guess I'll do one more experiment and add verification that
> TREE_THIS_VOLATILE on COMPONENT_REFs and FIELD_DECLs is consistent
> and see where that trips.

It trips for

struct X { volatile int i; };

void foo ()
{
  struct X x = (struct X){ .i = 0 };
}

where the gimplifier in gimplify_init_ctor_eval does

  gcc_assert (TREE_CODE (purpose) == FIELD_DECL);
  cref = build3 (COMPONENT_REF, TREE_TYPE (purpose),
 unshare_expr (object), purpose, NULL_TREE);

producing

  x.i = 0;

that is not volatile qualified.  This manifests itself during the
build of libasan.  I'm not sure whether what the gimplifier is doing is
correct or not.  Changing build3 would alter the behavior here.

Then there's a case where the COMPONENT_REF is TREE_THIS_VOLATILE
but neither the FIELD_DECL nor the base reference is.  This
trips during libtsan build and again is from gimplification/folding,
this time gimplify_modify_expr_rhs doing

case INDIRECT_REF:
  {
/* If we have code like

 *(const A*)(A*)&x

 where the type of "x" is a (possibly cv-qualified variant
 of "A"), treat the entire expression as identical to "x".
 This kind of code arises in C++ when an object is bound
 to a const reference, and if "x" is a TARGET_EXPR we want
 to take advantage of the optimization below.  */
bool volatile_p = TREE_THIS_VOLATILE (*from_p);
tree t = gimple_fold_indirect_ref_rhs (TREE_OPERAND (*from_p, 
0));
if (t)
  {
if (TREE_THIS_VOLATILE (t) != volatile_p)
  {
if (DECL_P (t))
  t = build_simple_mem_ref_loc (EXPR_LOCATION 
(*from_p),
build_fold_addr_expr 
(t));
if (REFERENCE_CLASS_P (t))
  TREE_THIS_VOLATILE (t) = volatile_p;

I suppose that's OK, it's folding volatile
*(void (*__sanitizer_sighandler_ptr) (int) *) &act->D.5368.handler
to act->D.5368.handler which wouldn't be volatile.

The opposite could happen, too, of course - casting away volatileness
for an access but letting that slip through verification would make
it moot.  So ...

With those cases fixed bootstrap runs through and testing reveals
no additional issues apart from the already known
gfortran.dg/volatile11.f90

So I'm leaning towards leaving build3 alone and fixing up frontends
as issues pop up.

Richard.

Re: [PATCH] Fix loop split incorrect count and probability

2021-08-11 Thread Richard Biener via Gcc-patches

On Wed, 11 Aug 2021, Xionghu Luo wrote:

> 
> 
> On 2021/8/10 22:47, Richard Biener wrote:
> > On Mon, 9 Aug 2021, Xionghu Luo wrote:
> > 
> >> Thanks,
> >>
> >> On 2021/8/6 19:46, Richard Biener wrote:
> >>> On Tue, 3 Aug 2021, Xionghu Luo wrote:
> >>>
>  loop split condition is moved between loop1 and loop2, the split bb's
>  count and probability should also be duplicated instead of (100% vs INV),
>  secondly, the original loop1 and loop2 count need to be proportional to the
>  original loop.
> 
> 
>  diff base/loop-cond-split-1.c.151t.lsplit  
>  patched/loop-cond-split-1.c.151t.lsplit:
>  ...
>   int prephitmp_16;
>   int prephitmp_25;
> 
>    [local count: 118111600]:
>   if (n_7(D) > 0)
> goto ; [89.00%]
>   else
> goto ; [11.00%]
> 
>    [local count: 118111600]:
>   return;
> 
>    [local count: 105119324]:
>   pretmp_3 = ga;
> 
>  -   [local count: 955630225]:
>  +   [local count: 315357973]:
>   # i_13 = PHI 
>   # prephitmp_12 = PHI 
>   if (prephitmp_12 != 0)
> goto ; [33.00%]
>   else
> goto ; [67.00%]
> 
>  -   [local count: 315357972]:
>  +   [local count: 104068130]:
>   _2 = do_something ();
>   ga = _2;
> 
>  -   [local count: 955630225]:
>  +   [local count: 315357973]:
>   # prephitmp_5 = PHI 
>   i_10 = inc (i_13);
>   if (n_7(D) > i_10)
> goto ; [89.00%]
>   else
> goto ; [11.00%]
> 
>    [local count: 105119324]:
>   goto ; [100.00%]
> 
>  -   [local count: 850510901]:
>  +   [local count: 280668596]:
>   if (prephitmp_12 != 0)
>  -goto ; [100.00%]
>  +goto ; [33.00%]
>   else
>  -goto ; [INV]
>  +goto ; [67.00%]
> 
>  -   [local count: 850510901]:
>  +   [local count: 280668596]:
>   goto ; [100.00%]
> 
>  -   [count: 0]:
>  +   [local count: 70429947]:
>   # i_23 = PHI 
>   # prephitmp_25 = PHI 
> 
>  -   [local count: 955630225]:
>  +   [local count: 640272252]:
>   # i_15 = PHI 
>   # prephitmp_16 = PHI 
>   i_22 = inc (i_15);
>   if (n_7(D) > i_22)
> goto ; [89.00%]
>   else
> goto ; [11.00%]
> 
>  -   [local count: 850510901]:
>  +   [local count: 569842305]:
>   goto ; [100.00%]
> 
> }
> 
>  gcc/ChangeLog:
> 
>   * tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
>   (do_split_loop_on_cond): Likewise.
>  ---
> gcc/tree-ssa-loop-split.c | 16 
> 1 file changed, 8 insertions(+), 8 deletions(-)
> 
>  diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
>  index 3a09bbc39e5..8e5a7ded0f7 100644
>  --- a/gcc/tree-ssa-loop-split.c
>  +++ b/gcc/tree-ssa-loop-split.c
>  @@ -583,10 +583,10 @@ split_loop (class loop *loop1)
>   basic_block cond_bb;
> >>
> >>if (!initial_true)
> >> -cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
> >> +cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
> >> +
> >> +  edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
> >> + ? EDGE_SUCC (bbs[i], 0)
> >> + : EDGE_SUCC (bbs[i], 1);
> >>
> 
>   class loop *loop2 = loop_version (loop1, cond, &cond_bb,
>  -   profile_probability::always 
>  (),
>  -   profile_probability::always 
>  (),
>  -   profile_probability::always 
>  (),
>  -   profile_probability::always 
>  (),
>  +   true_edge->probability,
>  +   
>  true_edge->probability.invert (),
>  +   true_edge->probability,
>  +   
>  true_edge->probability.invert (),
>  true);
> >>>
> >>> there is no 'true_edge' variable at this point.
> >>
> >> Sorry, missed the above hunk when split the patch.
> >>
> >>>
>   gcc_assert (loop2);
> 
>  @@ -1486,10 +1486,10 @@ do_split_loop_on_cond (struct loop *loop1, edge 
>  invar_branch)
>   initialize_original_copy_tables ();
> 
>   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
>  - profile_probability::always (),
>  - profile_probability::never (),
>  -

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Richard Sandiford via Gcc-patches

Qing Zhao via Gcc-patches  writes:
>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
>>  wrote:
>> 
>> Hi, Richard,
>> 
>>> On Aug 10, 2021, at 10:22 AM, Richard Biener  wrote:
> 
> Especially in the VLA case but likely also in general (though unlikely
> since usually the receiver of initializations are simple enough).  I'd
> expect the VLA case end up as
> 
> *ptr_to_decl = .DEFERRED_INIT (...);
> 
> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
 
 So, for the following small testing case:
 
 
 extern void bar (int);
 
 void foo(int n)
 {
 int arr[n];
 bar (arr[2]);
 return;
 }
 =
 
 If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S -o 
 auto-init-11.s -fdump-rtl-expand, the *.gimple dump is:
 
 =
 void foo (int n)
 {
 int n.0;
 sizetype D.1950;
 bitsizetype D.1951;
 sizetype D.1952;
 bitsizetype D.1953;
 sizetype D.1954;
 int[0:D.1950] * arr.1;
 void * saved_stack.2;
 int arr[0:D.1950] [value-expr: *arr.1];
 
 saved_stack.2 = __builtin_stack_save ();
 try
   {
 n.0 = n;
 _1 = (long int) n.0;
 _2 = _1 + -1;
 _3 = (sizetype) _2;
 D.1950 = _3;
 _4 = (sizetype) n.0;
 _5 = (bitsizetype) _4;
 _6 = _5 * 32;
 D.1951 = _6;
 _7 = (sizetype) n.0;
 _8 = _7 * 4;
 D.1952 = _8;
 _9 = (sizetype) n.0;
 _10 = (bitsizetype) _9;
 _11 = _10 * 32;
 D.1953 = _11;
 _12 = (sizetype) n.0;
 _13 = _12 * 4;
 D.1954 = _13;
 arr.1 = __builtin_alloca_with_align (D.1954, 32);
 arr = .DEFERRED_INIT (D.1952, 2, 1);
 _14 = (*arr.1)[2];
 bar (_14);
 return;
   }
 finally
   {
 __builtin_stack_restore (saved_stack.2);
   }
 }
 
 
 
 You think that the above .DEFERRED_INIT is not correct?
 It should be:
 
 *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
 
 ?
>>> 
>>> Yes.
>>> 
>> 
>> I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT as:
>> 
>>  arr.1 = __builtin_alloca_with_align (D.1954, 32);
>>  *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
>> 
>> However, this call triggered the assertion failure in verify_gimple_call of 
>> tree-cfg.c because the LHS is not a valid LHS. 
>> Then I modify tree-cfg.c as:
>> 
>> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
>> index 330eb7dd89bf..180d4f1f9e32 100644
>> --- a/gcc/tree-cfg.c
>> +++ b/gcc/tree-cfg.c
>> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt)
>>  }
>> 
>>   tree lhs = gimple_call_lhs (stmt);
>> +  /* For .DEFERRED_INIT call, the LHS might be an indirection of
>> + a pointer for the VLA variable, which is not a valid LHS of
>> + a gimple call, we ignore the asssertion on this.  */ 
>>   if (lhs
>> +  && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT))
>>   && (!is_gimple_reg (lhs)
>>  && (!is_gimple_lvalue (lhs)
>>  || verify_types_in_gimple_reference
>> 
>> The assertion failure in tree-cfg.c got resolved, but I got another 
>> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, int 
>> flags), line 945:
>> 
>> 939   /* If we get here, something has gone wrong.  */
>> 940   if (flag_checking)
>> 941 {
>> 942   fprintf (stderr, "unhandled expression in get_expr_operands():\n");
>> 943   debug_tree (expr);
>> 944   fputs ("\n", stderr);
>> 945   gcc_unreachable ();
>> 946 }
>> 
>> Looks like that  the gimple statement:
>>*arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
>> 
>> Is not valid.  i.e, the LHS should not be an indirection to a pointer. 
>> 
>> How to resolve this issue?
>
> I came up with the following solution:
>
> Define the IFN_DEFERRED_INIT function as:
>
>LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA);
>
>if IS_VLA is false, the LHS is the DECL itself,
>if IS_VLA is true, the LHS is the pointer to this DECL that created by
>gimplify_vla_decl.
>
>
> The benefits of this solution are:
>
> 1. Resolved the invalid IR issue;
> 2. The call stmt carries the address of the VLA naturally;
>
> The issue with this solution is:
>
> For VLA and non-VLA, the LHS will be different, 
>
> Do you see any other potential issues with this solution?

The idea behind the DECL version of the .DEFERRED_INIT semantics was
that .DEFERRED_INIT just returns a SIZE-byte value that the caller
then assigns to a SIZE-byte lhs (with the caller choosing the lhs).
.DEFERRED_INIT itself doesn't read or write memory and so can be const,
which in turn allows alias analysis to be more precise.

If we want to handle the VLA case using pointers instead then I think
that needs to be a different IFN.

If we did handle the VLA case using pointers (not expressing an opinion
on that), then it would be the caller's job to

Re: [PATCH] Fix loop split incorrect count and probability

2021-08-11 Thread Xionghu Luo via Gcc-patches




On 2021/8/10 22:47, Richard Biener wrote:
> On Mon, 9 Aug 2021, Xionghu Luo wrote:
> 
>> Thanks,
>>
>> On 2021/8/6 19:46, Richard Biener wrote:
>>> On Tue, 3 Aug 2021, Xionghu Luo wrote:
>>>
 loop split condition is moved between loop1 and loop2, the split bb's
 count and probability should also be duplicated instead of (100% vs INV),
 secondly, the original loop1 and loop2 count need to be proportional to the
 original loop.


 diff base/loop-cond-split-1.c.151t.lsplit  
 patched/loop-cond-split-1.c.151t.lsplit:
 ...
  int prephitmp_16;
  int prephitmp_25;

   [local count: 118111600]:
  if (n_7(D) > 0)
goto ; [89.00%]
  else
goto ; [11.00%]

   [local count: 118111600]:
  return;

   [local count: 105119324]:
  pretmp_3 = ga;

 -   [local count: 955630225]:
 +   [local count: 315357973]:
  # i_13 = PHI 
  # prephitmp_12 = PHI 
  if (prephitmp_12 != 0)
goto ; [33.00%]
  else
goto ; [67.00%]

 -   [local count: 315357972]:
 +   [local count: 104068130]:
  _2 = do_something ();
  ga = _2;

 -   [local count: 955630225]:
 +   [local count: 315357973]:
  # prephitmp_5 = PHI 
  i_10 = inc (i_13);
  if (n_7(D) > i_10)
goto ; [89.00%]
  else
goto ; [11.00%]

   [local count: 105119324]:
  goto ; [100.00%]

 -   [local count: 850510901]:
 +   [local count: 280668596]:
  if (prephitmp_12 != 0)
 -goto ; [100.00%]
 +goto ; [33.00%]
  else
 -goto ; [INV]
 +goto ; [67.00%]

 -   [local count: 850510901]:
 +   [local count: 280668596]:
  goto ; [100.00%]

 -   [count: 0]:
 +   [local count: 70429947]:
  # i_23 = PHI 
  # prephitmp_25 = PHI 

 -   [local count: 955630225]:
 +   [local count: 640272252]:
  # i_15 = PHI 
  # prephitmp_16 = PHI 
  i_22 = inc (i_15);
  if (n_7(D) > i_22)
goto ; [89.00%]
  else
goto ; [11.00%]

 -   [local count: 850510901]:
 +   [local count: 569842305]:
  goto ; [100.00%]

}

 gcc/ChangeLog:

* tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
(do_split_loop_on_cond): Likewise.
 ---
gcc/tree-ssa-loop-split.c | 16 
1 file changed, 8 insertions(+), 8 deletions(-)

 diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
 index 3a09bbc39e5..8e5a7ded0f7 100644
 --- a/gcc/tree-ssa-loop-split.c
 +++ b/gcc/tree-ssa-loop-split.c
 @@ -583,10 +583,10 @@ split_loop (class loop *loop1)
basic_block cond_bb;
>>
>>  if (!initial_true)
>> -  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
>> +  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
>> +
>> +edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
>> +   ? EDGE_SUCC (bbs[i], 0)
>> +   : EDGE_SUCC (bbs[i], 1);
>>

class loop *loop2 = loop_version (loop1, cond, &cond_bb,
 - profile_probability::always (),
 - profile_probability::always (),
 - profile_probability::always (),
 - profile_probability::always (),
 + true_edge->probability,
 + true_edge->probability.invert (),
 + true_edge->probability,
 + true_edge->probability.invert (),
   true);
>>>
>>> there is no 'true_edge' variable at this point.
>>
>> Sorry, missed the above hunk when split the patch.
>>
>>>
gcc_assert (loop2);

 @@ -1486,10 +1486,10 @@ do_split_loop_on_cond (struct loop *loop1, edge 
 invar_branch)
  initialize_original_copy_tables ();

  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
 -   profile_probability::always (),
 -   profile_probability::never (),
 -   profile_probability::always (),
 -   profile_probability::always (),
 +   invar_branch->probability.invert (),
 +   invar_branch->probability,
 +   invar_branch->probability.invert (),
 +   invar_branch->probability,

Re: [PATCH][v2] Adjust volatile handling of the operand scanner

2021-08-11 Thread Eric Botcazou

> build3 currently does no special processing for the FIELD_DECL operand,
> it just sets TREE_THIS_VOLATILE from operand zero for tcc_references.
> 
> The C and C++ frontends have repeated patterns like
> 
>   ref = build3 (COMPONENT_REF, subtype, datum, subdatum,
> NULL_TREE);
>   SET_EXPR_LOCATION (ref, loc);
>   if (TREE_READONLY (subdatum)
> 
>   || (use_datum_quals && TREE_READONLY (datum)))
> 
> TREE_READONLY (ref) = 1;
>   if (TREE_THIS_VOLATILE (subdatum)
> 
>   || (use_datum_quals && TREE_THIS_VOLATILE (datum)))
> 
> TREE_THIS_VOLATILE (ref) = 1;

Likewise in the Ada front-end (gigi).

> Now - I wonder if there's a reason a frontend might _not_ want to
> set TREE_THIS_VOLATILE on a COMPONENT_REF when the FIELD_DECL has
> TREE_THIS_VOLATILE set.

This would be weird semantics in my opinion.

> I guess I'll do one more experiment and add verification that
> TREE_THIS_VOLATILE on COMPONENT_REFs and FIELD_DECLs is consistent
> and see where that trips.

Sounds good to me.

-- 
Eric Botcazou

[PATCH] middle-end/101858 - avoid shift of pointer in folding

2021-08-11 Thread Richard Biener via Gcc-patches

This makes sure to not generate a shift of pointer types in
simplification of X < (cast) (1 << Y).

Bootstrap and regtest pending on x86_64-unknown-linux-gnu.

2021-08-11  Richard Biener  

PR middle-end/101858
* fold-const.c (fold_binary_loc): Guard simplification
of  X < (cast) (1 << Y) to integer types.

* gcc.dg/pr101858.c: New testcase.
---
 gcc/fold-const.c| 2 ++
 gcc/testsuite/gcc.dg/pr101858.c | 9 +
 2 files changed, 11 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr101858.c

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0f701287ba1..3917e97dfb0 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -12497,6 +12497,8 @@ fold_binary_loc (location_t loc, enum tree_code code, 
tree type,
 we can't optimize this.  E.g. (unsigned long long) (1 << Y) for Y
 31 might be 0x8000.  */
   if ((code == LT_EXPR || code == GE_EXPR)
+ && (INTEGRAL_TYPE_P (TREE_TYPE (arg0))
+ || VECTOR_INTEGER_TYPE_P (TREE_TYPE (arg0)))
  && TYPE_UNSIGNED (TREE_TYPE (arg0))
  && CONVERT_EXPR_P (arg1)
  && TREE_CODE (TREE_OPERAND (arg1, 0)) == LSHIFT_EXPR
diff --git a/gcc/testsuite/gcc.dg/pr101858.c b/gcc/testsuite/gcc.dg/pr101858.c
new file mode 100644
index 000..61fcca60982
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101858.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-w" } */
+
+int foo(int a)
+{
+  if (a < (int*)((__INTPTR_TYPE__)1 << a))
+a = 0;
+  return a;
+}
-- 
2.31.1

[committed] sanitizer: Cherry-pick realpath fix

2021-08-11 Thread Jakub Jelinek via Gcc-patches

Hi!

tsan in some cases starts ignoring interceptors and only calls the
intercepted functions.  But for realpath the behavior for NULL second argument
was only handled in the interceptor and intercepted function was the one
found by dlsym which is often one that doesn't handle NULL as second argument.

Fixed by using dlvsym with "GLIBC_2.3" if possible to look up the intercepted
function, and by no longer emulating the behavior in the wrapper.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2021-08-11  Jakub Jelinek  

* sanitizer_common/sanitizer_common_interceptors.inc: Cherry-pick
llvm-project revision faef0d042f523357fe5590e7cb6a8391cf0351a8.

--- libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc
+++ libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc
@@ -3664,21 +3664,11 @@ INTERCEPTOR(char *, realpath, const char *path, char 
*resolved_path) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, realpath, path, resolved_path);
   if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
-
-  // Workaround a bug in glibc where dlsym(RTLD_NEXT, ...) returns the oldest
-  // version of a versioned symbol. For realpath(), this gives us something
-  // (called __old_realpath) that does not handle NULL in the second argument.
-  // Handle it as part of the interceptor.
-  char *allocated_path = nullptr;
-  if (!resolved_path)
-allocated_path = resolved_path = (char *)WRAP(malloc)(path_max + 1);
-
   char *res = REAL(realpath)(path, resolved_path);
-  if (allocated_path && !res) WRAP(free)(allocated_path);
   if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
   return res;
 }
-#define INIT_REALPATH COMMON_INTERCEPT_FUNCTION(realpath);
+#define INIT_REALPATH COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(realpath, 
"GLIBC_2.3");
 #else
 #define INIT_REALPATH
 #endif


Jakub

Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.

2021-08-11 Thread Tobias Burnus


On 11.08.21 00:46, Sandra Loosemore wrote:

On 8/10/21 2:29 AM, Tobias Burnus wrote:


[snip]

To conclude: I like the code changes (LGTM); the
'__float128' -> 'TFmode' comment change also matches the code.

However, I think both longer comments need to be updated.


OK.  I used your wording verbatim for the first one.  For the second
one, I'm still pretty confused as I think it is at least theoretically
possible on PowerPC to have a target with 64-bit long double (AIX?)
that also supports the __ibm128 format, and it would be wrong to
assume that *any* 128-bit mode that's not long double is IEEE.  So I
decided the best thing is just to replace the FIXME with a pointer to
the issue I opened yesterday

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101835


LGTM – but ...


+  /* See PR101835.  */


... I wonder whether your PR reference should have a TODO or FIXME
prefix – or a "for some issue" suffix. Currently, it can be read as if
the PR describes why the code was added – and not for questioning the code.

Tobias

PS: I added some more notes to the PR + extended the subject to make it
easier to find.

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Re: [PATCH] [i386] Combine avx_vec_concatv16si and avx512f_zero_extendv16hiv16si2_1 to avx512f_zero_extendv16hiv16si2_2.

2021-08-11 Thread Jakub Jelinek via Gcc-patches

On Wed, Aug 11, 2021 at 02:43:06PM +0800, liuhongt wrote:
>   Add define_insn_and_split to combine avx_vec_concatv16si/2 and
> avx512f_zero_extendv16hiv16si2_1 since the latter already zero_extend
> the upper bits, similar for other patterns which are related to
> pmovzx{bw,wd,dq}.
> 
> It will do optimization like
> 
> -   vmovdqa %ymm0, %ymm0# 7 [c=4 l=6]  avx_vec_concatv16si/2
> vpmovzxwd   %ymm0, %zmm0# 22[c=4 l=6]  
> avx512f_zero_extendv16hiv16si2
> ret # 25[c=0 l=1]  simple_return_internal
> 
>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>   Ok for trunk?
> 
> gcc/ChangeLog:
> 
>   PR target/101846
>   * config/i386/sse.md (*avx2_zero_extendv16qiv16hi2_2): New
>   post_reload define_insn_and_split.

The ChangeLog doesn't mention the newly added mode iterators, perhaps it
should.

>   (*avx512bw_zero_extendv32qiv32hi2_2): Ditto.
>   (*sse4_1_zero_extendv8qiv8hi2_4): Ditto.
>   (*avx512f_zero_extendv16hiv16si2_2): Ditto.
>   (*avx2_zero_extendv8hiv8si2_2): Ditto.
>   (*sse4_1_zero_extendv4hiv4si2_4): Ditto.
>   (*avx512f_zero_extendv8siv8di2_2): Ditto.
>   (*avx2_zero_extendv4siv4di2_2): Ditto.
>   (*sse4_1_zero_extendv2siv2di2_4): Ditto.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR target/101846
>   * gcc.target/i386/pr101846-1.c: New test.
> ---
>  gcc/config/i386/sse.md | 220 +
>  gcc/testsuite/gcc.target/i386/pr101846-1.c |  95 +
>  2 files changed, 315 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-1.c
> 
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index a46a2373547..6450c058458 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -673,8 +673,14 @@ (define_mode_iterator VI12_128 [V16QI V8HI])
>  (define_mode_iterator VI14_128 [V16QI V4SI])
>  (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
>  (define_mode_iterator VI24_128 [V8HI V4SI])
> +(define_mode_iterator VI128_128 [V16QI V8HI V2DI])

And this mode iterator isn't used anywhere in the patch it seems.

Otherwise LGTM, although it fixes just particular, though perhaps very
important, cases.  Detecting in general that some operations on a vector
aren't needed, because the following permutation that uses it never reads
those elements, is something that would need to be done on gimple.

Would it be possible to handle also the similar pmovzx{bd,wq,bq} cases?

Jakub

Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc

2021-08-11 Thread Richard Biener via Gcc-patches

On Tue, 10 Aug 2021, Qing Zhao wrote:

> 
> 
> > On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches 
> >  wrote:
> > 
> > Hi, Richard,
> > 
> >> On Aug 10, 2021, at 10:22 AM, Richard Biener  wrote:
>  
>  Especially in the VLA case but likely also in general (though unlikely
>  since usually the receiver of initializations are simple enough).  I'd
>  expect the VLA case end up as
>  
>  *ptr_to_decl = .DEFERRED_INIT (...);
>  
>  where *ptr_to_decl is the DECL_VALUE_EXPR of the decl.
> >>> 
> >>> So, for the following small testing case:
> >>> 
> >>> 
> >>> extern void bar (int);
> >>> 
> >>> void foo(int n)
> >>> {
> >>> int arr[n];
> >>> bar (arr[2]);
> >>> return;
> >>> }
> >>> =
> >>> 
> >>> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S 
> >>> -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is:
> >>> 
> >>> =
> >>> void foo (int n)
> >>> {
> >>> int n.0;
> >>> sizetype D.1950;
> >>> bitsizetype D.1951;
> >>> sizetype D.1952;
> >>> bitsizetype D.1953;
> >>> sizetype D.1954;
> >>> int[0:D.1950] * arr.1;
> >>> void * saved_stack.2;
> >>> int arr[0:D.1950] [value-expr: *arr.1];
> >>> 
> >>> saved_stack.2 = __builtin_stack_save ();
> >>> try
> >>>   {
> >>> n.0 = n;
> >>> _1 = (long int) n.0;
> >>> _2 = _1 + -1;
> >>> _3 = (sizetype) _2;
> >>> D.1950 = _3;
> >>> _4 = (sizetype) n.0;
> >>> _5 = (bitsizetype) _4;
> >>> _6 = _5 * 32;
> >>> D.1951 = _6;
> >>> _7 = (sizetype) n.0;
> >>> _8 = _7 * 4;
> >>> D.1952 = _8;
> >>> _9 = (sizetype) n.0;
> >>> _10 = (bitsizetype) _9;
> >>> _11 = _10 * 32;
> >>> D.1953 = _11;
> >>> _12 = (sizetype) n.0;
> >>> _13 = _12 * 4;
> >>> D.1954 = _13;
> >>> arr.1 = __builtin_alloca_with_align (D.1954, 32);
> >>> arr = .DEFERRED_INIT (D.1952, 2, 1);
> >>> _14 = (*arr.1)[2];
> >>> bar (_14);
> >>> return;
> >>>   }
> >>> finally
> >>>   {
> >>> __builtin_stack_restore (saved_stack.2);
> >>>   }
> >>> }
> >>> 
> >>> 
> >>> 
> >>> You think that the above .DEFERRED_INIT is not correct?
> >>> It should be:
> >>> 
> >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> >>> 
> >>> ?
> >> 
> >> Yes.
> >> 
> > 
> > I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT as:
> > 
> >  arr.1 = __builtin_alloca_with_align (D.1954, 32);
> >  *arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> > 
> > However, this call triggered the assertion failure in verify_gimple_call of 
> > tree-cfg.c because the LHS is not a valid LHS. 
> > Then I modify tree-cfg.c as:
> > 
> > diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
> > index 330eb7dd89bf..180d4f1f9e32 100644
> > --- a/gcc/tree-cfg.c
> > +++ b/gcc/tree-cfg.c
> > @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt)
> >  }
> > 
> >   tree lhs = gimple_call_lhs (stmt);
> > +  /* For .DEFERRED_INIT call, the LHS might be an indirection of
> > + a pointer for the VLA variable, which is not a valid LHS of
> > + a gimple call, we ignore the asssertion on this.  */ 
> >   if (lhs
> > +  && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT))
> >   && (!is_gimple_reg (lhs)
> >  && (!is_gimple_lvalue (lhs)
> >  || verify_types_in_gimple_reference
> > 
> > The assertion failure in tree-cfg.c got resolved, but I got another 
> > assertion failure in operands_scanner::get_expr_operands (tree *expr_p, int 
> > flags), line 945:
> > 
> > 939   /* If we get here, something has gone wrong.  */
> > 940   if (flag_checking)
> > 941 {
> > 942   fprintf (stderr, "unhandled expression in 
> > get_expr_operands():\n");
> > 943   debug_tree (expr);
> > 944   fputs ("\n", stderr);
> > 945   gcc_unreachable ();
> > 946 }
> > 
> > Looks like that  the gimple statement:
> >*arr.1 = .DEFERRED_INIT (D.1952, 2, 1);
> > 
> > Is not valid.  i.e, the LHS should not be an indirection to a pointer. 
> > 
> > How to resolve this issue?

It sounds like the LHS is an INDIRECT_REF maybe?  That means it's
still not properly gimplified because it should end up as a MEM_REF
instead.

But I'm just guessing here ... if you are in a debugger then you can
invoke debug_tree (lhs) in the inferior to see what it exactly is
at the point of the failure.

> I came up with the following solution:
> 
> Define the IFN_DEFERRED_INIT function as:
> 
>LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA);
> 
>if IS_VLA is false, the LHS is the DECL itself,
>if IS_VLA is true, the LHS is the pointer to this DECL that created by
>gimplify_vla_decl.
> 
> 
> The benefits of this solution are:
> 
> 1. Resolved the invalid IR issue;
> 2. The call stmt carries the address of the VLA naturally;
> 
> The issue with this solution is:
> 
> For VLA and non-VLA, the LHS will be different, 
> 
> Do you see any other potential issues with this solution?
> 
> thanks.
> 
> Qing
> 
> 
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany

[PATCH v3] gcov: Add TARGET_GCOV_TYPE_SIZE target macro

2021-08-11 Thread Sebastian Huber

If -fprofile-update=atomic is used, then the target must provide atomic
operations for the counters of the type returned by get_gcov_type().
This is a 64-bit type for targets which have a 64-bit long long type.
On 32-bit targets this could be an issue since they may not provide
64-bit atomic operations.  Allow targets to override the default type
size with the new TARGET_GCOV_TYPE_SIZE target macro.

If a 32-bit gcov type size is used, then there is currently a warning in
libgcov-driver.c in a dead code block due to
sizeof (counter) == sizeof (gcov_unsigned_t):

libgcc/libgcov-driver.c: In function 'dump_counter':
libgcc/libgcov-driver.c:401:46: warning: right shift count >= width of type 
[-Wshift-count-overflow]
  401 | dump_unsigned ((gcov_unsigned_t)(counter >> 32), dump_fn, arg);
  |  ^~

gcc/

* c-family/c-cppbuiltin.c (c_cpp_builtins): Define
__LIBGCC_GCOV_TYPE_SIZE if flag_building_libgcc is true.
* config/sparc/rtemself.h (TARGET_GCOV_TYPE_SIZE): Redefine.
* coverage.c (get_gcov_type): Use targetm.gcov_type_size.
* doc/tm.texi (TARGET_GCOV_TYPE_SIZE): Add hook under "Misc".
* doc/tm.texi.in: Regenerate.
* target.def (gcov_type_size): New POD hook.
* tree-profile.c (gimple_gen_edge_profiler): Use precision of
gcov_type_node.
(gimple_gen_time_profiler): Likewise.

libgcc/

* libgcov.h (gcov_type): Define using __LIBGCC_GCOV_TYPE_SIZE.
(gcov_type_unsigned): Likewise.
---
 gcc/c-family/c-cppbuiltin.c |  2 ++
 gcc/config/sparc/rtemself.h |  3 +++
 gcc/coverage.c  |  3 +--
 gcc/doc/tm.texi | 11 +++
 gcc/doc/tm.texi.in  |  2 ++
 gcc/target.def  | 12 
 gcc/tree-profile.c  |  4 ++--
 libgcc/libgcov.h|  6 +++---
 8 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index f79f939bd10f..e85b60c79f49 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -1450,6 +1450,8 @@ c_cpp_builtins (cpp_reader *pfile)
   /* For libgcov.  */
   builtin_define_with_int_value ("__LIBGCC_VTABLE_USES_DESCRIPTORS__",
 TARGET_VTABLE_USES_DESCRIPTORS);
+  builtin_define_with_int_value ("__LIBGCC_GCOV_TYPE_SIZE",
+TARGET_GCOV_TYPE_SIZE);
 }
 
   /* For use in assembly language.  */
diff --git a/gcc/config/sparc/rtemself.h b/gcc/config/sparc/rtemself.h
index fa972af640cc..87a3ceb640c0 100644
--- a/gcc/config/sparc/rtemself.h
+++ b/gcc/config/sparc/rtemself.h
@@ -40,3 +40,6 @@
 
 /* Use the default */
 #undef LINK_GCC_C_SEQUENCE_SPEC
+
+#undef TARGET_GCOV_TYPE_SIZE
+#define TARGET_GCOV_TYPE_SIZE 32
diff --git a/gcc/coverage.c b/gcc/coverage.c
index ac9a9fdad228..6166247ad179 100644
--- a/gcc/coverage.c
+++ b/gcc/coverage.c
@@ -145,8 +145,7 @@ static void coverage_obj_finish (vec *);
 tree
 get_gcov_type (void)
 {
-  scalar_int_mode mode
-= smallest_int_mode_for_size (LONG_LONG_TYPE_SIZE > 32 ? 64 : 32);
+  scalar_int_mode mode = smallest_int_mode_for_size (targetm.gcov_type_size);
   return lang_hooks.types.type_for_mode (mode, false);
 }
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index a30fdcbbf3d6..429e7edf0e9d 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12588,3 +12588,14 @@ Return an RTX representing @var{tagged_pointer} with 
its tag set to zero.
 Store the result in @var{target} if convenient.
 The default clears the top byte of the original pointer.
 @end deftypefn
+
+@deftypevr {Target Hook} HOST_WIDE_INT TARGET_GCOV_TYPE_SIZE
+The gcov type size in bits.  This type is used for example for counters
+incremented by profiling and code-coverage events.  The default value is 64,
+if the type size of long long is greater than 32, otherwise the default
+value is 32.  A 64-bit type is recommended to avoid overflows of the
+counters.  If the @option{-fprofile-update=atomic} is used, then the
+counters are incremented using atomic operations.  Targets not supporting
+64-bit atomic operations may override the default value and request a 32-bit
+type.
+@end deftypevr
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 611fc500ac86..fdf16b901c53 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8180,3 +8180,5 @@ maintainer is familiar with.
 @hook TARGET_MEMTAG_EXTRACT_TAG
 
 @hook TARGET_MEMTAG_UNTAGGED_POINTER
+
+@hook TARGET_GCOV_TYPE_SIZE
diff --git a/gcc/target.def b/gcc/target.def
index 7676d5e626e3..b94c2c40dcf1 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -7104,6 +7104,18 @@ DEFHOOK
  void, (void),
  NULL)
 
+DEFHOOKPOD
+(gcov_type_size,
+ "The gcov type size in bits.  This type is used for example for counters\n\
+incremented by profiling and code-coverage events.  The default value is 64,\n\
+if the type size of long long is greater than 32, otherwise the default\n\
+value is

[PATCH] rs6000: Make some BIFs vectorized on P10

2021-08-11 Thread Kewen.Lin via Gcc-patches

Hi,

This patch adds support that lets the vectorizer replace the
scalar versions of some built-in functions with their
corresponding vector versions when targeting Power10.

Bootstrapped & regtested on powerpc64le-linux-gnu {P9,P10}
and powerpc64-linux-gnu P8.

Is it ok for trunk?

BR,
Kewen
-
gcc/ChangeLog:

* config/rs6000/rs6000.c (rs6000_builtin_md_vectorized_function): Add
support for some built-in functions vectorized on Power10.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/dive-vectorize-1.c: New test.
* gcc.target/powerpc/dive-vectorize-1.h: New test.
* gcc.target/powerpc/dive-vectorize-2.c: New test.
* gcc.target/powerpc/dive-vectorize-2.h: New test.
* gcc.target/powerpc/dive-vectorize-run-1.c: New test.
* gcc.target/powerpc/dive-vectorize-run-2.c: New test.
* gcc.target/powerpc/p10-bifs-vectorize-1.c: New test.
* gcc.target/powerpc/p10-bifs-vectorize-1.h: New test.
* gcc.target/powerpc/p10-bifs-vectorize-run-1.c: New test.
---
 gcc/config/rs6000/rs6000.c| 55 +++
 .../gcc.target/powerpc/dive-vectorize-1.c | 11 
 .../gcc.target/powerpc/dive-vectorize-1.h | 22 
 .../gcc.target/powerpc/dive-vectorize-2.c | 12 
 .../gcc.target/powerpc/dive-vectorize-2.h | 22 
 .../gcc.target/powerpc/dive-vectorize-run-1.c | 52 ++
 .../gcc.target/powerpc/dive-vectorize-run-2.c | 53 ++
 .../gcc.target/powerpc/p10-bifs-vectorize-1.c | 15 +
 .../gcc.target/powerpc/p10-bifs-vectorize-1.h | 40 ++
 .../powerpc/p10-bifs-vectorize-run-1.c| 45 +++
 10 files changed, 327 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 279f00cc648..3eac1d05101 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5785,6 +5785,61 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree 
type_out,
 default:
   break;
 }
+
+  machine_mode in_vmode = TYPE_MODE (type_in);
+  machine_mode out_vmode = TYPE_MODE (type_out);
+
+  /* Power10 supported vectorized built-in functions.  */
+  if (TARGET_POWER10
+  && in_vmode == out_vmode
+  && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
+{
+  machine_mode exp_mode = DImode;
+  machine_mode exp_vmode = V2DImode;
+  enum rs6000_builtins vname = RS6000_BUILTIN_COUNT;
+  switch (fn)
+   {
+   case MISC_BUILTIN_DIVWE:
+   case MISC_BUILTIN_DIVWEU:
+ exp_mode = SImode;
+ exp_vmode = V4SImode;
+ if (fn == MISC_BUILTIN_DIVWE)
+   vname = P10V_BUILTIN_DIVES_V4SI;
+ else
+   vname = P10V_BUILTIN_DIVEU_V4SI;
+ break;
+   case MISC_BUILTIN_DIVDE:
+   case MISC_BUILTIN_DIVDEU:
+ if (fn == MISC_BUILTIN_DIVDE)
+   vname = P10V_BUILTIN_DIVES_V2DI;
+ else
+   vname = P10V_BUILTIN_DIVEU_V2DI;
+ break;
+   case P10_BUILTIN_CFUGED:
+ vname = P10V_BUILTIN_VCFUGED;
+ break;
+   case P10_BUILTIN_CNTLZDM:
+ vname = P10V_BUILTIN_VCLZDM;
+ break;
+   case P10_BUILTIN_CNTTZDM:
+ vname = P10V_BUILTIN_VCTZDM;
+ break;
+   case P10_BUILTIN_PDEPD:
+ vname = P10V_BUILTIN_VPDEPD;
+ break;
+   case P10_BUILTIN_PEXTD:
+ vname = P10V_BUILTIN_VPEXTD;
+ break;
+   default:
+ return NULL_TREE;
+   }
+
+  if (vname != RS6000_BUILTIN_COUNT
+ && in_mode == exp_mode
+ && in_vmode == exp_vmode)
+   return rs6000_builtin_decls[vname];
+}
+
   return NULL_TREE;
 }
 
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c 
b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
new file mode 100644
index 000..84f1b0a88f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize 
-fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test if signed/unsigned int extended divisions get vectorized.  */
+
+#include "dive-vectorize-1.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"

[PATCH] [i386] Combine avx_vec_concatv16si and avx512f_zero_extendv16hiv16si2_1 to avx512f_zero_extendv16hiv16si2_2.

2021-08-11 Thread liuhongt via Gcc-patches

Hi:
  Add a define_insn_and_split to combine avx_vec_concatv16si/2 and
avx512f_zero_extendv16hiv16si2_1, since the latter already zero-extends
the upper bits; the same is done for the other patterns related to
pmovzx{bw,wd,dq}.

It enables optimizations like the following:

-   vmovdqa %ymm0, %ymm0# 7 [c=4 l=6]  avx_vec_concatv16si/2
vpmovzxwd   %ymm0, %zmm0# 22[c=4 l=6]  avx512f_zero_extendv16hiv16si2
ret # 25[c=0 l=1]  simple_return_internal

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
  Ok for trunk?

gcc/ChangeLog:

PR target/101846
* config/i386/sse.md (*avx2_zero_extendv16qiv16hi2_2): New
post_reload define_insn_and_split.
(*avx512bw_zero_extendv32qiv32hi2_2): Ditto.
(*sse4_1_zero_extendv8qiv8hi2_4): Ditto.
(*avx512f_zero_extendv16hiv16si2_2): Ditto.
(*avx2_zero_extendv8hiv8si2_2): Ditto.
(*sse4_1_zero_extendv4hiv4si2_4): Ditto.
(*avx512f_zero_extendv8siv8di2_2): Ditto.
(*avx2_zero_extendv4siv4di2_2): Ditto.
(*sse4_1_zero_extendv2siv2di2_4): Ditto.

gcc/testsuite/ChangeLog:

PR target/101846
* gcc.target/i386/pr101846-1.c: New test.
---
 gcc/config/i386/sse.md | 220 +
 gcc/testsuite/gcc.target/i386/pr101846-1.c |  95 +
 2 files changed, 315 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a46a2373547..6450c058458 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -673,8 +673,14 @@ (define_mode_iterator VI12_128 [V16QI V8HI])
 (define_mode_iterator VI14_128 [V16QI V4SI])
 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
 (define_mode_iterator VI24_128 [V8HI V4SI])
+(define_mode_iterator VI128_128 [V16QI V8HI V2DI])
 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
+(define_mode_iterator VI248_256 [V16HI V8SI V4DI])
+(define_mode_iterator VI248_512 [V32HI V16SI V8DI])
 (define_mode_iterator VI48_128 [V4SI V2DI])
+(define_mode_iterator VI148_512 [V64QI V16SI V8DI])
+(define_mode_iterator VI148_256 [V32QI V8SI V4DI])
+(define_mode_iterator VI148_128 [V16QI V4SI V2DI])
 
 ;; Various 256bit and 512 vector integer mode combinations
 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
@@ -18499,6 +18505,26 @@ (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_1"
   operands[1] = lowpart_subreg (V16QImode, operands[1], V32QImode);
 })
 
+(define_insn_and_split "*avx2_zero_extendv16qiv16hi2_2"
+  [(set (match_operand:V32QI 0 "register_operand" "=v")
+   (vec_select:V32QI
+ (vec_concat:V64QI
+   (subreg:V32QI
+ (vec_concat:VI248_256
+   (match_operand: 1 "nonimmediate_operand" "vm")
+   (match_operand: 2 "const0_operand" "C")) 0)
+   (match_operand:V32QI 3 "const0_operand" "C"))
+ (match_parallel 4 "pmovzx_parallel"
+   [(match_operand 5 "const_int_operand" "n")])))]
+  "TARGET_AVX2"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
+{
+  operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
+  operands[1] = lowpart_subreg (V16QImode, operands[1], mode);
+})
+
 (define_expand "v16qiv16hi2"
   [(set (match_operand:V16HI 0 "register_operand")
(any_extend:V16HI
@@ -18533,6 +18559,26 @@ (define_insn_and_split 
"*avx512bw_zero_extendv32qiv32hi2_1"
   operands[1] = lowpart_subreg (V32QImode, operands[1], V64QImode);
 })
 
+(define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_2"
+  [(set (match_operand:V64QI 0 "register_operand" "=v")
+   (vec_select:V64QI
+ (vec_concat:V128QI
+   (subreg:V64QI
+ (vec_concat:VI248_512
+   (match_operand: 1 "nonimmediate_operand" "vm")
+   (match_operand: 2 "const0_operand" "C")) 0)
+   (match_operand:V64QI 3 "const0_operand" "C"))
+ (match_parallel 4 "pmovzx_parallel"
+   [(match_operand 5 "const_int_operand" "n")])))]
+  "TARGET_AVX512BW"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
+{
+  operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
+  operands[1] = lowpart_subreg (V32QImode, operands[1], mode);
+})
+
 (define_expand "v32qiv32hi2"
   [(set (match_operand:V32HI 0 "register_operand")
(any_extend:V32HI
@@ -18619,6 +18665,41 @@ (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3"
 }
   [(set_attr "isa" "noavx,noavx,avx")])
 
+(define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_4"
+  [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,Yw")
+   (vec_select:V16QI
+ (vec_concat:V32QI
+   (subreg:V16QI
+ (vec_concat:VI248_128
+   (match_operand: 1 "vector_operand" 
"YrBm,*xBm,Ywm")
+   (match_operand: 2 "const0_operand" "C,C,C")) 0)
+   (match_operand:V16QI 3 "const0_operand" "C,C,C"))
+ (match_parallel 4

Re: [PATCH][v2] Adjust volatile handling of the operand scanner

2021-08-11 Thread Richard Biener via Gcc-patches

On Tue, 10 Aug 2021, Eric Botcazou wrote:

> > The question is whether we instead want to amend build3 to
> > set TREE_THIS_VOLATILE automatically when the FIELD_DECL has
> > it set.  At least for the Fortran FE cases the gimplifier
> > fails to see some volatile references and thus can generate
> > wrong code which is a latent issue.
> 
> What do we do for other similar flags, e.g. TREE_READONLY?

build3 currently does no special processing for the FIELD_DECL operand,
it just sets TREE_THIS_VOLATILE from operand zero for tcc_references.

The C and C++ frontends have repeated patterns like

  ref = build3 (COMPONENT_REF, subtype, datum, subdatum,
NULL_TREE);
  SET_EXPR_LOCATION (ref, loc);
  if (TREE_READONLY (subdatum)
  || (use_datum_quals && TREE_READONLY (datum)))
TREE_READONLY (ref) = 1;
  if (TREE_THIS_VOLATILE (subdatum)
  || (use_datum_quals && TREE_THIS_VOLATILE (datum)))
TREE_THIS_VOLATILE (ref) = 1;

Leaving out TREE_READONLY shouldn't cause any correctness issue.  It's
just that adjusting the SSA operand scanner to correctly interpret
GENERIC uncovers pre-existing issues in the Fortran frontend
(one manifests as a testsuite FAIL - otherwise I wouldn't have noticed).

I'm fine with just plugging the Fortran FE holes as we discover them
but I did not check other frontends and testsuite coverage is weak.

Now - I wonder if there's a reason a frontend might _not_ want to
set TREE_THIS_VOLATILE on a COMPONENT_REF when the FIELD_DECL has
TREE_THIS_VOLATILE set.

I guess I'll do one more experiment and add verification that
TREE_THIS_VOLATILE on COMPONENT_REFs and FIELD_DECLs is consistent
and see where that trips.

Richard.

Re: [PATCH] Extend ldexp{s, d}f3 to vscalefs{s, d} when TARGET_AVX512F and TARGET_SSE_MATH.

2021-08-11 Thread Uros Bizjak via Gcc-patches

On Tue, Aug 10, 2021 at 2:13 PM liuhongt  wrote:
>
> Hi:
>   AVX512F supports vscalefs{s,d}, which is the same as ldexp except that the
> second operand must be floating point.
>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>
> gcc/ChangeLog:
>
> PR target/98309
> * config/i386/i386.md (ldexp3): Extend to vscalefs[sd]
> when TARGET_AVX512F and TARGET_SSE_MATH.
>
> gcc/testsuite/ChangeLog:
>
> PR target/98309
> * gcc.target/i386/pr98309-1.c: New test.
> * gcc.target/i386/pr98309-2.c: New test.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.md   | 34 +++-
>  gcc/testsuite/gcc.target/i386/pr98309-1.c | 18 +++
>  gcc/testsuite/gcc.target/i386/pr98309-2.c | 39 +++
>  3 files changed, 83 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr98309-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr98309-2.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index bc1c30b77f4..56b09c566ed 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -17914,17 +17914,35 @@ (define_expand "ldexp3"
>[(use (match_operand:MODEF 0 "register_operand"))
> (use (match_operand:MODEF 1 "general_operand"))
> (use (match_operand:SI 2 "register_operand"))]
> -  "TARGET_USE_FANCY_MATH_387
> -   && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
> -   || TARGET_MIX_SSE_I387)
> +  "((TARGET_USE_FANCY_MATH_387
> + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
> +|| TARGET_MIX_SSE_I387))
> +|| (TARGET_AVX512F && TARGET_SSE_MATH))
> && flag_unsafe_math_optimizations"
>  {
> -  rtx op0 = gen_reg_rtx (XFmode);
> -  rtx op1 = gen_reg_rtx (XFmode);
> +  /* Prefer avx512f version.  */
> +  if (TARGET_AVX512F && TARGET_SSE_MATH)
> +   {
> + rtx op2 = gen_reg_rtx (mode);
> + emit_insn (gen_floatsi2 (op2, operands[2]));
> + operands[0] = lowpart_subreg (mode, operands[0], 
> mode);
> + if (MEM_P (operands[1]))
> +   operands[1] = force_reg (mode, operands[1]);
> + operands[1] = lowpart_subreg (mode, operands[1], 
> mode);
> + op2 = lowpart_subreg (mode, op2, mode);
> + emit_insn (gen_avx512f_vmscalef (operands[0],
> +  operands[1],
> +  op2));
> +   }
> +  else
> +{
> +  rtx op0 = gen_reg_rtx (XFmode);
> +  rtx op1 = gen_reg_rtx (XFmode);
>
> -  emit_insn (gen_extendxf2 (op1, operands[1]));
> -  emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
> -  emit_insn (gen_truncxf2 (operands[0], op0));
> +  emit_insn (gen_extendxf2 (op1, operands[1]));
> +  emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
> +  emit_insn (gen_truncxf2 (operands[0], op0));
> +  }
>DONE;
>  })
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr98309-1.c 
> b/gcc/testsuite/gcc.target/i386/pr98309-1.c
> new file mode 100644
> index 000..3a7afb58971
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr98309-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2 -mfpmath=sse -ffast-math" } */
> +/* { dg-final { scan-assembler-times "vcvtsi2s\[sd\]" "2" } } */
> +/* { dg-final { scan-assembler-times "vscalefs\[sd\]" "2" } } */
> +
> +double
> +__attribute__((noipa))
> +foo (double a, int b)
> +{
> +  return __builtin_ldexp (a, b);
> +}
> +
> +float
> +__attribute__((noipa))
> +foo2 (float a, int b)
> +{
> +  return __builtin_ldexpf (a, b);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr98309-2.c 
> b/gcc/testsuite/gcc.target/i386/pr98309-2.c
> new file mode 100644
> index 000..ecfb9168b7d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr98309-2.c
> @@ -0,0 +1,39 @@
> +/* { dg-do run } */
> +/* { dg-options "-mavx512f -O2 -mfpmath=sse -ffast-math" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#define AVX512F
> +#ifndef CHECK
> +#define CHECK "avx512f-helper.h"
> +#endif
> +
> +#include CHECK
> +
> +#include "pr98309-1.c"
> +
> +double
> +__attribute__((noipa, target("fpmath=387")))
> +foo_i387 (double a, int b)
> +{
> +  return __builtin_ldexp (a, b);
> +}
> +
> +float
> +__attribute__((noipa, target("fpmath=387")))
> +foo2_i387 (float a, int b)
> +{
> +  return __builtin_ldexpf (a, b);
> +}
> +
> +static void
> +test_512 (void)
> +{
> +  float fa = 14.5;
> +  double da = 44.5;
> +  int fb = 12;
> +  int db = 8;
> +  if (foo_i387 (da, db) != foo (da, db))
> +abort ();
> +  if (foo2_i387 (fa, fb) != foo2 (fa, fb))
> +abort ();
> +}
> --
> 2.27.0
>

99 matches

Mail list logo