[PATCH] [i386] Optimize vec_perm_expr to match vpmov{dw,qd,wb}.
Hi: This is another patch to optimize vec_perm_expr to match vpmov{dw,qd,wb} under AVX512. For scenarios (like pr101846-2.c) where the upper half is not used, this patch generates better code with only one vpmov{wb,dw,qd} instruction. For scenarios (like pr101846-3.c) where the upper half is actually used, if the src vector length is 256/512 bits, the patch can still generate better code, but for 128 bits, the code generation is worse. 128 bits upper half not used. - vpshufb .LC2(%rip), %xmm0, %xmm0 + vpmovdw %xmm0, %xmm0 128 bits upper half used. - vpshufb .LC2(%rip), %xmm0, %xmm0 + vpmovdw %xmm0, %xmm1 + vmovq %xmm1, %rax + vpinsrq $0, %rax, %xmm0, %xmm0 Maybe expand_vec_perm_trunc_vinsert should only deal with 256/512-bit vectors, but considering that real-world uses like foo_*_128 in pr101846-3.c are relatively unlikely, I have kept this part of the code. Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR target/101846 * config/i386/i386-expand.c (expand_vec_perm_trunc_vinsert): New function. (ix86_vectorize_vec_perm_const): Call expand_vec_perm_trunc_vinsert. * config/i386/sse.md (vec_set_lo_v32hi): New define_insn. (vec_set_lo_v64qi): Ditto. (vec_set_lo_): Extend to no-avx512dq. gcc/testsuite/ChangeLog: PR target/101846 * gcc.target/i386/pr101846-2.c: New test. * gcc.target/i386/pr101846-3.c: New test. 
--- gcc/config/i386/i386-expand.c | 125 + gcc/config/i386/sse.md | 60 +- gcc/testsuite/gcc.target/i386/pr101846-2.c | 81 + gcc/testsuite/gcc.target/i386/pr101846-3.c | 95 4 files changed, 359 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-3.c diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index bd21efa9530..519caac2e15 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -18317,6 +18317,126 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) return false; } +/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D + in terms of a pair of vpmovdw + vinserti128 instructions. */ +static bool +expand_vec_perm_trunc_vinsert (struct expand_vec_perm_d *d) +{ + unsigned i, nelt = d->nelt, mask = d->nelt - 1; + unsigned half = nelt / 2; + machine_mode half_mode, trunc_mode; + + /* vpmov{wb,dw,qd} only available under AVX512. */ + if (!d->one_operand_p || !TARGET_AVX512F + || (!TARGET_AVX512VL && GET_MODE_SIZE (d->vmode) < 64) + || GET_MODE_SIZE (GET_MODE_INNER (d->vmode)) > 4) +return false; + + /* TARGET_AVX512BW is needed for vpmovwb. 
*/ + if (GET_MODE_INNER (d->vmode) == E_QImode && !TARGET_AVX512BW) +return false; + + for (i = 0; i < nelt; i++) +{ + unsigned idx = d->perm[i] & mask; + if (idx != i * 2 && i < half) + return false; + if (idx != i && i >= half) + return false; +} + + rtx (*gen_trunc) (rtx, rtx) = NULL; + rtx (*gen_vec_set_lo) (rtx, rtx, rtx) = NULL; + switch (d->vmode) +{ +case E_V16QImode: + gen_trunc = gen_truncv8hiv8qi2; + gen_vec_set_lo = gen_vec_setv2di; + half_mode = V8QImode; + trunc_mode = V8HImode; + break; +case E_V32QImode: + gen_trunc = gen_truncv16hiv16qi2; + gen_vec_set_lo = gen_vec_set_lo_v32qi; + half_mode = V16QImode; + trunc_mode = V16HImode; + break; +case E_V64QImode: + gen_trunc = gen_truncv32hiv32qi2; + gen_vec_set_lo = gen_vec_set_lo_v64qi; + half_mode = V32QImode; + trunc_mode = V32HImode; + break; +case E_V8HImode: + gen_trunc = gen_truncv4siv4hi2; + gen_vec_set_lo = gen_vec_setv2di; + half_mode = V4HImode; + trunc_mode = V4SImode; + break; +case E_V16HImode: + gen_trunc = gen_truncv8siv8hi2; + gen_vec_set_lo = gen_vec_set_lo_v16hi; + half_mode = V8HImode; + trunc_mode = V8SImode; + break; +case E_V32HImode: + gen_trunc = gen_truncv16siv16hi2; + gen_vec_set_lo = gen_vec_set_lo_v32hi; + half_mode = V16HImode; + trunc_mode = V16SImode; + break; +case E_V4SImode: + gen_trunc = gen_truncv2div2si2; + gen_vec_set_lo = gen_vec_setv2di; + half_mode = V2SImode; + trunc_mode = V2DImode; + break; +case E_V8SImode: + gen_trunc = gen_truncv4div4si2; + gen_vec_set_lo = gen_vec_set_lo_v8si; + half_mode = V4SImode; + trunc_mode = V4DImode; + break; +case E_V16SImode: + gen_trunc = gen_truncv8div8si2; + gen_vec_set_lo = gen_vec_set_lo_v16si; + half_mode = V8SImode; + trunc_mode = V8DImode; + break; + +default: + break; +} + + if (gen_trunc == NULL) +return false; + + rtx op_half
Re: [PATCH] [i386] Introduce a scalar version of avx512f_vmscalef and adjust ldexp3 for it.
On Thu, Aug 12, 2021 at 12:05 PM liuhongt wrote: > > Hi: > This is the patch i'm going to checkin. > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}; > > > 2021-08-12 Uros Bizjak > > gcc/ChangeLog: > > PR target/98309 > * config/i386/i386.md (avx512f_scalef2): New > define_insn. > (ldexp3): Adjust for new define_insn. > (UNSPEC_SCALEF): Move from sse.md. > * config/i386/sse.md (UNSPEC_SCALEF): Move to i386.md. > --- > gcc/config/i386/i386.md | 27 +++ > gcc/config/i386/sse.md | 1 - > 2 files changed, 19 insertions(+), 9 deletions(-) > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 56b09c566ed..4a8e8fea290 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -125,6 +125,9 @@ (define_c_enum "unspec" [ >UNSPEC_RSQRT >UNSPEC_PSADBW > > + ;; For AVX512F support > + UNSPEC_SCALEF > + >;; Generic math support >UNSPEC_COPYSIGN >UNSPEC_XORSIGN > @@ -17894,6 +17897,17 @@ (define_expand "expm12" >DONE; > }) > > +(define_insn "avx512f_scalef2" > + [(set (match_operand:MODEF 0 "register_operand" "=v") > + (unspec:MODEF > + [(match_operand:MODEF 1 "register_operand" "v") > + (match_operand:MODEF 2 "nonimmediate_operand" "vm")] > + UNSPEC_SCALEF))] > + "TARGET_AVX512F" > + "vscalef\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "prefix" "evex") > + (set_attr "mode" "")]) > + > (define_expand "ldexpxf3" >[(match_operand:XF 0 "register_operand") > (match_operand:XF 1 "register_operand") > @@ -17924,15 +17938,12 @@ (define_expand "ldexp3" >if (TARGET_AVX512F && TARGET_SSE_MATH) > { > rtx op2 = gen_reg_rtx (mode); > - emit_insn (gen_floatsi2 (op2, operands[2])); > - operands[0] = lowpart_subreg (mode, operands[0], > mode); > - if (MEM_P (operands[1])) > + > + if (!nonimmediate_operand (operands[1], mode)) > operands[1] = force_reg (mode, operands[1]); > - operands[1] = lowpart_subreg (mode, operands[1], > mode); > - op2 = lowpart_subreg (mode, op2, mode); > - emit_insn (gen_avx512f_vmscalef (operands[0], > - operands[1], > - op2)); > + 
> + emit_insn (gen_floatsi2 (op2, operands[2])); > + emit_insn (gen_avx512f_scalef2 (operands[0], operands[1], op2)); > } >else > { > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 3957c86c3df..9233dfc6150 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -92,7 +92,6 @@ (define_c_enum "unspec" [ >UNSPEC_RCP14 >UNSPEC_RSQRT14 >UNSPEC_FIXUPIMM > - UNSPEC_SCALEF >UNSPEC_VTERNLOG >UNSPEC_GETEXP >UNSPEC_GETMANT > -- > 2.18.1 > Please ignore this, have replied in another thread. -- BR, Hongtao
Re: [PATCH] Extend ldexp{s, d}f3 to vscalefs{s, d} when TARGET_AVX512F and TARGET_SSE_MATH.
On Wed, Aug 11, 2021 at 7:16 PM Uros Bizjak wrote: > > On Wed, Aug 11, 2021 at 8:36 AM Uros Bizjak wrote: > > > > On Tue, Aug 10, 2021 at 2:13 PM liuhongt wrote: > > > > > > Hi: > > > AVX512F supported vscalefs{s,d} which is the same as ldexp except the > > > second operand should be floating point. > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > > > > > > gcc/ChangeLog: > > > > > > PR target/98309 > > > * config/i386/i386.md (ldexp3): Extend to vscalefs[sd] > > > when TARGET_AVX512F and TARGET_SSE_MATH. > > > > > > gcc/testsuite/ChangeLog: > > > > > > PR target/98309 > > > * gcc.target/i386/pr98309-1.c: New test. > > > * gcc.target/i386/pr98309-2.c: New test. > > > > OK. > > Actually, we should introduce a scalar version of avx512f_vmscalef, so > we can avoid all subreg conversions with the vector-merge (VM) > version, and will also allow memory in operand 2. > > Please test the attached incremental patch. > Bootstrapped and regtested on x86_64-linux-gnu{-m32,} on CLX. Tests are fine. > Uros. -- BR, Hongtao
[PATCH] [i386] Introduce a scalar version of avx512f_vmscalef and adjust ldexp3 for it.
Hi: This is the patch i'm going to checkin. Bootstrapped and regtested on x86_64-linux-gnu{-m32,}; 2021-08-12 Uros Bizjak gcc/ChangeLog: PR target/98309 * config/i386/i386.md (avx512f_scalef2): New define_insn. (ldexp3): Adjust for new define_insn. (UNSPEC_SCALEF): Move from sse.md. * config/i386/sse.md (UNSPEC_SCALEF): Move to i386.md. --- gcc/config/i386/i386.md | 27 +++ gcc/config/i386/sse.md | 1 - 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 56b09c566ed..4a8e8fea290 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -125,6 +125,9 @@ (define_c_enum "unspec" [ UNSPEC_RSQRT UNSPEC_PSADBW + ;; For AVX512F support + UNSPEC_SCALEF + ;; Generic math support UNSPEC_COPYSIGN UNSPEC_XORSIGN @@ -17894,6 +17897,17 @@ (define_expand "expm12" DONE; }) +(define_insn "avx512f_scalef2" + [(set (match_operand:MODEF 0 "register_operand" "=v") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "v") + (match_operand:MODEF 2 "nonimmediate_operand" "vm")] + UNSPEC_SCALEF))] + "TARGET_AVX512F" + "vscalef\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "mode" "")]) + (define_expand "ldexpxf3" [(match_operand:XF 0 "register_operand") (match_operand:XF 1 "register_operand") @@ -17924,15 +17938,12 @@ (define_expand "ldexp3" if (TARGET_AVX512F && TARGET_SSE_MATH) { rtx op2 = gen_reg_rtx (mode); - emit_insn (gen_floatsi2 (op2, operands[2])); - operands[0] = lowpart_subreg (mode, operands[0], mode); - if (MEM_P (operands[1])) + + if (!nonimmediate_operand (operands[1], mode)) operands[1] = force_reg (mode, operands[1]); - operands[1] = lowpart_subreg (mode, operands[1], mode); - op2 = lowpart_subreg (mode, op2, mode); - emit_insn (gen_avx512f_vmscalef (operands[0], - operands[1], - op2)); + + emit_insn (gen_floatsi2 (op2, operands[2])); + emit_insn (gen_avx512f_scalef2 (operands[0], operands[1], op2)); } else { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md 
index 3957c86c3df..9233dfc6150 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -92,7 +92,6 @@ (define_c_enum "unspec" [ UNSPEC_RCP14 UNSPEC_RSQRT14 UNSPEC_FIXUPIMM - UNSPEC_SCALEF UNSPEC_VTERNLOG UNSPEC_GETEXP UNSPEC_GETMANT -- 2.18.1
Re: [PATCH] Fix loop split incorrect count and probability
On 2021/8/11 17:16, Richard Biener wrote: On Wed, 11 Aug 2021, Xionghu Luo wrote: On 2021/8/10 22:47, Richard Biener wrote: On Mon, 9 Aug 2021, Xionghu Luo wrote: Thanks, On 2021/8/6 19:46, Richard Biener wrote: On Tue, 3 Aug 2021, Xionghu Luo wrote: loop split condition is moved between loop1 and loop2, the split bb's count and probability should also be duplicated instead of (100% vs INV), secondly, the original loop1 and loop2 count need be propotional from the original loop. diff base/loop-cond-split-1.c.151t.lsplit patched/loop-cond-split-1.c.151t.lsplit: ... int prephitmp_16; int prephitmp_25; [local count: 118111600]: if (n_7(D) > 0) goto ; [89.00%] else goto ; [11.00%] [local count: 118111600]: return; [local count: 105119324]: pretmp_3 = ga; - [local count: 955630225]: + [local count: 315357973]: # i_13 = PHI # prephitmp_12 = PHI if (prephitmp_12 != 0) goto ; [33.00%] else goto ; [67.00%] - [local count: 315357972]: + [local count: 104068130]: _2 = do_something (); ga = _2; - [local count: 955630225]: + [local count: 315357973]: # prephitmp_5 = PHI i_10 = inc (i_13); if (n_7(D) > i_10) goto ; [89.00%] else goto ; [11.00%] [local count: 105119324]: goto ; [100.00%] - [local count: 850510901]: + [local count: 280668596]: if (prephitmp_12 != 0) -goto ; [100.00%] +goto ; [33.00%] else -goto ; [INV] +goto ; [67.00%] - [local count: 850510901]: + [local count: 280668596]: goto ; [100.00%] - [count: 0]: + [local count: 70429947]: # i_23 = PHI # prephitmp_25 = PHI - [local count: 955630225]: + [local count: 640272252]: # i_15 = PHI # prephitmp_16 = PHI i_22 = inc (i_15); if (n_7(D) > i_22) goto ; [89.00%] else goto ; [11.00%] - [local count: 850510901]: + [local count: 569842305]: goto ; [100.00%] } gcc/ChangeLog: * tree-ssa-loop-split.c (split_loop): Fix incorrect probability. (do_split_loop_on_cond): Likewise. 
--- gcc/tree-ssa-loop-split.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c index 3a09bbc39e5..8e5a7ded0f7 100644 --- a/gcc/tree-ssa-loop-split.c +++ b/gcc/tree-ssa-loop-split.c @@ -583,10 +583,10 @@ split_loop (class loop *loop1) basic_block cond_bb; if (!initial_true) - cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); + cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); + + edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE + ? EDGE_SUCC (bbs[i], 0) + : EDGE_SUCC (bbs[i], 1); class loop *loop2 = loop_version (loop1, cond, _bb, - profile_probability::always (), - profile_probability::always (), - profile_probability::always (), - profile_probability::always (), + true_edge->probability, + true_edge->probability.invert (), + true_edge->probability, + true_edge->probability.invert (), true); there is no 'true_edge' variable at this point. Sorry, missed the above hunk when split the patch. gcc_assert (loop2); @@ -1486,10 +1486,10 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch) initialize_original_copy_tables (); struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL, -profile_probability::always (), -profile_probability::never (), -profile_probability::always (), -profile_probability::always (), +invar_branch->probability.invert (), +invar_branch->probability, +invar_branch->probability.invert (), +invar_branch->probability, true); if (!loop2) { The patch introduction seems to talk about do_split_loop_on_cond only. split_loop faces similar issue though it sets the two branches to 100% vs 100% and no scaling which seems also incorrect. Since loop versioning inserts a condition with the passed probabilities but in this case a 'boolean_true_node' condition the then and else probabilities passed look correct. It's just the scaling
Re: [patch] Make -no-pie option work for native Windows
On 8/11/21 2:21 PM, Eric Botcazou wrote: Hi, as already mentioned on the list, binutils 2.36 generates PIE executables by default on native Windows (because --dynamicbase is the default) so it makes sense to have a simple way to counter that and -no-pie seems appropriate, all the more so that it is automatically passed when building the compiler. Bootstrapped on x86 and x86-64/Windows, w/ and w/o binutils 2.36, OK for the mainline and 11 branch? 2021-08-11 Eric Botcazou * configure.ac (PE linker --disable-dynamicbase support): New check. * configure: Regenerate. * config.in: Likewise. * config/i386/mingw32.h (LINK_SPEC_DISABLE_DYNAMICBASE): New define. (LINK_SPEC): Use it. * config/i386/mingw-w64.h (LINK_SPEC_DISABLE_DYNAMICBASE): Likewise. (LINK_SPEC): Likewise. Looks good to me. Do you have push permissions? OpenPGP_0x713B5FE29C145D45.asc Description: OpenPGP public key OpenPGP_signature Description: OpenPGP digital signature
Re: [PATCH] rs6000: Make some BIFs vectorized on P10
Hi Bill, Thanks for your prompt review! on 2021/8/12 上午12:34, Bill Schmidt wrote: > Hi Kewen, > > FWIW, it's easier on reviewers if you include the patch inline instead of as > an attachment. > > On 8/11/21 1:56 AM, Kewen.Lin wrote: >> Hi, >> >> This patch is to add the support to make vectorizer able to >> vectorize scalar version of some built-in functions with its >> corresponding vector version with Power10 support. >> >> Bootstrapped & regtested on powerpc64le-linux-gnu {P9,P10} >> and powerpc64-linux-gnu P8. >> >> Is it ok for trunk? >> >> BR, >> Kewen >> - >> gcc/ChangeLog: >> >> * config/rs6000/rs6000.c (rs6000_builtin_md_vectorized_function): Add >> support for some built-in functions vectorized on Power10. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/powerpc/dive-vectorize-1.c: New test. >> * gcc.target/powerpc/dive-vectorize-1.h: New test. >> * gcc.target/powerpc/dive-vectorize-2.c: New test. >> * gcc.target/powerpc/dive-vectorize-2.h: New test. >> * gcc.target/powerpc/dive-vectorize-run-1.c: New test. >> * gcc.target/powerpc/dive-vectorize-run-2.c: New test. >> * gcc.target/powerpc/p10-bifs-vectorize-1.c: New test. >> * gcc.target/powerpc/p10-bifs-vectorize-1.h: New test. >> * gcc.target/powerpc/p10-bifs-vectorize-run-1.c: New test. 
> > --- > gcc/config/rs6000/rs6000.c| 55 +++ > .../gcc.target/powerpc/dive-vectorize-1.c | 11 > .../gcc.target/powerpc/dive-vectorize-1.h | 22 > .../gcc.target/powerpc/dive-vectorize-2.c | 12 > .../gcc.target/powerpc/dive-vectorize-2.h | 22 > .../gcc.target/powerpc/dive-vectorize-run-1.c | 52 ++ > .../gcc.target/powerpc/dive-vectorize-run-2.c | 53 ++ > .../gcc.target/powerpc/p10-bifs-vectorize-1.c | 15 + > .../gcc.target/powerpc/p10-bifs-vectorize-1.h | 40 ++ > .../powerpc/p10-bifs-vectorize-run-1.c| 45 +++ > 10 files changed, 327 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h > create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h > create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h > create mode 100644 > gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 279f00cc648..3eac1d05101 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -5785,6 +5785,61 @@ rs6000_builtin_md_vectorized_function (tree fndecl, > tree type_out, > default: >break; > } > + > + machine_mode in_vmode = TYPE_MODE (type_in); > + machine_mode out_vmode = TYPE_MODE (type_out); > + > + /* Power10 supported vectorized built-in functions. */ > + if (TARGET_POWER10 > + && in_vmode == out_vmode > + && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode)) > +{ > + machine_mode exp_mode = DImode; > + machine_mode exp_vmode = V2DImode; > + enum rs6000_builtins vname = RS6000_BUILTIN_COUNT; > > Using this as a flag value looks unnecessary. 
Is this just being done to > silence a warning? > Good question! I didn't notice there is a warning or not, just get used to initializing variable with one suitable value if possible. If you don't mind, may I still keep it? Since if some future codes use vname in a path where it's not assigned, one explicitly wrong enum (bif) seems better than a random one. Or will this mentioned possibility definitely never happen since the current uninitialized variables detection and warning scheme is robust and should not worry about that completely? > + switch (fn) > + { > + case MISC_BUILTIN_DIVWE: > + case MISC_BUILTIN_DIVWEU: > + exp_mode = SImode; > + exp_vmode = V4SImode; > + if (fn == MISC_BUILTIN_DIVWE) > + vname = P10V_BUILTIN_DIVES_V4SI; > + else > + vname = P10V_BUILTIN_DIVEU_V4SI; > + break; > + case MISC_BUILTIN_DIVDE: > + case MISC_BUILTIN_DIVDEU: > + if (fn == MISC_BUILTIN_DIVDE) > + vname = P10V_BUILTIN_DIVES_V2DI; > + else > + vname = P10V_BUILTIN_DIVEU_V2DI; > + break; > + case P10_BUILTIN_CFUGED: > + vname = P10V_BUILTIN_VCFUGED; > + break; > + case P10_BUILTIN_CNTLZDM: > + vname = P10V_BUILTIN_VCLZDM; > + break; > + case P10_BUILTIN_CNTTZDM: > + vname = P10V_BUILTIN_VCTZDM; > + break; > + case P10_BUILTIN_PDEPD: > + vname = P10V_BUILTIN_VPDEPD; > +
Re: [PATCH] c++: constexpr std::construct_at on empty field [PR101663]
On 8/3/21 4:04 PM, Patrick Palka wrote: Here during constexpr evaluation of std::construct_at(_M_value) we find ourselves in cxx_eval_store_expression where the target object is 'a._M_value' and the initializer is {}. Since _M_value is an empty [[no_unique_address]] member we don't create a sub-CONSTRUCTOR for it, so we end up in the early exit code path for empty stores with mismatched types and we trip over the assert therein gcc_assert (is_empty_class (TREE_TYPE (init)) && !lval); because lval is true. The reason it's true is because the INIT_EXPR in question is the LHS of a COMPOUND_EXPR, and evaluation of the LHS is always performed with lval=true for some reason. This is the case ever since r5-5900, before which we used to do the evaluation with lval=false. I'm not sure why we evaluate the LHS of a COMPOUND_EXPR with lval=true Because there's no lvalue-rvalue conversion. We could change that bool to be a tri-value enum that also includes discarded-value expressions such as this, but that hasn't seemed necessary. (changing it to false survives bootstrap+regtest and is sufficient to fix the PR), but regardless it's also straightforward enough to make the relevant code path in cxx_eval_store_expression handle lval=true, which is the approach this patch takes. This patch also consolidates the duplicate implementations of std::construct_at/destroy_at from some of the C++20 constexpr tests into a common header file. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk/11? PR c++/101663 gcc/cp/ChangeLog: * constexpr.c (cxx_eval_store_expression): In the early exit code path for mismatched types, Pass false instead of true for lval when evaluating the LHS. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/construct_at.h: New convenience header that defines minimal implementations of std::construct_at/destroy_at, split out from ... * g++.dg/cpp2a/constexpr-new5.C: ... here. * g++.dg/cpp2a/constexpr-new6.C: Use the header. 
* g++.dg/cpp2a/constexpr-new14.C: Likewise. * g++.dg/cpp2a/constexpr-new20.C: New test. --- gcc/cp/constexpr.c | 4 +- gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C | 60 +- gcc/testsuite/g++.dg/cpp2a/constexpr-new20.C | 18 ++ gcc/testsuite/g++.dg/cpp2a/constexpr-new5.C | 60 +- gcc/testsuite/g++.dg/cpp2a/constexpr-new6.C | 64 +--- gcc/testsuite/g++.dg/cpp2a/construct_at.h| 62 +++ 6 files changed, 85 insertions(+), 183 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-new20.C create mode 100644 gcc/testsuite/g++.dg/cpp2a/construct_at.h diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c index 1af365d47b9..25d84a377d8 100644 --- a/gcc/cp/constexpr.c +++ b/gcc/cp/constexpr.c @@ -5588,8 +5588,8 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, argument, which has the derived type rather than the base type. In this situation, just evaluate the initializer and return, since there's no actual data to store. */ - gcc_assert (is_empty_class (TREE_TYPE (init)) && !lval); - return init; + gcc_assert (is_empty_class (TREE_TYPE (init))); + return lval ? 
target : init; } CONSTRUCTOR_ELTS (*valp) = CONSTRUCTOR_ELTS (init); TREE_CONSTANT (*valp) = TREE_CONSTANT (init); diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C index fd6f6075ef0..26037397b1d 100644 --- a/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C +++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C @@ -1,65 +1,7 @@ // PR c++/97195 // { dg-do compile { target c++20 } } -namespace std -{ - typedef __SIZE_TYPE__ size_t; - - template - struct allocator - { -constexpr allocator () noexcept {} - -constexpr T *allocate (size_t n) -{ return static_cast (::operator new (n * sizeof(T))); } - -constexpr void -deallocate (T *p, size_t n) -{ ::operator delete (p); } - }; - - template - U __declval (int); - template - T __declval (long); - template - auto declval () noexcept -> decltype (__declval (0)); - - template - struct remove_reference - { typedef T type; }; - template - struct remove_reference - { typedef T type; }; - template - struct remove_reference - { typedef T type; }; - - template - constexpr T && - forward (typename std::remove_reference::type ) noexcept - { return static_cast (t); } - - template - constexpr T && - forward (typename std::remove_reference::type &) noexcept - { return static_cast (t); } - - template - constexpr auto - construct_at (T *l, A &&... a) - noexcept (noexcept (::new ((void *) 0) T (std::declval ()...))) - -> decltype (::new ((void *) 0) T (std::declval ()...)) - { return ::new ((void *) l) T (std::forward (a)...); } - -
[committed] libstdc++: Fix test that fails randomly [PR101866]
This test assumes that the same sequence of three values cannot occur, which is incorrect. It's unlikely, but not impossible. Perform the check in a loop, so that in the unlikely event of an identical sequence, we retry. If the library code is buggy it will keep producing the same sequence and the test will time out. If the code is working correctly then we will usually break out of the loop after one iteration, or very rarely after two or three. libstdc++-v3/ChangeLog: PR libstdc++/101866 * testsuite/experimental/random/randint.cc: Loop and retry if reseed() produces the same sequence. Tested x86_64-linux. Committed to trunk. I'll backport too. commit 93f1dbc7cdcc4b31ea4061efb4c2acf2d4f81eb8 Author: Jonathan Wakely Date: Wed Aug 11 22:11:19 2021 libstdc++: Fix test that fails randomly [PR101866] This test assumes that the same sequence of three values cannot occur, which is incorrect. It's unlikely, but not impossible. Perform the check in a loop, so that in the unlikely event of an identical sequence, we retry. If the library code is buggy it will keep producing the same sequence and the test will time out. If the code is working correctly then we will usually break out of the loop after one iteration, or very rarely after two or three. libstdc++-v3/ChangeLog: PR libstdc++/101866 * testsuite/experimental/random/randint.cc: Loop and retry if reseed() produces the same sequence. 
diff --git a/libstdc++-v3/testsuite/experimental/random/randint.cc b/libstdc++-v3/testsuite/experimental/random/randint.cc index d6225eba1df..e05151e5ea0 100644 --- a/libstdc++-v3/testsuite/experimental/random/randint.cc +++ b/libstdc++-v3/testsuite/experimental/random/randint.cc @@ -34,7 +34,7 @@ test01() } std::experimental::reseed(99u); - const long n1[] = { + const int n1[] = { std::experimental::randint(0, 100), std::experimental::randint(0, 100), std::experimental::randint(0, 100), @@ -42,7 +42,7 @@ test01() std::experimental::randint(0, 100) }; std::experimental::reseed(99u); - const long n2[] = { + const int n2[] = { std::experimental::randint(0, 100), std::experimental::randint(0, 100), std::experimental::randint(0, 100), @@ -52,13 +52,13 @@ test01() for (int i = 0; i < 5; ++i) VERIFY( n1[i] == n2[i] ); - std::experimental::reseed(); - const long n3[] = { -std::experimental::randint(0, 100), -std::experimental::randint(0, 100), -std::experimental::randint(0, 100) - }; - VERIFY( !(n3[0] == n1[0] && n3[1] == n1[1] && n3[2] == n1[2]) ); + do + { +std::experimental::reseed(); + } + while (std::experimental::randint(0, 100) == n1[0] + && std::experimental::randint(0, 100) == n1[1] + && std::experimental::randint(0, 100) == n1[2]); } void
Re: [PATCH] c++: suppress all warnings on member pointers to work around ICE [PR101219]
On Wed, 11 Aug 2021 15:19:58 -0400 Jason Merrill wrote: > On 8/6/21 11:34 AM, Sergei Trofimovich wrote: > > On Thu, 29 Jul 2021 11:41:39 -0400 > > Jason Merrill wrote: > > > >> On 7/22/21 7:15 PM, Sergei Trofimovich wrote: > >>> From: Sergei Trofimovich > >>> > >>> r12-1804 ("cp: add support for per-location warning groups.") among other > >>> things removed warning suppression from a few places including > >>> ptrmemfuncs. > >>> > >>> Currently ptrmemfuncs don't have valid BINFO attached which causes ICEs > >>> in access checks: > >>> > >>> crash_signal > >>> gcc/toplev.c:328 > >>> perform_or_defer_access_check(tree_node*, tree_node*, tree_node*, > >>> int, access_failure_info*) > >>> gcc/cp/semantics.c:490 > >>> finish_non_static_data_member(tree_node*, tree_node*, tree_node*) > >>> gcc/cp/semantics.c:2208 > >>> ... > >>> > >>> The change suppresses warnings again until we provide BINFOs for > >>> ptrmemfuncs. > >> > >> We don't need BINFOs for PMFs, we need to avoid paths that expect them. > >> > >> It looks like the problem is with tsubst_copy_and_build calling > >> finish_non_static_data_member instead of build_ptrmemfunc_access_expr. > > > > Sounds good. I'm not sure what would be the best way to match it. 
Here is > > my attempt seems to survive all regtests: > > > > --- a/gcc/cp/pt.c > > +++ b/gcc/cp/pt.c > > @@ -20530,7 +20530,13 @@ tsubst_copy_and_build (tree t, > > if (member == error_mark_node) > >RETURN (error_mark_node); > > > > - if (TREE_CODE (member) == FIELD_DECL) > > + if (object_type && TYPE_PTRMEMFUNC_P(object_type) > > + && TREE_CODE (member) == FIELD_DECL) > > + { > > + r = build_ptrmemfunc_access_expr (object, DECL_NAME(member)); > > + RETURN (r); > > + } > > + else if (TREE_CODE (member) == FIELD_DECL) > >{ > > r = finish_non_static_data_member (member, object, NULL_TREE); > > if (TREE_CODE (r) == COMPONENT_REF) > > > >>> PR c++/101219 > >>> > >>> gcc/cp/ChangeLog: > >>> > >>> * typeck.c (build_ptrmemfunc_access_expr): Suppress all warnings > >>> to avoid ICE. > >>> > >>> gcc/testsuite/ChangeLog: > >>> > >>> * g++.dg/torture/pr101219.C: New test. > >> > >> This doesn't need to be in torture; it has nothing to do with > >> optimization. > > > > Aha, moved to gcc/testsuite/g++.dg/warn/pr101219.C. > > > > --- /dev/null > > +++ b/gcc/testsuite/g++.dg/warn/pr101219.C > > @@ -0,0 +1,11 @@ > > +/* PR c++/101219 - ICE on use of uninitialized memfun pointer > > + { dg-do compile } > > + { dg-options "-Wall" } */ > > + > > +struct S { void m(); }; > > + > > +template bool f() { > > + void (S::*mp)(); > > + > > + return ::m == mp; // no warning emitted here (no instantiation) > > +} > > > > Another question: Is it expected that gcc generates no warnings here? > > It's an uninstantiated function (-1 for warn), but from what I > > understand it's guaranteed to generate comparison with uninitialized > > data if it ever gets instantiated. Given that we used to ICE in > > warning code gcc could possibly flag it? (+1 for warn) > > Generally it's desirable to diagnose templates for which no valid > instantiation is possible. It seems reasonable in most cases to also > warn about templates for which all instantiations would warn. 
> > But uninitialized warnings rely on flow analysis that we only do on > instantiated functions, and in any case the ICE doesn't depend on mp > being uninitialized; I get the same crash if I add = 0 to the declaration. Aha. That makes sense. Let's just fix ICE then. > > + if (object_type && TYPE_PTRMEMFUNC_P(object_type) > > Missing space before (. > > > + && TREE_CODE (member) == FIELD_DECL) > > + { > > + r = build_ptrmemfunc_access_expr (object, DECL_NAME(member)); > > And here. Added both. Attached as v3. -- Sergei >From dbb17a22383faa7837bdd2ea9c902bfab53fa8f2 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 6 Aug 2021 16:14:16 +0100 Subject: [PATCH v3] c++: fix ptrmemfunc template instantiation [PR101219] r12-1804 ("cp: add support for per-location warning groups.") among other things removed warning suppression from a few places including ptrmemfuncs. This exposed a bug in warning detection code as a reference to missing BINFO (it's intentionally missing for ptrmemfunc types): crash_signal gcc/toplev.c:328 perform_or_defer_access_check(tree_node*, tree_node*, tree_node*, int, access_failure_info*) gcc/cp/semantics.c:490 finish_non_static_data_member(tree_node*, tree_node*, tree_node*) gcc/cp/semantics.c:2208 ... The change special cases ptrmemfuncs in templace substitution by using build_ptrmemfunc_access_expr() instead of finish_non_static_data_member(). PR c++/101219 gcc/cp/ChangeLog: * pt.c (tsubst_copy_and_build): Use
gfortran.dg/PR82376.f90: Avoid matching a file-path.
I had a file-path to sources with the substring "new" in it, and (only) this test regressed compared to results from another build without "new" in the name. The test does ! { dg-final { scan-tree-dump-times "new" 4 "original" } } i.e. the contents of the tree-dump-file .original needs to match the undelimited string "new" exactly four times. Very brittle. In the dump-file, there are three lines with calls to new: D.908 = new ((integer(kind=4) *) data); integer(kind=4) * new (integer(kind=4) & data) static integer(kind=4) * new (integer(kind=4) &); But, there's also a line, which for me and cris-elf looked like: _gfortran_runtime_error_at (&"At line 46 of file /X/xyzzynewfrob/gcc/testsuite/gfortran.dg/PR82376.f90"[1]{lb: 1 sz: 1}, &"Pointer actual argument \'new\' is not associated"[1]{lb: 1 sz: 1}); The fourth match is obviously intended to match this line, but only with *one* match, whereas the path can as above yield another hit. With Tcl, the regexp for matching the " " *and* the "'" *and* the "\" gets a bit unsightly, so I suggest just matching the "new" calls, which according to the comment in the test is the key point. You can't have a file-path with spaces and parentheses in a gcc build. I'm also making use of {} rather than "" needing one level of quoting; the "\(" is needed because the matched string is a regexp. Ok to commit? testsuite: * gfortran.dg/PR82376.f90: Robustify match. --- gcc/testsuite/gfortran.dg/PR82376.f90 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gfortran.dg/PR82376.f90 b/gcc/testsuite/gfortran.dg/PR82376.f90 index 07143ab7e82e..b99779ce9d8a 100644 --- a/gcc/testsuite/gfortran.dg/PR82376.f90 +++ b/gcc/testsuite/gfortran.dg/PR82376.f90 @@ -2,7 +2,8 @@ ! { dg-options "-fdump-tree-original -fcheck=pointer" } ! ! Test the fix for PR82376. The pointer check was doubling up the call -! to new. The fix reduces the count of 'new' from 5 to 4. +! to new. 
The fix reduces the count of 'new' from 5 to 4, or to 3, when +! counting only calls. ! ! Contributed by José Rui Faustino de Sousa ! @@ -56,4 +57,4 @@ contains end subroutine set end program main_p -! { dg-final { scan-tree-dump-times "new" 4 "original" } } +! { dg-final { scan-tree-dump-times { new \(} 3 "original" } } -- 2.11.0
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
Hi, I finally decided to take another approach to resolve this issue, it resolved all the potential issues with the “address taken” auto variable. The basic idea is to avoid generating the temporary variable in the beginning. As you mentioned, "The reason is that alt_reloc is memory (because it is address taken) and that GIMPLE says that register typed stores need to use a is_gimple_val RHS which the call is not.” In order to avoid generating the temporary variable for “address taken” auto variable, I updated the utility routine “is_gimple_val” as following: diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c index a2563a45c37d..d5ef1aef8cea 100644 --- a/gcc/gimple-expr.c +++ b/gcc/gimple-expr.c @@ -787,8 +787,20 @@ is_gimple_reg (tree t) return !DECL_NOT_GIMPLE_REG_P (t); } +/* Return true if T is a call to .DEFERRED_INIT internal function. */ +static bool +is_deferred_init_call (tree t) +{ + if (TREE_CODE (t) == CALL_EXPR + && CALL_EXPR_IFN (t) == IFN_DEFERRED_INIT) +return true; + return false; +} + -/* Return true if T is a GIMPLE rvalue, i.e. an identifier or a constant. */ +/* Return true if T is a GIMPLE rvalue, i.e. an identifier or a constant, + or a call to .DEFERRED_INIT internal function because the call to + .DEFERRED_INIT will eventually be expanded as a constant. */ bool is_gimple_val (tree t) @@ -799,7 +811,8 @@ is_gimple_val (tree t) && !is_gimple_reg (t)) return false; - return (is_gimple_variable (t) || is_gimple_min_invariant (t)); + return (is_gimple_variable (t) || is_gimple_min_invariant (t) + || is_deferred_init_call (t)); } With this change, the temporary variable will not be created for “address taken” auto variable, and uninitialized analysis does not need any change. Everything works well. And I believe that treating “call to .DEFERRED_INIT” as “is_gimple_val” is reasonable since this call actually is a constant. Let me know if you have any objection on this solution. thanks. 
Qing > On Aug 11, 2021, at 3:30 PM, Qing Zhao via Gcc-patches > wrote: > > Hi, > > I met another issue for “address taken” auto variable, see below for details: > > the testing case: (gcc/testsuite/gcc.dg/uninit-16.c) > > int foo, bar; > > static > void decode_reloc(int reloc, int *is_alt) > { > if (reloc >= 20) > *is_alt = 1; > else if (reloc >= 10) > *is_alt = 0; > } > > void testfunc() > { > int alt_reloc; > > decode_reloc(foo, _reloc); > > if (alt_reloc) /* { dg-warning "may be used uninitialized" } */ >bar = 42; > } > > When compiled with -ftrivial-auto-var-init=zero -O2 -Wuninitialized > -fdump-tree-all: > > .*gimple dump: > > void testfunc () > { > int alt_reloc; > > try >{ > _1 = .DEFERRED_INIT (4, 2, 0); > alt_reloc = _1; > foo.0_2 = foo; > decode_reloc (foo.0_2, _reloc); > alt_reloc.1_3 = alt_reloc; > if (alt_reloc.1_3 != 0) goto ; else goto ; > : > bar = 42; > : >} > finally >{ > alt_reloc = {CLOBBER}; >} > } > > **fre1 dump: > > void testfunc () > { > int alt_reloc; > int _1; > int foo.0_2; > > : > _1 = .DEFERRED_INIT (4, 2, 0); > foo.0_2 = foo; > if (foo.0_2 > 19) >goto ; [50.00%] > else >goto ; [50.00%] > > : > goto ; [100.00%] > > : > if (foo.0_2 > 9) >goto ; [50.00%] > else >goto ; [50.00%] > > : > goto ; [100.00%] > > : > if (_1 != 0) >goto ; [INV] > else >goto ; [INV] > > : > bar = 42; > > : > return; > > } > > From the above IR file after “FRE”, we can see that the major issue with this > IR is: > > The address taken auto variable “alt_reloc” has been completely replaced by > the temporary variable “_1” in all > the uses of the original “alt_reloc”. > > The major problem with such IR is, during uninitialized analysis phase, the > original use of “alt_reloc” disappeared completely. > So, the warning cannot be reported. > > > My questions: > > 1. Is it possible to get the original “alt_reloc” through the temporary > variable “_1” with some available information recorded in the IR? > 2. 
If not, then we have to record the relationship between “alt_reloc” and > “_1” when the original “alt_reloc” is replaced by “_1” and get such > relationship during >Uninitialized analysis phase. Is this doable? > 3. Looks like that for “address taken” auto variable, if we have to introduce > a new temporary variable and split the call to .DEFERRED_INIT into two: > > temp = .DEFERRED_INIT (4, 2, 0); > alt_reloc = temp; > > More issues might be possible. > > Any comments and suggestions on this issue? > > Qing > > j >> On Aug 11, 2021, at 11:55 AM, Richard Biener wrote: >> >> On August 11, 2021 6:22:00 PM GMT+02:00, Qing Zhao >> wrote: >>> >>> On Aug 11, 2021, at 10:53 AM, Richard Biener wrote: On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao wrote: > I modified the
Re: [PATCH] c++: parameterized requires-expr as default argument [PR101725]
On 8/3/21 4:04 PM, Patrick Palka wrote: Here we're rejecting the default template argument requires (T t) { x(t); } because we consider the 't' in the requirement to be a local variable (according to local_variable_p), and we generally forbid local variables from appearing inside template arguments. We can perhaps fix this by giving special treatment to parameters introduced by requires-expressions, but DR 2082 relaxed the restriction about local variables appearing inside default arguments to permit them inside unevaluated operands thereof. So this patch just implements DR 2082 which also fixes this PR since a requires-expression is an unevaluated context. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps 11? OK for both. PR c++/101725 DR 2082 gcc/cp/ChangeLog: * cp-tree.h (unevaluated_p): Return true for REQUIRES_EXPR. * decl.c (local_variable_p_walkfn): Don't walk into unevaluated operands. * parser.c (cp_parser_primary_expression) : Never reject uses of local variables in unevaluated contexts. * tree.c (cp_walk_subtrees) : Increment cp_unevaluated_operand. Use cp_walk_tree directly instead of WALK_SUBTREE to avoid the goto. Use REQUIRES_EXPR_REQS instead of TREE_OPERAND directly. gcc/testsuite/ChangeLog: * g++.dg/DRs/dr2082.C: New test. * g++.dg/cpp2a/concepts-uneval4.C: New test. * g++.dg/cpp2a/concepts-uneval5.C: New test. 
--- gcc/cp/cp-tree.h | 3 ++- gcc/cp/decl.c | 8 gcc/cp/parser.c | 5 - gcc/cp/tree.c | 4 +++- gcc/testsuite/g++.dg/DRs/dr2082.C | 12 gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C | 12 6 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/DRs/dr2082.C create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 9a47a8787d6..6a8264b0c61 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -8494,7 +8494,8 @@ unevaluated_p (tree_code code) return (code == DECLTYPE_TYPE || code == ALIGNOF_EXPR || code == SIZEOF_EXPR - || code == NOEXCEPT_EXPR); + || code == NOEXCEPT_EXPR + || code == REQUIRES_EXPR); } /* RAII class to push/pop the access scope for T. */ diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 6fa6b9adc87..b0b492360af 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -14270,6 +14270,14 @@ static tree local_variable_p_walkfn (tree *tp, int *walk_subtrees, void * /*data*/) { + if (unevaluated_p (TREE_CODE (*tp))) +{ + /* DR 2082 permits local variables in unevaluated contexts +within a default argument. */ + *walk_subtrees = 0; + return NULL_TREE; +} + if (local_variable_p (*tp) && (!DECL_ARTIFICIAL (*tp) || DECL_NAME (*tp) == this_identifier)) return *tp; diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 47bf7d9ad1f..8b551db2c8a 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -5971,7 +5971,10 @@ cp_parser_primary_expression (cp_parser *parser, /* Check to see if DECL is a local variable in a context where that is forbidden. */ if ((parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN) - && local_variable_p (decl)) + && local_variable_p (decl) + /* DR 2082 permits local variables in unevaluated contexts + within a default argument. 
*/ + && !cp_unevaluated_operand) { const char *msg = (TREE_CODE (decl) == PARM_DECL diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c index 8345396ec33..e8831b21802 100644 --- a/gcc/cp/tree.c +++ b/gcc/cp/tree.c @@ -5386,7 +5386,9 @@ cp_walk_subtrees (tree *tp, int *walk_subtrees_p, walk_tree_fn func, // walk the parameter list. Doing so causes false // positives in the pack expansion checker since the // requires parameters are introduced as pack expansions. - WALK_SUBTREE (TREE_OPERAND (*tp, 1)); + ++cp_unevaluated_operand; + result = cp_walk_tree (&REQUIRES_EXPR_REQS (*tp), func, data, pset); + --cp_unevaluated_operand; *walk_subtrees_p = 0; break; diff --git a/gcc/testsuite/g++.dg/DRs/dr2082.C b/gcc/testsuite/g++.dg/DRs/dr2082.C new file mode 100644 index 000..84bb23f63f2 --- /dev/null +++ b/gcc/testsuite/g++.dg/DRs/dr2082.C @@ -0,0 +1,12 @@ +// DR 2082 + +void f() { + int i; + extern void h(int x = sizeof(i)); +} + +class A { + void f(A* p = this) { } // { dg-error "this" } +}; + +int h(int a, int b = sizeof(a)); diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C b/gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C new
Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.
On Wed, Aug 11, 2021 at 05:55:39AM -0500, Segher Boessenkool wrote: > Hi! > > On Tue, Aug 10, 2021 at 04:46:11PM -0600, Sandra Loosemore wrote: > > OK. I used your wording verbatim for the first one. For the second > > one, I'm still pretty confused as I think it is at least theoretically > > possible on PowerPC to have a target with 64-bit long double (AIX?) that > > Some embedded and embedded-like subtargets use 64-bit long double by > default. You can also configure this on any Power target (not that it > will necessarily work ;-) ) It will work on Linux LE systems with glibc 2.32 (it may work with earlier glibcs). I've built parallel toolchains with all 3 long double formats. There are some tests in the test suite that fail if you configure 64-bit long doubles. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
Hi, I met another issue for “address taken” auto variable, see below for details: the testing case: (gcc/testsuite/gcc.dg/uninit-16.c) int foo, bar; static void decode_reloc(int reloc, int *is_alt) { if (reloc >= 20) *is_alt = 1; else if (reloc >= 10) *is_alt = 0; } void testfunc() { int alt_reloc; decode_reloc(foo, _reloc); if (alt_reloc) /* { dg-warning "may be used uninitialized" } */ bar = 42; } When compiled with -ftrivial-auto-var-init=zero -O2 -Wuninitialized -fdump-tree-all: .*gimple dump: void testfunc () { int alt_reloc; try { _1 = .DEFERRED_INIT (4, 2, 0); alt_reloc = _1; foo.0_2 = foo; decode_reloc (foo.0_2, _reloc); alt_reloc.1_3 = alt_reloc; if (alt_reloc.1_3 != 0) goto ; else goto ; : bar = 42; : } finally { alt_reloc = {CLOBBER}; } } **fre1 dump: void testfunc () { int alt_reloc; int _1; int foo.0_2; : _1 = .DEFERRED_INIT (4, 2, 0); foo.0_2 = foo; if (foo.0_2 > 19) goto ; [50.00%] else goto ; [50.00%] : goto ; [100.00%] : if (foo.0_2 > 9) goto ; [50.00%] else goto ; [50.00%] : goto ; [100.00%] : if (_1 != 0) goto ; [INV] else goto ; [INV] : bar = 42; : return; } From the above IR file after “FRE”, we can see that the major issue with this IR is: The address taken auto variable “alt_reloc” has been completely replaced by the temporary variable “_1” in all the uses of the original “alt_reloc”. The major problem with such IR is, during uninitialized analysis phase, the original use of “alt_reloc” disappeared completely. So, the warning cannot be reported. My questions: 1. Is it possible to get the original “alt_reloc” through the temporary variable “_1” with some available information recorded in the IR? 2. If not, then we have to record the relationship between “alt_reloc” and “_1” when the original “alt_reloc” is replaced by “_1” and get such relationship during Uninitialized analysis phase. Is this doable? 3. 
Looks like that for “address taken” auto variable, if we have to introduce a new temporary variable and split the call to .DEFERRED_INIT into two: temp = .DEFERRED_INIT (4, 2, 0); alt_reloc = temp; More issues might possible. Any comments and suggestions on this issue? Qing j > On Aug 11, 2021, at 11:55 AM, Richard Biener wrote: > > On August 11, 2021 6:22:00 PM GMT+02:00, Qing Zhao > wrote: >> >> >>> On Aug 11, 2021, at 10:53 AM, Richard Biener wrote: >>> >>> On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao >>> wrote: I modified the routine “gimple_add_init_for_auto_var” as the following: /* Generate initialization to automatic variable DECL based on INIT_TYPE. Build a call to internal const function DEFERRED_INIT: 1st argument: SIZE of the DECL; 2nd argument: INIT_TYPE; 3rd argument: IS_VLA, 0 NO, 1 YES; as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA). */ static void gimple_add_init_for_auto_var (tree decl, enum auto_init_type init_type, bool is_vla, gimple_seq *seq_p) { gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl)); gcc_assert (init_type > AUTO_INIT_UNINITIALIZED); tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); tree init_type_node = build_int_cst (integer_type_node, (int) init_type); tree is_vla_node = build_int_cst (integer_type_node, (int) is_vla); tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_DEFERRED_INIT, TREE_TYPE (decl), 3, decl_size, init_type_node, is_vla_node); /* If this DECL is a VLA, a temporary address variable for it has been created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl), we should use it as the LHS of the call. */ tree lhs_call = is_vla ? 
DECL_VALUE_EXPR (decl) : decl; gimplify_assign (lhs_call, call, seq_p); } With this change, the current issue is resolved, the gimple dump now is: (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1); However, there is another new issue: For the following testing case: == [opc@qinzhao-ol8u3-x86 gcc]$ cat t.c int bar; extern void decode_reloc(int *); void testfunc() { int alt_reloc; decode_reloc(_reloc); if (alt_reloc) /* { dg-warning "may be used uninitialized" } */ bar = 42; } = In the above, the auto var “alt_reloc” is address taken, then the gimple dump for it when compiled with -ftrivial-auto-var-init=zero
Re: [PATCH] c++: parameterized requires-expr as default argument [PR101725]
On 8/3/21 4:04 PM, Patrick Palka wrote: Here we're rejecting the default template argument requires (T t) { x(t); } because we consider the 't' in the requirement to be a local variable (according to local_variable_p), and we generally forbid local variables from appearing inside template arguments. We can perhaps fix this by giving special treatment to parameters introduced by requires-expressions, but DR 2082 relaxed the restriction about local variables appearing inside default arguments to permit them inside unevaluated operands thereof. So this patch just implements DR 2082 which also fixes this PR since a requires-expression is an unevaluated context. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps 11? OK for both. PR c++/101725 DR 2082 gcc/cp/ChangeLog: * cp-tree.h (unevaluated_p): Return true for REQUIRES_EXPR. * decl.c (local_variable_p_walkfn): Don't walk into unevaluated operands. * parser.c (cp_parser_primary_expression) : Never reject uses of local variables in unevaluated contexts. * tree.c (cp_walk_subtrees) : Increment cp_unevaluated_operand. Use cp_walk_tree directly instead of WALK_SUBTREE to avoid the goto. Use REQUIRES_EXPR_REQS instead of TREE_OPERAND directly. gcc/testsuite/ChangeLog: * g++.dg/DRs/dr2082.C: New test. * g++.dg/cpp2a/concepts-uneval4.C: New test. * g++.dg/cpp2a/concepts-uneval5.C: New test. 
--- gcc/cp/cp-tree.h | 3 ++- gcc/cp/decl.c | 8 gcc/cp/parser.c | 5 - gcc/cp/tree.c | 4 +++- gcc/testsuite/g++.dg/DRs/dr2082.C | 12 gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C | 12 6 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/DRs/dr2082.C create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 9a47a8787d6..6a8264b0c61 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -8494,7 +8494,8 @@ unevaluated_p (tree_code code) return (code == DECLTYPE_TYPE || code == ALIGNOF_EXPR || code == SIZEOF_EXPR - || code == NOEXCEPT_EXPR); + || code == NOEXCEPT_EXPR + || code == REQUIRES_EXPR); } /* RAII class to push/pop the access scope for T. */ diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 6fa6b9adc87..b0b492360af 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -14270,6 +14270,14 @@ static tree local_variable_p_walkfn (tree *tp, int *walk_subtrees, void * /*data*/) { + if (unevaluated_p (TREE_CODE (*tp))) +{ + /* DR 2082 permits local variables in unevaluated contexts +within a default argument. */ + *walk_subtrees = 0; + return NULL_TREE; +} + if (local_variable_p (*tp) && (!DECL_ARTIFICIAL (*tp) || DECL_NAME (*tp) == this_identifier)) return *tp; diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 47bf7d9ad1f..8b551db2c8a 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -5971,7 +5971,10 @@ cp_parser_primary_expression (cp_parser *parser, /* Check to see if DECL is a local variable in a context where that is forbidden. */ if ((parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN) - && local_variable_p (decl)) + && local_variable_p (decl) + /* DR 2082 permits local variables in unevaluated contexts + within a default argument. 
*/ + && !cp_unevaluated_operand) { const char *msg = (TREE_CODE (decl) == PARM_DECL diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c index 8345396ec33..e8831b21802 100644 --- a/gcc/cp/tree.c +++ b/gcc/cp/tree.c @@ -5386,7 +5386,9 @@ cp_walk_subtrees (tree *tp, int *walk_subtrees_p, walk_tree_fn func, // walk the parameter list. Doing so causes false // positives in the pack expansion checker since the // requires parameters are introduced as pack expansions. - WALK_SUBTREE (TREE_OPERAND (*tp, 1)); + ++cp_unevaluated_operand; + result = cp_walk_tree (&REQUIRES_EXPR_REQS (*tp), func, data, pset); + --cp_unevaluated_operand; *walk_subtrees_p = 0; break; diff --git a/gcc/testsuite/g++.dg/DRs/dr2082.C b/gcc/testsuite/g++.dg/DRs/dr2082.C new file mode 100644 index 000..84bb23f63f2 --- /dev/null +++ b/gcc/testsuite/g++.dg/DRs/dr2082.C @@ -0,0 +1,12 @@ +// DR 2082 + +void f() { + int i; + extern void h(int x = sizeof(i)); +} + +class A { + void f(A* p = this) { } // { dg-error "this" } +}; + +int h(int a, int b = sizeof(a)); diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C b/gcc/testsuite/g++.dg/cpp2a/concepts-uneval4.C new
Re: [PATCH] c++: Fix up parsing of attributes for using-directive
On 8/4/21 6:05 AM, Jakub Jelinek wrote: Hi! As I've said earlier and added xfails in gen-attrs-76.C test, https://eel.is/c++draft/namespace.udir#nt:using-directive has attribute-specifier-seq[opt] at the start, not at the end before ; as gcc is expecting. IMHO we should continue parsing at the end the GNU attributes because using namespace N __attribute__((strong));, while not supported anymore, used to be supported in the past, but my code searches for using namespace N [[gnu::strong]]; didn't reveal anything at all. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2021-08-04 Jakub Jelinek * parser.c (cp_parser_block_declaration): Call cp_parser_using_directive for C++11 attributes followed by using namespace tokens. (cp_parser_using_directive): Parse C++11 attributes at the start of the directive rather than at the end, only parse GNU attributes at the end. * g++.dg/lookup/strong-using.C: Add test using [[gnu::strong]] as well. * g++.dg/lookup/strong-using2.C: Likewise. * g++.dg/cpp0x/gen-attrs-58.C: Move alignas(int) before using namespace. * g++.dg/cpp0x/gen-attrs-59.C: Move alignas(X) before using namespace, add tests for alignas before semicolon. * g++.dg/cpp0x/gen-attrs-76.C: Remove xfails. Add test for C++11 attributes on using directive before semicolon. --- gcc/cp/parser.c.jj 2021-08-03 00:44:32.890492433 +0200 +++ gcc/cp/parser.c 2021-08-03 17:38:07.541725977 +0200 @@ -14655,6 +14655,7 @@ cp_parser_block_declaration (cp_parser * /* Peek at the next token to figure out which kind of declaration is present. */ cp_token *token1 = cp_lexer_peek_token (parser->lexer); + size_t attr_idx; /* If the next keyword is `asm', we have an asm-definition. */ if (token1->keyword == RID_ASM) @@ -14708,6 +14709,18 @@ cp_parser_block_declaration (cp_parser * /* If the next token is `static_assert' we have a static assertion. 
*/ else if (token1->keyword == RID_STATIC_ASSERT) cp_parser_static_assert (parser, /*member_p=*/false); + /* If the next tokens after attributes is `using namespace', then we have + a using-directive. */ + else if ((attr_idx = cp_parser_skip_std_attribute_spec_seq (parser, 1)) != 1 + && cp_lexer_peek_nth_token (parser->lexer, + attr_idx)->keyword == RID_USING + && cp_lexer_peek_nth_token (parser->lexer, + attr_idx + 1)->keyword == RID_NAMESPACE) Let's use cp_lexer_nth_token_is_keyword here. OK with that change. +{ + if (statement_p) + cp_parser_commit_to_tentative_parse (parser); + cp_parser_using_directive (parser); +} /* Anything else must be a simple-declaration. */ else cp_parser_simple_declaration (parser, !statement_p, @@ -21394,14 +21407,21 @@ cp_parser_alias_declaration (cp_parser* /* Parse a using-directive. using-directive: - using namespace :: [opt] nested-name-specifier [opt] - namespace-name ; */ + attribute-specifier-seq [opt] using namespace :: [opt] + nested-name-specifier [opt] namespace-name ; */ static void cp_parser_using_directive (cp_parser* parser) { tree namespace_decl; - tree attribs; + tree attribs = cp_parser_std_attribute_spec_seq (parser); + if (cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON)) +{ + /* Error during attribute parsing that resulted in skipping +to next semicolon. */ + cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON); + return; +} /* Look for the `using' keyword. */ cp_parser_require_keyword (parser, RID_USING, RT_USING); @@ -21418,8 +21438,9 @@ cp_parser_using_directive (cp_parser* pa /* Get the namespace being used. */ namespace_decl = cp_parser_namespace_name (parser); cp_warn_deprecated_use_scopes (namespace_decl); - /* And any specified attributes. */ - attribs = cp_parser_attributes_opt (parser); + /* And any specified GNU attributes. */ + if (cp_next_tokens_can_be_gnu_attribute_p (parser)) +attribs = chainon (attribs, cp_parser_gnu_attributes_opt (parser)); /* Update the symbol table. 
*/ finish_using_directive (namespace_decl, attribs); --- gcc/testsuite/g++.dg/lookup/strong-using.C.jj 2020-01-12 11:54:37.197401580 +0100 +++ gcc/testsuite/g++.dg/lookup/strong-using.C 2021-08-03 17:12:05.872281490 +0200 @@ -8,3 +8,12 @@ namespace A using namespace B __attribute__ ((strong)); // { dg-warning "no longer supported" "" } } + +namespace C +{ + namespace D // { dg-message "inline namespace" } + { + } + + [[gnu::strong]] using namespace D; // { dg-warning "no longer supported" "" } +} --- gcc/testsuite/g++.dg/lookup/strong-using2.C.jj 2020-01-12 11:54:37.197401580 +0100 +++ gcc/testsuite/g++.dg/lookup/strong-using2.C 2021-08-03
*PING* [PATCH, part2] PR fortran/98411 [10/11/12 Regression] Pointless: Array larger than ‘-fmax-stack-var-size=’, ...
*Ping* > Gesendet: Mittwoch, 04. August 2021 um 23:09 Uhr > Von: "Harald Anlauf" > An: "fortran" , "gcc-patches" > Betreff: [PATCH, part2] PR fortran/98411 [10/11/12 Regression] Pointless: > Array larger than ‘-fmax-stack-var-size=’, ... > > Dear all, > > here's the second part that should fix this regression for good. > The patch also adjusts the warning message to make it easier to > understand, using the suggestion by Tobias (see PR). > > Since F2018 in principle makes RECURSIVE the default, which might > conflict with the purpose of the testcase, I chose to change the > options to include -std=f2008, and to verify that implicit SAVE > works the same as explicit SAVE. > > Regtested on x86_64-pc-linux-gnu. OK for affected branches? > > Thanks, > Harald > > > Fortran: fix pointless warning for static variables > > gcc/fortran/ChangeLog: > > PR fortran/98411 > * trans-decl.c (gfc_finish_var_decl): Adjust check to handle > implicit SAVE as well as variables in the main program. Improve > warning message text. > > gcc/testsuite/ChangeLog: > > PR fortran/98411 > * gfortran.dg/pr98411.f90: Adjust testcase options to restrict to > F2008, and verify case of implicit SAVE. > >
*PING* Re: [PATCH] PR fortran/100950 - ICE in output_constructor_regular_field, at varasm.c:5514
*Ping* > Gesendet: Dienstag, 03. August 2021 um 23:17 Uhr > Von: "Harald Anlauf" > An: "Harald Anlauf" > Cc: "Tobias Burnus" , "Bernhard Reutner-Fischer" > , "Harald Anlauf via Gcc-patches" > , "fortran" > Betreff: Re: [PATCH] PR fortran/100950 - ICE in > output_constructor_regular_field, at varasm.c:5514 > > Here's now my third attempt to fix this PR, taking into account > the comments by Tobias and Bernhard. > > > > On 10.06.21 20:52, Harald Anlauf via Fortran wrote: > > > > +static bool > > > > +substring_has_constant_len (gfc_expr *e) > > > > +{ > > > > + ptrdiff_t istart, iend; > > > > + size_t length; > > > > + bool equal_length = false; > > > > + > > > > + if (e->ts.type != BT_CHARACTER > > > > + || !e->ref > > > > + || e->ref->type != REF_SUBSTRING > > > > > > Is there a reason why you do not handle: > > > > > > type t > > >character(len=5) :: str1 > > >character(len=:), allocatable :: str2 > > > end type > > > type(t) :: x > > > > > > allocate(x%str2, source="abd") > > > if (len (x%str)) /= 1) ... > > > if (len (x%str2(1:2) /= 2) ... > > > etc. > > > > > > Namely: Search the last_ref = expr->ref->next->next ...? > > > and then check that lastref? > > The mentioned search is now implemented. > > Note, however, that gfc_simplify_len still won't handle neither > deferred strings nor their substrings. > > I think there is nothing to simplify at compile time here. Otherwise > there would be a conflict/inconsistency with type parameter inquiry, > see F2018:9.4.5(2): > > "A deferred type parameter of a pointer that is not associated or > of an unallocated allocatable variable shall not be inquired about." > > > >* * * > > > > > > Slightly unrelated: I think the following does not violate > > > F2018's R916 / C923 – but is rejected, namely: > > >R916 type-param-inquiry is designator % type-param-name > > > the latter is 'len' or 'kind' for intrinsic types. And: > > >R901 designator is ... 
> > > or substring > > > But > > > > > > character(len=5) :: str > > > print *, str(1:3)%len > > > end > > > > > > fails with > > > > > > 2 | print *, str(1:3)%len > > >| 1 > > > Error: Syntax error in PRINT statement at (1) > > > > > > > > > Assuming you don't want to handle it, can you open a new PR? > > > Thanks! > > I tried to look into this, but there appear to be several unrelated > issues requiring a separate treatment. I therefore opened: > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101735 > > > > > + istart = gfc_mpz_get_hwi (e->ref->u.ss.start->value.integer); > > > > + iend = gfc_mpz_get_hwi (e->ref->u.ss.end->value.integer); > > > > + length = gfc_mpz_get_hwi > > > > (e->ref->u.ss.length->length->value.integer); > > > > + > > > > + if (istart <= iend) > > > > +{ > > > > + if (istart < 1) > > > > + { > > > > + gfc_error ("Substring start index (%ld) at %L below 1", > > > > + (long) istart, >ref->u.ss.start->where); > > > > > > As mentioned by Bernhard, you could use HOST_WIDE_INT_PRINT_DEC. > > > > > > (It probably only matters on Windows which uses long == int = 32bit for > > > strings longer than INT_MAX.) > > Done. > > The updated patch regtests fine. OK? > > Thanks, > Harald > > > Fortran - simplify length of substring with constant bounds > > gcc/fortran/ChangeLog: > > PR fortran/100950 > * simplify.c (substring_has_constant_len): New. > (gfc_simplify_len): Handle case of substrings with constant > bounds. > > gcc/testsuite/ChangeLog: > > PR fortran/100950 > * gfortran.dg/pr100950.f90: New test. > >
[PATCH] PR fortran/99351 - ICE in gfc_finish_var_decl, at fortran/trans-decl.c:695
Dear all, the checks for the STAT= and ERRMSG= arguments to the coarray SYNC statements did not properly handle several cases, such as named constants (parameters). While fixing this, I adjusted the code similarly to what was recently done for (DE)ALLOCATE. We now also accept function references with data pointer result. (See also PR101652). Regtested on x86_64-pc-linux-gnu. OK for mainline? Thanks, Harald Fortran: fix checks for STAT= and ERRMSG= arguments of SYNC ALL/SYNC IMAGES gcc/fortran/ChangeLog: PR fortran/99351 * match.c (sync_statement): Replace %v code by %e in gfc_match to allow for function references as STAT and ERRMSG arguments. * resolve.c (resolve_sync): Adjust checks of STAT= and ERRMSG= to being definable arguments. Function references with a data pointer result are accepted. * trans-stmt.c (gfc_trans_sync): Adjust assertion. gcc/testsuite/ChangeLog: PR fortran/99351 * gfortran.dg/coarray_sync.f90: New test. * gfortran.dg/coarray_3.f90: Adjust to change error messages. diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c index b1105481099..16502da001d 100644 --- a/gcc/fortran/match.c +++ b/gcc/fortran/match.c @@ -3855,7 +3855,7 @@ sync_statement (gfc_statement st) for (;;) { - m = gfc_match (" stat = %v", ); + m = gfc_match (" stat = %e", ); if (m == MATCH_ERROR) goto syntax; if (m == MATCH_YES) @@ -3875,7 +3875,7 @@ sync_statement (gfc_statement st) break; } - m = gfc_match (" errmsg = %v", ); + m = gfc_match (" errmsg = %e", ); if (m == MATCH_ERROR) goto syntax; if (m == MATCH_YES) diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index 592364689f9..959f0bed4fb 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -10236,19 +10236,27 @@ resolve_sync (gfc_code *code) /* Check STAT. 
*/ gfc_resolve_expr (code->expr2); - if (code->expr2 - && (code->expr2->ts.type != BT_INTEGER || code->expr2->rank != 0 - || code->expr2->expr_type != EXPR_VARIABLE)) -gfc_error ("STAT= argument at %L must be a scalar INTEGER variable", - >expr2->where); + if (code->expr2) +{ + if (code->expr2->ts.type != BT_INTEGER || code->expr2->rank != 0) + gfc_error ("STAT= argument at %L must be a scalar INTEGER variable", + >expr2->where); + else + gfc_check_vardef_context (code->expr2, false, false, false, + _("STAT variable")); +} /* Check ERRMSG. */ gfc_resolve_expr (code->expr3); - if (code->expr3 - && (code->expr3->ts.type != BT_CHARACTER || code->expr3->rank != 0 - || code->expr3->expr_type != EXPR_VARIABLE)) -gfc_error ("ERRMSG= argument at %L must be a scalar CHARACTER variable", - >expr3->where); + if (code->expr3) +{ + if (code->expr3->ts.type != BT_CHARACTER || code->expr3->rank != 0) + gfc_error ("ERRMSG= argument at %L must be a scalar CHARACTER variable", + >expr3->where); + else + gfc_check_vardef_context (code->expr3, false, false, false, + _("ERRMSG variable")); +} } diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index 7cbdef7a304..11df1863bad 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -1226,7 +1226,8 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type) if (code->expr2) { - gcc_assert (code->expr2->expr_type == EXPR_VARIABLE); + gcc_assert (code->expr2->expr_type == EXPR_VARIABLE + || code->expr2->expr_type == EXPR_FUNCTION); gfc_init_se (, NULL); gfc_conv_expr_val (, code->expr2); stat = argse.expr; @@ -1236,7 +1237,8 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type) if (code->expr3 && flag_coarray == GFC_FCOARRAY_LIB) { - gcc_assert (code->expr3->expr_type == EXPR_VARIABLE); + gcc_assert (code->expr3->expr_type == EXPR_VARIABLE + || code->expr3->expr_type == EXPR_FUNCTION); gfc_init_se (, NULL); argse.want_pointer = 1; gfc_conv_expr (, code->expr3); diff --git a/gcc/testsuite/gfortran.dg/coarray_3.f90 
b/gcc/testsuite/gfortran.dg/coarray_3.f90 index d152ce1b2bd..1c294cd0189 100644 --- a/gcc/testsuite/gfortran.dg/coarray_3.f90 +++ b/gcc/testsuite/gfortran.dg/coarray_3.f90 @@ -11,11 +11,11 @@ character(len=30) :: str(2) critical fkl ! { dg-error "Syntax error in CRITICAL" } end critical fkl ! { dg-error "Expecting END PROGRAM" } -sync all (stat=1) ! { dg-error "Syntax error in SYNC ALL" } +sync all (stat=1) ! { dg-error "Non-variable expression" } sync all ( stat = n,stat=k) ! { dg-error "Redundant STAT" } sync memory (errmsg=str) ! { dg-error "must be a scalar CHARACTER variable" } sync memory (errmsg=n) ! { dg-error "must be a scalar CHARACTER variable" } -sync images (*, stat=1.0) ! { dg-error "Syntax error in SYNC IMAGES" } +sync images (*, stat=1.0) ! { dg-error "must be a scalar INTEGER variable" } sync images (-1) ! { dg-error "must between 1 and num_images" }
Re: [PATCH] c++: Optimize constinit thread_local vars [PR101786]
On 8/6/21 4:16 AM, Jakub Jelinek wrote: Hi! The paper that introduced constinit mentioned in rationale that constinit can be used on externs as well and that it can be used to avoid the thread_local initialization wrappers, because the standard requires that if constinit is present on any declaration, it is also present on the initialization declaration, even if it is in some other TU etc. There is a small problem though, we use the tls wrappers not just if the thread_local variable needs dynamic initialization, but also when it has static initialization, but non-trivial destructor, as the "dynamic initialization" in that case needs to register the destructor. So, the following patch optimizes constinit thread_local vars only if we can prove they will not have non-trivial destructors. That includes the case where we have incomplete type where we don't know and need to conservatively assume the type will have non-trivial destructor at the initializing declaration side. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. 2021-08-06 Jakub Jelinek PR c++/101786 * decl2.c (var_defined_without_dynamic_init): Return true for DECL_DECLARED_CONSTINIT_P with complete type and trivial destructor. * g++.dg/cpp2a/constinit16.C: New test. --- gcc/cp/decl2.c.jj 2021-07-02 21:59:12.359171627 +0200 +++ gcc/cp/decl2.c 2021-08-05 16:09:39.833599188 +0200 @@ -3447,6 +3447,12 @@ set_guard (tree guard) static bool var_defined_without_dynamic_init (tree var) { + /* constinit vars are guaranteed to not have dynamic initializer, + but still registering the destructor counts as dynamic initialization. */ + if (DECL_DECLARED_CONSTINIT_P (var) + && COMPLETE_TYPE_P (TREE_TYPE (var)) + && !TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (var))) +return true; /* If it's defined in another TU, we can't tell. 
*/ if (DECL_EXTERNAL (var)) return false; --- gcc/testsuite/g++.dg/cpp2a/constinit16.C.jj 2021-08-05 15:50:49.702463664 +0200 +++ gcc/testsuite/g++.dg/cpp2a/constinit16.C2021-08-05 16:14:52.893202685 +0200 @@ -0,0 +1,21 @@ +// PR c++/101786 +// { dg-do compile { target c++20 } } +// { dg-add-options tls } +// { dg-require-alias "" } +// { dg-require-effective-target tls_runtime } +// { dg-final { scan-assembler-not "_ZTH17mythreadlocalvar1" } } +// { dg-final { scan-assembler "_ZTH17mythreadlocalvar2" } } +// { dg-final { scan-assembler-not "_ZTH17mythreadlocalvar3" } } +// { dg-final { scan-assembler "_ZTH17mythreadlocalvar4" } } + +extern thread_local constinit int mythreadlocalvar1; +struct S; +extern thread_local constinit S mythreadlocalvar2; +struct T { int t; }; +extern thread_local constinit T mythreadlocalvar3; +struct U { int u; ~U (); }; +extern thread_local constinit U mythreadlocalvar4; +int foo () { return mythreadlocalvar1; } +S *bar () { return } +T *baz () { return } +U *qux () { return } Jakub
Re: [PATCH] c++: suppress all warnings on member pointers to work around ICE [PR101219]
On 8/6/21 11:34 AM, Sergei Trofimovich wrote: On Thu, 29 Jul 2021 11:41:39 -0400 Jason Merrill wrote: On 7/22/21 7:15 PM, Sergei Trofimovich wrote: From: Sergei Trofimovich r12-1804 ("cp: add support for per-location warning groups.") among other things removed warning suppression from a few places including ptrmemfuncs. Currently ptrmemfuncs don't have valid BINFO attached which causes ICEs in access checks: crash_signal gcc/toplev.c:328 perform_or_defer_access_check(tree_node*, tree_node*, tree_node*, int, access_failure_info*) gcc/cp/semantics.c:490 finish_non_static_data_member(tree_node*, tree_node*, tree_node*) gcc/cp/semantics.c:2208 ... The change suppresses warnings again until we provide BINFOs for ptrmemfuncs. We don't need BINFOs for PMFs, we need to avoid paths that expect them. It looks like the problem is with tsubst_copy_and_build calling finish_non_static_data_member instead of build_ptrmemfunc_access_expr. Sounds good. I'm not sure what would be the best way to match it. Here is my attempt seems to survive all regtests: --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -20530,7 +20530,13 @@ tsubst_copy_and_build (tree t, if (member == error_mark_node) RETURN (error_mark_node); - if (TREE_CODE (member) == FIELD_DECL) + if (object_type && TYPE_PTRMEMFUNC_P(object_type) + && TREE_CODE (member) == FIELD_DECL) + { + r = build_ptrmemfunc_access_expr (object, DECL_NAME(member)); + RETURN (r); + } + else if (TREE_CODE (member) == FIELD_DECL) { r = finish_non_static_data_member (member, object, NULL_TREE); if (TREE_CODE (r) == COMPONENT_REF) PR c++/101219 gcc/cp/ChangeLog: * typeck.c (build_ptrmemfunc_access_expr): Suppress all warnings to avoid ICE. gcc/testsuite/ChangeLog: * g++.dg/torture/pr101219.C: New test. This doesn't need to be in torture; it has nothing to do with optimization. Aha, moved to gcc/testsuite/g++.dg/warn/pr101219.C. 
--- /dev/null +++ b/gcc/testsuite/g++.dg/warn/pr101219.C @@ -0,0 +1,11 @@ +/* PR c++/101219 - ICE on use of uninitialized memfun pointer + { dg-do compile } + { dg-options "-Wall" } */ + +struct S { void m(); }; + +template bool f() { + void (S::*mp)(); + + return ::m == mp; // no warning emitted here (no instantiation) +} Another question: Is it expected that gcc generates no warnings here? It's an uninstantiated function (-1 for warn), but from what I understand it's guaranteed to generate comparison with uninitialized data if it ever gets instantiated. Given that we used to ICE in warning code gcc could possibly flag it? (+1 for warn) Generally it's desirable to diagnose templates for which no valid instantiation is possible. It seems reasonable in most cases to also warn about templates for which all instantiations would warn. But uninitialized warnings rely on flow analysis that we only do on instantiated functions, and in any case the ICE doesn't depend on mp being uninitialized; I get the same crash if I add = 0 to the declaration. + if (object_type && TYPE_PTRMEMFUNC_P(object_type) Missing space before (. + && TREE_CODE (member) == FIELD_DECL) + { + r = build_ptrmemfunc_access_expr (object, DECL_NAME(member)); And here. Jason
Re: [PATCH] c++: recognize class-scope non-template dguides [PR79501]
On 8/11/21 2:29 PM, Patrick Palka wrote: On Wed, 11 Aug 2021, Jason Merrill wrote: On 8/9/21 1:16 PM, Patrick Palka wrote: It looks like we still don't recognize class-scope non-template deduction guides even after r12-2260. This is because deduction guides are handled in cp_parser_init_declarator after calling cp_parser_declarator, but in the class-scope non-template case we call cp_parser_declarator directly from cp_parser_member_declaration. This patch makes us handle deduction guides in cp_parser_member_declaration as well. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk? PR c++/79501 gcc/cp/ChangeLog: * parser.c (cp_parser_maybe_adjust_declarator_for_dguide): New, split out from ... (cp_parser_init_declarator): ... here. (cp_parser_member_declaration): Use it. gcc/testsuite/ChangeLog: * g++.dg/cpp1z/class-deduction98.C: New test. --- gcc/cp/parser.c | 54 +-- .../g++.dg/cpp1z/class-deduction98.C | 10 2 files changed, 49 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction98.C diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index d4da25ca703..04fdeec32ab 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -21869,6 +21869,30 @@ warn_about_ambiguous_parse (const cp_decl_specifier_seq *decl_specifiers, } } +/* If the function declarator DECLARATOR names a class template, adjust + it to name its deduction guides and return true. Otherwise return false. 
*/ + +static bool +cp_parser_maybe_adjust_declarator_for_dguide (cp_parser *parser, + cp_declarator *declarator) +{ + gcc_assert (function_declarator_p (declarator)); + + cp_declarator *id = get_id_declarator (declarator); + tree name = id->u.id.unqualified_name; + parser->scope = id->u.id.qualifying_scope; + tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc); + if (tmpl + && (DECL_CLASS_TEMPLATE_P (tmpl) + || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl))) +{ + id->u.id.unqualified_name = dguide_name (tmpl); + id->u.id.sfk = sfk_deduction_guide; + return true; +} + return false; +} + /* Declarators [gram.dcl.decl] */ /* Parse an init-declarator. @@ -22045,25 +22069,16 @@ cp_parser_init_declarator (cp_parser* parser, if (function_declarator_p (declarator)) { - /* Handle C++17 deduction guides. */ + /* Handle C++17 deduction guides. Note that class-scope +non-template deduction guides are handled in +cp_parser_member_declaration. */ if (!decl_specifiers->type && !decl_specifiers->any_type_specifiers_p && ctor_dtor_or_conv_p <= 0 && cxx_dialect >= cxx17) - { - cp_declarator *id = get_id_declarator (declarator); - tree name = id->u.id.unqualified_name; - parser->scope = id->u.id.qualifying_scope; - tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc); - if (tmpl - && (DECL_CLASS_TEMPLATE_P (tmpl) - || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl))) - { - id->u.id.unqualified_name = dguide_name (tmpl); - id->u.id.sfk = sfk_deduction_guide; - ctor_dtor_or_conv_p = 1; - } - } + if (cp_parser_maybe_adjust_declarator_for_dguide (parser, + declarator)) + ctor_dtor_or_conv_p = 1; if (!member_p && !cp_parser_error_occurred (parser)) warn_about_ambiguous_parse (decl_specifiers, declarator); @@ -26719,6 +26734,15 @@ cp_parser_member_declaration (cp_parser* parser) cp_lexer_consume_token (parser->lexer); goto out; } + /* Handle class-scope non-template C++17 deduction guides. 
*/ + if (function_declarator_p (declarator) + && !decl_specifiers.type + && !decl_specifiers.any_type_specifiers_p + && ctor_dtor_or_conv_p <= 0 + && cxx_dialect >= cxx17) Looks like you could factor more of the tests into the new function. That works nicely, like so? Bootstrap and regtesting in progress. OK. -- >8 -- Subject: [PATCH] c++: recognize class-scope non-template dguides [PR79501] It looks like we still don't recognize class-scope non-template deduction guides even after r12-2260. This is because deduction guides are tagged as such in cp_parser_init_declarator after calling cp_parser_declarator, but in cp_parser_member_declaration we call cp_parser_declarator directly. This patch makes us handle deduction guides in cp_parser_member_declaration as well. PR c++/79501 gcc/cp/ChangeLog: * parser.c (maybe_adjust_declarator_for_dguide): New, split out from ...
Re: [PATCH] c++: most vexing parse and braced CTAD [PR89062]
On 8/9/21 1:15 PM, Patrick Palka wrote: Here grokdeclarator is emitting the error error: class template placeholder ‘Foo’ not permitted in this context during the tentative (and ultimately futile) parse of 'x' as a function declaration. This happens because when parsing 'Foo{1}', cp_parser_parameter_declaration yields a parameter declaration with no declarator and whose type is a CTAD placeholder, and stops short of consuming the '{'. The caller cp_parser_parameter_declaration_list then calls grokdeclarator on this declarator, hence the error, and soon thereafter we abort this tentative parse since the next token '{' doesn't make sense in the context of a parameter list. Note that we don't have this issue when using only parentheses Foo x(Foo(1)); because in this case cp_parser_direct_declarator (called indirectly from c_p_p_d) instead consumes the '(' and returns a cp_error_declarator rather than a NULL declarator (and also simulates a parse error), and grokdeclarator exits early for this declarator without emitting any error. Since grokdeclarator doesn't take a 'complain' parameter, to fix this we need to avoid calling grokdeclarator in this situation. To that end this patch makes c_p_p_d simulate an error when a construct is a CTAD expression and definitely not a parameter declaration, so that c_p_p_d_l can avoid calling grokdeclarator by checking if an error has been simulated. Alternatively we could keep all this logic inside c_p_p_d_l and not touch c_p_p_d at all, but this approach seems slightly less ad hoc. On the other hand, it seems weird that cp_parser_direct_declarator (with flags=CP_PARSER_DECLARATOR_EITHER) returns cp_error_declarator for 'Foo(1)' (and consumes the '(') but NULL for 'Foo{1}' (and doesn't consume the '{'), and perhaps this issue could be fixed by returning cp_error_declarator in the latter case as well, but I didn't try this approach. From the comment, this seems to be because ObjC++ allows { at the end of the parameter-declaration-list. 
No idea what that would mean. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk? OK. PR c++/89062 gcc/cp/ChangeLog: * parser.c (cp_parser_parameter_declaration_list): Don't call grokdeclarator if cp_parser_error_occurred. (cp_parser_parameter_declaration): Simulate an error if gcc/testsuite/ChangeLog: * g++.dg/cpp1z/class-deduction97.C: New test. --- gcc/cp/parser.c| 17 + gcc/testsuite/g++.dg/cpp1z/class-deduction97.C | 6 ++ 2 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction97.C diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 8b551db2c8a..d4da25ca703 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -24061,7 +24061,7 @@ cp_parser_parameter_declaration_list (cp_parser* parser, cp_parser_flags flags) and warn in grokparms if appropriate. */ deprecated_state = DEPRECATED_SUPPRESS; - if (parameter) + if (parameter && !cp_parser_error_occurred (parser)) { decl = grokdeclarator (parameter->declarator, >decl_specifiers, @@ -24276,7 +24276,7 @@ cp_parser_parameter_declaration (cp_parser *parser, parser->default_arg_ok_p = false; /* After seeing a decl-specifier-seq, if the next token is not a -"(", there is no possibility that the code is a valid +"(" or "{", there is no possibility that the code is a valid expression. Therefore, if parsing tentatively, we commit at this point. */ if (!parser->in_template_argument_list_p @@ -24289,9 +24289,18 @@ cp_parser_parameter_declaration (cp_parser *parser, of some object of type "char" to "int". 
*/ && !parser->in_type_id_in_expr_p && cp_parser_uncommitted_to_tentative_parse_p (parser) - && cp_lexer_next_token_is_not (parser->lexer, CPP_OPEN_BRACE) && cp_lexer_next_token_is_not (parser->lexer, CPP_OPEN_PAREN)) - cp_parser_commit_to_tentative_parse (parser); + { + if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE)) + { + if (decl_specifiers.type + && template_placeholder_p (decl_specifiers.type)) + /* This is a CTAD expression, not a parameter declaration. */ + cp_parser_simulate_error (parser); > + } + else + cp_parser_commit_to_tentative_parse (parser); + } /* Parse the declarator. */ declarator_token_start = token; declarator = cp_parser_declarator (parser, diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction97.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction97.C new file mode 100644 index 000..32818681d8f --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction97.C @@ -0,0 +1,6 @@ +// PR c++/89062 +// { dg-do
Re: [PATCH] c++: recognize class-scope non-template dguides [PR79501]
On Wed, 11 Aug 2021, Jason Merrill wrote: > On 8/9/21 1:16 PM, Patrick Palka wrote: > > It looks like we still don't recognize class-scope non-template > > deduction guides even after r12-2260. This is because deduction guides > > are handled in cp_parser_init_declarator after calling > > cp_parser_declarator, but in the class-scope non-template case we call > > cp_parser_declarator directly from cp_parser_member_declaration. > > > > This patch makes us handle deduction guides in cp_parser_member_declaration > > as well. > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for > > trunk? > > > > PR c++/79501 > > > > gcc/cp/ChangeLog: > > > > * parser.c (cp_parser_maybe_adjust_declarator_for_dguide): New, > > split out from ... > > (cp_parser_init_declarator): ... here. > > (cp_parser_member_declaration): Use it. > > > > gcc/testsuite/ChangeLog: > > > > * g++.dg/cpp1z/class-deduction98.C: New test. > > --- > > gcc/cp/parser.c | 54 +-- > > .../g++.dg/cpp1z/class-deduction98.C | 10 > > 2 files changed, 49 insertions(+), 15 deletions(-) > > create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction98.C > > > > diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c > > index d4da25ca703..04fdeec32ab 100644 > > --- a/gcc/cp/parser.c > > +++ b/gcc/cp/parser.c > > @@ -21869,6 +21869,30 @@ warn_about_ambiguous_parse (const > > cp_decl_specifier_seq *decl_specifiers, > > } > > } > > +/* If the function declarator DECLARATOR names a class template, adjust > > + it to name its deduction guides and return true. Otherwise return > > false. 
*/ > > + > > +static bool > > +cp_parser_maybe_adjust_declarator_for_dguide (cp_parser *parser, > > + cp_declarator *declarator) > > +{ > > + gcc_assert (function_declarator_p (declarator)); > > + > > + cp_declarator *id = get_id_declarator (declarator); > > + tree name = id->u.id.unqualified_name; > > + parser->scope = id->u.id.qualifying_scope; > > + tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc); > > + if (tmpl > > + && (DECL_CLASS_TEMPLATE_P (tmpl) > > + || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl))) > > +{ > > + id->u.id.unqualified_name = dguide_name (tmpl); > > + id->u.id.sfk = sfk_deduction_guide; > > + return true; > > +} > > + return false; > > +} > > + > > /* Declarators [gram.dcl.decl] */ > > /* Parse an init-declarator. > > @@ -22045,25 +22069,16 @@ cp_parser_init_declarator (cp_parser* parser, > > if (function_declarator_p (declarator)) > > { > > - /* Handle C++17 deduction guides. */ > > + /* Handle C++17 deduction guides. Note that class-scope > > +non-template deduction guides are handled in > > +cp_parser_member_declaration. 
*/ > > if (!decl_specifiers->type > > && !decl_specifiers->any_type_specifiers_p > > && ctor_dtor_or_conv_p <= 0 > > && cxx_dialect >= cxx17) > > - { > > - cp_declarator *id = get_id_declarator (declarator); > > - tree name = id->u.id.unqualified_name; > > - parser->scope = id->u.id.qualifying_scope; > > - tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc); > > - if (tmpl > > - && (DECL_CLASS_TEMPLATE_P (tmpl) > > - || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl))) > > - { > > - id->u.id.unqualified_name = dguide_name (tmpl); > > - id->u.id.sfk = sfk_deduction_guide; > > - ctor_dtor_or_conv_p = 1; > > - } > > - } > > + if (cp_parser_maybe_adjust_declarator_for_dguide (parser, > > + declarator)) > > + ctor_dtor_or_conv_p = 1; > > if (!member_p && !cp_parser_error_occurred (parser)) > > warn_about_ambiguous_parse (decl_specifiers, declarator); > > @@ -26719,6 +26734,15 @@ cp_parser_member_declaration (cp_parser* parser) > > cp_lexer_consume_token (parser->lexer); > > goto out; > > } > > + /* Handle class-scope non-template C++17 deduction guides. */ > > + if (function_declarator_p (declarator) > > + && !decl_specifiers.type > > + && !decl_specifiers.any_type_specifiers_p > > + && ctor_dtor_or_conv_p <= 0 > > + && cxx_dialect >= cxx17) > > Looks like you could factor more of the tests into the new function. That works nicely, like so? Bootstrap and regtesting in progress. -- >8 -- Subject: [PATCH] c++: recognize class-scope non-template dguides [PR79501] It looks like we still don't recognize class-scope non-template deduction guides even after r12-2260. This is because deduction guides are tagged as such in cp_parser_init_declarator after calling cp_parser_declarator, but in cp_parser_member_declaration we call cp_parser_declarator directly. This patch makes us handle deduction guides
Re: [PATCH] c++: Fix ICE on defaulted spaceship with pointer return type [PR94162]
On 8/11/21 10:01 AM, Jakub Jelinek wrote: On Wed, Aug 11, 2021 at 09:42:56AM -0400, Jason Merrill wrote: Checking CLASS_TYPE_P && decl_in_std_namespace (TYPE_MAIN_DECL) before looking at the name makes sense to me. CLASS_TYPE_P is cheap, but isn't decl_in_std_namespace, especially when it needs to walk inline namespaces, better done only if we get a match, so like below? Though I can do it even in the first if if you think it is better... Let's do it first; no need to micro-optimize this function. OK with that change. 2021-08-11 Jakub Jelinek gcc/cp/ PR c++/94162 * method.c (cat_tag_for): Return cc_last for !CLASS_TYPE_P or for classes not in std namespace. gcc/testsuite/ PR c++/99429 * g++.dg/cpp2a/spaceship-synth11.C: New test. PR c++/94162 * g++.dg/cpp2a/spaceship-synth-neg6.C: New test. --- gcc/cp/method.c.jj 2021-08-09 15:03:00.923206463 +0200 +++ gcc/cp/method.c 2021-08-11 15:52:27.157437691 +0200 @@ -1029,10 +1029,13 @@ is_cat (tree type, comp_cat_tag tag) static comp_cat_tag cat_tag_for (tree type) { + if (!CLASS_TYPE_P (type)) +return cc_last; for (int i = 0; i < cc_last; ++i) { comp_cat_tag tag = (comp_cat_tag)i; - if (is_cat (type, tag)) + if (is_cat (type, tag) + && decl_in_std_namespace_p (TYPE_MAIN_DECL (type))) return tag; } return cc_last; --- gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C.jj 2021-08-11 15:49:05.267204333 +0200 +++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C 2021-08-11 15:49:05.267204333 +0200 @@ -0,0 +1,29 @@ +// PR c++/99429 +// { dg-do compile { target c++20 } } + +namespace std { +struct strong_ordering { + int _v; + constexpr strong_ordering (int v) :_v(v) {} + constexpr operator int (void) const { return _v; } + static const strong_ordering less; + static const strong_ordering equal; + static const strong_ordering greater; +}; +constexpr strong_ordering strong_ordering::less = -1; +constexpr strong_ordering strong_ordering::equal = 0; +constexpr strong_ordering strong_ordering::greater = 1; +} + +template +struct duration { 
+ static constexpr const long period = N; + constexpr duration (void) = default; + constexpr duration (const duration& d) = default; + constexpr bool operator== (const duration& d) const = default; + constexpr bool operator<=> (const duration& d) const = default; + long _d; +}; + +using nanoseconds = duration<1>; +using microseconds = duration; --- gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C.jj2021-08-11 15:49:05.268204320 +0200 +++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C 2021-08-11 15:49:05.268204320 +0200 @@ -0,0 +1,11 @@ +// PR c++/94162 +// { dg-do compile { target c++20 } } + +#include + +struct S { + int a; // { dg-error "three-way comparison of 'S::a' has type 'std::strong_ordering', which does not convert to 'int\\*'" } + int *operator<=>(const S&) const = default; +}; + +bool b = S{} < S{}; // { dg-error "use of deleted function 'constexpr int\\* S::operator<=>\\\(const S&\\\) const'" } Jakub
Re: [PATCH] c++: Improve memory usage of subsumption [PR100828]
On 8/11/21 10:53 AM, Patrick Palka wrote: On Wed, 11 Aug 2021, Jason Merrill wrote: On 8/9/21 5:07 PM, Patrick Palka wrote: On Wed, Jul 28, 2021 at 4:42 PM Jason Merrill wrote: On 7/19/21 6:05 PM, Patrick Palka wrote: Constraint subsumption is implemented in two steps. The first step computes the disjunctive (or conjunctive) normal form of one of the constraints, and the second step verifies that each clause in the decomposed form implies the other constraint. Performing these two steps separately is problematic because in the first step the disjunctive normal form can be exponentially larger than the original constraint, and by computing it ahead of time we'd have to keep all of it in memory. This patch fixes this exponential blowup in memory usage by interleaving these two steps, so that as soon as we decompose one clause we check implication for it. In turn, memory usage during subsumption is now worst case linear in the size of the constraints rather than exponential, and so we can safely remove the hard limit of 16 clauses without introducing runaway memory usage on some inputs. (Note the _time_ complexity of subsumption is still exponential in the worst case.) In order for this to work we need formula::branch to prepend the copy of the current clause directly after the current clause rather than at the end of the list, so that we fully decompose a clause shortly after creating it. Otherwise we'd end up accumulating exponentially many (partially decomposed) clauses in memory anyway. Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on range-v3 and cmcstl2. Does this look OK for trunk and perhaps 11? OK for trunk. Thanks a lot, patch committed to trunk as r12-2658. Since this low complexity limit was introduced in GCC 10, what do you think about increasing the limit from 16 to say 128 in the 10/11 release branches as a relatively safe stopgap? Now that 11.2 is out, go ahead and apply this patch to the 11 branch. Ah great, will do. 
Won't a limit of 128 in GCC 10 lead to extremely long compile times for affected code? Is that more desirable than an error? Potentially, though I think that'd be the case only if the original (normalized) constraint is huge to begin with. The comment for max_problem_size says /* The largest number of clauses in CNF or DNF we accept as input for subsumption. This an upper bound of 2^16 expressions. */ static int max_problem_size = 16; which implies increasing it to 128 would allow for at most 2^128 expressions (clearly unacceptable), but I'm not sure how this upper bound was obtained. FWIW I think another upper bound for the number of expressions in the CNF/DNF is roughly 'max_problem_size * size_of_original_constraint', since we allow at most 'max_problem_size' clauses in the decomposed form and each clause is definitely no larger than the original constraint. So according to this upper bound the dependence on max_problem_size as it relates to worst-case compile time/memory usage of subsumption is linear rather than exponential, contrary to the comment. In that case increasing the limit from 16 to 128 doesn't seem to be too bad. Fair, though I would expect anyone writing new concepts code to use GCC 11. Up to you. PR c++/100828 gcc/cp/ChangeLog: * logic.cc (formula::formula): Use emplace_back. (formula::branch): Insert a copy of m_current in front of m_current instead of at the end of the list. (formula::erase): Define. (decompose_formula): Remove. (decompose_antecedents): Remove. (decompose_consequents): Remove. (derive_proofs): Remove. (max_problem_size): Remove. (diagnose_constraint_size): Remove. (subsumes_constraints_nonnull): Rewrite directly in terms of decompose_clause and derive_proof, interleaving decomposition with implication checking. Use formula::erase to free the current clause before moving on to the next one. 
--- gcc/cp/logic.cc | 118 ++-- 1 file changed, 35 insertions(+), 83 deletions(-) diff --git a/gcc/cp/logic.cc b/gcc/cp/logic.cc index 142457e408a..3f872c11fe2 100644 --- a/gcc/cp/logic.cc +++ b/gcc/cp/logic.cc @@ -223,9 +223,7 @@ struct formula formula (tree t) { -/* This should call emplace_back(). There's an extra copy being - invoked by using push_back(). */ -m_clauses.push_back (t); +m_clauses.emplace_back (t); m_current = m_clauses.begin (); } @@ -248,8 +246,7 @@ struct formula clause& branch () { gcc_assert (!done ()); -m_clauses.push_back (*m_current); -return m_clauses.back (); +return *m_clauses.insert (std::next (m_current), *m_current); } /* Returns the position of the current clause. */ @@ -287,6 +284,14 @@ struct formula return m_clauses.end (); } + /* Remove the specified clause. */ + +
Re: [PATCH] c++: recognize class-scope non-template dguides [PR79501]
On 8/9/21 1:16 PM, Patrick Palka wrote: It looks like we still don't recognize class-scope non-template deduction guides even after r12-2260. This is because deduction guides are handled in cp_parser_init_declarator after calling cp_parser_declarator, but in the class-scope non-template case we call cp_parser_declarator directly from cp_parser_member_declaration. This patch makes us handle deduction guides in cp_parser_member_declaration as well. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk? PR c++/79501 gcc/cp/ChangeLog: * parser.c (cp_parser_maybe_adjust_declarator_for_dguide): New, split out from ... (cp_parser_init_declarator): ... here. (cp_parser_member_declaration): Use it. gcc/testsuite/ChangeLog: * g++.dg/cpp1z/class-deduction98.C: New test. --- gcc/cp/parser.c | 54 +-- .../g++.dg/cpp1z/class-deduction98.C | 10 2 files changed, 49 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction98.C diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index d4da25ca703..04fdeec32ab 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -21869,6 +21869,30 @@ warn_about_ambiguous_parse (const cp_decl_specifier_seq *decl_specifiers, } } +/* If the function declarator DECLARATOR names a class template, adjust + it to name its deduction guides and return true. Otherwise return false. 
*/ + +static bool +cp_parser_maybe_adjust_declarator_for_dguide (cp_parser *parser, + cp_declarator *declarator) +{ + gcc_assert (function_declarator_p (declarator)); + + cp_declarator *id = get_id_declarator (declarator); + tree name = id->u.id.unqualified_name; + parser->scope = id->u.id.qualifying_scope; + tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc); + if (tmpl + && (DECL_CLASS_TEMPLATE_P (tmpl) + || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl))) +{ + id->u.id.unqualified_name = dguide_name (tmpl); + id->u.id.sfk = sfk_deduction_guide; + return true; +} + return false; +} + /* Declarators [gram.dcl.decl] */ /* Parse an init-declarator. @@ -22045,25 +22069,16 @@ cp_parser_init_declarator (cp_parser* parser, if (function_declarator_p (declarator)) { - /* Handle C++17 deduction guides. */ + /* Handle C++17 deduction guides. Note that class-scope +non-template deduction guides are handled in +cp_parser_member_declaration. */ if (!decl_specifiers->type && !decl_specifiers->any_type_specifiers_p && ctor_dtor_or_conv_p <= 0 && cxx_dialect >= cxx17) - { - cp_declarator *id = get_id_declarator (declarator); - tree name = id->u.id.unqualified_name; - parser->scope = id->u.id.qualifying_scope; - tree tmpl = cp_parser_lookup_name_simple (parser, name, id->id_loc); - if (tmpl - && (DECL_CLASS_TEMPLATE_P (tmpl) - || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl))) - { - id->u.id.unqualified_name = dguide_name (tmpl); - id->u.id.sfk = sfk_deduction_guide; - ctor_dtor_or_conv_p = 1; - } - } + if (cp_parser_maybe_adjust_declarator_for_dguide (parser, + declarator)) + ctor_dtor_or_conv_p = 1; if (!member_p && !cp_parser_error_occurred (parser)) warn_about_ambiguous_parse (decl_specifiers, declarator); @@ -26719,6 +26734,15 @@ cp_parser_member_declaration (cp_parser* parser) cp_lexer_consume_token (parser->lexer); goto out; } + /* Handle class-scope non-template C++17 deduction guides. 
*/ + if (function_declarator_p (declarator) + && !decl_specifiers.type + && !decl_specifiers.any_type_specifiers_p + && ctor_dtor_or_conv_p <= 0 + && cxx_dialect >= cxx17) Looks like you could factor more of the tests into the new function. + if (cp_parser_maybe_adjust_declarator_for_dguide (parser, + declarator)) + ctor_dtor_or_conv_p = 1; if (declares_class_or_enum & 2) cp_parser_check_for_definition_in_return_type diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction98.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction98.C new file mode 100644 index 000..bee0ce433ee --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction98.C @@ -0,0 +1,10 @@ +// PR c++/79501 +// { dg-do compile { target c++17 } } + +template +struct A { + template struct B { template B(V); }; + B(T) -> B; +}; + +A::B b(0);
[PATCH 8/8] OpenMP 5.0: [WIP, RFC] Clause ordering for OpenMP 5.0 (topological sorting by base pointer)
This patch reimplements the omp_target_reorder_clauses function in anticipation of supporting "deeper" struct mappings (that is, with several structure dereference operators, or similar). The idea is that in place of the (possibly quadratic) algorithm in omp_target_reorder_clauses that greedily moves clauses containing addresses that are subexpressions of other addresses before those other addresses, we employ a topological sort algorithm to calculate a proper order for map clauses. This should run in linear time, and hopefully handles degenerate cases where multiple "levels" of indirect accesses are present on a given directive. The new method also takes care to keep clause groups together, addressing the concerns raised in: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570501.html To figure out if some given clause depends on a base pointer in another clause, we strip off the outer layers of the address expression, and check (via a tree_operand_hash hash table we have built) if the result is a "base pointer" as defined in OpenMP 5.0 (1.2.6 Data Terminology). There are some subtleties involved, however: - We must treat MEM_REF with zero offset the same as INDIRECT_REF. This should probably be fixed in the front ends instead so we always use a canonical form (probably INDIRECT_REF). The following patch shows one instance of the problem, but there may be others: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571382.html - Mapping a whole struct implies mapping each of that struct's elements, which may be base pointers. Because those base pointers aren't necessarily explicitly referenced in the directive in question, we treat the whole-struct mapping as a dependency instead. - We also need to special-case handling for "*struct_var" (including "*this"), so the un-dereferenced variable is treated as the dependency -- which feels a bit wrong. 
(A subsequent GOMP_MAP_POINTER handles the pointer itself for those types of mapping, but the current approach only processes the first node in each group.) Jakub, Chung-Lin -- does this approach seem reasonable? Any comments at this stage? 2021-08-10 Julian Brown gcc/ * gimplify.c (is_or_contains_p, omp_target_reorder_clauses): Delete function. (omp_tsort_mark): Add enum. (omp_mapping_group): Add struct. (omp_get_base_pointer, omp_gather_mapping_groups, omp_index_mapping_groups, omp_tsort_mapping_groups_1, omp_tsort_mapping_groups, omp_segregate_mapping_groups, omp_reorder_mapping_groups): New functions. (gimplify_scan_omp_clauses): Call above functions instead of omp_target_reorder_clauses. gcc/testsuite/ * g++.dg/gomp/target-this-3.C: Adjust expected output. * g++.dg/gomp/target-this-4.C: Likewise. --- gcc/gimplify.c| 598 +++--- gcc/testsuite/g++.dg/gomp/target-this-3.C | 2 +- gcc/testsuite/g++.dg/gomp/target-this-4.C | 2 +- 3 files changed, 411 insertions(+), 191 deletions(-) diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 0ef2dbde710..ca106ef7acf 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -8534,29 +8534,6 @@ extract_base_bit_offset (tree base, tree *base_ind, tree *base_ref, return base; } -/* Returns true if EXPR is or contains (as a sub-component) BASE_PTR. 
*/ - -static bool -is_or_contains_p (tree expr, tree base_ptr) -{ - if ((TREE_CODE (expr) == INDIRECT_REF && TREE_CODE (base_ptr) == MEM_REF) - || (TREE_CODE (expr) == MEM_REF && TREE_CODE (base_ptr) == INDIRECT_REF)) -return operand_equal_p (TREE_OPERAND (expr, 0), - TREE_OPERAND (base_ptr, 0)); - while (!operand_equal_p (expr, base_ptr)) -{ - if (TREE_CODE (base_ptr) == COMPOUND_EXPR) - base_ptr = TREE_OPERAND (base_ptr, 1); - if (TREE_CODE (base_ptr) == COMPONENT_REF - || TREE_CODE (base_ptr) == POINTER_PLUS_EXPR - || TREE_CODE (base_ptr) == SAVE_EXPR) - base_ptr = TREE_OPERAND (base_ptr, 0); - else - break; -} - return operand_equal_p (expr, base_ptr); -} - /* Remove COMPONENT_REFS and indirections from EXPR. */ static tree @@ -8599,184 +8576,413 @@ aggregate_base_p (tree expr) return false; } -/* Implement OpenMP 5.x map ordering rules for target directives. There are - several rules, and with some level of ambiguity, hopefully we can at least - collect the complexity here in one place. */ +enum omp_tsort_mark { + UNVISITED, + TEMPORARY, + PERMANENT +}; + +struct omp_mapping_group { + tree *grp_start; + tree grp_end; + omp_tsort_mark mark; + struct omp_mapping_group *sibling; + struct omp_mapping_group *next; +}; + +/* Return the OpenMP "base pointer" of an expression EXPR, or NULL if there + isn't one. This needs improvement. */ + +static tree +omp_get_base_pointer (tree expr) +{ + while (TREE_CODE (expr) == COMPONENT_REF +&& (DECL_P (TREE_OPERAND
[PATCH 7/8] OpenACC: Rework indirect struct handling in gimplify.c
(Previously posted here: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570400.html) This patch reworks indirect struct handling in gimplify.c (i.e. for struct components mapped with "mystruct->a[0:n]", "mystruct->b", etc.), for OpenACC. The key observation leading to these changes was that component mappings of references-to-structures are already implemented and working, and indirect struct component handling via a pointer can work quite similarly. That lets us remove some earlier, special-case handling for mapping indirect struct component accesses for OpenACC, which required the pointed-to struct to be manually mapped before the indirect component mapping. With this patch, you can map struct components directly (e.g. an array slice "mystruct->a[0:n]") just like you can map a non-indirect struct component slice ("mystruct.a[0:n]"). Both references-to-pointers (with the former syntax) and references to structs (with the latter syntax) work now. For Fortran class pointers, we no longer re-use GOMP_MAP_TO_PSET for the class metadata (the structure that points to the class data and vptr) -- it is instead treated as any other struct. For C++, the struct handling also works for class members ("this->foo"), without having to explicitly map "this[:1]" first. For OpenACC, we permit chained indirect component references ("mystruct->a->b[0:n]"), though only the last part of such mappings will trigger an attach/detach operation. To properly use such a construct on the target, you must still manually map "mystruct->a[:1]" first -- but there's no need to map "mystruct[:1]" explicitly before that. This version of the patch avoids altering code paths for OpenMP, where possible. 2021-06-02 Julian Brown gcc/fortran/ * trans-openmp.c (gfc_trans_omp_clauses): Don't create GOMP_MAP_TO_PSET mappings for class metadata, nor GOMP_MAP_POINTER mappings for POINTER_TYPE_P decls. gcc/ * gimplify.c (extract_base_bit_offset): Add BASE_IND and OPENMP parameters.
Handle pointer-typed indirect references for OpenACC alongside reference-typed ones. (strip_components_and_deref, aggregate_base_p): New functions. (build_struct_group): Add pointer type indirect ref handling, including chained references, for OpenACC. Also handle references to structs for OpenACC. Conditionalise bits for OpenMP only where appropriate. (gimplify_scan_omp_clauses): Rework pointer-type indirect structure access handling to work more like the reference-typed handling for OpenACC only. * omp-low.c (scan_sharing_clauses): Handle pointer-type indirect struct references, and references to pointers to structs also. gcc/testsuite/ * g++.dg/goacc/member-array-acc.C: New test. * g++.dg/gomp/member-array-omp.C: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/deep-copy-15.c: New test. * testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c: New test. * testsuite/libgomp.oacc-c++/deep-copy-17.C: New test. --- gcc/fortran/trans-openmp.c| 20 +- gcc/gimplify.c| 214 +--- gcc/omp-low.c | 16 +- gcc/testsuite/g++.dg/goacc/member-array-acc.C | 13 + gcc/testsuite/g++.dg/gomp/member-array-omp.C | 13 + .../testsuite/libgomp.oacc-c++/deep-copy-17.C | 101 .../libgomp.oacc-c-c++-common/deep-copy-15.c | 68 ++ .../libgomp.oacc-c-c++-common/deep-copy-16.c | 231 ++ 8 files changed, 618 insertions(+), 58 deletions(-) create mode 100644 gcc/testsuite/g++.dg/goacc/member-array-acc.C create mode 100644 gcc/testsuite/g++.dg/gomp/member-array-omp.C create mode 100644 libgomp/testsuite/libgomp.oacc-c++/deep-copy-17.C create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-15.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-16.c diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 9dc2b6fc6a5..ff2058e9571 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -3032,30 +3032,16 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, tree present = gfc_omp_check_optional_argument 
(decl, true); if (openacc && n->sym->ts.type == BT_CLASS) { - tree type = TREE_TYPE (decl); if (n->sym->attr.optional) sorry ("optional class parameter"); - if (POINTER_TYPE_P (type)) - { - node4 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); - OMP_CLAUSE_DECL (node4) = decl; - OMP_CLAUSE_SIZE (node4) = size_int (0); -
[PATCH 6/8] OpenACC/OpenMP: Refactor struct lowering in gimplify.c
(Previously submitted here: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570398.html) This patch is a second attempt at refactoring struct component mapping handling for OpenACC/OpenMP during gimplification, after the patch I posted here: https://gcc.gnu.org/pipermail/gcc-patches/2018-November/510503.html And improved here, post-review: https://gcc.gnu.org/pipermail/gcc-patches/2019-November/533394.html This patch goes further, in that the struct-handling code is outlined into its own function (to create the "GOMP_MAP_STRUCT" node and the sorted list of nodes immediately following it, from a set of mappings of components of a given struct or derived type). I've also gone through the list-handling code and attempted to add comments documenting how it works to the best of my understanding, and broken out a couple of helper functions in order to (hopefully) have the code self-document better also. 2021-06-02 Julian Brown gcc/ * gimplify.c (insert_struct_comp_map): Refactor function into... (build_struct_comp_nodes): This new function. Remove list handling and improve self-documentation. (insert_node_after, move_node_after, move_nodes_after, move_concat_nodes_after): New helper functions. (build_struct_group): New function to build up GOMP_MAP_STRUCT node groups to map struct components. Outlined from... (gimplify_scan_omp_clauses): Here. Call above function. --- gcc/gimplify.c | 975 +++-- 1 file changed, 611 insertions(+), 364 deletions(-) diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 974d25b2d05..8558dda079f 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -8403,73 +8403,66 @@ gimplify_omp_depend (tree *list_p, gimple_seq *pre_p) return 1; } -/* Insert a GOMP_MAP_ALLOC or GOMP_MAP_RELEASE node following a - GOMP_MAP_STRUCT mapping. C is an always_pointer mapping. STRUCT_NODE is - the struct node to insert the new mapping after (when the struct node is - initially created). 
PREV_NODE is the first of two or three mappings for a - pointer, and is either: - - the node before C, when a pair of mappings is used, e.g. for a C/C++ - array section. - - not the node before C. This is true when we have a reference-to-pointer - type (with a mapping for the reference and for the pointer), or for - Fortran derived-type mappings with a GOMP_MAP_TO_PSET. - If SCP is non-null, the new node is inserted before *SCP. - if SCP is null, the new node is inserted before PREV_NODE. - The return type is: - - PREV_NODE, if SCP is non-null. - - The newly-created ALLOC or RELEASE node, if SCP is null. - - The second newly-created ALLOC or RELEASE node, if we are mapping a - reference to a pointer. */ +/* For a set of mappings describing an array section pointed to by a struct + (or derived type, etc.) component, create an "alloc" or "release" node to + insert into a list following a GOMP_MAP_STRUCT node. For some types of + mapping (e.g. Fortran arrays with descriptors), an additional mapping may + be created that is inserted into the list of mapping nodes attached to the + directive being processed -- not part of the sorted list of nodes after + GOMP_MAP_STRUCT. + + CODE is the code of the directive being processed. GRP_START and GRP_END + are the first and last of two or three nodes representing this array section + mapping (e.g. a data movement node like GOMP_MAP_{TO,FROM}, optionally a + GOMP_MAP_TO_PSET, and finally a GOMP_MAP_ALWAYS_POINTER). EXTRA_NODE is + filled with the additional node described above, if needed. + + This function does not add the new nodes to any lists itself. It is the + responsibility of the caller to do that. */ static tree -insert_struct_comp_map (enum tree_code code, tree c, tree struct_node, - tree prev_node, tree *scp) +build_struct_comp_nodes (enum tree_code code, tree grp_start, tree grp_end, +tree *extra_node) { enum gomp_map_kind mkind = (code == OMP_TARGET_EXIT_DATA || code == OACC_EXIT_DATA) ? 
GOMP_MAP_RELEASE : GOMP_MAP_ALLOC; - tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - tree cl = scp ? prev_node : c2; + gcc_assert (grp_start != grp_end); + + tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (c2, mkind); - OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (c)); - OMP_CLAUSE_CHAIN (c2) = scp ? *scp : prev_node; - if (OMP_CLAUSE_CHAIN (prev_node) != c - && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (prev_node)) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (prev_node)) - == GOMP_MAP_TO_PSET)) -OMP_CLAUSE_SIZE (c2) = OMP_CLAUSE_SIZE (OMP_CLAUSE_CHAIN (prev_node)); + OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (grp_end)); + OMP_CLAUSE_CHAIN (c2) = NULL_TREE; + tree
[PATCH 5/8] OpenMP/OpenACC: Move array_ref/indirect_ref handling code out of extract_base_bit_offset
This patch slightly cleans up the semantics of extract_base_bit_offset, in that the stripping of ARRAY_REFS/INDIRECT_REFS out of extract_base_bit_offset is moved back into the (two) call sites of the function. This is done in preparation for follow-on patches that extend the function. Previously posted for the og11 branch here (patch & reversion/rework): https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571712.html https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571884.html 2021-06-03 Julian Brown gcc/ * gimplify.c (extract_base_bit_offset): Don't look through ARRAY_REFs or INDIRECT_REFs here. (build_struct_group): Reinstate previous behaviour for handling ARRAY_REFs/INDIRECT_REFs. --- gcc/gimplify.c | 59 +- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 141ef6b2b1e..974d25b2d05 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -8490,31 +8490,7 @@ extract_base_bit_offset (tree base, tree *base_ref, poly_int64 *bitposp, poly_offset_int poffset; if (base_ref) -{ - *base_ref = NULL_TREE; - - while (TREE_CODE (base) == ARRAY_REF) - base = TREE_OPERAND (base, 0); - - if (TREE_CODE (base) == INDIRECT_REF) - base = TREE_OPERAND (base, 0); -} - else -{ - if (TREE_CODE (base) == ARRAY_REF) - { - while (TREE_CODE (base) == ARRAY_REF) - base = TREE_OPERAND (base, 0); - if (TREE_CODE (base) != COMPONENT_REF - || TREE_CODE (TREE_TYPE (base)) != ARRAY_TYPE) - return NULL_TREE; - } - else if (TREE_CODE (base) == INDIRECT_REF - && TREE_CODE (TREE_OPERAND (base, 0)) == COMPONENT_REF - && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0))) - == REFERENCE_TYPE)) - base = TREE_OPERAND (base, 0); -} +*base_ref = NULL_TREE; base = get_inner_reference (base, , , , , , , ); @@ -9482,12 +9458,17 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, poly_offset_int offset1; poly_int64 bitpos1; tree tree_offset1; - tree base_ref; + tree base_ref, ocd = OMP_CLAUSE_DECL (c); - tree base - = extract_base_bit_offset 
(OMP_CLAUSE_DECL (c), _ref, - , , - _offset1); + while (TREE_CODE (ocd) == ARRAY_REF) + ocd = TREE_OPERAND (ocd, 0); + + if (TREE_CODE (ocd) == INDIRECT_REF) + ocd = TREE_OPERAND (ocd, 0); + + tree base = extract_base_bit_offset (ocd, _ref, + , , + _offset1); bool do_map_struct = (base == decl && !tree_offset1); @@ -9679,6 +9660,24 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, poly_offset_int offsetn; poly_int64 bitposn; tree tree_offsetn; + + if (TREE_CODE (sc_decl) == ARRAY_REF) + { + while (TREE_CODE (sc_decl) == ARRAY_REF) + sc_decl = TREE_OPERAND (sc_decl, 0); + if (TREE_CODE (sc_decl) != COMPONENT_REF + || (TREE_CODE (TREE_TYPE (sc_decl)) + != ARRAY_TYPE)) + break; + } + else if (TREE_CODE (sc_decl) == INDIRECT_REF +&& (TREE_CODE (TREE_OPERAND (sc_decl, 0)) +== COMPONENT_REF) +&& (TREE_CODE (TREE_TYPE + (TREE_OPERAND (sc_decl, 0))) +== REFERENCE_TYPE)) + sc_decl = TREE_OPERAND (sc_decl, 0); + tree base = extract_base_bit_offset (sc_decl, NULL, , , -- 2.29.2
[PATCH 3/8] Remove array section base-pointer mapping semantics, and other front-end adjustments (mainline trunk)
From: Chung-Lin Tang This is a version of a patch by Chung-Lin, merged to current mainline. Any errors introduced are my own! It was previously posted here: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571195.html Chung-Lin's description from the previous submission follows (edited a little for formatting). This is a version of this patch: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570075.html for mainline trunk. This patch largely implements three pieces of functionality: (1) Per discussion and clarification on the omp-lang mailing list, standards-conforming behavior for mapping array sections should *NOT* also map the base-pointer, i.e. for this code: struct S { int *ptr; ... }; struct S s; #pragma omp target enter data map(to: s.ptr[:100]) Currently we generate after gimplify: map(to:*_1 [len: 400]) map(attach:s.ptr [bias: 0]) which is deemed incorrect. After this patch, the gimplify results are now adjusted to: (the attach operation is still generated, and if s.ptr is already mapped prior, attachment will happen) The correct way of achieving the base-pointer-also-mapped behavior would be to use: This adjustment in behavior required a number of small adjustments here and there in gimplify, including to accommodate map sequences for C++ references. There is also a small Fortran front-end patch involved (hence CCing Tobias and fortran@). The new gimplify processing changed behavior in handling GOMP_MAP_ALWAYS_POINTER maps such that the libgomp.fortran/struct-elem-map-1.f90 test regressed. It appeared that the Fortran FE was generating a GOMP_MAP_ALWAYS_POINTER for array types, which didn't seem quite correct, and the pre-patch behavior was removing this map anyway. I have a small change in trans-openmp.c:gfc_trans_omp_array_section to not generate the map in this case, and so far no bad test results.
(2) The second part (though kind of related to the first above) consists of fixes in libgomp/target.c to not overwrite attached pointers when handling device<->host copies, mainly for the "always" case. This behavior is also noted in the 5.0 spec, but not yet properly coded before. (3) The third is a set of changes to the C/C++ front-ends to extend the allowed component access syntax in map clauses. This is actually mainly an effort to allow SPEC HPC to compile, so although in the long term the entire map clause syntax parsing is probably going to be revamped, we're still adding this in for now. These changes are enabled for both OpenACC and OpenMP. Tested on x86_64-linux with nvptx offloading with no regressions. This patch was merged and tested atop the previously submitted patches: (a) https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570886.html "[PATCH, OpenMP 5.0] Improve OpenMP target support for C++ (includes PR92120 v3)" (b) https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570365.html "[PATCH, OpenMP 5.0] Implement relaxation of implicit map vs. existing device mappings (for mainline trunk)" so you might queue this one later than those for review. Thanks, Chung-Lin 2021-05-25 Chung-Lin Tang gcc/c/ChangeLog: * c-parser.c (struct omp_dim): New struct type for use inside c_parser_omp_variable_list. (c_parser_omp_variable_list): Allow multiple levels of array and component accesses in array section base-pointer expression. (c_parser_omp_clause_to): Set 'allow_deref' to true in call to c_parser_omp_var_list_parens. (c_parser_omp_clause_from): Likewise. * c-typeck.c (handle_omp_array_sections_1): Extend allowed range of base-pointer expressions involving INDIRECT/MEM/ARRAY_REF and POINTER_PLUS_EXPR. (c_finish_omp_clauses): Extend allowed range of expressions involving INDIRECT/MEM/ARRAY_REF and POINTER_PLUS_EXPR. gcc/cp/ChangeLog: * parser.c (struct omp_dim): New struct type for use inside cp_parser_omp_var_list_no_open.
(cp_parser_omp_var_list_no_open): Allow multiple levels of array and component accesses in array section base-pointer expression. (cp_parser_omp_all_clauses): Set 'allow_deref' to true in call to cp_parser_omp_var_list for to/from clauses. * semantics.c (handle_omp_array_sections_1): Extend allowed range of base-pointer expressions involving INDIRECT/MEM/ARRAY_REF and POINTER_PLUS_EXPR. (handle_omp_array_sections): Adjust pointer map generation of references. (finish_omp_clauses): Extend allowed range of expressions involving INDIRECT/MEM/ARRAY_REF and POINTER_PLUS_EXPR. gcc/fortran/ChangeLog: * trans-openmp.c (gfc_trans_omp_array_section): Do not generate GOMP_MAP_ALWAYS_POINTER map for main array maps of ARRAY_TYPE type. gcc/ChangeLog: * gimplify.c (extract_base_bit_offset): Add 'tree *offsetp' parameter, accommodate case where 'offset' return of
[PATCH 4/8] Rewrite GOMP_MAP_ATTACH_DETACH mappings unconditionally
It never makes sense for a GOMP_MAP_ATTACH_DETACH mapping to survive beyond gimplify.c, so this patch rewrites such mappings to GOMP_MAP_ATTACH or GOMP_MAP_DETACH unconditionally (rather than checking for a list of types of OpenACC or OpenMP constructs), in cases where it hasn't otherwise been done already in the preceding code. Previously posted here: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570399.html https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571711.html (og11) 2021-06-02 Julian Brown gcc/ * gimplify.c (gimplify_scan_omp_clauses): Simplify condition for changing GOMP_MAP_ATTACH_DETACH to GOMP_MAP_ATTACH or GOMP_MAP_DETACH. --- gcc/gimplify.c | 10 +- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/gcc/gimplify.c b/gcc/gimplify.c index fb35d240b34..141ef6b2b1e 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -9773,15 +9773,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, skip_map_struct: ; } - else if ((code == OACC_ENTER_DATA - || code == OACC_EXIT_DATA - || code == OACC_DATA - || code == OACC_PARALLEL - || code == OACC_KERNELS - || code == OACC_SERIAL - || code == OMP_TARGET_ENTER_DATA - || code == OMP_TARGET_EXIT_DATA) - && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH) + else if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH) { gomp_map_kind k = ((code == OACC_EXIT_DATA || code == OMP_TARGET_EXIT_DATA) -- 2.29.2
[PATCH 1/8] Improve OpenMP target support for C++ [PR92120 v4b]
From: Chung-Lin Tang This is a version "v4b" of a patch by Chung-Lin, merged to current mainline. All errors introduced are my own! Previously posted here: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573166.html Chung-Lin's description from the last submission follows. This patch is the "v4" version of my PR92120 patch; v3 was here: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570886.html (there I listed the various patches from devel/omp/gcc-10 branch that were combined, which I won't repeat here). Basically this v4 adds fixes for lambda capture, which was already pushed to devel/omp/gcc-11 yesterday: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572988.html Thanks, Chung-Lin gcc/cp/ * cp-tree.h (finish_omp_target): New declaration. (finish_omp_target_clauses): Likewise. * parser.c (cp_parser_omp_clause_map): Adjust call to cp_parser_omp_var_list_no_open to set 'allow_deref' argument to true. (cp_parser_omp_target): Factor out code, adjust into calls to new function finish_omp_target. * pt.c (tsubst_expr): Add call to finish_omp_target_clauses for OMP_TARGET case. * semantics.c (handle_omp_array_sections_1): Add handling to create 'this->member' from 'member' FIELD_DECL. (handle_omp_array_sections): Likewise. (finish_omp_clauses): Likewise. Adjust to allow 'this[]' in OpenMP map clauses. Handle 'A->member' case in map clauses. (struct omp_target_walk_data): New struct for walking over target-directive tree body. (finish_omp_target_clauses_r): New function for tree walk. (finish_omp_target_clauses): New function. (finish_omp_target): New function. gcc/c/ * c-parser.c (c_parser_omp_clause_map): Set 'allow_deref' argument in call to c_parser_omp_variable_list to 'true'. * c-typeck.c (handle_omp_array_sections_1): Add strip of MEM_REF in array base handling. (c_finish_omp_clauses): Handle 'A->member' case in map clauses. gcc/ * gimplify.c ("tree-hash-traits.h"): Add include.
(gimplify_scan_omp_clauses): Change struct_map_to_clause to type hash_map *. Adjust struct map handling to handle cases of *A and A->B expressions. Under !DECL_P case of GOMP_CLAUSE_MAP handling, add STRIP_NOPS for indir_p case, add to struct_deref_set for map(*ptr_to_struct) cases. Add MEM_REF case when handling component_ref_p case. Add unshare_expr and gimplification when created GOMP_MAP_STRUCT is not a DECL. Add code to add firstprivate pointer for *pointer-to-struct case. (gimplify_adjust_omp_clauses): Move GOMP_MAP_STRUCT removal code for exit data directives code to earlier position. * omp-low.c (lower_omp_target): Handle GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION, and GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION map kinds. * tree-pretty-print.c (dump_omp_clause): Likewise. gcc/testsuite/ * gcc.dg/gomp/target-3.c: New testcase. * g++.dg/gomp/target-3.C: New testcase. * g++.dg/gomp/target-lambda-1.C: New testcase. * g++.dg/gomp/target-lambda-2.C: New testcase. * g++.dg/gomp/target-this-1.C: New testcase. * g++.dg/gomp/target-this-2.C: New testcase. * g++.dg/gomp/target-this-3.C: New testcase. * g++.dg/gomp/target-this-4.C: New testcase. * g++.dg/gomp/target-this-5.C: New testcase. * g++.dg/gomp/this-2.C: Adjust testcase. include/ * gomp-constants.h (enum gomp_map_kind): Add GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION, and GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION map kinds. (GOMP_MAP_POINTER_P): Include GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION. libgomp/ * libgomp.h (gomp_attach_pointer): Add bool parameter. * oacc-mem.c (acc_attach_async): Update call to gomp_attach_pointer. (goacc_enter_data_internal): Likewise. * target.c (gomp_map_vars_existing): Update assert condition to include GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION. (gomp_map_pointer): Add 'bool allow_zero_length_array_sections' parameter, add support for mapping a pointer with NULL target. 
(gomp_attach_pointer): Add 'bool allow_zero_length_array_sections' parameter, add support for attaching a pointer with NULL target. (gomp_map_vars_internal): Update calls to gomp_map_pointer and gomp_attach_pointer, add handling for GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION, and GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION cases. * testsuite/libgomp.c++/target-23.C: New testcase. * testsuite/libgomp.c++/target-lambda-1.C: New testcase. * testsuite/libgomp.c++/target-lambda-2.C: New testcase. * testsuite/libgomp.c++/target-this-1.C: New testcase.
[PATCH 2/8] OpenMP 5.0: Implement relaxation of implicit map vs. existing device mappings
From: Chung-Lin Tang This is a version of a patch by Chung-Lin, merged to current mainline. Any errors introduced are my own! It was previously posted here: https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570365.html Chung-Lin's description from the previous submission follows. (Edit: it seems that I may have picked up the "non-mainline" version of this patch to merge, but I think the only relevant differences are in the included tests.) This patch implements relaxing the requirements when a map with the implicit attribute encounters an overlapping existing map. As the OpenMP 5.0 spec describes on page 320, lines 18-27 (and 5.1 spec, page 352, lines 13-22): "If a single contiguous part of the original storage of a list item with an implicit data-mapping attribute has corresponding storage in the device data environment prior to a task encountering the construct that is associated with the map clause, only that part of the original storage will have corresponding storage in the device data environment as a result of the map clause." Also tracked in the OpenMP spec context as issue #1463: https://github.com/OpenMP/spec/issues/1463 2021-05-05 Chung-Lin Tang include/ChangeLog: * gomp-constants.h (GOMP_MAP_IMPLICIT): New special map kind bits value. (GOMP_MAP_FLAG_SPECIAL_BITS): Define helper mask for whole set of special map kind bits. (GOMP_MAP_NONCONTIG_ARRAY_P): Adjust test for non-contiguous array map kind bits to be more specific. (GOMP_MAP_IMPLICIT_P): New predicate macro for implicit map kinds. gcc/ChangeLog: * tree.h (OMP_CLAUSE_MAP_IMPLICIT_P): New access macro for 'implicit' bit, using 'base.deprecated_flag' field of tree_node. * tree-pretty-print.c (dump_omp_clause): Add support for printing implicit attribute in tree dumping. * gimplify.c (gimplify_adjust_omp_clauses_1): Set OMP_CLAUSE_MAP_IMPLICIT_P to 1 if map clause is implicitly created. 
(gimplify_adjust_omp_clauses): Adjust place of adding implicitly created clauses, from simple append, to starting of list, after non-map clauses. * omp-low.c (lower_omp_target): Add GOMP_MAP_IMPLICIT bits into kind values passed to libgomp for implicit maps. gcc/testsuite/ChangeLog: * c-c++-common/gomp/target-implicit-map-1.c: New test. * c-c++-common/goacc/combined-reduction.c: Adjust scan test pattern. * c-c++-common/goacc/firstprivate-mappings-1.c: Likewise. * c-c++-common/goacc/mdc-1.c: Likewise. * c-c++-common/goacc/reduction-1.c: Likewise. * c-c++-common/goacc/reduction-2.c: Likewise. * c-c++-common/goacc/reduction-3.c: Likewise. * c-c++-common/goacc/reduction-4.c: Likewise. * c-c++-common/goacc/reduction-8.c: Likewise. * g++.dg/goacc/firstprivate-mappings-1.C: Likewise. * g++.dg/gomp/target-lambda-1.C: Likewise. * g++.dg/gomp/target-this-3.C: Likewise. * g++.dg/gomp/target-this-4.C: Likewise. * gfortran.dg/goacc/common-block-3.f90: Likewise. * gfortran.dg/goacc/loop-tree-1.f90: Likewise. * gfortran.dg/goacc/private-explicit-kernels-1.f95: Likewise. * gfortran.dg/goacc/private-predetermined-kernels-1.f95: Likewise. libgomp/ChangeLog: * target.c (gomp_map_vars_existing): Add 'bool implicit' parameter, add implicit map handling to allow a "superset" existing map as valid case. (get_kind): Adjust to filter out GOMP_MAP_IMPLICIT bits in return value. (get_implicit): New function to extract implicit status. (gomp_map_fields_existing): Adjust arguments in calls to gomp_map_vars_existing, and add uses of get_implicit. (gomp_map_vars_internal): Likewise. * testsuite/libgomp.c-c++-common/target-implicit-map-1.c: New test. 
--- gcc/gimplify.c| 11 ++- gcc/omp-low.c | 13 .../c-c++-common/goacc/combined-reduction.c | 4 +- .../goacc/firstprivate-mappings-1.c | 6 +- gcc/testsuite/c-c++-common/goacc/mdc-1.c | 2 +- .../c-c++-common/goacc/reduction-1.c | 4 +- .../c-c++-common/goacc/reduction-2.c | 4 +- .../c-c++-common/goacc/reduction-3.c | 4 +- .../c-c++-common/goacc/reduction-4.c | 4 +- .../c-c++-common/gomp/target-implicit-map-1.c | 39 ++ .../g++.dg/goacc/firstprivate-mappings-1.C| 2 +- gcc/testsuite/g++.dg/gomp/target-lambda-1.C | 6 +- gcc/testsuite/g++.dg/gomp/target-this-3.C | 4 +- gcc/testsuite/g++.dg/gomp/target-this-4.C | 4 +- .../gfortran.dg/goacc/common-block-3.f90 | 8 +- .../gfortran.dg/goacc/loop-tree-1.f90 | 2 +- .../goacc/private-explicit-kernels-1.f95 | 4 +- .../goacc/private-predetermined-kernels-1.f95 | 4 +- gcc/tree-pretty-print.c | 3 + gcc/tree.h| 5 ++
[PATCH 0/8] OpenMP 5.0: Topological sort for OpenMP 5.0 base pointers
Hi, This patch series contains a reimplementation of the clause-ordering code in gimplify.c (omp_target_reorder_clauses), in anticipation of extending support for lvalues in mapping clauses for OpenMP 5.0. This builds (or will build) on a series of patches by Chung-Lin and myself that haven't completed review for mainline yet: I've included these for context, but I'm not proposing "taking ownership" of Chung-Lin's patches -- my assumption is that those patches will make it upstream without fundamental changes, so the work built on top of them will still be largely valid. (The merges of Chung-Lin's patches on top of current mainline aren't entirely trivial because of a clash with https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573600.html. Hopefully my merge attempt helps, though I've probably made mistakes in parts of the code I don't fully understand!). Further commentary on individual patches -- particularly the 8th, which is intended as an RFC, and is the main reason for posting this series. Tested with offloading to NVPTX, and bootstrapped. Thanks, Julian Chung-Lin Tang (3): Improve OpenMP target support for C++ [PR92120 v4b] OpenMP 5.0: Implement relaxation of implicit map vs. 
existing device mappings Remove array section base-pointer mapping semantics, and other front-end adjustments (mainline trunk) Julian Brown (5): Rewrite GOMP_MAP_ATTACH_DETACH mappings unconditionally OpenMP/OpenACC: Move array_ref/indirect_ref handling code out of extract_base_bit_offset OpenACC/OpenMP: Refactor struct lowering in gimplify.c OpenACC: Rework indirect struct handling in gimplify.c OpenMP 5.0: [WIP, RFC] Clause ordering for OpenMP 5.0 (topological sorting by base pointer) gcc/c/c-parser.c | 57 +- gcc/c/c-typeck.c | 103 +- gcc/cp/cp-tree.h |2 + gcc/cp/parser.c | 126 +- gcc/cp/pt.c |5 + gcc/cp/semantics.c| 677 ++- gcc/fortran/trans-openmp.c| 23 +- gcc/gimplify.c| 1665 + gcc/omp-low.c | 31 +- .../c-c++-common/goacc/combined-reduction.c |4 +- .../goacc/deep-copy-arrayofstruct.c |5 +- .../goacc/firstprivate-mappings-1.c |6 +- gcc/testsuite/c-c++-common/goacc/mdc-1.c |2 +- .../c-c++-common/goacc/reduction-1.c |4 +- .../c-c++-common/goacc/reduction-2.c |4 +- .../c-c++-common/goacc/reduction-3.c |4 +- .../c-c++-common/goacc/reduction-4.c |4 +- .../c-c++-common/gomp/target-enter-data-1.c | 24 + .../c-c++-common/gomp/target-implicit-map-1.c | 39 + .../c-c++-common/gomp/target-implicit-map-2.c | 52 + .../g++.dg/goacc/firstprivate-mappings-1.C|2 +- gcc/testsuite/g++.dg/goacc/member-array-acc.C | 13 + gcc/testsuite/g++.dg/gomp/member-array-omp.C | 13 + gcc/testsuite/g++.dg/gomp/target-3.C | 36 + gcc/testsuite/g++.dg/gomp/target-lambda-1.C | 94 + gcc/testsuite/g++.dg/gomp/target-lambda-2.C | 35 + gcc/testsuite/g++.dg/gomp/target-this-1.C | 33 + gcc/testsuite/g++.dg/gomp/target-this-2.C | 49 + gcc/testsuite/g++.dg/gomp/target-this-3.C | 105 ++ gcc/testsuite/g++.dg/gomp/target-this-4.C | 107 ++ gcc/testsuite/g++.dg/gomp/target-this-5.C | 34 + gcc/testsuite/g++.dg/gomp/this-2.C| 24 +- gcc/testsuite/gcc.dg/gomp/target-3.c | 16 + .../gfortran.dg/goacc/common-block-3.f90 |8 +- .../gfortran.dg/goacc/loop-tree-1.f90 |2 +- .../goacc/private-explicit-kernels-1.f95 |4 +- 
.../goacc/private-predetermined-kernels-1.f95 |4 +- gcc/tree-pretty-print.c | 11 + gcc/tree.h|5 + include/gomp-constants.h | 27 +- libgomp/libgomp.h |2 +- libgomp/oacc-mem.c|7 +- libgomp/target.c | 246 ++- libgomp/testsuite/libgomp.c++/target-11.C | 14 +- libgomp/testsuite/libgomp.c++/target-12.C |2 +- libgomp/testsuite/libgomp.c++/target-15.C | 20 +- libgomp/testsuite/libgomp.c++/target-16.C | 20 +- libgomp/testsuite/libgomp.c++/target-17.C | 20 +- libgomp/testsuite/libgomp.c++/target-21.C |8 +- libgomp/testsuite/libgomp.c++/target-23.C | 34 + .../testsuite/libgomp.c++/target-lambda-1.C | 86 + .../testsuite/libgomp.c++/target-lambda-2.C | 30 + libgomp/testsuite/libgomp.c++/target-this-1.C | 29 + libgomp/testsuite/libgomp.c++/target-this-2.C | 47 + libgomp/testsuite/libgomp.c++/target-this-3.C | 99 + libgomp/testsuite/libgomp.c++/target-this-4.C | 104 + libgomp/testsuite/libgomp.c++/target-this-5.C | 30 +
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
> On Aug 11, 2021, at 11:55 AM, Richard Biener wrote: > > On August 11, 2021 6:22:00 PM GMT+02:00, Qing Zhao > wrote: >> >> >>> On Aug 11, 2021, at 10:53 AM, Richard Biener wrote: >>> >>> On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao >>> wrote: I modified the routine “gimple_add_init_for_auto_var” as the following: /* Generate initialization to automatic variable DECL based on INIT_TYPE. Build a call to internal const function DEFERRED_INIT: 1st argument: SIZE of the DECL; 2nd argument: INIT_TYPE; 3rd argument: IS_VLA, 0 NO, 1 YES; as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA). */ static void gimple_add_init_for_auto_var (tree decl, enum auto_init_type init_type, bool is_vla, gimple_seq *seq_p) { gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl)); gcc_assert (init_type > AUTO_INIT_UNINITIALIZED); tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); tree init_type_node = build_int_cst (integer_type_node, (int) init_type); tree is_vla_node = build_int_cst (integer_type_node, (int) is_vla); tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_DEFERRED_INIT, TREE_TYPE (decl), 3, decl_size, init_type_node, is_vla_node); /* If this DECL is a VLA, a temporary address variable for it has been created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl), we should use it as the LHS of the call. */ tree lhs_call = is_vla ? 
DECL_VALUE_EXPR (decl) : decl; gimplify_assign (lhs_call, call, seq_p); } With this change, the current issue is resolved, the gimple dump now is: (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1); However, there is another new issue: For the following testing case: == [opc@qinzhao-ol8u3-x86 gcc]$ cat t.c int bar; extern void decode_reloc(int *); void testfunc() { int alt_reloc; decode_reloc(_reloc); if (alt_reloc) /* { dg-warning "may be used uninitialized" } */ bar = 42; } = In the above, the auto var “alt_reloc” is address taken, then the gimple dump for it when compiled with -ftrivial-auto-var-init=zero is: void testfunc () { int alt_reloc; try { _1 = .DEFERRED_INIT (4, 2, 0); alt_reloc = _1; decode_reloc (_reloc); alt_reloc.0_2 = alt_reloc; if (alt_reloc.0_2 != 0) goto ; else goto ; : bar = 42; : } finally { alt_reloc = {CLOBBER}; } } I.e, instead of the expected IR: alt_reloc = .DEFERRED_INIT (4, 2, 0); We got the following: _1 = .DEFERRED_INIT (4, 2, 0); alt_reloc = _1; I guess the temp “_1” is created because “alt_reloc” is address taken. >>> >>> Yes and no. The reason is that alt_reloc is memory (because it is address >>> taken) and that GIMPLE says that register typed stores need to use a >>> is_gimple_val RHS which the call is not. >> >> Okay. >>> My questions: Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is address taken? >>> >>> I think so. Note it doesn't necessarily need address taking but any other >>> reason that prevents SSA rewriting the variable suffices. >> >> You mean, in addition to “address taken”, there are other situations that >> will introduce such IR: >> >> temp = .DEFERRED_INIT(); >> auto_var = temp; >> >> So, such IR is unavoidable and we have to handle it? > > Yes. > >> If we have to handle it, what’ the best way to do it? >> >> The solution in my mind is: >> 1. During uninitialized analysis phase, following the data flow to connect >> .DEFERRED_INIT to “auto_var”, and then decide that “auto_var” is >> uninitialized. 
> > Yes. Basically if there's an artificial variable auto initialized you have to > look at its uses. Okay. > >> 2. During RTL expansion, following the data flow to connect .DEFERRED_INIT >> to “auto_var”, and then delete “temp”, and then expand .DEFERRED_INIT to >> auto_var. > > That shouldn't be necessary. You'd initialize a temporary register which is > then copied to the real variable. That's good enough and should be optimized > by the RTL pipeline. Okay, I see. I will try to update the code to see whether all the issues can be resolved. Thanks a lot for your help. Qing > >> Let me know your comments and suggestions on this. >> >> >>> >>> The only other option is to force .DEFERRED_INIT making the LHS address >>> taken which I think could
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
On August 11, 2021 6:22:00 PM GMT+02:00, Qing Zhao wrote: > > >> On Aug 11, 2021, at 10:53 AM, Richard Biener wrote: >> >> On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao >> wrote: >>> I modified the routine “gimple_add_init_for_auto_var” as the following: >>> >>> /* Generate initialization to automatic variable DECL based on INIT_TYPE. >>> Build a call to internal const function DEFERRED_INIT: >>> 1st argument: SIZE of the DECL; >>> 2nd argument: INIT_TYPE; >>> 3rd argument: IS_VLA, 0 NO, 1 YES; >>> >>> as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA). */ >>> static void >>> gimple_add_init_for_auto_var (tree decl, >>> enum auto_init_type init_type, >>> bool is_vla, >>> gimple_seq *seq_p) >>> { >>> gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl)); >>> gcc_assert (init_type > AUTO_INIT_UNINITIALIZED); >>> tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); >>> >>> tree init_type_node >>> = build_int_cst (integer_type_node, (int) init_type); >>> tree is_vla_node >>> = build_int_cst (integer_type_node, (int) is_vla); >>> >>> tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, >>> IFN_DEFERRED_INIT, >>> TREE_TYPE (decl), 3, >>> decl_size, init_type_node, >>> is_vla_node); >>> >>> /* If this DECL is a VLA, a temporary address variable for it has been >>>created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl), >>>we should use it as the LHS of the call. */ >>> >>> tree lhs_call >>> = is_vla ? 
DECL_VALUE_EXPR (decl) : decl; >>> gimplify_assign (lhs_call, call, seq_p); >>> } >>> >>> With this change, the current issue is resolved, the gimple dump now is: >>> >>> (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1); >>> >>> However, there is another new issue: >>> >>> For the following testing case: >>> >>> == >>> [opc@qinzhao-ol8u3-x86 gcc]$ cat t.c >>> int bar; >>> >>> extern void decode_reloc(int *); >>> >>> void testfunc() >>> { >>> int alt_reloc; >>> >>> decode_reloc(_reloc); >>> >>> if (alt_reloc) /* { dg-warning "may be used uninitialized" } */ >>> bar = 42; >>> } >>> = >>> >>> In the above, the auto var “alt_reloc” is address taken, then the gimple >>> dump for it when compiled with -ftrivial-auto-var-init=zero is: >>> >>> void testfunc () >>> { >>> int alt_reloc; >>> >>> try >>> { >>> _1 = .DEFERRED_INIT (4, 2, 0); >>> alt_reloc = _1; >>> decode_reloc (_reloc); >>> alt_reloc.0_2 = alt_reloc; >>> if (alt_reloc.0_2 != 0) goto ; else goto ; >>> : >>> bar = 42; >>> : >>> } >>> finally >>> { >>> alt_reloc = {CLOBBER}; >>> } >>> } >>> >>> I.e, instead of the expected IR: >>> >>> alt_reloc = .DEFERRED_INIT (4, 2, 0); >>> >>> We got the following: >>> >>> _1 = .DEFERRED_INIT (4, 2, 0); >>> alt_reloc = _1; >>> >>> I guess the temp “_1” is created because “alt_reloc” is address taken. >> >> Yes and no. The reason is that alt_reloc is memory (because it is address >> taken) and that GIMPLE says that register typed stores need to use a >> is_gimple_val RHS which the call is not. > >Okay. >> >>> My questions: >>> >>> Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is >>> address taken? >> >> I think so. Note it doesn't necessarily need address taking but any other >> reason that prevents SSA rewriting the variable suffices. > >You mean, in addition to “address taken”, there are other situations that will >introduce such IR: > >temp = .DEFERRED_INIT(); >auto_var = temp; > >So, such IR is unavoidable and we have to handle it? Yes. 
>If we have to handle it, what’ the best way to do it? > >The solution in my mind is: >1. During uninitialized analysis phase, following the data flow to connect >.DEFERRED_INIT to “auto_var”, and then decide that “auto_var” is uninitialized. Yes. Basically if there's an artificial variable auto initialized you have to look at its uses. >2. During RTL expansion, following the data flow to connect .DEFERRED_INIT to >“auto_var”, and then delete “temp”, and then expand .DEFERRED_INIT to auto_var. That shouldn't be necessary. You'd initialize a temporary register which is then copied to the real variable. That's good enough and should be optimized by the RTL pipeline. >Let me know your comments and suggestions on this. > > >> >> The only other option is to force. DEFERED_INIT making the LHS address taken >> which I think could be achieved by passing it the address as argument >> instead of having a LHS. But let's not go down this route - it will have >> quite bad behavior on alias analysis and optimization. > >Okay. > >Qing >> >>> If so, “uninitialized analysis” phase need to be further adjusted to >>> specially handle such IR. >>> >>> If not, what should we do when the auto
Re: [ARM] PR66791: Replace builtins for vdup_n and vmov_n intrinsics
On Thu, Jun 24, 2021 at 6:29 PM Kyrylo Tkachov via Gcc-patches < gcc-patches@gcc.gnu.org> wrote: > > > > -Original Message- > > From: Prathamesh Kulkarni > > Sent: 24 June 2021 12:11 > > To: gcc Patches ; Kyrylo Tkachov > > > > Subject: [ARM] PR66791: Replace builtins for vdup_n and vmov_n intrinsics > > > > Hi, > > This patch replaces builtins for vdup_n and vmov_n. > > The patch results in regression for pr51534.c. > > Consider following function: > > > > uint8x8_t f1 (uint8x8_t a) { > > return vcgt_u8(a, vdup_n_u8(0)); > > } > > > > code-gen before patch: > > f1: > > vmov.i32 d16, #0 @ v8qi > > vcgt.u8 d0, d0, d16 > > bx lr > > > > code-gen after patch: > > f1: > > vceq.i8 d0, d0, #0 > > vmvn d0, d0 > > bx lr > > > > I am not sure which one is better tho ? > > Hi Prathamesh, This patch introduces a regression on non-hardfp configs (eg arm-linux-gnueabi or arm-eabi): FAIL: gcc:gcc.target/arm/arm.exp=gcc.target/arm/pr51534.c scan-assembler-times vmov.i32[ \t]+[dD][0-9]+, #0x 3 FAIL: gcc:gcc.target/arm/arm.exp=gcc.target/arm/pr51534.c scan-assembler-times vmov.i32[ \t]+[qQ][0-9]+, #4294967295 3 Can you fix this? Thanks Christophe > I think they're equivalent in practice, in any case the patch itself is > good (move away from RTL builtins). > Ok. > Thanks, > Kyrill > > > > > Also, this patch regressed bf16_dup.c on arm-linux-gnueabi, > > which is due to a missed opt in lowering. I had filed it as > > PR98435, and posted a fix for it here: > > https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572648.html > > > > Thanks, > > Prathamesh >
Re: [PATCH] rs6000: Fix ICE expanding lxvp and stxvp gimple built-ins [PR101849]
Hi Peter, LGTM. Still needs maintainer review, of course. :) Bill On 8/10/21 6:37 PM, Peter Bergner wrote: PR101849 shows we ICE on a test case when we pass a non __vector_pair * pointer to the __builtin_vsx_lxvp and __builtin_vsx_stxvp built-ins that is cast to __vector_pair *. The problem is that when we expand the built-in, the cast has already been removed from gimple and we are only given the base pointer. The solution used here (which fixes the ICE) is to catch this case and convert the pointer to a __vector_pair * pointer when expanding the built-in. This passed bootstrap and regression testing on powerpc64le-linux with no regressions. Ok for mainline? This also affects GCC 11 and 10, so ok there too after it has baked on trunk for a few days? Peter gcc/ PR target/101849 * config/rs6000/rs6000-call.c (rs6000_gimple_fold_mma_builtin): Cast pointer to __vector_pair *. gcc/testsuite/ PR target/101849 * gcc.target/powerpc/pr101849.c: New test. diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 904e104c058..d04011c0489 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -11919,6 +11919,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) tree offset = gimple_call_arg (stmt, 0); tree ptr = gimple_call_arg (stmt, 1); tree lhs = gimple_call_lhs (stmt); + if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node) + ptr = build1 (VIEW_CONVERT_EXPR, + build_pointer_type (vector_pair_type_node), ptr); tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, offset)); gimplify_assign (lhs, mem, _seq); @@ -11932,6 +11935,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi) tree src = gimple_call_arg (stmt, 0); tree offset = gimple_call_arg (stmt, 1); tree ptr = gimple_call_arg (stmt, 2); + if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node) + ptr = build1 (VIEW_CONVERT_EXPR, + build_pointer_type (vector_pair_type_node), ptr); tree mem = 
build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, offset)); gimplify_assign (mem, src, _seq); diff --git a/gcc/testsuite/gcc.target/powerpc/pr101849.c b/gcc/testsuite/gcc.target/powerpc/pr101849.c new file mode 100644 index 000..6d2e3b79282 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr101849.c @@ -0,0 +1,19 @@ +/* PR target/101849 */ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ + +/* Verify we do not ICE on the tests below. */ + +__vector_pair vp; +void +foo (double *x) +{ + vp = __builtin_vsx_lxvp (0, (__vector_pair *)(void *)x); +} + +void +bar (__vector_pair *src, double *x) +{ + __builtin_vsx_stxvp (*src, 0, (__vector_pair *)(void *)x); +}
Re: [PATCH] rs6000: Make some BIFs vectorized on P10
Hi Kewen, FWIW, it's easier on reviewers if you include the patch inline instead of as an attachment. On 8/11/21 1:56 AM, Kewen.Lin wrote: Hi, This patch is to add the support to make vectorizer able to vectorize scalar version of some built-in functions with its corresponding vector version with Power10 support. Bootstrapped & regtested on powerpc64le-linux-gnu {P9,P10} and powerpc64-linux-gnu P8. Is it ok for trunk? BR, Kewen - gcc/ChangeLog: * config/rs6000/rs6000.c (rs6000_builtin_md_vectorized_function): Add support for some built-in functions vectorized on Power10. gcc/testsuite/ChangeLog: * gcc.target/powerpc/dive-vectorize-1.c: New test. * gcc.target/powerpc/dive-vectorize-1.h: New test. * gcc.target/powerpc/dive-vectorize-2.c: New test. * gcc.target/powerpc/dive-vectorize-2.h: New test. * gcc.target/powerpc/dive-vectorize-run-1.c: New test. * gcc.target/powerpc/dive-vectorize-run-2.c: New test. * gcc.target/powerpc/p10-bifs-vectorize-1.c: New test. * gcc.target/powerpc/p10-bifs-vectorize-1.h: New test. * gcc.target/powerpc/p10-bifs-vectorize-run-1.c: New test. 
--- gcc/config/rs6000/rs6000.c| 55 +++ .../gcc.target/powerpc/dive-vectorize-1.c | 11 .../gcc.target/powerpc/dive-vectorize-1.h | 22 .../gcc.target/powerpc/dive-vectorize-2.c | 12 .../gcc.target/powerpc/dive-vectorize-2.h | 22 .../gcc.target/powerpc/dive-vectorize-run-1.c | 52 ++ .../gcc.target/powerpc/dive-vectorize-run-2.c | 53 ++ .../gcc.target/powerpc/p10-bifs-vectorize-1.c | 15 + .../gcc.target/powerpc/p10-bifs-vectorize-1.h | 40 ++ .../powerpc/p10-bifs-vectorize-run-1.c| 45 +++ 10 files changed, 327 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 279f00cc648..3eac1d05101 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5785,6 +5785,61 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out, default: break; } + + machine_mode in_vmode = TYPE_MODE (type_in); + machine_mode out_vmode = TYPE_MODE (type_out); + + /* Power10 supported vectorized built-in functions. */ + if (TARGET_POWER10 + && in_vmode == out_vmode + && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode)) +{ + machine_mode exp_mode = DImode; + machine_mode exp_vmode = V2DImode; + enum rs6000_builtins vname = RS6000_BUILTIN_COUNT; Using this as a flag value looks unnecessary. Is this just being done to silence a warning? 
+ switch (fn) + { + case MISC_BUILTIN_DIVWE: + case MISC_BUILTIN_DIVWEU: + exp_mode = SImode; + exp_vmode = V4SImode; + if (fn == MISC_BUILTIN_DIVWE) + vname = P10V_BUILTIN_DIVES_V4SI; + else + vname = P10V_BUILTIN_DIVEU_V4SI; + break; + case MISC_BUILTIN_DIVDE: + case MISC_BUILTIN_DIVDEU: + if (fn == MISC_BUILTIN_DIVDE) + vname = P10V_BUILTIN_DIVES_V2DI; + else + vname = P10V_BUILTIN_DIVEU_V2DI; + break; + case P10_BUILTIN_CFUGED: + vname = P10V_BUILTIN_VCFUGED; + break; + case P10_BUILTIN_CNTLZDM: + vname = P10V_BUILTIN_VCLZDM; + break; + case P10_BUILTIN_CNTTZDM: + vname = P10V_BUILTIN_VCTZDM; + break; + case P10_BUILTIN_PDEPD: + vname = P10V_BUILTIN_VPDEPD; + break; + case P10_BUILTIN_PEXTD: + vname = P10V_BUILTIN_VPEXTD; + break; + default: + return NULL_TREE; + } + + if (vname != RS6000_BUILTIN_COUNT Check is not necessary, as you will have returned by now in that case. Otherwise this patch LGTM. Thanks! Still needs maintainer approval, of course. Bill + && in_mode == exp_mode + && in_vmode == exp_vmode) + return rs6000_builtin_decls[vname]; +} + return NULL_TREE; } diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c new file mode 100644 index 000..84f1b0a88f2 --- /dev/null +++
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
> On Aug 11, 2021, at 11:15 AM, Richard Sandiford > wrote: > > Qing Zhao writes: >>> On Aug 11, 2021, at 4:02 AM, Richard Sandiford >>> wrote: I came up with the following solution: Define the IFN_DEFERRED_INIT function as: LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA); if IS_VLA is false, the LHS is the DECL itself, if IS_VLA is true, the LHS is the pointer to this DECL that created by gimplify_vla_decl. The benefit of this solution are: 1. Resolved the invalid IR issue; 2. The call stmt carries the address of the VLA natually; The issue with this solution is: For VLA and non-VLA, the LHS will be different, Do you see any other potential issues with this solution? >>> >>> The idea behind the DECL version of the .DEFERRED_INIT semantics was >>> that .DEFERRED_INIT just returns a SIZE-byte value that the caller >>> then assigns to a SIZE-byte lhs (with the caller choosing the lhs). >>> .DEFEREED_INIT itself doesn't read or write memory and so can be const, >>> which in turn allows alias analysis to be more precise. >> Yes. That’s right. >> >>> >>> If we want to handle the VLA case using pointers instead then I think >>> that needs to be a different IFN. >>> >>> If we did handle the VLA case using pointers (not expressing an opinion >>> on that), then it would be the caller's job to allocate the VLA and work >>> out the address of the VLA; >> >> the current routine “gimplify_vla_decl” has done this already: >> >> It created a temporary variable for the address of the VLA, and created a >> call to “alloca” to allocate the VLA. > > Right, that's what I mean. It's this alloca that allocates the VLA > and determines its address. This address is therefore logically an > input rather than an output to the following zero/pattern initialisation. 
> > In C you wouldn't write: > > addr = alloca(size); > addr = initialise(size); > > to allocate and initialise a size-byte buffer, because initialise() > would need to know the address of the memory it's supposed to initialise. > The same is true for this gimple code. This really makes good sense to me. :-) > >> My -ftrivial-auto-var-init work just try to use the “address variable of the >> VLA” in the new .DEFERRED_INIT call to carry it to RTL expansion phase. >> >> >>> this isn't something that .DEFERRED_INIT >>> would work out on the caller's behalf. The address of the VLA should >>> therefore be an argument to the new IFN, rather than something that >>> the IFN returns. >> >> Then what’s the LHS of this call? Currently the major issue is the LHS is >> invalid gimple. > > For this (different, address-taking, VLA-only) IFN, there would be no lhs. > The IFN would be similar to a memset. I see. > > Like I say, this is all hypothetical, based on “if we did handle the VLA > case using pointers”. As discussed, it would make alias analysis less > precise. I was just answering the question about whether there were > potential issues. Okay, understood. I will not handle the VLA case using pointers at this time. Per discussion with Richard Biener in the other emails, I might go the other route to specially handle the _1 = .DEFERRED_INIT (4, 2, 0); alt_reloc = _1; To see whether that can resolve the issues. Let me know your opinion. Thanks a lot. Qing > > Thanks, > Richard
Re: [PATCH] rs6000: Add missing unsigned info for some P10 bifs
Hi Kewen, On 8/11/21 12:44 AM, Kewen.Lin wrote: Hi, This patch is to make prototypes of some Power10 built-in functions consistent with what's in the documentation, as well as the vector version. Otherwise, useless conversions can be generated in gimple IR, and the vectorized versions will have inconsistent types. Bootstrapped & regtested on powerpc64le-linux-gnu P9 and powerpc64-linux-gnu P8. Is it ok for trunk? LGTM. Maintainers, this is necessary in the short term for the old builtins support, but this fragile thing that people always forget will go away with the new support. What Kewen is proposing here is correct for now. Thanks, Bill BR, Kewen - gcc/ChangeLog: * config/rs6000/rs6000-call.c (builtin_function_type): Add unsigned signedness for some Power10 bifs.
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
> On Aug 11, 2021, at 10:53 AM, Richard Biener wrote: > > On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao > wrote: >> I modified the routine “gimple_add_init_for_auto_var” as the following: >> >> /* Generate initialization to automatic variable DECL based on INIT_TYPE. >> Build a call to internal const function DEFERRED_INIT: >> 1st argument: SIZE of the DECL; >> 2nd argument: INIT_TYPE; >> 3rd argument: IS_VLA, 0 NO, 1 YES; >> >> as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA). */ >> static void >> gimple_add_init_for_auto_var (tree decl, >> enum auto_init_type init_type, >> bool is_vla, >> gimple_seq *seq_p) >> { >> gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl)); >> gcc_assert (init_type > AUTO_INIT_UNINITIALIZED); >> tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); >> >> tree init_type_node >> = build_int_cst (integer_type_node, (int) init_type); >> tree is_vla_node >> = build_int_cst (integer_type_node, (int) is_vla); >> >> tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, >> IFN_DEFERRED_INIT, >> TREE_TYPE (decl), 3, >> decl_size, init_type_node, >> is_vla_node); >> >> /* If this DECL is a VLA, a temporary address variable for it has been >>created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl), >>we should use it as the LHS of the call. */ >> >> tree lhs_call >> = is_vla ? 
DECL_VALUE_EXPR (decl) : decl; >> gimplify_assign (lhs_call, call, seq_p); >> } >> >> With this change, the current issue is resolved, the gimple dump now is: >> >> (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1); >> >> However, there is another new issue: >> >> For the following testing case: >> >> == >> [opc@qinzhao-ol8u3-x86 gcc]$ cat t.c >> int bar; >> >> extern void decode_reloc(int *); >> >> void testfunc() >> { >> int alt_reloc; >> >> decode_reloc(_reloc); >> >> if (alt_reloc) /* { dg-warning "may be used uninitialized" } */ >> bar = 42; >> } >> = >> >> In the above, the auto var “alt_reloc” is address taken, then the gimple >> dump for it when compiled with -ftrivial-auto-var-init=zero is: >> >> void testfunc () >> { >> int alt_reloc; >> >> try >> { >> _1 = .DEFERRED_INIT (4, 2, 0); >> alt_reloc = _1; >> decode_reloc (_reloc); >> alt_reloc.0_2 = alt_reloc; >> if (alt_reloc.0_2 != 0) goto ; else goto ; >> : >> bar = 42; >> : >> } >> finally >> { >> alt_reloc = {CLOBBER}; >> } >> } >> >> I.e, instead of the expected IR: >> >> alt_reloc = .DEFERRED_INIT (4, 2, 0); >> >> We got the following: >> >> _1 = .DEFERRED_INIT (4, 2, 0); >> alt_reloc = _1; >> >> I guess the temp “_1” is created because “alt_reloc” is address taken. > > Yes and no. The reason is that alt_reloc is memory (because it is address > taken) and that GIMPLE says that register typed stores need to use a > is_gimple_val RHS which the call is not. Okay. > >> My questions: >> >> Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is >> address taken? > > I think so. Note it doesn't necessarily need address taking but any other > reason that prevents SSA rewriting the variable suffices. You mean, in addition to “address taken”, there are other situations that will introduce such IR: temp = .DEFERRED_INIT(); auto_var = temp; So, such IR is unavoidable and we have to handle it? If we have to handle it, what’ the best way to do it? The solution in my mind is: 1. 
During uninitialized analysis phase, following the data flow to connect .DEFERRED_INIT to “auto_var”, and then decide that “auto_var” is uninitialized. 2. During RTL expansion, following the data flow to connect .DEFERRED_INIT to “auto_var”, and then delete “temp”, and then expand .DEFERRED_INIT to auto_var. Let me know your comments and suggestions on this. > > The only other option is to force. DEFERED_INIT making the LHS address taken > which I think could be achieved by passing it the address as argument instead > of having a LHS. But let's not go down this route - it will have quite bad > behavior on alias analysis and optimization. Okay. Qing > >> If so, “uninitialized analysis” phase need to be further adjusted to >> specially handle such IR. >> >> If not, what should we do when the auto var is address taken? >> >> Thanks a lot. >> >> Qing >> >> >>> On Aug 11, 2021, at 8:58 AM, Richard Biener wrote: >>> >>> On Wed, 11 Aug 2021, Qing Zhao wrote: >>> > On Aug 11, 2021, at 8:37 AM, Richard Biener wrote: > > On Wed, 11 Aug 2021, Qing Zhao wrote: > >> >> >>> On Aug 11, 2021, at 2:02 AM, Richard Biener wrote: >>> >>> On Tue, 10 Aug 2021, Qing Zhao wrote: >>> > On Aug 10, 2021, at 3:16
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
Qing Zhao writes: >> On Aug 11, 2021, at 4:02 AM, Richard Sandiford >> wrote: >>> I came up with the following solution: >>> >>> Define the IFN_DEFERRED_INIT function as: >>> >>> LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA); >>> >>> if IS_VLA is false, the LHS is the DECL itself, >>> if IS_VLA is true, the LHS is the pointer to this DECL that created by >>> gimplify_vla_decl. >>> >>> >>> The benefit of this solution are: >>> >>> 1. Resolved the invalid IR issue; >>> 2. The call stmt carries the address of the VLA natually; >>> >>> The issue with this solution is: >>> >>> For VLA and non-VLA, the LHS will be different, >>> >>> Do you see any other potential issues with this solution? >> >> The idea behind the DECL version of the .DEFERRED_INIT semantics was >> that .DEFERRED_INIT just returns a SIZE-byte value that the caller >> then assigns to a SIZE-byte lhs (with the caller choosing the lhs). >> .DEFEREED_INIT itself doesn't read or write memory and so can be const, >> which in turn allows alias analysis to be more precise. > Yes. That’s right. > >> >> If we want to handle the VLA case using pointers instead then I think >> that needs to be a different IFN. >> >> If we did handle the VLA case using pointers (not expressing an opinion >> on that), then it would be the caller's job to allocate the VLA and work >> out the address of the VLA; > > the current routine “gimplify_vla_decl” has done this already: > > It created a temporary variable for the address of the VLA, and created a > call to “alloca” to allocate the VLA. Right, that's what I mean. It's this alloca that allocates the VLA and determines its address. This address is therefore logically an input rather than an output to the following zero/pattern initialisation. In C you wouldn't write: addr = alloca(size); addr = initialise(size); to allocate and initialise a size-byte buffer, because initialise() would need to know the address of the memory it's supposed to initialise. 
The same is true for this gimple code. > My -ftrivial-auto-var-init work just tries to use the “address variable of the > VLA” in the new .DEFERRED_INIT call to carry it to the RTL expansion phase. > > >> this isn't something that .DEFERRED_INIT >> would work out on the caller's behalf. The address of the VLA should >> therefore be an argument to the new IFN, rather than something that >> the IFN returns. > > Then what’s the LHS of this call? Currently the major issue is the LHS is > invalid gimple. For this (different, address-taking, VLA-only) IFN, there would be no lhs. The IFN would be similar to a memset. Like I say, this is all hypothetical, based on “if we did handle the VLA case using pointers”. As discussed, it would make alias analysis less precise. I was just answering the question about whether there were potential issues. Thanks, Richard
Re: [PATCH v3] gcov: Add TARGET_GCOV_TYPE_SIZE target macro
On Wed, 11 Aug 2021, Sebastian Huber wrote: > 64-bit atomic operations. Allow targets to override the default type > size with the new TARGET_GCOV_TYPE_SIZE target macro. Hook, not macro. > diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c > index f79f939bd10f..e85b60c79f49 100644 > --- a/gcc/c-family/c-cppbuiltin.c > +++ b/gcc/c-family/c-cppbuiltin.c > @@ -1450,6 +1450,8 @@ c_cpp_builtins (cpp_reader *pfile) >/* For libgcov. */ >builtin_define_with_int_value ("__LIBGCC_VTABLE_USES_DESCRIPTORS__", >TARGET_VTABLE_USES_DESCRIPTORS); > + builtin_define_with_int_value ("__LIBGCC_GCOV_TYPE_SIZE", > + TARGET_GCOV_TYPE_SIZE); The TARGET_* macros used to initialize targetm may only be defined to their final values in the architecture-specific .c file that actually defines targetm. All other files should access the hook via targetm, not the TARGET_* macros. (TARGET_VTABLE_USES_DESCRIPTORS in the diff context is a target macro, not a hook at all.) > +DEFHOOKPOD > +(gcov_type_size, > + "The gcov type size in bits. This type is used for example for counters\n\ > +incremented by profiling and code-coverage events. The default value is > 64,\n\ > +if the type size of long long is greater than 32, otherwise the default\n\ > +value is 32. A 64-bit type is recommended to avoid overflows of the\n\ > +counters. If the @option{-fprofile-update=atomic} is used, then the\n\ > +counters are incremented using atomic operations. Targets not supporting\n\ > +64-bit atomic operations may override the default value and request a > 32-bit\n\ > +type.", > + HOST_WIDE_INT, (LONG_LONG_TYPE_SIZE > 32 ? 64 : 32)) LONG_LONG_TYPE_SIZE may depend on command-line options passed to the compiler (it does for AVR). The hook thus needs to be a function returning the desired size. -- Joseph S. Myers jos...@codesourcery.com
[PATCH, rs6000 V2] Add store fusion support for Power10
Enable store fusion on Power10. Use the SCHED_REORDER hook to implement Power10 specific ready list reordering. As of now this is just store fusion. Things changed in this version of the patch - Separate patch for additional load/store checks - Move option check from is_fusable_store() to caller - Misc coding style changes pointed out in review (parens/braces) - Add testcases Bootstrap/regtest on powerpc64(32/64) and powerpc64le(Power10) with no new regressions. Ok for master? -Pat 2021-08-11 Pat Haugen gcc/ChangeLog: * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add new flag. (POWERPC_MASKS): Likewise. * config/rs6000/rs6000.c (rs6000_option_override_internal): Enable store fusion for Power10. (is_fusable_store): New. (power10_sched_reorder): Likewise. (rs6000_sched_reorder): Do Power10 specific reordering. (rs6000_sched_reorder2): Likewise. * config/rs6000/rs6000.opt: Add new option. gcc/testsuite/ChangeLog: * gcc.target/powerpc/fusion-p10-stst.c: New test. * gcc.target/powerpc/fusion-p10-stst2.c: New test. diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 6758296c0fd..f5812da0184 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -90,7 +90,8 @@ | OPTION_MASK_P10_FUSION_2LOGICAL \ | OPTION_MASK_P10_FUSION_LOGADD\ | OPTION_MASK_P10_FUSION_ADDLOG\ -| OPTION_MASK_P10_FUSION_2ADD) +| OPTION_MASK_P10_FUSION_2ADD \ +| OPTION_MASK_P10_FUSION_2STORE) /* Flags that need to be turned off if -mno-power9-vector. 
*/ #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW\ @@ -143,6 +144,7 @@ | OPTION_MASK_P10_FUSION_LOGADD\ | OPTION_MASK_P10_FUSION_ADDLOG\ | OPTION_MASK_P10_FUSION_2ADD \ +| OPTION_MASK_P10_FUSION_2STORE\ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ | OPTION_MASK_MFCRF\ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 60f406a4ff6..402cc924e3f 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4495,6 +4495,10 @@ rs6000_option_override_internal (bool global_init_p) && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0) rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD; + if (TARGET_POWER10 + && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0) +rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE; + /* Turn off vector pair/mma options on non-power10 systems. */ else if (!TARGET_POWER10 && TARGET_MMA) { @@ -18874,6 +18878,91 @@ power9_sched_reorder2 (rtx_insn **ready, int lastpos) return cached_can_issue_more; } +/* Determine if INSN is a store to memory that can be fused with a similar + adjacent store. */ + +static bool +is_fusable_store (rtx_insn *insn, rtx *str_mem) +{ + /* Insn must be a non-prefixed base+disp form store. */ + if (is_store_insn (insn, str_mem) + && get_attr_prefixed (insn) == PREFIXED_NO + && get_attr_update (insn) == UPDATE_NO + && get_attr_indexed (insn) == INDEXED_NO) +{ + /* Further restictions by mode and size. */ + machine_mode mode = GET_MODE (*str_mem); + HOST_WIDE_INT size; + if (MEM_SIZE_KNOWN_P (*str_mem)) + size = MEM_SIZE (*str_mem); + else + return false; + + if (INTEGRAL_MODE_P (mode)) + /* Must be word or dword size. */ + return (size == 4 || size == 8); + else if (FLOAT_MODE_P (mode)) + /* Must be dword size. */ + return (size == 8); +} + + return false; +} + +/* Do Power10 specific reordering of the ready list. */ + +static int +power10_sched_reorder (rtx_insn **ready, int lastpos) +{ + rtx mem1; + + /* Do store fusion during sched2 only. 
*/ + if (!reload_completed) +return cached_can_issue_more; + + /* If the prior insn finished off a store fusion pair then simply + reset the counter and return, nothing more to do. */ + if (load_store_pendulum != 0) +{ + load_store_pendulum = 0; + return cached_can_issue_more; +} + + /* Try to pair certain store insns to adjacent memory locations + so that the hardware will fuse them to a single operation. */ + if (TARGET_P10_FUSION && TARGET_P10_FUSION_2STORE + && is_fusable_store (last_scheduled_insn, )) +{ + int pos; + rtx mem2; + + /* A fusable store was just scheduled. Scan the ready list for
[committed] libstdc++: Define std::is_pointer_interconvertible_base_of for C++20
Implement these traits using the new built-ins that Jakub added recently. Signed-off-by: Jonathan Wakely libstdc++-v3/ChangeLog: * include/std/type_traits (__cpp_lib_is_pointer_interconvertible) (is_pointer_interconvertible_base_of_v) (is_pointer_interconvertible_base_of): Define for C++20. * include/std/version (__cpp_lib_is_pointer_interconvertible): Define. * testsuite/23_containers/span/layout_compat.cc: Use correct feature test macro for std::is_layout_compatible_v. * testsuite/20_util/is_pointer_interconvertible/value.cc: New test. * testsuite/20_util/is_pointer_interconvertible/version.cc: New test. Tested powerpc64le-linux. Committed to trunk. commit 4fa6c0ec350bb4a8dd52a7c6f9881ab427151588 Author: Jonathan Wakely Date: Tue Aug 10 15:37:23 2021 libstdc++: Define std::is_pointer_interconvertible_base_of for C++20 Implement these traits using the new built-ins that Jakub added recently. Signed-off-by: Jonathan Wakely libstdc++-v3/ChangeLog: * include/std/type_traits (__cpp_lib_is_pointer_interconvertible) (is_pointer_interconvertible_base_of_v) (is_pointer_interconvertible_base_of): Define for C++20. * include/std/version (__cpp_lib_is_pointer_interconvertible): Define. * testsuite/23_containers/span/layout_compat.cc: Use correct feature test macro for std::is_layout_compatible_v. * testsuite/20_util/is_pointer_interconvertible/value.cc: New test. * testsuite/20_util/is_pointer_interconvertible/version.cc: New test. 
diff --git a/libstdc++-v3/include/std/type_traits b/libstdc++-v3/include/std/type_traits index 46edde905f8..2be4944e2a6 100644 --- a/libstdc++-v3/include/std/type_traits +++ b/libstdc++-v3/include/std/type_traits @@ -3389,6 +3389,33 @@ template inline constexpr bool is_unbounded_array_v = is_unbounded_array<_Tp>::value; +#if __has_builtin(__is_pointer_interconvertible_base_of) + /// True if `_Derived` is standard-layout and has a base class of type `_Base` + /// @since C++20 + template +struct is_pointer_interconvertible_base_of +: bool_constant<__is_pointer_interconvertible_base_of(_Base, _Derived)> +{ }; + + /// @ingroup variable_templates + /// @since C++20 + template +constexpr bool is_pointer_interconvertible_base_of_v + = __is_pointer_interconvertible_base_of(_Base, _Derived); + +#if __has_builtin(__builtin_is_pointer_interconvertible_with_class) +#define __cpp_lib_is_pointer_interconvertible 201907L + + /// True if `__mp` points to the first member of a standard-layout type + /// @returns true if `s.*__mp` is pointer-interconvertible with `s` + /// @since C++20 + template +constexpr bool +is_pointer_interconvertible_with_class(_Mem _Tp::*__mp) noexcept +{ return __builtin_is_pointer_interconvertible_with_class(__mp); } +#endif +#endif + #if __cplusplus > 202002L #define __cpp_lib_is_scoped_enum 202011L diff --git a/libstdc++-v3/include/std/version b/libstdc++-v3/include/std/version index d5fa38d7786..925f27704c4 100644 --- a/libstdc++-v3/include/std/version +++ b/libstdc++-v3/include/std/version @@ -236,6 +236,10 @@ #ifdef _GLIBCXX_HAS_GTHREADS # define __cpp_lib_jthread 201911L #endif +#if __has_builtin(__is_pointer_interconvertible_base_of) \ + && __has_builtin(__builtin_is_pointer_interconvertible_with_class) +# define __cpp_lib_is_pointer_interconvertible 201907L +#endif #if __cpp_lib_atomic_wait # define __cpp_lib_latch 201907L #endif diff --git a/libstdc++-v3/testsuite/20_util/is_pointer_interconvertible/value.cc 
b/libstdc++-v3/testsuite/20_util/is_pointer_interconvertible/value.cc new file mode 100644 index 000..471571cac58 --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/is_pointer_interconvertible/value.cc @@ -0,0 +1,51 @@ +// { dg-options "-std=gnu++20" } +// { dg-do compile { target c++20 } } +#include + +#ifndef __cpp_lib_is_pointer_interconvertible +# error "Feature test macro for is_pointer_interconvertible is missing in " +#elif __cpp_lib_is_pointer_interconvertible < 201907L +# error "Feature test macro for is_pointer_interconvertible has wrong value in " +#endif + +static_assert( std::is_pointer_interconvertible_base_of::value + == std::is_pointer_interconvertible_base_of_v ); + +struct B { }; + +static_assert( std::is_pointer_interconvertible_base_of::value + == std::is_pointer_interconvertible_base_of_v ); + +static_assert( std::is_pointer_interconvertible_base_of_v ); +static_assert( std::is_pointer_interconvertible_base_of_v ); +static_assert( std::is_pointer_interconvertible_base_of_v ); +static_assert( std::is_pointer_interconvertible_base_of_v ); + +struct D : B { int i; }; + +static_assert( std::is_pointer_interconvertible_base_of_v ); + +static_assert( std::is_pointer_interconvertible_base_of_v ); +static_assert(
Re: [PATCH] Adding target hook allows to reject initialization of register
Richard Biener writes: > On Wed, Aug 11, 2021 at 11:28 AM Richard Sandiford > wrote: >> >> Richard Biener writes: >> > On Tue, Aug 10, 2021 at 10:33 AM Jojo R via Gcc-patches >> > wrote: >> >> >> >> Some target like RISC-V allow to group vector register as a whole, >> >> and only operate part of it in fact, but the 'init-regs' pass will add >> >> initialization >> >> for uninitialized registers. Add this hook to reject this action for >> >> reducing instruction. >> > >> > Are these groups "visible"? That is, are the pseudos multi-reg >> > pseudos? I wonder >> > if there's a more generic way to tame down initregs w/o introducing a new >> > target >> > hook. >> > >> > Btw, initregs is a red herring - it ideally should go away. See PR61810. >> > >> > So instead of adding to it can you see whether disabling the pass for >> > RISC-V >> > works w/o fallout (and add a comment to the PR)? Maybe some more RTL >> > literate (in particular DF literate) can look at the remaining issue. >> > Richard, did you >> > ever have a look into the "issue" that initregs covers up (whatever >> > that exactly is)? >> >> No, sorry. I don't really understand what it would be from the comment >> in the code: >> >>[...] papers over some problems on the arm and other >>processors where certain isa constraints cannot be handled by gcc. >>These are of the form where two operands to an insn my not be the >>same. The ra will only make them the same if they do not >>interfere, and this can only happen if one is not initialized. >> >> That would definitely be an RA bug if true, since the constraints need >> to be applied independently of dataflow information. But the comment >> and code predate LRA and maybe no-one fancied poking around in reload >> (hard to believe). >> >> I'd be very surprised if LRA gets this wrong. > > OK, we're wondering since quite some time - how about changing the > gate of initregs to optimize > 0 && !targetm.lra_p ()? 
We'll hopefully > figure out the "real" issue the pass is papering over. At the same time > we're leaving old reload (and likely unmaintained) targets unaffected. Sounds good to me. Thanks, Richard
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
On August 11, 2021 5:30:40 PM GMT+02:00, Qing Zhao wrote: >I modified the routine “gimple_add_init_for_auto_var” as the following: > >/* Generate initialization to automatic variable DECL based on INIT_TYPE. > Build a call to internal const function DEFERRED_INIT: > 1st argument: SIZE of the DECL; > 2nd argument: INIT_TYPE; > 3rd argument: IS_VLA, 0 NO, 1 YES; > > as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA). */ >static void >gimple_add_init_for_auto_var (tree decl, > enum auto_init_type init_type, > bool is_vla, > gimple_seq *seq_p) >{ > gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl)); > gcc_assert (init_type > AUTO_INIT_UNINITIALIZED); > tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); > > tree init_type_node >= build_int_cst (integer_type_node, (int) init_type); > tree is_vla_node >= build_int_cst (integer_type_node, (int) is_vla); > > tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, > IFN_DEFERRED_INIT, >TREE_TYPE (decl), 3, >decl_size, init_type_node, >is_vla_node); > > /* If this DECL is a VLA, a temporary address variable for it has been > created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl), > we should use it as the LHS of the call. */ > > tree lhs_call >= is_vla ? 
DECL_VALUE_EXPR (decl) : decl; > gimplify_assign (lhs_call, call, seq_p); >} > >With this change, the current issue is resolved, the gimple dump now is: > > (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1); > >However, there is another new issue: > >For the following testing case: > >== >[opc@qinzhao-ol8u3-x86 gcc]$ cat t.c >int bar; > >extern void decode_reloc(int *); > >void testfunc() >{ > int alt_reloc; > > decode_reloc(&alt_reloc); > > if (alt_reloc) /* { dg-warning "may be used uninitialized" } */ >bar = 42; >} >= > >In the above, the auto var “alt_reloc” is address taken, then the gimple dump >for it when compiled with -ftrivial-auto-var-init=zero is: > >void testfunc () >{ > int alt_reloc; > > try >{ > _1 = .DEFERRED_INIT (4, 2, 0); > alt_reloc = _1; > decode_reloc (&alt_reloc); > alt_reloc.0_2 = alt_reloc; > if (alt_reloc.0_2 != 0) goto ; else goto ; > : > bar = 42; > : >} > finally >{ > alt_reloc = {CLOBBER}; >} >} > >I.e, instead of the expected IR: > >alt_reloc = .DEFERRED_INIT (4, 2, 0); > >We got the following: > > _1 = .DEFERRED_INIT (4, 2, 0); > alt_reloc = _1; > >I guess the temp “_1” is created because “alt_reloc” is address taken. Yes and no. The reason is that alt_reloc is memory (because it is address taken) and that GIMPLE says that register typed stores need to use an is_gimple_val RHS which the call is not. >My questions: > >Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is >address taken? I think so. Note it doesn't necessarily need address taking but any other reason that prevents SSA rewriting the variable suffices. The only other option is to force .DEFERRED_INIT to make the LHS address taken which I think could be achieved by passing it the address as argument instead of having a LHS. But let's not go down this route - it will have quite bad behavior on alias analysis and optimization. >If so, “uninitialized analysis” phase need to be further adjusted to specially >handle such IR. 
> >If not, what should we do when the auto var is address taken? > >Thanks a lot. > >Qing > > >> On Aug 11, 2021, at 8:58 AM, Richard Biener wrote: >> >> On Wed, 11 Aug 2021, Qing Zhao wrote: >> >>> >>> On Aug 11, 2021, at 8:37 AM, Richard Biener wrote: On Wed, 11 Aug 2021, Qing Zhao wrote: > > >> On Aug 11, 2021, at 2:02 AM, Richard Biener wrote: >> >> On Tue, 10 Aug 2021, Qing Zhao wrote: >> >>> >>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches wrote: Hi, Richard, > On Aug 10, 2021, at 10:22 AM, Richard Biener > wrote: >>> >>> Especially in the VLA case but likely also in general (though >>> unlikely >>> since usually the receiver of initializations are simple enough). >>> I'd >>> expect the VLA case end up as >>> >>> *ptr_to_decl = .DEFERRED_INIT (...); >>> >>> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. >> >> So, for the following small testing case: >> >> >> extern void bar (int); >> >> void foo(int n) >> { >> int arr[n]; >> bar (arr[2]); >> return; >> } >> = >> >> If I compile it with -ftrivial-auto-var-init=zero
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
I modified the routine “gimple_add_init_for_auto_var” as the following: /* Generate initialization to automatic variable DECL based on INIT_TYPE. Build a call to internal const function DEFERRED_INIT: 1st argument: SIZE of the DECL; 2nd argument: INIT_TYPE; 3rd argument: IS_VLA, 0 NO, 1 YES; as DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA). */ static void gimple_add_init_for_auto_var (tree decl, enum auto_init_type init_type, bool is_vla, gimple_seq *seq_p) { gcc_assert (VAR_P (decl) && !DECL_EXTERNAL (decl) && !TREE_STATIC (decl)); gcc_assert (init_type > AUTO_INIT_UNINITIALIZED); tree decl_size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); tree init_type_node = build_int_cst (integer_type_node, (int) init_type); tree is_vla_node = build_int_cst (integer_type_node, (int) is_vla); tree call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_DEFERRED_INIT, TREE_TYPE (decl), 3, decl_size, init_type_node, is_vla_node); /* If this DECL is a VLA, a temporary address variable for it has been created, the replacement for DECL is recorded in DECL_VALUE_EXPR (decl), we should use it as the LHS of the call. */ tree lhs_call = is_vla ? 
DECL_VALUE_EXPR (decl) : decl; gimplify_assign (lhs_call, call, seq_p); } With this change, the current issue is resolved, the gimple dump now is: (*arr.1) = .DEFERRED_INIT (D.1952, 2, 1); However, there is another new issue: For the following testing case: == [opc@qinzhao-ol8u3-x86 gcc]$ cat t.c int bar; extern void decode_reloc(int *); void testfunc() { int alt_reloc; decode_reloc(&alt_reloc); if (alt_reloc) /* { dg-warning "may be used uninitialized" } */ bar = 42; } = In the above, the auto var “alt_reloc” is address taken, then the gimple dump for it when compiled with -ftrivial-auto-var-init=zero is: void testfunc () { int alt_reloc; try { _1 = .DEFERRED_INIT (4, 2, 0); alt_reloc = _1; decode_reloc (&alt_reloc); alt_reloc.0_2 = alt_reloc; if (alt_reloc.0_2 != 0) goto ; else goto ; : bar = 42; : } finally { alt_reloc = {CLOBBER}; } } I.e, instead of the expected IR: alt_reloc = .DEFERRED_INIT (4, 2, 0); We got the following: _1 = .DEFERRED_INIT (4, 2, 0); alt_reloc = _1; I guess the temp “_1” is created because “alt_reloc” is address taken. My questions: Shall we accept such IR for .DEFERRED_INIT purpose when the auto var is address taken? If so, “uninitialized analysis” phase need to be further adjusted to specially handle such IR. If not, what should we do when the auto var is address taken? Thanks a lot. Qing > On Aug 11, 2021, at 8:58 AM, Richard Biener wrote: > > On Wed, 11 Aug 2021, Qing Zhao wrote: > >> >> >>> On Aug 11, 2021, at 8:37 AM, Richard Biener wrote: >>> >>> On Wed, 11 Aug 2021, Qing Zhao wrote: >>> > On Aug 11, 2021, at 2:02 AM, Richard Biener wrote: > > On Tue, 10 Aug 2021, Qing Zhao wrote: > >> >> >>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches >>> wrote: >>> >>> Hi, Richard, >>> On Aug 10, 2021, at 10:22 AM, Richard Biener wrote: >> >> Especially in the VLA case but likely also in general (though >> unlikely >> since usually the receiver of initializations are simple enough). 
>> I'd >> expect the VLA case end up as >> >> *ptr_to_decl = .DEFERRED_INIT (...); >> >> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. > > So, for the following small testing case: > > > extern void bar (int); > > void foo(int n) > { > int arr[n]; > bar (arr[2]); > return; > } > = > > If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple > -S -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is: > > = > void foo (int n) > { > int n.0; > sizetype D.1950; > bitsizetype D.1951; > sizetype D.1952; > bitsizetype D.1953; > sizetype D.1954; > int[0:D.1950] * arr.1; > void * saved_stack.2; > int arr[0:D.1950] [value-expr: *arr.1]; > > saved_stack.2 = __builtin_stack_save (); > try > { > n.0 = n; > _1 = (long int) n.0; > _2 = _1 + -1; > _3 = (sizetype) _2; > D.1950 = _3; > _4 = (sizetype) n.0; > _5 = (bitsizetype) _4; > _6 = _5 * 32; > D.1951 = _6; > _7 = (sizetype) n.0; > _8 = _7 * 4; > D.1952 = _8;
[PATCH] aarch64: Replace some uses of GET_CODE with RTL predicate macros
Hi all, this patch changed some RTL nodes to appropriate macros in Aarch64.c backend using a script. Would this be okay for trunk? Also, this is my first contribution, and I do not yet have commit rights, so if everything is okay could someone commit this for me? Thank you and I would appreciate any feedback/advice! gcc/ChangeLog: 2021-07-21 Alistair_Lee alistair@arm.com * rtl.h (CONST_VECTOR_P): New macro. (CONST_STRING_P): New macro. * config/aarch64/aarch64.c (aarch64_get_sve_pred_bits): Use RTL code testing macros. (aarch64_ptrue_all_mode): Likewise. (aarch64_expand_mov_immediate): Likewise. (aarch64_const_vec_all_in_range_p): Likewise. (aarch64_rtx_costs): Likewise. (aarch64_legitimate_constant_p): Likewise. (aarch64_simd_valid_immediate): Likewise. (aarch64_simd_make_constant): Likewise. (aarch64_convert_mult_to_shift): Likewise. (aarch64_expand_sve_vec_perm): Likewise. (aarch64_vec_fpconst_pow_of_2): Likewise. diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 2753c852abdfab96ec6016075aa386eee73ad85d..d1408a6ab371223cd7d042012a32a4b0a76d6885 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -4035,7 +4035,7 @@ aarch64_force_temporary (machine_mode mode, rtx x, rtx value) static bool aarch64_get_sve_pred_bits (rtx_vector_builder , rtx x) { - if (GET_CODE (x) != CONST_VECTOR) + if (!CONST_VECTOR_P (x)) return false; unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode), @@ -4091,7 +4091,7 @@ opt_machine_mode aarch64_ptrue_all_mode (rtx x) { gcc_assert (GET_MODE (x) == VNx16BImode); - if (GET_CODE (x) != CONST_VECTOR + if (!CONST_VECTOR_P (x) || !CONST_VECTOR_DUPLICATE_P (x) || !CONST_INT_P (CONST_VECTOR_ENCODED_ELT (x, 0)) || INTVAL (CONST_VECTOR_ENCODED_ELT (x, 0)) == 0) @@ -5791,7 +5791,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) return; } - if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode)) + if (CONST_VECTOR_P (imm) && aarch64_sve_data_mode_p (mode)) if 
(rtx res = aarch64_expand_sve_const_vector (dest, imm)) { if (dest != res) @@ -10495,7 +10495,7 @@ aarch64_const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT maxval) { - if (GET_CODE (vec) != CONST_VECTOR + if (!CONST_VECTOR_P (vec) || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT) return false; @@ -12595,7 +12595,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, case SIGN_EXTRACT: /* Bit-field insertion. Strip any redundant widening of the RHS to meet the width of the target. */ - if (GET_CODE (op1) == SUBREG) + if (SUBREG_P (op1)) op1 = SUBREG_REG (op1); if ((GET_CODE (op1) == ZERO_EXTEND || GET_CODE (op1) == SIGN_EXTEND) @@ -12868,7 +12868,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, But the integer MINUS logic expects the shift/extend operation in op1. */ if (! (REG_P (op0) - || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0) + || (SUBREG_P (op0) && REG_P (SUBREG_REG (op0) { op0 = XEXP (x, 1); op1 = XEXP (x, 0); @@ -17997,7 +17997,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) /* Otherwise, accept any CONST_VECTOR that, if all else fails, can at least be forced to memory and loaded from there. */ - if (GET_CODE (x) == CONST_VECTOR) + if (CONST_VECTOR_P (x)) return !targetm.cannot_force_const_mem (mode, x); /* Do not allow vector struct mode constants for Advanced SIMD. @@ -19804,7 +19804,7 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, scalar_mode elt_mode = GET_MODE_INNER (mode); rtx base, step; unsigned int n_elts; - if (GET_CODE (op) == CONST_VECTOR + if (CONST_VECTOR_P (op) && CONST_VECTOR_DUPLICATE_P (op)) n_elts = CONST_VECTOR_NPATTERNS (op); else if ((vec_flags & VEC_SVE_DATA) @@ -19826,7 +19826,7 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, } return true; } - else if (GET_CODE (op) == CONST_VECTOR + else if (CONST_VECTOR_P (op) && CONST_VECTOR_NUNITS (op).is_constant (_elts)) /* N_ELTS set above. 
*/; else @@ -20425,7 +20425,7 @@ aarch64_simd_make_constant (rtx vals) int n_const = 0; int i; - if (GET_CODE (vals) == CONST_VECTOR) + if (CONST_VECTOR_P (vals)) const_vec = vals; else if (GET_CODE (vals) == PARALLEL) { @@ -20966,7 +20966,7 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals) static rtx aarch64_convert_mult_to_shift (rtx value, rtx_code ) { - if (GET_CODE (value) !=
Re: [PATCH] c++: Improve memory usage of subsumption [PR100828]
On Wed, 11 Aug 2021, Jason Merrill wrote: > On 8/9/21 5:07 PM, Patrick Palka wrote: > > On Wed, Jul 28, 2021 at 4:42 PM Jason Merrill wrote: > > > > > > On 7/19/21 6:05 PM, Patrick Palka wrote: > > > > Constraint subsumption is implemented in two steps. The first step > > > > computes the disjunctive (or conjunctive) normal form of one of the > > > > constraints, and the second step verifies that each clause in the > > > > decomposed form implies the other constraint. Performing these two > > > > steps separately is problematic because in the first step the > > > > disjunctive normal form can be exponentially larger than the original > > > > constraint, and by computing it ahead of time we'd have to keep all of > > > > it in memory. > > > > > > > > This patch fixes this exponential blowup in memory usage by interleaving > > > > these two steps, so that as soon as we decompose one clause we check > > > > implication for it. In turn, memory usage during subsumption is now > > > > worst case linear in the size of the constraints rather than > > > > exponential, and so we can safely remove the hard limit of 16 clauses > > > > without introducing runaway memory usage on some inputs. (Note the > > > > _time_ complexity of subsumption is still exponential in the worst > > > > case.) > > > > > > > > In order for this to work we need formula::branch to prepend the copy > > > > of the current clause directly after the current clause rather than > > > > at the end of the list, so that we fully decompose a clause shortly > > > > after creating it. Otherwise we'd end up accumulating exponentially > > > > many (partially decomposed) clauses in memory anyway. > > > > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on > > > > range-v3 and cmcstl2. Does this look OK for trunk and perhaps 11? > > > > > > OK for trunk. > > > > Thanks a lot, patch committed to trunk as r12-2658. 
Since this low > > complexity limit was introduced in GCC 10, what do you think about > > increasing the limit from 16 to say 128 in the 10/11 release branches > > as a relatively safe stopgap? > > Now that 11.2 is out, go ahead and apply this patch to the 11 branch. Ah great, will do. > > Won't a limit of 128 in GCC 10 lead to extremely long compile times for > affected code? Is that more desirable than an error? Potentially, though I think that'd be the case only if the original (normalized) constraint is huge to begin with. The comment for max_problem_size says /* The largest number of clauses in CNF or DNF we accept as input for subsumption. This an upper bound of 2^16 expressions. */ static int max_problem_size = 16; which implies increasing it to 128 would allow for at most 2^128 expressions (clearly unacceptable), but I'm not sure how this upper bound was obtained. FWIW I think another upper bound for the number of expressions in the CNF/DNF is roughly 'max_problem_size * size_of_original_constraint', since we allow at most 'max_problem_size' clauses in the decomposed form and each clause is definitely no larger than the original constraint. So according to this upper bound the dependence on max_problem_size as it relates to worst-case compile time/memory usage of subsumption is linear rather than exponential, contrary to the comment. In that case increasing the limit from 16 to 128 doesn't seem to be too bad. > > > > >PR c++/100828 > > > > > > > > gcc/cp/ChangeLog: > > > > > > > >* logic.cc (formula::formula): Use emplace_back. > > > >(formula::branch): Insert a copy of m_current in front of > > > >m_current instead of at the end of the list. > > > >(formula::erase): Define. > > > >(decompose_formula): Remove. > > > >(decompose_antecedents): Remove. > > > >(decompose_consequents): Remove. > > > >(derive_proofs): Remove. > > > >(max_problem_size): Remove. > > > >(diagnose_constraint_size): Remove. 
> > > >(subsumes_constraints_nonnull): Rewrite directly in terms of > > > >decompose_clause and derive_proof, interleaving decomposition > > > >with implication checking. Use formula::erase to free the > > > >current clause before moving on to the next one. > > > > --- > > > >gcc/cp/logic.cc | 118 > > > > ++-- > > > >1 file changed, 35 insertions(+), 83 deletions(-) > > > > > > > > diff --git a/gcc/cp/logic.cc b/gcc/cp/logic.cc > > > > index 142457e408a..3f872c11fe2 100644 > > > > --- a/gcc/cp/logic.cc > > > > +++ b/gcc/cp/logic.cc > > > > @@ -223,9 +223,7 @@ struct formula > > > > > > > > formula (tree t) > > > > { > > > > -/* This should call emplace_back(). There's an extra copy being > > > > - invoked by using push_back(). */ > > > > -m_clauses.push_back (t); > > > > +m_clauses.emplace_back (t); > > > >m_current = m_clauses.begin (); > > > > } > > > > > > > > @@ -248,8
Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.
On 8/11/21 2:05 AM, Tobias Burnus wrote: On 11.08.21 00:46, Sandra Loosemore wrote: On 8/10/21 2:29 AM, Tobias Burnus wrote: [snip] To conclude: I like the code changes (LGTM); the '__float128' -> 'TFmode' comment change also matches the code. However, I think both longer comments need to be updated. OK. I used your wording verbatim for the first one. For the second one, I'm still pretty confused as I think it is at least theoretically possible on PowerPC to have a target with 64-bit long double (AIX?) that also supports the __ibm128 format, and it would be wrong to assume that *any* 128-bit mode that's not long double is IEEE. So I decided the best thing is just to replace the FIXME with a pointer to the issue I opened yesterday https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101835 LGTM – but ... + /* See PR101835. */ ... I wonder whether your PR reference should have a TODO or FIXME prefix – or a "for some issue" suffix. Currently, it can be read as if the PR describes why the code was added – and not for questioning the code. OK, thank you. I've pushed the patch with the addition of "TODO" to that comment. -Sandra
[PATCH][RFC] target/61810 - disable init-regs pass on targets using LRA
This patch disables the init-regs pass on targets using LRA. It's not clear as to what issue the init-regs papers over and most definitely LRA should get it right. This leaves targets still using reload unaffected but will expose any such LRA bug if it exists, allowing it to be identified and fixed. The change can cause testsuite fallout since the explicit initializations can have an effect on passes like combine. For example on x86_64 this results in +FAIL: gcc.target/i386/extract-insert-combining.c scan-assembler-times (?:vmovd| movd)[ t]+[^{\\n]*%xmm[0-9] 3 +FAIL: gcc.target/i386/extract-insert-combining.c scan-assembler-times (?:vpinsr d|pinsrd)[ t]+[^{\\n]*%xmm[0-9] 1 it also results in yet unanalyzed +FAIL: gnat.dg/sso8.adb execution test I welcome trying this change on other architectures, like for example 'arm' that's singled out in the init-regs comments as affected architecture. Bootstrapped and tested on x86_64-unknown-linux-gnu with all languages and {,-m32} with the above remaining fallout (I fixed another testism already). 2021-08-11 Richard Biener PR target/61810 * init-regs.c (pass_initialize_regs::gate): Gate on targetm.lra_p (). --- gcc/init-regs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/init-regs.c b/gcc/init-regs.c index 72e898f3e33..8f038f54fff 100644 --- a/gcc/init-regs.c +++ b/gcc/init-regs.c @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. If not see #include "memmodel.h" #include "emit-rtl.h" #include "expr.h" +#include "target.h" #include "tree-pass.h" /* Check all of the uses of pseudo variables. If any use that is MUST @@ -153,7 +154,7 @@ public: {} /* opt_pass methods: */ - virtual bool gate (function *) { return optimize > 0; } + virtual bool gate (function *) { return optimize > 0 && !targetm.lra_p (); } virtual unsigned int execute (function *) { initialize_uninitialized_regs (); -- 2.31.1
[PATCH] Fix gcc.dg/lto/pr48622_0.c testcase
This fixes the testcase to not rely on the reference to ashift_qi_1 being optimized out by RTL optimization with the help of the init-regs pass, which replaces comparisons of uninitialized data with a comparison that is always false. Tested on x86_64-unknown-linux-gnu, pushed. 2021-08-11 Richard Biener * gcc.dg/lto/pr48622_1.c: Provide non-LTO definition of ashift_qi_1. --- gcc/testsuite/gcc.dg/lto/pr48622_1.c | 6 ++ 1 file changed, 6 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/lto/pr48622_1.c diff --git a/gcc/testsuite/gcc.dg/lto/pr48622_1.c b/gcc/testsuite/gcc.dg/lto/pr48622_1.c new file mode 100644 index 000..4d05bae2114 --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/pr48622_1.c @@ -0,0 +1,6 @@ +/* { dg-options "-fno-lto" } */ + +typedef unsigned int u8 __attribute__ ((mode (QI))); +u8 ashift_qi_1 (u8) +{ +} -- 2.31.1
Re: [PATCH][v2] Adjust volatile handling of the operand scanner
On Wed, 11 Aug 2021, Eric Botcazou wrote: > > So I'm leaning towards leaving build3 alone and fixing up frontends > > as issues pop up. > > FWIW fine with me. OK, so I pushed the original change (reposted below). Bootstrapped / tested on x86_64-unknown-linux-gnu. Richard. From e5a23d54d189f3d160c82f770683288a15c3645e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 9 Aug 2021 13:12:08 +0200 Subject: [PATCH] Adjust volatile handling of the operand scanner To: gcc-patches@gcc.gnu.org The GIMPLE SSA operand scanner handles COMPONENT_REFs that are not marked TREE_THIS_VOLATILE but have a TREE_THIS_VOLATILE FIELD_DECL as volatile. That's inconsistent with how TREE_THIS_VOLATILE testing on GENERIC refs works, which requires operand zero of component references to mirror TREE_THIS_VOLATILE to the ref so that testing TREE_THIS_VOLATILE on the outermost reference is enough to determine the volatileness. The following patch thus removes FIELD_DECL scanning from the GIMPLE SSA operand scanner, possibly leaving fewer stmts marked as gimple_has_volatile_ops. It shows we miss at least one case in the fortran frontend, though there's a suspicious amount of COMPONENT_REF creation compared to little setting of TREE_THIS_VOLATILE. This fixes the FAIL of gfortran.dg/volatile11.f90 that would otherwise occur. Visually inspecting fortran/ reveals a bunch of likely to fix cases but I don't know the constraints of 'volatile' uses in the fortran language to assess whether some of these are not necessary. 2021-08-09 Richard Biener gcc/ * tree-ssa-operands.c (operands_scanner::get_expr_operands): Do not look at COMPONENT_REF FIELD_DECLs TREE_THIS_VOLATILE to determine has_volatile_ops. gcc/fortran/ * trans-common.c (create_common): Set TREE_THIS_VOLATILE on the COMPONENT_REF if the field is volatile. 
--- gcc/fortran/trans-common.c | 9 + gcc/tree-ssa-operands.c| 7 +-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/gcc/fortran/trans-common.c b/gcc/fortran/trans-common.c index a11cf4c839e..7bcf18dc475 100644 --- a/gcc/fortran/trans-common.c +++ b/gcc/fortran/trans-common.c @@ -759,10 +759,11 @@ create_common (gfc_common_head *com, segment_info *head, bool saw_equiv) else gfc_add_decl_to_function (var_decl); - SET_DECL_VALUE_EXPR (var_decl, - fold_build3_loc (input_location, COMPONENT_REF, - TREE_TYPE (s->field), - decl, s->field, NULL_TREE)); + tree comp = build3_loc (input_location, COMPONENT_REF, + TREE_TYPE (s->field), decl, s->field, NULL_TREE); + if (TREE_THIS_VOLATILE (s->field)) + TREE_THIS_VOLATILE (comp) = 1; + SET_DECL_VALUE_EXPR (var_decl, comp); DECL_HAS_VALUE_EXPR_P (var_decl) = 1; GFC_DECL_COMMON_OR_EQUIV (var_decl) = 1; diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c index c15575416dd..ebf7eea3b04 100644 --- a/gcc/tree-ssa-operands.c +++ b/gcc/tree-ssa-operands.c @@ -834,12 +834,7 @@ operands_scanner::get_expr_operands (tree *expr_p, int flags) get_expr_operands (&TREE_OPERAND (expr, 0), flags); if (code == COMPONENT_REF) - { - if (!(flags & opf_no_vops) - && TREE_THIS_VOLATILE (TREE_OPERAND (expr, 1))) - gimple_set_has_volatile_ops (stmt, true); - get_expr_operands (&TREE_OPERAND (expr, 2), uflags); - } + get_expr_operands (&TREE_OPERAND (expr, 2), uflags); else if (code == ARRAY_REF || code == ARRAY_RANGE_REF) { get_expr_operands (&TREE_OPERAND (expr, 1), uflags); -- 2.31.1
[PATCH] target/101788 - avoid decomposing hard-register "loads"
This avoids decomposing hard-register accesses that masquerade as loads. Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. 2021-08-11 Richard Biener PR target/101877 * tree-ssa-forwprop.c (pass_forwprop::execute): Do not decompose hard-register accesses. --- gcc/tree-ssa-forwprop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index bd64b8e46bc..5b30d4c1a76 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -3190,7 +3190,8 @@ pass_forwprop::execute (function *fun) || (fun->curr_properties & PROP_gimple_lvec)) && gimple_assign_load_p (stmt) && !gimple_has_volatile_ops (stmt) - && !stmt_can_throw_internal (cfun, stmt)) + && !stmt_can_throw_internal (cfun, stmt) + && (!VAR_P (rhs) || !DECL_HARD_REGISTER (rhs))) optimize_vector_load (&gsi); else if (code == COMPLEX_EXPR) -- 2.31.1
[patch] Make -no-pie option work for native Windows
Hi, as already mentioned on the list, binutils 2.36 generates PIE executables by default on native Windows (because --dynamicbase is the default) so it makes sense to have a simple way to counter that and -no-pie seems appropriate, all the more so that it is automatically passed when building the compiler. Bootstrapped on x86 and x86-64/Windows, w/ and w/o binutils 2.36, OK for the mainline and 11 branch? 2021-08-11 Eric Botcazou * configure.ac (PE linker --disable-dynamicbase support): New check. * configure: Regenerate. * config.in: Likewise. * config/i386/mingw32.h (LINK_SPEC_DISABLE_DYNAMICBASE): New define. (LINK_SPEC): Use it. * config/i386/mingw-w64.h (LINK_SPEC_DISABLE_DYNAMICBASE): Likewise. (LINK_SPEC): Likewise. -- Eric Botcazou diff --git a/gcc/config/i386/mingw-w64.h b/gcc/config/i386/mingw-w64.h index 0cec6b02787..6cc7ac54fdd 100644 --- a/gcc/config/i386/mingw-w64.h +++ b/gcc/config/i386/mingw-w64.h @@ -89,6 +89,14 @@ along with GCC; see the file COPYING3. If not see # define LINK_SPEC_LARGE_ADDR_AWARE "" #endif +#undef LINK_SPEC_DISABLE_DYNAMICBASE +#if HAVE_LD_PE_DISABLE_DYNAMICBASE +# define LINK_SPEC_DISABLE_DYNAMICBASE \ + "%{!shared:%{!mdll:%{no-pie:--disable-dynamicbase}}}" +#else +# define LINK_SPEC_DISABLE_DYNAMICBASE "" +#endif + #undef LINK_SPEC #define LINK_SPEC SUB_LINK_SPEC " %{mwindows:--subsystem windows} \ %{mconsole:--subsystem console} \ @@ -97,6 +105,7 @@ along with GCC; see the file COPYING3. If not see %{static:-Bstatic} %{!static:-Bdynamic} \ %{shared|mdll: " SUB_LINK_ENTRY " --enable-auto-image-base} \ " LINK_SPEC_LARGE_ADDR_AWARE "\ + " LINK_SPEC_DISABLE_DYNAMICBASE "\ %(shared_libgcc_undefs)" /* Enable sincos optimization, overriding cygming.h. sincos, sincosf diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h index 36e7bae5e1b..779c9335711 100644 --- a/gcc/config/i386/mingw32.h +++ b/gcc/config/i386/mingw32.h @@ -148,6 +148,13 @@ along with GCC; see the file COPYING3. 
If not see "%{!shared:%{!mdll:%{!m64:--large-address-aware}}}" #endif +#if HAVE_LD_PE_DISABLE_DYNAMICBASE +# define LINK_SPEC_DISABLE_DYNAMICBASE \ + "%{!shared:%{!mdll:%{no-pie:--disable-dynamicbase}}}" +#else +# define LINK_SPEC_DISABLE_DYNAMICBASE "" +#endif + #define LINK_SPEC "%{mwindows:--subsystem windows} \ %{mconsole:--subsystem console} \ %{shared: %{mdll: %eshared and mdll are not compatible}} \ @@ -155,6 +162,7 @@ along with GCC; see the file COPYING3. If not see %{static:-Bstatic} %{!static:-Bdynamic} \ %{shared|mdll: " SUB_LINK_ENTRY " --enable-auto-image-base} \ " LINK_SPEC_LARGE_ADDR_AWARE "\ + " LINK_SPEC_DISABLE_DYNAMICBASE "\ %(shared_libgcc_undefs)" /* Include in the mingw32 libraries with libgcc */ diff --git a/gcc/configure.ac b/gcc/configure.ac index c8e0d63fe70..653a1cc561d 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -6383,6 +6383,23 @@ case $target_os in [Define if the PE linker has broken DWARF 5 support.]) fi AC_MSG_RESULT($gcc_cv_ld_broken_pe_dwarf5) + +AC_MSG_CHECKING(PE linker --disable-dynamicbase support) +gcc_cv_ld_disable_dynamicbase=no +if test $in_tree_ld = yes; then + if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 36 -o "$gcc_cv_gld_major_version" -gt 2; then \ +gcc_cv_ld_disable_dynamicbase=yes + fi +else + if $gcc_cv_ld --help 2>&1 | grep -q 'disable\-]dynamicbase' > /dev/null; then +gcc_cv_ld_disable_dynamicbase=yes + fi +fi +if test x"$gcc_cv_ld_disable_dynamicbase" = xyes; then + AC_DEFINE(HAVE_LD_PE_DISABLE_DYNAMICBASE, 1, +[Define if the PE linker supports --disable-dynamicbase option.]) +fi +AC_MSG_RESULT($gcc_cv_ld_disable_dynamicbase) ;; esac
Re: [PATCH] arm: Fix multilib mapping for CDE extensions [PR100856]
ping? https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575310.html On Wed, Aug 4, 2021 at 11:13 AM Christophe Lyon via Gcc-patches < gcc-patches@gcc.gnu.org> wrote: > ping? > > On Thu, 15 Jul 2021 at 15:07, Christophe LYON via Gcc-patches > wrote: > > > > This is a followup to Srinath's recent patch: the newly added test is > > failing e.g. on arm-linux-gnueabihf without R/M profile multilibs. > > > > It is also failing on arm-eabi with R/M profile multilibs if the > > execution engine does not support v8.1-M instructions. > > > > The patch avoids this by adding check_effective_target_FUNC_multilib > > in target-supports.exp which effectively checks whether the target > > supports linking and execution, like what is already done for other > > ARM effective targets. pr100856.c is updated to use it instead of > > arm_v8_1m_main_cde_mve_ok (which makes the testcase a bit of a > > duplicate with check_effective_target_FUNC_multilib). > > > > In addition, I noticed that requiring MVE does not seem necessary and > > this enables the test to pass even when targeting a CPU without MVE: > > since the test does not involve actual CDE instructions, it can pass > > on other architecture versions. For instance, when requiring MVE, we > > have to use cortex-m55 under QEMU for the test to pass because the > > memset() that comes from v8.1-m.main+mve multilib uses LOB > > instructions (DLS) (memset is used during startup). Keeping > > arm_v8_1m_main_cde_mve_ok would mean we would enable the test provided > > we have the right multilibs, causing a runtime error if the simulator > > does not support LOB instructions (e.g. when targeting cortex-m7). > > > > I do not update sourcebuild.texi since the CDE effective targets are > > already collectively documented. > > > > Finally, the patch fixes two typos in comments. > > > > 2021-07-15 Christophe Lyon > > > > PR target/100856 > > gcc/ > > * config/arm/arm.opt: Fix typo. > > * config/arm/t-rmprofile: Fix typo. 
> > > > gcc/testsuite/ > > * gcc.target/arm/acle/pr100856.c: Use arm_v8m_main_cde_multilib > > and arm_v8m_main_cde. > > * lib/target-supports.exp: Add > > check_effective_target_FUNC_multilib for ARM CDE. > > > > >
Re: [PATCH] c++: Fix ICE on defaulted spaceship with pointer return type [PR94162]
On Wed, Aug 11, 2021 at 09:42:56AM -0400, Jason Merrill wrote: > Checking CLASS_TYPE_P && decl_in_std_namespace (TYPE_MAIN_DECL) before > looking at the name makes sense to me. CLASS_TYPE_P is cheap, but isn't decl_in_std_namespace, especially when it needs to walk inline namespaces, better done only if we get a match, so like below? Though I can do it even in the first if if you think it is better... 2021-08-11 Jakub Jelinek gcc/cp/ PR c++/94162 * method.c (cat_tag_for): Return cc_last for !CLASS_TYPE_P or for classes not in std namespace. gcc/testsuite/ PR c++/99429 * g++.dg/cpp2a/spaceship-synth11.C: New test. PR c++/94162 * g++.dg/cpp2a/spaceship-synth-neg6.C: New test. --- gcc/cp/method.c.jj 2021-08-09 15:03:00.923206463 +0200 +++ gcc/cp/method.c 2021-08-11 15:52:27.157437691 +0200 @@ -1029,10 +1029,13 @@ is_cat (tree type, comp_cat_tag tag) static comp_cat_tag cat_tag_for (tree type) { + if (!CLASS_TYPE_P (type)) +return cc_last; for (int i = 0; i < cc_last; ++i) { comp_cat_tag tag = (comp_cat_tag)i; - if (is_cat (type, tag)) + if (is_cat (type, tag) + && decl_in_std_namespace_p (TYPE_MAIN_DECL (type))) return tag; } return cc_last; --- gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C.jj 2021-08-11 15:49:05.267204333 +0200 +++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C 2021-08-11 15:49:05.267204333 +0200 @@ -0,0 +1,29 @@ +// PR c++/99429 +// { dg-do compile { target c++20 } } + +namespace std { +struct strong_ordering { + int _v; + constexpr strong_ordering (int v) :_v(v) {} + constexpr operator int (void) const { return _v; } + static const strong_ordering less; + static const strong_ordering equal; + static const strong_ordering greater; +}; +constexpr strong_ordering strong_ordering::less = -1; +constexpr strong_ordering strong_ordering::equal = 0; +constexpr strong_ordering strong_ordering::greater = 1; +} + +template +struct duration { + static constexpr const long period = N; + constexpr duration (void) = default; + constexpr duration (const duration& 
d) = default; + constexpr bool operator== (const duration& d) const = default; + constexpr bool operator<=> (const duration& d) const = default; + long _d; +}; + +using nanoseconds = duration<1>; +using microseconds = duration; --- gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C.jj2021-08-11 15:49:05.268204320 +0200 +++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C 2021-08-11 15:49:05.268204320 +0200 @@ -0,0 +1,11 @@ +// PR c++/94162 +// { dg-do compile { target c++20 } } + +#include <compare> + +struct S { + int a; // { dg-error "three-way comparison of 'S::a' has type 'std::strong_ordering', which does not convert to 'int\\*'" } + int *operator<=>(const S&) const = default; +}; + +bool b = S{} < S{};// { dg-error "use of deleted function 'constexpr int\\* S::operator<=>\\\(const S&\\\) const'" } Jakub
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
> On Aug 11, 2021, at 8:58 AM, Richard Biener wrote: > > On Wed, 11 Aug 2021, Qing Zhao wrote: > >> >> >>> On Aug 11, 2021, at 8:37 AM, Richard Biener wrote: >>> >>> On Wed, 11 Aug 2021, Qing Zhao wrote: >>> > On Aug 11, 2021, at 2:02 AM, Richard Biener wrote: > > On Tue, 10 Aug 2021, Qing Zhao wrote: > >> >> >>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches >>> wrote: >>> >>> Hi, Richard, >>> On Aug 10, 2021, at 10:22 AM, Richard Biener wrote: >> >> Especially in the VLA case but likely also in general (though >> unlikely >> since usually the receiver of initializations are simple enough). >> I'd >> expect the VLA case end up as >> >> *ptr_to_decl = .DEFERRED_INIT (...); >> >> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. > > So, for the following small testing case: > > > extern void bar (int); > > void foo(int n) > { > int arr[n]; > bar (arr[2]); > return; > } > = > > If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple > -S -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is: > > = > void foo (int n) > { > int n.0; > sizetype D.1950; > bitsizetype D.1951; > sizetype D.1952; > bitsizetype D.1953; > sizetype D.1954; > int[0:D.1950] * arr.1; > void * saved_stack.2; > int arr[0:D.1950] [value-expr: *arr.1]; > > saved_stack.2 = __builtin_stack_save (); > try > { > n.0 = n; > _1 = (long int) n.0; > _2 = _1 + -1; > _3 = (sizetype) _2; > D.1950 = _3; > _4 = (sizetype) n.0; > _5 = (bitsizetype) _4; > _6 = _5 * 32; > D.1951 = _6; > _7 = (sizetype) n.0; > _8 = _7 * 4; > D.1952 = _8; > _9 = (sizetype) n.0; > _10 = (bitsizetype) _9; > _11 = _10 * 32; > D.1953 = _11; > _12 = (sizetype) n.0; > _13 = _12 * 4; > D.1954 = _13; > arr.1 = __builtin_alloca_with_align (D.1954, 32); > arr = .DEFERRED_INIT (D.1952, 2, 1); > _14 = (*arr.1)[2]; > bar (_14); > return; > } > finally > { > __builtin_stack_restore (saved_stack.2); > } > } > > > > You think that the above .DEFEERED_INIT is not correct? 
> It should be: > > *arr.1 = .DEFERRED_INIT (D.1952. 2, 1); > > ? Yes. >>> >>> I updated gimplify.c for VLA and now it emits the call to >>> .DEFERRED_INIT as: >>> >>> arr.1 = __builtin_alloca_with_align (D.1954, 32); >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); >>> >>> However, this call triggered the assertion failure in >>> verify_gimple_call of tree-cfg.c because the LHS is not a valid LHS. >>> Then I modify tree-cfg.c as: >>> >>> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c >>> index 330eb7dd89bf..180d4f1f9e32 100644 >>> --- a/gcc/tree-cfg.c >>> +++ b/gcc/tree-cfg.c >>> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt) >>> } >>> >>> tree lhs = gimple_call_lhs (stmt); >>> + /* For .DEFERRED_INIT call, the LHS might be an indirection of >>> + a pointer for the VLA variable, which is not a valid LHS of >>> + a gimple call, we ignore the asssertion on this. */ >>> if (lhs >>> + && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT)) >>>&& (!is_gimple_reg (lhs) >>> && (!is_gimple_lvalue (lhs) >>> || verify_types_in_gimple_reference >>> >>> The assertion failure in tree-cfg.c got resolved, but I got another >>> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, >>> int flags), line 945: >>> >>> 939 /* If we get here, something has gone wrong. */ >>> 940 if (flag_checking) >>> 941 { >>> 942 fprintf (stderr, "unhandled expression in >>> get_expr_operands():\n"); >>> 943 debug_tree (expr); >>> 944 fputs ("\n", stderr); >>> 945 gcc_unreachable (); >>> 946 } >>> >>> Looks like that the gimple statement: >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); >>> >>> Is not valid. i.e, the LHS should not be an indirection to a pointer. >>> >>> How to resolve this issue? > > It sounds like the LHS is an INDIRECT_REF maybe? That means it's > still not properly gimplified because it should end up as a MEM_REF > instead. > > But I'm just guessing here ... if you are
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
On Wed, 11 Aug 2021, Qing Zhao wrote: > > > > On Aug 11, 2021, at 8:37 AM, Richard Biener wrote: > > > > On Wed, 11 Aug 2021, Qing Zhao wrote: > > > >> > >> > >>> On Aug 11, 2021, at 2:02 AM, Richard Biener wrote: > >>> > >>> On Tue, 10 Aug 2021, Qing Zhao wrote: > >>> > > > > On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches > > wrote: > > > > Hi, Richard, > > > >> On Aug 10, 2021, at 10:22 AM, Richard Biener wrote: > > Especially in the VLA case but likely also in general (though > unlikely > since usually the receiver of initializations are simple enough). > I'd > expect the VLA case end up as > > *ptr_to_decl = .DEFERRED_INIT (...); > > where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. > >>> > >>> So, for the following small testing case: > >>> > >>> > >>> extern void bar (int); > >>> > >>> void foo(int n) > >>> { > >>> int arr[n]; > >>> bar (arr[2]); > >>> return; > >>> } > >>> = > >>> > >>> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple > >>> -S -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is: > >>> > >>> = > >>> void foo (int n) > >>> { > >>> int n.0; > >>> sizetype D.1950; > >>> bitsizetype D.1951; > >>> sizetype D.1952; > >>> bitsizetype D.1953; > >>> sizetype D.1954; > >>> int[0:D.1950] * arr.1; > >>> void * saved_stack.2; > >>> int arr[0:D.1950] [value-expr: *arr.1]; > >>> > >>> saved_stack.2 = __builtin_stack_save (); > >>> try > >>> { > >>> n.0 = n; > >>> _1 = (long int) n.0; > >>> _2 = _1 + -1; > >>> _3 = (sizetype) _2; > >>> D.1950 = _3; > >>> _4 = (sizetype) n.0; > >>> _5 = (bitsizetype) _4; > >>> _6 = _5 * 32; > >>> D.1951 = _6; > >>> _7 = (sizetype) n.0; > >>> _8 = _7 * 4; > >>> D.1952 = _8; > >>> _9 = (sizetype) n.0; > >>> _10 = (bitsizetype) _9; > >>> _11 = _10 * 32; > >>> D.1953 = _11; > >>> _12 = (sizetype) n.0; > >>> _13 = _12 * 4; > >>> D.1954 = _13; > >>> arr.1 = __builtin_alloca_with_align (D.1954, 32); > >>> arr = .DEFERRED_INIT (D.1952, 2, 1); > >>> _14 = (*arr.1)[2]; > >>> bar (_14); > >>> 
return; > >>> } > >>> finally > >>> { > >>> __builtin_stack_restore (saved_stack.2); > >>> } > >>> } > >>> > >>> > >>> > >>> You think that the above .DEFEERED_INIT is not correct? > >>> It should be: > >>> > >>> *arr.1 = .DEFERRED_INIT (D.1952. 2, 1); > >>> > >>> ? > >> > >> Yes. > >> > > > > I updated gimplify.c for VLA and now it emits the call to > > .DEFERRED_INIT as: > > > >arr.1 = __builtin_alloca_with_align (D.1954, 32); > >*arr.1 = .DEFERRED_INIT (D.1952, 2, 1); > > > > However, this call triggered the assertion failure in > > verify_gimple_call of tree-cfg.c because the LHS is not a valid LHS. > > Then I modify tree-cfg.c as: > > > > diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c > > index 330eb7dd89bf..180d4f1f9e32 100644 > > --- a/gcc/tree-cfg.c > > +++ b/gcc/tree-cfg.c > > @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt) > >} > > > > tree lhs = gimple_call_lhs (stmt); > > + /* For .DEFERRED_INIT call, the LHS might be an indirection of > > + a pointer for the VLA variable, which is not a valid LHS of > > + a gimple call, we ignore the asssertion on this. */ > > if (lhs > > + && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT)) > > && (!is_gimple_reg (lhs) > >&& (!is_gimple_lvalue (lhs) > >|| verify_types_in_gimple_reference > > > > The assertion failure in tree-cfg.c got resolved, but I got another > > assertion failure in operands_scanner::get_expr_operands (tree *expr_p, > > int flags), line 945: > > > > 939 /* If we get here, something has gone wrong. */ > > 940 if (flag_checking) > > 941 { > > 942 fprintf (stderr, "unhandled expression in > > get_expr_operands():\n"); > > 943 debug_tree (expr); > > 944 fputs ("\n", stderr); > > 945 gcc_unreachable (); > > 946 } > > > > Looks like that the gimple statement: > > *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); > > > > Is not valid. i.e, the LHS should not be an indirection to a pointer. > > > > How to resolve this issue? > >>> > >>> It sounds like the LHS is an INDIRECT_REF maybe? 
That means it's > >>> still not properly gimplified because it should end up as a MEM_REF > >>> instead. > >>> > >>> But I'm just guessing here ... if you are in a debugger then you can > >>>
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
> On Aug 11, 2021, at 8:37 AM, Richard Biener wrote: > > On Wed, 11 Aug 2021, Qing Zhao wrote: > >> >> >>> On Aug 11, 2021, at 2:02 AM, Richard Biener wrote: >>> >>> On Tue, 10 Aug 2021, Qing Zhao wrote: >>> > On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches > wrote: > > Hi, Richard, > >> On Aug 10, 2021, at 10:22 AM, Richard Biener wrote: Especially in the VLA case but likely also in general (though unlikely since usually the receiver of initializations are simple enough). I'd expect the VLA case end up as *ptr_to_decl = .DEFERRED_INIT (...); where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. >>> >>> So, for the following small testing case: >>> >>> >>> extern void bar (int); >>> >>> void foo(int n) >>> { >>> int arr[n]; >>> bar (arr[2]); >>> return; >>> } >>> = >>> >>> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S >>> -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is: >>> >>> = >>> void foo (int n) >>> { >>> int n.0; >>> sizetype D.1950; >>> bitsizetype D.1951; >>> sizetype D.1952; >>> bitsizetype D.1953; >>> sizetype D.1954; >>> int[0:D.1950] * arr.1; >>> void * saved_stack.2; >>> int arr[0:D.1950] [value-expr: *arr.1]; >>> >>> saved_stack.2 = __builtin_stack_save (); >>> try >>> { >>> n.0 = n; >>> _1 = (long int) n.0; >>> _2 = _1 + -1; >>> _3 = (sizetype) _2; >>> D.1950 = _3; >>> _4 = (sizetype) n.0; >>> _5 = (bitsizetype) _4; >>> _6 = _5 * 32; >>> D.1951 = _6; >>> _7 = (sizetype) n.0; >>> _8 = _7 * 4; >>> D.1952 = _8; >>> _9 = (sizetype) n.0; >>> _10 = (bitsizetype) _9; >>> _11 = _10 * 32; >>> D.1953 = _11; >>> _12 = (sizetype) n.0; >>> _13 = _12 * 4; >>> D.1954 = _13; >>> arr.1 = __builtin_alloca_with_align (D.1954, 32); >>> arr = .DEFERRED_INIT (D.1952, 2, 1); >>> _14 = (*arr.1)[2]; >>> bar (_14); >>> return; >>> } >>> finally >>> { >>> __builtin_stack_restore (saved_stack.2); >>> } >>> } >>> >>> >>> >>> You think that the above .DEFEERED_INIT is not correct? 
>>> It should be: >>> >>> *arr.1 = .DEFERRED_INIT (D.1952. 2, 1); >>> >>> ? >> >> Yes. >> > > I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT > as: > >arr.1 = __builtin_alloca_with_align (D.1954, 32); >*arr.1 = .DEFERRED_INIT (D.1952, 2, 1); > > However, this call triggered the assertion failure in verify_gimple_call > of tree-cfg.c because the LHS is not a valid LHS. > Then I modify tree-cfg.c as: > > diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c > index 330eb7dd89bf..180d4f1f9e32 100644 > --- a/gcc/tree-cfg.c > +++ b/gcc/tree-cfg.c > @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt) >} > > tree lhs = gimple_call_lhs (stmt); > + /* For .DEFERRED_INIT call, the LHS might be an indirection of > + a pointer for the VLA variable, which is not a valid LHS of > + a gimple call, we ignore the asssertion on this. */ > if (lhs > + && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT)) > && (!is_gimple_reg (lhs) >&& (!is_gimple_lvalue (lhs) >|| verify_types_in_gimple_reference > > The assertion failure in tree-cfg.c got resolved, but I got another > assertion failure in operands_scanner::get_expr_operands (tree *expr_p, > int flags), line 945: > > 939 /* If we get here, something has gone wrong. */ > 940 if (flag_checking) > 941 { > 942 fprintf (stderr, "unhandled expression in > get_expr_operands():\n"); > 943 debug_tree (expr); > 944 fputs ("\n", stderr); > 945 gcc_unreachable (); > 946 } > > Looks like that the gimple statement: > *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); > > Is not valid. i.e, the LHS should not be an indirection to a pointer. > > How to resolve this issue? >>> >>> It sounds like the LHS is an INDIRECT_REF maybe? That means it's >>> still not properly gimplified because it should end up as a MEM_REF >>> instead. >>> >>> But I'm just guessing here ... if you are in a debugger then you can >>> invoke debug_tree (lhs) in the inferior to see what it exactly is >>> at the point of the failure. 
>> >> Yes, it’s an INDIRECT_REF at the point of the failure even though I added a >> >> gimplify_var_or_parm_decl (lhs) > > I think the easiest is to build the .DEFERRED_INIT as GENERIC > and use gimplify_assign () to gimplify and add the result > to the sequence. Thus,
Re: [PATCH] c++: Improve memory usage of subsumption [PR100828]
On 8/9/21 5:07 PM, Patrick Palka wrote: On Wed, Jul 28, 2021 at 4:42 PM Jason Merrill wrote: On 7/19/21 6:05 PM, Patrick Palka wrote: Constraint subsumption is implemented in two steps. The first step computes the disjunctive (or conjunctive) normal form of one of the constraints, and the second step verifies that each clause in the decomposed form implies the other constraint. Performing these two steps separately is problematic because in the first step the disjunctive normal form can be exponentially larger than the original constraint, and by computing it ahead of time we'd have to keep all of it in memory. This patch fixes this exponential blowup in memory usage by interleaving these two steps, so that as soon as we decompose one clause we check implication for it. In turn, memory usage during subsumption is now worst case linear in the size of the constraints rather than exponential, and so we can safely remove the hard limit of 16 clauses without introducing runaway memory usage on some inputs. (Note the _time_ complexity of subsumption is still exponential in the worst case.) In order for this to work we need formula::branch to prepend the copy of the current clause directly after the current clause rather than at the end of the list, so that we fully decompose a clause shortly after creating it. Otherwise we'd end up accumulating exponentially many (partially decomposed) clauses in memory anyway. Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on range-v3 and cmcstl2. Does this look OK for trunk and perhaps 11? OK for trunk. Thanks a lot, patch committed to trunk as r12-2658. Since this low complexity limit was introduced in GCC 10, what do you think about increasing the limit from 16 to say 128 in the 10/11 release branches as a relatively safe stopgap? Now that 11.2 is out, go ahead and apply this patch to the 11 branch. Won't a limit of 128 in GCC 10 lead to extremely long compile times for affected code? 
Is that more desirable than an error? PR c++/100828 gcc/cp/ChangeLog: * logic.cc (formula::formula): Use emplace_back. (formula::branch): Insert a copy of m_current in front of m_current instead of at the end of the list. (formula::erase): Define. (decompose_formula): Remove. (decompose_antecedents): Remove. (decompose_consequents): Remove. (derive_proofs): Remove. (max_problem_size): Remove. (diagnose_constraint_size): Remove. (subsumes_constraints_nonnull): Rewrite directly in terms of decompose_clause and derive_proof, interleaving decomposition with implication checking. Use formula::erase to free the current clause before moving on to the next one. --- gcc/cp/logic.cc | 118 ++-- 1 file changed, 35 insertions(+), 83 deletions(-) diff --git a/gcc/cp/logic.cc b/gcc/cp/logic.cc index 142457e408a..3f872c11fe2 100644 --- a/gcc/cp/logic.cc +++ b/gcc/cp/logic.cc @@ -223,9 +223,7 @@ struct formula formula (tree t) { -/* This should call emplace_back(). There's an extra copy being - invoked by using push_back(). */ -m_clauses.push_back (t); +m_clauses.emplace_back (t); m_current = m_clauses.begin (); } @@ -248,8 +246,7 @@ struct formula clause& branch () { gcc_assert (!done ()); -m_clauses.push_back (*m_current); -return m_clauses.back (); +return *m_clauses.insert (std::next (m_current), *m_current); } /* Returns the position of the current clause. */ @@ -287,6 +284,14 @@ struct formula return m_clauses.end (); } + /* Remove the specified clause. */ + + void erase (iterator i) + { +gcc_assert (i != m_current); +m_clauses.erase (i); + } + std::list m_clauses; /* The list of clauses. */ iterator m_current; /* The current clause. */ }; @@ -659,39 +664,6 @@ decompose_clause (formula& f, clause& c, rules r) f.advance (); } -/* Decompose the logical formula F according to the logical - rules determined by R. The result is a formula containing - clauses that contain only atomic terms. 
*/ - -void -decompose_formula (formula& f, rules r) -{ - while (!f.done ()) -decompose_clause (f, *f.current (), r); -} - -/* Fully decomposing T into a list of sequents, each comprised of - a list of atomic constraints, as if T were an antecedent. */ - -static formula -decompose_antecedents (tree t) -{ - formula f (t); - decompose_formula (f, left); - return f; -} - -/* Fully decomposing T into a list of sequents, each comprised of - a list of atomic constraints, as if T were a consequent. */ - -static formula -decompose_consequents (tree t) -{ - formula f (t); - decompose_formula (f, right); - return f; -} - static bool derive_proof (clause&, tree, rules); /* Derive a proof of both operands of T. */ @@ -744,28 +716,6 @@ derive_proof (clause& c, tree t,
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
> On Aug 11, 2021, at 4:02 AM, Richard Sandiford > wrote: > > Qing Zhao via Gcc-patches writes: >>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches >>> wrote: >>> >>> Hi, Richard, >>> On Aug 10, 2021, at 10:22 AM, Richard Biener wrote: >> >> Especially in the VLA case but likely also in general (though unlikely >> since usually the receiver of initializations are simple enough). I'd >> expect the VLA case end up as >> >> *ptr_to_decl = .DEFERRED_INIT (...); >> >> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. > > So, for the following small testing case: > > > extern void bar (int); > > void foo(int n) > { > int arr[n]; > bar (arr[2]); > return; > } > = > > If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S > -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is: > > = > void foo (int n) > { > int n.0; > sizetype D.1950; > bitsizetype D.1951; > sizetype D.1952; > bitsizetype D.1953; > sizetype D.1954; > int[0:D.1950] * arr.1; > void * saved_stack.2; > int arr[0:D.1950] [value-expr: *arr.1]; > > saved_stack.2 = __builtin_stack_save (); > try > { >n.0 = n; >_1 = (long int) n.0; >_2 = _1 + -1; >_3 = (sizetype) _2; >D.1950 = _3; >_4 = (sizetype) n.0; >_5 = (bitsizetype) _4; >_6 = _5 * 32; >D.1951 = _6; >_7 = (sizetype) n.0; >_8 = _7 * 4; >D.1952 = _8; >_9 = (sizetype) n.0; >_10 = (bitsizetype) _9; >_11 = _10 * 32; >D.1953 = _11; >_12 = (sizetype) n.0; >_13 = _12 * 4; >D.1954 = _13; >arr.1 = __builtin_alloca_with_align (D.1954, 32); >arr = .DEFERRED_INIT (D.1952, 2, 1); >_14 = (*arr.1)[2]; >bar (_14); >return; > } > finally > { >__builtin_stack_restore (saved_stack.2); > } > } > > > > You think that the above .DEFEERED_INIT is not correct? > It should be: > > *arr.1 = .DEFERRED_INIT (D.1952. 2, 1); > > ? Yes. 
>>> >>> I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT as: >>> >>> arr.1 = __builtin_alloca_with_align (D.1954, 32); >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); >>> >>> However, this call triggered the assertion failure in verify_gimple_call of >>> tree-cfg.c because the LHS is not a valid LHS. >>> Then I modify tree-cfg.c as: >>> >>> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c >>> index 330eb7dd89bf..180d4f1f9e32 100644 >>> --- a/gcc/tree-cfg.c >>> +++ b/gcc/tree-cfg.c >>> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt) >>> } >>> >>> tree lhs = gimple_call_lhs (stmt); >>> + /* For .DEFERRED_INIT call, the LHS might be an indirection of >>> + a pointer for the VLA variable, which is not a valid LHS of >>> + a gimple call, we ignore the asssertion on this. */ >>> if (lhs >>> + && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT)) >>> && (!is_gimple_reg (lhs) >>> && (!is_gimple_lvalue (lhs) >>> || verify_types_in_gimple_reference >>> >>> The assertion failure in tree-cfg.c got resolved, but I got another >>> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, int >>> flags), line 945: >>> >>> 939 /* If we get here, something has gone wrong. */ >>> 940 if (flag_checking) >>> 941 { >>> 942 fprintf (stderr, "unhandled expression in >>> get_expr_operands():\n"); >>> 943 debug_tree (expr); >>> 944 fputs ("\n", stderr); >>> 945 gcc_unreachable (); >>> 946 } >>> >>> Looks like that the gimple statement: >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); >>> >>> Is not valid. i.e, the LHS should not be an indirection to a pointer. >>> >>> How to resolve this issue? >> >> I came up with the following solution: >> >> Define the IFN_DEFERRED_INIT function as: >> >> LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA); >> >> if IS_VLA is false, the LHS is the DECL itself, >> if IS_VLA is true, the LHS is the pointer to this DECL that created by >> gimplify_vla_decl. >> >> >> The benefit of this solution are: >> >> 1. 
Resolved the invalid IR issue; >> 2. The call stmt carries the address of the VLA naturally; >> >> The issue with this solution is: >> >> For VLA and non-VLA, the LHS will be different, >> >> Do you see any other potential issues with this solution? > > The idea behind the DECL version of the .DEFERRED_INIT semantics was > that .DEFERRED_INIT just returns a SIZE-byte value that the caller > then assigns to a SIZE-byte lhs (with the caller choosing the lhs). > .DEFERRED_INIT itself doesn't read or write memory and so can be const, > which in turn allows alias analysis to be more precise. Yes. That’s right.
Re: [PATCH] c++: Fix ICE on defaulted spaceship with pointer return type [PR94162]
On 8/10/21 4:39 AM, Jakub Jelinek wrote: Hi! The spaceship-synth-neg6.C testcase ICEs because we call cat_tag_for on the explicit return type, but pointer types don't have TYPE_LINKAGE_IDENTIFIER. The patch fixes that. Or should I be checking for if (!CLASS_TYPE_P (type)) return cc_last; instead (are class type guaranteed to have TYPE_LINKAGE_IDENTIFIER?)? I also wonder if after finding a match we shouldn't verify if is a class type in std namespace (i.e. that TYPE_NAME (TYPE_MAIN_VARIANT (type)) is a TYPE_DECL and decl_in_std_namespace_p (TYPE_NAME (TYPE_MAIN_VARIANT (type))) because it seems nothing prevents it from returning non-cc_last say on namespace N { struct partial_ordering {}; } etc. Checking CLASS_TYPE_P && decl_in_std_namespace (TYPE_MAIN_DECL) before looking at the name makes sense to me. The g++.dg/cpp2a/spaceship-synth11.C testcase is from a PR that has been fixed with r12-619-gfc178519771db508c03611cff4a1466cf67fce1d (but not backported to 11). Bootstrapped/regtested on x86_64-linux and i686-linux. 2021-08-10 Jakub Jelinek gcc/cp/ PR c++/94162 * method.c (cat_tag_for): Return cc_last for types with no linkage identifier. gcc/testsuite/ PR c++/99429 * g++.dg/cpp2a/spaceship-synth11.C: New test. PR c++/94162 * g++.dg/cpp2a/spaceship-synth-neg6.C: New test. 
--- gcc/cp/method.c.jj 2021-06-25 10:36:22.169019953 +0200 +++ gcc/cp/method.c 2021-08-09 12:26:38.590166006 +0200 @@ -1029,6 +1029,8 @@ is_cat (tree type, comp_cat_tag tag) static comp_cat_tag cat_tag_for (tree type) { + if (!TYPE_LINKAGE_IDENTIFIER (type)) +return cc_last; for (int i = 0; i < cc_last; ++i) { comp_cat_tag tag = (comp_cat_tag)i; --- gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C.jj 2021-08-09 12:28:58.748240310 +0200 +++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth11.C 2021-08-09 12:29:44.023618250 +0200 @@ -0,0 +1,29 @@ +// PR c++/99429 +// { dg-do compile { target c++20 } } + +namespace std { +struct strong_ordering { + int _v; + constexpr strong_ordering (int v) :_v(v) {} + constexpr operator int (void) const { return _v; } + static const strong_ordering less; + static const strong_ordering equal; + static const strong_ordering greater; +}; +constexpr strong_ordering strong_ordering::less = -1; +constexpr strong_ordering strong_ordering::equal = 0; +constexpr strong_ordering strong_ordering::greater = 1; +} + +template +struct duration { + static constexpr const long period = N; + constexpr duration (void) = default; + constexpr duration (const duration& d) = default; + constexpr bool operator== (const duration& d) const = default; + constexpr bool operator<=> (const duration& d) const = default; + long _d; +}; + +using nanoseconds = duration<1>; +using microseconds = duration; --- gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C.jj2021-08-09 12:31:47.411922957 +0200 +++ gcc/testsuite/g++.dg/cpp2a/spaceship-synth-neg6.C 2021-08-09 12:35:26.995906403 +0200 @@ -0,0 +1,11 @@ +// PR c++/94162 +// { dg-do compile { target c++20 } } + +#include + +struct S { + int a; // { dg-error "three-way comparison of 'S::a' has type 'std::strong_ordering', which does not convert to 'int\\*'" } + int *operator<=>(const S&) const = default; +}; + +bool b = S{} < S{}; // { dg-error "use of deleted function 'constexpr int\\* S::operator<=>\\\(const S&\\\) const'" } 
Jakub
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
On Wed, 11 Aug 2021, Qing Zhao wrote: > > > > On Aug 11, 2021, at 2:02 AM, Richard Biener wrote: > > > > On Tue, 10 Aug 2021, Qing Zhao wrote: > > > >> > >> > >>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches > >>> wrote: > >>> > >>> Hi, Richard, > >>> > On Aug 10, 2021, at 10:22 AM, Richard Biener wrote: > >> > >> Especially in the VLA case but likely also in general (though unlikely > >> since usually the receiver of initializations are simple enough). I'd > >> expect the VLA case end up as > >> > >> *ptr_to_decl = .DEFERRED_INIT (...); > >> > >> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. > > > > So, for the following small testing case: > > > > > > extern void bar (int); > > > > void foo(int n) > > { > > int arr[n]; > > bar (arr[2]); > > return; > > } > > = > > > > If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S > > -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is: > > > > = > > void foo (int n) > > { > > int n.0; > > sizetype D.1950; > > bitsizetype D.1951; > > sizetype D.1952; > > bitsizetype D.1953; > > sizetype D.1954; > > int[0:D.1950] * arr.1; > > void * saved_stack.2; > > int arr[0:D.1950] [value-expr: *arr.1]; > > > > saved_stack.2 = __builtin_stack_save (); > > try > > { > >n.0 = n; > >_1 = (long int) n.0; > >_2 = _1 + -1; > >_3 = (sizetype) _2; > >D.1950 = _3; > >_4 = (sizetype) n.0; > >_5 = (bitsizetype) _4; > >_6 = _5 * 32; > >D.1951 = _6; > >_7 = (sizetype) n.0; > >_8 = _7 * 4; > >D.1952 = _8; > >_9 = (sizetype) n.0; > >_10 = (bitsizetype) _9; > >_11 = _10 * 32; > >D.1953 = _11; > >_12 = (sizetype) n.0; > >_13 = _12 * 4; > >D.1954 = _13; > >arr.1 = __builtin_alloca_with_align (D.1954, 32); > >arr = .DEFERRED_INIT (D.1952, 2, 1); > >_14 = (*arr.1)[2]; > >bar (_14); > >return; > > } > > finally > > { > >__builtin_stack_restore (saved_stack.2); > > } > > } > > > > > > > > You think that the above .DEFEERED_INIT is not correct? > > It should be: > > > > *arr.1 = .DEFERRED_INIT (D.1952. 
2, 1); > > > > ? > > Yes. > > >>> > >>> I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT > >>> as: > >>> > >>> arr.1 = __builtin_alloca_with_align (D.1954, 32); > >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); > >>> > >>> However, this call triggered the assertion failure in verify_gimple_call > >>> of tree-cfg.c because the LHS is not a valid LHS. > >>> Then I modify tree-cfg.c as: > >>> > >>> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c > >>> index 330eb7dd89bf..180d4f1f9e32 100644 > >>> --- a/gcc/tree-cfg.c > >>> +++ b/gcc/tree-cfg.c > >>> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt) > >>> } > >>> > >>> tree lhs = gimple_call_lhs (stmt); > >>> + /* For .DEFERRED_INIT call, the LHS might be an indirection of > >>> + a pointer for the VLA variable, which is not a valid LHS of > >>> + a gimple call, we ignore the asssertion on this. */ > >>> if (lhs > >>> + && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT)) > >>> && (!is_gimple_reg (lhs) > >>> && (!is_gimple_lvalue (lhs) > >>> || verify_types_in_gimple_reference > >>> > >>> The assertion failure in tree-cfg.c got resolved, but I got another > >>> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, > >>> int flags), line 945: > >>> > >>> 939 /* If we get here, something has gone wrong. */ > >>> 940 if (flag_checking) > >>> 941 { > >>> 942 fprintf (stderr, "unhandled expression in > >>> get_expr_operands():\n"); > >>> 943 debug_tree (expr); > >>> 944 fputs ("\n", stderr); > >>> 945 gcc_unreachable (); > >>> 946 } > >>> > >>> Looks like that the gimple statement: > >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); > >>> > >>> Is not valid. i.e, the LHS should not be an indirection to a pointer. > >>> > >>> How to resolve this issue? > > > > It sounds like the LHS is an INDIRECT_REF maybe? That means it's > > still not properly gimplified because it should end up as a MEM_REF > > instead. > > > > But I'm just guessing here ... 
if you are in a debugger then you can > > invoke debug_tree (lhs) in the inferior to see what it exactly is > > at the point of the failure. > > Yes, it’s an INDIRECT_REF at the point of the failure even though I added a > > gimplify_var_or_parm_decl (lhs) I think the easiest is to build the .DEFERRED_INIT as GENERIC and use gimplify_assign () to gimplify and add the result to the sequence. Thus, build a GENERIC CALL_EXPR and then
Small tweak to expand_used_vars
This completes the replacement of DECL_ATTRIBUTES (current_function_decl) with the attribs local variable. Tested on x86-64/Linux, applied on the mainline as obvious. 2021-08-11 Eric Botcazou * cfgexpand.c (expand_used_vars): Reuse attribs local variable. -- Eric Botcazou diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index 818328071db..03260b019e5 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -2294,22 +2294,19 @@ expand_used_vars (bitmap forced_stack_vars) if (gen_stack_protect_signal || cfun->calls_alloca || has_protected_decls - || lookup_attribute ("stack_protect", - DECL_ATTRIBUTES (current_function_decl))) + || lookup_attribute ("stack_protect", attribs)) create_stack_guard (); break; case SPCT_FLAG_DEFAULT: if (cfun->calls_alloca || has_protected_decls - || lookup_attribute ("stack_protect", - DECL_ATTRIBUTES (current_function_decl))) + || lookup_attribute ("stack_protect", attribs)) create_stack_guard (); break; case SPCT_FLAG_EXPLICIT: - if (lookup_attribute ("stack_protect", - DECL_ATTRIBUTES (current_function_decl))) + if (lookup_attribute ("stack_protect", attribs)) create_stack_guard (); break;
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
> On Aug 11, 2021, at 2:02 AM, Richard Biener wrote: > > On Tue, 10 Aug 2021, Qing Zhao wrote: > >> >> >>> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches >>> wrote: >>> >>> Hi, Richard, >>> On Aug 10, 2021, at 10:22 AM, Richard Biener wrote: >> >> Especially in the VLA case but likely also in general (though unlikely >> since usually the receiver of initializations are simple enough). I'd >> expect the VLA case end up as >> >> *ptr_to_decl = .DEFERRED_INIT (...); >> >> where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. > > So, for the following small testing case: > > > extern void bar (int); > > void foo(int n) > { > int arr[n]; > bar (arr[2]); > return; > } > = > > If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S > -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is: > > = > void foo (int n) > { > int n.0; > sizetype D.1950; > bitsizetype D.1951; > sizetype D.1952; > bitsizetype D.1953; > sizetype D.1954; > int[0:D.1950] * arr.1; > void * saved_stack.2; > int arr[0:D.1950] [value-expr: *arr.1]; > > saved_stack.2 = __builtin_stack_save (); > try > { >n.0 = n; >_1 = (long int) n.0; >_2 = _1 + -1; >_3 = (sizetype) _2; >D.1950 = _3; >_4 = (sizetype) n.0; >_5 = (bitsizetype) _4; >_6 = _5 * 32; >D.1951 = _6; >_7 = (sizetype) n.0; >_8 = _7 * 4; >D.1952 = _8; >_9 = (sizetype) n.0; >_10 = (bitsizetype) _9; >_11 = _10 * 32; >D.1953 = _11; >_12 = (sizetype) n.0; >_13 = _12 * 4; >D.1954 = _13; >arr.1 = __builtin_alloca_with_align (D.1954, 32); >arr = .DEFERRED_INIT (D.1952, 2, 1); >_14 = (*arr.1)[2]; >bar (_14); >return; > } > finally > { >__builtin_stack_restore (saved_stack.2); > } > } > > > > You think that the above .DEFEERED_INIT is not correct? > It should be: > > *arr.1 = .DEFERRED_INIT (D.1952. 2, 1); > > ? Yes. 
>>> >>> I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT as: >>> >>> arr.1 = __builtin_alloca_with_align (D.1954, 32); >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); >>> >>> However, this call triggered the assertion failure in verify_gimple_call of >>> tree-cfg.c because the LHS is not a valid LHS. >>> Then I modify tree-cfg.c as: >>> >>> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c >>> index 330eb7dd89bf..180d4f1f9e32 100644 >>> --- a/gcc/tree-cfg.c >>> +++ b/gcc/tree-cfg.c >>> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt) >>> } >>> >>> tree lhs = gimple_call_lhs (stmt); >>> + /* For .DEFERRED_INIT call, the LHS might be an indirection of >>> + a pointer for the VLA variable, which is not a valid LHS of >>> + a gimple call, we ignore the asssertion on this. */ >>> if (lhs >>> + && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT)) >>> && (!is_gimple_reg (lhs) >>> && (!is_gimple_lvalue (lhs) >>> || verify_types_in_gimple_reference >>> >>> The assertion failure in tree-cfg.c got resolved, but I got another >>> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, int >>> flags), line 945: >>> >>> 939 /* If we get here, something has gone wrong. */ >>> 940 if (flag_checking) >>> 941 { >>> 942 fprintf (stderr, "unhandled expression in >>> get_expr_operands():\n"); >>> 943 debug_tree (expr); >>> 944 fputs ("\n", stderr); >>> 945 gcc_unreachable (); >>> 946 } >>> >>> Looks like that the gimple statement: >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); >>> >>> Is not valid. i.e, the LHS should not be an indirection to a pointer. >>> >>> How to resolve this issue? > > It sounds like the LHS is an INDIRECT_REF maybe? That means it's > still not properly gimplified because it should end up as a MEM_REF > instead. > > But I'm just guessing here ... if you are in a debugger then you can > invoke debug_tree (lhs) in the inferior to see what it exactly is > at the point of the failure. 
Yes, it’s an INDIRECT_REF at the point of the failure even though I added a gimplify_var_or_parm_decl (lhs) Qing > >> I came up with the following solution: >> >> Define the IFN_DEFERRED_INIT function as: >> >> LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA); >> >> if IS_VLA is false, the LHS is the DECL itself, >> if IS_VLA is true, the LHS is the pointer to this DECL that created by >> gimplify_vla_decl. >> >> >> The benefit of this solution are: >> >> 1. Resolved the invalid IR issue; >> 2. The call stmt carries the address of the VLA natually; >> >> The issue with this solution is: >> >> For VLA and non-VLA, the
Re: [PATCH] Do not use tuple-like interface for pair in unordered containers
Hi Sorry for the delay, I had just missed this message. I think you are clearly more expert than me for the changes you propose. I had a look at the patch and it seems just fine as it keeps the forwarding as expected. Nice simplification in _NodeBuilder<_Select1st>, we indeed only need to deal with std::pair type in this case. François On 26/07/21 7:25 pm, Jonathan Wakely wrote: On 23/07/21 19:21 +0100, Jonathan Wakely wrote: I've been experimenting with this patch, which removes the need to use std::tuple_element and std::get to access the members of a std::pair in unordered_{map,multimap}. I'm in the process of refactoring the header to reduce header dependencies throughout the library, and this is the only use of the tuple-like interface for std::pair in the library. Using tuple_element and std::get resolved PR 53339 by allowing the std::pair type to be incomplete, however that is no longer supported anyway (the 23_containers/unordered_map/requirements/53339.cc test case is XFAILed). That means we could just define _Select1st as: struct _Select1st { template<typename _Tp> auto operator()(_Tp&& __x) const noexcept -> decltype(std::forward<_Tp>(__x).first) { return std::forward<_Tp>(__x).first; } }; But the approach in the patch seems OK too. Actually I have a fix for PR 53339 so that we can support incomplete types again. So we don't want to access the .first member in the return type, as that requires a complete type.
Re: ipa-modref: merge flags when adding escape
> While working on some function splitting changes, I've got a > miscompilation in stagefeedback that I've tracked down to a > complicated scenario: > > - ipa-modref miscomputes a function parameter as having EAF_DIRECT, > because it's dereferenced and passed on to another function, but > add_escape_point does not update the flags for the dereferenced > SSA_NAME passed as a parameter, and the EAF_UNUSED in the value that > first initializes it, that remains unchanged throughout, causes > deref_flags to set EAF_DIRECT, among other flags. > > - structalias, seeing the EAF_DIRECT in the parameter for that > function, refrains from mak[ing]_transitive_closure_constraints for > a pointer passed in that parameter. > > - tree dse2 concludes the initializer of the pointed-to variable is a > dead store and removes it. > > The test depends on gimple passes's processing of functions in a > certain order to expose parm flag miscomputed by ipa-modref. A > different order may enable the non-ipa modref2 pass to compute flags > differently and avoid the problem. > > I've arranged for add_escape_point to merge flags, as the non-ipa path > does. I've also caught and fixed an error in the dumping of escaping > flags. > > The problem affects at least trunk and gcc-11. I've so far bootstrapped > GCC 11, and I'm now regstrapping trunk. Ok to install if it passes? > > > for gcc/ChangeLog > > * ipa-modref.c (modref_lattice::add_escape_point): Merge > min_flags into flags. > (modref_lattice::dump): Fix escape_point's min_flags dumping. > > for gcc/testsuite/ChangeLog > > * c-c++-common/modref-dse.c: New. Hi, thank you for looking into the bug and sorry for taking so long to respond. The fix you propose is a bit too generous, since it essentially disables the IPA bits of ipa-modref (it will resort to the worst-case solution w/o any IPA propagation). In IPA mode the proper flags are supposed to be determined by propagation via "escape points".
The bug is a bit subtle, caused by an optimization that avoids recording flags for escape points where we know that we do not care. This is tested by comparing min_flags (which is the known conservative estimation used by local analysis) with flags of the value being determined. If these flags are a subset of min_flags there is nothing to gain. While merging lattices there is a case where a direct escape becomes indirect, and in this case I forgot to update min_flags to the dereferenced version, which in turn causes the escape point to be skipped. This is an improved patch I have bootstrapped/regtested x86_64-linux and I am collecting stats for (it should have minimal effect on the overall effectiveness of modref). Honza gcc/ChangeLog: 2021-08-11 Jan Hubicka Alexandre Oliva * ipa-modref.c (modref_lattice::dump): Fix escape_point's min_flags dumping. (modref_lattice::merge_deref): Fix handling of indirect escape points. (update_escape_summary_1): Likewise. (update_escape_summary): Likewise. (ipa_merge_modref_summary_after_inlining): Likewise. gcc/testsuite/ChangeLog: 2021-08-11 Alexandre Oliva * c-c++-common/modref-dse.c: New test. diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index ef5e62beb0e..dccaf658720 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -1392,7 +1392,7 @@ modref_lattice::dump (FILE *out, int indent) const fprintf (out, "%*s Arg %i (%s) min flags", indent, "", escape_points[i].arg, escape_points[i].direct ? 
"direct" : "indirect"); - dump_eaf_flags (out, flags, false); + dump_eaf_flags (out, escape_points[i].min_flags, false); fprintf (out, " in call "); print_gimple_stmt (out, escape_points[i].call, 0); } @@ -1489,10 +1489,18 @@ modref_lattice::merge_deref (const modref_lattice , bool ignore_stores) if (!flags) return changed; for (unsigned int i = 0; i < with.escape_points.length (); i++) -changed |= add_escape_point (with.escape_points[i].call, -with.escape_points[i].arg, -with.escape_points[i].min_flags, -false); +{ + int min_flags = with.escape_points[i].min_flags; + + if (with.escape_points[i].direct) + min_flags = deref_flags (min_flags, ignore_stores); + else if (ignore_stores) + min_flags |= EAF_NOCLOBBER | EAF_NOESCAPE | EAF_NODIRECTESCAPE; + changed |= add_escape_point (with.escape_points[i].call, + with.escape_points[i].arg, + min_flags, + false); +} return changed; } @@ -2992,7 +3000,8 @@ struct escape_map static void update_escape_summary_1 (cgraph_edge *e, -vec > ) +vec > , +bool ignore_stores) { escape_summary *sum =
Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.
Hi! On Wed, Aug 11, 2021 at 12:29:06PM +0100, Iain Sandoe wrote: > > On 11 Aug 2021, at 11:55, Segher Boessenkool > > wrote: > > On Tue, Aug 10, 2021 at 04:46:11PM -0600, Sandra Loosemore wrote: > >> OK. I used your wording verbatim for the first one. For the second > >> one, I'm still pretty confused as I think it is at least theoretically > >> possible on PowerPC to have a target with 64-bit long double (AIX?) that > > > > Some embedded and embedded-like subtargets use 64-bit long double by > > default. You can also configure this on any Power target (not that it > > will necessarily work ;-) ) > > > > I don't know if any subtarget with default 64-bit long double supports > > Fortran. > > I realize that this is not directly relevant to unscrambling the PPC 128bit > stuff, > but aarch64-apple-darwin2x has only 64b long double and supports gfortran. > (on both the new M1 desktop macOS and embedded iOS) Yes, but aarch64-apple-darwin2x is not an rs6000 subtarget :-) There certainly are many targets with a 64b long double. > - it is not clear to me yet if there will at some point be a transition to a > 128b >long double for at least the desktop version. Yeah. I recommend any new target (or target for which this is new) to use an IEEE QP float as long double, even if just as soft float -- the advantages are just too great. > So the permutation definitely exists for at least one non-legacy, non-embedded > platform (and gfortran is very much in demand from the new M1 users). M1 is not embedded? :-) Segher
Re: [PATCH] aix: handle 64bit inodes for include directories
Hi guys, Did you have a chance to take a look at the new version of this patch ? Thanks, Clément From: CHIGOT, CLEMENT Sent: Monday, June 28, 2021 9:16 AM To: Jeff Law ; David Malcolm Cc: gcc-patches@gcc.gnu.org ; David Edelsohn Subject: Re: [PATCH] aix: handle 64bit inodes for include directories >On 6/23/2021 12:53 AM, CHIGOT, CLEMENT via Gcc-patches wrote: >> Hi David, >> >> Did you have a chance to take look at this patch ? >> >> Thanks, >> Clément >> >> >>> +DavidMalcolm >>> >>> Can you review this patch when you have a moment? >>> >>> Thanks, David >>> >>> On Mon, May 17, 2021 at 3:05 PM David Edelsohn wrote: The aix.h change is okay with me, but you need to get approval for the incpath.c and cpplib.h parts of the patch from the appropriate maintainers. Thanks, David On Mon, May 17, 2021 at 7:44 AM CHIGOT, CLEMENT wrote: > On AIX, stat will store inodes in 32bit even when using LARGE_FILES. > If the inode is larger, it will return -1 in st_ino. > Thus, in incpath.c when comparing include directories, if several > of them have 64bit inodes, they will be considered as duplicated. > > gcc/ChangeLog: > 2021-05-06 Clément Chigot > > * configure.ac: Check sizeof ino_t and dev_t. > * config.in: Regenerate. > * configure: Regenerate. > * config/rs6000/aix.h (HOST_STAT_FOR_64BIT_INODES): New define. > * incpath.c (HOST_STAT_FOR_64BIT_INODES): New define. > (remove_duplicates): Use it. > > libcpp/ChangeLog: > 2021-05-06 Clément Chigot > > * configure.ac: Check sizeof ino_t and dev_t. > * config.in: Regenerate. > * configure: Regenerate. > * include/cpplib.h (INO_T_CPP): Change for AIX. > (DEV_T_CPP): New macro. > (struct cpp_dir): Use it. > So my worry here is this is really a host property -- ie, this is > behavior of where GCC runs, not the target for which GCC is generating code. > > That implies that the change in aix.h is wrong. aix.h is for the > target, not the host -- you don't want to define something like > HOST_STAT_FOR_64BIT_INODES there. 
> > You'd want to be triggering this behavior via a host fragment, x-aix, or > better yet via an autoconf test. Indeed, would this version be better ? I'm not sure about the configure test. But as we are retrieving the size of dev_t and ino_t just above, I'm assuming that the one being used in stat directly. At least, that's the case on AIX, and this test is only made for AIX. Clément
Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.
Hi Folks > On 11 Aug 2021, at 11:55, Segher Boessenkool > wrote: > On Tue, Aug 10, 2021 at 04:46:11PM -0600, Sandra Loosemore wrote: >> OK. I used your wording verbatim for the first one. For the second >> one, I'm still pretty confused as I think it is at least theoretically >> possible on PowerPC to have a target with 64-bit long double (AIX?) that > > Some embedded and embedded-like subtargets use 64-bit long double by > default. You can also configure this on any Power target (not that it > will necessarily work ;-) ) > > I don't know if any subtarget with default 64-bit long double supports > Fortran. I realize that this is not directly relevant to unscrambling the PPC 128bit stuff, but aarch64-apple-darwin2x has only 64b long double and supports gfortran. (on both the new M1 desktop macOS and embedded iOS) - it is not clear to me yet if there will at some point be a transition to a 128b long double for at least the desktop version. So the permutation definitely exists for at least one non-legacy, non-embedded platform (and gfortran is very much in demand from the new M1 users). Iain >> also supports the __ibm128 format, and it would be wrong to assume that >> *any* 128-bit mode that's not long double is IEEE. > > Absolutely. Modes are not types, and types are not modes. There are > 128-bit floating point modes that are not IEEE, there are that are, and > either can be used for long double, or neither. > > > Segher
Re: [PATCH] Extend ldexp{s, d}f3 to vscalefs{s, d} when TARGET_AVX512F and TARGET_SSE_MATH.
On Wed, Aug 11, 2021 at 8:36 AM Uros Bizjak wrote: > > On Tue, Aug 10, 2021 at 2:13 PM liuhongt wrote: > > > > Hi: > > AVX512F supported vscalefs{s,d} which is the same as ldexp except the > > second operand should be floating point. > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > > > > gcc/ChangeLog: > > > > PR target/98309 > > * config/i386/i386.md (ldexp3): Extend to vscalefs[sd] > > when TARGET_AVX512F and TARGET_SSE_MATH. > > > > gcc/testsuite/ChangeLog: > > > > PR target/98309 > > * gcc.target/i386/pr98309-1.c: New test. > > * gcc.target/i386/pr98309-2.c: New test. > > OK. Actually, we should introduce a scalar version of avx512f_vmscalef, so we can avoid all subreg conversions with the vector-merge (VM) version, and will also allow memory in operand 2. Please test the attached incremental patch. Uros. diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 56b09c566ed..4a8e8fea290 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -125,6 +125,9 @@ UNSPEC_RSQRT UNSPEC_PSADBW + ;; For AVX512F support + UNSPEC_SCALEF + ;; Generic math support UNSPEC_COPYSIGN UNSPEC_XORSIGN @@ -17894,6 +17897,17 @@ DONE; }) +(define_insn "avx512f_scalef2" + [(set (match_operand:MODEF 0 "register_operand" "=v") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "v") + (match_operand:MODEF 2 "nonimmediate_operand" "vm")] + UNSPEC_SCALEF))] + "TARGET_AVX512F" + "vscalef\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "mode" "")]) + (define_expand "ldexpxf3" [(match_operand:XF 0 "register_operand") (match_operand:XF 1 "register_operand") @@ -17924,15 +17938,12 @@ if (TARGET_AVX512F && TARGET_SSE_MATH) { rtx op2 = gen_reg_rtx (mode); - emit_insn (gen_floatsi2 (op2, operands[2])); - operands[0] = lowpart_subreg (mode, operands[0], mode); - if (MEM_P (operands[1])) + + if (!nonimmediate_operand (operands[1], mode)) operands[1] = force_reg (mode, operands[1]); - operands[1] = lowpart_subreg (mode, operands[1], 
mode); - op2 = lowpart_subreg (mode, op2, mode); - emit_insn (gen_avx512f_vmscalef (operands[0], - operands[1], - op2)); + + emit_insn (gen_floatsi2 (op2, operands[2])); + emit_insn (gen_avx512f_scalef2 (operands[0], operands[1], op2)); } else { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 3957c86c3df..9233dfc6150 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -92,7 +92,6 @@ UNSPEC_RCP14 UNSPEC_RSQRT14 UNSPEC_FIXUPIMM - UNSPEC_SCALEF UNSPEC_VTERNLOG UNSPEC_GETEXP UNSPEC_GETMANT
Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.
Hi! On Tue, Aug 10, 2021 at 04:46:11PM -0600, Sandra Loosemore wrote: > OK. I used your wording verbatim for the first one. For the second > one, I'm still pretty confused as I think it is at least theoretically > possible on PowerPC to have a target with 64-bit long double (AIX?) that Some embedded and embedded-like subtargets use 64-bit long double by default. You can also configure this on any Power target (not that it will necessarily work ;-) ) I don't know if any subtarget with default 64-bit long double supports Fortran. > also supports the __ibm128 format, and it would be wrong to assume that > *any* 128-bit mode that's not long double is IEEE. Absolutely. Modes are not types, and types are not modes. There are 128-bit floating point modes that are not IEEE, there are some that are, and either can be used for long double, or neither. Segher
Re: [PATCH] Adding target hook allows to reject initialization of register
On Wed, Aug 11, 2021 at 11:28 AM Richard Sandiford wrote: > > Richard Biener writes: > > On Tue, Aug 10, 2021 at 10:33 AM Jojo R via Gcc-patches > > wrote: > >> > >> Some target like RISC-V allow to group vector register as a whole, > >> and only operate part of it in fact, but the 'init-regs' pass will add > >> initialization > >> for uninitialized registers. Add this hook to reject this action for > >> reducing instruction. > > > > Are these groups "visible"? That is, are the pseudos multi-reg > > pseudos? I wonder > > if there's a more generic way to tame down initregs w/o introducing a new > > target > > hook. > > > > Btw, initregs is a red herring - it ideally should go away. See PR61810. > > > > So instead of adding to it can you see whether disabling the pass for RISC-V > > works w/o fallout (and add a comment to the PR)? Maybe some more RTL > > literate (in particular DF literate) can look at the remaining issue. > > Richard, did you > > ever have a look into the "issue" that initregs covers up (whatever > > that exactly is)? > > No, sorry. I don't really understand what it would be from the comment > in the code: > >[...] papers over some problems on the arm and other >processors where certain isa constraints cannot be handled by gcc. >These are of the form where two operands to an insn my not be the >same. The ra will only make them the same if they do not >interfere, and this can only happen if one is not initialized. > > That would definitely be an RA bug if true, since the constraints need > to be applied independently of dataflow information. But the comment > and code predate LRA and maybe no-one fancied poking around in reload > (hard to believe). > > I'd be very surprised if LRA gets this wrong. OK, we're wondering since quite some time - how about changing the gate of initregs to optimize > 0 && !targetm.lra_p ()? We'll hopefully figure out the "real" issue the pass is papering over. 
At the same time we're leaving old reload (and likely unmaintained) targets unaffected. Richard. > Thanks, > Richard
Re: [PATCH 05/34] rs6000: Add available-everywhere and ancient builtins
On Tue, Aug 10, 2021 at 04:29:10PM -0500, Bill Schmidt wrote: > On 8/10/21 12:34 PM, Segher Boessenkool wrote: > >On Tue, Aug 10, 2021 at 11:17:05AM -0500, will schmidt wrote: > >>On Thu, 2021-07-29 at 08:30 -0500, Bill Schmidt wrote: > >>>+; This will break for long double == _Float128. libgcc history. > >>>+ const long double __builtin_pack_longdouble (double, double); > >>>+PACK_TF packtf {} > >>Add a few more words to provide bigger hints for future archeological > >>digs? (This is perhaps an obvious issue, but I'd need to do some > >>spelunking) > >It is for __ibm128 only, not for other long double formats (we have > >three: plain double, double double, IEEE QP). So maybe the return type > >should be changed? The name of the builtin of course is unfortunate, > >but it is too late to change :-) > > Yeah...I'm not sure how much flexibility we have here to avoid breaking > code in the field, but it's not a big break because whoever may be using > it has to be assuming long double = __ibm128, and probably has work to > do anyway. We do have an __ibm128 __builtin_pack_ibm128 (double, double); already, so we just should get people to use that one, make it more prominent in the documentation? Or we can also make __builtin_pack_longdouble warn (or even error) if used when long double is not double-double. Maybe an attribute (or what is it called, a {thing} I mean) in the new description files to say "warn (or error) if long double is not ibm128"? > Perhaps I should commit as is for now, and then prepare a separate patch > to change this builtin? There may be test suite fallout, not sure offhand. Yes, I did approve it already, right? Reviewing these patches I notice things that should be improved, but that does not have to be done *now*, or by you for that matter :-) Cheers, Segher
Re: [PATCH][v2] Adjust volatile handling of the operand scanner
> So I'm leaning towards leaving build3 alone and fixing up frontends > as issues pop up. FWIW fine with me. -- Eric Botcazou
[PATCH] tree-optimization/101861 - fix gather use for non-gather refs
My previous change broke the usage of gather for strided loads. The following fixes it. Bootstrapped on x86_64-unknown-linux-gnu, will push as obvious. 2021-08-11 Richard Biener PR tree-optimization/101861 * tree-vect-stmts.c (vectorizable_load): Fix error in previous change with regard to gather vectorization. --- gcc/tree-vect-stmts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index ab402b57fb4..cc6c091e41e 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -9492,7 +9492,8 @@ vectorizable_load (vec_info *vinfo, if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.ifn != IFN_LAST) { - vec_offset = vec_offsets[j]; + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + vec_offset = vec_offsets[j]; tree zero = build_zero_cst (vectype); tree scale = size_int (gs_info.scale); gcall *call; -- 2.31.1
Re: [PATCH] Adding target hook allows to reject initialization of register
Richard Biener writes: > On Tue, Aug 10, 2021 at 10:33 AM Jojo R via Gcc-patches > wrote: >> >> Some target like RISC-V allow to group vector register as a whole, >> and only operate part of it in fact, but the 'init-regs' pass will add >> initialization >> for uninitialized registers. Add this hook to reject this action for >> reducing instruction. > > Are these groups "visible"? That is, are the pseudos multi-reg > pseudos? I wonder > if there's a more generic way to tame down initregs w/o introducing a new > target > hook. > > Btw, initregs is a red herring - it ideally should go away. See PR61810. > > So instead of adding to it can you see whether disabling the pass for RISC-V > works w/o fallout (and add a comment to the PR)? Maybe some more RTL > literate (in particular DF literate) can look at the remaining issue. > Richard, did you > ever have a look into the "issue" that initregs covers up (whatever > that exactly is)? No, sorry. I don't really understand what it would be from the comment in the code: [...] papers over some problems on the arm and other processors where certain isa constraints cannot be handled by gcc. These are of the form where two operands to an insn my not be the same. The ra will only make them the same if they do not interfere, and this can only happen if one is not initialized. That would definitely be an RA bug if true, since the constraints need to be applied independently of dataflow information. But the comment and code predate LRA and maybe no-one fancied poking around in reload (hard to believe). I'd be very surprised if LRA gets this wrong. Thanks, Richard
Re: [PATCH] [i386] Combine avx_vec_concatv16si and avx512f_zero_extendv16hiv16si2_1 to avx512f_zero_extendv16hiv16si2_2.
On Wed, Aug 11, 2021 at 3:58 PM Jakub Jelinek wrote: > > On Wed, Aug 11, 2021 at 02:43:06PM +0800, liuhongt wrote: > > Add define_insn_and_split to combine avx_vec_concatv16si/2 and > > avx512f_zero_extendv16hiv16si2_1 since the latter already zero_extend > > the upper bits, similar for other patterns which are related to > > pmovzx{bw,wd,dq}. > > > > It will do optimization like > > > > - vmovdqa %ymm0, %ymm0# 7 [c=4 l=6] avx_vec_concatv16si/2 > > vpmovzxwd %ymm0, %zmm0# 22[c=4 l=6] > > avx512f_zero_extendv16hiv16si2 > > ret # 25[c=0 l=1] simple_return_internal > > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > > Ok for trunk? > > > > gcc/ChangeLog: > > > > PR target/101846 > > * config/i386/sse.md (*avx2_zero_extendv16qiv16hi2_2): New > > post_reload define_insn_and_split. > > The ChangeLog doesn't mention the newly added mode iterators, perhaps it > should. > > > (*avx512bw_zero_extendv32qiv32hi2_2): Ditto. > > (*sse4_1_zero_extendv8qiv8hi2_4): Ditto. > > (*avx512f_zero_extendv16hiv16si2_2): Ditto. > > (*avx2_zero_extendv8hiv8si2_2): Ditto. > > (*sse4_1_zero_extendv4hiv4si2_4): Ditto. > > (*avx512f_zero_extendv8siv8di2_2): Ditto. > > (*avx2_zero_extendv4siv4di2_2): Ditto. > > (*sse4_1_zero_extendv2siv2di2_4): Ditto. > > > > gcc/testsuite/ChangeLog: > > > > PR target/101846 > > * gcc.target/i386/pr101846-1.c: New test. 
> > --- > > gcc/config/i386/sse.md | 220 + > > gcc/testsuite/gcc.target/i386/pr101846-1.c | 95 + > > 2 files changed, 315 insertions(+) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-1.c > > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > index a46a2373547..6450c058458 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -673,8 +673,14 @@ (define_mode_iterator VI12_128 [V16QI V8HI]) > > (define_mode_iterator VI14_128 [V16QI V4SI]) > > (define_mode_iterator VI124_128 [V16QI V8HI V4SI]) > > (define_mode_iterator VI24_128 [V8HI V4SI]) > > +(define_mode_iterator VI128_128 [V16QI V8HI V2DI]) > > And this mode iterator isn't used anywhere in the patch it seems. > > Otherwise LGTM, although it fixes just particular, though perhaps very > important, cases, for detecting generally that some operations on > a vector aren't needed because following permutation that uses it never > reads those elements is something that would need to be done on gimple. > > Would it be possible to handle also the similar pmovzx{bd,wq,bq} cases? Yes, regarding testcase bar, vec_perm can be implemented as vpmovdw and vinserti64x4, and the latter instructions will be optimized off since the upper bits are never used. I'm working on a patch. > > Jakub > -- BR, Hongtao
Re: [PATCH][v2] Adjust volatile handling of the operand scanner
On Wed, 11 Aug 2021, Richard Biener wrote: > On Tue, 10 Aug 2021, Eric Botcazou wrote: > > > > The question is whether we instead want to amend build3 to > > > set TREE_THIS_VOLATILE automatically when the FIELD_DECL has > > > it set. At least for the Fortran FE cases the gimplifier > > > fails to see some volatile references and thus can generate > > > wrong code which is a latent issue. > > > > What do we do for other similar flags, e.g. TREE_READONLY? > > build3 currently does no special processing for the FIELD_DECL operand, > it just sets TREE_THIS_VOLATILE from operand zero for tcc_references. > > The C and C++ frontends have repeated patterns like > > ref = build3 (COMPONENT_REF, subtype, datum, subdatum, > NULL_TREE); > SET_EXPR_LOCATION (ref, loc); > if (TREE_READONLY (subdatum) > || (use_datum_quals && TREE_READONLY (datum))) > TREE_READONLY (ref) = 1; > if (TREE_THIS_VOLATILE (subdatum) > || (use_datum_quals && TREE_THIS_VOLATILE (datum))) > TREE_THIS_VOLATILE (ref) = 1; > > Leaving out TREE_READONLY shouldn't have any correctness issue. It's > just that when adjusting the SSA operand scanner to correctly interpret > GENERIC that this uncovers pre-existing issues in the Fortran frontend > (one manifests in a testsuite FAIL - otherwise I wouldn't have noticed). > > I'm fine with just plugging the Fortran FE holes as we discover them > but I did not check other frontends and testsuite coverage is weak. > > Now - I wonder if there's a reason a frontend might _not_ want to > set TREE_THIS_VOLATILE on a COMPONENT_REF when the FIELD_DECL has > TREE_THIS_VOLATILE set. > > I guess I'll do one more experiment and add verification that > TREE_THIS_VOLATILE on COMPONENT_REFs and FIELD_DECLs is consistent > and see where that trips. 
It trips for struct X { volatile int i; }; void foo () { struct X x = (struct X){ .i = 0 }; } where the gimplifier in gimplify_init_ctor_eval does gcc_assert (TREE_CODE (purpose) == FIELD_DECL); cref = build3 (COMPONENT_REF, TREE_TYPE (purpose), unshare_expr (object), purpose, NULL_TREE); producing x.i = 0; that is not volatile qualified. This manifests itself during the build of libasan. I'm not sure whether what the gimplifier is doing is correct or not. Changing build3 would alter the behavior here. Then there's a case where the COMPONENT_REF is TREE_THIS_VOLATILE but neither the FIELD_DECL nor the base reference is. This trips during libtsan build and again is from gimplification/folding, this time gimplify_modify_expr_rhs doing case INDIRECT_REF: { /* If we have code like *(const A*)(A*)&x where the type of "x" is a (possibly cv-qualified variant of "A"), treat the entire expression as identical to "x". This kind of code arises in C++ when an object is bound to a const reference, and if "x" is a TARGET_EXPR we want to take advantage of the optimization below. */ bool volatile_p = TREE_THIS_VOLATILE (*from_p); tree t = gimple_fold_indirect_ref_rhs (TREE_OPERAND (*from_p, 0)); if (t) { if (TREE_THIS_VOLATILE (t) != volatile_p) { if (DECL_P (t)) t = build_simple_mem_ref_loc (EXPR_LOCATION (*from_p), build_fold_addr_expr (t)); if (REFERENCE_CLASS_P (t)) TREE_THIS_VOLATILE (t) = volatile_p; I suppose that's OK, it's folding volatile *(void (*__sanitizer_sighandler_ptr) (int) *) &act->D.5368.handler to act->D.5368.handler which wouldn't be volatile. The opposite could happen, too, of course - casting away volatileness for an access but letting that slip through verification would make it moot. So ... With those cases fixed bootstrap runs through and testing reveals no additional issues apart from the already known gfortran.dg/volatile11.f90 So I'm leaning towards leaving build3 alone and fixing up frontends as issues pop up. Richard.
Re: [PATCH] Fix loop split incorrect count and probability
On Wed, 11 Aug 2021, Xionghu Luo wrote: > > > On 2021/8/10 22:47, Richard Biener wrote: > > On Mon, 9 Aug 2021, Xionghu Luo wrote: > > > >> Thanks, > >> > >> On 2021/8/6 19:46, Richard Biener wrote: > >>> On Tue, 3 Aug 2021, Xionghu Luo wrote: > >>> > loop split condition is moved between loop1 and loop2, the split bb's > count and probability should also be duplicated instead of (100% vs INV), > secondly, the original loop1 and loop2 count need be propotional from the > original loop. > > > diff base/loop-cond-split-1.c.151t.lsplit > patched/loop-cond-split-1.c.151t.lsplit: > ... > int prephitmp_16; > int prephitmp_25; > > [local count: 118111600]: > if (n_7(D) > 0) > goto ; [89.00%] > else > goto ; [11.00%] > > [local count: 118111600]: > return; > > [local count: 105119324]: > pretmp_3 = ga; > > - [local count: 955630225]: > + [local count: 315357973]: > # i_13 = PHI > # prephitmp_12 = PHI > if (prephitmp_12 != 0) > goto ; [33.00%] > else > goto ; [67.00%] > > - [local count: 315357972]: > + [local count: 104068130]: > _2 = do_something (); > ga = _2; > > - [local count: 955630225]: > + [local count: 315357973]: > # prephitmp_5 = PHI > i_10 = inc (i_13); > if (n_7(D) > i_10) > goto ; [89.00%] > else > goto ; [11.00%] > > [local count: 105119324]: > goto ; [100.00%] > > - [local count: 850510901]: > + [local count: 280668596]: > if (prephitmp_12 != 0) > -goto ; [100.00%] > +goto ; [33.00%] > else > -goto ; [INV] > +goto ; [67.00%] > > - [local count: 850510901]: > + [local count: 280668596]: > goto ; [100.00%] > > - [count: 0]: > + [local count: 70429947]: > # i_23 = PHI > # prephitmp_25 = PHI > > - [local count: 955630225]: > + [local count: 640272252]: > # i_15 = PHI > # prephitmp_16 = PHI > i_22 = inc (i_15); > if (n_7(D) > i_22) > goto ; [89.00%] > else > goto ; [11.00%] > > - [local count: 850510901]: > + [local count: 569842305]: > goto ; [100.00%] > > } > > gcc/ChangeLog: > > * tree-ssa-loop-split.c (split_loop): Fix incorrect probability. 
> (do_split_loop_on_cond): Likewise. > --- > gcc/tree-ssa-loop-split.c | 16 > 1 file changed, 8 insertions(+), 8 deletions(-) > > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c > index 3a09bbc39e5..8e5a7ded0f7 100644 > --- a/gcc/tree-ssa-loop-split.c > +++ b/gcc/tree-ssa-loop-split.c > @@ -583,10 +583,10 @@ split_loop (class loop *loop1) > basic_block cond_bb; > >> > >>if (!initial_true) > >> -cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); > >> +cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); > >> + > >> + edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE > >> + ? EDGE_SUCC (bbs[i], 0) > >> + : EDGE_SUCC (bbs[i], 1); > >> > > class loop *loop2 = loop_version (loop1, cond, _bb, > - profile_probability::always > (), > - profile_probability::always > (), > - profile_probability::always > (), > - profile_probability::always > (), > + true_edge->probability, > + > true_edge->probability.invert (), > + true_edge->probability, > + > true_edge->probability.invert (), > true); > >>> > >>> there is no 'true_edge' variable at this point. > >> > >> Sorry, missed the above hunk when split the patch. > >> > >>> > gcc_assert (loop2); > > @@ -1486,10 +1486,10 @@ do_split_loop_on_cond (struct loop *loop1, edge > invar_branch) > initialize_original_copy_tables (); > > struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL, > - profile_probability::always (), > - profile_probability::never (), > -
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
Qing Zhao via Gcc-patches writes: >> On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches >> wrote: >> >> Hi, Richard, >> >>> On Aug 10, 2021, at 10:22 AM, Richard Biener wrote: > > Especially in the VLA case but likely also in general (though unlikely > since usually the receiver of initializations are simple enough). I'd > expect the VLA case end up as > > *ptr_to_decl = .DEFERRED_INIT (...); > > where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. So, for the following small testing case: extern void bar (int); void foo(int n) { int arr[n]; bar (arr[2]); return; } = If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is: = void foo (int n) { int n.0; sizetype D.1950; bitsizetype D.1951; sizetype D.1952; bitsizetype D.1953; sizetype D.1954; int[0:D.1950] * arr.1; void * saved_stack.2; int arr[0:D.1950] [value-expr: *arr.1]; saved_stack.2 = __builtin_stack_save (); try { n.0 = n; _1 = (long int) n.0; _2 = _1 + -1; _3 = (sizetype) _2; D.1950 = _3; _4 = (sizetype) n.0; _5 = (bitsizetype) _4; _6 = _5 * 32; D.1951 = _6; _7 = (sizetype) n.0; _8 = _7 * 4; D.1952 = _8; _9 = (sizetype) n.0; _10 = (bitsizetype) _9; _11 = _10 * 32; D.1953 = _11; _12 = (sizetype) n.0; _13 = _12 * 4; D.1954 = _13; arr.1 = __builtin_alloca_with_align (D.1954, 32); arr = .DEFERRED_INIT (D.1952, 2, 1); _14 = (*arr.1)[2]; bar (_14); return; } finally { __builtin_stack_restore (saved_stack.2); } } You think that the above .DEFEERED_INIT is not correct? It should be: *arr.1 = .DEFERRED_INIT (D.1952. 2, 1); ? >>> >>> Yes. >>> >> >> I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT as: >> >> arr.1 = __builtin_alloca_with_align (D.1954, 32); >> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); >> >> However, this call triggered the assertion failure in verify_gimple_call of >> tree-cfg.c because the LHS is not a valid LHS. 
>> Then I modify tree-cfg.c as: >> >> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c >> index 330eb7dd89bf..180d4f1f9e32 100644 >> --- a/gcc/tree-cfg.c >> +++ b/gcc/tree-cfg.c >> @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt) >> } >> >> tree lhs = gimple_call_lhs (stmt); >> + /* For .DEFERRED_INIT call, the LHS might be an indirection of >> + a pointer for the VLA variable, which is not a valid LHS of >> + a gimple call, we ignore the asssertion on this. */ >> if (lhs >> + && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT)) >> && (!is_gimple_reg (lhs) >> && (!is_gimple_lvalue (lhs) >> || verify_types_in_gimple_reference >> >> The assertion failure in tree-cfg.c got resolved, but I got another >> assertion failure in operands_scanner::get_expr_operands (tree *expr_p, int >> flags), line 945: >> >> 939 /* If we get here, something has gone wrong. */ >> 940 if (flag_checking) >> 941 { >> 942 fprintf (stderr, "unhandled expression in get_expr_operands():\n"); >> 943 debug_tree (expr); >> 944 fputs ("\n", stderr); >> 945 gcc_unreachable (); >> 946 } >> >> Looks like that the gimple statement: >>*arr.1 = .DEFERRED_INIT (D.1952, 2, 1); >> >> Is not valid. i.e, the LHS should not be an indirection to a pointer. >> >> How to resolve this issue? > > I came up with the following solution: > > Define the IFN_DEFERRED_INIT function as: > >LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA); > >if IS_VLA is false, the LHS is the DECL itself, >if IS_VLA is true, the LHS is the pointer to this DECL that created by >gimplify_vla_decl. > > > The benefit of this solution are: > > 1. Resolved the invalid IR issue; > 2. The call stmt carries the address of the VLA natually; > > The issue with this solution is: > > For VLA and non-VLA, the LHS will be different, > > Do you see any other potential issues with this solution? 
The idea behind the DECL version of the .DEFERRED_INIT semantics was that .DEFERRED_INIT just returns a SIZE-byte value that the caller then assigns to a SIZE-byte lhs (with the caller choosing the lhs). .DEFERRED_INIT itself doesn't read or write memory and so can be const, which in turn allows alias analysis to be more precise. If we want to handle the VLA case using pointers instead then I think that needs to be a different IFN. If we did handle the VLA case using pointers (not expressing an opinion on that), then it would be the caller's job to
Re: [PATCH] Fix loop split incorrect count and probability
On 2021/8/10 22:47, Richard Biener wrote: > On Mon, 9 Aug 2021, Xionghu Luo wrote: > >> Thanks, >> >> On 2021/8/6 19:46, Richard Biener wrote: >>> On Tue, 3 Aug 2021, Xionghu Luo wrote: >>> loop split condition is moved between loop1 and loop2, the split bb's count and probability should also be duplicated instead of (100% vs INV), secondly, the original loop1 and loop2 count need be propotional from the original loop. diff base/loop-cond-split-1.c.151t.lsplit patched/loop-cond-split-1.c.151t.lsplit: ... int prephitmp_16; int prephitmp_25; [local count: 118111600]: if (n_7(D) > 0) goto ; [89.00%] else goto ; [11.00%] [local count: 118111600]: return; [local count: 105119324]: pretmp_3 = ga; - [local count: 955630225]: + [local count: 315357973]: # i_13 = PHI # prephitmp_12 = PHI if (prephitmp_12 != 0) goto ; [33.00%] else goto ; [67.00%] - [local count: 315357972]: + [local count: 104068130]: _2 = do_something (); ga = _2; - [local count: 955630225]: + [local count: 315357973]: # prephitmp_5 = PHI i_10 = inc (i_13); if (n_7(D) > i_10) goto ; [89.00%] else goto ; [11.00%] [local count: 105119324]: goto ; [100.00%] - [local count: 850510901]: + [local count: 280668596]: if (prephitmp_12 != 0) -goto ; [100.00%] +goto ; [33.00%] else -goto ; [INV] +goto ; [67.00%] - [local count: 850510901]: + [local count: 280668596]: goto ; [100.00%] - [count: 0]: + [local count: 70429947]: # i_23 = PHI # prephitmp_25 = PHI - [local count: 955630225]: + [local count: 640272252]: # i_15 = PHI # prephitmp_16 = PHI i_22 = inc (i_15); if (n_7(D) > i_22) goto ; [89.00%] else goto ; [11.00%] - [local count: 850510901]: + [local count: 569842305]: goto ; [100.00%] } gcc/ChangeLog: * tree-ssa-loop-split.c (split_loop): Fix incorrect probability. (do_split_loop_on_cond): Likewise. 
--- gcc/tree-ssa-loop-split.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c index 3a09bbc39e5..8e5a7ded0f7 100644 --- a/gcc/tree-ssa-loop-split.c +++ b/gcc/tree-ssa-loop-split.c @@ -583,10 +583,10 @@ split_loop (class loop *loop1) basic_block cond_bb; >> >> if (!initial_true) >> - cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); >> + cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); >> + >> +edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE >> + ? EDGE_SUCC (bbs[i], 0) >> + : EDGE_SUCC (bbs[i], 1); >> class loop *loop2 = loop_version (loop1, cond, _bb, - profile_probability::always (), - profile_probability::always (), - profile_probability::always (), - profile_probability::always (), + true_edge->probability, + true_edge->probability.invert (), + true_edge->probability, + true_edge->probability.invert (), true); >>> >>> there is no 'true_edge' variable at this point. >> >> Sorry, missed the above hunk when split the patch. >> >>> gcc_assert (loop2); @@ -1486,10 +1486,10 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch) initialize_original_copy_tables (); struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL, - profile_probability::always (), - profile_probability::never (), - profile_probability::always (), - profile_probability::always (), + invar_branch->probability.invert (), + invar_branch->probability, + invar_branch->probability.invert (), + invar_branch->probability,
Re: [PATCH][v2] Adjust volatile handling of the operand scanner
> build3 currently does no special processing for the FIELD_DECL operand, > it just sets TREE_THIS_VOLATILE from operand zero for tcc_references. > > The C and C++ frontends have repeated patterns like > > ref = build3 (COMPONENT_REF, subtype, datum, subdatum, > NULL_TREE); > SET_EXPR_LOCATION (ref, loc); > if (TREE_READONLY (subdatum) > > || (use_datum_quals && TREE_READONLY (datum))) > > TREE_READONLY (ref) = 1; > if (TREE_THIS_VOLATILE (subdatum) > > || (use_datum_quals && TREE_THIS_VOLATILE (datum))) > > TREE_THIS_VOLATILE (ref) = 1; Likewise in the Ada front-end (gigi). > Now - I wonder if there's a reason a frontend might _not_ want to > set TREE_THIS_VOLATILE on a COMPONENT_REF when the FIELD_DECL has > TREE_THIS_VOLATILE set. This would be weird semantics in my opinion. > I guess I'll do one more experiment and add verification that > TREE_THIS_VOLATILE on COMPONENT_REFs and FIELD_DECLs is consistent > and see where that trips. Sounds good to me. -- Eric Botcazou
[PATCH] middle-end/101858 - avoid shift of pointer in folding
This makes sure to not generate a shift of pointer types in simplification of X < (cast) (1 << Y). Bootstrap and regtest pending on x86_64-unknown-linux-gnu. 2021-08-11 Richard Biener PR middle-end/101858 * fold-const.c (fold_binary_loc): Guard simplification of X < (cast) (1 << Y) to integer types. * gcc.dg/pr101858.c: New testcase. --- gcc/fold-const.c| 2 ++ gcc/testsuite/gcc.dg/pr101858.c | 9 + 2 files changed, 11 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/pr101858.c diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 0f701287ba1..3917e97dfb0 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -12497,6 +12497,8 @@ fold_binary_loc (location_t loc, enum tree_code code, tree type, we can't optimize this. E.g. (unsigned long long) (1 << Y) for Y 31 might be 0x8000. */ if ((code == LT_EXPR || code == GE_EXPR) + && (INTEGRAL_TYPE_P (TREE_TYPE (arg0)) + || VECTOR_INTEGER_TYPE_P (TREE_TYPE (arg0))) && TYPE_UNSIGNED (TREE_TYPE (arg0)) && CONVERT_EXPR_P (arg1) && TREE_CODE (TREE_OPERAND (arg1, 0)) == LSHIFT_EXPR diff --git a/gcc/testsuite/gcc.dg/pr101858.c b/gcc/testsuite/gcc.dg/pr101858.c new file mode 100644 index 000..61fcca60982 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101858.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-w" } */ + +int foo(int a) +{ + if (a < (int*)((__INTPTR_TYPE__)1 << a)) +a = 0; + return a; +} -- 2.31.1
[committed] sanitizer: Cherry-pick realpath fix
Hi! tsan in some cases starts ignoring interceptors and only calls the intercepted functions. But for realpath the behavior for NULL second argument was only handled in the interceptor and intercepted function was the one found by dlsym which is often one that doesn't handle NULL as second argument. Fixed by using dlvsym with "GLIBC_2.3" if possible for intercepted function and don't emulate behavior in the wrapper. Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2021-08-11 Jakub Jelinek * sanitizer_common/sanitizer_common_interceptors.inc: Cherry-pick llvm-project revision faef0d042f523357fe5590e7cb6a8391cf0351a8. --- libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc +++ libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc @@ -3664,21 +3664,11 @@ INTERCEPTOR(char *, realpath, const char *path, char *resolved_path) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, realpath, path, resolved_path); if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1); - - // Workaround a bug in glibc where dlsym(RTLD_NEXT, ...) returns the oldest - // version of a versioned symbol. For realpath(), this gives us something - // (called __old_realpath) that does not handle NULL in the second argument. - // Handle it as part of the interceptor. - char *allocated_path = nullptr; - if (!resolved_path) -allocated_path = resolved_path = (char *)WRAP(malloc)(path_max + 1); - char *res = REAL(realpath)(path, resolved_path); - if (allocated_path && !res) WRAP(free)(allocated_path); if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1); return res; } -#define INIT_REALPATH COMMON_INTERCEPT_FUNCTION(realpath); +#define INIT_REALPATH COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(realpath, "GLIBC_2.3"); #else #define INIT_REALPATH #endif Jakub
Re: [Patch v3 Fortran] Fix c_float128 and c_float128_complex on targets with 128-bit long double.
On 11.08.21 00:46, Sandra Loosemore wrote: On 8/10/21 2:29 AM, Tobias Burnus wrote: [snip] To conclude: I like the code changes (LGTM); the '__float128' -> 'TFmode' comment change also matches the code. However, I think both longer comments need to be updated. OK. I used your wording verbatim for the first one. For the second one, I'm still pretty confused as I think it is at least theoretically possible on PowerPC to have a target with 64-bit long double (AIX?) that also supports the __ibm128 format, and it would be wrong to assume that *any* 128-bit mode that's not long double is IEEE. So I decided the best thing is just to replace the FIXME with a pointer to the issue I opened yesterday https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101835 LGTM – but ... + /* See PR101835. */ ... I wonder whether your PR reference should have a TODO or FIXME prefix – or a "for some issue" suffix. Currently, it can be read as if the PR describes why the code was added – and not for questioning the code. Tobias PS: I added some more notes to the PR + extended the subject to make it easier to find. - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: [PATCH] [i386] Combine avx_vec_concatv16si and avx512f_zero_extendv16hiv16si2_1 to avx512f_zero_extendv16hiv16si2_2.
On Wed, Aug 11, 2021 at 02:43:06PM +0800, liuhongt wrote: > Add define_insn_and_split to combine avx_vec_concatv16si/2 and > avx512f_zero_extendv16hiv16si2_1 since the latter already zero_extend > the upper bits, similar for other patterns which are related to > pmovzx{bw,wd,dq}. > > It will do optimization like > > - vmovdqa %ymm0, %ymm0# 7 [c=4 l=6] avx_vec_concatv16si/2 > vpmovzxwd %ymm0, %zmm0# 22[c=4 l=6] > avx512f_zero_extendv16hiv16si2 > ret # 25[c=0 l=1] simple_return_internal > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > Ok for trunk? > > gcc/ChangeLog: > > PR target/101846 > * config/i386/sse.md (*avx2_zero_extendv16qiv16hi2_2): New > post_reload define_insn_and_split. The ChangeLog doesn't mention the newly added mode iterators, perhaps it should. > (*avx512bw_zero_extendv32qiv32hi2_2): Ditto. > (*sse4_1_zero_extendv8qiv8hi2_4): Ditto. > (*avx512f_zero_extendv16hiv16si2_2): Ditto. > (*avx2_zero_extendv8hiv8si2_2): Ditto. > (*sse4_1_zero_extendv4hiv4si2_4): Ditto. > (*avx512f_zero_extendv8siv8di2_2): Ditto. > (*avx2_zero_extendv4siv4di2_2): Ditto. > (*sse4_1_zero_extendv2siv2di2_4): Ditto. > > gcc/testsuite/ChangeLog: > > PR target/101846 > * gcc.target/i386/pr101846-1.c: New test. > --- > gcc/config/i386/sse.md | 220 + > gcc/testsuite/gcc.target/i386/pr101846-1.c | 95 + > 2 files changed, 315 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-1.c > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index a46a2373547..6450c058458 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -673,8 +673,14 @@ (define_mode_iterator VI12_128 [V16QI V8HI]) > (define_mode_iterator VI14_128 [V16QI V4SI]) > (define_mode_iterator VI124_128 [V16QI V8HI V4SI]) > (define_mode_iterator VI24_128 [V8HI V4SI]) > +(define_mode_iterator VI128_128 [V16QI V8HI V2DI]) And this mode iterator isn't used anywhere in the patch it seems. 
Otherwise LGTM, although it fixes just particular, though perhaps very important, cases; detecting generally that some operations on a vector aren't needed because the following permutation that uses it never reads those elements is something that would need to be done on gimple. Would it be possible to handle also the similar pmovzx{bd,wq,bq} cases? Jakub
Re: [patch][version 6] add -ftrivial-auto-var-init and variable attribute "uninitialized" to gcc
On Tue, 10 Aug 2021, Qing Zhao wrote: > > > > On Aug 10, 2021, at 3:16 PM, Qing Zhao via Gcc-patches > > wrote: > > > > Hi, Richard, > > > >> On Aug 10, 2021, at 10:22 AM, Richard Biener wrote: > > Especially in the VLA case but likely also in general (though unlikely > since usually the receiver of initializations are simple enough). I'd > expect the VLA case end up as > > *ptr_to_decl = .DEFERRED_INIT (...); > > where *ptr_to_decl is the DECL_VALUE_EXPR of the decl. > >>> > >>> So, for the following small testing case: > >>> > >>> > >>> extern void bar (int); > >>> > >>> void foo(int n) > >>> { > >>> int arr[n]; > >>> bar (arr[2]); > >>> return; > >>> } > >>> = > >>> > >>> If I compile it with -ftrivial-auto-var-init=zero -fdump-tree-gimple -S > >>> -o auto-init-11.s -fdump-rtl-expand, the *.gimple dump is: > >>> > >>> = > >>> void foo (int n) > >>> { > >>> int n.0; > >>> sizetype D.1950; > >>> bitsizetype D.1951; > >>> sizetype D.1952; > >>> bitsizetype D.1953; > >>> sizetype D.1954; > >>> int[0:D.1950] * arr.1; > >>> void * saved_stack.2; > >>> int arr[0:D.1950] [value-expr: *arr.1]; > >>> > >>> saved_stack.2 = __builtin_stack_save (); > >>> try > >>> { > >>> n.0 = n; > >>> _1 = (long int) n.0; > >>> _2 = _1 + -1; > >>> _3 = (sizetype) _2; > >>> D.1950 = _3; > >>> _4 = (sizetype) n.0; > >>> _5 = (bitsizetype) _4; > >>> _6 = _5 * 32; > >>> D.1951 = _6; > >>> _7 = (sizetype) n.0; > >>> _8 = _7 * 4; > >>> D.1952 = _8; > >>> _9 = (sizetype) n.0; > >>> _10 = (bitsizetype) _9; > >>> _11 = _10 * 32; > >>> D.1953 = _11; > >>> _12 = (sizetype) n.0; > >>> _13 = _12 * 4; > >>> D.1954 = _13; > >>> arr.1 = __builtin_alloca_with_align (D.1954, 32); > >>> arr = .DEFERRED_INIT (D.1952, 2, 1); > >>> _14 = (*arr.1)[2]; > >>> bar (_14); > >>> return; > >>> } > >>> finally > >>> { > >>> __builtin_stack_restore (saved_stack.2); > >>> } > >>> } > >>> > >>> > >>> > >>> You think that the above .DEFEERED_INIT is not correct? 
> >>> It should be: > >>> > >>> *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); > >>> > >>> ? > >> > >> Yes. > >> > > > > I updated gimplify.c for VLA and now it emits the call to .DEFERRED_INIT as: > > > > arr.1 = __builtin_alloca_with_align (D.1954, 32); > > *arr.1 = .DEFERRED_INIT (D.1952, 2, 1); > > > > However, this call triggered the assertion failure in verify_gimple_call of > > tree-cfg.c because the LHS is not a valid LHS. > > Then I modified tree-cfg.c as: > > > > diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c > > index 330eb7dd89bf..180d4f1f9e32 100644 > > --- a/gcc/tree-cfg.c > > +++ b/gcc/tree-cfg.c > > @@ -3375,7 +3375,11 @@ verify_gimple_call (gcall *stmt) > > } > > > > tree lhs = gimple_call_lhs (stmt); > > + /* For .DEFERRED_INIT call, the LHS might be an indirection of > > + a pointer for the VLA variable, which is not a valid LHS of > > + a gimple call, we ignore the assertion on this. */ > > if (lhs > > + && (!gimple_call_internal_p (stmt, IFN_DEFERRED_INIT)) > > && (!is_gimple_reg (lhs) > > && (!is_gimple_lvalue (lhs) > > || verify_types_in_gimple_reference > > > > The assertion failure in tree-cfg.c got resolved, but I got another > > assertion failure in operands_scanner::get_expr_operands (tree *expr_p, int > > flags), line 945: > > > > 939 /* If we get here, something has gone wrong. */ > > 940 if (flag_checking) > > 941 { > > 942 fprintf (stderr, "unhandled expression in > > get_expr_operands():\n"); > > 943 debug_tree (expr); > > 944 fputs ("\n", stderr); > > 945 gcc_unreachable (); > > 946 } > > > > It looks like the gimple statement: > >*arr.1 = .DEFERRED_INIT (D.1952, 2, 1); > > > > is not valid, i.e., the LHS should not be an indirection to a pointer. > > > > How to resolve this issue? It sounds like the LHS is an INDIRECT_REF maybe? That means it's still not properly gimplified because it should end up as a MEM_REF instead. But I'm just guessing here ... 
if you are in a debugger then you can invoke debug_tree (lhs) in the inferior to see what exactly it is at the point of the failure. > I came up with the following solution: > > Define the IFN_DEFERRED_INIT function as: > >LHS = DEFERRED_INIT (SIZE of the DECL, INIT_TYPE, IS_VLA); > >if IS_VLA is false, the LHS is the DECL itself, >if IS_VLA is true, the LHS is the pointer to this DECL that is created by >gimplify_vla_decl. > > > The benefits of this solution are: > > 1. Resolved the invalid IR issue; > 2. The call stmt carries the address of the VLA naturally; > > The issue with this solution is: > > For VLA and non-VLA, the LHS will be different, > > Do you see any other potential issues with this solution? > > thanks. > > Qing > > > > > -- Richard Biener SUSE Software Solutions Germany
[PATCH v3] gcov: Add TARGET_GCOV_TYPE_SIZE target macro
If -fprofile-update=atomic is used, then the target must provide atomic operations for the counters of the type returned by get_gcov_type(). This is a 64-bit type for targets which have a 64-bit long long type. On 32-bit targets this could be an issue since they may not provide 64-bit atomic operations. Allow targets to override the default type size with the new TARGET_GCOV_TYPE_SIZE target macro. If a 32-bit gcov type size is used, then there is currently a warning in libgcov-driver.c in a dead code block due to sizeof (counter) == sizeof (gcov_unsigned_t): libgcc/libgcov-driver.c: In function 'dump_counter': libgcc/libgcov-driver.c:401:46: warning: right shift count >= width of type [-Wshift-count-overflow] 401 | dump_unsigned ((gcov_unsigned_t)(counter >> 32), dump_fn, arg); | ^~ gcc/ * c-family/c-cppbuiltin.c (c_cpp_builtins): Define __LIBGCC_GCOV_TYPE_SIZE if flag_building_libgcc is true. * config/sparc/rtemself.h (TARGET_GCOV_TYPE_SIZE): Redefine. * coverage.c (get_gcov_type): Use targetm.gcov_type_size. * doc/tm.texi (TARGET_GCOV_TYPE_SIZE): Add hook under "Misc". * doc/tm.texi.in: Regenerate. * target.def (gcov_type_size): New POD hook. * tree-profile.c (gimple_gen_edge_profiler): Use precision of gcov_type_node. (gimple_gen_time_profiler): Likewise. libgcc/ * libgcov.h (gcov_type): Define using __LIBGCC_GCOV_TYPE_SIZE. (gcov_type_unsigned): Likewise. --- gcc/c-family/c-cppbuiltin.c | 2 ++ gcc/config/sparc/rtemself.h | 3 +++ gcc/coverage.c | 3 +-- gcc/doc/tm.texi | 11 +++ gcc/doc/tm.texi.in | 2 ++ gcc/target.def | 12 gcc/tree-profile.c | 4 ++-- libgcc/libgcov.h| 6 +++--- 8 files changed, 36 insertions(+), 7 deletions(-) diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c index f79f939bd10f..e85b60c79f49 100644 --- a/gcc/c-family/c-cppbuiltin.c +++ b/gcc/c-family/c-cppbuiltin.c @@ -1450,6 +1450,8 @@ c_cpp_builtins (cpp_reader *pfile) /* For libgcov. 
*/ builtin_define_with_int_value ("__LIBGCC_VTABLE_USES_DESCRIPTORS__", TARGET_VTABLE_USES_DESCRIPTORS); + builtin_define_with_int_value ("__LIBGCC_GCOV_TYPE_SIZE", +TARGET_GCOV_TYPE_SIZE); } /* For use in assembly language. */ diff --git a/gcc/config/sparc/rtemself.h b/gcc/config/sparc/rtemself.h index fa972af640cc..87a3ceb640c0 100644 --- a/gcc/config/sparc/rtemself.h +++ b/gcc/config/sparc/rtemself.h @@ -40,3 +40,6 @@ /* Use the default */ #undef LINK_GCC_C_SEQUENCE_SPEC + +#undef TARGET_GCOV_TYPE_SIZE +#define TARGET_GCOV_TYPE_SIZE 32 diff --git a/gcc/coverage.c b/gcc/coverage.c index ac9a9fdad228..6166247ad179 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -145,8 +145,7 @@ static void coverage_obj_finish (vec *); tree get_gcov_type (void) { - scalar_int_mode mode -= smallest_int_mode_for_size (LONG_LONG_TYPE_SIZE > 32 ? 64 : 32); + scalar_int_mode mode = smallest_int_mode_for_size (targetm.gcov_type_size); return lang_hooks.types.type_for_mode (mode, false); } diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index a30fdcbbf3d6..429e7edf0e9d 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -12588,3 +12588,14 @@ Return an RTX representing @var{tagged_pointer} with its tag set to zero. Store the result in @var{target} if convenient. The default clears the top byte of the original pointer. @end deftypefn + +@deftypevr {Target Hook} HOST_WIDE_INT TARGET_GCOV_TYPE_SIZE +The gcov type size in bits. This type is used for example for counters +incremented by profiling and code-coverage events. The default value is 64, +if the type size of long long is greater than 32, otherwise the default +value is 32. A 64-bit type is recommended to avoid overflows of the +counters. If the @option{-fprofile-update=atomic} is used, then the +counters are incremented using atomic operations. Targets not supporting +64-bit atomic operations may override the default value and request a 32-bit +type. 
+@end deftypevr diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 611fc500ac86..fdf16b901c53 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -8180,3 +8180,5 @@ maintainer is familiar with. @hook TARGET_MEMTAG_EXTRACT_TAG @hook TARGET_MEMTAG_UNTAGGED_POINTER + +@hook TARGET_GCOV_TYPE_SIZE diff --git a/gcc/target.def b/gcc/target.def index 7676d5e626e3..b94c2c40dcf1 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -7104,6 +7104,18 @@ DEFHOOK void, (void), NULL) +DEFHOOKPOD +(gcov_type_size, + "The gcov type size in bits. This type is used for example for counters\n\ +incremented by profiling and code-coverage events. The default value is 64,\n\ +if the type size of long long is greater than 32, otherwise the default\n\ +value is
[PATCH] rs6000: Make some BIFs vectorized on P10
Hi, This patch is to add the support to make vectorizer able to vectorize scalar version of some built-in functions with its corresponding vector version with Power10 support. Bootstrapped & regtested on powerpc64le-linux-gnu {P9,P10} and powerpc64-linux-gnu P8. Is it ok for trunk? BR, Kewen - gcc/ChangeLog: * config/rs6000/rs6000.c (rs6000_builtin_md_vectorized_function): Add support for some built-in functions vectorized on Power10. gcc/testsuite/ChangeLog: * gcc.target/powerpc/dive-vectorize-1.c: New test. * gcc.target/powerpc/dive-vectorize-1.h: New test. * gcc.target/powerpc/dive-vectorize-2.c: New test. * gcc.target/powerpc/dive-vectorize-2.h: New test. * gcc.target/powerpc/dive-vectorize-run-1.c: New test. * gcc.target/powerpc/dive-vectorize-run-2.c: New test. * gcc.target/powerpc/p10-bifs-vectorize-1.c: New test. * gcc.target/powerpc/p10-bifs-vectorize-1.h: New test. * gcc.target/powerpc/p10-bifs-vectorize-run-1.c: New test. --- gcc/config/rs6000/rs6000.c| 55 +++ .../gcc.target/powerpc/dive-vectorize-1.c | 11 .../gcc.target/powerpc/dive-vectorize-1.h | 22 .../gcc.target/powerpc/dive-vectorize-2.c | 12 .../gcc.target/powerpc/dive-vectorize-2.h | 22 .../gcc.target/powerpc/dive-vectorize-run-1.c | 52 ++ .../gcc.target/powerpc/dive-vectorize-run-2.c | 53 ++ .../gcc.target/powerpc/p10-bifs-vectorize-1.c | 15 + .../gcc.target/powerpc/p10-bifs-vectorize-1.h | 40 ++ .../powerpc/p10-bifs-vectorize-run-1.c| 45 +++ 10 files changed, 327 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c create mode 100644 
gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 279f00cc648..3eac1d05101 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5785,6 +5785,61 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out, default: break; } + + machine_mode in_vmode = TYPE_MODE (type_in); + machine_mode out_vmode = TYPE_MODE (type_out); + + /* Power10 supported vectorized built-in functions. */ + if (TARGET_POWER10 + && in_vmode == out_vmode + && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode)) +{ + machine_mode exp_mode = DImode; + machine_mode exp_vmode = V2DImode; + enum rs6000_builtins vname = RS6000_BUILTIN_COUNT; + switch (fn) + { + case MISC_BUILTIN_DIVWE: + case MISC_BUILTIN_DIVWEU: + exp_mode = SImode; + exp_vmode = V4SImode; + if (fn == MISC_BUILTIN_DIVWE) + vname = P10V_BUILTIN_DIVES_V4SI; + else + vname = P10V_BUILTIN_DIVEU_V4SI; + break; + case MISC_BUILTIN_DIVDE: + case MISC_BUILTIN_DIVDEU: + if (fn == MISC_BUILTIN_DIVDE) + vname = P10V_BUILTIN_DIVES_V2DI; + else + vname = P10V_BUILTIN_DIVEU_V2DI; + break; + case P10_BUILTIN_CFUGED: + vname = P10V_BUILTIN_VCFUGED; + break; + case P10_BUILTIN_CNTLZDM: + vname = P10V_BUILTIN_VCLZDM; + break; + case P10_BUILTIN_CNTTZDM: + vname = P10V_BUILTIN_VCTZDM; + break; + case P10_BUILTIN_PDEPD: + vname = P10V_BUILTIN_VPDEPD; + break; + case P10_BUILTIN_PEXTD: + vname = P10V_BUILTIN_VPEXTD; + break; + default: + return NULL_TREE; + } + + if (vname != RS6000_BUILTIN_COUNT + && in_mode == exp_mode + && in_vmode == exp_vmode) + return rs6000_builtin_decls[vname]; +} + return NULL_TREE; } diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c new file mode 100644 index 000..84f1b0a88f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile 
} */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */ + +/* Test if signed/unsigned int extended divisions get vectorized. */ + +#include "dive-vectorize-1.h" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"
[PATCH] [i386] Combine avx_vec_concatv16si and avx512f_zero_extendv16hiv16si2_1 to avx512f_zero_extendv16hiv16si2_2.
Hi: Add define_insn_and_split to combine avx_vec_concatv16si/2 and avx512f_zero_extendv16hiv16si2_1 since the latter already zero_extend the upper bits, similar for other patterns which are related to pmovzx{bw,wd,dq}. It will do optimization like - vmovdqa %ymm0, %ymm0# 7 [c=4 l=6] avx_vec_concatv16si/2 vpmovzxwd %ymm0, %zmm0# 22[c=4 l=6] avx512f_zero_extendv16hiv16si2 ret # 25[c=0 l=1] simple_return_internal Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR target/101846 * config/i386/sse.md (*avx2_zero_extendv16qiv16hi2_2): New post_reload define_insn_and_split. (*avx512bw_zero_extendv32qiv32hi2_2): Ditto. (*sse4_1_zero_extendv8qiv8hi2_4): Ditto. (*avx512f_zero_extendv16hiv16si2_2): Ditto. (*avx2_zero_extendv8hiv8si2_2): Ditto. (*sse4_1_zero_extendv4hiv4si2_4): Ditto. (*avx512f_zero_extendv8siv8di2_2): Ditto. (*avx2_zero_extendv4siv4di2_2): Ditto. (*sse4_1_zero_extendv2siv2di2_4): Ditto. gcc/testsuite/ChangeLog: PR target/101846 * gcc.target/i386/pr101846-1.c: New test. 
--- gcc/config/i386/sse.md | 220 + gcc/testsuite/gcc.target/i386/pr101846-1.c | 95 + 2 files changed, 315 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr101846-1.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a46a2373547..6450c058458 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -673,8 +673,14 @@ (define_mode_iterator VI12_128 [V16QI V8HI]) (define_mode_iterator VI14_128 [V16QI V4SI]) (define_mode_iterator VI124_128 [V16QI V8HI V4SI]) (define_mode_iterator VI24_128 [V8HI V4SI]) +(define_mode_iterator VI128_128 [V16QI V8HI V2DI]) (define_mode_iterator VI248_128 [V8HI V4SI V2DI]) +(define_mode_iterator VI248_256 [V16HI V8SI V4DI]) +(define_mode_iterator VI248_512 [V32HI V16SI V8DI]) (define_mode_iterator VI48_128 [V4SI V2DI]) +(define_mode_iterator VI148_512 [V64QI V16SI V8DI]) +(define_mode_iterator VI148_256 [V32QI V8SI V4DI]) +(define_mode_iterator VI148_128 [V16QI V4SI V2DI]) ;; Various 256bit and 512 vector integer mode combinations (define_mode_iterator VI124_256 [V32QI V16HI V8SI]) @@ -18499,6 +18505,26 @@ (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_1" operands[1] = lowpart_subreg (V16QImode, operands[1], V32QImode); }) +(define_insn_and_split "*avx2_zero_extendv16qiv16hi2_2" + [(set (match_operand:V32QI 0 "register_operand" "=v") + (vec_select:V32QI + (vec_concat:V64QI + (subreg:V32QI + (vec_concat:VI248_256 + (match_operand: 1 "nonimmediate_operand" "vm") + (match_operand: 2 "const0_operand" "C")) 0) + (match_operand:V32QI 3 "const0_operand" "C")) + (match_parallel 4 "pmovzx_parallel" + [(match_operand 5 "const_int_operand" "n")])))] + "TARGET_AVX2" + "#" + "&& reload_completed" + [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))] +{ + operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode); + operands[1] = lowpart_subreg (V16QImode, operands[1], mode); +}) + (define_expand "v16qiv16hi2" [(set (match_operand:V16HI 0 "register_operand") (any_extend:V16HI @@ -18533,6 +18559,26 
@@ (define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_1" operands[1] = lowpart_subreg (V32QImode, operands[1], V64QImode); }) +(define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_2" + [(set (match_operand:V64QI 0 "register_operand" "=v") + (vec_select:V64QI + (vec_concat:V128QI + (subreg:V64QI + (vec_concat:VI248_512 + (match_operand: 1 "nonimmediate_operand" "vm") + (match_operand: 2 "const0_operand" "C")) 0) + (match_operand:V64QI 3 "const0_operand" "C")) + (match_parallel 4 "pmovzx_parallel" + [(match_operand 5 "const_int_operand" "n")])))] + "TARGET_AVX512BW" + "#" + "&& reload_completed" + [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))] +{ + operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode); + operands[1] = lowpart_subreg (V32QImode, operands[1], mode); +}) + (define_expand "v32qiv32hi2" [(set (match_operand:V32HI 0 "register_operand") (any_extend:V32HI @@ -18619,6 +18665,41 @@ (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3" } [(set_attr "isa" "noavx,noavx,avx")]) +(define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_4" + [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,Yw") + (vec_select:V16QI + (vec_concat:V32QI + (subreg:V16QI + (vec_concat:VI248_128 + (match_operand: 1 "vector_operand" "YrBm,*xBm,Ywm") + (match_operand: 2 "const0_operand" "C,C,C")) 0) + (match_operand:V16QI 3 "const0_operand" "C,C,C")) + (match_parallel 4
Re: [PATCH][v2] Adjust volatile handling of the operand scanner
On Tue, 10 Aug 2021, Eric Botcazou wrote: > > The question is whether we instead want to amend build3 to > > set TREE_THIS_VOLATILE automatically when the FIELD_DECL has > > it set. At least for the Fortran FE cases the gimplifier > > fails to see some volatile references and thus can generate > > wrong code which is a latent issue. > > What do we do for other similar flags, e.g. TREE_READONLY? build3 currently does no special processing for the FIELD_DECL operand, it just sets TREE_THIS_VOLATILE from operand zero for tcc_references. The C and C++ frontends have repeated patterns like ref = build3 (COMPONENT_REF, subtype, datum, subdatum, NULL_TREE); SET_EXPR_LOCATION (ref, loc); if (TREE_READONLY (subdatum) || (use_datum_quals && TREE_READONLY (datum))) TREE_READONLY (ref) = 1; if (TREE_THIS_VOLATILE (subdatum) || (use_datum_quals && TREE_THIS_VOLATILE (datum))) TREE_THIS_VOLATILE (ref) = 1; Leaving out TREE_READONLY shouldn't have any correctness issue. It's just that when adjusting the SSA operand scanner to correctly interpret GENERIC that this uncovers pre-existing issues in the Fortran frontend (one manifests in a testsuite FAIL - otherwise I wouldn't have noticed). I'm fine with just plugging the Fortran FE holes as we discover them but I did not check other frontends and testsuite coverage is weak. Now - I wonder if there's a reason a frontend might _not_ want to set TREE_THIS_VOLATILE on a COMPONENT_REF when the FIELD_DECL has TREE_THIS_VOLATILE set. I guess I'll do one more experiment and add verification that TREE_THIS_VOLATILE on COMPONENT_REFs and FIELD_DECLs is consistent and see where that trips. Richard.
Re: [PATCH] Extend ldexp{s, d}f3 to vscalefs{s, d} when TARGET_AVX512F and TARGET_SSE_MATH.
On Tue, Aug 10, 2021 at 2:13 PM liuhongt wrote: > > Hi: > AVX512F supported vscalefs{s,d} which is the same as ldexp except the > second operand should be floating point. > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > > gcc/ChangeLog: > > PR target/98309 > * config/i386/i386.md (ldexp3): Extend to vscalefs[sd] > when TARGET_AVX512F and TARGET_SSE_MATH. > > gcc/testsuite/ChangeLog: > > PR target/98309 > * gcc.target/i386/pr98309-1.c: New test. > * gcc.target/i386/pr98309-2.c: New test. OK. Thanks, Uros. > --- > gcc/config/i386/i386.md | 34 +++- > gcc/testsuite/gcc.target/i386/pr98309-1.c | 18 +++ > gcc/testsuite/gcc.target/i386/pr98309-2.c | 39 +++ > 3 files changed, 83 insertions(+), 8 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr98309-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr98309-2.c > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index bc1c30b77f4..56b09c566ed 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -17914,17 +17914,35 @@ (define_expand "ldexp3" >[(use (match_operand:MODEF 0 "register_operand")) > (use (match_operand:MODEF 1 "general_operand")) > (use (match_operand:SI 2 "register_operand"))] > - "TARGET_USE_FANCY_MATH_387 > - && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > - || TARGET_MIX_SSE_I387) > + "((TARGET_USE_FANCY_MATH_387 > + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > +|| TARGET_MIX_SSE_I387)) > +|| (TARGET_AVX512F && TARGET_SSE_MATH)) > && flag_unsafe_math_optimizations" > { > - rtx op0 = gen_reg_rtx (XFmode); > - rtx op1 = gen_reg_rtx (XFmode); > + /* Prefer avx512f version. 
*/ > + if (TARGET_AVX512F && TARGET_SSE_MATH) > + { > + rtx op2 = gen_reg_rtx (mode); > + emit_insn (gen_floatsi2 (op2, operands[2])); > + operands[0] = lowpart_subreg (mode, operands[0], > mode); > + if (MEM_P (operands[1])) > + operands[1] = force_reg (mode, operands[1]); > + operands[1] = lowpart_subreg (mode, operands[1], > mode); > + op2 = lowpart_subreg (mode, op2, mode); > + emit_insn (gen_avx512f_vmscalef (operands[0], > + operands[1], > + op2)); > + } > + else > +{ > + rtx op0 = gen_reg_rtx (XFmode); > + rtx op1 = gen_reg_rtx (XFmode); > > - emit_insn (gen_extendxf2 (op1, operands[1])); > - emit_insn (gen_ldexpxf3 (op0, op1, operands[2])); > - emit_insn (gen_truncxf2 (operands[0], op0)); > + emit_insn (gen_extendxf2 (op1, operands[1])); > + emit_insn (gen_ldexpxf3 (op0, op1, operands[2])); > + emit_insn (gen_truncxf2 (operands[0], op0)); > + } >DONE; > }) > > diff --git a/gcc/testsuite/gcc.target/i386/pr98309-1.c > b/gcc/testsuite/gcc.target/i386/pr98309-1.c > new file mode 100644 > index 000..3a7afb58971 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr98309-1.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2 -mfpmath=sse -ffast-math" } */ > +/* { dg-final { scan-assembler-times "vcvtsi2s\[sd\]" "2" } } */ > +/* { dg-final { scan-assembler-times "vscalefs\[sd\]" "2" } } */ > + > +double > +__attribute__((noipa)) > +foo (double a, int b) > +{ > + return __builtin_ldexp (a, b); > +} > + > +float > +__attribute__((noipa)) > +foo2 (float a, int b) > +{ > + return __builtin_ldexpf (a, b); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr98309-2.c > b/gcc/testsuite/gcc.target/i386/pr98309-2.c > new file mode 100644 > index 000..ecfb9168b7d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr98309-2.c > @@ -0,0 +1,39 @@ > +/* { dg-do run } */ > +/* { dg-options "-mavx512f -O2 -mfpmath=sse -ffast-math" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#define AVX512F > +#ifndef CHECK > +#define CHECK 
"avx512f-helper.h" > +#endif > + > +#include CHECK > + > +#include "pr98309-1.c" > + > +double > +__attribute__((noipa, target("fpmath=387"))) > +foo_i387 (double a, int b) > +{ > + return __builtin_ldexp (a, b); > +} > + > +float > +__attribute__((noipa, target("fpmath=387"))) > +foo2_i387 (float a, int b) > +{ > + return __builtin_ldexpf (a, b); > +} > + > +static void > +test_512 (void) > +{ > + float fa = 14.5; > + double da = 44.5; > + int fb = 12; > + int db = 8; > + if (foo_i387 (da, db) != foo (da, db)) > +abort (); > + if (foo2_i387 (fa, fb) != foo2 (fa, fb)) > +abort (); > +} > -- > 2.27.0 >