[PATCH, rs6000] Support float from/to long conversion vectorization
Hi, This patch is to add the support for float from/to long conversion vectorization. ISA 2.06 supports the vector version instructions for conversion between float and long long (both signed and unsigned), but vectorizer can't exploit them since the optab check fails. So this patch is mainly to add related optab supports. I've verified the functionality on both LE and BE. Bootstrapped and regress tested on powerpc64le-linux-gnu. Thanks, Kewen --- gcc/ChangeLog 2019-09-27 Kewen Lin * config/rs6000/vsx.md (vec_pack[su]_float_v2di): New define_expand. (vec_unpack_[su]fix_trunc_hi_v4sf): Likewise. (vec_unpack_[su]fix_trunc_lo_v4sf): Likewise. gcc/testsuite/ChangeLog 2019-09-27 Kewen Lin * gcc.target/powerpc/conv-vectorize-1.c: New test. * gcc.target/powerpc/conv-vectorize-2.c: New test. diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 7633171..ef32971 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5538,3 +5538,42 @@ operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx); operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d); }) + +;; Support signed/unsigned long long to float conversion vectorization. +(define_expand "vec_pack<su>_float_v2di" + [(match_operand:V4SF 0 "vfloat_operand") + (any_float:V4SF (parallel [(match_operand:V2DI 1 "vint_operand") + (match_operand:V2DI 2 "vint_operand")]))] + "TARGET_VSX" +{ + rtx r1 = gen_reg_rtx (V4SFmode); + rtx r2 = gen_reg_rtx (V4SFmode); + emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1])); + emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2])); + rs6000_expand_extract_even (operands[0], r1, r2); + DONE; +}) + +;; Support float to signed/unsigned long long conversion vectorization. 
+(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf" + [(match_operand:V2DI 0 "vint_operand") + (any_fix:V2DI (match_operand:V4SF 1 "vfloat_operand"))] + "TARGET_VSX" +{ + rtx reg = gen_reg_rtx (V4SFmode); + rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg)); + DONE; +}) + +(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf" + [(match_operand:V2DI 0 "vint_operand") + (any_fix:V2DI (match_operand:V4SF 1 "vfloat_operand"))] + "TARGET_VSX" +{ + rtx reg = gen_reg_rtx (V4SFmode); + rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN); + emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg)); + DONE; +}) + diff --git a/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c new file mode 100644 index 000..d96db14 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c @@ -0,0 +1,37 @@ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -ftree-vectorize -mvsx" } */ + +/* Test vectorizer can exploit vector conversion instructions to convert + unsigned/signed long long to float. 
*/ + +#include <stddef.h> + +#define SIZE 32 +#define ALIGN 16 + +float sflt_array[SIZE] __attribute__ ((__aligned__ (ALIGN))); +float uflt_array[SIZE] __attribute__ ((__aligned__ (ALIGN))); + +unsigned long long ulong_array[SIZE] __attribute__ ((__aligned__ (ALIGN))); +signed long long slong_array[SIZE] __attribute__ ((__aligned__ (ALIGN))); + +void +convert_slong_to_float (void) +{ + size_t i; + + for (i = 0; i < SIZE; i++) +sflt_array[i] = (float) slong_array[i]; +} + +void +convert_ulong_to_float (void) +{ + size_t i; + + for (i = 0; i < SIZE; i++) +uflt_array[i] = (float) ulong_array[i]; +} + +/* { dg-final { scan-assembler {\mxvcvsxdsp\M} } } */ +/* { dg-final { scan-assembler {\mxvcvuxdsp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c new file mode 100644 index 000..5dd5dea --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c @@ -0,0 +1,37 @@ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -ftree-vectorize -mvsx" } */ + +/* Test vectorizer can exploit vector conversion instructions to convert + float to unsigned/signed long long. */ + +#include <stddef.h> + +#define SIZE 32 +#define ALIGN 16 + +float sflt_array[SIZE] __attribute__ ((__aligned__ (ALIGN))); +float uflt_array[SIZE] __attribute__ ((__aligned__ (ALIGN))); + +unsigned long long ulong_array[SIZE] __attribute__ ((__aligned__ (ALIGN))); +signed long long slong_array[SIZE] __attribute__ ((__aligned__ (ALIGN))); + +void +convert_float_to_slong (void) +{ + size_t i; + + for (i = 0; i < SIZE; i++) +slong_array[i] = (signed long long) sflt_array[i]; +} + +void +convert_float_to_ulong (void) +{ + size_t i; + + for (i = 0; i < SIZE; i++) +ulong_array[i] = (unsigned long long) uflt_array[i]; +} + +/* { dg-final { scan-assembler {\mxvcvspsxds\M} } } */ +/* { dg-final { scan-assembler {\mxvcvspuxds\M} } } */
C++ PATCH for c++/91889 - follow-up fix for DR 2352
It turned out that DR 2352 needs some fixing: as of now, it means that in void f(int*); // #1 void f(const int* const &); // #2 void g(int* p) { f(p); } the call to f is ambiguous. This broke e.g. Boost. I raised this on the CWG reflector and Jason suggested wording that would break this up; this patch is an attempt to implement that suggestion. Jason, you remarked that adding a ck_qual under the ck_ref_bind might be too much trouble, but it seems it's actually fairly simple. The comments hopefully explain my thinking. Is this what you had in mind? ref-bind6.C is something reduced from libstdc++ I came across when testing this patch. Bootstrapped/regtested on x86_64-linux, ok for trunk? 2019-09-26 Marek Polacek PR c++/91889 - follow-up fix for DR 2352. * call.c (involves_qualification_conversion_p): New function. (direct_reference_binding): Build a ck_qual if the conversion would involve a qualification conversion. (convert_like_real): Strip the conversion created by the ck_qual in direct_reference_binding. * g++.dg/cpp0x/ref-bind3.C: Add dg-error. * g++.dg/cpp0x/ref-bind4.C: New test. * g++.dg/cpp0x/ref-bind5.C: New test. * g++.dg/cpp0x/ref-bind6.C: New test. * g++.old-deja/g++.pt/spec35.C: Revert earlier change. diff --git gcc/cp/call.c gcc/cp/call.c index 45b984ecb11..09976369aed 100644 --- gcc/cp/call.c +++ gcc/cp/call.c @@ -1555,6 +1555,27 @@ reference_compatible_p (tree t1, tree t2) return true; } +/* Return true if converting FROM to TO would involve a qualification + conversion. */ + +static bool +involves_qualification_conversion_p (tree to, tree from) +{ + /* If we're not converting a pointer to another one, we won't get + a qualification conversion. 
*/ + if (!((TYPE_PTR_P (to) && TYPE_PTR_P (from)) + || (TYPE_PTRDATAMEM_P (to) && TYPE_PTRDATAMEM_P (from)))) +return false; + + conversion *conv = standard_conversion (to, from, NULL_TREE, + /*c_cast_p=*/false, 0, tf_none); + for (conversion *t = conv; t; t = next_conversion (t)) +if (t->kind == ck_qual) + return true; + + return false; +} + /* A reference of the indicated TYPE is being bound directly to the expression represented by the implicit conversion sequence CONV. Return a conversion sequence for this binding. */ @@ -1598,6 +1619,19 @@ direct_reference_binding (tree type, conversion *conv) That way, convert_like knows not to generate a temporary. */ conv->need_temporary_p = false; } + else if (involves_qualification_conversion_p (t, conv->type)) +/* Represent the qualification conversion. After DR 2352 + #1 and #2 were indistinguishable conversion sequences: + +void f(int*); // #1 +void f(const int* const &); // #2 +void g(int* p) { f(p); } + + because the types "int *" and "const int *const" are + reference-related and we were binding both directly and they + had the same rank. To break it up, we add a ck_qual under the + ck_ref_bind so that conversion sequence ranking chooses #1. */ +conv = build_conv (ck_qual, t, conv); return build_conv (ck_ref_bind, type, conv); } @@ -7342,6 +7376,16 @@ convert_like_real (conversion *convs, tree expr, tree fn, int argnum, { tree ref_type = totype; + /* direct_reference_binding might have inserted a ck_qual under + this ck_ref_bind for the benefit of conversion sequence ranking. + Ignore the conversion; we'll create our own below. 
*/ + if (next_conversion (convs)->kind == ck_qual) + { + gcc_assert (same_type_p (TREE_TYPE (expr), +next_conversion (convs)->type)); + STRIP_NOPS (expr); + } + if (convs->bad_p && !next_conversion (convs)->bad_p) { tree extype = TREE_TYPE (expr); diff --git gcc/testsuite/g++.dg/cpp0x/ref-bind3.C gcc/testsuite/g++.dg/cpp0x/ref-bind3.C index 16e1bfe6ccc..b2c85ec684a 100644 --- gcc/testsuite/g++.dg/cpp0x/ref-bind3.C +++ gcc/testsuite/g++.dg/cpp0x/ref-bind3.C @@ -1,22 +1,18 @@ // PR c++/91844 - Implement CWG 2352, Similar types and reference binding. // { dg-do compile { target c++11 } } -template int f (const T *const &); // (1) -template int f (T *const &); // (2) -template int f (T *); // (3) - -/* Before CWG 2352, (2) was a better match than (1), but (2) and (3) were - equally good, so there was an ambiguity. (2) was better than (1) because - (1) required a qualification conversion whereas (2) didn't. But with this - CWG, (1) no longer requires a qualification conversion, because the types - "const int* const" and "int *" are now considered reference-related and we - bind directly, and (1) is more specialized than (2). And (1) is also a - better match than (3). */ +template int f (const T *const &); // 1 +template int f (T
C++ PATCH for c++/91921 - stray warning with -Woverloaded-virtual
Here we were emitting a stray "by" message if base_fndecl was in a system header but fns was not. As usual, we need to guard such a use. Further, use inform instead of warning_at. Bootstrapped/regtested on x86_64-linux, ok for trunk? 2019-09-26 Marek Polacek PR c++/91921 - stray warning with -Woverloaded-virtual. * class.c (warn_hidden): Only emit the second part of -Woverloaded-virtual if the first part was issued. Use inform instead of warning_at. * g++.dg/warn/Woverloaded-2.C: New. * g++.dg/warn/Woverloaded-2.h: New. * g++.dg/warn/pr61945.C: Turn dg-warning into dg-message. * g++.old-deja/g++.mike/warn6.C: Likewise. * g++.old-deja/g++.warn/virt1.C: Likewise. diff --git gcc/cp/class.c gcc/cp/class.c index 59a3d1a0496..0fd5e8e188d 100644 --- gcc/cp/class.c +++ gcc/cp/class.c @@ -2915,11 +2915,10 @@ warn_hidden (tree t) if (base_fndecl) { /* Here we know it is a hider, and no overrider exists. */ - warning_at (location_of (base_fndecl), - OPT_Woverloaded_virtual, - "%qD was hidden", base_fndecl); - warning_at (location_of (fns), - OPT_Woverloaded_virtual, " by %qD", fns); + if (warning_at (location_of (base_fndecl), + OPT_Woverloaded_virtual, + "%qD was hidden", base_fndecl)) + inform (location_of (fns), " by %qD", fns); } } } diff --git gcc/testsuite/g++.dg/warn/Woverloaded-2.C gcc/testsuite/g++.dg/warn/Woverloaded-2.C new file mode 100644 index 000..84d65de05ce --- /dev/null +++ gcc/testsuite/g++.dg/warn/Woverloaded-2.C @@ -0,0 +1,9 @@ +// PR c++/91921 - stray warning with -Woverloaded-virtual. 
+// { dg-options "-Woverloaded-virtual" } + +#include "Woverloaded-2.h" + +struct B : A +{ + void f(int); +}; diff --git gcc/testsuite/g++.dg/warn/Woverloaded-2.h gcc/testsuite/g++.dg/warn/Woverloaded-2.h new file mode 100644 index 000..b9e15b0c331 --- /dev/null +++ gcc/testsuite/g++.dg/warn/Woverloaded-2.h @@ -0,0 +1,6 @@ +#pragma GCC system_header + +struct A +{ + virtual void f(); +}; diff --git gcc/testsuite/g++.dg/warn/pr61945.C gcc/testsuite/g++.dg/warn/pr61945.C index 5584d841692..3d40581e5e3 100644 --- gcc/testsuite/g++.dg/warn/pr61945.C +++ gcc/testsuite/g++.dg/warn/pr61945.C @@ -7,5 +7,5 @@ class A { }; class B : A { template - void foo (); // { dg-warning "by .B::foo\\(\\)." } + void foo (); // { dg-message "by .B::foo\\(\\)." } }; diff --git gcc/testsuite/g++.old-deja/g++.mike/warn6.C gcc/testsuite/g++.old-deja/g++.mike/warn6.C index 9c694d62559..26759cfb527 100644 --- gcc/testsuite/g++.old-deja/g++.mike/warn6.C +++ gcc/testsuite/g++.old-deja/g++.mike/warn6.C @@ -30,13 +30,13 @@ struct D : public B, public B2, public B3 { virtual void bothsame(int); - virtual void bothdiff(int); // { dg-warning "" } + virtual void bothdiff(int); // { dg-message "" } virtual void both2same(int); virtual void both2same(float); - virtual void both12diff(int);// { dg-warning "" } + virtual void both12diff(int);// { dg-message "" } - virtual void bothfardiff(int); // { dg-warning "" } + virtual void bothfardiff(int); // { dg-message "" } }; diff --git gcc/testsuite/g++.old-deja/g++.warn/virt1.C gcc/testsuite/g++.old-deja/g++.warn/virt1.C index 4550dd5e054..c68de8a7e7c 100644 --- gcc/testsuite/g++.old-deja/g++.warn/virt1.C +++ gcc/testsuite/g++.old-deja/g++.warn/virt1.C @@ -6,5 +6,5 @@ struct A { }; struct B: public A { - void f(int); // { dg-warning "" } by this + void f(int); // { dg-message "" } by this };
[FYI] [Ada] set DECL_SIZE_UNIT for zero-sized fields
Zero-sized fields do not get processed by finish_record_type: they're removed from the field list before and reinserted after, so their DECL_SIZE_UNIT remains unset, causing the translation of assignment statements with use_memset_p, in quite unusual circumstances, to use a NULL_TREE as the memset length. This patch sets DECL_SIZE_UNIT for the zero-sized fields, that don't go through language-independent layout, in language-specific layout. Regstrapped on x86_64-linux-gnu. I'm checking this in. for gcc/ada/ChangeLog * gcc-interface/decl.c (components_to_record): Set DECL_SIZE_UNIT for zero-sized fields. --- gcc/ada/gcc-interface/decl.c |1 + 1 file changed, 1 insertion(+) diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c index 67b938ee7eef0..77c6c9f12e76c 100644 --- a/gcc/ada/gcc-interface/decl.c +++ b/gcc/ada/gcc-interface/decl.c @@ -7928,6 +7928,7 @@ components_to_record (Node_Id gnat_component_list, Entity_Id gnat_record_type, if (DECL_SIZE (gnu_field) && integer_zerop (DECL_SIZE (gnu_field))) { + DECL_SIZE_UNIT (gnu_field) = size_zero_node; DECL_FIELD_OFFSET (gnu_field) = size_zero_node; SET_DECL_OFFSET_ALIGN (gnu_field, BIGGEST_ALIGNMENT); DECL_FIELD_BIT_OFFSET (gnu_field) = bitsize_zero_node; -- Alexandre Oliva, freedom fighter he/him https://FSFLA.org/blogs/lxo Be the change, be Free!FSF VP & FSF Latin America board member GNU Toolchain EngineerFree Software Evangelist Hay que enGNUrecerse, pero sin perder la terGNUra jamás - Che GNUevara
Re: [PATCH 2/2] libada: Respect `--enable-version-specific-runtime-libs'
On Thu, 26 Sep 2019, Arnaud Charlet wrote: > > Notice that ADA_RTL_OBJ_DIR never changes with/without the use of this > > configuration option (as expected). > > > > Does it answer your question? > > Yes, that's now very clear, thanks! Sorry not to make the original change description clear enough. > The change is OK for me, thanks. Thanks for your review. Shall I amend the change description anyhow then? I know it has not (as yet, as discussed at the GNU Tools Cauldron recently) been enforced for the GCC project (unlike with e.g. glibc), however I mean to use it whole as the commit message, which is what I have been doing for quite a while now, because I recognise the value of change descriptions for future repository examination. Maciej
Re: [PATCH 1/2] libada: Remove racy duplicate gnatlib installation
On Thu, 26 Sep 2019, Arnaud Charlet wrote: > Unfortunately the Make-lang.in part is still needed when using the > --disable-libada configure switch so a more elaborate change is needed. Hmm, I've experimented a bit and AFAICT if `--disable-libada' is given without my proposed change applied, then the gnatlib stuff doesn't get built in the first place let alone installed anyway. Actually the `install-gnatlib' target invocation in `ada.install-common' I have proposed to get removed does go ahead in that case, and then fails, again non-fatally (thanks to the `-' recipe line prefix in action here too): make[4]: Entering directory '.../gcc/ada' You must first build the GNAT library: make gnatlib make[4]: *** [gcc-interface/Makefile:497: ../stamp-gnatlib-rts] Error 1 make[4]: Leaving directory '.../gcc/ada' make[3]: *** [.../gcc/ada/gcc-interface/Make-lang.in:852: install-gnatlib] Error 2 make[2]: [.../gcc/ada/gcc-interface/Make-lang.in:828: ada.install-common] Error 2 (ignored) so all my change effectively does in that case is to remove these messages. I went as far as to compare the installed trees with `--disable-libada' specified and then without and with my patch applied respectively and they were exactly the same. NB no gnattools stuff is built either when `--disable-libada' is in effect, due to this clause in Makefile.def: dependencies = { module=all-gnattools; on=all-target-libada; }; leaving the resulting installation with a bare Ada compiler driver and backend only, e.g. with my native `x86_64-linux-gnu' setup: $(prefix)/bin/gnatbind $(prefix)/libexec/gcc/x86_64-linux-gnu/10.0.0/gnat1 (which is then of course incapable of building a `riscv-linux-gnu' cross-compiler, so that's it as far as Ada support goes). Therefore I maintain my change is correct and safe to apply. OK to go ahead with it then? Or have I missed anything and you meant something else (I find "still needed" a bit vague)? Maciej
Re: [PATCH v2, rs6000] Replace X-form addressing with D-form addressing in new pass for Power 9
Hi Kelvin, Sorry for the slow review. On Tue, Sep 03, 2019 at 03:10:09PM -0500, Kelvin Nilsen wrote: > This new pass scans existing rtl expressions and replaces them with rtl > expressions that favor selection of the D-form instructions in contexts for > which the D-form instructions are preferred. The new pass runs after the RTL > loop optimizations since loop unrolling often introduces opportunities for > beneficial replacements of X-form addressing instructions. And that is before the "big" RTL passes (cprop, CSE, combine, and related). Okay. > --- gcc/config/rs6000/rs6000-p9dform.c(nonexistent) > +++ gcc/config/rs6000/rs6000-p9dform.c(working copy) > @@ -0,0 +1,1487 @@ > +/* Subroutines used to transform array subscripting expressions into > + forms that are more amenable to d-form instruction selection for p9 > + little-endian VSX code. > + Copyright (C) 1991-2018 Free Software Foundation, Inc. (Year needs updating here). > + This pass runs immediately after pass_loops2. Loops have already (typo, pass_loop2) > +/* This is based on the union-find logic in web.c. web_entry_base is > + defined in df.h. */ > +class indexing_web_entry : public web_entry_base > +{ > + public: Most places have no extra leading space here. Not sure what the coding standards say? > +static int count_links (struct df_link *def_link) > +{ > + int result; > + for (result = 0; def_link != NULL; result++) > +def_link = def_link->next; > + return result; > +} Maybe this belongs in more generic code? Or as inline in df.h even? > + > + int result = 0; > + for (i = 0; i < count; i++) > +{ > + result = (result << 6) | ((result & 0xf00) >> 28); Do you want an extra zero in the constant there? The expression is 0 as written. Shifting a signed (possibly negative!) number to the right isn't a great idea in any case. Maybe you want to use some standard hash, instead? > + /* if there is no def, or the def is artificial, > + or there are multiple defs, this is an originating > + use. 
*/ (Sentence should start with a capital). > + unsigned int uid2 = > + insn_entry[uid].original_base_use; The = goes on the second line, instead. > + rtx mem = NULL; > + if (GET_CODE (body) == SET) > +{ > + if (MEM_P (SET_SRC (body))) > + mem = XEXP (SET_SRC (body), 0); > + else if (MEM_P (SET_DEST (body))) > + mem = XEXP (SET_DEST (body), 0); > +} > + /* Otherwise, this is neither load or store, so leave mem as NULL. */ > + > + if (mem != NULL) > +{ You could instead say if (!mem) return; and then you don't need the comment or the "if" after it. Or even: if (GET_CODE (body) != SET) return; rtx mem; if (MEM_P (SET_SRC (body))) mem = XEXP (SET_SRC (body), 0); else if (MEM_P (SET_DEST (body))) mem = XEXP (SET_DEST (body), 0); else return; > +{ > + rtx def_insn = DF_REF_INSN (def_link->ref); > + /* unsigned uid = INSN_UID (def_insn); not needed? */ Delete this line? Or is it useful still :-) > + if (GET_CODE (body) == CONST_INT) > + return true; if (CONST_INT_P (body)) ... > +static bool > +in_use_list (struct df_link *list, struct df_link *element) > +{ > + while (list != NULL) > +{ > + if (element->ref == list->ref) > + return true; > + list = list->next; > +} > + /* Got to end of list without finding element. */ > + return false; > +} This kind of function makes me scared of quadratic complexity somewhere. I didn't check, but it seems likely :-/ > + if ((insn_entry[uid_1].base_equivalence_hash != > + insn_entry[uid_2].base_equivalence_hash) || > + (insn_entry[uid_1].index_equivalence_hash != > + insn_entry[uid_2].index_equivalence_hash)) > +return false; > + else /* Hash codes match. Check details. */ After a return you don't need an else. Reduce indentation, instead :-) > +/* Return true iff instruction I2 dominates instruction I1. 
*/ > +static bool > +insn_dominated_by_p (rtx_insn *i1, rtx_insn *i2) > +{ > + basic_block bb1 = BLOCK_FOR_INSN (i1); > + basic_block bb2 = BLOCK_FOR_INSN (i2); > + > + if (bb1 == bb2) > +{ > + for (rtx_insn *i = i2; > +(i != NULL) && (BLOCK_FOR_INSN (i) == bb1); i = NEXT_INSN (i)) How can it ever be NULL? > + if (i == i1) > + return true; > + return false; > +} > + else > +return dominated_by_p (CDI_DOMINATORS, bb1, bb2); > +} IRA already has an insn_dominated_by_p function, which is constant time (it does require some extra init for uid_luid[]). > + int hash = ((insn_entry [uid].base_equivalence_hash + No + at the end of line, either. The check_GNU_style script should check for this (not sure if it actually does, YMMV). > + /* Since this pass creates new instructions, get_max_uid () may > + return different values at different times during this pass. The > +
Re: Problem exposed by recent ARM multiply changes
On Fri, Sep 27, 2019 at 12:30:50AM +0200, Jakub Jelinek wrote: > On Thu, Sep 26, 2019 at 05:17:40PM -0500, Segher Boessenkool wrote: > > On Wed, Sep 25, 2019 at 10:06:13PM -0600, Jeff Law wrote: > > > (insn 14 13 16 2 (parallel [ > > > (set (reg:SI 132) > > > (plus:SI (mult:SI (zero_extend:DI (reg/v:SI 115 [ sec ])) > > > (zero_extend:DI (reg:SI 124))) > > > (reg:SI 130))) > > > (set (reg:SI 133 [+4 ]) > > > (plus:SI (truncate:SI (lshiftrt:DI (plus:DI (mult:DI > > > (zero_extend:DI (reg/v:SI 115 [ sec ])) > > > (zero_extend:DI (reg:SI 124))) > > > (zero_extend:DI (reg:SI 130))) > > > (const_int 32 [0x20]))) > > > (reg:SI 131 [+4 ]))) > > > ]) "j.c":10:54 60 {umlal} > > > (expr_list:REG_DEAD (reg:SI 131 [+4 ]) > > > (expr_list:REG_DEAD (reg:SI 130) > > > (expr_list:REG_DEAD (reg:SI 124) > > > (expr_list:REG_DEAD (reg/v:SI 115 [ sec ]) > > > (nil)) > > > > This is not a correct pattern for umlal (it should have a carry from the > > low half of the addition to the high half). > > Are you sure? > "The UMLAL instruction interprets the values from Rn and Rm as unsigned > integers. It multiplies these integers, and adds the 64-bit result to the > 64-bit unsigned integer contained in RdHi and RdLo." Yes. It adds two 64-bit numbers. That is not the same as adding the top and bottom 32-bit halfs of that separately. > Appart from the bug I've provided untested fix in the PR, the above pattern > seems to do what the documentation says, the low 32 bits are > (unsigned int) (Rn*Rm+RdLo), and the whole result for unsigned int > Rn, Rm, RdHi and RdLo is > (unsigned long long)Rn*Rm+((unsigned long long)RdHi<<32)+RdLo > and the upper 32 bits of that are IMHO equivalent to > (unsigned) (((unsigned long long)Rn*Rm+RdLo)>>32)+RdHi The upper half of the result should be one bigger if the addition of the low half carries. Segher
Re: Problem exposed by recent ARM multiply changes
On Thu, Sep 26, 2019 at 05:17:40PM -0500, Segher Boessenkool wrote: > On Wed, Sep 25, 2019 at 10:06:13PM -0600, Jeff Law wrote: > > (insn 14 13 16 2 (parallel [ > > (set (reg:SI 132) > > (plus:SI (mult:SI (zero_extend:DI (reg/v:SI 115 [ sec ])) > > (zero_extend:DI (reg:SI 124))) > > (reg:SI 130))) > > (set (reg:SI 133 [+4 ]) > > (plus:SI (truncate:SI (lshiftrt:DI (plus:DI (mult:DI > > (zero_extend:DI (reg/v:SI 115 [ sec ])) > > (zero_extend:DI (reg:SI 124))) > > (zero_extend:DI (reg:SI 130))) > > (const_int 32 [0x20]))) > > (reg:SI 131 [+4 ]))) > > ]) "j.c":10:54 60 {umlal} > > (expr_list:REG_DEAD (reg:SI 131 [+4 ]) > > (expr_list:REG_DEAD (reg:SI 130) > > (expr_list:REG_DEAD (reg:SI 124) > > (expr_list:REG_DEAD (reg/v:SI 115 [ sec ]) > > (nil)) > > This is not a correct pattern for umlal (it should have a carry from the > low half of the addition to the high half). Are you sure? "The UMLAL instruction interprets the values from Rn and Rm as unsigned integers. It multiplies these integers, and adds the 64-bit result to the 64-bit unsigned integer contained in RdHi and RdLo." Appart from the bug I've provided untested fix in the PR, the above pattern seems to do what the documentation says, the low 32 bits are (unsigned int) (Rn*Rm+RdLo), and the whole result for unsigned int Rn, Rm, RdHi and RdLo is (unsigned long long)Rn*Rm+((unsigned long long)RdHi<<32)+RdLo and the upper 32 bits of that are IMHO equivalent to (unsigned) (((unsigned long long)Rn*Rm+RdLo)>>32)+RdHi Jakub
Avoid libbacktrace warnings with old compilers
This patch to libbacktrace avoids a compilation warning when building libbacktrace with an old compiler. This fixes PR 91908. Bootstrapped and ran libbacktrace tests. Committed to mainline. Ian 2019-09-26 Ian Lance Taylor PR libbacktrace/91908 * pecoff.c (backtrace_initialize): Explicitly cast unchecked __sync_bool_compare_and_swap to void. * xcoff.c (backtrace_initialize): Likewise. Index: pecoff.c === --- pecoff.c(revision 275698) +++ pecoff.c(working copy) @@ -922,7 +922,8 @@ backtrace_initialize (struct backtrace_s if (found_sym) backtrace_atomic_store_pointer (&state->syminfo_fn, coff_syminfo); else - __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, coff_nosyms); + (void) __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, +coff_nosyms); } if (!state->threaded) Index: xcoff.c === --- xcoff.c (revision 275698) +++ xcoff.c (working copy) @@ -1592,7 +1592,8 @@ backtrace_initialize (struct backtrace_s if (found_sym) backtrace_atomic_store_pointer (&state->syminfo_fn, xcoff_syminfo); else - __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, xcoff_nosyms); + (void) __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, +xcoff_nosyms); } if (!state->threaded)
Re: Problem exposed by recent ARM multiply changes
On Wed, Sep 25, 2019 at 10:06:13PM -0600, Jeff Law wrote: > (insn 14 13 16 2 (parallel [ > (set (reg:SI 132) > (plus:SI (mult:SI (zero_extend:DI (reg/v:SI 115 [ sec ])) > (zero_extend:DI (reg:SI 124))) > (reg:SI 130))) > (set (reg:SI 133 [+4 ]) > (plus:SI (truncate:SI (lshiftrt:DI (plus:DI (mult:DI > (zero_extend:DI (reg/v:SI 115 [ sec ])) > (zero_extend:DI (reg:SI 124))) > (zero_extend:DI (reg:SI 130))) > (const_int 32 [0x20]))) > (reg:SI 131 [+4 ]))) > ]) "j.c":10:54 60 {umlal} > (expr_list:REG_DEAD (reg:SI 131 [+4 ]) > (expr_list:REG_DEAD (reg:SI 130) > (expr_list:REG_DEAD (reg:SI 124) > (expr_list:REG_DEAD (reg/v:SI 115 [ sec ]) > (nil)) This is not a correct pattern for umlal (it should have a carry from the low half of the addition to the high half). Segher
Re: Problem exposed by recent ARM multiply changes
On Thu, Sep 26, 2019 at 03:27:50PM -0600, Jeff Law wrote: > > Some machines support a multiplication that generates a product wider > > than the operands. Write the pattern for this as > > > > @smallexample > > (mult:@var{m} (sign_extend:@var{m} @var{x}) (sign_extend:@var{m} @var{y})) > > @end smallexample > > > > where @var{m} is wider than the modes of @var{x} and @var{y}, which need > > not be the same. > ^^^ this clause. I read this as x & y being able to vary relative to > each other. But I think you're reading it as x & y are the same, but > they can vary relative to m This talks about the mult with sign_extend operands, and in that case sure, x can have different mode from y, the only restriction is that the modes are scalar integral (otherwise sign_extend isn't valid) and that they are narrower than m. But, the mult still has both operands as well as result with the same mode m, x and y aren't operands of mult in that case, but of the sign_extends. Jakub
Re: Problem exposed by recent ARM multiply changes
On 9/26/19 10:05 AM, Jakub Jelinek wrote: > On Thu, Sep 26, 2019 at 10:01:56AM -0600, Jeff Law wrote: >> On 9/26/19 9:47 AM, Jakub Jelinek wrote: >>> On Thu, Sep 26, 2019 at 09:39:31AM -0600, Jeff Law wrote: Right. My point is that the multiplication patterns are an exception as well. >>> >>> Do you have some evidence for that? >> It's in the manual. And yes it potentially makes a huge mess due to the >> interactions with modeless CONST_INTs. > > Where? > Unless otherwise specified, all the operands of arithmetic expressions > must be valid for mode @var{m}. An operand is valid for mode @var{m} > if it has mode @var{m}, or if it is a @code{const_int} or > @code{const_double} and @var{m} is a mode of class @code{MODE_INT}. > > and for MULT: > Represents the signed product of the values represented by @var{x} and > @var{y} carried out in machine mode @var{m}. > @code{ss_mult} and @code{us_mult} ensure that an out-of-bounds result > saturates to the maximum or minimum signed or unsigned value. > > Some machines support a multiplication that generates a product wider > than the operands. Write the pattern for this as > > @smallexample > (mult:@var{m} (sign_extend:@var{m} @var{x}) (sign_extend:@var{m} @var{y})) > @end smallexample > > where @var{m} is wider than the modes of @var{x} and @var{y}, which need > not be the same. ^^^ this clause. I read this as x & y being able to vary relative to each other. But I think you're reading it as x & y are the same, but they can vary relative to m Jeff
Re: [PATCH] xtensa: fix PR target/91880
On Thu, Sep 26, 2019 at 1:42 PM augustine.sterl...@gmail.com wrote: > > On Tue, Sep 24, 2019 at 5:41 PM Max Filippov wrote: > > > > Xtensa hwloop_optimize segfaults when zero overhead loop is about to be > > inserted as the first instruction of the function. > > Insert zero overhead loop instruction into new basic block before the > > loop when basic block that precedes the loop is empty. > > > > 2019-09-24 Max Filippov > > gcc/ > > * config/xtensa/xtensa.c (hwloop_optimize): Insert zero overhead > > loop instruction into new basic block before the loop when basic > > block that precedes the loop is empty. > > > > gcc/testsuite/ > > * gcc.target/xtensa/pr91880.c: New test case. > > * gcc.target/xtensa/xtensa.exp: New test suite. > > --- > > gcc/config/xtensa/xtensa.c | 5 ++-- > > gcc/testsuite/gcc.target/xtensa/pr91880.c | 10 > > gcc/testsuite/gcc.target/xtensa/xtensa.exp | 41 > > ++ > > 3 files changed, 54 insertions(+), 2 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/xtensa/pr91880.c > > create mode 100644 gcc/testsuite/gcc.target/xtensa/xtensa.exp > > Approved. Thanks. Thanks. Applied to trunk. I'll backport it later to gcc-7..9 branches. -- Max
Re: [PATCH] Multibyte awareness for diagnostics (PR 49973)
On Thu, Sep 26, 2019 at 4:47 PM Joseph Myers wrote: > > On Thu, 26 Sep 2019, Lewis Hyatt wrote: > > > A couple notes: > > - In order to avoid any portability problems with wchar_t, the > > equivalent of wcwidth() from libc is implemented in-house. > > I'm uneasy about contrib/gen_wcwidth.cpp doing the generation using host > libc's wcwidth. The effect is that libcpp/generated_cpp_wcwidth.h is > *not* reproducible unless you know exactly what host (libc version, locale > used when running the program, distribution patches to libc and locale > data) was used to run the program. I think we need a generator that works > from Unicode data in some way so we can explicitly say what version of the > (unmodified) Unicode data was used. > Got it, no problem, will find something that accomplishes this. -Lewis
Re: [PATCH] Multibyte awareness for diagnostics (PR 49973)
On Thu, 26 Sep 2019, Lewis Hyatt wrote: > A couple notes: > - In order to avoid any portability problems with wchar_t, the > equivalent of wcwidth() from libc is implemented in-house. I'm uneasy about contrib/gen_wcwidth.cpp doing the generation using host libc's wcwidth. The effect is that libcpp/generated_cpp_wcwidth.h is *not* reproducible unless you know exactly what host (libc version, locale used when running the program, distribution patches to libc and locale data) was used to run the program. I think we need a generator that works from Unicode data in some way so we can explicitly say what version of the (unmodified) Unicode data was used. -- Joseph S. Myers jos...@codesourcery.com
Re: [PATCH] xtensa: fix PR target/91880
On Tue, Sep 24, 2019 at 5:41 PM Max Filippov wrote: > > Xtensa hwloop_optimize segfaults when zero overhead loop is about to be > inserted as the first instruction of the function. > Insert zero overhead loop instruction into new basic block before the > loop when basic block that precedes the loop is empty. > > 2019-09-24 Max Filippov > gcc/ > * config/xtensa/xtensa.c (hwloop_optimize): Insert zero overhead > loop instruction into new basic block before the loop when basic > block that precedes the loop is empty. > > gcc/testsuite/ > * gcc.target/xtensa/pr91880.c: New test case. > * gcc.target/xtensa/xtensa.exp: New test suite. > --- > gcc/config/xtensa/xtensa.c | 5 ++-- > gcc/testsuite/gcc.target/xtensa/pr91880.c | 10 > gcc/testsuite/gcc.target/xtensa/xtensa.exp | 41 > ++ > 3 files changed, 54 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/xtensa/pr91880.c > create mode 100644 gcc/testsuite/gcc.target/xtensa/xtensa.exp Approved. Thanks.
[PATCH] Multibyte awareness for diagnostics (PR 49973)
Hello- PR preprocessor/49973 addresses the fact that the column number in diagnostics is not correct if the source lines contain multibyte characters. The attached patch corrects this. The outlines of it were discussed starting here: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49973#c13. If it looks OK, could you please apply it? The patch is a bit large unfortunately, because the column number was just the tip of the iceberg. There is a lot of machinery in diagnostic-show-locus.c that also requires multibyte awareness, which I have done in this patch as well. A couple notes: - In order to avoid any portability problems with wchar_t, the equivalent of wcwidth() from libc is implemented in-house. - The diagnostic-show-locus stuff needs to know both the bytes column and the display column. All location infrastructure (e.g. line-map.h) tracks byte offsets, so I left that all alone. Rather, diagnostic-show-locus.c just computes display columns on demand when it needs them. - In the above-linked PR, Joseph and I also discussed a couple related fixes that need to be made, namely, when -finput-charset is in use, the translation needs to be applied when reading source lines for the purpose of diagnostics printing, which currently it is not, and also, when outputting diagnostics, the current locale should be inspected to avoid outputting bytes that the user's terminal cannot display. I have separate patches for these two as well, but since they are more or less orthogonal to this one, I will submit them later. Thank you for your time taking a look at this. It will be nice to clean these up as part of the support for extended characters in identifiers. I have a related issue for extended characters also in PR 91843 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91843#c1). I can also submit that couple-line patch here if it looks OK. 
-Lewis /ChangeLog 2019-09-26 Lewis Hyatt PR preprocessor/49973 * contrib/gen_wcwidth.cpp: New standalone utility to generate libcpp/generated_cpp_wcwidth.h. libcpp/ChangeLog 2019-09-26 Lewis Hyatt PR preprocessor/49973 * generated_cpp_wcwidth.h: New file generated by contrib/gen_wcwidth.cpp, supports new cpp_wcwidth function. * charset.c (compute_next_display_width): New function to help implement display columns. (cpp_byte_column_to_display_column): Likewise. (cpp_display_column_to_byte_column): Likewise. (cpp_wcwidth): Likewise. * include/cpplib.h (cpp_byte_column_to_display_column): Declare. (cpp_display_column_to_byte_column): Declare. (cpp_wcwidth): Declare. (cpp_display_width): New function. gcc/ChangeLog 2019-09-26 Lewis Hyatt PR preprocessor/49973 * input.c (location_compute_display_column): New function to help with multibyte awareness in diagnostics. (test_cpp_utf8): New self-test. (input_c_tests): Call the new test. * input.h (location_compute_display_column): Declare. * diagnostic.c (diagnostic_get_location_text): Use it to output the correct column number on diagnostics for source lines containing multibyte characters. * diagnostic-show-locus.c: Pervasive changes to add multibyte awareness to all classes and functions. (class exploc_with_display_col): New class. (class layout_point): Add m_display_col member. (layout_point::get_col): New function. (layout_range::contains_point): Add use_display argument. (test_layout_range_for_single_point): Pass new argument. (test_layout_range_for_single_line): Likewise. (test_layout_range_for_multiple_lines): Likewise. (line_bounds::convert_to_display_cols): New function. (layout::get_state_at_point): Add use_display argument. (class layout): m_exploc changed to exploc_with_display_col from expanded_location. (layout::layout): Add multibyte awareness. (layout::print_source_line): Likewise. (layout::print_annotation_line): Likewise. (line_label::line_label): Likewise. (layout::print_any_labels): Likewise. 
(layout::annotation_line_showed_range_p): Likewise. (get_printed_columns): Likewise. (get_affected_columns): Rename to... (get_affected_range): ...this; add use_display argument and multibyte awareness. (class correction): Add m_affected_bytes and m_display_cols members. Rename m_len to m_bytes for clarity. Add multibyte awareness. (correction::insertion_p): Add multibyte awareness. (correction::compute_display_cols): New function. (correction::ensure_terminated): m_len renamed to m_bytes. (line_corrections::add_hint): Add multibyte awareness. (layout::print_trailing_fixits): Likewise. (layout::get_x_bound_for_row): Likewise. (test_one_liner_simple_caret_utf8): New
Re: [Patch, Fortran] CO_BROADCAST for derived types with allocatable components
Hi Paul, that message was a copy/paste leftover. It doesn't make any sense in that part of the code, it's now removed. Committed as revision 276164. Thanks! Il giorno dom 15 set 2019 alle ore 12:19 Paul Richard Thomas ha scritto: > > Hi Sandro, > > This patch looks fine to me. I have a question about the comment: > "This code is obviously added because the finalizer is not trusted to > free all memory." > 'obviously'? Not to me :-( Maybe you can expand on this? > > As to the stat and errmsg: Leave them for the time being. However, an > attempt will have to be made to implement F2018 "16.6 Collective > subroutines". I don't know enough about the coarrays implementation to > be able to help you about detecting these conditions. Maybe Tobias > Burnus can help? > > OK to commit. > > Paul > > PS Sometime before very long, something will have to be done about the > exponential code bloat that structure_alloc_comps. The more users that > there are for it the tougher it is going to get! > > On Thu, 22 Aug 2019 at 18:41, Alessandro Fanfarillo > wrote: > > > > Dear all, > > please find in attachment a preliminary patch that adds support to > > co_broadcast for allocatable components of derived types. > > The patch is currently ignoring the stat and errmsg arguments, mostly > > because I am not sure how to handle them properly. I have created a > > new data structure used to pass those arguments to the > > preexisting structure_alloc_comps. 
> > Suggestions on how to handle them are more than welcome :-) > > > > The patch builds correctly on x86_64 and it has been tested with > > OpenCoarrays and the following test cases: > > > > https://github.com/sourceryinstitute/OpenCoarrays/blob/co_broadcast-derived-type/src/tests/unit/collectives/co_broadcast_allocatable_components.f90 > > > > https://github.com/sourceryinstitute/OpenCoarrays/blob/co_broadcast-derived-type/src/tests/unit/collectives/co_broadcast_allocatable_components_array.f90 > > > > Regards, > > > > -- > "If you can't explain it simply, you don't understand it well enough" > - Albert Einstein
[C++ Patch] Improve grokdeclarator error
Hi, over the last weeks, while working on various batches of location improvements (a new one is forthcoming, in case you are wondering ;) I noticed this grokdeclarator diagnostic, one of those not exercised by our testsuite. While constructing a testcase I realized that probably it's better to immediately return error_mark_node, and avoid an additional redundant error about variable or field declared void or even about "fancy" variable templates, depending on the return type of the function template. Tested x86_64-linux. Thanks, Paolo. // /cp 2019-09-26 Paolo Carlini * decl.c (grokdeclarator): Immediately return error_mark_node upon error about template-id used as a declarator. /testsuite 2019-09-26 Paolo Carlini * g++.dg/diagnostic/template-id-as-declarator-1.C: New. Index: testsuite/g++.dg/diagnostic/template-id-as-declarator-1.C === --- testsuite/g++.dg/diagnostic/template-id-as-declarator-1.C (nonexistent) +++ testsuite/g++.dg/diagnostic/template-id-as-declarator-1.C (working copy) @@ -0,0 +1,5 @@ +template +void foo() {} + +template void foo( // { dg-error "15:template-id .foo. used as a declarator" } +// { dg-error "expected" "" { target *-*-* } .-1 } Index: cp/decl.c === --- cp/decl.c (revision 276151) +++ cp/decl.c (working copy) @@ -12078,9 +12078,9 @@ grokdeclarator (const cp_declarator *declarator, && !FUNC_OR_METHOD_TYPE_P (type) && !variable_template_p (TREE_OPERAND (unqualified_id, 0))) { - error ("template-id %qD used as a declarator", -unqualified_id); - unqualified_id = dname; + error_at (id_loc, "template-id %qD used as a declarator", + unqualified_id); + return error_mark_node; } /* If TYPE is a FUNCTION_TYPE, but the function name was explicitly
Re: [00/32] Support multiple ABIs in the same translation unit
On Wed, 11 Sep 2019, 22:02:26 EEST Richard Sandiford wrote: > The reason for the PRU differences is that the port defines > targetm.hard_regno_call_part_clobbered, but uses it to test whether > a multi-register value contains a mixture of fully-clobbered and > fully-preserved registers. AFAICT the port doesn't actually have > individual registers that are partly clobbered, so it doesn't need > to define the hook. (I can see how the documentation gave a misleading > impression though. I've tried to improve it in one of the patches.) > The series moves away from testing hard_regno_call_part_clobbered > directly to testing cached information instead, and the way that the > cached information is calculated means that defining the hook the way > the PRU port does has no effect. In other words, after the series we > treat it (rightly IMO) as having a "normal" ABI whereas before we didn't. You are correct. Port does not have partially clobbered HW registers. And indeed I was worried about multi-register values. PRU testsuite showed no regression from trunk with your patch set. With your patch set, I tried to compare PRU assembly with and without defining the targetm.hard_regno_call_part_clobbered hook. There was much noise in compare-all-tests due to lto compiler ID strings, but after some filtering I think the output assembly was the same. Thanks, Dimitar
A bug with -fprofile-dir? Profile directory concatenated with object file path
Hi, we noticed that the profile directory will be concatenated with object file path. the following small example explain this behavior: [qinzhao@localhost small]$ cat t #! /bin/bash CC=/home/qinzhao/Install/latest/bin/gcc opt="-O3 -fprofile-generate" opt="$opt -fprofile-dir=/home/qinzhao/prof_dir" opt="$opt -c -o /home/qinzhao/obj_dir/t.o" tf="t.c" echo $CC $opt $tf $CC $opt $tf strings /home/qinzhao/obj_dir/t.o | grep prof_dir [qinzhao@localhost small]$ sh t /home/qinzhao/Install/latest/bin/gcc -O3 -fprofile-generate -fprofile-dir=/home/qinzhao/prof_dir -c -o /home/qinzhao/obj_dir/t.o t.c /home/qinzhao/prof_dir//home/qinzhao/obj_dir/t.gcda [qinzhao@localhost small]$ From the above, we can see: when specifying the profiling directory with -fprofile-dir as “/home/qinzhao/prof_dir”, the user expects that the profiling data will be stored in this specific directory. However, GCC concatenates the profiling directory “/home/qinzhao/prof_dir” with the path for the object file “/home/qinzhao/obj_dir” together. As a result, the profiling data will be stored to: /home/qinzhao/prof_dir/home/qinzhao/obj_dir/ instead of /home/qinzhao/prof_dir This looks like a bug to me. any comment for this? thank you! Qing
[Darwin, PPC, Mode Iterators 2/n] Eliminate picbase expanders.
Hi Segher, thanks for the pointers to how to simplify this! > On 25 Sep 2019, at 00:23, Segher Boessenkool > wrote: > On Tue, Sep 24, 2019 at 08:31:16PM +0100, Iain Sandoe wrote: >> This switches the picbase load and reload patterns to use the 'P' mode >> iterator instead of writing an SI and DI pattern for each (and deletes the >> old patterns). No functional change intended. > >> (define_expand "load_macho_picbase" >> - [(set (reg:SI LR_REGNO) >> + [(set (reg LR_REGNO) > > This changes it to VOIDmode instead? It should have been reg:P LR_REGNO? > >> (define_expand "reload_macho_picbase" >> - [(set (reg:SI LR_REGNO) >> + [(set (reg LR_REGNO) > > Same here. As we discussed this is symptomatic of the fact that the expanders only exist in this to pass the mode through. We can eliminate them completely by using an “@pattern” in the actual patterns (and updating callers to pass the mode as the first argument). tested on powerpc-darwin9, powerpc64-linux-gnu (m32, m64) applied to mainline, thanks Iain gcc/ChangeLog: 2019-09-26 Iain Sandoe * config/rs6000/darwin.md: Replace the expanders for load_macho_picbase and reload_macho_picbase with use of '@' in their respective define_insns. (nonlocal_goto_receiver): Pass Pmode to gen_reload_macho_picbase. * config/rs6000/rs6000-logue.c (rs6000_emit_prologue): Pass Pmode to gen_load_macho_picbase. * config/rs6000/rs6000.md: Likewise. 
diff --git a/gcc/config/rs6000/darwin.md b/gcc/config/rs6000/darwin.md index 4a284211af..a5c5a3af39 100644 --- a/gcc/config/rs6000/darwin.md +++ b/gcc/config/rs6000/darwin.md @@ -216,21 +216,7 @@ You should have received a copy of the GNU General Public License (match_dup 2))] "") -(define_expand "load_macho_picbase" - [(set (reg LR_REGNO) -(unspec [(match_operand 0 "")] - UNSPEC_LD_MPIC))] - "(DEFAULT_ABI == ABI_DARWIN) && flag_pic" -{ - if (TARGET_32BIT) -emit_insn (gen_load_macho_picbase_si (operands[0])); - else -emit_insn (gen_load_macho_picbase_di (operands[0])); - - DONE; -}) - -(define_insn "load_macho_picbase_<mode>" +(define_insn "@load_macho_picbase_<mode>" [(set (reg:P LR_REGNO) (unspec:P [(match_operand:P 0 "immediate_operand" "s") (pc)] UNSPEC_LD_MPIC))] @@ -284,21 +270,7 @@ You should have received a copy of the GNU General Public License "addis %0,%1,ha16(%2-%3)\n\taddi %0,%0,lo16(%2-%3)" [(set_attr "length" "8")]) -(define_expand "reload_macho_picbase" - [(set (reg LR_REGNO) -(unspec [(match_operand 0 "")] - UNSPEC_RELD_MPIC))] - "(DEFAULT_ABI == ABI_DARWIN) && flag_pic" -{ - if (TARGET_32BIT) -emit_insn (gen_reload_macho_picbase_si (operands[0])); - else -emit_insn (gen_reload_macho_picbase_di (operands[0])); - - DONE; -}) - -(define_insn "reload_macho_picbase_<mode>" +(define_insn "@reload_macho_picbase_<mode>" [(set (reg:P LR_REGNO) (unspec:P [(match_operand:P 0 "immediate_operand" "s") (pc)] UNSPEC_RELD_MPIC))] @@ -342,7 +314,7 @@ You should have received a copy of the GNU General Public License ASM_GENERATE_INTERNAL_LABEL(tmplab, "Lnlgr", ++n); tmplrtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tmplab)); - emit_insn (gen_reload_macho_picbase (tmplrtx)); + emit_insn (gen_reload_macho_picbase (Pmode, tmplrtx)); emit_move_insn (picreg, gen_rtx_REG (Pmode, LR_REGNO)); emit_insn (gen_macho_correct_pic (picreg, picreg, picrtx, tmplrtx)); } diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c index 633a253e43..e98893a440 100644 --- 
a/gcc/config/rs6000/rs6000-logue.c +++ b/gcc/config/rs6000/rs6000-logue.c @@ -3809,7 +3809,7 @@ rs6000_emit_prologue (void) if (!info->lr_save_p) emit_move_insn (gen_rtx_REG (Pmode, 0), lr); - emit_insn (gen_load_macho_picbase (src)); + emit_insn (gen_load_macho_picbase (Pmode, src)); emit_move_insn (gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM), diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 4dbf85bbc9..c5443bab9e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -10053,7 +10053,7 @@ CODE_LABEL_NUMBER (operands[0])); tmplabrtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tmplab)); - emit_insn (gen_load_macho_picbase (tmplabrtx)); + emit_insn (gen_load_macho_picbase (Pmode, tmplabrtx)); emit_move_insn (picreg, gen_rtx_REG (Pmode, LR_REGNO)); emit_insn (gen_macho_correct_pic (picreg, picreg, picrtx, tmplabrtx)); }
Re: Problem exposed by recent ARM multiply changes
> On Sep 26, 2019, at 12:01 PM, Jeff Law wrote: > > On 9/26/19 9:47 AM, Jakub Jelinek wrote: >> On Thu, Sep 26, 2019 at 09:39:31AM -0600, Jeff Law wrote: >>> Right. My point is that the multiplication patterns are an exception as >>> well. >> >> Do you have some evidence for that? > It's in the manual. And yes it potentially makes a huge mess due to the > interactions with modeless CONST_INTs. Do you mean things like "mulhisi3"? That's a pattern for a multiplication whose output is longer than its inputs. But the RTL it builds has a regular mult insn with equal length operands (all SImode in this example), with sign_extend insns in the mult source operands. So I don't believe that contradicts what Jakub said. paul
Re: [PATCH, rs6000] Update powerpc vector load builtins with PURE attribute
On Thu, Sep 26, 2019 at 12:06:03PM -0500, Bill Schmidt wrote: > On 9/26/19 12:00 PM, Segher Boessenkool wrote: > >On Thu, Sep 26, 2019 at 10:40:29AM -0500, will schmidt wrote: > >>Update our (rs6000) vector load built-ins with the PURE attribute. These > >>were previously given the MEM attribute, which meant that redundant loads > >>surrounding the built-in calls could not be eliminated in earlier passes > >>since they were defined as having the potential to touch memory. > >>2019-09-26 Will Schmidt > >>* config/rs6000/rs6000-builtin.def: ( LVSL LVSR LVEBX LVEHX > > ^--- stray space > > > >Please put commas between the items, too? > > > >The patch is okay for trunk. Thank you! > > > I wonder whether we should also consider a backport to 9, when we > started expanding these earlier. Thoughts? Good idea, yeah. After waiting for a week or so to see if problems turn up on trunk, as usual. Segher
Re: [PATCH, rs6000] Update powerpc vector load builtins with PURE attribute
On 9/26/19 12:00 PM, Segher Boessenkool wrote: Hi Will, On Thu, Sep 26, 2019 at 10:40:29AM -0500, will schmidt wrote: Update our (rs6000) vector load built-ins with the PURE attribute. These were previously given the MEM attribute, which meant that redundant loads surrounding the built-in calls could not be eliminated in earlier passes since they were defined as having the potential to touch memory. 2019-09-26 Will Schmidt * config/rs6000/rs6000-builtin.def: ( LVSL LVSR LVEBX LVEHX ^--- stray space Please put commas between the items, too? The patch is okay for trunk. Thank you! I wonder whether we should also consider a backport to 9, when we started expanding these earlier. Thoughts? Bill Segher
Re: [PATCH, rs6000] Update powerpc vector load builtins with PURE attribute
Hi Will, On Thu, Sep 26, 2019 at 10:40:29AM -0500, will schmidt wrote: > Update our (rs6000) vector load built-ins with the PURE attribute. These > were previously given the MEM attribute, which meant that redundant loads > surrounding the built-in calls could not be eliminated in earlier passes > since they were defined as having the potential to touch memory. > 2019-09-26 Will Schmidt > * config/rs6000/rs6000-builtin.def: ( LVSL LVSR LVEBX LVEHX ^--- stray space Please put commas between the items, too? The patch is okay for trunk. Thank you! Segher
[PATCH] Split out nested cycle loop-closed PHI vectorization
Bootstrapped / tested on x86_64-unknown-linux-gnu, applied. Richard. 2019-09-26 Richard Biener * tree-vect-loop.c (vect_analyze_loop_operations): Analyze loop-closed PHIs that are vect_internal_def. (vect_create_epilog_for_reduction): Exit early for nested cycles. Simplify. (vectorizable_lc_phi): New. * tree-vect-stmts.c (vect_analyze_stmt): Call vectorize_lc_phi. (vect_transform_stmt): Likewise. * tree-vectorizer.h (stmt_vec_info_type): Add lc_phi_info_type. (vectorizable_lc_phi): Declare. Index: gcc/tree-vect-loop.c === --- gcc/tree-vect-loop.c(revision 276156) +++ gcc/tree-vect-loop.c(working copy) @@ -1519,12 +1519,16 @@ vect_analyze_loop_operations (loop_vec_i phi_op = PHI_ARG_DEF (phi, 0); stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op); if (!op_def_info) - return opt_result::failure_at (phi, "unsupported phi"); + return opt_result::failure_at (phi, "unsupported phi\n"); if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer && (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer_by_reduction)) - return opt_result::failure_at (phi, "unsupported phi"); + return opt_result::failure_at (phi, "unsupported phi\n"); + + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def + && !vectorizable_lc_phi (stmt_info, NULL, NULL)) + return opt_result::failure_at (phi, "unsupported phi\n"); } continue; @@ -4396,6 +4400,10 @@ vect_create_epilog_for_reduction (vec # b1 = phi @@ -5313,7 +5311,6 @@ vect_create_epilog_for_reduction (vecinner; @@ -5473,7 +5470,7 @@ vect_create_epilog_for_reduction (vec (stmt_info->stmt) + || gimple_phi_num_args (stmt_info->stmt) != 1) +return false; + + /* To handle the nested_cycle_def for double-reductions we have to + refactor epilogue generation more. */ + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def + /* && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def */) +return false; + + if (!vec_stmt) /* transformation not required. 
*/ +{ + STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type; + return true; +} + + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree scalar_dest = gimple_phi_result (stmt_info->stmt); + basic_block bb = gimple_bb (stmt_info->stmt); + edge e = single_pred_edge (bb); + tree vec_dest = vect_create_destination_var (scalar_dest, vectype); + vec vec_oprnds = vNULL; + vect_get_vec_defs (gimple_phi_arg_def (stmt_info->stmt, 0), NULL_TREE, +stmt_info, &vec_oprnds, NULL, slp_node); + if (slp_node) +{ + unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + gcc_assert (vec_oprnds.length () == vec_num); + for (unsigned i = 0; i < vec_num; i++) + { + /* Create the vectorized LC PHI node. */ + gphi *new_phi = create_phi_node (vec_dest, bb); + add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION); + stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi); + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info); + } +} + else +{ + unsigned ncopies = vect_get_num_copies (loop_vinfo, vectype); + stmt_vec_info prev_phi_info = NULL; + for (unsigned i = 0; i < ncopies; i++) + { + if (i != 0) + vect_get_vec_defs_for_stmt_copy (loop_vinfo, &vec_oprnds, NULL); + /* Create the vectorized LC PHI node. */ + gphi *new_phi = create_phi_node (vec_dest, bb); + add_phi_arg (new_phi, vec_oprnds[0], e, UNKNOWN_LOCATION); + stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi); + if (i == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info; + else + STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info; + prev_phi_info = new_phi_info; + } +} + vec_oprnds.release (); + + return true; +} + + /* Function vect_min_worthwhile_factor. 
For a loop where we could vectorize the operation indicated by CODE, @@ -8399,7 +8466,8 @@ vect_transform_loop (loop_vec_info loop_ if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def - || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def) && ! PURE_SLP_STMT (stmt_info)) { if (dump_enabled_p ()) Index: gcc
Re: [PATCH] PR libstdc++/91910 fix data race in Debug Mode destructors
On 26/09/19 14:20 +0100, Jonathan Wakely wrote: Fix data race when _Safe_iterator_base::_M_detach() runs concurrently with the _Safe_container_base destructor. PR libstdc++/91910 * src/c++11/debug.cc (_Safe_iterator_base::_M_detach()): Load pointer atomically and lock the mutex before accessing the sequence. (_Safe_iterator_base::_M_reset()): Clear _M_sequence atomically. Tested x86_64-linux. I plan to commit this to trunk unless somebody sees a problem with it. We need the same change in the _Safe_local_iterator_base::_M_detach() function, as in this new patch. commit 3ec426b2f46e65eb6e4ce7ec05863a6f9c05dffc Author: Jonathan Wakely Date: Thu Sep 26 13:08:48 2019 +0100 PR libstdc++/91910 fix data race in Debug Mode destructors Fix data race when _Safe_iterator_base::_M_detach() runs concurrently with the _Safe_container_base destructor. PR libstdc++/91910 * src/c++11/debug.cc (_Safe_iterator_base::_M_detach()): Load pointer atomically and lock the mutex before accessing the sequence. (_Safe_local_iterator_base::_M_detach()): Likewise. (_Safe_iterator_base::_M_reset()): Clear _M_sequence atomically. diff --git a/libstdc++-v3/src/c++11/debug.cc b/libstdc++-v3/src/c++11/debug.cc index f5a49992efa..efd1a9e0254 100644 --- a/libstdc++-v3/src/c++11/debug.cc +++ b/libstdc++-v3/src/c++11/debug.cc @@ -381,10 +381,17 @@ namespace __gnu_debug _Safe_iterator_base:: _M_detach() { -if (_M_sequence) +// This function can run concurrently with the sequence destructor, +// so there is a TOCTTOU race here: the sequence could be destroyed +// after we check that _M_sequence is not null. Use the pointer value +// to acquire the mutex (rather than via _M_sequence->_M_get_mutex()). +// If the sequence destructor runs between loading the pointer and +// locking the mutex, it will detach this iterator and set _M_sequence +// to null, and then _M_detach_single() will do nothing. 
+if (auto seq = __atomic_load_n(&_M_sequence, __ATOMIC_ACQUIRE)) { - _M_sequence->_M_detach(this); - _M_reset(); + __gnu_cxx::__scoped_lock sentry(get_safe_base_mutex(seq)); + _M_detach_single(); } } @@ -403,7 +410,7 @@ namespace __gnu_debug _Safe_iterator_base:: _M_reset() throw () { -_M_sequence = 0; +__atomic_store_n(&_M_sequence, (_Safe_sequence_base*)0, __ATOMIC_RELEASE); _M_version = 0; _M_prior = 0; _M_next = 0; @@ -466,10 +473,10 @@ namespace __gnu_debug _Safe_local_iterator_base:: _M_detach() { -if (_M_sequence) +if (auto seq = __atomic_load_n(&_M_sequence, __ATOMIC_ACQUIRE)) { - _M_get_container()->_M_detach_local(this); - _M_reset(); + __gnu_cxx::__scoped_lock sentry(get_safe_base_mutex(seq)); + _M_detach_single(); } }
[PATCH] Define std::to_array for Debug Mode
* include/debug/array (to_array): Define for debug mode. Tested x86_64-linux, committed to trunk. commit 6404cc56a2e300ffcf5f680f0728e695539a137a Author: Jonathan Wakely Date: Thu Sep 26 15:50:30 2019 +0100 Define std::to_array for Debug Mode * include/debug/array (to_array): Define for debug mode. diff --git a/libstdc++-v3/include/debug/array b/libstdc++-v3/include/debug/array index 9e94e656c04..2f8eb842eb8 100644 --- a/libstdc++-v3/include/debug/array +++ b/libstdc++-v3/include/debug/array @@ -314,6 +314,45 @@ namespace __debug static_assert(_Int < _Nm, "index is out of bounds"); return std::move(__debug::get<_Int>(__arr)); } + +#if __cplusplus > 201703L +#define __cpp_lib_to_array 201907L + + template +constexpr array, sizeof...(_Idx)> +__to_array(_Tp (&__a)[sizeof...(_Idx)], index_sequence<_Idx...>) +{ + if constexpr (_Move) + return {{std::move(__a[_Idx])...}}; + else + return {{__a[_Idx]...}}; +} + + template +constexpr array, _Nm> +to_array(_Tp (&__a)[_Nm]) +noexcept(is_nothrow_constructible_v<_Tp, _Tp&>) +{ + static_assert(!is_array_v<_Tp>); + static_assert(is_constructible_v<_Tp, _Tp&>); + if constexpr (is_constructible_v<_Tp, _Tp&>) + return __debug::__to_array(__a, make_index_sequence<_Nm>{}); + __builtin_unreachable(); // FIXME: see PR c++/91388 +} + + template +constexpr array, _Nm> +to_array(_Tp (&&__a)[_Nm]) +noexcept(is_nothrow_move_constructible_v<_Tp>) +{ + static_assert(!is_array_v<_Tp>); + static_assert(is_move_constructible_v<_Tp>); + if constexpr (is_move_constructible_v<_Tp>) + return __debug::__to_array<1>(__a, make_index_sequence<_Nm>{}); + __builtin_unreachable(); // FIXME: see PR c++/91388 +} +#endif // C++20 + } // namespace __debug _GLIBCXX_BEGIN_NAMESPACE_VERSION
[PATCH] Implement C++20 constexpr changes to std::pair (P1032R1)
* include/bits/stl_pair.h (pair): Add _GLIBCXX20_CONSTEXPR to piecewise construction constructor, assignment operators, and swap. * include/std/tuple (pair::pair(piecewise_construct_t, tuple, tuple)): Add _GLIBCXX20_CONSTEXPR. (pair::pair(tuple, tuple, _Index_tuple, _Index_tuple)): Likewise. * testsuite/20_util/pair/constexpr_assign.cc: New test. * testsuite/20_util/pair/constexpr_swap.cc: New test. There are still more pieces of P1032R1 that need to be implemented, including (at least) the std::tuple bits. Tested x86_64-linux, committed to trunk. commit 30fc6a7e208ba88bd874145fc9a1570862964a5f Author: Jonathan Wakely Date: Thu Sep 26 15:42:10 2019 +0100 Implement C++20 constexpr changes to std::pair (P1032R1) * include/bits/stl_pair.h (pair): Add _GLIBCXX20_CONSTEXPR to piecewise construction constructor, assignment operators, and swap. * include/std/tuple (pair::pair(piecewise_construct_t, tuple, tuple)): Add _GLIBCXX20_CONSTEXPR. (pair::pair(tuple, tuple, _Index_tuple, _Index_tuple)): Likewise. * testsuite/20_util/pair/constexpr_assign.cc: New test. * testsuite/20_util/pair/constexpr_swap.cc: New test. 
diff --git a/libstdc++-v3/include/bits/stl_pair.h b/libstdc++-v3/include/bits/stl_pair.h index c04f169bb6c..f7ad1696545 100644 --- a/libstdc++-v3/include/bits/stl_pair.h +++ b/libstdc++-v3/include/bits/stl_pair.h @@ -380,9 +380,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION second(std::forward<_U2>(__p.second)) { } template + _GLIBCXX20_CONSTEXPR pair(piecewise_construct_t, tuple<_Args1...>, tuple<_Args2...>); - pair& + _GLIBCXX20_CONSTEXPR pair& operator=(typename conditional< __and_, is_copy_assignable<_T2>>::value, @@ -393,7 +394,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return *this; } - pair& + _GLIBCXX20_CONSTEXPR pair& operator=(typename conditional< __and_, is_move_assignable<_T2>>::value, @@ -407,9 +408,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } template - typename enable_if<__and_, - is_assignable<_T2&, const _U2&>>::value, -pair&>::type + _GLIBCXX20_CONSTEXPR + typename enable_if<__and_, + is_assignable<_T2&, const _U2&>>::value, + pair&>::type operator=(const pair<_U1, _U2>& __p) { first = __p.first; @@ -418,9 +420,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } template - typename enable_if<__and_, - is_assignable<_T2&, _U2&&>>::value, -pair&>::type + _GLIBCXX20_CONSTEXPR + typename enable_if<__and_, + is_assignable<_T2&, _U2&&>>::value, + pair&>::type operator=(pair<_U1, _U2>&& __p) { first = std::forward<_U1>(__p.first); @@ -429,7 +432,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } /// Swap the first members and then the second members. - void + _GLIBCXX20_CONSTEXPR void swap(pair& __p) noexcept(__and_<__is_nothrow_swappable<_T1>, __is_nothrow_swappable<_T2>>::value) @@ -442,6 +445,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION private: template + _GLIBCXX20_CONSTEXPR pair(tuple<_Args1...>&, tuple<_Args2...>&, _Index_tuple<_Indexes1...>, _Index_tuple<_Indexes2...>); #endif @@ -503,7 +507,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * which has performance implications, e.g. 
see https://gcc.gnu.org/PR38466 */ template -inline +_GLIBCXX20_CONSTEXPR inline #if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11 // Constrained free swap overload, see p0185r1 typename enable_if<__and_<__is_swappable<_T1>, diff --git a/libstdc++-v3/include/std/tuple b/libstdc++-v3/include/std/tuple index dd966b3a0bc..ae1a3d0d18b 100644 --- a/libstdc++-v3/include/std/tuple +++ b/libstdc++-v3/include/std/tuple @@ -1570,7 +1570,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ template template - inline + _GLIBCXX20_CONSTEXPR inline pair<_T1, _T2>:: pair(piecewise_construct_t, tuple<_Args1...> __first, tuple<_Args2...> __second) @@ -1582,7 +1582,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template template - inline + _GLIBCXX20_CONSTEXPR inline pair<_T1, _T2>:: pair(tuple<_Args1...>& __tuple1, tuple<_Args2...>& __tuple2, _Index_tuple<_Indexes1...>, _Index_tuple<_Indexes2...>) diff --git a/libstdc++-v3/testsuite/20_util/pair/constexpr_assign.cc b/libstdc++-v3/testsuite/20_util/pair/constexpr_assign.cc new file mode 100644 index 000..7abf2370658 --- /dev/null +++ b/libstdc++-v
[PATCH] Fix array index error in address_v6 comparisons
* include/experimental/internet (operator==, operator<): Fix loop condition to avoid reading past the end of the array. Tested x86_64-linux, committed to trunk. commit f5886e6a2c744196c089b16853d73d64e248a735 Author: Jonathan Wakely Date: Thu Sep 26 14:57:56 2019 +0100 Fix array index error in address_v6 comparisons * include/experimental/internet (operator==, operator<): Fix loop condition to avoid reading past the end of the array. diff --git a/libstdc++-v3/include/experimental/internet b/libstdc++-v3/include/experimental/internet index 44d757c3a97..929a747a250 100644 --- a/libstdc++-v3/include/experimental/internet +++ b/libstdc++-v3/include/experimental/internet @@ -539,7 +539,7 @@ namespace ip const auto& __aa = __a._M_bytes; const auto& __bb = __b._M_bytes; int __i = 0; -for (; __aa[__i] == __bb[__i] && __i < 16; ++__i) +for (; __i < 16 && __aa[__i] == __bb[__i]; ++__i) ; return __i == 16 ? __a.scope_id() == __b.scope_id() : false; } @@ -554,7 +554,7 @@ namespace ip const auto& __aa = __a._M_bytes; const auto& __bb = __b._M_bytes; int __i = 0; -for (; __aa[__i] == __bb[__i] && __i < 16; ++__i) +for (; __i < 16 && __aa[__i] == __bb[__i]; ++__i) ; return __i == 16 ? __a.scope_id() < __b.scope_id() : __aa[__i] < __bb[__i]; }
[PATCH] Remove include directives for deleted Profile Mode headers
* include/std/array: Remove references to profile mode. * include/std/bitset: Likewise. * include/std/deque: Likewise. * include/std/forward_list: Likewise. * include/std/list: Likewise. * include/std/map: Likewise. * include/std/set: Likewise. * include/std/unordered_map: Likewise. * include/std/unordered_set: Likewise. * include/std/vector: Likewise. * testsuite/17_intro/headers/c++1998/profile_mode.cc: New test. * testsuite/17_intro/headers/c++2011/profile_mode.cc: New test. Tested x86_64-linux, committed to trunk. commit ebfcc6644881b96fff60db313b37b5fb4843fbc1 Author: Jonathan Wakely Date: Thu Sep 26 14:31:51 2019 +0100 Remove include directives for deleted Profile Mode headers * include/std/array: Remove references to profile mode. * include/std/bitset: Likewise. * include/std/deque: Likewise. * include/std/forward_list: Likewise. * include/std/list: Likewise. * include/std/map: Likewise. * include/std/set: Likewise. * include/std/unordered_map: Likewise. * include/std/unordered_set: Likewise. * include/std/vector: Likewise. * testsuite/17_intro/headers/c++1998/profile_mode.cc: New test. * testsuite/17_intro/headers/c++2011/profile_mode.cc: New test. 
diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array index a380f523d44..b1dc8887787 100644 --- a/libstdc++-v3/include/std/array +++ b/libstdc++-v3/include/std/array @@ -422,10 +422,6 @@ _GLIBCXX_END_NAMESPACE_VERSION # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - #endif // C++11 #endif // _GLIBCXX_ARRAY diff --git a/libstdc++-v3/include/std/bitset b/libstdc++-v3/include/std/bitset index d6958301e8a..ef4a91a0c15 100644 --- a/libstdc++-v3/include/std/bitset +++ b/libstdc++-v3/include/std/bitset @@ -1593,8 +1593,4 @@ _GLIBCXX_END_NAMESPACE_VERSION # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - #endif /* _GLIBCXX_BITSET */ diff --git a/libstdc++-v3/include/std/deque b/libstdc++-v3/include/std/deque index ed4927e13b7..3f8e9abe032 100644 --- a/libstdc++-v3/include/std/deque +++ b/libstdc++-v3/include/std/deque @@ -72,10 +72,6 @@ # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - #if __cplusplus >= 201703L namespace std _GLIBCXX_VISIBILITY(default) { diff --git a/libstdc++-v3/include/std/forward_list b/libstdc++-v3/include/std/forward_list index 9d6cc40593b..39c3f4438de 100644 --- a/libstdc++-v3/include/std/forward_list +++ b/libstdc++-v3/include/std/forward_list @@ -43,10 +43,6 @@ # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - #if __cplusplus >= 201703L namespace std _GLIBCXX_VISIBILITY(default) { diff --git a/libstdc++-v3/include/std/list b/libstdc++-v3/include/std/list index 8d6ac198c9a..35c9d44a39e 100644 --- a/libstdc++-v3/include/std/list +++ b/libstdc++-v3/include/std/list @@ -67,10 +67,6 @@ # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - #if __cplusplus >= 201703L namespace std _GLIBCXX_VISIBILITY(default) { diff --git a/libstdc++-v3/include/std/map b/libstdc++-v3/include/std/map index 0da7c895aac..0eb4f80f93f 100644 --- a/libstdc++-v3/include/std/map +++ b/libstdc++-v3/include/std/map @@ -67,10 +67,6 @@ # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - 
#if __cplusplus >= 201703L namespace std _GLIBCXX_VISIBILITY(default) { diff --git a/libstdc++-v3/include/std/set b/libstdc++-v3/include/std/set index a9ce58c4574..4213a4a01b8 100644 --- a/libstdc++-v3/include/std/set +++ b/libstdc++-v3/include/std/set @@ -67,10 +67,6 @@ # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - #if __cplusplus >= 201703L namespace std _GLIBCXX_VISIBILITY(default) { diff --git a/libstdc++-v3/include/std/unordered_map b/libstdc++-v3/include/std/unordered_map index 60f9bd7a6a6..eb69240a94f 100644 --- a/libstdc++-v3/include/std/unordered_map +++ b/libstdc++-v3/include/std/unordered_map @@ -52,10 +52,6 @@ # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - #if __cplusplus >= 201703L namespace std _GLIBCXX_VISIBILITY(default) { diff --git a/libstdc++-v3/include/std/unordered_set b/libstdc++-v3/include/std/unordered_set index 90438738614..a28b840c136 100644 --- a/libstdc++-v3/include/std/unordered_set +++ b/libstdc++-v3/include/std/unordered_set @@ -52,10 +52,6 @@ # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - #if __cplusplus >= 201703L namespace std _GLIBCXX_VISIBILITY(default) { diff --git a/libstdc++-v3/include/std/vector b/libstdc++-v3/include/std/vector index e5e13ab3ef7..c15faf1da9a 100644 --- a/libstdc++-v3/include/std/vector +++ b/libstdc++-v3/include/std/vector @@ -76,10 +76,6 @@ # include #endif -#ifdef _GLIBCXX_PROFILE -# include -#endif - #if __cplusplus >= 201703L namespace std _GLI
Re: Problem exposed by recent ARM multiply changes
On Thu, Sep 26, 2019 at 10:01:56AM -0600, Jeff Law wrote: > On 9/26/19 9:47 AM, Jakub Jelinek wrote: > > On Thu, Sep 26, 2019 at 09:39:31AM -0600, Jeff Law wrote: > >> Right. My point is that the multiplication patterns are an exception as > >> well. > > > > Do you have some evidence for that? > It's in the manual. And yes it potentially makes a huge mess due to the > interactions with modeless CONST_INTs. Where? Unless otherwise specified, all the operands of arithmetic expressions must be valid for mode @var{m}. An operand is valid for mode @var{m} if it has mode @var{m}, or if it is a @code{const_int} or @code{const_double} and @var{m} is a mode of class @code{MODE_INT}. and for MULT: Represents the signed product of the values represented by @var{x} and @var{y} carried out in machine mode @var{m}. @code{ss_mult} and @code{us_mult} ensure that an out-of-bounds result saturates to the maximum or minimum signed or unsigned value. Some machines support a multiplication that generates a product wider than the operands. Write the pattern for this as @smallexample (mult:@var{m} (sign_extend:@var{m} @var{x}) (sign_extend:@var{m} @var{y})) @end smallexample where @var{m} is wider than the modes of @var{x} and @var{y}, which need not be the same. For unsigned widening multiplication, use the same idiom, but with @code{zero_extend} instead of @code{sign_extend}. I don't read that as an exception to violate that, it simply says that in that case one should use sign/zero_extend. Jakub
Re: Problem exposed by recent ARM multiply changes
On 9/26/19 9:47 AM, Jakub Jelinek wrote: > On Thu, Sep 26, 2019 at 09:39:31AM -0600, Jeff Law wrote: >> Right. My point is that the multiplication patterns are an exception as >> well. > > Do you have some evidence for that? It's in the manual. And yes it potentially makes a huge mess due to the interactions with modeless CONST_INTs. jeff
Re: [libcpp] Issue a pedantic warning for UCNs outside UCS codespace
On Thu, 26 Sep 2019, Eric Botcazou wrote: > > For C, I think such UCNs violate the Semantics but not the Constraints on > > UCNs, so no diagnostic is actually required in C, although it is permitted > > as a pedwarn / error. > > > > However, while C++ doesn't have that Semantics / Constraints division, > > it's also the case that before C++2a, C++ only has a dated normative > > reference to ISO/IEC 10646-1:1993 (C++2a adds an undated reference and > > says the dated one is only for deprecated features, as well as explicitly > > making such UCNs outside the ISO 10646 code point range ill-formed). So I > > think that for C++, this is only correct as an error / pedwarn in the > > C++2a case. > > OK, thanks for the exegesis. ;-) Revision version attached. Checking "CPP_OPTION (pfile, lang) == CLK_CXX2A" is problematic because future versions later than C++2a should be handled the same as C++2a. The only place I see doing something similar (outside of init.c, most version conditionals are handled via language flags set there) does "CPP_OPTION (pfile, lang) > CLK_CXX11" (for "In C++14 and up these suffixes are in the standard library, so treat them as user-defined literals.", two places doing the same comparison). So I think that "CPP_OPTION (pfile, lang) > CLK_CXX17" is the right thing to replace the comparisons against CLK_CXX2A and CLK_GNUCXX2A. The patch is OK with that change. -- Joseph S. Myers jos...@codesourcery.com
Re: Problem exposed by recent ARM multiply changes
On Thu, Sep 26, 2019 at 09:39:31AM -0600, Jeff Law wrote: > Right. My point is that the multiplication patterns are an exception as > well. Do you have some evidence for that? I mean, e.g. simplify-rtx.c will in that case almost certainly misbehave, if some expression can have CONST_INT operand(s), then it really needs to have the mode for them determined by the result mode or mode of another operand, or during simplification can be handled by passing that mode around (that is why e.g. simplify_unary_operation has both mode and op_mode arguments, simplify_relational_operation has both mode and cmp_mode arguments etc.), though in such a case they must simplify into something that doesn't include the operation anymore, or CONST_INT can't be propagated into it. Jakub
Re: [PATCH] Retain TYPE_MODE more often for BIT_FIELD_REFs in get_inner_referece
> 2019-09-26 Richard Biener > > PR middle-end/91897 > * expr.c (get_inner_reference): For BIT_FIELD_REF with > vector type retain the original mode. > > * gcc.target/i386/pr91897.c: New testcase. This looks good to me, thanks. -- Eric Botcazou
[PATCH, rs6000] Update powerpc vector load builtins with PURE attribute
Hi, Update our (rs6000) vector load built-ins with the PURE attribute. These were previously given the MEM attribute, which meant that redundant loads surrounding the built-in calls could not be eliminated in earlier passes since they were defined as having the potential to touch memory. This change has been tested across assorted powerpc systems (p7, p8le, p8be, p9) with no regressions noted. OK for trunk? Thanks, -Will [gcc] 2019-09-26 Will Schmidt * config/rs6000/rs6000-builtin.def: ( LVSL LVSR LVEBX LVEHX LVEWX LVXL LVXL_V2DF LVXL_V2DI LVXL_V4SF LVXL_V4SI LVXL_V8HI LVXL_V16QI LVX LVX_V1TI LVX_V2DF LVX_V2DI LVX_V4SF LVX_V4SI LVX_V8HI LVX_V16QI LVLX LVLXL LVRX LVRXL LXSDX LXVD2X_V1TI LXVD2X_V2DF LXVD2X_V2DI LXVDSX LXVW4X_V4SF LXVW4X_V4SI LXVW4X_V8HI LXVW4X_V16QI LD_ELEMREV_V1TI LD_ELEMREV_V2DF LD_ELEMREV_V2DI LD_ELEMREV_V4SF LD_ELEMREV_V4SI LD_ELEMREV_V8HI LD_ELEMREV_V16QI): Use the PURE attribute. [testsuite] 2019-09-26 Will Schmidt * gcc.target/powerpc/pure-builtin-redundant-load.c: New. diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 0a2bdb7..4d4f3b3 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1175,41 +1175,41 @@ BU_ALTIVEC_P (VCMPGTUB_P, "vcmpgtub_p", CONST, vector_gtu_v16qi_p) /* AltiVec builtins that are handled as special cases. 
*/ BU_ALTIVEC_X (MTVSCR, "mtvscr", MISC) BU_ALTIVEC_X (MFVSCR, "mfvscr", MISC) BU_ALTIVEC_X (DSSALL, "dssall", MISC) BU_ALTIVEC_X (DSS, "dss", MISC) -BU_ALTIVEC_X (LVSL,"lvsl", MEM) -BU_ALTIVEC_X (LVSR,"lvsr", MEM) -BU_ALTIVEC_X (LVEBX, "lvebx",MEM) -BU_ALTIVEC_X (LVEHX, "lvehx",MEM) -BU_ALTIVEC_X (LVEWX, "lvewx",MEM) -BU_ALTIVEC_X (LVXL,"lvxl", MEM) -BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df",MEM) -BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di",MEM) -BU_ALTIVEC_X (LVXL_V4SF, "lvxl_v4sf",MEM) -BU_ALTIVEC_X (LVXL_V4SI, "lvxl_v4si",MEM) -BU_ALTIVEC_X (LVXL_V8HI, "lvxl_v8hi",MEM) -BU_ALTIVEC_X (LVXL_V16QI, "lvxl_v16qi", MEM) -BU_ALTIVEC_X (LVX, "lvx", MEM) -BU_ALTIVEC_X (LVX_V1TI,"lvx_v1ti", MEM) -BU_ALTIVEC_X (LVX_V2DF,"lvx_v2df", MEM) -BU_ALTIVEC_X (LVX_V2DI,"lvx_v2di", MEM) -BU_ALTIVEC_X (LVX_V4SF,"lvx_v4sf", MEM) -BU_ALTIVEC_X (LVX_V4SI,"lvx_v4si", MEM) -BU_ALTIVEC_X (LVX_V8HI,"lvx_v8hi", MEM) -BU_ALTIVEC_X (LVX_V16QI, "lvx_v16qi",MEM) +BU_ALTIVEC_X (LVSL,"lvsl", PURE) +BU_ALTIVEC_X (LVSR,"lvsr", PURE) +BU_ALTIVEC_X (LVEBX, "lvebx",PURE) +BU_ALTIVEC_X (LVEHX, "lvehx",PURE) +BU_ALTIVEC_X (LVEWX, "lvewx",PURE) +BU_ALTIVEC_X (LVXL,"lvxl", PURE) +BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df",PURE) +BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di",PURE) +BU_ALTIVEC_X (LVXL_V4SF, "lvxl_v4sf",PURE) +BU_ALTIVEC_X (LVXL_V4SI, "lvxl_v4si",PURE) +BU_ALTIVEC_X (LVXL_V8HI, "lvxl_v8hi",PURE) +BU_ALTIVEC_X (LVXL_V16QI, "lvxl_v16qi", PURE) +BU_ALTIVEC_X (LVX, "lvx", PURE) +BU_ALTIVEC_X (LVX_V1TI,"lvx_v1ti", PURE) +BU_ALTIVEC_X (LVX_V2DF,"lvx_v2df", PURE) +BU_ALTIVEC_X (LVX_V2DI,"lvx_v2di", PURE) +BU_ALTIVEC_X (LVX_V4SF,"lvx_v4sf", PURE) +BU_ALTIVEC_X (LVX_V4SI,"lvx_v4si", PURE) +BU_ALTIVEC_X (LVX_V8HI,"lvx_v8hi", PURE) +BU_ALTIVEC_X (LVX_V16QI, "lvx_v16qi",PURE) BU_ALTIVEC_X (STVX,"stvx", MEM) BU_ALTIVEC_X (STVX_V2DF, "stvx_v2df",MEM) BU_ALTIVEC_X (STVX_V2DI, "stvx_v2di",MEM) BU_ALTIVEC_X (STVX_V4SF, "stvx_v4sf",MEM) BU_ALTIVEC_X (STVX_V4SI, "stvx_v4si",MEM) BU_ALTIVEC_X (STVX_V8HI, "stvx_v8hi",MEM) BU_ALTIVEC_X 
(STVX_V16QI, "stvx_v16qi", MEM) -BU_ALTIVEC_C (LVLX,"lvlx", MEM) -BU_ALTIVEC_C (LVLXL, "lvlxl",MEM) -BU_ALTIVEC_C (LVRX,"lvrx", MEM) -BU_ALTIVEC_C (LVRXL, "lvrxl",MEM) +BU_ALTIVEC_C (LVLX,"lvlx", PURE) +BU_ALTIVEC_C (LVLXL, "lvlxl",PURE) +BU_ALTIVEC_C (LVRX,"lvrx", PURE) +BU_ALTIVEC_C (LVRXL, "lvrxl",PURE) BU_ALTIVEC_X (STVE
Re: Problem exposed by recent ARM multiply changes
On 9/26/19 9:22 AM, Jakub Jelinek wrote: > On Thu, Sep 26, 2019 at 09:12:34AM -0600, Jeff Law wrote: >> On 9/26/19 12:49 AM, Jakub Jelinek wrote: >>> On Wed, Sep 25, 2019 at 10:06:13PM -0600, Jeff Law wrote: (insn 13 12 14 2 (set (reg:SI 124) (const_int -939524096 [0xc800])) "j.c":10:54 161 {*arm_movsi_insn} (nil)) (insn 14 13 16 2 (parallel [ (set (reg:SI 132) (plus:SI (mult:SI (zero_extend:DI (reg/v:SI 115 [ sec ])) (zero_extend:DI (reg:SI 124))) (reg:SI 130))) >>> >>> IMNSHO the bug is just in the backend, the above is not valid RTL. >>> SImode MULT has to have SImode operands, not DImode operands. >> Hmm, yea, I think you're right. I totally missed that last night. >> >> FWIW, we do allow the modes of the input operands to differ. But I >> don't think either is allowed to be wider than the result mode of the >> multiplication. > > Sure, there are some rtl codes that can have different operands, but most of > them don't. The exceptions are shifts/rotates (the second operand can have > different mode, but result/first operand needs to be the same), comparisons > (result mode can be different from operand modes, but operand mode needs to > be the same), zero_extract/sign_extract, some vec_* etc. > Most binary arithmetic rtxes need to have the same mode for the result and > both arguments though. Right. My point is that the multiplication patterns are an exception as well. I don't like the exceptions for the multiplication patterns as I think the same effect could have been achieved with more zero/sign extractions. Jeff
Re: Problem exposed by recent ARM multiply changes
On 9/26/19 4:32 PM, Jeff Law wrote: On 9/26/19 9:14 AM, Kyrill Tkachov wrote: On 9/26/19 4:12 PM, Jeff Law wrote: On 9/26/19 12:49 AM, Jakub Jelinek wrote: On Wed, Sep 25, 2019 at 10:06:13PM -0600, Jeff Law wrote: (insn 13 12 14 2 (set (reg:SI 124) (const_int -939524096 [0xc800])) "j.c":10:54 161 {*arm_movsi_insn} (nil)) (insn 14 13 16 2 (parallel [ (set (reg:SI 132) (plus:SI (mult:SI (zero_extend:DI (reg/v:SI 115 [ sec ])) (zero_extend:DI (reg:SI 124))) (reg:SI 130))) IMNSHO the bug is just in the backend, the above is not valid RTL. SImode MULT has to have SImode operands, not DImode operands. Hmm, yea, I think you're right. I totally missed that last night. FWIW, we do allow the modes of the input operands to differ. But I don't think either is allowed to be wider than the result mode of the multiplication. I think that means we punt this back to Wilco to fix since it's an ARM specific issue. Can you please file a PR for it so we don't lose track of it? 91919 Nice. Kyrill jeff
Re: Problem exposed by recent ARM multiply changes
On 9/26/19 9:14 AM, Kyrill Tkachov wrote: > > On 9/26/19 4:12 PM, Jeff Law wrote: >> On 9/26/19 12:49 AM, Jakub Jelinek wrote: >> > On Wed, Sep 25, 2019 at 10:06:13PM -0600, Jeff Law wrote: >> >> (insn 13 12 14 2 (set (reg:SI 124) >> >> (const_int -939524096 [0xc800])) "j.c":10:54 161 >> >> {*arm_movsi_insn} >> >> (nil)) >> >> >> >> (insn 14 13 16 2 (parallel [ >> >> (set (reg:SI 132) >> >> (plus:SI (mult:SI (zero_extend:DI (reg/v:SI 115 [ >> sec ])) >> >> (zero_extend:DI (reg:SI 124))) >> >> (reg:SI 130))) >> > >> > IMNSHO the bug is just in the backend, the above is not valid RTL. >> > SImode MULT has to have SImode operands, not DImode operands. >> Hmm, yea, I think you're right. I totally missed that last night. >> >> FWIW, we do allow the modes of the input operands to differ. But I >> don't think either is allowed to be wider than the result mode of the >> multiplication. >> >> >> I think that means we punt this back to Wilco to fix since it's an ARM >> specific issue. > > Can you please file a PR for it so we don't lose track of it? 91919 jeff
Re: Problem exposed by recent ARM multiply changes
On Thu, Sep 26, 2019 at 09:12:34AM -0600, Jeff Law wrote: > On 9/26/19 12:49 AM, Jakub Jelinek wrote: > > On Wed, Sep 25, 2019 at 10:06:13PM -0600, Jeff Law wrote: > >> (insn 13 12 14 2 (set (reg:SI 124) > >> (const_int -939524096 [0xc800])) "j.c":10:54 161 > >> {*arm_movsi_insn} > >> (nil)) > >> > >> (insn 14 13 16 2 (parallel [ > >> (set (reg:SI 132) > >> (plus:SI (mult:SI (zero_extend:DI (reg/v:SI 115 [ sec ])) > >> (zero_extend:DI (reg:SI 124))) > >> (reg:SI 130))) > > > > IMNSHO the bug is just in the backend, the above is not valid RTL. > > SImode MULT has to have SImode operands, not DImode operands. > Hmm, yea, I think you're right. I totally missed that last night. > > FWIW, we do allow the modes of the input operands to differ. But I > don't think either is allowed to be wider than the result mode of the > multiplication. Sure, there are some rtl codes that can have different operands, but most of them don't. The exceptions are shifts/rotates (the second operand can have different mode, but result/first operand needs to be the same), comparisons (result mode can be different from operand modes, but operand mode needs to be the same), zero_extract/sign_extract, some vec_* etc. Most binary arithmetic rtxes need to have the same mode for the result and both arguments though. > I think that means we punt this back to Wilco to fix since it's an ARM > specific issue. Sure. Jakub
Re: Problem exposed by recent ARM multiply changes
On 9/26/19 4:12 PM, Jeff Law wrote: On 9/26/19 12:49 AM, Jakub Jelinek wrote: > On Wed, Sep 25, 2019 at 10:06:13PM -0600, Jeff Law wrote: >> (insn 13 12 14 2 (set (reg:SI 124) >> (const_int -939524096 [0xc800])) "j.c":10:54 161 >> {*arm_movsi_insn} >> (nil)) >> >> (insn 14 13 16 2 (parallel [ >> (set (reg:SI 132) >> (plus:SI (mult:SI (zero_extend:DI (reg/v:SI 115 [ sec ])) >> (zero_extend:DI (reg:SI 124))) >> (reg:SI 130))) > > IMNSHO the bug is just in the backend, the above is not valid RTL. > SImode MULT has to have SImode operands, not DImode operands. Hmm, yea, I think you're right. I totally missed that last night. FWIW, we do allow the modes of the input operands to differ. But I don't think either is allowed to be wider than the result mode of the multiplication. I think that means we punt this back to Wilco to fix since it's an ARM specific issue. Can you please file a PR for it so we don't lose track of it? Thanks, Kyrill jeff
Re: Problem exposed by recent ARM multiply changes
On 9/26/19 12:49 AM, Jakub Jelinek wrote: > On Wed, Sep 25, 2019 at 10:06:13PM -0600, Jeff Law wrote: >> (insn 13 12 14 2 (set (reg:SI 124) >> (const_int -939524096 [0xc800])) "j.c":10:54 161 >> {*arm_movsi_insn} >> (nil)) >> >> (insn 14 13 16 2 (parallel [ >> (set (reg:SI 132) >> (plus:SI (mult:SI (zero_extend:DI (reg/v:SI 115 [ sec ])) >> (zero_extend:DI (reg:SI 124))) >> (reg:SI 130))) > > IMNSHO the bug is just in the backend, the above is not valid RTL. > SImode MULT has to have SImode operands, not DImode operands. Hmm, yea, I think you're right. I totally missed that last night. FWIW, we do allow the modes of the input operands to differ. But I don't think either is allowed to be wider than the result mode of the multiplication. I think that means we punt this back to Wilco to fix since it's an ARM specific issue. jeff
[AArch64][SVE2] Shift-Right Accumulate combine patterns
Hi, This patch adds combining support for SVE2's shift-right accumulate instructions. Example snippet: #define IMM ... void foo (TYPE a, TYPE b, int n) { for (int i = 0; i < n; i++) a[i] += b[i] >> IMM; } Signed: before: asr z0.s, z0.s, #{IMM} add z0.s, z0.s, z1.s ... after: ssra z0.s, z1.s, #{IMM} Unsigned: before: lsr z0.s, z0.s, #{IMM} add z0.s, z0.s, z1.s ... after: usra z0.s, z1.s, #{IMM} Built and regression tested on aarch64-none-elf. Best Regards, Yuliang Wang gcc/ChangeLog: 2019-09-26 Yuliang Wang * config/aarch64/aarch64-sve2.md (aarch64_sve2_sra): New combine pattern. gcc/testsuite/ChangeLog: 2019-09-26 Yuliang Wang * gcc.target/aarch64/sve2/shracc_1.c: New test. rb11872.patch Description: rb11872.patch
Re: [PATCH 2/2] libada: Respect `--enable-version-specific-runtime-libs'
> Notice that ADA_RTL_OBJ_DIR never changes with/without the use of this > configuration option (as expected). > > Does it answer your question? Yes, that's now very clear, thanks! The change is OK for me, thanks. Arno
[Ada] Remove dependency on To_C/To_Ada
A recent change in osint.adb introduced a dependency on To_C/To_Ada subprograms which are (too) recent additions to System.OS_Lib, breaking builds of cross compilers with an older version of GNAT. Even though this is not guaranteed to work, this dependency is relatively easy to lift, so done. Tested on x86_64-pc-linux-gnu, committed on trunk * osint.adb (OS_Time_To_GNAT_Time): Remove dependency on To_C/To_Ada Index: osint.adb === --- osint.adb (revision 276149) +++ osint.adb (working copy) @@ -2183,7 +2183,19 @@ function OS_Time_To_GNAT_Time (T : OS_Time) return Time_Stamp_Type is GNAT_Time : Time_Stamp_Type; - TI : Long_Integer := To_C (T); + type Underlying_OS_Time is +range -(2 ** (Standard'Address_Size - Integer'(1))) .. + +(2 ** (Standard'Address_Size - Integer'(1)) - 1); + -- Underlying_OS_Time is a redeclaration of OS_Time to allow integer + -- manipulation. Remove this in favor of To_Ada/To_C once newer + -- GNAT releases are available with these functions. + + function To_Int is +new Unchecked_Conversion (OS_Time, Underlying_OS_Time); + function From_Int is +new Unchecked_Conversion (Underlying_OS_Time, OS_Time); + + TI : Underlying_OS_Time := To_Int (T); Y : Year_Type; Mo : Month_Type; D : Day_Type; @@ -2205,7 +2217,7 @@ TI := TI + 1; end if; - GM_Split (To_Ada (TI), Y, Mo, D, H, Mn, S); + GM_Split (From_Int (TI), Y, Mo, D, H, Mn, S); Make_Time_Stamp (Year=> Nat (Y),
[PATCH][PR89924] [missed-optimization] Function not de-virtualized within the same TU
This patch resolves subjected issue. bootstrapped and regtested on x86_64. ChangeLog: 2019-09-26 Kamlesh Kumar PR ipa/89924 * ipa-polymorphic-call.c (ipa_polymorphic_call_context::ipa_polymorphic_call_context): Updated outer_type. * g++.dg/opt/pr89924.C: New Test. * g++.dg/ipa/devirt-34.C: Modified. == diff --git a/gcc/ipa-polymorphic-call.c b/gcc/ipa-polymorphic-call.c index 705af03..b76793f 100644 --- a/gcc/ipa-polymorphic-call.c +++ b/gcc/ipa-polymorphic-call.c @@ -1118,6 +1118,10 @@ ipa_polymorphic_call_context::ipa_polymorphic_call_context (tree fndecl, We do not make this type of flow sensitive analysis yet. */ if (instance) *instance = base_pointer; + + if (((TREE_CODE (TREE_TYPE(base_type)) == RECORD_TYPE))) +outer_type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (base_pointer))); + return; } diff --git a/gcc/testsuite/g++.dg/ipa/devirt-34.C b/gcc/testsuite/g++.dg/ipa/devirt-34.C index 083c305..7961c0f 100644 --- a/gcc/testsuite/g++.dg/ipa/devirt-34.C +++ b/gcc/testsuite/g++.dg/ipa/devirt-34.C @@ -17,5 +17,4 @@ t(struct B *b) /* We should guess that the pointer of type B probably points to an instance of B or its derivates and exclude A::t from list of likely targets. 
*/ -/* { dg-final { scan-ipa-dump "Speculative targets" "devirt" } } */ /* { dg-final { scan-ipa-dump "1 speculatively devirtualized" "devirt" } } */ diff --git a/gcc/testsuite/g++.dg/opt/pr89924.C b/gcc/testsuite/g++.dg/opt/pr89924.C new file mode 100644 index 000..a78ef67 --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/pr89924.C @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -std=c++11 -fdump-tree-optimized" } */ + +struct A { + virtual A& operator+=(const A& other) noexcept = 0; + }; + + void foo_inner(int *p) noexcept { *p += *p; } + void foo_virtual_inner(A *p) noexcept { *p += *p; } + + void foo(int *p) noexcept + { + return foo_inner(p); + } + + struct Aint : public A { + int i; + A& operator+=(const A& other) noexcept override final + { + i += reinterpret_cast(other).i; + return *this; + } + }; + + void foo_virtual(Aint *p) noexcept + { + return foo_virtual_inner(p); + } + +//{ dg-final { scan-tree-dump-times "OBJ_TYPE_REF" 2 "optimized" } }
Re: [PATCH 2/2] libada: Respect `--enable-version-specific-runtime-libs'
On Thu, 26 Sep 2019, Arnaud Charlet wrote: > > Respect the `--enable-version-specific-runtime-libs' configuration > > option in libada/, so that shared gnatlib libraries will be installed > > in non-version-specific $(toolexeclibdir) if requested. In a > > cross-compilation environment this helps setting up a consistent > > sysroot, which can then be shared between the host and the target > > system. > > > > Update the settings of $(toolexecdir) and $(toolexeclibdir), unused till > > now, to keep the current arrangement in the version-specific case and > > make the new option to be enabled by default, unlike with the other > > target libraries, so as to keep existing people's build infrastructure > > unaffected. > > Can you clarify what will be the value of ADA_RTL_OBJ_DIR and ADA_RTL_DSO_DIR > in the following cases: > > - no version-specific-runtime-libs configure switch at all (default) ADA_RTL_OBJ_DIR=$(libsubdir)/adalib ADA_RTL_DSO_DIR=$(libdir)/gcc/$(target_noncanonical)/$(version)$(MULTISUBDIR)/adalib e.g. (no multilibs): ADA_RTL_OBJ_DIR=$(prefix)/lib/gcc/x86_64-linux-gnu/10.0.0/adalib ADA_RTL_DSO_DIR=$(prefix)/lib/gcc/x86_64-linux-gnu/10.0.0/adalib or (with multilibs): ADA_RTL_OBJ_DIR=$(prefix)/lib/gcc/riscv64-linux-gnu/10.0.0/lib32/ilp32/adalib ADA_RTL_DSO_DIR=$(prefix)/lib/gcc/riscv64-linux-gnu/10.0.0/lib32/ilp32/adalib ADA_RTL_OBJ_DIR=$(prefix)/lib/gcc/riscv64-linux-gnu/10.0.0/lib32/ilp32d/adalib ADA_RTL_DSO_DIR=$(prefix)/lib/gcc/riscv64-linux-gnu/10.0.0/lib32/ilp32d/adalib ADA_RTL_OBJ_DIR=$(prefix)/lib/gcc/riscv64-linux-gnu/10.0.0/lib64/lp64/adalib ADA_RTL_DSO_DIR=$(prefix)/lib/gcc/riscv64-linux-gnu/10.0.0/lib64/lp64/adalib [...] > - use of --enable-version-specific-runtime-libs Same as above. 
> - use of --disable-version-specific-runtime-libs ADA_RTL_OBJ_DIR=$(libsubdir)/adalib ADA_RTL_DSO_DIR varies in Makefile as the multilib part is now set by `configure' as with other target libraries, so with `x86_64-linux-gnu' (native) I have: ADA_RTL_DSO_DIR=$(libdir)/../lib64 and with `riscv64-linux-gnu' (cross) I have e.g.: ADA_RTL_DSO_DIR=$(exec_prefix)/$(target_alias)/lib/../lib64/lp64d and then specifically (no multilibs): ADA_RTL_OBJ_DIR=$(prefix)/lib/gcc/x86_64-linux-gnu/10.0.0/adalib ADA_RTL_DSO_DIR=$(prefix)/lib64 or (with multilibs): ADA_RTL_OBJ_DIR=$(prefix)/lib/gcc/riscv64-linux-gnu/10.0.0/lib32/ilp32/adalib ADA_RTL_DSO_DIR=$(prefix)/riscv64-linux-gnu/lib32/ilp32 ADA_RTL_OBJ_DIR=$(prefix)/lib/gcc/riscv64-linux-gnu/10.0.0/lib32/ilp32d/adalib ADA_RTL_DSO_DIR=$(prefix)/riscv64-linux-gnu/lib32/ilp32d ADA_RTL_OBJ_DIR=$(prefix)/lib/gcc/riscv64-linux-gnu/10.0.0/lib64/lp64/adalib ADA_RTL_DSO_DIR=$(prefix)/riscv64-linux-gnu/lib64/lp64 [...] Notice that ADA_RTL_OBJ_DIR never changes with/without the use of this configuration option (as expected). Does it answer your question? Maciej
[PATCH] PR libstdc++/91910 fix data race in Debug Mode destructors
Fix data race when _Safe_iterator_base::_M_detach() runs concurrently with the _Safe_container_base destructor. PR libstdc++/91910 * src/c++11/debug.cc (_Safe_iterator_base::_M_detach()): Load pointer atomically and lock the mutex before accessing the sequence. (_Safe_iterator_base::_M_reset()): Clear _M_sequence atomically. Tested x86_64-linux. I plan to commit this to trunk unless somebody sees a problem with it. commit c1ca5aa0271726d962c45220656b2411ece0e001 Author: Jonathan Wakely Date: Thu Sep 26 13:08:48 2019 +0100 PR libstdc++/91910 fix data race in Debug Mode destructors Fix data race when _Safe_iterator_base::_M_detach() runs concurrently with the _Safe_container_base destructor. PR libstdc++/91910 * src/c++11/debug.cc (_Safe_iterator_base::_M_detach()): Load pointer atomically and lock the mutex before accessing the sequence. (_Safe_iterator_base::_M_reset()): Clear _M_sequence atomically. diff --git a/libstdc++-v3/src/c++11/debug.cc b/libstdc++-v3/src/c++11/debug.cc index f5a49992efa..cc4107ba354 100644 --- a/libstdc++-v3/src/c++11/debug.cc +++ b/libstdc++-v3/src/c++11/debug.cc @@ -381,10 +381,17 @@ namespace __gnu_debug _Safe_iterator_base:: _M_detach() { -if (_M_sequence) +// This function can run concurrently with the sequence destructor, +// so there is a TOCTTOU race here: the sequence could be destroyed +// after we check that _M_sequence is not null. Use the pointer value +// to acquire the mutex (rather than via _M_sequence->_M_get_mutex()). +// If the sequence destructor runs between loading the pointer and +// locking the mutex, it will detach this iterator and set _M_sequence +// to null, and then _M_detach_single() will do nothing. 
+if (auto seq = __atomic_load_n(&_M_sequence, __ATOMIC_ACQUIRE)) { - _M_sequence->_M_detach(this); - _M_reset(); + __gnu_cxx::__scoped_lock sentry(get_safe_base_mutex(seq)); + _M_detach_single(); } } @@ -403,7 +410,7 @@ namespace __gnu_debug _Safe_iterator_base:: _M_reset() throw () { -_M_sequence = 0; +__atomic_store_n(&_M_sequence, (_Safe_sequence_base*)0, __ATOMIC_RELEASE); _M_version = 0; _M_prior = 0; _M_next = 0;
[PATCH] Do all reduction PHI creation upfront
The following patch moves all reduction (and nested cycle) PHI creation upfront, generating vector defs in program order so that even for PHIs the vector defs are present when processing uses. As part of this reorg I also moved the preheader argument filling to the time we create the PHI node (since all vector defs of uses are available - apart from those on backedges). This enables splitting out nested cycle handling (I have a followup for a part of that) as well as separating epilogue generation (more baby-steps necessary for that). Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2019-09-26 Richard Biener * tree-vect-loop.c (vect_analyze_loop_operations): Also call vectorizable_reduction for vect_double_reduction_def. (vect_transform_loop): Likewise. (vect_create_epilog_for_reduction): Move double-reduction PHI creation and preheader argument setting of PHIs ... (vectorizable_reduction): ... here. Also process vect_double_reduction_def PHIs, creating the vectorized PHI nodes, remembering the scalar adjustment computed for the epilogue in STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT. Remember the original reduction code in STMT_VINFO_REDUC_CODE. * tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize STMT_VINFO_REDUC_CODE. * tree-vectorizer.h (_stmt_vec_info::reduc_epilogue_adjustment): New. (_stmt_vec_info::reduc_code): Likewise. (STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT): Likewise. (STMT_VINFO_REDUC_CODE): Likewise. Index: gcc/tree-vect-loop.c === --- gcc/tree-vect-loop.c(revision 276147) +++ gcc/tree-vect-loop.c(working copy) @@ -1548,6 +1548,8 @@ vect_analyze_loop_operations (loop_vec_i ok = vectorizable_induction (stmt_info, NULL, NULL, NULL, &cost_vec); else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + || (STMT_VINFO_DEF_TYPE (stmt_info) + == vect_double_reduction_def) || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) && ! 
PURE_SLP_STMT (stmt_info)) ok = vectorizable_reduction (stmt_info, NULL, NULL, NULL, NULL, @@ -4299,20 +4301,17 @@ vect_create_epilog_for_reduction (vec new_phis; auto_vec inner_phis; int j, i; auto_vec scalar_results; unsigned int group_size = 1, k, ratio; - auto_vec vec_initial_defs; auto_vec phis; bool slp_reduc = false; bool direct_slp_reduc; @@ -4336,10 +4335,10 @@ vect_create_epilog_for_reduction (vec# REDUCTION_PHI + vec_def = phi # REDUCTION_PHI VECT_DEF = vector_stmt# vectorized form of STMT ... @@ -4350,18 +4349,10 @@ vect_create_epilog_for_reduction (vecreduc_phis, - &vec_initial_defs, vec_num, - REDUC_GROUP_FIRST_ELEMENT (stmt_info), - neutral_op); -} +; else { /* Get at the scalar def before the loop, that defines the initial value @@ -4373,64 +4364,29 @@ vect_create_epilog_for_reduction (vec (phi_info->stmt); - add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop), - UNKNOWN_LOCATION); - - /* Set the loop-latch arg for the reduction-phi. */ - if (j > 0) - def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def); - add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION); if (dump_enabled_p ()) @@ -4934,7 +4890,7 @@ vect_create_epilog_for_reduction (veclookup_stmt (new_phis[k / ratio]); - reduction_phi_info = reduction_phis[k / ratio]; if (double_reduc) inner_phi = inner_phis[k / ratio]; } @@ -5435,6 +5390,8 @@ vect_create_epilog_for_reduction (vecstmt); } + if (outer_loop) + { phis.create (3); /* Find the loop-closed-use at the loop exit of the original scalar result. (The reduction result is expected to have two immediate uses - @@ -5449,8 +5406,6 @@ vect_create_epilog_for_reduction (veclookup_stmt (exit_phi); gphi *vect_phi; @@ -5482,7 +5437,6 @@ vect_create_epilog_for_reduction (vec - vs1 was created previously in this function by a call to - vect_get_vec_def_for_operand and is stored in - vec_initial_def; - vs2 is defined by INNER_PHI, the vectorized EXIT_PHI; - vs0 is created here. */ - - /* Create vector phi node. 
*/ - vect_phi = create_phi_node (vec_initial_def, bb); - loop_vec_info_for_loop (outer_loop)->add_stmt (vect_phi); - - /* Create vs0
Add a simulate_enum_decl langhook
Similarly to the simulate_builtin_function_decl patch, this one adds a hook for simulating an enum declaration in the source language. Again, the main SVE ACLE patch has tests for various error conditions. Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Richard 2019-09-26 Richard Sandiford gcc/ * coretypes.h (string_int_pair): New typedef. * langhooks-def.h (LANG_HOOKS_SIMULATE_ENUM_DECL): Define. (LANG_HOOKS_FOR_TYPES_INITIALIZER): Include it. * langhooks.h (lang_hooks_for_types::simulate_enum_decl): New hook. gcc/c/ * c-tree.h (c_simulate_enum_decl): Declare. * c-decl.c (c_simulate_enum_decl): New function. * c-objc-common.h (LANG_HOOKS_SIMULATE_ENUM_DECL): Define to the above. gcc/cp/ * cp-objcp-common.h (cxx_simulate_enum_decl): Declare. (LANG_HOOKS_SIMULATE_ENUM_DECL): Define to the above. * decl.c (cxx_simulate_enum_decl): New function. Index: gcc/coretypes.h === --- gcc/coretypes.h 2019-09-21 13:56:08.843935101 +0100 +++ gcc/coretypes.h 2019-09-26 13:02:48.907474244 +0100 @@ -341,6 +341,7 @@ typedef int reg_class_t; } typedef std::pair tree_pair; +typedef std::pair string_int_pair; /* Define a name->value mapping. 
*/ template Index: gcc/langhooks-def.h === --- gcc/langhooks-def.h 2019-09-26 13:02:42.543520465 +0100 +++ gcc/langhooks-def.h 2019-09-26 13:02:48.911474214 +0100 @@ -171,6 +171,7 @@ #define LANG_HOOKS_TREE_DUMP_INITIALIZER extern tree lhd_unit_size_without_reusable_padding (tree); #define LANG_HOOKS_MAKE_TYPE lhd_make_node +#define LANG_HOOKS_SIMULATE_ENUM_DECL NULL #define LANG_HOOKS_CLASSIFY_RECORD NULL #define LANG_HOOKS_TYPE_FOR_SIZE lhd_type_for_size #define LANG_HOOKS_INCOMPLETE_TYPE_ERROR lhd_incomplete_type_error @@ -204,6 +205,7 @@ #define LANG_HOOKS_UNIT_SIZE_WITHOUT_REU #define LANG_HOOKS_FOR_TYPES_INITIALIZER { \ LANG_HOOKS_MAKE_TYPE, \ + LANG_HOOKS_SIMULATE_ENUM_DECL, \ LANG_HOOKS_CLASSIFY_RECORD, \ LANG_HOOKS_TYPE_FOR_MODE, \ LANG_HOOKS_TYPE_FOR_SIZE, \ Index: gcc/langhooks.h === --- gcc/langhooks.h 2019-09-26 13:02:42.543520465 +0100 +++ gcc/langhooks.h 2019-09-26 13:02:48.911474214 +0100 @@ -64,6 +64,10 @@ struct lang_hooks_for_types language-specific processing is required. */ tree (*make_type) (enum tree_code); + /* Make an enum type with the given name and values, associating + them all with the given source location. */ + tree (*simulate_enum_decl) (location_t, const char *, vec); + /* Return what kind of RECORD_TYPE this is, mainly for purposes of debug information. If not defined, record types are assumed to be structures. 
*/ Index: gcc/c/c-tree.h === --- gcc/c/c-tree.h 2019-09-26 13:02:42.539520492 +0100 +++ gcc/c/c-tree.h 2019-09-26 13:02:48.907474244 +0100 @@ -563,6 +563,8 @@ extern tree finish_enum (tree, tree, tre extern void finish_function (void); extern tree finish_struct (location_t, tree, tree, tree, class c_struct_parse_info *); +extern tree c_simulate_enum_decl (location_t, const char *, + vec); extern struct c_arg_info *build_arg_info (void); extern struct c_arg_info *get_parm_info (bool, tree); extern tree grokfield (location_t, struct c_declarator *, Index: gcc/c/c-decl.c === --- gcc/c/c-decl.c 2019-09-26 13:02:42.539520492 +0100 +++ gcc/c/c-decl.c 2019-09-26 13:02:48.907474244 +0100 @@ -8903,6 +8903,36 @@ build_enumerator (location_t decl_loc, l return tree_cons (decl, value, NULL_TREE); } +/* Implement LANG_HOOKS_SIMULATE_ENUM_DECL. */ + +tree +c_simulate_enum_decl (location_t loc, const char *name, + vec values) +{ + location_t saved_loc = input_location; + input_location = loc; + + struct c_enum_contents the_enum; + tree enumtype = start_enum (loc, &the_enum, get_identifier (name)); + + tree value_chain = NULL_TREE; + string_int_pair *value; + unsigned int i; + FOR_EACH_VEC_ELT (values, i, value) +{ + tree decl = build_enumerator (loc, loc, &the_enum, + get_identifier (value->first), + build_int_cst (integer_type_node, + value->second)); + TREE_CHAIN (decl) = value_chain; + value_chain = decl; +} + + finish_enum (enumtype, nreverse (value_chain), NULL_TREE); + + input_location = saved_loc; + return enumtype; +} /* Create the FUNCTION_DECL for a function definition. DECLSPECS, DECLARATOR and ATTRIBUTES are the parts of Index: gcc/c/c-objc-common.h ===
Add a simulate_builtin_function_decl langhook
Although it's possible to define the SVE intrinsics in a normal header file, it's much more convenient to define them directly in the compiler. This also speeds up compilation and gives better error messages. The idea is therefore for arm_sve.h (the main intrinsics header file) to have the pragma: #pragma GCC aarch64 "arm_sve.h" telling GCC to define (almost) everything arm_sve.h needs to define. The target then needs a way of injecting new built-in function declarations during compilation. The main hook for defining built-in functions is add_builtin_function. This is designed for use at start-up, and so has various features that are correct in that context but not for the pragma above: (1) the location is always BUILTINS_LOCATION, whereas for arm_sve.h it ought to be the location of the pragma. (2) the function is only immediately visible if it's in the implementation namespace, whereas the pragma is deliberately injecting functions into the general namespace. (3) there's no attempt to emulate a normal function declaration in C or C++, whereas functions declared by the pragma should be checked in the same way as an open-coded declaration would be. E.g. we should get an error if there was a previous incompatible declaration. (4) in C++, the function is treated as extern "C" and so can't be overloaded, whereas SVE intrinsics do use function overloading. This patch therefore adds a hook that targets can use to inject the equivalent of a source-level function declaration, but bound to a BUILT_IN_MD function. The main SVE intrinsic patch has tests to make sure that we report an error for conflicting definitions that appear either before or after including arm_sve.h. Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Richard 2019-09-26 Richard Sandiford gcc/ * langhooks.h (lang_hooks::simulate_builtin_function_decl): New hook. (simulate_builtin_function_decl): Declare. * langhooks-def.h (LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL): Define. 
(LANG_HOOKS_INITIALIZER): Include it. * langhooks.c (add_builtin_function_common): Rename to... (build_builtin_function): ...this. Add a location parameter and use it instead of BUILTINS_LOCATION. Remove the hook parameter and return the decl instead. (add_builtin_function): Update accordingly, passing the returned decl to the lang hook. (add_builtin_function_ext_scope): Likewise (simulate_builtin_function_decl): New function. gcc/c/ * c-tree.h (c_simulate_builtin_function_decl): Declare. * c-decl.c (c_simulate_builtin_function_decl): New function. * c-objc-common.h (LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL): Define to the above. gcc/cp/ * cp-tree.h (cxx_simulate_builtin_function_decl): Declare. * decl.c (builtin_function_1): Add an is_simulated_source_decl parameter. When true, treat the function as a C++ function rather than as extern "C", and don't treat it as anticipated. (cxx_simulate_builtin_function_decl): New function. * cp-objcp-common.h (LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL): Define to the above. Index: gcc/langhooks.h === --- gcc/langhooks.h 2019-03-08 18:15:33.660751905 + +++ gcc/langhooks.h 2019-09-26 13:02:42.543520465 +0100 @@ -494,6 +494,15 @@ struct lang_hooks backend must add all of the builtins at program initialization time. */ tree (*builtin_function_ext_scope) (tree decl); + /* Do language-specific processing for target-specific built-in + function DECL, so that it is defined in the global scope (only) + and is available without needing to be explicitly declared. + + This is intended for targets that want to inject declarations of + built-in functions into the source language (such as in response + to a pragma) rather than providing them in the source language itself. */ + tree (*simulate_builtin_function_decl) (tree decl); + /* Used to set up the tree_contains_structure array for a frontend. 
*/ void (*init_ts) (void); @@ -562,6 +571,8 @@ extern tree add_builtin_function_ext_sco enum built_in_class cl, const char *library_name, tree attrs); +extern tree simulate_builtin_function_decl (location_t, const char *, tree, + int, const char *, tree); extern tree add_builtin_type (const char *name, tree type); /* Language helper functions. */ Index: gcc/langhooks-def.h === --- gcc/langhooks-def.h 2019-03-08 18:15:39.328730361 + +++ gcc/langhooks-def.h 2019-09-26 13:02:42.543520465 +0100 @@ -122,6 +122,7 @@ #define LANG_H
Re: [libcpp] Issue a pedantic warning for UCNs outside UCS codespace
> For C, I think such UCNs violate the Semantics but not the Constraints on > UCNs, so no diagnostic is actually required in C, although it is permitted > as a pedwarn / error. > > However, while C++ doesn't have that Semantics / Constraints division, > it's also the case that before C++2a, C++ only has a dated normative > reference to ISO/IEC 10646-1:1993 (C++2a adds an undated reference and > says the dated one is only for deprecated features, as well as explicitly > making such UCNs outside the ISO 10646 code point range ill-formed). So I > think that for C++, this is only correct as an error / pedwarn in the > C++2a case. OK, thanks for the exegesis. ;-) Revised version attached. 2019-09-26 Eric Botcazou libcpp/ * charset.c (UCS_LIMIT): New macro. (ucn_valid_in_identifier): Use it instead of a hardcoded constant. (_cpp_valid_ucn): Issue a pedantic warning for UCNs larger than UCS_LIMIT outside of identifiers in C and in C++2a. 2019-09-26 Eric Botcazou gcc/testsuite/ * gcc.dg/cpp/ucs.c: Add test for new warning and adjust. * gcc.dg/cpp/utf8-5byte-1.c: Add -w to the options. * gcc.dg/attr-alias-5.c: Likewise. * g++.dg/cpp/ucn-1.C: Add test for new warning. * g++.dg/cpp2a/ucn1.C: New test. -- Eric Botcazou Index: libcpp/charset.c === --- libcpp/charset.c (revision 275988) +++ libcpp/charset.c (working copy) @@ -901,6 +901,9 @@ struct ucnrange { }; #include "ucnid.h" +/* ISO 10646 defines the UCS codespace as the range 0-0x10FFFF inclusive. */ +#define UCS_LIMIT 0x10FFFF + /* Returns 1 if C is valid in an identifier, 2 if C is valid except at the start of an identifier, and 0 if C is not valid in an identifier. 
We assume C has already gone through the checks of @@ -915,7 +918,7 @@ ucn_valid_in_identifier (cpp_reader *pfi int mn, mx, md; unsigned short valid_flags, invalid_start_flags; - if (c > 0x10FFFF) + if (c > UCS_LIMIT) return 0; mn = 0; @@ -1016,6 +1019,10 @@ ucn_valid_in_identifier (cpp_reader *pfi whose short identifier is less than 00A0 other than 0024 ($), 0040 (@), or 0060 (`), nor one in the range D800 through DFFF inclusive. + If the hexadecimal value is larger than the upper bound of the UCS + codespace specified in ISO/IEC 10646, a pedantic warning is issued + in all versions of C and in the C++2a version of C++. + *PSTR must be preceded by "\u" or "\U"; it is assumed that the buffer end is delimited by a non-hex digit. Returns false if the UCN has not been consumed, true otherwise. @@ -1135,6 +1142,13 @@ _cpp_valid_ucn (cpp_reader *pfile, const "universal character %.*s is not valid at the start of an identifier", (int) (str - base), base); } + else if (result > UCS_LIMIT + && (!CPP_OPTION (pfile, cplusplus) + || CPP_OPTION (pfile, lang) == CLK_CXX2A + || CPP_OPTION (pfile, lang) == CLK_GNUCXX2A)) +cpp_error (pfile, CPP_DL_PEDWARN, + "%.*s is outside the UCS codespace", + (int) (str - base), base); *cp = result; return true; Index: gcc/testsuite/g++.dg/cpp/ucn-1.C === --- gcc/testsuite/g++.dg/cpp/ucn-1.C (revision 275988) +++ gcc/testsuite/g++.dg/cpp/ucn-1.C (working copy) @@ -12,4 +12,6 @@ int main() int c\u0024c; // { dg-error "not valid in an identifier" "" { target { powerpc-ibm-aix* } } } U"\uD800"; // { dg-error "not a valid universal character" } + + U'\U00110000'; // { dg-warning "outside" "110000 outside UCS" { target c++2a } } } Index: gcc/testsuite/g++.dg/cpp2a/ucn1.C === --- gcc/testsuite/g++.dg/cpp2a/ucn1.C (nonexistent) +++ gcc/testsuite/g++.dg/cpp2a/ucn1.C (working copy) @@ -0,0 +1,7 @@ +// { dg-do compile } +// { dg-options "-std=c++2a" } + +int main() +{ + U'\U00110000'; // { dg-warning "outside" "110000 outside UCS" } +} Index: 
gcc/testsuite/gcc.dg/attr-alias-5.c === --- gcc/testsuite/gcc.dg/attr-alias-5.c (revision 275988) +++ gcc/testsuite/gcc.dg/attr-alias-5.c (working copy) @@ -1,7 +1,7 @@ /* Verify diagnostics for aliases to strings containing extended identifiers or bad characters. */ /* { dg-do compile } */ -/* { dg-options "-std=gnu99" } */ +/* { dg-options "-std=gnu99 -w" } */ /* { dg-require-alias "" } */ /* { dg-require-ascii-locale "" } */ /* { dg-skip-if "" { powerpc*-*-aix* } } */ Index: gcc/testsuite/gcc.dg/cpp/ucs.c === --- gcc/testsuite/gcc.dg/cpp/ucs.c (revision 275988) +++ gcc/testsuite/gcc.dg/cpp/ucs.c (working copy) @@ -39,7 +39,7 @@ #endif #if WCHAR_MAX >= 0x7ff -# if L'\U1234abcd' != 0x1234abcd +# if L'\U1234abcd' != 0x1234abcd /* { dg-warning "outside" "" } */ # error bad long ucs /* { dg-bogu
Re: Kyrylo Tkachov and Richard Sandiford appointed AArch64 maintainers.
Kyrill Tkachov writes: > On 9/26/19 8:02 AM, Ramana Radhakrishnan wrote: >> Hi, >> >> I'm pleased to announce that the GCC steering committee has appointed >> Kyrylo Tkachov and Richard Sandiford as AArch64 maintainers. >> >> Please join me in congratulating them both on their additional roles >> in the community. Kyrill / Richard, please update your listings in the >> MAINTAINERS file. >> > Thanks! +1 Committed as follows (keeping alphabetical ordering, in case the placement looks weird). Richard 2019-09-26 Richard Sandiford * MAINTAINERS: Add myself as an aarch64 maintainer. Index: MAINTAINERS === --- MAINTAINERS 2019-09-26 11:42:31.838492675 +0100 +++ MAINTAINERS 2019-09-26 11:54:27.037286003 +0100 @@ -45,9 +45,9 @@ docs, and the testsuite related to that. aarch64 port Richard Earnshaw aarch64 port James Greenhalgh +aarch64 port Richard Sandiford aarch64 port Marcus Shawcroft aarch64 port Kyrylo Tkachov -aarch64 SVE port Richard Sandiford alpha port Richard Henderson amdgcn port Julian Brown amdgcn port Andrew Stubbs
Re: [PATCH] driver: Also prune joined switches with negation
On 9/25/19 12:10 PM, Kyrill Tkachov wrote: On 9/24/19 7:47 PM, Matt Turner wrote: > When -march=native is passed to host_detect_local_cpu to the backend, > it overrides all command lines after it. That means > > $ gcc -march=native -march=armv8-a > > is treated as > > $ gcc -march=armv8-a -march=native > > Prune joined switches with Negative and RejectNegative to allow > -march=armv8-a to override previous -march=native on command-line. > > This is the same fix as was applied for i386 in SVN revision 269164 > but for > aarch64 and arm. > > gcc/ > > PR driver/69471 > * config/aarch64/aarch64.opt (march=): Add Negative(march=). > (mtune=): Add Negative(mtune=). (mcpu=): Add Negative(mcpu=). > * config/arm/arm.opt: Likewise. Thanks. This is ok for arm. LGTM for aarch64 but you'll need an aarch64 maintainer to approve. I've bootstrapped and tested this patch on arm-none-linux-gnueabihf and aarch64-none-linux-gnu and there's no fallout. I can commit it for you once the aarch64 part is approved. I've now committed this to trunk with a slightly adjusted ChangeLog 2019-09-26 Matt Turner PR driver/69471 * config/aarch64/aarch64.opt (march=): Add Negative(march=). (mtune=): Add Negative(mtune=). (mcpu=): Add Negative(mcpu=). * config/arm/arm.opt: Likewise. as r276148. Backports will come a bit later after it's had some testing on trunk. I don't think this patch is above the threshold for a copyright assignment, but if you intend to write larger patches in the future you'll want to get that sorted out. Thanks! 
Kyrill Kyrill > --- > gcc/config/aarch64/aarch64.opt | 6 +++--- > gcc/config/arm/arm.opt | 6 +++--- > 2 files changed, 6 insertions(+), 6 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64.opt > b/gcc/config/aarch64/aarch64.opt > index 865b6a6d8ca..fc43428b32a 100644 > --- a/gcc/config/aarch64/aarch64.opt > +++ b/gcc/config/aarch64/aarch64.opt > @@ -119,15 +119,15 @@ EnumValue > Enum(aarch64_tls_size) String(48) Value(48) > > march= > -Target RejectNegative ToLower Joined Var(aarch64_arch_string) > +Target RejectNegative Negative(march=) ToLower Joined > Var(aarch64_arch_string) > Use features of architecture ARCH. > > mcpu= > -Target RejectNegative ToLower Joined Var(aarch64_cpu_string) > +Target RejectNegative Negative(mcpu=) ToLower Joined > Var(aarch64_cpu_string) > Use features of and optimize for CPU. > > mtune= > -Target RejectNegative ToLower Joined Var(aarch64_tune_string) > +Target RejectNegative Negative(mtune=) ToLower Joined > Var(aarch64_tune_string) > Optimize for CPU. > > mabi= > diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt > index 452f0cf6d67..76c10ab62a2 100644 > --- a/gcc/config/arm/arm.opt > +++ b/gcc/config/arm/arm.opt > @@ -82,7 +82,7 @@ mapcs-stack-check > Target Report Mask(APCS_STACK) Undocumented > > march= > -Target RejectNegative ToLower Joined Var(arm_arch_string) > +Target RejectNegative Negative(march=) ToLower Joined > Var(arm_arch_string) > Specify the name of the target architecture. > > ; Other arm_arch values are loaded from arm-tables.opt > @@ -107,7 +107,7 @@ Target Report Mask(CALLER_INTERWORKING) > Thumb: Assume function pointers may go to non-Thumb aware code. > > mcpu= > -Target RejectNegative ToLower Joined Var(arm_cpu_string) > +Target RejectNegative Negative(mcpu=) ToLower Joined Var(arm_cpu_string) > Specify the name of the target CPU. > > mfloat-abi= > @@ -232,7 +232,7 @@ Target Report Mask(TPCS_LEAF_FRAME) > Thumb: Generate (leaf) stack frames even if not needed. 
> > mtune= > -Target RejectNegative ToLower Joined Var(arm_tune_string) > +Target RejectNegative Negative(mtune=) ToLower Joined > Var(arm_tune_string) > Tune code for the given processor. > > mprint-tune-info > -- > 2.21.0 >
Re: [PATCH] Retain TYPE_MODE more often for BIT_FIELD_REFs in get_inner_referece
On Thu, 26 Sep 2019, Eric Botcazou wrote: > > I see. So I misremember seeing aggregate typed BIT_FIELD_REFs > > (that was probably VIEW_CONVERTs then...). Still the GIMPLE verifier > > only has > > > > else if (!INTEGRAL_TYPE_P (TREE_TYPE (expr)) > >&& TYPE_MODE (TREE_TYPE (expr)) != BLKmode > >&& maybe_ne (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE > > (expr))), > > size)) > > { > > error ("mode size of non-integral result does not " > > "match field size of %qs", > > code_name); > > return true; > > } > > > > it doesn't verify that besides integral typed expressions only > > vector typed expressions are allowed. > > I think that the !INTEGRAL_TYPE_P is simply the opposite of the first case: > > if (INTEGRAL_TYPE_P (TREE_TYPE (t)) > && maybe_ne (TYPE_PRECISION (TREE_TYPE (t)), size)) > { > error ("integral result type precision does not match " >"field size of BIT_FIELD_REF"); > return t; > } > > > Anyhow - the original patch succeeded bootstrapping and testing. > > The way I proposed it: > > > > /* For vector types, with the correct size of access, use the mode > > of > > inner type. */ > > if (((TREE_CODE (TREE_TYPE (TREE_OPERAND (exp, 0))) == VECTOR_TYPE > > && TREE_TYPE (exp) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, > > 0 > > > >|| !INTEGRAL_TYPE_P (TREE_TYPE (exp))) > > > > && tree_int_cst_equal (size_tree, TYPE_SIZE (TREE_TYPE (exp > > mode = TYPE_MODE (TREE_TYPE (exp)); > > > > matches in sofar that we only restrict integer types (not modes) and > > for integer types allow extracts from vectors (the preexisting > > check for a matching component type is a bit too strict I guess). > > IMO if the !VECTOR_TYPE case cannot be covered, changes in this delicate area > ought to be restricted to VECTOR_TYPE. OK, so I'm testing the following then, simply adding the VECTOR_TYPE_P case in addition to the existing one plus adjusting the comment accordingly. Bootstrap / regtest running on x86_64-unknown-linux-gnu, OK if that passes? Thanks, Richard. 
2019-09-26 Richard Biener PR middle-end/91897 * expr.c (get_inner_reference): For BIT_FIELD_REF with vector type retain the original mode. * gcc.target/i386/pr91897.c: New testcase. Index: gcc/testsuite/gcc.target/i386/pr91897.c === --- gcc/testsuite/gcc.target/i386/pr91897.c (nonexistent) +++ gcc/testsuite/gcc.target/i386/pr91897.c (working copy) @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +typedef double Double16 __attribute__((vector_size(8*16))); + +void mult(Double16 *res, const Double16 *v1, const Double16 *v2) +{ + *res = *v1 * *v2; +} + +/* We want 4 ymm loads and 4 ymm stores. */ +/* { dg-final { scan-assembler-times "movapd" 8 } } */ Index: gcc/expr.c === --- gcc/expr.c (revision 276147) +++ gcc/expr.c (working copy) @@ -7230,12 +7230,13 @@ get_inner_reference (tree exp, poly_int6 *punsignedp = (! INTEGRAL_TYPE_P (TREE_TYPE (exp)) || TYPE_UNSIGNED (TREE_TYPE (exp))); - /* For vector types, with the correct size of access, use the mode of -inner type. */ - if (TREE_CODE (TREE_TYPE (TREE_OPERAND (exp, 0))) == VECTOR_TYPE - && TREE_TYPE (exp) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, 0))) - && tree_int_cst_equal (size_tree, TYPE_SIZE (TREE_TYPE (exp -mode = TYPE_MODE (TREE_TYPE (exp)); + /* For vector element types with the correct size of access or for + vector typed accesses use the mode of the access type. */ + if ((TREE_CODE (TREE_TYPE (TREE_OPERAND (exp, 0))) == VECTOR_TYPE + && TREE_TYPE (exp) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, 0))) + && tree_int_cst_equal (size_tree, TYPE_SIZE (TREE_TYPE (exp + || VECTOR_TYPE_P (TREE_TYPE (exp))) + mode = TYPE_MODE (TREE_TYPE (exp)); } else {
Re: Make assemble_real generate canonical CONST_INTs
Christophe Lyon writes: > On Wed, 18 Sep 2019 at 11:41, Richard Sandiford > wrote: >> >> Richard Biener writes: >> > On Tue, Sep 17, 2019 at 4:33 PM Richard Sandiford >> > wrote: >> >> >> >> assemble_real used GEN_INT to create integers directly from the >> >> longs returned by real_to_target. assemble_integer then went on >> >> to interpret the const_ints as though they had the mode corresponding >> >> to the accompanying size parameter: >> >> >> >> imode = mode_for_size (size * BITS_PER_UNIT, mclass, 0).require (); >> >> >> >> for (i = 0; i < size; i += subsize) >> >> { >> >> rtx partial = simplify_subreg (omode, x, imode, i); >> >> >> >> But in the assemble_real case, X might not be canonical for IMODE. >> >> >> >> If the interface to assemble_integer is supposed to allow outputting >> >> (say) the low 4 bytes of a DImode integer, then the simplify_subreg >> >> above is wrong. But if the number of bytes passed to assemble_integer >> >> is supposed to be the number of bytes that the integer actually contains, >> >> assemble_real is wrong. >> >> >> >> This patch takes the latter interpretation and makes assemble_real >> >> generate const_ints that are canonical for the number of bytes passed. >> >> >> >> The flip_storage_order handling assumes that each long is a full >> >> SImode, which e.g. excludes BITS_PER_UNIT != 8 and float formats >> >> whose memory size is not a multiple of 32 bits (which includes >> >> HFmode at least). The patch therefore leaves that code alone. >> >> If interpreting each integer as SImode is correct, the const_ints >> >> that it generates are also correct. >> >> >> >> Tested on aarch64-linux-gnu and x86_64-linux-gnu. Also tested >> >> by making sure that there were no new errors from a range of >> >> cross-built targets. OK to install? >> >> >> >> Richard >> >> >> >> >> >> 2019-09-17 Richard Sandiford >> >> >> >> gcc/ >> >> * varasm.c (assemble_real): Generate canonical const_ints. 
>> >> >> >> Index: gcc/varasm.c >> >> === >> >> --- gcc/varasm.c2019-09-05 08:49:30.829739618 +0100 >> >> +++ gcc/varasm.c2019-09-17 15:30:10.400740515 +0100 >> >> @@ -2873,25 +2873,27 @@ assemble_real (REAL_VALUE_TYPE d, scalar >> >>real_to_target (data, &d, mode); >> >> >> >>/* Put out the first word with the specified alignment. */ >> >> + unsigned int chunk_nunits = MIN (nunits, units_per); >> >>if (reverse) >> >> elt = flip_storage_order (SImode, gen_int_mode (data[nelts - 1], >> >> SImode)); >> >>else >> >> -elt = GEN_INT (data[0]); >> >> - assemble_integer (elt, MIN (nunits, units_per), align, 1); >> >> - nunits -= units_per; >> >> +elt = GEN_INT (sext_hwi (data[0], chunk_nunits * BITS_PER_UNIT)); >> > >> > why the appearant difference between the storage-order flipping >> > variant using gen_int_mode vs. the GEN_INT with sext_hwi? >> > Can't we use gen_int_mode in the non-flipping path and be done with that? >> >> Yeah, I mentioned this in the covering note. The flip_storage_order >> stuff only seems to work for floats that are a multiple of 32 bits in >> size, so it doesn't e.g. handle HFmode or 80-bit floats, whereas the >> new "else" does. Hard-coding SImode also hard-codes BITS_PER_UNIT==8, >> unlike the "else". >> >> So if anything, it's flip_storage_order that might need to change >> to avoid hard-coding SImode. That doesn't look like a trivial change >> though. E.g. the number of bytes passed to assemble_integer would need >> to match the number of bytes in data[nelts - 1] rather than data[0]. >> The alignment code below would also need to be adjusted. Fixing that >> (if it is a bug) seems like a separate change and TBH I'd rather not >> touch it here. 
>> > > Hi Richard, > > I suspect you've probably noticed already, but in case you haven't: > this patch causes a regression on arm: > FAIL: gcc.target/arm/fp16-compile-alt-3.c scan-assembler \t.short\t49152 > FAIL: gcc.target/arm/fp16-compile-ieee-3.c scan-assembler \t.short\t49152 Hadn't noticed that actually (but should have) -- thanks for the heads up. I've applied the below as obvious after testing on armeb-eabi. Richard 2019-09-26 Richard Sandiford gcc/testsuite/ * gcc.target/arm/fp16-compile-alt-3.c: Expect (__fp16) -2.0 to be written as a negative short rather than a positive one. * gcc.target/arm/fp16-compile-ieee-3.c: Likewise. Index: gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c === --- gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c 2019-03-08 18:14:28.836998325 + +++ gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c 2019-09-26 11:42:47.502378676 +0100 @@ -7,4 +7,4 @@ __fp16 xx = -2.0; /* { dg-final { scan-assembler "\t.size\txx, 2" } } */ -/* { dg-final { scan-assembler "\t.short\t49152" } } */ +/* { dg-final { scan-assembler "\t.short\t-16384"
Re: [PATCH] Retain TYPE_MODE more often for BIT_FIELD_REFs in get_inner_referece
> I see. So I misremember seeing aggregate typed BIT_FIELD_REFs > (that was probably VIEW_CONVERTs then...). Still the GIMPLE verifier > only has > > else if (!INTEGRAL_TYPE_P (TREE_TYPE (expr)) >&& TYPE_MODE (TREE_TYPE (expr)) != BLKmode >&& maybe_ne (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE > (expr))), > size)) > { > error ("mode size of non-integral result does not " > "match field size of %qs", > code_name); > return true; > } > > it doesn't verify that besides integral typed expressions only > vector typed expressions are allowed. I think that the !INTEGRAL_TYPE_P is simply the opposite of the first case: if (INTEGRAL_TYPE_P (TREE_TYPE (t)) && maybe_ne (TYPE_PRECISION (TREE_TYPE (t)), size)) { error ("integral result type precision does not match " "field size of BIT_FIELD_REF"); return t; } > Anyhow - the original patch succeeded bootstrapping and testing. > The way I proposed it: > > /* For vector types, with the correct size of access, use the mode > of > inner type. */ > if (((TREE_CODE (TREE_TYPE (TREE_OPERAND (exp, 0))) == VECTOR_TYPE > && TREE_TYPE (exp) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, > 0 > >|| !INTEGRAL_TYPE_P (TREE_TYPE (exp))) > > && tree_int_cst_equal (size_tree, TYPE_SIZE (TREE_TYPE (exp > mode = TYPE_MODE (TREE_TYPE (exp)); > > matches in sofar that we only restrict integer types (not modes) and > for integer types allow extracts from vectors (the preexisting > check for a matching component type is a bit too strict I guess). IMO if the !VECTOR_TYPE case cannot be covered, changes in this delicate area ought to be restricted to VECTOR_TYPE. -- Eric Botcazou
Re: Kyrylo Tkachov and Richard Sandiford appointed AArch64 maintainers.
On 9/26/19 8:02 AM, Ramana Radhakrishnan wrote: Hi, I'm pleased to announce that the GCC steering committee has appointed Kyrylo Tkachov and Richard Sandiford as AArch64 maintainers. Please join me in congratulating them both on their additional roles in the community. Kyrill / Richard, please update your listings in the MAINTAINERS file. Thanks! Committed the attached with r276142. Kyrill 2019-09-26 Kyrylo Tkachov * MAINTAINERS: Add myself as aarch64 maintainer. Thanks, Ramana diff --git a/MAINTAINERS b/MAINTAINERS index 948d56d8346ba2df42142955910d4e8a74f568e5..4bbedb4e5c06ac341abc0e2be3720376893a17f4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -46,6 +46,7 @@ docs, and the testsuite related to that. aarch64 port Richard Earnshaw aarch64 port James Greenhalgh aarch64 port Marcus Shawcroft +aarch64 port Kyrylo Tkachov aarch64 SVE port Richard Sandiford alpha port Richard Henderson amdgcn port Julian Brown
Re: [PATCH][RFC] Add new ipa-reorder pass
On 9/25/19 6:36 PM, Evgeny Kudryashov wrote: > On 2019-09-19 11:33, Martin Liška wrote: >> Hi. >> >> Function reordering has been around for quite some time and a naive >> implementation was also part of my diploma thesis some time ago. >> Currently, the GCC can reorder function based on first execution, which >> happens with PGO and LTO of course. Known limitation is that the order >> is preserved only partially as various symbols go into different >> LTRANS partitions. >> >> There has been some research in the area and I would point out the >> Facebook paper >> ([1]) and Sony presentation ([2]). Based on that, I decided to make a >> new implementation >> in the GCC that does the same (in a proper way). First part of the >> enablement are patches >> to ld.bfd and ld.gold that come up with a new section .text.sorted, >> that is always sorted. >> >> Thoughts? I would definitely welcome any interesting measurement on a >> bigger load. >> >> Martin >> > > Hi, Martin! > > Some time ago I tried to do the same but didn't go that far. Hello. > > I also used the C3 algorithm, except for the fact that ipa_fn_summary > contains information about size and time (somehow missed it). Which is a key part of the algorithm as one wants not to cross a page size boundary (in ideal case). > The linker option --sort-section=name was used for prototyping. It, > obviously, sorts sections and allows to place functions in the desired order > (by placing them into named sections .text.sorted., without patching > linkers or adjusting linker scripts). Yes, that can work, but having the new section explicitly sorted will not require a passing of an option to linker. > For testing my implementation I used several benchmarks from SPEC2006: > perlbench, sjeng and gobmk. Unfortunately, no significant positive changes > were obtained. 
> > I've tested the proposed pass on perlbench with train and reference input > (PGO+LTO as a base) but couldn't obtain stable results (still unclear > environment or perlbench specific reasons). Yes, it's quite difficult to measure something. One needs a huge .text section of a binary and a test-case which has a flat profile. I was able to measure results on the gcc itself. Martin > > Evgeny. >
[PATCH, Fortran] Optionally suppress no-automatic overwrites recursive warning - for approval
Original thread starts here https://gcc.gnu.org/ml/gcc-patches/2019-09/msg01185.html OK to commit? gcc/fortran/ChangeLog Mark Eggleston * invoke.texi: Add -Wno-overwrite-recursive to list of options. Add description of -Wno-overwrite-recursive. Fix typo in description of -Winteger-division. * lang.opt: Add option -Woverwrite-recursive initialised as on. * option.c (gfc_post_options): Output warning only if it is enabled. gcc/testsuite/ChangeLog Mark Eggleston * gfortran.dg/no_overwrite_recursive_1.f90: New test. * gfortran.dg/no_overwrite_recursive_2.f90: New test. -- https://www.codethink.co.uk/privacy.html >From 30d7915ce44629edc85dba210cca9007d1c70d02 Mon Sep 17 00:00:00 2001 From: Mark Eggleston Date: Tue, 16 Apr 2019 09:09:12 +0100 Subject: [PATCH] Suppress warning with -Wno-overwrite-recursive The message "Warning: Flag '-fno-automatic' overwrites '-frecursive'" is output by default when -fno-automatic and -frecursive are used together. It warns that recursion may be broken, however if all the relavent variables in the recursive procedure have automatic attributes the warning is unnecessary so -Wno-overwrite-recursive can be used to suppress it. This will allow compilation when warnings are regarded as errors. Suppress warning with -Wno-overwrite-recursive --- gcc/fortran/invoke.texi | 20 +++- gcc/fortran/lang.opt | 4 gcc/fortran/options.c| 3 ++- .../gfortran.dg/no_overwrite_recursive_1.f90 | 11 +++ .../gfortran.dg/no_overwrite_recursive_2.f90 | 10 ++ 5 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/no_overwrite_recursive_1.f90 create mode 100644 gcc/testsuite/gfortran.dg/no_overwrite_recursive_2.f90 diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi index b911ac90018..0177fb400a1 100644 --- a/gcc/fortran/invoke.texi +++ b/gcc/fortran/invoke.texi @@ -149,10 +149,11 @@ and warnings}. 
-Wc-binding-type -Wcharacter-truncation -Wconversion @gol -Wdo-subscript -Wfunction-elimination -Wimplicit-interface @gol -Wimplicit-procedure -Wintrinsic-shadow -Wuse-without-only @gol --Wintrinsics-std -Wline-truncation -Wno-align-commons -Wno-tabs @gol --Wreal-q-constant -Wsurprising -Wunderflow -Wunused-parameter @gol --Wrealloc-lhs -Wrealloc-lhs-all -Wfrontend-loop-interchange @gol --Wtarget-lifetime -fmax-errors=@var{n} -fsyntax-only -pedantic @gol +-Wintrinsics-std -Wline-truncation -Wno-align-commons @gol +-Wno-overwrite-recursive -Wno-tabs -Wreal-q-constant -Wsurprising @gol +-Wunderflow -Wunused-parameter -Wrealloc-lhs -Wrealloc-lhs-all @gol +-Wfrontend-loop-interchange -Wtarget-lifetime -fmax-errors=@var{n} @gol +-fsyntax-only -pedantic @gol -pedantic-errors @gol } @@ -995,7 +996,7 @@ nor has been declared as @code{EXTERNAL}. @opindex @code{Winteger-division} @cindex warnings, integer division @cindex warnings, division of integers -Warn if a constant integer division truncates it result. +Warn if a constant integer division truncates its result. As an example, 3/5 evaluates to 0. @item -Wintrinsics-std @@ -1008,6 +1009,15 @@ it as @code{EXTERNAL} procedure because of this. @option{-fall-intrinsics} can be used to never trigger this behavior and always link to the intrinsic regardless of the selected standard. +@item -Wno-overwrite-recursive +@opindex @code{Woverwrite-recursive} +@cindex warnings, overwrite recursive +Do not warn when @option{-fno-automatic} is used with @option{-frecursive}. Recursion +will be broken if the relevant local variables do not have the attribute +@code{AUTOMATIC} explicitly declared. This option can be used to suppress the warning +when it is known that recursion is not broken. Useful for build environments that use +@option{-Werror}. 
+ @item -Wreal-q-constant @opindex @code{Wreal-q-constant} @cindex warnings, @code{q} exponent-letter diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt index b8bf7fe7508..1b296c92d19 100644 --- a/gcc/fortran/lang.opt +++ b/gcc/fortran/lang.opt @@ -293,6 +293,10 @@ Wopenmp-simd Fortran ; Documented in C +Woverwrite-recursive +Fortran Warning Var(warn_overwrite_recursive) Init(1) +Warn that -fno-automatic may break recursion. + Wpedantic Fortran ; Documented in common.opt diff --git a/gcc/fortran/options.c b/gcc/fortran/options.c index a0f95fa8832..47042a25eab 100644 --- a/gcc/fortran/options.c +++ b/gcc/fortran/options.c @@ -419,7 +419,8 @@ gfc_post_options (const char **pfilename) gfc_warning_now (0, "Flag %<-fno-automatic%> overwrites %<-fmax-stack-var-size=%d%>", flag_max_stack_var_size); else if (!flag_automatic && flag_recursive) -gfc_warning_now (0, "Flag %<-fno-automatic%> overwrites %<-frecursive%>"); +gfc_warning_now (OPT_Woverwrite_recursive, "Flag %<-fno-automatic%> " + "overwrites %<-frecursive%>"); else if (
Re: [PATCH v4] Missed function specialization + partial devirtualization
On 9/26/19 7:23 AM, luoxhu wrote: > Thanks Martin, > > > On 2019/9/25 18:57, Martin Liška wrote: >> On 9/25/19 5:45 AM, luoxhu wrote: >>> Hi, >>> >>> Sorry for replying so late due to cauldron conference and other LTO issues >>> I was working on. >> >> Hello. >> >> That's fine, we still have plenty of time for patch review. >> >> Not fixed issues which I reported in v3 (and still valid in v4): >> - please come up with indirect_target_info::indirect_target_info and use it > Sorry for miss out. Hello. Sure, please use a contructor initialization (see my patch). > > >> - do you need to stream out indirect_call_targets when common_target_id == 0? > > No need to stream out items with common_target_id == 0, removed the if > condition in lto-cgraph.c. Fine. Do we have a guarantee that item->common_target_id is always != 0? Please put there an assert. > >> >> Then I'm suggesting to use vec::is_empty (please see my patch). > OK. But has_multiple_indirect_call_p should return different than > has_indirect_call_p as it checks more that one targets? Sure, that was mistake in my patch from previous reply. > > gcc/cgraph.c > /* Return true if this edge has multiple indirect call targets. */ > bool > cgraph_edge::has_multiple_indirect_call_p (void) > { > - return indirect_info && indirect_info->indirect_call_targets > - && indirect_info->indirect_call_targets->length () > 1; > + return (indirect_info && indirect_info->indirect_call_targets > + && indirect_info->indirect_call_targets->length () > 1); > } > > /* Return true if this edge has at least one indirect call target. 
*/ > bool > cgraph_edge::has_indirect_call_p (void) > { > - return indirect_info && indirect_info->indirect_call_targets > - && indirect_info->indirect_call_targets->length (); > + return (indirect_info && indirect_info->indirect_call_targets > + && !indirect_info->indirect_call_targets->is_empty ()); > } > >> >> I see following failures for the tests provided: >> FAIL: gcc.dg/tree-prof/crossmodule-indir-call-topn-1.c compilation, >> -fprofile-generate -D_PROFILE_GENERATE >> FAIL: gcc.dg/tree-prof/crossmodule-indir-call-topn-2.c compilation, >> -fprofile-generate -D_PROFILE_GENERATE >> FAIL: gcc.dg/tree-prof/indir-call-prof-topn.c compilation, >> -fprofile-generate -D_PROFILE_GENERATE > > Sorry that I forgot to remove the deprecated build option in the 3 cases > (also updated the scan exp check): > -/* { dg-options "-O2 -flto -DDOJOB=1 -fdump-ipa-profile_estimate --param > indir-call-topn-profile=1" } */ > +/* { dg-options "-O2 -flto -DDOJOB=1 -fdump-ipa-profile_estimate" } */ > > > The new patch is attached. Thanks. Hm, looking at the gimple_ic_transform function. I think the function should always return false as it never does a GIMPLE transformation. Apart from that, I'm fine with the patch. Note that I'm not the maintainer, but I bet we simplified the patch review to Honza significantly. Last missing piece is probably the update ChangeLog. Thank you for working on that, Martin > > > Xiong Hu > >> >> Next comments follow directly in the email body: >> >>> >>> v4 Changes: >>> 1. Rebase to trunk. >>> 2. Remove num_of_ics and use vector's length to avoid redundancy. >>> 3. Update the code in ipa-profile.c to improve review feasibility. >>> 4. Add function has_indirect_call_p and has_multiple_indirect_call_p. >>> 5. For parameter control, I will leave it to next patch as it is a >>> relative independent function. 
Currently, maximum number of >>> promotions is GCOV_TOPN_VALUES as only 4 profiling value limited >>> from profile-generate, therefore minimum probability is adjusted to >>> 25% in value-prof.c, it was 75% also by hard code for single >>> indirect target. No control to minimal number of edge >>> executions yet. What's more, this patch is a bit large now. >>> >>> This patch aims to fix PR69678 caused by PGO indirect call profiling >>> performance issues. >>> The bug that profiling data is never working was fixed by Martin's pull >>> back of topN patches, performance got GEOMEAN ~1% improvement(+24% for >>> 511.povray_r specifically). >>> Still, currently the default profile only generates SINGLE indirect target >>> that called more than 75%. This patch leverages MULTIPLE indirect >>> targets use in LTO-WPA and LTO-LTRANS stage, as a result, function >>> specialization, profiling, partial devirtualization, inlining and >>> cloning could be done successfully based on it. >>> Performance can get improved from 0.70 sec to 0.38 sec on simple tests. >>> Details are: >>> 1. PGO with topn is enabled by default now, but only one indirect >>> target edge will be generated in ipa-profile pass, so add variables to >>> enable >>> multiple speculative edges through passes, speculative_id will record the >>> direct edge index bind to the indirect edge, indirect_call_targets length >>> records how many direct edges o
Re: [PATCH][AArch64] Don't split 64-bit constant stores to volatile location
On 9/25/19 10:24 PM, James Greenhalgh wrote: On Tue, Sep 24, 2019 at 02:40:20PM +0100, Kyrill Tkachov wrote: Hi all, On 8/22/19 10:16 AM, Kyrill Tkachov wrote: Hi all, The optimisation to optimise: typedef unsigned long long u64; void bar(u64 *x) { *x = 0xabcdef10abcdef10; } from: mov x1, 61200 movk x1, 0xabcd, lsl 16 movk x1, 0xef10, lsl 32 movk x1, 0xabcd, lsl 48 str x1, [x0] into: mov w1, 61200 movk w1, 0xabcd, lsl 16 stp w1, w1, [x0] ends up producing two distinct stores if the destination is volatile: void bar(u64 *x) { *(volatile u64 *)x = 0xabcdef10abcdef10; } mov w1, 61200 movk w1, 0xabcd, lsl 16 str w1, [x0] str w1, [x0, 4] because we end up not merging the strs into an stp. It's questionable whether the use of STP is valid for volatile in the first place. To avoid unnecessary pain in a context where it's unlikely to be performance critical [1] (use of volatile), this patch avoids this transformation for volatile destinations, so we produce the original single STR-X. Bootstrapped and tested on aarch64-none-linux-gnu. Ok for trunk (and eventual backports)? This has been approved by James offline. Committed to trunk with r276098. Does this need backporting? Yeah, this has been in since 2016. I've tested it on GCC 8 and 9 and will backport there and to 7 in due time. Kyrill Thanks, James Thanks, Kyrill Thanks, Kyrill [1] https://lore.kernel.org/lkml/20190821103200.kpufwtviqhpbuv2n@willie-the-truck/ gcc/ 2019-08-22 Kyrylo Tkachov * config/aarch64/aarch64.md (mov): Don't call aarch64_split_dimode_const_store on volatile MEM. gcc/testsuite/ 2019-08-22 Kyrylo Tkachov * gcc.target/aarch64/nosplit-di-const-volatile_1.c: New test.
Re: [PATCH] DWARF array bounds missing from C++ array definitions
On Thu, Sep 26, 2019 at 4:05 AM Alexandre Oliva wrote: > > On Sep 13, 2019, Richard Biener wrote: > > > On Fri, Sep 13, 2019 at 1:32 AM Alexandre Oliva wrote: > >> On Sep 12, 2019, Richard Biener wrote: > > >> > So - maybe we can have the patch a bit cleaner by adding > >> > a flag to add_type_attribute saying we only want it if it's > >> > different from that already present (on the specification DIE)? > > >> That's exactly what I meant completing_type_p to check. Do you mean it > >> should be stricter, do it more cheaply, or what? > > > I meant to do it more cheaply > > How? If it's the same type, lookup will find the DIE and be done with > it. If it's not, we'll want to build a new DIE anyway. Where > computation is there to avoid that is not already avoided? > > > also the name completing_type_p is > > misleading IMHO since it simply checks (re-)creating the type DIE > > will yield a different one > > True. The expectation is that the type of a decl will not transition > from complete to incomplete. different_type_p would be more accurate > for unexpected frontends that behaved so weirdly, but not quite as > intuitive as to the intent. I still like completing_type_p better, but > if you insist, I'll change it. Bonus points for a better name ;-) Heh, I don't have one - which usually makes me simply inline the beast into the single caller :P Maybe simply have_new_type_for_decl_with_old_die_p? Or new_type_for_die_p? > > sure how that would look like (create a new variable DIE and > > splice out "common" parts into an abstract DIE used by > > both the old and the new DIE?) > > In my exhaustive verification of all hits of completing_type_p in an > all-languages bootstrap, we had either a new DIE for the outermost array > type, now with bounds, sharing the remaining array dimensions; an > alternate symbolic name ultimately referring to the same type > definition; or an extra const-qualifying DIE for a const array type > whose base type is already const-qualified. 
None of these seemed > excessive to me, though the last one might be desirable and not too hard > to avoid. > > I haven't seen any case of transitioning to less-defined types. Yeah, that would be surprising indeed. The patch is OK with using new_type_for_die_p. Thanks, Richard. > -- > Alexandre Oliva, freedom fighter he/him https://FSFLA.org/blogs/lxo > Be the change, be Free!FSF VP & FSF Latin America board member > GNU Toolchain EngineerFree Software Evangelist > Hay que enGNUrecerse, pero sin perder la terGNUra jamás - Che GNUevara
Re: [PATCH] PR tree-optimization/90836 Missing popcount pattern matching
On Tue, Sep 24, 2019 at 5:29 PM Dmitrij Pochepko wrote: > > Hi, > > can anybody take a look at v2? +(if (tree_to_uhwi (@4) == 1 + && tree_to_uhwi (@10) == 2 && tree_to_uhwi (@5) == 4 those will still ICE for large __int128_t constants. Since you do not match any conversions you should probably restrict the precision of 'type' like with (if (TYPE_PRECISION (type) <= 64 && tree_to_uhwi (@4) ... likewise tree_to_uhwi will fail for negative constants thus if the pattern assumes unsigned you should verify that as well with && TYPE_UNSIGNED (type). Your 'argtype' is simply 'type' so you can elide it. + (switch + (if (types_match (argtype, long_long_unsigned_type_node)) + (convert (BUILT_IN_POPCOUNTLL:integer_type_node @0))) + (if (types_match (argtype, long_unsigned_type_node)) + (convert (BUILT_IN_POPCOUNTL:integer_type_node @0))) + (if (types_match (argtype, unsigned_type_node)) + (convert (BUILT_IN_POPCOUNT:integer_type_node @0))) Please test small types first so we can avoid popcountll when long == long long or long == int. I also wonder if we really want to use the builtins and check optab availability or if we nowadays should use direct_internal_fn_supported_p (IFN_POPCOUNT, integer_type_node, type, OPTIMIZE_FOR_BOTH) and (convert (IFN_POPCOUNT:type @0)) without the switch? Thanks, Richard. > Thanks, > Dmitrij > > On Mon, Sep 09, 2019 at 10:03:40PM +0300, Dmitrij Pochepko wrote: > > Hi all. > > > > Please take a look at v2 (attached). > > I changed patch according to review comments. The same testing was > > performed again. > > > > Thanks, > > Dmitrij > > > > On Thu, Sep 05, 2019 at 06:34:49PM +0300, Dmitrij Pochepko wrote: > > > This patch adds matching for Hamming weight (popcount) implementation. 
> > > The following sources: > > > > > > int > > > foo64 (unsigned long long a) > > > { > > > unsigned long long b = a; > > > b -= ((b>>1) & 0xULL); > > > b = ((b>>2) & 0xULL) + (b & 0xULL); > > > b = ((b>>4) + b) & 0x0F0F0F0F0F0F0F0FULL; > > > b *= 0x0101010101010101ULL; > > > return (int)(b >> 56); > > > } > > > > > > and > > > > > > int > > > foo32 (unsigned int a) > > > { > > > unsigned long b = a; > > > b -= ((b>>1) & 0xUL); > > > b = ((b>>2) & 0xUL) + (b & 0xUL); > > > b = ((b>>4) + b) & 0x0F0F0F0FUL; > > > b *= 0x01010101UL; > > > return (int)(b >> 24); > > > } > > > > > > and equivalents are now recognized as popcount for platforms with hw > > > popcount support. Bootstrapped and tested on x86_64-pc-linux-gnu and > > > aarch64-linux-gnu systems with no regressions. > > > > > > (I have no write access to repo) > > > > > > Thanks, > > > Dmitrij > > > > > > > > > gcc/ChangeLog: > > > > > > PR tree-optimization/90836 > > > > > > * gcc/match.pd (popcount): New pattern. > > > > > > gcc/testsuite/ChangeLog: > > > > > > PR tree-optimization/90836 > > > > > > * lib/target-supports.exp (check_effective_target_popcount) > > > (check_effective_target_popcountll): New effective targets. > > > * gcc.dg/tree-ssa/popcount4.c: New test. > > > * gcc.dg/tree-ssa/popcount4l.c: New test. > > > * gcc.dg/tree-ssa/popcount4ll.c: New test. 
> > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > > index 0317bc7..b1867bf 100644 > > > --- a/gcc/match.pd > > > +++ b/gcc/match.pd > > > @@ -5358,6 +5358,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > >(cmp (popcount @0) integer_zerop) > > >(rep @0 { build_zero_cst (TREE_TYPE (@0)); } > > > > > > +/* 64- and 32-bits branchless implementations of popcount are detected: > > > + > > > + int popcount64c (uint64_t x) > > > + { > > > + x -= (x >> 1) & 0xULL; > > > + x = (x & 0xULL) + ((x >> 2) & > > > 0xULL); > > > + x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL; > > > + return (x * 0x0101010101010101ULL) >> 56; > > > + } > > > + > > > + int popcount32c (uint32_t x) > > > + { > > > + x -= (x >> 1) & 0x; > > > + x = (x & 0x) + ((x >> 2) & 0x); > > > + x = (x + (x >> 4)) & 0x0f0f0f0f; > > > + return (x * 0x01010101) >> 24; > > > + } */ > > > +(simplify > > > + (convert > > > +(rshift > > > + (mult > > > + (bit_and:c > > > + (plus:c > > > + (rshift @8 INTEGER_CST@5) > > > + (plus:c@8 > > > + (bit_and @6 INTEGER_CST@7) > > > + (bit_and > > > + (rshift > > > + (minus@6 > > > + @0 > > > + (bit_and > > > + (rshift @0 INTEGER_CST@4) > > > + INTEGER_CST@11)) > > > + INTEGER_CST@10) > > > + INTEGER_CST@9))) > > > + INTEGER_CST@3) > > > + INTEGER_CST@2) > > > + INTEGER_CST@1)) > > > + /* Check constants and optab. */ > > > + (with > > > +
Re: [PATCH 2/2] libada: Respect `--enable-version-specific-runtime-libs'
> Respect the `--enable-version-specific-runtime-libs' configuration > option in libada/, so that shared gnatlib libraries will be installed > in non-version-specific $(toolexeclibdir) if requested. In a > cross-compilation environment this helps setting up a consistent > sysroot, which can then be shared between the host and the target > system. > > Update the settings of $(toolexecdir) and $(toolexeclibdir), unused till > now, to keep the current arrangement in the version-specific case and > make the new option to be enabled by default, unlike with the other > target libraries, so as to keep existing people's build infrastructure > unaffected. Can you clarify what will be the value of ADA_RTL_OBJ_DIR and ADA_RTL_DSO_DIR in the following cases: - no version-specific-runtime-libs configure switch at all (default) - use of --enable-version-specific-runtime-libs - use of --disable-version-specific-runtime-libs Arno
Re: [PATCH 1/2] libada: Remove racy duplicate gnatlib installation
Unfortunately the Make-lang.in part is still needed when using the --disable-libada configure switch so a more elaborate change is needed. > Remove the extraneous `install-gnatlib' invocation from within gcc/ then > as all the gnatlib handling ought to be done in libada/ nowadays. > > gcc/ada/ > * gcc-interface/Make-lang.in (ada.install-common): Remove > `install-gnatlib' invocation.
Re: [PATCH] Add TODO_update_ssa for SLP BB vectorization (PR tree-optimization/91885).
On 9/26/19 9:32 AM, Richard Biener wrote: > On Wed, Sep 25, 2019 at 12:06 PM Martin Liška wrote: >> >> Hi. >> >> Similarly to SLP pass, we should probably set TODO_update_ssa >> when a SLP BB vectorization happens from the normal vect pass. >> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. >> >> Ready to be installed? > > Hmm, this was supposed to be handled by > > if (num_vectorized_loops > 0) > { > /* If we vectorized any loop only virtual SSA form needs to be updated. > ??? Also while we try hard to update loop-closed SSA form we fail > to properly do this in some corner-cases (see PR56286). */ > rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals); > return TODO_cleanup_cfg; > } > > but there isn't an equivalent of num_vectorized_bbs here. Given the above > effectively short-cuts your patch (all paths only ever set ret = > TODO_cleanup_cfg > right now...) it will work without pessimizing things by running > update-ssa twice. > > Can you check whether TODO_update_ssa_only_virtuals is enough as a fix? Yes, it's enough. > > Otherwise OK. I'm going to install the patch then. Martin > > Thanks, > Richard. > >> Thanks, >> Martin >> >> gcc/ChangeLog: >> >> 2019-09-25 Martin Liska >> >> PR tree-optimization/91885 >> * tree-vectorizer.c (try_vectorize_loop_1): >> Add TODO_update_ssa similarly to what slp >> pass does. >> >> gcc/testsuite/ChangeLog: >> >> 2019-09-25 Martin Liska >> >> PR tree-optimization/91885 >> * gcc.dg/pr91885.c: New test. >> --- >> gcc/testsuite/gcc.dg/pr91885.c | 47 ++ >> gcc/tree-vectorizer.c | 2 +- >> 2 files changed, 48 insertions(+), 1 deletion(-) >> create mode 100644 gcc/testsuite/gcc.dg/pr91885.c >> >>
Re: [PATCH] Fix continue condition in IPA-SRA's process_scan_results
On Wed, Sep 25, 2019 at 4:18 PM Martin Jambor wrote: > > Hi, > > On Tue, Sep 24 2019, Martin Jambor wrote: > > > > > > It is the correct thing to do, sorry for the breakage. I have to run > > now but will prepare a patch tomorrow. > > > > and here it is. The patch fixes the thinko explained in my email > yesterday - basically the test for locally_unused was intended for > unused aggregates which have however not been marked as such yet and > going this way for unsplitable but unused register-type parameters may > cause problems in some cases, if they are for example big SVE vectors. > > Passed bootstrap and testing on x86_64-linux. OK for trunk? OK. Richard. > Thanks, > > Martin > > > 2019-09-25 Martin Jambor > > * ipa-sra.c (process_scan_results): Fix continue condition. > --- > gcc/ipa-sra.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/gcc/ipa-sra.c b/gcc/ipa-sra.c > index 0ccebbd4607..b35fff69472 100644 > --- a/gcc/ipa-sra.c > +++ b/gcc/ipa-sra.c > @@ -2239,7 +2239,7 @@ process_scan_results (cgraph_node *node, struct > function *fun, > desc_index++, parm = DECL_CHAIN (parm)) > { >gensum_param_desc *desc = &(*param_descriptions)[desc_index]; > - if (!desc->locally_unused && !desc->split_candidate) > + if (!desc->split_candidate) > continue; > >if (flag_checking) > -- > 2.23.0 >
Re: [PATCH] Fix quoting in a call to internal_error
On Wed, Sep 25, 2019 at 5:29 PM Martin Jambor wrote: > > Hi, > > it was brought to my attention that my call to internal_error in the new > IPA-SRA makes -Wformat-diag complain because it thinks that everything > with an underscore is an identifier or a keyword and should be quoted. > Well, the string should not contain "IPA_SRA" but "IPA-SRA" in the first > place so this patch corrects that and hopefully the problem should go > away. While at it I noticed that the %s in the same string should > actually probably be quoted, so I'm replacing it with %qs too. > > Bootstrapped and tested on x86_64-linux. OK for trunk? OK. Richard. > Thanks, > > Martin > > > > 2019-09-25 Martin Jambor > > * ipa-sra.c (verify_splitting_accesses): Fix quoting in a call to > internal_error. > --- > gcc/ipa-sra.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/gcc/ipa-sra.c b/gcc/ipa-sra.c > index b35fff69472..50dee69e3db 100644 > --- a/gcc/ipa-sra.c > +++ b/gcc/ipa-sra.c > @@ -2452,7 +2452,7 @@ verify_splitting_accesses (cgraph_node *node, bool > certain_must_exist) > >bool certain_access_present = !certain_must_exist; >if (overlapping_certain_accesses_p (desc, &certain_access_present)) > - internal_error ("Function %s, parameter %u, has IPA_SRA accesses " > + internal_error ("Function %qs, parameter %u, has IPA-SRA accesses " > "which overlap", node->dump_name (), pidx); >if (!certain_access_present) > internal_error ("Function %s, parameter %u, is used but does not " > -- > 2.23.0 >
Re: [PATCH] Add TODO_update_ssa for SLP BB vectorization (PR tree-optimization/91885).
On Wed, Sep 25, 2019 at 12:06 PM Martin Liška wrote: > > Hi. > > Similarly to SLP pass, we should probably set TODO_update_ssa > when a SLP BB vectorization happens from the normal vect pass. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? Hmm, this was supposed to be handled by if (num_vectorized_loops > 0) { /* If we vectorized any loop only virtual SSA form needs to be updated. ??? Also while we try hard to update loop-closed SSA form we fail to properly do this in some corner-cases (see PR56286). */ rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals); return TODO_cleanup_cfg; } but there isn't an equivalent of num_vectorized_bbs here. Given the above effectively short-cuts your patch (all paths only ever set ret = TODO_cleanup_cfg right now...) it will work without pessimizing things by running update-ssa twice. Can you check whether TODO_update_ssa_only_virtuals is enough as a fix? Otherwise OK. Thanks, Richard. > Thanks, > Martin > > gcc/ChangeLog: > > 2019-09-25 Martin Liska > > PR tree-optimization/91885 > * tree-vectorizer.c (try_vectorize_loop_1): > Add TODO_update_ssa similarly to what slp > pass does. > > gcc/testsuite/ChangeLog: > > 2019-09-25 Martin Liska > > PR tree-optimization/91885 > * gcc.dg/pr91885.c: New test. > --- > gcc/testsuite/gcc.dg/pr91885.c | 47 ++ > gcc/tree-vectorizer.c | 2 +- > 2 files changed, 48 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.dg/pr91885.c > >
Re: [PR 91853] Prevent IPA-SRA ICEs on type-mismatched calls
On Wed, 25 Sep 2019, Martin Jambor wrote: > Hi, > > PR 91853 and its duplicate PR 91894 show that IPA-SRA can stumble when > presented with code with mismatched types, whether because it is a K&R C > or happening through an originally indirect call (or probably also > because of LTO). > > The problem is that we try to work with a register value - in this case > an integer constant - like if it was a pointer to a structure and try to > dereference it in the caller, leading to expressions like ADDR_EXPR of a > constant zero. Old IPA-SRA dealt with these simply by checking type > compatibility which is difficult in an LTO-capable IPA pass, basically > we would at least have to remember and stream a bitmap for each call > telling which arguments are pointers which looks a bit excessive given > that we just don't want to ICE. > > So this patch attempts to deal with the situation rather than avoid it. > When an integer is used instead of a pointer, there is some chance that > it actually contains the pointer value and so I create a NOP_EXPR to > convert it to a pointer (which in the testcase is actually a widening > conversion). For other register types, I don't bother and simply pull > an undefined pointer default definition SSA name and use that. I wonder > whether I should somehow warn as well. Hopefully there is no code doing > that that can conceivably work - maybe someone coding for x86_16 and > passing a vector of integers as a segment and offset pointer? :-) > > What do people think? In any event, this patch passed bootstrap and > testing and deals with the issue, so if it is OK, I'd like to commit it > to trunk. Humm... while I believe this "mostly" matches what we do in inlining (but that also has some type verification disabling inlining for really odd cases IIRC) I think the appropriate fix is in the IPA-SRA decision stage (at WPA) where we should see that we cannot modify a call in this way. 
That of course requires streaming of the actual call stmt (or at least it's "ABI signature"), not sure if you already do that. So in inlining we do static gimple * setup_one_parameter (copy_body_data *id, tree p, tree value, tree fn, basic_block bb, tree *vars) { ... if (value && value != error_mark_node && !useless_type_conversion_p (TREE_TYPE (p), TREE_TYPE (value))) { /* If we can match up types by promotion/demotion do so. */ if (fold_convertible_p (TREE_TYPE (p), value)) rhs = fold_convert (TREE_TYPE (p), value); else { /* ??? For valid programs we should not end up here. Still if we end up with truly mismatched types here, fall back to using a VIEW_CONVERT_EXPR or a literal zero to not leak invalid GIMPLE to the following passes. */ if (!is_gimple_reg_type (TREE_TYPE (value)) || TYPE_SIZE (TREE_TYPE (p)) == TYPE_SIZE (TREE_TYPE (value))) rhs = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (p), value); else rhs = build_zero_cst (TREE_TYPE (p)); } } I suggest that if we go a similar way that we copy this behavior rather than inventing sth similar but slightly different. Maybe split it out as tree force_value_to_type (tree type, tree val) which you then would need to eventually re-gimplify of course (we could in theory refactor setup_one_parameter to work with GIMPLE...) Richard. > Martin > > > > 2019-09-23 Martin Jambor > > PR ipa/91853 > * ipa-param-manipulation.c (ipa_param_adjustments::modify_call): Deal > with register type mismatches. > > testsuite/ > * gcc.dg/ipa/pr91853.c: New test. 
> --- > gcc/ipa-param-manipulation.c | 22 -- > gcc/testsuite/gcc.dg/ipa/pr91853.c | 30 ++ > 2 files changed, 50 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/ipa/pr91853.c > > diff --git a/gcc/ipa-param-manipulation.c b/gcc/ipa-param-manipulation.c > index 913b96fefa4..bc175a5541a 100644 > --- a/gcc/ipa-param-manipulation.c > +++ b/gcc/ipa-param-manipulation.c > @@ -651,8 +651,26 @@ ipa_param_adjustments::modify_call (gcall *stmt, >bool deref_base = false; >unsigned int deref_align = 0; >if (TREE_CODE (base) != ADDR_EXPR > - && POINTER_TYPE_P (TREE_TYPE (base))) > - off = build_int_cst (apm->alias_ptr_type, apm->unit_offset); > + && is_gimple_reg_type (TREE_TYPE (base))) > + { > + /* Detect (gimple register) type mismatches in calls so that we don't > + ICE. Make a poor attempt to gracefully treat integers passed in > + place of pointers, for everything else create a proper undefined > + value which it is. */ > + if (INTEGRAL_TYPE_P (TREE_TYPE (base))) > + { > + tree tmp = make_ssa_name (ptr_type_node); > + gassign *convert = gimple_build_assign (tmp, NOP_EXPR, base); > +
Re: [PATCH] Use build_clobber some more
On Thu, 26 Sep 2019, Jakub Jelinek wrote: > Hi! > > I wasn't aware of this new build_clobber function added last year, > apparently we have still tons of spots that build clobbers by hand and using > build_clobber will make it clear what we are actually building. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. Richard. > 2019-09-26 Jakub Jelinek > > * function.c (gimplify_parameters): Use build_clobber function. > * tree-ssa.c (execute_update_addresses_taken): Likewise. > * tree-inline.c (expand_call_inline): Likewise. > * tree-sra.c (clobber_subtree): Likewise. > * tree-ssa-ccp.c (insert_clobber_before_stack_restore): Likewise. > * omp-low.c (lower_rec_simd_input_clauses, lower_rec_input_clauses, > lower_omp_single, lower_depend_clauses, lower_omp_taskreg, > lower_omp_target): Likewise. > * omp-expand.c (expand_omp_for_generic): Likewise. > * omp-offload.c (ompdevlow_adjust_simt_enter): Likewise. > > --- gcc/function.c.jj 2019-09-11 10:27:40.170772183 +0200 > +++ gcc/function.c2019-09-25 18:22:43.414133650 +0200 > @@ -3892,9 +3892,8 @@ gimplify_parameters (gimple_seq *cleanup > if (!is_gimple_reg (local) > && flag_stack_reuse != SR_NONE) > { > - tree clobber = build_constructor (type, NULL); > + tree clobber = build_clobber (type); > gimple *clobber_stmt; > - TREE_THIS_VOLATILE (clobber) = 1; > clobber_stmt = gimple_build_assign (local, clobber); > gimple_seq_add_stmt (cleanup, clobber_stmt); > } > --- gcc/tree-ssa.c.jj 2019-07-19 11:56:10.438964997 +0200 > +++ gcc/tree-ssa.c2019-09-25 18:24:27.077559222 +0200 > @@ -2016,9 +2016,7 @@ execute_update_addresses_taken (void) > /* In ASAN_MARK (UNPOISON, &b, ...) the variable > is uninitialized. Avoid dependencies on > previous out of scope value. 
*/ > - tree clobber > - = build_constructor (TREE_TYPE (var), NULL); > - TREE_THIS_VOLATILE (clobber) = 1; > + tree clobber = build_clobber (TREE_TYPE (var)); > gimple *g = gimple_build_assign (var, clobber); > gsi_replace (&gsi, g, GSI_SAME_STMT); > } > --- gcc/tree-inline.c.jj 2019-09-20 12:25:48.187387060 +0200 > +++ gcc/tree-inline.c 2019-09-25 18:23:35.633340550 +0200 > @@ -5016,9 +5016,8 @@ expand_call_inline (basic_block bb, gimp > tree *varp = id->decl_map->get (p); > if (varp && VAR_P (*varp) && !is_gimple_reg (*varp)) > { > - tree clobber = build_constructor (TREE_TYPE (*varp), NULL); > + tree clobber = build_clobber (TREE_TYPE (*varp)); > gimple *clobber_stmt; > - TREE_THIS_VOLATILE (clobber) = 1; > clobber_stmt = gimple_build_assign (*varp, clobber); > gimple_set_location (clobber_stmt, gimple_location (stmt)); > gsi_insert_before (&stmt_gsi, clobber_stmt, GSI_SAME_STMT); > @@ -5086,9 +5085,8 @@ expand_call_inline (basic_block bb, gimp > && !is_gimple_reg (id->retvar) > && !stmt_ends_bb_p (stmt)) > { > - tree clobber = build_constructor (TREE_TYPE (id->retvar), NULL); > + tree clobber = build_clobber (TREE_TYPE (id->retvar)); > gimple *clobber_stmt; > - TREE_THIS_VOLATILE (clobber) = 1; > clobber_stmt = gimple_build_assign (id->retvar, clobber); > gimple_set_location (clobber_stmt, gimple_location (old_stmt)); > gsi_insert_after (&stmt_gsi, clobber_stmt, GSI_SAME_STMT); > @@ -5134,9 +5132,8 @@ expand_call_inline (basic_block bb, gimp > && !TREE_THIS_VOLATILE (id->retvar) > && !is_gimple_reg (id->retvar)) > { > - tree clobber = build_constructor (TREE_TYPE (id->retvar), NULL); > + tree clobber = build_clobber (TREE_TYPE (id->retvar)); > gimple *clobber_stmt; > - TREE_THIS_VOLATILE (clobber) = 1; > clobber_stmt = gimple_build_assign (id->retvar, clobber); > gimple_set_location (clobber_stmt, gimple_location (stmt)); > gsi_replace (&stmt_gsi, clobber_stmt, false); > --- gcc/tree-sra.c.jj 2019-09-20 12:25:46.832408059 +0200 > +++ gcc/tree-sra.c2019-09-25 
18:23:59.899971991 +0200 > @@ -3039,8 +3039,7 @@ clobber_subtree (struct access *access, >if (access->grp_to_be_replaced) > { >tree rep = get_access_replacement (access); > - tree clobber = build_constructor (access->type, NULL); > - TREE_THIS_VOLATILE (clobber) = 1; > + tree clobber = build_clobber (access->type); >gimple *stmt = gimple_build_assign (rep, clobber); > >if (insert_after) > --- gcc/tree-ssa